23 #define SUMSQCOLS(A) ((A).array().square().colwise().sum())
25 using namespace shogun;
26 using namespace Eigen;
29 : example(ex), target(tar), impostor(imp)
33 bool CImpostorNode::operator<(
const CImpostorNode& rhs)
const
35 if (example == rhs.example)
37 if (target == rhs.target)
38 return impostor < rhs.impostor;
40 return target < rhs.target;
43 return example < rhs.example;
46 void CLMNNImpl::check_training_setup(
CFeatures* features,
const CLabels* labels,
50 "LMNN can only be applied to features that support dot products\n")
52 "LMNN supports only MulticlassLabels\n")
54 "The number of feature vectors must be equal to the number of labels\n")
57 "Currently, LMNN supports only DenseFeatures\n")
64 init_transform = CLMNNImpl::compute_pca_transform(x);
68 "The initial transform must be a square matrix of size equal to the "
69 "number of features\n")
75 SG_SDEBUG(
"Entering CLMNNImpl::find_target_nn().\n")
78 int32_t d = x->get_num_features();
90 for (
index_t i = 0; i < unique_labels.vlen; ++i)
92 int32_t slice_size = 0;
97 idxsmap[slice_size] = j;
102 MatrixXd slice_mat(d, slice_size);
103 for (int32_t j = 0; j < slice_size; ++j)
106 features_slice->set_feature_matrix(
SGMatrix<float64_t>(slice_mat.data(), d, slice_size,
false));
111 labels_vec.set_const(unique_labels[i]);
112 labels_slice->set_labels(labels_vec);
119 for (
index_t j = 0; j < target_slice.num_cols; ++j)
122 target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
133 SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")
135 return target_neighbors;
141 int32_t d = x->get_num_features();
146 Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors());
149 for (
index_t i = 0; i < target_nn.num_cols; ++i)
151 for (
index_t j = 0; j < target_nn.num_rows; ++j)
153 VectorXd dx = X.col(i) - X.col(target_nn(j,i));
154 sop += dx*dx.transpose();
163 const uint32_t iter,
const uint32_t correction)
165 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors().\n")
168 int32_t n = x->get_num_vectors();
170 int32_t d = x->get_num_features();
172 int32_t k = target_nn.num_rows;
175 Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n);
180 MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
185 static ImpostorsSetType Nexact;
188 REQUIRE(correction>0, "The number of iterations between exact updates of the "
189 "impostors set must be greater than 0\n")
190 if ((iter % correction)==0)
192 Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
197 N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
200 SG_SDEBUG(
"Leaving CLMNNImpl::find_impostors().\n")
205 void CLMNNImpl::update_gradient(
CDenseFeatures<float64_t>* x, MatrixXd& G,
206 const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
209 ImpostorsSetType Np_Nc, Nc_Np;
210 set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
211 set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
214 Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
218 for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
220 VectorXd dx1 = X.col(it->example) - X.col(it->target);
221 VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
222 G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
226 for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
228 VectorXd dx1 = X.col(it->example) - X.col(it->target);
229 VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
230 G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
234 void CLMNNImpl::gradient_step(MatrixXd& L,
const MatrixXd& G, float64_t stepsize,
bool diagonal)
239 MatrixXd M = L.transpose()*L;
243 VectorXd m = M.diagonal();
246 zero.resize(m.size());
250 VectorXd l = m.array().max(zero.array()).array().sqrt();
256 L -= stepsize*(2*L*G);
260 void CLMNNImpl::correct_stepsize(float64_t& stepsize,
const SGVector<float64_t> obj,
const uint32_t iter)
265 float64_t
delta = obj[iter] - obj[iter-1];
282 bool CLMNNImpl::check_termination(float64_t stepsize,
const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
284 if (iter >= maxiter-1)
286 SG_SWARNING(
"Maximum number of iterations reached before convergence.");
290 if (stepsize < stepsize_threshold)
292 SG_SDEBUG(
"Step size too small to make more progress. Convergence reached.\n");
298 for (int32_t i = 0; i < 3; ++i)
300 if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)
304 SG_SDEBUG(
"No more progress in the objective. Convergence reached.\n");
314 SG_SDEBUG(
"Initializing LMNN transform using PCA.\n");
322 mean_substractor->init(cloned_features);
323 mean_substractor->apply_to_feature_matrix(cloned_features);
328 pca->
init(cloned_features);
335 return pca_transform;
338 MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX,
const SGMatrix<index_t> target_nn)
342 int32_t n = LX.cols();
344 int32_t d = LX.rows();
355 MatrixXd sqdists(k,n);
357 for (int32_t i = 0; i < k; ++i)
363 lx->add_subset(subset_vec);
366 sqdists.row(i) =
SUMSQCOLS(LX - Map<const MatrixXd>(lx->get_feature_matrix().
matrix, d, n)) + 1;
376 ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX,
const MatrixXd& sqdists,
379 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors_exact().\n")
382 ImpostorsSetType N = ImpostorsSetType();
385 int32_t n = LX.cols();
387 int32_t d = LX.rows();
389 SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
393 SGVector<float64_t> unique = y->get_unique_labels();
396 for (
index_t i = 0; i < unique.vlen-1; ++i)
399 std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]);
402 std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]);
408 for (int32_t j = 0; j < k; ++j)
410 for (std::size_t ii = 0; ii < iidxs.size(); ++ii)
412 for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)
417 if (distance <= sqdists(j,iidxs[ii]))
418 N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );
420 if (distance <= sqdists(j,gtidxs[jj]))
421 N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
431 SG_SDEBUG(
"Leaving CLMNNImpl::find_impostors_exact().\n")
436 ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
439 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors_approx().\n")
442 ImpostorsSetType N = ImpostorsSetType();
445 SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact);
449 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
453 while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows)
456 REQUIRE(target_idx<target_nn.num_rows,
"The index of the target neighbour in the "
457 "impostors set was not found in the target neighbours matrix. "
458 "There must be a bug in find_impostors_exact.\n")
460 if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
464 SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
469 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
472 int32_t n = LX.cols();
474 int32_t d = LX.rows();
476 size_t num_impostors = Nexact.
size();
484 euclidean->set_disable_sqrt(
true);
490 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
491 sqdists[i++] = euclidean->distance(it->example,it->impostor);
499 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y,
503 std::vector<index_t> idxs;
514 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y,
518 std::vector<index_t> idxs;
530 std::vector<index_t>& a, std::vector<index_t>& b)