30 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP 31 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP 50 template<
typename SortPolicy = NearestNeighborSort>
96 void Train(
const arma::mat& referenceSet,
98 const size_t numTables,
122 void Search(
const arma::mat& querySet,
124 arma::Mat<size_t>& resultingNeighbors,
125 arma::mat& distances,
126 const size_t numTablesToSearch = 0);
146 void Search(
const size_t k,
147 arma::Mat<size_t>& resultingNeighbors,
148 arma::mat& distances,
149 const size_t numTablesToSearch = 0);
156 template<
typename Archive>
157 void Serialize(Archive& ar,
const unsigned int );
211 template<
typename VecType>
213 arma::uvec& referenceIndices,
214 size_t numTablesToSearch)
const;
227 void BaseCase(
const size_t queryIndex,
228 const size_t referenceIndex,
229 arma::Mat<size_t>& neighbors,
230 arma::mat& distances)
const;
244 void BaseCase(
const size_t queryIndex,
245 const size_t referenceIndex,
246 const arma::mat& querySet,
247 arma::Mat<size_t>& neighbors,
248 arma::mat& distances)
const;
265 arma::Mat<size_t>& neighbors,
266 const size_t queryIndex,
268 const size_t neighbor,
269 const double distance)
const;
318 #include "lsh_search_impl.hpp"
void Train(const arma::mat &referenceSet, const size_t numProj, const size_t numTables, const double hashWidth=0.0, const size_t secondHashSize=99901, const size_t bucketSize=500)
Train the LSH model on the given dataset.
size_t numTables
The number of hash tables.
Linear algebra utility functions, generally performed on matrices or vectors.
size_t BucketSize() const
Get the bucket size of the second hash.
void BuildHash()
This function builds a hash table with two levels of hashing as presented in the paper.
LSHSearch()
Create an untrained LSH model.
size_t bucketSize
The bucket size of the second hash.
const arma::mat & Projection(const size_t i) const
Get the projection matrix of the given table.
const arma::mat & ReferenceSet() const
Return the reference dataset.
std::vector< arma::mat > projections
The std::vector containing the projection matrix of each table.
arma::Col< size_t > bucketContentSize
The number of elements present in each hash bucket; the length of this vector should be secondHashSize.
size_t DistanceEvaluations() const
Return the number of distance evaluations performed.
The LSHSearch class; this class builds a hash on the reference set and uses this hash to compute the ...
void Search(const arma::mat &querySet, const size_t k, arma::Mat< size_t > &resultingNeighbors, arma::mat &distances, const size_t numTablesToSearch=0)
Compute the nearest neighbors of the points in the given query set and store the output in the given ...
void ReturnIndicesFromTable(const VecType &queryPoint, arma::uvec &referenceIndices, size_t numTablesToSearch) const
This function takes a query and hashes it into each of the hash tables to get keys for the query and ...
size_t NumProjections() const
Get the number of projections.
void BaseCase(const size_t queryIndex, const size_t referenceIndex, arma::Mat< size_t > &neighbors, arma::mat &distances) const
This is a helper function that computes the distance of the query to the neighbor candidates and appr...
arma::mat offsets
The list of the offsets 'b' for each of the projections for each table.
const arma::mat & Offsets() const
Get the offsets 'b' for each of the projections. (One 'b' per column.)
void Serialize(Archive &ar, const unsigned int)
Serialize the LSH model.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen docu...
size_t & DistanceEvaluations()
Modify the number of distance evaluations performed.
arma::Mat< size_t > secondHashTable
The final hash table; its dimensions should be (< secondHashSize) x bucketSize.
const arma::vec & SecondHashWeights() const
Get the weights of the second hash.
size_t secondHashSize
The big prime representing the size of the second hash.
size_t numProj
The number of projections.
bool ownsSet
If true, we own the reference set.
void InsertNeighbor(arma::mat &distances, arma::Mat< size_t > &neighbors, const size_t queryIndex, const size_t pos, const size_t neighbor, const double distance) const
This is a helper function that efficiently inserts better neighbor candidates into an existing set of...
arma::vec secondHashWeights
The weights of the second hash.
const arma::mat * referenceSet
Reference dataset.
double hashWidth
The hash width.
const arma::Mat< size_t > & SecondHashTable() const
Get the second hash table.
arma::Col< size_t > bucketRowInHashTable
For a particular hash value, points to the row in secondHashTable corresponding to this value...
size_t distanceEvaluations
The number of distance evaluations.
~LSHSearch()
Clean memory.