mlpack
|
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include "ra_search.hpp"
#include "ra_model.hpp"
#include <mlpack/methods/neighbor_search/unmap.hpp>
Functions | |
BINDING_NAME ("K-Rank-Approximate-Nearest-Neighbors (kRANN)") | |
BINDING_SHORT_DESC ("An implementation of rank-approximate k-nearest-neighbor search (kRANN) " "using single-tree and dual-tree algorithms. Given a set of reference " "points and query points, this can find the k nearest neighbors in the " "reference set of each query point using trees; trees that are built can " "be saved for future use.") | |
BINDING_LONG_DESC ("This program will calculate the k rank-approximate-nearest-neighbors of a " "set of points. You may specify a separate set of reference points and " "query points, or just a reference set which will be used as both the " "reference and query set. You must specify the rank approximation (in %) " "(and optionally the success probability).") | |
BINDING_EXAMPLE ("For example, the following will return 5 neighbors from the top 0.1% of " "the data (with probability 0.95) for each point in "+PRINT_DATASET("input")+" and store the distances in "+PRINT_DATASET("distances")+" and the neighbors in "+PRINT_DATASET("neighbors")+":" "\n\n"+PRINT_CALL("krann", "reference", "input", "k", 5, "distances", "distances", "neighbors", "neighbors", "tau", 0.1)+"\n\n" "Note that tau must be set such that the number of points in the " "corresponding percentile of the data is greater than k. Thus, if we " "choose tau = 0.1 with a dataset of 1000 points and k = 5, then we are " "attempting to choose 5 nearest neighbors out of the closest 1 point -- " "this is invalid and the program will terminate with an error message." "\n\n" "The output matrices are organized such that row i and column j in the " "neighbors output file corresponds to the index of the point in the " "reference set which is the i'th nearest neighbor from the point in the " "query set with index j. Row i and column j in the distances output file " "corresponds to the distance between those two points.") | |
BINDING_SEE_ALSO ("@knn", "#knn") | |
BINDING_SEE_ALSO ("@lsh", "#lsh") | |
BINDING_SEE_ALSO ("Rank-approximate nearest neighbor search: Retaining meaning" " and speed in high dimensions (pdf)", "https://papers.nips.cc/paper/" "3864-rank-approximate-nearest-neighbor-search-retaining-meaning-and" "-speed-in-high-dimensions.pdf") | |
BINDING_SEE_ALSO ("mlpack::neighbor::RASearch C++ class documentation", "@doxygen/classmlpack_1_1neighbor_1_1RASearch.html") | |
PARAM_MATRIX_IN ("reference", "Matrix containing the reference dataset.", "r") | |
PARAM_MATRIX_OUT ("distances", "Matrix to output distances into.", "d") | |
PARAM_UMATRIX_OUT ("neighbors", "Matrix to output neighbors into.", "n") | |
PARAM_MODEL_IN (RAModel, "input_model", "Pre-trained kRANN model.", "m") | |
PARAM_MODEL_OUT (RAModel, "output_model", "If specified, the kRANN model will be" " output here.", "M") | |
PARAM_MATRIX_IN ("query", "Matrix containing query points (optional).", "q") | |
PARAM_INT_IN ("k", "Number of nearest neighbors to find.", "k", 0) | |
PARAM_STRING_IN ("tree_type", "Type of tree to use: 'kd', 'ub', 'cover', 'r', " "'x', 'r-star', 'hilbert-r', 'r-plus', 'r-plus-plus', 'oct'.", "t", "kd") | |
PARAM_INT_IN ("leaf_size", "Leaf size for tree building (used for kd-trees, " "UB trees, R trees, R* trees, X trees, Hilbert R trees, R+ trees, " "R++ trees, and octrees).", "l", 20) | |
PARAM_FLAG ("random_basis", "Before tree-building, project the data onto a " "random orthogonal basis.", "R") | |
PARAM_INT_IN ("seed", "Random seed (if 0, std::time(NULL) is used).", "s", 0) | |
PARAM_DOUBLE_IN ("tau", "The allowed rank-error in terms of the percentile of " "the data.", "T", 5) | |
PARAM_DOUBLE_IN ("alpha", "The desired success probability.", "a", 0.95) | |
PARAM_FLAG ("naive", "If true, sampling will be done without using a tree.", "N") | |
PARAM_FLAG ("single_mode", "If true, single-tree search is used (as opposed to " "dual-tree search).", "S") | |
PARAM_FLAG ("sample_at_leaves", "The flag to trigger sampling at leaves.", "L") | |
PARAM_FLAG ("first_leaf_exact", "The flag to trigger sampling only after " "exactly exploring the first leaf.", "X") | |
PARAM_INT_IN ("single_sample_limit", "The limit on the maximum number of " "samples (and hence the largest node you can approximate).", "z", 20) | |
Implementation of the kRANN executable. Allows some number of standard options.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.