mlpack
Classes | Functions
approx_kfn_main.cpp File Reference
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include "drusilla_select.hpp"
#include "qdafn.hpp"
Include dependency graph for approx_kfn_main.cpp:
This graph shows which files directly or indirectly include this file:

Classes

class  ApproxKFNModel
 

Functions

 BINDING_NAME ("Approximate furthest neighbor search")
 
 BINDING_SHORT_DESC ("An implementation of two strategies for furthest neighbor search. This " "can be used to compute the furthest neighbor of query point(s) from a set " "of points; furthest neighbor models can be saved and reused with future " "query point(s).")
 
 BINDING_LONG_DESC ("This program implements two strategies for furthest neighbor search. " "These strategies are:" "\" " - The 'qdafn' algorithm from \pproximate Furthest Neighbor in High " "Dimensions\by R. Pagh, F. Silvestri, J. Sivertsen, and M. Skala, in " "Similarity Search and Applications 2015 (SISAP)." "\ " - The 'DrusillaSelect' algorithm from \Fast approximate furthest " "neighbors with data-dependent candidate selection\, by R.R. Curtin and " "A.B. Gardner, in Similarity Search and Applications 2016(SISAP)." "\\" "These two strategies give approximate results for the furthest neighbor " "search problem and can be used as fast replacements for other furthest " "neighbor techniques such as those found in the mlpack_kfn program. Note " "that typically, the 'ds' algorithm requires far fewer tables and " "projections than the 'qdafn' algorithm." "\\" "Specify a reference set(set to search in) with "+PRINT_PARAM_STRING("reference")+", specify a query set with "+PRINT_PARAM_STRING("query")+", and specify algorithm parameters with "+PRINT_PARAM_STRING("num_tables")+" and "+PRINT_PARAM_STRING("num_projections")+"(or don 't and defaults will be " "used). The algorithm to be used(either 'ds'---the default---or 'qdafn') " " may be specified with "+PRINT_PARAM_STRING("algorithm")+". Also " "specify the number of neighbors to search for with "+PRINT_PARAM_STRING("k")+"." "\\" "Note that for 'qdafn' in lower dimensions, "+PRINT_PARAM_STRING("num_projections")+" may need to be set to a high " "value in order to return results for each query point." "\\" "If no query set is specified, the reference set will be used as the " "query set. The "+PRINT_PARAM_STRING("output_model")+" output " "parameter may be used to store the built model, and an input model may be " "loaded instead of specifying a reference set with the "+PRINT_PARAM_STRING("input_model")+" option." "\\" "Results for each query point can be stored with the "+PRINT_PARAM_STRING("neighbors")+" and "+PRINT_PARAM_STRING("distances")+" output parameters. Each row of these " "output matrices holds the k distances or neighbor indices for each query " "point.")
 
 BINDING_EXAMPLE ("For example, to find the 5 approximate furthest neighbors with "+PRINT_DATASET("reference_set")+" as the reference set and "+PRINT_DATASET("query_set")+" as the query set using DrusillaSelect, " "storing the furthest neighbor indices to "+PRINT_DATASET("neighbors")+" and the furthest neighbor distances to "+PRINT_DATASET("distances")+", one could call" "\"+PRINT_CALL("approx_kfn", "query", "query_set", "reference", "reference_set", "k", 5, "algorithm", "ds", "neighbors", "neighbors", "distances", "distances")+"\" "and to perform approximate all-furthest-neighbors search with k=1 on the " "set "+PRINT_DATASET("data")+" storing only the furthest neighbor " "distances to "+PRINT_DATASET("distances")+", one could call" "\"+PRINT_CALL("approx_kfn", "reference", "reference_set", "k", 1, "distances", "distances")+"\" "A trained model can be re-used. If a model has been previously saved to "+PRINT_MODEL("model")+", then we may find 3 approximate furthest " "neighbors on a query set "+PRINT_DATASET("new_query_set")+" using " "that model and store the furthest neighbor indices into "+PRINT_DATASET("neighbors")+" by calling" "\"+PRINT_CALL("approx_kfn", "input_model", "model", "query", "new_query_set", "k", 3, "neighbors", "neighbors"))
 
 BINDING_SEE_ALSO ("k-furthest-neighbor search", "#kfn")
 
 BINDING_SEE_ALSO ("k-nearest-neighbor search", "#knn")
 
 BINDING_SEE_ALSO ("Fast approximate furthest neighbors with data-dependent" " candidate selection (pdf)", "http://ratml.org/pub/pdf/2016fast.pdf")
 
 BINDING_SEE_ALSO ("Approximate furthest neighbor in high dimensions (pdf)", "https://pdfs.semanticscholar.org/a4b5/7b9cbf37201fb1d9a56c0f4eefad0466" "9c20.pdf")
 
 BINDING_SEE_ALSO ("mlpack::neighbor::QDAFN class documentation", "@doxygen/classmlpack_1_1neighbor_1_1QDAFN.html")
 
 BINDING_SEE_ALSO ("mlpack::neighbor::DrusillaSelect class documentation", "@doxygen/classmlpack_1_1neighbor_1_1DrusillaSelect.html")
 
 PARAM_MATRIX_IN ("reference", "Matrix containing the reference dataset.", "r")
 
 PARAM_MATRIX_IN ("query", "Matrix containing query points.", "q")
 
 PARAM_INT_IN ("k", "Number of furthest neighbors to search for.", "k", 0)
 
 PARAM_INT_IN ("num_tables", "Number of hash tables to use.", "t", 5)
 
 PARAM_INT_IN ("num_projections", "Number of projections to use in each hash " "table.", "p", 5)
 
 PARAM_STRING_IN ("algorithm", "Algorithm to use: 'ds' or 'qdafn'.", "a", "ds")
 
 PARAM_UMATRIX_OUT ("neighbors", "Matrix to save neighbor indices to.", "n")
 
 PARAM_MATRIX_OUT ("distances", "Matrix to save furthest neighbor distances to.", "d")
 
 PARAM_FLAG ("calculate_error", "If set, calculate the average distance error for" " the first furthest neighbor only.", "e")
 
 PARAM_MATRIX_IN ("exact_distances", "Matrix containing exact distances to " "furthest neighbors; this can be used to avoid explicit calculation when " "--calculate_error is set.", "x")
 
 PARAM_MODEL_IN (ApproxKFNModel, "input_model", "File containing input model.", "m")
 
 PARAM_MODEL_OUT (ApproxKFNModel, "output_model", "File to save output model to.", "M")
 

Detailed Description

Author
Ryan Curtin

Command-line program for various furthest neighbor search algorithms.

mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.