mlpack
|
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include <mlpack/core/math/random.hpp>
#include <mlpack/core/kernels/kernel_traits.hpp>
#include <mlpack/core/kernels/linear_kernel.hpp>
#include <mlpack/core/kernels/polynomial_kernel.hpp>
#include <mlpack/core/kernels/cosine_distance.hpp>
#include <mlpack/core/kernels/gaussian_kernel.hpp>
#include <mlpack/core/kernels/epanechnikov_kernel.hpp>
#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
#include <mlpack/core/kernels/laplacian_kernel.hpp>
#include <mlpack/core/kernels/pspectrum_string_kernel.hpp>
#include <mlpack/core/kernels/spherical_kernel.hpp>
#include <mlpack/core/kernels/triangular_kernel.hpp>
#include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>
#include <mlpack/methods/nystroem_method/ordered_selection.hpp>
#include <mlpack/methods/nystroem_method/random_selection.hpp>
#include <mlpack/methods/nystroem_method/kmeans_selection.hpp>
#include <mlpack/methods/nystroem_method/nystroem_method.hpp>
#include <mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp>
#include "kernel_pca.hpp"
Functions | |
BINDING_NAME ("Kernel Principal Components Analysis") | |
BINDING_SHORT_DESC ("An implementation of Kernel Principal Components Analysis (KPCA). This " "can be used to perform nonlinear dimensionality reduction or preprocessing" " on a given dataset.") | |
BINDING_LONG_DESC ("This program performs Kernel Principal Components Analysis (KPCA) on the " "specified dataset with the specified kernel. This will transform the " "data onto the kernel principal components, and optionally reduce the " "dimensionality by ignoring the kernel principal components with the " "smallest eigenvalues." "\n\n" "For the case where a linear kernel is used, this reduces to regular " "PCA." "\n\n" "The kernels that are supported are listed below:" "\n\n" " * 'linear': the standard linear dot product (same as normal PCA):\n" "    K(x, y) = x^T y\n" "\n" " * 'gaussian': a Gaussian kernel; requires bandwidth:\n" "    K(x, y) = exp(-(|| x - y || ^ 2) / (2 * (bandwidth ^ 2)))\n" "\n" " * 'polynomial': polynomial kernel; requires offset and degree:\n" "    K(x, y) = (x^T y + offset) ^ degree\n" "\n" " * 'hyptan': hyperbolic tangent kernel; requires scale and offset:\n" "    K(x, y) = tanh(scale * (x^T y) + offset)\n" "\n" " * 'laplacian': Laplacian kernel; requires bandwidth:\n" "    K(x, y) = exp(-(|| x - y ||) / bandwidth)\n" "\n" " * 'epanechnikov': Epanechnikov kernel; requires bandwidth:\n" "    K(x, y) = max(0, 1 - || x - y ||^2 / bandwidth^2)\n" "\n" " * 'cosine': cosine distance:\n" "    K(x, y) = 1 - (x^T y) / (|| x || * || y ||)\n" "\n" "The parameters for each of the kernels should be specified with the " "options " + PRINT_PARAM_STRING("bandwidth") + ", " + PRINT_PARAM_STRING("kernel_scale") + ", " + PRINT_PARAM_STRING("offset") + ", or " + PRINT_PARAM_STRING("degree") + " (or a combination of those parameters)." "\n\n" "Optionally, the Nystroem method (\"Using the Nystroem method to speed " "up kernel machines\", 2001) can be used to calculate the kernel matrix by " "specifying the " + PRINT_PARAM_STRING("nystroem_method") + " parameter. " "This approach works by using a subset of the data as basis to reconstruct " "the kernel matrix; to specify the sampling scheme, the " + PRINT_PARAM_STRING("sampling") + " parameter is used. The " "sampling scheme for the Nystroem method can be chosen from the " "following list: 'kmeans', 'random', 'ordered'.") | |
BINDING_EXAMPLE ("For example, the following command will perform KPCA on the dataset " + PRINT_DATASET("input") + " using the Gaussian kernel, and saving the " "transformed data to " + PRINT_DATASET("transformed") + ": " "\n\n" + PRINT_CALL("kernel_pca", "input", "input", "kernel", "gaussian", "output", "transformed")) | |
BINDING_SEE_ALSO ("Kernel principal component analysis on Wikipedia", "https://en.wikipedia.org/wiki/Kernel_principal_component_analysis") | |
BINDING_SEE_ALSO ("Kernel Principal Component Analysis (pdf)", "http://pca.narod.ru/scholkopf_kernel.pdf") | |
BINDING_SEE_ALSO ("mlpack::kpca::KernelPCA class documentation", "@doxygen/classmlpack_1_1kpca_1_1KernelPCA.html") | |
PARAM_MATRIX_IN_REQ ("input", "Input dataset to perform KPCA on.", "i") | |
PARAM_MATRIX_OUT ("output", "Matrix to save modified dataset to.", "o") | |
PARAM_STRING_IN_REQ ("kernel", "The kernel to use; see the above documentation " "for the list of usable kernels.", "k") | |
PARAM_INT_IN ("new_dimensionality", "If not 0, reduce the dimensionality of " "the output dataset by ignoring the dimensions with the smallest " "eigenvalues.", "d", 0) | |
PARAM_FLAG ("center", "If set, the transformed data will be centered about the " "origin.", "c") | |
PARAM_FLAG ("nystroem_method", "If set, the Nystroem method will be used.", "n") | |
PARAM_STRING_IN ("sampling", "Sampling scheme to use for the Nystroem method: " "'kmeans', 'random', 'ordered'", "s", "kmeans") | |
PARAM_DOUBLE_IN ("kernel_scale", "Scale, for 'hyptan' kernel.", "S", 1.0) | |
PARAM_DOUBLE_IN ("offset", "Offset, for 'hyptan' and 'polynomial' kernels.", "O", 0.0) | |
PARAM_DOUBLE_IN ("bandwidth", "Bandwidth, for 'gaussian' and 'laplacian' " "kernels.", "b", 1.0) | |
PARAM_DOUBLE_IN ("degree", "Degree of polynomial, for 'polynomial' kernel.", "D", 1.0) | |
template<typename KernelType > | |
void | RunKPCA (arma::mat &dataset, const bool centerTransformedData, const bool nystroem, const size_t newDim, const string &sampling, KernelType &kernel) |
Run KPCA on the specified dataset for the given kernel type. | |
Executable for Kernel PCA.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.