mlpack
|
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include <mlpack/core/math/random.hpp>
#include <mlpack/core/math/ccov.hpp>
#include <mlpack/core/data/scaler_methods/max_abs_scaler.hpp>
#include <mlpack/core/data/scaler_methods/mean_normalization.hpp>
#include <mlpack/core/data/scaler_methods/min_max_scaler.hpp>
#include <mlpack/core/data/scaler_methods/pca_whitening.hpp>
#include <mlpack/core/data/scaler_methods/zca_whitening.hpp>
#include <mlpack/core/data/scaler_methods/standard_scaler.hpp>
#include "mlpack/methods/preprocess/scaling_model.hpp"
Functions | |
BINDING_NAME ("Scale Data") | |
BINDING_SHORT_DESC ("A utility to perform feature scaling on datasets using one of six " "techniques. Both scaling and inverse scaling are supported, and " "scalers can be saved and then applied to other datasets.") | |
BINDING_LONG_DESC ("This utility takes a dataset and performs feature scaling using one of " "the six scaler methods, namely: 'max_abs_scaler', 'mean_normalization', " "'min_max_scaler', 'standard_scaler', 'pca_whitening' and 'zca_whitening'." " The function takes a matrix as "+PRINT_PARAM_STRING("input")+" and a scaling method type which you can specify using the "+PRINT_PARAM_STRING("scaler_method")+" parameter; the default is " "standard scaler, and outputs a matrix with scaled features." "\n\n" "The output scaled feature matrix may be saved with the "+PRINT_PARAM_STRING("output")+" output parameter." "\n\n" "The model to scale features can be saved using "+PRINT_PARAM_STRING("output_model")+" and later can be loaded back using "+PRINT_PARAM_STRING("input_model")+".") | |
BINDING_EXAMPLE ("So, a simple example where we want to scale the dataset "+PRINT_DATASET("X")+" into "+PRINT_DATASET("X_scaled")+" with " " standard_scaler as scaler_method, we could run " "\n\n"+PRINT_CALL("preprocess_scale", "input", "X", "output", "X_scaled", "scaler_method", "standard_scaler")+"\n\n" "A simple example where we want to whiten the dataset "+PRINT_DATASET("X")+" into "+PRINT_DATASET("X_whitened")+" with " " PCA as whitening_method and use 0.01 as regularization parameter, " "we could run " "\n\n"+PRINT_CALL("preprocess_scale", "input", "X", "output", "X_whitened", "scaler_method", "pca_whitening", "epsilon", 0.01)+"\n\n" "You can also retransform the scaled dataset back using "+PRINT_PARAM_STRING("inverse_scaling")+". An example to rescale: "+PRINT_DATASET("X_scaled")+" into "+PRINT_DATASET("X")+" using the saved model "+PRINT_PARAM_STRING("input_model")+" is:" "\n\n"+PRINT_CALL("preprocess_scale", "input", "X_scaled", "output", "X", "inverse_scaling", true, "input_model", "saved")+"\n\n" "Another simple example where we want to scale the dataset "+PRINT_DATASET("X")+" into "+PRINT_DATASET("X_scaled")+" with " " min_max_scaler as scaler method, where scaling range is 1 to 3 instead" " of default 0 to 1. We could run " "\n\n"+PRINT_CALL("preprocess_scale", "input", "X", "output", "X_scaled", "scaler_method", "min_max_scaler", "min_value", 1, "max_value", 3)) | |
BINDING_SEE_ALSO ("@preprocess_binarize", "#preprocess_binarize") | |
BINDING_SEE_ALSO ("@preprocess_describe", "#preprocess_describe") | |
BINDING_SEE_ALSO ("@preprocess_imputer", "#preprocess_imputer") | |
PARAM_MATRIX_IN_REQ ("input", "Matrix containing data.", "i") | |
PARAM_MATRIX_OUT ("output", "Matrix to save scaled data to.", "o") | |
PARAM_STRING_IN ("scaler_method", "Method to use for scaling; the " "default is standard_scaler.", "a", "standard_scaler") | |
PARAM_DOUBLE_IN ("epsilon", "Regularization parameter for pca_whitening" " or zca_whitening; should be between -1 and 1.", "r", 0.000001) | |
PARAM_INT_IN ("seed", "Random seed (0 for std::time(NULL)).", "s", 0) | |
PARAM_INT_IN ("min_value", "Starting value of range for min_max_scaler.", "b", 0) | |
PARAM_INT_IN ("max_value", "Ending value of range for min_max_scaler.", "e", 1) | |
PARAM_FLAG ("inverse_scaling", "Inverse scaling to get the original dataset.", "f") | |
PARAM_MODEL_IN (ScalingModel, "input_model", "Input Scaling model.", "m") | |
PARAM_MODEL_OUT (ScalingModel, "output_model", "Output scaling model.", "M") | |
A binding to scale a dataset.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.