mlpack
|
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include "logistic_regression.hpp"
#include <ensmallen.hpp>
Functions | |
BINDING_NAME ("L2-regularized Logistic Regression and Prediction") | |
BINDING_SHORT_DESC ("An implementation of L2-regularized logistic regression for two-class " "classification. Given labeled data, a model can be trained and saved for " "future use; or, a pre-trained model can be used to classify new points.") | |
BINDING_LONG_DESC ("An implementation of L2-regularized logistic regression using either the " "L-BFGS optimizer or SGD (stochastic gradient descent). This solves the " "regression problem" "\n\n" " y = (1 / 1 + e^-(X * b))" "\n\n" "where y takes values 0 or 1." "\n\n" "This program allows loading a logistic regression model (via the "+PRINT_PARAM_STRING("input_model")+" parameter) " "or training a logistic regression model given training data (specified " "with the "+PRINT_PARAM_STRING("training")+" parameter), or both " "those things at once. In addition, this program allows classification on " "a test dataset (specified with the "+PRINT_PARAM_STRING("test")+" " "parameter) and the classification results may be saved with the "+PRINT_PARAM_STRING("predictions")+" output parameter." " The trained logistic regression model may be saved using the "+PRINT_PARAM_STRING("output_model")+" output parameter." "\n\n" "The training data, if specified, may have class labels as its last " "dimension. Alternately, the "+PRINT_PARAM_STRING("labels")+" " "parameter may be used to specify a separate matrix of labels." "\n\n" "When a model is being trained, there are many options. L2 regularization " "(to prevent overfitting) can be specified with the "+PRINT_PARAM_STRING("lambda")+" option, and the " "optimizer used to train the model can be specified with the "+PRINT_PARAM_STRING("optimizer")+" parameter. Available options are " "'sgd' (stochastic gradient descent) and 'lbfgs' (the L-BFGS optimizer). " "There are also various parameters for the optimizer; the "+PRINT_PARAM_STRING("max_iterations")+" parameter specifies the maximum " "number of allowed iterations, and the "+PRINT_PARAM_STRING("tolerance")+" parameter specifies the tolerance for " "convergence. For the SGD optimizer, the "+PRINT_PARAM_STRING("step_size")+" parameter controls the step size taken " "at each iteration by the optimizer. The batch size for SGD is controlled " "with the "+PRINT_PARAM_STRING("batch_size")+" parameter. If the " "objective function for your data is oscillating between Inf and 0, the " "step size is probably too large. There are more parameters for the " "optimizers, but the C++ interface must be used to access these." "\n\n" "For SGD, an iteration refers to a single point. So to take a single pass " "over the dataset with SGD, "+PRINT_PARAM_STRING("max_iterations")+" should be set to the number of points in the dataset." "\n\n" "Optionally, the model can be used to predict the responses for another " "matrix of data points, if "+PRINT_PARAM_STRING("test")+" is " "specified. The "+PRINT_PARAM_STRING("test")+" parameter can be " "specified without the "+PRINT_PARAM_STRING("training")+" parameter, " "so long as an existing logistic regression model is given with the "+PRINT_PARAM_STRING("input_model")+" parameter. The output predictions " "from the logistic regression model may be saved with the "+PRINT_PARAM_STRING("predictions")+" parameter."+"\n\n" "Note : The following parameters are deprecated and " "will be removed in mlpack 4: "+PRINT_PARAM_STRING("output")+", "+PRINT_PARAM_STRING("output_probabilities")+"\nUse "+PRINT_PARAM_STRING("predictions")+" instead of "+PRINT_PARAM_STRING("output")+"\nUse "+PRINT_PARAM_STRING("probabilities")+" instead of "+PRINT_PARAM_STRING("output_probabilities")+"\n\n" "This implementation of logistic regression does not support the general " "multi-class case but instead only the two-class case. Any labels must " "be either 0 or 1. For more classes, see the softmax_regression " "program.") | |
BINDING_EXAMPLE ("As an example, to train a logistic regression model on the data '"+PRINT_DATASET("data")+"' with labels '"+PRINT_DATASET("labels")+"' " "with L2 regularization of 0.1, saving the model to '"+PRINT_MODEL("lr_model")+"', the following command may be used:" "\n\n"+PRINT_CALL("logistic_regression", "training", "data", "labels", "labels", "lambda", 0.1, "output_model", "lr_model")+"\n\n" "Then, to use that model to predict classes for the dataset '"+PRINT_DATASET("test")+"', storing the output predictions in '"+PRINT_DATASET("predictions")+"', the following command may be used: " "\n\n"+PRINT_CALL("logistic_regression", "input_model", "lr_model", "test", "test", "output", "predictions")) | |
BINDING_SEE_ALSO ("@softmax_regression", "#softmax_regression") | |
BINDING_SEE_ALSO ("@random_forest", "#random_forest") | |
BINDING_SEE_ALSO ("Logistic regression on Wikipedia", "https://en.wikipedia.org/wiki/Logistic_regression") | |
BINDING_SEE_ALSO ("mlpack::regression::LogisticRegression C++ class " "documentation", "@doxygen/classmlpack_1_1regression_1_1LogisticRegression.html") | |
PARAM_MATRIX_IN ("training", "A matrix containing the training set (the matrix " "of predictors, X).", "t") | |
PARAM_UROW_IN ("labels", "A matrix containing labels (0 or 1) for the points " "in the training set (y).", "l") | |
PARAM_DOUBLE_IN ("lambda", "L2-regularization parameter for training.", "L", 0.0) | |
PARAM_STRING_IN ("optimizer", "Optimizer to use for training ('lbfgs' or " "'sgd').", "O", "lbfgs") | |
PARAM_DOUBLE_IN ("tolerance", "Convergence tolerance for optimizer.", "e", 1e-10) | |
PARAM_INT_IN ("max_iterations", "Maximum iterations for optimizer (0 indicates " "no limit).", "n", 10000) | |
PARAM_DOUBLE_IN ("step_size", "Step size for SGD optimizer.", "s", 0.01) | |
PARAM_INT_IN ("batch_size", "Batch size for SGD.", "b", 64) | |
PARAM_MODEL_IN (LogisticRegression<>, "input_model", "Existing model " "(parameters).", "m") | |
PARAM_MODEL_OUT (LogisticRegression<>, "output_model", "Output for trained " "logistic regression model.", "M") | |
PARAM_MATRIX_IN ("test", "Matrix containing test dataset.", "T") | |
PARAM_UROW_OUT ("output", "If test data is specified, this matrix is where " "the predictions for the test set will be saved.", "o") | |
PARAM_UROW_OUT ("predictions", "If test data is specified, this matrix is where " "the predictions for the test set will be saved.", "P") | |
PARAM_MATRIX_OUT ("output_probabilities", "If test data is specified, this " "matrix is where the class probabilities for the test set will be saved.", "x") | |
PARAM_MATRIX_OUT ("probabilities", "If test data is specified, this " "matrix is where the class probabilities for the test set will be saved.", "p") | |
PARAM_DOUBLE_IN ("decision_boundary", "Decision boundary for prediction; if the " "logistic function for a point is less than the boundary, the class is " "taken to be 0; otherwise, the class is 1.", "d", 0.5) | |
Main executable for logistic regression.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.