|
| BINDING_NAME ("Decision tree") |
|
| BINDING_SHORT_DESC ("An implementation of an ID3-style decision tree for classification, which" " supports categorical data. Given labeled data with numeric or " "categorical features, a decision tree can be trained and saved; or, an " "existing decision tree can be used for classification on new points.") |
|
| BINDING_LONG_DESC ("Train and evaluate using a decision tree. Given a dataset containing " "numeric or categorical features, and associated labels for each point in " "the dataset, this program can train a decision tree on that data." "\" "The training set and associated labels are specified with the "+PRINT_PARAM_STRING("training")+" and "+PRINT_PARAM_STRING("labels")+" parameters, respectively. The labels should be in the range [0, " "num_classes - 1]. Optionally, if "+PRINT_PARAM_STRING("labels")+" is not specified, the labels are assumed " "to be the last dimension of the training dataset." "\" "When a model is trained, the "+PRINT_PARAM_STRING("output_model")+" " "output parameter may be used to save the trained model. A model may be " "loaded for predictions with the "+PRINT_PARAM_STRING("input_model")+" parameter. The "+PRINT_PARAM_STRING("input_model")+" parameter " "may not be specified when the "+PRINT_PARAM_STRING("training")+" " "parameter is specified. The "+PRINT_PARAM_STRING("minimum_leaf_size")+" parameter specifies the minimum number of training points that must fall" " into each leaf for it to be split. The "+PRINT_PARAM_STRING("minimum_gain_split")+" parameter specifies " "the minimum gain that is needed for the node to split. The "+PRINT_PARAM_STRING("maximum_depth")+" parameter specifies " "the maximum depth of the tree. If "+PRINT_PARAM_STRING("print_training_error")+" is specified, the training " "error will be printed." "\" "Test data may be specified with the "+PRINT_PARAM_STRING("test")+" " "parameter, and if performance numbers are desired for that test set, " "labels may be specified with the "+PRINT_PARAM_STRING("test_labels")+" parameter. Predictions for each test point may be saved via the "+PRINT_PARAM_STRING("predictions")+" output parameter. Class " "probabilities for each prediction may be saved with the "+PRINT_PARAM_STRING("probabilities")+" output parameter.") |
|
| BINDING_EXAMPLE ("For example, to train a decision tree with a minimum leaf size of 20 on " "the dataset contained in "+PRINT_DATASET("data")+" with labels "+PRINT_DATASET("labels")+", saving the output model to "+PRINT_MODEL("tree")+" and printing the training error, one could " "call" "\"+PRINT_CALL("decision_tree", "training", "data", "labels", "labels", "output_model", "tree", "minimum_leaf_size", 20, "minimum_gain_split", 1e-3, "print_training_accuracy", true)+"\" "Then, to use that model to classify points in "+PRINT_DATASET("test_set")+" and print the test error given the " "labels "+PRINT_DATASET("test_labels")+" using that model, while " "saving the predictions for each point to "+PRINT_DATASET("predictions")+", one could call " "\"+PRINT_CALL("decision_tree", "input_model", "tree", "test", "test_set", "test_labels", "test_labels", "predictions", "predictions")) |
|
| BINDING_SEE_ALSO ("Decision stump", "#decision_stump") |
|
| BINDING_SEE_ALSO ("Random forest", "#random_forest") |
|
| BINDING_SEE_ALSO ("Decision trees on Wikipedia", "https://en.wikipedia.org/wiki/Decision_tree_learning") |
|
| BINDING_SEE_ALSO ("Induction of Decision Trees (pdf)", "https://link.springer.com/content/pdf/10.1007/BF00116251.pdf") |
|
| BINDING_SEE_ALSO ("mlpack::tree::DecisionTree class documentation", "@doxygen/classmlpack_1_1tree_1_1DecisionTree.html") |
|
| PARAM_MATRIX_AND_INFO_IN ("training", "Training dataset (may be categorical).", "t") |
|
| PARAM_UROW_IN ("labels", "Training labels.", "l") |
|
| PARAM_MATRIX_AND_INFO_IN ("test", "Testing dataset (may be categorical).", "T") |
|
| PARAM_MATRIX_IN ("weights", "The weight of labels", "w") |
|
| PARAM_UROW_IN ("test_labels", "Test point labels, if accuracy calculation " "is desired.", "L") |
|
| PARAM_INT_IN ("minimum_leaf_size", "Minimum number of points in a leaf.", "n", 20) |
|
| PARAM_DOUBLE_IN ("minimum_gain_split", "Minimum gain for node splitting.", "g", 1e-7) |
|
| PARAM_INT_IN ("maximum_depth", "Maximum depth of the tree (0 means no limit).", "D", 0) |
|
| PARAM_FLAG ("print_training_error", "Print the training error (deprecated; will " "be removed in mlpack 4.0.0).", "e") |
|
| PARAM_FLAG ("print_training_accuracy", "Print the training accuracy.", "a") |
|
| PARAM_MATRIX_OUT ("probabilities", "Class probabilities for each test point.", "P") |
|
| PARAM_UROW_OUT ("predictions", "Class predictions for each test point.", "p") |
|
| PARAM_MODEL_IN (DecisionTreeModel, "input_model", "Pre-trained decision tree, " "to be used with test points.", "m") |
|
| PARAM_MODEL_OUT (DecisionTreeModel, "output_model", "Output for trained decision tree.", "M") |
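
The training and test matrices above are declared with PARAM_MATRIX_AND_INFO_IN, so they may contain categorical dimensions. In C++ this corresponds to loading the matrix together with a data::DatasetInfo and handing that info to the tree so that categorical splits are used. The sketch below illustrates this under the assumption of mlpack 3.x and an ARFF input file with categorical columns; it is not part of the binding itself.

    // Sketch only: assumes mlpack 3.x and an ARFF file with categorical columns.
    #include <mlpack/core.hpp>
    #include <mlpack/methods/decision_tree/decision_tree.hpp>

    using namespace mlpack;

    int main()
    {
      // DatasetInfo records which dimensions are numeric and which are categorical.
      arma::mat dataset;
      data::DatasetInfo info;
      data::Load("data.arff", dataset, info, true);

      arma::Mat<size_t> labelsIn;
      data::Load("labels.csv", labelsIn, true);
      arma::Row<size_t> labels = labelsIn.row(0);

      // Passing the DatasetInfo lets the tree split categorical dimensions correctly.
      tree::DecisionTree<> tree(dataset, info, labels, labels.max() + 1,
          20 /* minimumLeafSize */);
    }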
|
- Author: Ryan Curtin
A command-line program to build a decision tree.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.