mlpack
decision_tree_main.cpp File Reference
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include "decision_tree.hpp"
Include dependency graph for decision_tree_main.cpp: (graph not reproduced here)

Classes

class  DecisionTreeModel
 The model class that this binding serializes; it is the type of the input_model and output_model parameters.
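
 A minimal sketch of what such a binding model wrapper typically looks like in mlpack 3.x follows; the member names and the Boost.Serialization hook are assumptions for illustration, not code copied from decision_tree_main.cpp.

    #include <mlpack/core.hpp>
    #include <mlpack/methods/decision_tree/decision_tree.hpp>

    // Hypothetical sketch: a serializable wrapper holding the trained tree and
    // the categorical metadata of the data it was trained on.
    class DecisionTreeModel
    {
     public:
      mlpack::tree::DecisionTree<> tree;
      mlpack::data::DatasetInfo info;

      // Boost.Serialization hook used by data::Load()/data::Save() in mlpack 3.x.
      template<typename Archive>
      void serialize(Archive& ar, const unsigned int /* version */)
      {
        ar & BOOST_SERIALIZATION_NVP(tree);
        ar & BOOST_SERIALIZATION_NVP(info);
      }
    };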
 

Typedefs

typedef tuple< DatasetInfo, arma::mat > TupleType
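
 Matrix-and-info parameters ("training" and "test" below) are delivered to the binding as this tuple type: the DatasetInfo half records which dimensions are categorical, and the matrix half holds the numerically encoded data. The following is a hedged sketch of reading such a tuple; the helper name is hypothetical.

    #include <mlpack/core.hpp>
    #include <tuple>

    using TupleType = std::tuple<mlpack::data::DatasetInfo, arma::mat>;

    // Hypothetical helper: inspect a (DatasetInfo, matrix) pair.
    void InspectDataset(const TupleType& training)
    {
      const mlpack::data::DatasetInfo& info = std::get<0>(training);
      const arma::mat& dataset = std::get<1>(training);

      for (size_t d = 0; d < dataset.n_rows; ++d)
      {
        // Each dimension is tagged as numeric or categorical; categorical
        // dimensions store category indices in the matrix.
        if (info.Type(d) == mlpack::data::Datatype::categorical)
        {
          const size_t categories = info.NumMappings(d);
          (void) categories;
        }
      }
    }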
 

Functions

 BINDING_NAME ("Decision tree")
 
 BINDING_SHORT_DESC ("An implementation of an ID3-style decision tree for classification, which" " supports categorical data. Given labeled data with numeric or " "categorical features, a decision tree can be trained and saved; or, an " "existing decision tree can be used for classification on new points.")
 
 BINDING_LONG_DESC ("Train and evaluate using a decision tree. Given a dataset containing " "numeric or categorical features, and associated labels for each point in " "the dataset, this program can train a decision tree on that data." "\n\n" "The training set and associated labels are specified with the "+PRINT_PARAM_STRING("training")+" and "+PRINT_PARAM_STRING("labels")+" parameters, respectively. The labels should be in the range [0, " "num_classes - 1]. Optionally, if "+PRINT_PARAM_STRING("labels")+" is not specified, the labels are assumed " "to be the last dimension of the training dataset." "\n\n" "When a model is trained, the "+PRINT_PARAM_STRING("output_model")+" " "output parameter may be used to save the trained model. A model may be " "loaded for predictions with the "+PRINT_PARAM_STRING("input_model")+" parameter. The "+PRINT_PARAM_STRING("input_model")+" parameter " "may not be specified when the "+PRINT_PARAM_STRING("training")+" " "parameter is specified. The "+PRINT_PARAM_STRING("minimum_leaf_size")+" parameter specifies the minimum number of training points that must fall" " into each leaf for it to be split. The "+PRINT_PARAM_STRING("minimum_gain_split")+" parameter specifies " "the minimum gain that is needed for the node to split. The "+PRINT_PARAM_STRING("maximum_depth")+" parameter specifies " "the maximum depth of the tree. If "+PRINT_PARAM_STRING("print_training_error")+" is specified, the training " "error will be printed." "\n\n" "Test data may be specified with the "+PRINT_PARAM_STRING("test")+" " "parameter, and if performance numbers are desired for that test set, " "labels may be specified with the "+PRINT_PARAM_STRING("test_labels")+" parameter. Predictions for each test point may be saved via the "+PRINT_PARAM_STRING("predictions")+" output parameter. Class " "probabilities for each prediction may be saved with the "+PRINT_PARAM_STRING("probabilities")+" output parameter.")
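
 As a rough illustration of the workflow described above in terms of the underlying C++ API, the sketch below trains, classifies with, and serializes a tree using mlpack 3.x's mlpack::tree::DecisionTree; the file names and the assumption of purely numeric data are illustrative, not taken from this binding.

    #include <mlpack/core.hpp>
    #include <mlpack/methods/decision_tree/decision_tree.hpp>

    using namespace mlpack;

    int main()
    {
      // Load a numeric training set and labels in [0, num_classes - 1].
      arma::mat trainingSet;
      arma::Row<size_t> labels;
      data::Load("data.csv", trainingSet, true);
      data::Load("labels.csv", labels, true);
      const size_t numClasses = labels.max() + 1;

      // Train with the hyperparameters the binding exposes: minimum_leaf_size,
      // minimum_gain_split, and maximum_depth (0 means no depth limit).
      tree::DecisionTree<> dtree(trainingSet, labels, numClasses,
          20 /* minimum leaf size */, 1e-7 /* minimum gain split */,
          0 /* maximum depth */);

      // Classify a test set; predictions and class probabilities correspond to
      // the binding's predictions and probabilities outputs.
      arma::mat testSet;
      data::Load("test.csv", testSet, true);
      arma::Row<size_t> predictions;
      arma::mat probabilities;
      dtree.Classify(testSet, predictions, probabilities);

      // Serializing the trained tree plays the role of output_model.
      data::Save("tree.bin", "tree", dtree, true);
      return 0;
    }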
 
 BINDING_EXAMPLE ("For example, to train a decision tree with a minimum leaf size of 20 on " "the dataset contained in "+PRINT_DATASET("data")+" with labels "+PRINT_DATASET("labels")+", saving the output model to "+PRINT_MODEL("tree")+" and printing the training accuracy, one could " "call" "\n\n"+PRINT_CALL("decision_tree", "training", "data", "labels", "labels", "output_model", "tree", "minimum_leaf_size", 20, "minimum_gain_split", 1e-3, "print_training_accuracy", true)+"\n\n" "Then, to use that model to classify points in "+PRINT_DATASET("test_set")+" and print the test error given the " "labels "+PRINT_DATASET("test_labels")+" using that model, while " "saving the predictions for each point to "+PRINT_DATASET("predictions")+", one could call " "\n\n"+PRINT_CALL("decision_tree", "input_model", "tree", "test", "test_set", "test_labels", "test_labels", "predictions", "predictions"))
 
 BINDING_SEE_ALSO ("Decision stump", "#decision_stump")
 
 BINDING_SEE_ALSO ("Random forest", "#random_forest")
 
 BINDING_SEE_ALSO ("Decision trees on Wikipedia", "https://en.wikipedia.org/wiki/Decision_tree_learning")
 
 BINDING_SEE_ALSO ("Induction of Decision Trees (pdf)", "https://link.springer.com/content/pdf/10.1007/BF00116251.pdf")
 
 BINDING_SEE_ALSO ("mlpack::tree::DecisionTree class documentation", "@doxygen/classmlpack_1_1tree_1_1DecisionTree.html")
 
 PARAM_MATRIX_AND_INFO_IN ("training", "Training dataset (may be categorical).", "t")
 
 PARAM_UROW_IN ("labels", "Training labels.", "l")
 
 PARAM_MATRIX_AND_INFO_IN ("test", "Testing dataset (may be categorical).", "T")
 
 PARAM_MATRIX_IN ("weights", "Weights for each training point.", "w")
 
 PARAM_UROW_IN ("test_labels", "Test point labels, if accuracy calculation " "is desired.", "L")
 
 PARAM_INT_IN ("minimum_leaf_size", "Minimum number of points in a leaf.", "n", 20)
 
 PARAM_DOUBLE_IN ("minimum_gain_split", "Minimum gain for node splitting.", "g", 1e-7)
 
 PARAM_INT_IN ("maximum_depth", "Maximum depth of the tree (0 means no limit).", "D", 0)
 
 PARAM_FLAG ("print_training_error", "Print the training error (deprecated; will " "be removed in mlpack 4.0.0).", "e")
 
 PARAM_FLAG ("print_training_accuracy", "Print the training accuracy.", "a")
 
 PARAM_MATRIX_OUT ("probabilities", "Class probabilities for each test point.", "P")
 
 PARAM_UROW_OUT ("predictions", "Class predictions for each test point.", "p")
 
 PARAM_MODEL_IN (DecisionTreeModel, "input_model", "Pre-trained decision tree, " "to be used with test points.", "m")
 
 PARAM_MODEL_OUT (DecisionTreeModel, "output_model", "Output for trained decision" " tree.", "M")
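
 Inside the binding body, parameters declared with the macros above are typically read back through IO::GetParam. The following is a hedged sketch of that pattern; the helper name and control flow are assumptions, not the actual contents of decision_tree_main.cpp.

    #include <mlpack/core.hpp>
    #include <mlpack/core/util/io.hpp>
    #include <tuple>

    using namespace mlpack;
    using TupleType = std::tuple<data::DatasetInfo, arma::mat>;

    // Hypothetical helper showing how the declared parameters are retrieved.
    static void ReadDecisionTreeParams()
    {
      const size_t minLeafSize = (size_t) IO::GetParam<int>("minimum_leaf_size");
      const double minGainSplit = IO::GetParam<double>("minimum_gain_split");
      const size_t maxDepth = (size_t) IO::GetParam<int>("maximum_depth");

      if (IO::HasParam("training"))
      {
        // Matrix-and-info parameters arrive as the (DatasetInfo, matrix) tuple.
        TupleType& training = IO::GetParam<TupleType>("training");
        const arma::mat& dataset = std::get<1>(training);
        // ... a DecisionTree would be trained here using the values read above.
        (void) dataset; (void) minLeafSize; (void) minGainSplit; (void) maxDepth;
      }
    }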
 

Detailed Description

Author
Ryan Curtin

A command-line program to build a decision tree.

mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.