mlpack
Typedefs | Functions
hoeffding_tree_main.cpp File Reference
#include <mlpack/prereqs.hpp>
#include <mlpack/core/util/io.hpp>
#include <mlpack/core/util/mlpack_main.hpp>
#include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>
#include <mlpack/methods/hoeffding_trees/binary_numeric_split.hpp>
#include <mlpack/methods/hoeffding_trees/information_gain.hpp>
#include <mlpack/methods/hoeffding_trees/hoeffding_tree_model.hpp>
#include <queue>
Include dependency graph for hoeffding_tree_main.cpp:
This graph shows which files directly or indirectly include this file:

Typedefs

typedef tuple< DatasetInfo, arma::mat > TupleType
 

Functions

 BINDING_NAME ("Hoeffding trees")
 
 BINDING_SHORT_DESC ("An implementation of Hoeffding trees, a form of streaming decision tree " "for classification. Given labeled data, a Hoeffding tree can be trained " "and saved for later use, or a pre-trained Hoeffding tree can be used for " "predicting the classifications of new points.")
 
 BINDING_LONG_DESC ("This program implements Hoeffding trees, a form of streaming decision tree" " suited best for large (or streaming) datasets. This program supports " "both categorical and numeric data. Given an input dataset, this program " "is able to train the tree with numerous training options, and save the " "model to a file. The program is also able to use a trained model or a " "model from file in order to predict classes for a given test set." "\n\n" "The training file and associated labels are specified with the "+PRINT_PARAM_STRING("training")+" and "+PRINT_PARAM_STRING("labels")+" parameters, respectively. Optionally, if "+PRINT_PARAM_STRING("labels")+" is not specified, the labels are assumed " "to be the last dimension of the training dataset." "\n\n" "The training may be performed in batch mode " "(like a typical decision tree algorithm) by specifying the "+PRINT_PARAM_STRING("batch_mode")+" option, but this may not be the best " "option for large datasets." "\n\n" "When a model is trained, it may be saved via the "+PRINT_PARAM_STRING("output_model")+" output parameter. A model may be " "loaded from file for further training or testing with the "+PRINT_PARAM_STRING("input_model")+" parameter." "\n\n" "Test data may be specified with the "+PRINT_PARAM_STRING("test")+" " "parameter, and if performance statistics are desired for that test set, " "labels may be specified with the "+PRINT_PARAM_STRING("test_labels")+" parameter. Predictions for each test point may be saved with the "+PRINT_PARAM_STRING("predictions")+" output parameter, and class " "probabilities for each prediction may be saved with the "+PRINT_PARAM_STRING("probabilities")+" output parameter.")
 
 BINDING_EXAMPLE ("For example, to train a Hoeffding tree with confidence 0.99 with data "+PRINT_DATASET("dataset")+", saving the trained tree to "+PRINT_MODEL("tree")+", the following command may be used:" "\n\n"+PRINT_CALL("hoeffding_tree", "training", "dataset", "confidence", 0.99, "output_model", "tree")+"\n\n" "Then, this tree may be used to make predictions on the test set "+PRINT_DATASET("test_set")+", saving the predictions into "+PRINT_DATASET("predictions")+" and the class probabilities into "+PRINT_DATASET("class_probs")+" with the following command: " "\n\n"+PRINT_CALL("hoeffding_tree", "input_model", "tree", "test", "test_set", "predictions", "predictions", "probabilities", "class_probs"))
 
 BINDING_SEE_ALSO ("@decision_tree", "#decision_tree")
 
 BINDING_SEE_ALSO ("@random_forest", "#random_forest")
 
 BINDING_SEE_ALSO ("Mining High-Speed Data Streams (pdf)", "http://dm.cs.washington.edu/papers/vfdt-kdd00.pdf")
 
 BINDING_SEE_ALSO ("mlpack::tree::HoeffdingTree class documentation", "@doxygen/classmlpack_1_1tree_1_1HoeffdingTree.html")
 
 PARAM_MATRIX_AND_INFO_IN ("training", "Training dataset (may be categorical).", "t")
 
 PARAM_UROW_IN ("labels", "Labels for training dataset.", "l")
 
 PARAM_DOUBLE_IN ("confidence", "Confidence before splitting (between 0 and 1).", "c", 0.95)
 
 PARAM_INT_IN ("max_samples", "Maximum number of samples before splitting.", "n", 5000)
 
 PARAM_INT_IN ("min_samples", "Minimum number of samples before splitting.", "I", 100)
 
 PARAM_MODEL_IN (HoeffdingTreeModel, "input_model", "Input trained Hoeffding tree" " model.", "m")
 
 PARAM_MODEL_OUT (HoeffdingTreeModel, "output_model", "Output for trained " "Hoeffding tree model.", "M")
 
 PARAM_MATRIX_AND_INFO_IN ("test", "Testing dataset (may be categorical).", "T")
 
 PARAM_UROW_IN ("test_labels", "Labels of test data.", "L")
 
 PARAM_UROW_OUT ("predictions", "Matrix to output label predictions for test " "data into.", "p")
 
 PARAM_MATRIX_OUT ("probabilities", "In addition to predicting labels, provide " "prediction probabilities in this matrix.", "P")
 
 PARAM_STRING_IN ("numeric_split_strategy", "The splitting strategy to use for " "numeric features: 'domingos' or 'binary'.", "N", "binary")
 
 PARAM_FLAG ("batch_mode", "If true, samples will be considered in batch instead " "of as a stream. This generally results in better trees but at the cost of" " memory usage and runtime.", "b")
 
 PARAM_FLAG ("info_gain", "If set, information gain is used instead of Gini " "impurity for calculating Hoeffding bounds.", "i")
 
 PARAM_INT_IN ("passes", "Number of passes to take over the dataset.", "s", 1)
 
 PARAM_INT_IN ("bins", "If the 'domingos' split strategy is used, this specifies " "the number of bins for each numeric split.", "B", 10)
 
 PARAM_INT_IN ("observations_before_binning", "If the 'domingos' split strategy " "is used, this specifies the number of samples observed before binning is " "performed.", "o", 100)
 

Detailed Description

Author
Ryan Curtin

A command-line executable that can build a streaming decision tree.

mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.