mlpack
|
This class implements a generic decision tree learner. More...
#include <decision_tree.hpp>
Public Types | |
typedef NumericSplitType< FitnessFunction > | NumericSplit |
Allow access to the numeric split type. | |
typedef CategoricalSplitType< FitnessFunction > | CategoricalSplit |
Allow access to the categorical split type. | |
typedef DimensionSelectionType | DimensionSelection |
Allow access to the dimension selection type. | |
Public Member Functions | |
template<typename MatType , typename LabelsType > | |
DecisionTree (MatType data, const data::DatasetInfo &datasetInfo, LabelsType labels, const size_t numClasses, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType()) | |
Construct the decision tree on the given data and labels, where the data can be both numeric and categorical. More... | |
template<typename MatType , typename LabelsType > | |
DecisionTree (MatType data, LabelsType labels, const size_t numClasses, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType()) | |
Construct the decision tree on the given data and labels, assuming that the data is all of the numeric type. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
DecisionTree (MatType data, const data::DatasetInfo &datasetInfo, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType(), const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) | |
Construct the decision tree on the given data and labels with weights, where the data can be both numeric and categorical. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
DecisionTree (const DecisionTree &other, MatType data, const data::DatasetInfo &datasetInfo, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) | |
Take ownership of another decision tree and train on the given data and labels with weights, where the data can be both numeric and categorical. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
DecisionTree (MatType data, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType(), const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) | |
Construct the decision tree on the given data and labels with weights, assuming that the data is all of the numeric type. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
DecisionTree (const DecisionTree &other, MatType data, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType(), const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) | |
Take ownership of another decision tree and train on the given data and labels with weights, assuming that the data is all of the numeric type. More... | |
DecisionTree (const size_t numClasses=1) | |
Construct a decision tree without training it. More... | |
DecisionTree (const DecisionTree &other) | |
Copy another tree. More... | |
DecisionTree (DecisionTree &&other) | |
Take ownership of another tree. More... | |
DecisionTree & | operator= (const DecisionTree &other) |
Copy another tree. More... | |
DecisionTree & | operator= (DecisionTree &&other) |
Take ownership of another tree. More... | |
~DecisionTree () | |
Clean up memory. | |
template<typename MatType , typename LabelsType > | |
double | Train (MatType data, const data::DatasetInfo &datasetInfo, LabelsType labels, const size_t numClasses, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType()) |
Train the decision tree on the given data. More... | |
template<typename MatType , typename LabelsType > | |
double | Train (MatType data, LabelsType labels, const size_t numClasses, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType()) |
Train the decision tree on the given data, assuming that all dimensions are numeric. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
double | Train (MatType data, const data::DatasetInfo &datasetInfo, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType(), const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) |
Train the decision tree on the given weighted data. More... | |
template<typename MatType , typename LabelsType , typename WeightsType > | |
double | Train (MatType data, LabelsType labels, const size_t numClasses, WeightsType weights, const size_t minimumLeafSize=10, const double minimumGainSplit=1e-7, const size_t maximumDepth=0, DimensionSelectionType dimensionSelector=DimensionSelectionType(), const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > *=0) |
Train the decision tree on the given weighted data, assuming that all dimensions are numeric. More... | |
template<typename VecType > | |
size_t | Classify (const VecType &point) const |
Classify the given point, using the entire tree. More... | |
template<typename VecType > | |
void | Classify (const VecType &point, size_t &prediction, arma::vec &probabilities) const |
Classify the given point and also return estimates of the probability for each class in the given vector. More... | |
template<typename MatType > | |
void | Classify (const MatType &data, arma::Row< size_t > &predictions) const |
Classify the given points, using the entire tree. More... | |
template<typename MatType > | |
void | Classify (const MatType &data, arma::Row< size_t > &predictions, arma::mat &probabilities) const |
Classify the given points and also return estimates of the probabilities for each class in the given matrix. More... | |
template<typename Archive > | |
void | serialize (Archive &ar, const uint32_t) |
Serialize the tree. | |
size_t | NumChildren () const |
Get the number of children. | |
const DecisionTree & | Child (const size_t i) const |
Get the child of the given index. | |
DecisionTree & | Child (const size_t i) |
Modify the child of the given index (be careful!). | |
size_t | SplitDimension () const |
Get the split dimension (only meaningful if this is a non-leaf in a trained tree). More... | |
template<typename VecType > | |
size_t | CalculateDirection (const VecType &point) const |
Given a point, and assuming that this node is not a leaf, calculate the index of the child node this point would go towards. More... | |
size_t | NumClasses () const |
Get the number of classes in the tree. | |
This class implements a generic decision tree learner.
Its behavior can be controlled via its template arguments.
The class inherits from the auxiliary split information in order to prevent an empty auxiliary split information struct from taking any extra size.
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | MatType | data, |
const data::DatasetInfo & | datasetInfo, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() |
||
) |
Construct the decision tree on the given data and labels, where the data can be both numeric and categorical.
Construct and train without weights.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data or labels are no longer needed to avoid copies.
data | Dataset to train on. |
datasetInfo | Type information for each dimension of the dataset. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | MatType | data, |
LabelsType | labels, | ||
const size_t | numClasses, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() |
||
) |
Construct the decision tree on the given data and labels, assuming that the data is all of the numeric type.
Construct and train.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data or labels are no longer needed to avoid copies.
data | Dataset to train on. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | MatType | data, |
const data::DatasetInfo & | datasetInfo, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Construct the decision tree on the given data and labels with weights, where the data can be both numeric and categorical.
Construct and train with weights.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
data | Dataset to train on. |
datasetInfo | Type information for each dimension of the dataset. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | The list of weights for the given labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | const DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & | other, |
MatType | data, | ||
const data::DatasetInfo & | datasetInfo, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Take ownership of another decision tree and train on the given data and labels with weights, where the data can be both numeric and categorical.
Construct and train with weights.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
other | Tree to take ownership of. |
data | Dataset to train on. |
datasetInfo | Type information for each dimension of the dataset. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | The list of weights for the given labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | MatType | data, |
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Construct the decision tree on the given data and labels with weights, assuming that the data is all of the numeric type.
Construct and train with weights.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
data | Dataset to train on. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | The list of weights for the given labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | const DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & | other, |
MatType | data, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Take ownership of another decision tree and train on the given data and labels with weights, assuming that the data is all of the numeric type.
Construct and train with weights.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
other | Tree to take ownership of. |
data | Dataset to train on. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | The list of weights for the given labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | const size_t | numClasses = 1 | ) |
Construct a decision tree without training it.
Construct, don't train.
It will be a leaf node with equal probabilities for each class.
numClasses | Number of classes in the dataset. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | const DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & | other | ) |
Copy another tree.
This may use a lot of memory—be sure that it's what you want to do.
other | Tree to copy. |
mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::DecisionTree | ( | DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > && | other | ) |
Take ownership of another tree.
other | Tree to take ownership of. |
size_t mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::CalculateDirection | ( | const VecType & | point | ) | const |
Given a point, and assuming that this node is not a leaf, calculate the index of the child node this point would go towards.
This method is primarily used by the Classify() function, but it can be used in a standalone sense too.
point | Point to classify. |
size_t mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Classify | ( | const VecType & | point | ) | const |
Classify the given point, using the entire tree.
Return the class.
The predicted label is returned.
point | Point to classify. |
void mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Classify | ( | const VecType & | point, |
size_t & | prediction, | ||
arma::vec & | probabilities | ||
) | const |
Classify the given point and also return estimates of the probability for each class in the given vector.
Return class probabilities for a given point.
point | Point to classify. |
prediction | This will be set to the predicted class of the point. |
probabilities | This will be filled with class probabilities for the point. |
void mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Classify | ( | const MatType & | data, |
arma::Row< size_t > & | predictions | ||
) | const |
Classify the given points, using the entire tree.
Return the class for a set of points.
The predicted labels for each point are stored in the given vector.
data | Set of points to classify. |
predictions | This will be filled with predictions for each point. |
void mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Classify | ( | const MatType & | data, |
arma::Row< size_t > & | predictions, | ||
arma::mat & | probabilities | ||
) | const |
Classify the given points and also return estimates of the probabilities for each class in the given matrix.
Return the class probabilities for a set of points.
The predicted labels for each point are stored in the given vector.
data | Set of points to classify. |
predictions | This will be filled with predictions for each point. |
probabilities | This will be filled with class probabilities for each point. |
DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::operator= | ( | const DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & | other | ) |
Copy another tree.
This may use a lot of memory—be sure that it's what you want to do.
other | Tree to copy. |
DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > & mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::operator= | ( | DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion > && | other | ) |
Take ownership of another tree.
other | Tree to take ownership of. |
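size_t mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::SplitDimension | ( | ) | const |
inline |
Get the split dimension (only meaningful if this is a non-leaf in a trained tree).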
double mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Train | ( | MatType | data, |
const data::DatasetInfo & | datasetInfo, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() |
||
) |
Train the decision tree on the given data.
Train on the given data.
This will overwrite the existing model. The data may have numeric and categorical types, specified by the datasetInfo parameter. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data or labels are no longer needed to avoid copies.
data | Dataset to train on. |
datasetInfo | Type information for each dimension. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
double mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Train | ( | MatType | data, |
LabelsType | labels, | ||
const size_t | numClasses, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() |
||
) |
Train the decision tree on the given data, assuming that all dimensions are numeric.
Train on the given data, assuming all dimensions are numeric.
This will overwrite the existing model. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data or labels are no longer needed to avoid copies.
data | Dataset to train on. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
double mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Train | ( | MatType | data, |
const data::DatasetInfo & | datasetInfo, | ||
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Train the decision tree on the given weighted data.
Train on the given weighted data.
This will overwrite the existing model. The data may have numeric and categorical types, specified by the datasetInfo parameter. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
data | Dataset to train on. |
datasetInfo | Type information for each dimension. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | Weights for all of the labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |
double mlpack::tree::DecisionTree< FitnessFunction, NumericSplitType, CategoricalSplitType, DimensionSelectionType, NoRecursion >::Train | ( | MatType | data, |
LabelsType | labels, | ||
const size_t | numClasses, | ||
WeightsType | weights, | ||
const size_t | minimumLeafSize = 10 , |
||
const double | minimumGainSplit = 1e-7 , |
||
const size_t | maximumDepth = 0 , |
||
DimensionSelectionType | dimensionSelector = DimensionSelectionType() , |
||
const std::enable_if_t< arma::is_arma_type< typename std::remove_reference< WeightsType >::type >::value > * | = 0 |
||
) |
Train the decision tree on the given weighted data, assuming that all dimensions are numeric.
Train on the given weighted data.
This will overwrite the existing model. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
Use std::move if data, labels or weights are no longer needed to avoid copies.
data | Dataset to train on. |
labels | Labels for each training point. |
numClasses | Number of classes in the dataset. |
weights | Weights for all of the labels. |
minimumLeafSize | Minimum number of points in each leaf node. |
minimumGainSplit | Minimum gain for the node to split. |
maximumDepth | Maximum depth for the tree. |
dimensionSelector | Instantiated dimension selection policy. |