The AllCategoricalSplit is a splitting function that will split categorical features into many children: one child for each category.
More...
#include <all_categorical_split.hpp>
|
template<bool UseWeights, typename VecType , typename LabelsType , typename WeightVecType > |
static double | SplitIfBetter (const double bestGain, const VecType &data, const size_t numCategories, const LabelsType &labels, const size_t numClasses, const WeightVecType &weights, const size_t minimumLeafSize, const double minimumGainSplit, arma::vec &splitInfo, AuxiliarySplitInfo &aux) |
| Check if we can split a node. More...
|
|
template<bool UseWeights, typename VecType , typename ResponsesType , typename WeightVecType > |
static double | SplitIfBetter (const double bestGain, const VecType &data, const size_t numCategories, const ResponsesType &responses, const WeightVecType &weights, const size_t minimumLeafSize, const double minimumGainSplit, double &splitInfo, AuxiliarySplitInfo &aux) |
| Check if we can split a node. More...
|
|
static size_t | NumChildren (const double &splitInfo, const AuxiliarySplitInfo &) |
| Return the number of children in the split. More...
|
|
template<typename ElemType > |
static size_t | CalculateDirection (const ElemType &point, const double &splitInfo, const AuxiliarySplitInfo &) |
| Calculate the direction a point should percolate to. More...
|
|
template<typename FitnessFunction>
class mlpack::tree::AllCategoricalSplit< FitnessFunction >
The AllCategoricalSplit is a splitting function that will split categorical features into many children: one child for each category.
This is a generic splitting strategy and can be used for both regression and classification trees.
- Template Parameters
-
FitnessFunction | Fitness function to evaluate gain with. |
◆ CalculateDirection()
template<typename FitnessFunction >
template<typename ElemType >
Calculate the direction a point should percolate to.
- Parameters
-
point | The point to use. |
splitInfo | Split information (for this split type, the number of children of the node). |
* | (aux) Auxiliary information for the split (Unused). |
◆ NumChildren()
template<typename FitnessFunction >
Return the number of children in the split.
- Parameters
-
splitInfo | Split information (for this split type, the number of children of the node). |
* | (aux) Auxiliary information for the split (Unused). |
◆ SplitIfBetter() [1/2]
template<typename FitnessFunction >
template<bool UseWeights, typename VecType , typename LabelsType , typename WeightVecType >
double mlpack::tree::AllCategoricalSplit< FitnessFunction >::SplitIfBetter |
( |
const double |
bestGain, |
|
|
const VecType & |
data, |
|
|
const size_t |
numCategories, |
|
|
const LabelsType & |
labels, |
|
|
const size_t |
numClasses, |
|
|
const WeightVecType & |
weights, |
|
|
const size_t |
minimumLeafSize, |
|
|
const double |
minimumGainSplit, |
|
|
arma::vec & |
splitInfo, |
|
|
AuxiliarySplitInfo & |
aux |
|
) |
| |
|
static |
Check if we can split a node.
If we can split a node in a way that improves on 'bestGain', then we return the improved gain. Otherwise we return the value 'bestGain'. If a split is made, then splitInfo and aux may be modified. For this particular split type, aux will be empty and splitInfo will store the number of children of the node.
This overload is used only for classification.
- Parameters
-
bestGain | Best gain seen so far (we'll only split if we find gain better than this). |
data | The values of the data points in the dimension to check for a split. |
numCategories | Number of categories in the categorical data. |
labels | Labels for each point. |
numClasses | Number of classes in the dataset. |
weights | Weights associated with labels. |
minimumLeafSize | Minimum number of points in a leaf node for splitting. |
splitInfo | Stores split information on a successful split. |
minimumGainSplit | Minimum gain required for a split to be made. |
aux | Auxiliary split information, which may be modified on a successful split. |
◆ SplitIfBetter() [2/2]
template<typename FitnessFunction >
template<bool UseWeights, typename VecType , typename ResponsesType , typename WeightVecType >
double mlpack::tree::AllCategoricalSplit< FitnessFunction >::SplitIfBetter |
( |
const double |
bestGain, |
|
|
const VecType & |
data, |
|
|
const size_t |
numCategories, |
|
|
const ResponsesType & |
responses, |
|
|
const WeightVecType & |
weights, |
|
|
const size_t |
minimumLeafSize, |
|
|
const double |
minimumGainSplit, |
|
|
double & |
splitInfo, |
|
|
AuxiliarySplitInfo & |
aux |
|
) |
| |
|
static |
Check if we can split a node.
If we can split a node in a way that improves on 'bestGain', then we return the improved gain. Otherwise we return the value 'bestGain'. If a split is made, then splitInfo and aux may be modified. For this particular split type, aux will be empty and splitInfo will store the number of children of the node.
This overload is used only for regression.
- Parameters
-
bestGain | Best gain seen so far (we'll only split if we find gain better than this). |
data | The values of the data points in the dimension to check for a split. |
numCategories | Number of categories in the categorical data. |
responses | Responses for each point. |
weights | Weights associated with responses. |
minimumLeafSize | Minimum number of points in a leaf node for splitting. |
splitInfo | Stores split information on a successful split. |
minimumGainSplit | Minimum gain required for a split to be made. |
aux | Auxiliary split information, which may be modified on a successful split. |
The documentation for this class was generated from the following files: