mlpack  2.0.1
decision_stump.hpp
Go to the documentation of this file.
1 
14 #ifndef __MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP
15 #define __MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP
16 
17 #include <mlpack/core.hpp>
18 
19 namespace mlpack {
20 namespace decision_stump {
21 
35 template<typename MatType = arma::mat>
37 {
38  public:
48  DecisionStump(const MatType& data,
49  const arma::Row<size_t>& labels,
50  const size_t classes,
51  const size_t bucketSize = 10);
52 
64  DecisionStump(const DecisionStump<>& other,
65  const MatType& data,
66  const arma::Row<size_t>& labels,
67  const arma::rowvec& weights);
68 
74  DecisionStump();
75 
86  void Train(const MatType& data,
87  const arma::Row<size_t>& labels,
88  const size_t classes,
89  const size_t bucketSize);
90 
99  void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
100 
102  size_t SplitDimension() const { return splitDimension; }
104  size_t& SplitDimension() { return splitDimension; }
105 
107  const arma::vec& Split() const { return split; }
109  arma::vec& Split() { return split; }
110 
112  const arma::Col<size_t> BinLabels() const { return binLabels; }
114  arma::Col<size_t>& BinLabels() { return binLabels; }
115 
117  template<typename Archive>
118  void Serialize(Archive& ar, const unsigned int /* version */);
119 
120  private:
122  size_t classes;
124  size_t bucketSize;
125 
129  arma::vec split;
131  arma::Col<size_t> binLabels;
132 
141  template<bool UseWeights>
142  double SetupSplitDimension(const arma::rowvec& dimension,
143  const arma::Row<size_t>& labels,
144  const arma::rowvec& weightD);
145 
153  template<typename VecType>
154  void TrainOnDim(const VecType& dimension,
155  const arma::Row<size_t>& labels);
156 
161  void MergeRanges();
162 
169  template<typename VecType>
170  double CountMostFreq(const VecType& subCols);
171 
177  template<typename VecType>
178  int IsDistinct(const VecType& featureRow);
179 
189  template<bool UseWeights, typename VecType, typename WeightVecType>
190  double CalculateEntropy(const VecType& labels,
191  const WeightVecType& weights);
192 
202  template<bool UseWeights>
203  void Train(const MatType& data,
204  const arma::Row<size_t>& labels,
205  const arma::rowvec& weights);
206 };
207 
208 } // namespace decision_stump
209 } // namespace mlpack
210 
211 #include "decision_stump_impl.hpp"
212 
213 #endif
void MergeRanges()
After the "split" vector has been set up, merge ranges with identical class labels.
void Classify(const MatType &test, arma::Row< size_t > &predictedLabels)
Classification function.
size_t splitDimension
Stores the value of the dimension on which to split.
Linear algebra utility functions, generally performed on matrices or vectors.
arma::Col< size_t > & BinLabels()
Modify the labels for each split bin (be careful!).
size_t SplitDimension() const
Access the splitting dimension.
int IsDistinct(const VecType &featureRow)
Returns 1 if the values of featureRow are not all the same.
This class implements a decision stump.
double SetupSplitDimension(const arma::rowvec &dimension, const arma::Row< size_t > &labels, const arma::rowvec &weightD)
Sets up dimension as if it were splitting on it and finds entropy when splitting on dimension...
DecisionStump()
Create a decision stump without training.
const arma::vec & Split() const
Access the splitting values.
void Train(const MatType &data, const arma::Row< size_t > &labels, const size_t classes, const size_t bucketSize)
Train the decision stump on the given data.
arma::Col< size_t > binLabels
Stores the labels for each splitting bin.
void Serialize(Archive &ar, const unsigned int)
Serialize the decision stump.
size_t bucketSize
The minimum number of points in a bucket.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen docu...
double CalculateEntropy(const VecType &labels, const WeightVecType &weights)
Calculate the entropy of the given dimension.
arma::vec & Split()
Modify the splitting values (be careful!).
void TrainOnDim(const VecType &dimension, const arma::Row< size_t > &labels)
After having decided the dimension on which to split, train on that dimension.
size_t & SplitDimension()
Modify the splitting dimension (be careful!).
arma::vec split
Stores the splitting values after training.
const arma::Col< size_t > BinLabels() const
Access the labels for each split bin.
size_t classes
The number of classes (we must store this for boosting).
double CountMostFreq(const VecType &subCols)
Count the most frequently occurring element in subCols.