Aakash-kaushik/mlpack/ub__tree__split__impl_8hpp_source.html

 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_UB_TREE_SPLIT_IMPL_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_UB_TREE_SPLIT_IMPL_HPP

 #include "ub_tree_split.hpp"
 #include <mlpack/core/tree/bounds.hpp>

 namespace mlpack {
 namespace tree {

 template<typename BoundType, typename MatType>
 bool UBTreeSplit<BoundType, MatType>::SplitNode(BoundType& bound,
                                                 MatType& data,
                                                 const size_t begin,
                                                 const size_t count,
                                                 SplitInfo& splitInfo)
 {
   constexpr size_t order = sizeof(AddressElemType) * CHAR_BIT;
   if (begin == 0 && count == data.n_cols)
   {
     // Calculate all addresses.
     InitializeAddresses(data);

     // Probably this is not a good idea. Maybe it is better to get
     // a number of distinct samples and find the median.
     std::sort(addresses.begin(), addresses.end(), ComparePair);

     // Save the vector in order to rearrange the dataset later.
     splitInfo.addresses = &addresses;
   }
   else
   {
     // We have already rearranged the dataset.
     splitInfo.addresses = NULL;
   }

   // The bound shouldn't contain too many subrectangles.
   // In order to minimize the number of hyperrectangles we set last bits
   // of the last address in the node to 1 and last bits of the first  address
   // in the next node to zero in such a way that the ordering is not
   // disturbed.
   if (begin + count < data.n_cols)
   {
     // Omit leading equal bits.
     size_t row = 0;
     arma::Col<AddressElemType>& lo = addresses[begin + count - 1].first;
     const arma::Col<AddressElemType>& hi = addresses[begin + count].first;

     for (; row < data.n_rows; row++)
       if (lo[row] != hi[row])
         break;

     size_t bit = 0;

     for (; bit < order; bit++)
       if ((lo[row] & ((AddressElemType) 1 << (order - 1 - bit))) !=
           (hi[row] & ((AddressElemType) 1 << (order - 1 - bit))))
         break;

     bit++;

     // Replace insignificant bits.
     if (bit == order)
     {
       bit = 0;
       row++;
     }
     else
     {
       for (; bit < order; bit++)
         lo[row] |= ((AddressElemType) 1 << (order - 1 - bit));
       row++;
     }

     for (; row < data.n_rows; row++)
       for (; bit < order; bit++)
         lo[row] |= ((AddressElemType) 1 << (order - 1 - bit));
   }

   // The bound shouldn't contain too many subrectangles.
   // In order to minimize the number of hyperrectangles we set last bits
   // of the first address in the next node to 0 and last bits of the last
   // address in the previous node to 1 in such a way that the ordering is not
   // disturbed.
   if (begin > 0)
   {
     // Omit leading equal bits.
     size_t row = 0;
     const arma::Col<AddressElemType>& lo = addresses[begin - 1].first;
     arma::Col<AddressElemType>& hi = addresses[begin].first;

     for (; row < data.n_rows; row++)
       if (lo[row] != hi[row])
         break;

     size_t bit = 0;

     for (; bit < order; bit++)
       if ((lo[row] & ((AddressElemType) 1 << (order - 1 - bit))) !=
           (hi[row] & ((AddressElemType) 1 << (order - 1 - bit))))
         break;

     bit++;

     // Replace insignificant bits.
     if (bit == order)
     {
       bit = 0;
       row++;
     }
     else
     {
       for (; bit < order; bit++)
         hi[row] &= ~((AddressElemType) 1 << (order - 1 - bit));
       row++;
     }

     for (; row < data.n_rows; row++)
       for (; bit < order; bit++)
         hi[row] &= ~((AddressElemType) 1 << (order - 1 - bit));
   }

   // Set the minimum and the maximum addresses.
   for (size_t k = 0; k < bound.Dim(); ++k)
   {
     bound.LoAddress()[k] = addresses[begin].first[k];
     bound.HiAddress()[k] = addresses[begin + count - 1].first[k];
   }
   bound.UpdateAddressBounds(data.cols(begin, begin + count - 1));

   return true;
 }

 template<typename BoundType, typename MatType>
 void UBTreeSplit<BoundType, MatType>::InitializeAddresses(const MatType& data)
 {
   addresses.resize(data.n_cols);

   // Calculate all addresses.
   for (size_t i = 0; i < data.n_cols; ++i)
   {
     addresses[i].first.zeros(data.n_rows);
     bound::addr::PointToAddress(addresses[i].first, data.col(i));
     addresses[i].second = i;
   }
 }

 template<typename BoundType, typename MatType>
 size_t UBTreeSplit<BoundType, MatType>::PerformSplit(
     MatType& data,
     const size_t begin,
     const size_t count,
     const SplitInfo& splitInfo)
 {
   // For the first time we have to rearrange the dataset.
   if (splitInfo.addresses)
   {
     std::vector<size_t> newFromOld(data.n_cols);
     std::vector<size_t> oldFromNew(data.n_cols);

     for (size_t i = 0; i < splitInfo.addresses->size(); ++i)
     {
       newFromOld[i] = i;
       oldFromNew[i] = i;
     }

     for (size_t i = 0; i < splitInfo.addresses->size(); ++i)
     {
       size_t index = (*splitInfo.addresses)[i].second;
       size_t oldI = oldFromNew[i];
       size_t newIndex = newFromOld[index];

       data.swap_cols(i, newFromOld[index]);

       size_t tmp = newFromOld[index];
       newFromOld[index] = i;
       newFromOld[oldI] = tmp;

       tmp = oldFromNew[i];
       oldFromNew[i] = oldFromNew[newIndex];
       oldFromNew[newIndex] = tmp;
     }
   }

   // Since the dataset is sorted we can easily obtain the split column.
   return begin + count / 2;
 }

 template<typename BoundType, typename MatType>
 size_t UBTreeSplit<BoundType, MatType>::PerformSplit(
     MatType& data,
     const size_t begin,
     const size_t count,
     const SplitInfo& splitInfo,
     std::vector<size_t>& oldFromNew)
 {
   // For the first time we have to rearrange the dataset.
   if (splitInfo.addresses)
   {
     std::vector<size_t> newFromOld(data.n_cols);

     for (size_t i = 0; i < splitInfo.addresses->size(); ++i)
       newFromOld[i] = i;

     for (size_t i = 0; i < splitInfo.addresses->size(); ++i)
     {
       size_t index = (*splitInfo.addresses)[i].second;
       size_t oldI = oldFromNew[i];
       size_t newIndex = newFromOld[index];

       data.swap_cols(i, newFromOld[index]);

       size_t tmp = newFromOld[index];
       newFromOld[index] = i;
       newFromOld[oldI] = tmp;

       tmp = oldFromNew[i];
       oldFromNew[i] = oldFromNew[newIndex];
       oldFromNew[newIndex] = tmp;
     }
   }

   // Since the dataset is sorted we can easily obtain the split column.
   return begin + count / 2;
 }

 } // namespace tree
 } // namespace mlpack

 #endif
ub_tree_split.hpp

data

mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1

bounds.hpp
Bounds that are useful for binary space partitioning trees.