12 #ifndef MLPACK_CORE_DATA_LOAD_IMPL_HPP 13 #define MLPACK_CORE_DATA_LOAD_IMPL_HPP 26 #include <boost/algorithm/string/trim.hpp> 27 #include <boost/tokenizer.hpp> 28 #include <boost/algorithm/string.hpp> 37 template<
typename Tokenizer>
38 std::vector<std::string> ToTokens(Tokenizer& lineTok)
40 std::vector<std::string> tokens;
41 std::transform(std::begin(lineTok), std::end(lineTok),
42 std::back_inserter(tokens),
43 [&tokens](std::string
const &str)
45 std::string trimmedToken(str);
46 boost::trim(trimmedToken);
47 return std::move(trimmedToken);
54 void TransposeTokens(std::vector<std::vector<std::string>>
const &input,
55 std::vector<std::string>& output,
59 for (
size_t i = 0; i != input.size(); ++i)
61 output.emplace_back(input[i][index]);
67 template <
typename MatType>
68 bool inline inplace_transpose(MatType& X,
bool fatal)
75 catch (
const std::exception& e)
78 Log::Fatal <<
"\nTranspose Operation Failed.\n" 79 "Exception: " << e.what() << std::endl;
81 Log::Warn <<
"\nTranspose Operation Failed.\n" 82 "Exception: " << e.what() << std::endl;
89 bool Load(
const std::string& filename,
90 arma::Mat<eT>& matrix,
93 const arma::file_type inputLoadType)
99 #ifdef _WIN32 // Always open in binary mode on Windows. 100 stream.open(filename.c_str(), std::fstream::in | std::fstream::binary);
102 stream.open(filename.c_str(), std::fstream::in);
104 if (!stream.is_open())
108 Log::Fatal <<
"Cannot open file '" << filename <<
"'. " << std::endl;
110 Log::Warn <<
"Cannot open file '" << filename <<
"'; load failed." 116 arma::file_type loadType = inputLoadType;
117 std::string stringType;
118 if (inputLoadType == arma::auto_detect)
123 if (loadType == arma::file_type_unknown)
127 Log::Fatal <<
"Unable to detect type of '" << filename <<
"'; " 128 <<
"incorrect extension?" << std::endl;
130 Log::Warn <<
"Unable to detect type of '" << filename <<
"'; load " 131 <<
" failed. Incorrect extension?" << std::endl;
139 #ifndef ARMA_USE_HDF5 140 if (inputLoadType == arma::hdf5_binary)
145 Log::Fatal <<
"Attempted to load '" << filename <<
"' as HDF5 data, but " 146 <<
"Armadillo was compiled without HDF5 support. Load failed." 149 Log::Warn <<
"Attempted to load '" << filename <<
"' as HDF5 data, but " 150 <<
"Armadillo was compiled without HDF5 support. Load failed." 158 if (loadType == arma::raw_binary)
159 Log::Warn <<
"Loading '" << filename <<
"' as " << stringType <<
"; " 160 <<
"but this may not be the actual filetype!" << std::endl;
162 Log::Info <<
"Loading '" << filename <<
"' as " << stringType <<
". " 167 if (loadType != arma::hdf5_binary)
168 success = matrix.load(stream, loadType);
170 success = matrix.load(filename, loadType);
177 Log::Fatal <<
"Loading from '" << filename <<
"' failed." << std::endl;
179 Log::Warn <<
"Loading from '" << filename <<
"' failed." << std::endl;
184 Log::Info <<
"Size is " << (transpose ? matrix.n_cols : matrix.n_rows)
185 <<
" x " << (transpose ? matrix.n_rows : matrix.n_cols) <<
".\n";
190 success = inplace_transpose(matrix, fatal);
200 template<
typename eT,
typename PolicyType>
201 bool Load(
const std::string& filename,
202 arma::Mat<eT>& matrix,
205 const bool transpose)
211 std::string extension = Extension(filename);
215 stream.open(filename.c_str(), std::fstream::in);
217 if (!stream.is_open())
221 Log::Fatal <<
"Cannot open file '" << filename <<
"'. " << std::endl;
223 Log::Warn <<
"Cannot open file '" << filename <<
"'; load failed." 229 if (extension ==
"csv" || extension ==
"tsv" || extension ==
"txt")
231 Log::Info <<
"Loading '" << filename <<
"' as CSV dataset. " << std::flush;
235 loader.
Load(matrix, info, transpose);
237 catch (std::exception& e)
248 else if (extension ==
"arff")
250 Log::Info <<
"Loading '" << filename <<
"' as ARFF dataset. " 259 return inplace_transpose(matrix, fatal);
262 catch (std::exception& e)
278 Log::Fatal <<
"Unable to detect type of '" << filename <<
"'; " 279 <<
"incorrect extension?" << std::endl;
281 Log::Warn <<
"Unable to detect type of '" << filename <<
"'; load failed." 282 <<
" Incorrect extension?" << std::endl;
287 Log::Info <<
"Size is " << (transpose ? matrix.n_cols : matrix.n_rows)
288 <<
" x " << (transpose ? matrix.n_rows : matrix.n_cols) <<
".\n";
296 template <
typename eT>
297 bool Load(
const std::string& filename,
298 arma::SpMat<eT>& matrix,
300 const bool transpose)
305 std::string extension = Extension(filename);
309 #ifdef _WIN32 // Always open in binary mode on Windows. 310 stream.open(filename.c_str(), std::fstream::in | std::fstream::binary);
312 stream.open(filename.c_str(), std::fstream::in);
314 if (!stream.is_open())
318 Log::Fatal <<
"Cannot open file '" << filename <<
"'. " << std::endl;
320 Log::Warn <<
"Cannot open file '" << filename <<
"'; load failed." 326 bool unknownType =
false;
327 arma::file_type loadType;
328 std::string stringType;
330 if (extension ==
"tsv" || extension ==
"txt")
332 loadType = arma::coord_ascii;
333 stringType =
"Coordinate Formatted Data for Sparse Matrix";
335 else if (extension ==
"bin")
339 const std::string ARMA_SPM_BIN =
"ARMA_SPM_BIN";
340 std::string rawHeader(ARMA_SPM_BIN.length(),
'\0');
342 std::streampos pos = stream.tellg();
344 stream.read(&rawHeader[0], std::streamsize(ARMA_SPM_BIN.length()));
348 if (rawHeader == ARMA_SPM_BIN)
350 stringType =
"Armadillo binary formatted data for sparse matrix";
351 loadType = arma::arma_binary;
355 stringType =
"raw binary formatted data";
356 loadType = arma::raw_binary;
362 loadType = arma::raw_binary;
371 Log::Fatal <<
"Unable to detect type of '" << filename <<
"'; " 372 <<
"incorrect extension?" << std::endl;
374 Log::Warn <<
"Unable to detect type of '" << filename <<
"'; load failed." 375 <<
" Incorrect extension?" << std::endl;
381 if (loadType == arma::raw_binary)
382 Log::Warn <<
"Loading '" << filename <<
"' as " << stringType <<
"; " 383 <<
"but this may not be the actual filetype!" << std::endl;
385 Log::Info <<
"Loading '" << filename <<
"' as " << stringType <<
". " 390 success = matrix.load(stream, loadType);
397 Log::Fatal <<
"Loading from '" << filename <<
"' failed." << std::endl;
399 Log::Warn <<
"Loading from '" << filename <<
"' failed." << std::endl;
404 Log::Info <<
"Size is " << (transpose ? matrix.n_cols : matrix.n_rows)
405 <<
" x " << (transpose ? matrix.n_rows : matrix.n_cols) <<
".\n";
410 success = inplace_transpose(matrix, fatal);
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.
Definition: dataset_mapper.hpp:41
Load the CSV file. This class uses boost::spirit to implement the parser; please refer to the following link...
Definition: load_csv.hpp:36
static void Start(const std::string &name)
Start the given timer.
Definition: timers.cpp:28
void LoadARFF(const std::string &filename, arma::Mat< eT > &matrix)
A utility function to load an ARFF dataset as numeric features (that is, as an Armadillo matrix without any modification).
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void Load(arma::Mat< T > &inout, DatasetMapper< PolicyType > &infoSet, const bool transpose=true)
Load the file into the given matrix with the given DatasetMapper object.
Definition: load_csv.hpp:55
Definition: dtree_impl.hpp:21
std::string GetStringType(const arma::file_type &type)
Given a file type, return a logical name corresponding to that file type.
Definition: detect_file_type.cpp:30
static MLPACK_EXPORT util::PrefixedOutStream Warn
Prints warning messages prefixed with [WARN ].
Definition: log.hpp:87
arma::file_type AutoDetect(std::fstream &stream, const std::string &filename)
Attempt to auto-detect the type of a file given its extension, and by inspecting the parts of the file to disambiguate between types when necessary.
Definition: detect_file_type.cpp:192
static void Stop(const std::string &name)
Stop the given timer.
Definition: timers.cpp:36
bool Load(const std::string &filename, arma::Mat< eT > &matrix, const bool fatal=false, const bool transpose=true, const arma::file_type inputLoadType=arma::auto_detect)
Loads a matrix from file, guessing the filetype from the extension.
Definition: load_impl.hpp:89
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84