mlpack
|
#include <mlpack/core.hpp>
#include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
#include <mlpack/core/data/tokenizers/split_by_any_of.hpp>
#include <mlpack/core/data/tokenizers/char_extract.hpp>
#include <mlpack/core/data/string_encoding.hpp>
#include <mlpack/core/data/string_encoding_policies/dictionary_encoding_policy.hpp>
#include <mlpack/core/data/string_encoding_policies/bag_of_words_encoding_policy.hpp>
#include <mlpack/core/data/string_encoding_policies/tf_idf_encoding_policy.hpp>
#include <memory>
#include "test_catch_tools.hpp"
#include "catch.hpp"
#include "serialization.hpp"
Functions | |
template<typename ValueType > void CheckVectors (const vector< vector< ValueType >> &a, const vector< vector< ValueType >> &b, const ValueType tolerance=1e-5) | |
Check the values of two 2D vectors. | |
TEST_CASE ("DictionaryEncodingTest", "[StringEncodingTest]") | |
Test the dictionary encoding algorithm. | |
TEST_CASE ("UnicodeDictionaryEncodingTest", "[StringEncodingTest]") | |
Test the dictionary encoding algorithm with unicode characters. | |
TEST_CASE ("OnePassDictionaryEncodingTest", "[StringEncodingTest]") | |
Test the one pass modification of the dictionary encoding algorithm. | |
TEST_CASE ("SplitByAnyOfTokenizerTest", "[StringEncodingTest]") | |
Test the SplitByAnyOf tokenizer. | |
TEST_CASE ("SplitByAnyOfTokenizerUnicodeTest", "[StringEncodingTest]") | |
Test the SplitByAnyOf tokenizer in case of unicode characters. | |
TEST_CASE ("DictionaryEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the CharExtract tokenizer. | |
TEST_CASE ("OnePassDictionaryEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the one pass modification of the dictionary encoding algorithm in case of individual character encoding. | |
TEST_CASE ("StringEncodingCopyTest", "[StringEncodingTest]") | |
Test the functionality of the copy constructor. | |
TEST_CASE ("StringEncodingMoveTest", "[StringEncodingTest]") | |
Test the move assignment operator. | |
template<typename TokenType > void CheckDictionaries (const StringEncodingDictionary< TokenType > &expected, const StringEncodingDictionary< TokenType > &obtained) | |
The function checks that the given dictionaries contain the same data. | |
template<> void CheckDictionaries (const StringEncodingDictionary< boost::string_view > &expected, const StringEncodingDictionary< boost::string_view > &obtained) | |
This is a specialization of the CheckDictionaries() function for the boost::string_view token type. | |
template<> void CheckDictionaries (const StringEncodingDictionary< int > &expected, const StringEncodingDictionary< int > &obtained) | |
This is a specialization of the CheckDictionaries() function for the integer token type. | |
TEST_CASE ("StringEncodingDictionarySerialization", "[StringEncodingTest]") | |
Serialization test for the general template of the StringEncodingDictionary class. | |
TEST_CASE ("SplitByAnyOfDictionaryEncodingSerialization", "[StringEncodingTest]") | |
Serialization test for the dictionary encoding algorithm with the SplitByAnyOf tokenizer. | |
TEST_CASE ("CharExtractDictionaryEncodingSerialization", "[StringEncodingTest]") | |
Serialization test for the dictionary encoding algorithm with the CharExtract tokenizer. | |
TEST_CASE ("BagOfWordsEncodingTest", "[StringEncodingTest]") | |
Test the Bag of Words encoding algorithm. | |
TEST_CASE ("VectorBagOfWordsEncodingTest", "[StringEncodingTest]") | |
Test the Bag of Words encoding algorithm. More... | |
TEST_CASE ("BagOfWordsEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Bag of Words algorithm for individual characters. | |
TEST_CASE ("VectorBagOfWordsEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Bag of Words encoding algorithm in case of individual characters encoding. More... | |
TEST_CASE ("RawCountSmoothIdfEncodingTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the smooth inverse document frequency type. More... | |
TEST_CASE ("VectorRawCountSmoothIdfEncodingTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the smooth inverse document frequency type. More... | |
TEST_CASE ("RawCountSmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the smooth inverse document frequency type. More... | |
TEST_CASE ("VectorRawCountSmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the smooth inverse document frequency type. More... | |
TEST_CASE ("TfIdfRawCountEncodingTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the non-smooth inverse document frequency type. | |
TEST_CASE ("VectorTfIdfRawCountEncodingTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the non-smooth inverse document frequency type. More... | |
TEST_CASE ("RawCountTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the non-smooth inverse document frequency type. | |
TEST_CASE ("VectorRawCountTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the non-smooth inverse document frequency type. More... | |
TEST_CASE ("BinarySmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the binary term frequency type and the smooth inverse document frequency type. | |
TEST_CASE ("VectorBinarySmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the binary term frequency type and the smooth inverse document frequency type. More... | |
TEST_CASE ("BinaryTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the binary term frequency type and the non-smooth inverse document frequency type. | |
TEST_CASE ("SublinearSmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the sublinear term frequency type and the smooth inverse document frequency type. | |
TEST_CASE ("SublinearTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the sublinear term frequency type and the non-smooth inverse document frequency type. | |
TEST_CASE ("TermFrequencySmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the standard term frequency type and the smooth inverse document frequency type. | |
TEST_CASE ("TermFrequencyTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]") | |
Test the Tf-Idf encoding algorithm for individual characters with the standard term frequency type and the non-smooth inverse document frequency type. | |
TEST_CASE ("SplitByAnyOfTfIdfEncodingSerialization", "[StringEncodingTest]") | |
Serialization test for the Tf-Idf encoding algorithm with the SplitByAnyOf tokenizer. | |
Tests for the StringEncoding class.
mlpack is free software; you may redistribute it and/or modify it under the terms of the 3-clause BSD license. You should have received a copy of the 3-clause BSD license along with mlpack. If not, see http://www.opensource.org/licenses/BSD-3-Clause for more information.
TEST_CASE ("VectorBagOfWordsEncodingTest", "[StringEncodingTest]")
Test the Bag of Words encoding algorithm.
The output is saved into a vector.
TEST_CASE ("VectorBagOfWordsEncodingIndividualCharactersTest", "[StringEncodingTest]")
Test the Bag of Words encoding algorithm in case of individual characters encoding.
The output type is vector<vector<size_t>>.
TEST_CASE ("RawCountSmoothIdfEncodingTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the smooth inverse document frequency type.
These parameters are the default ones.
TEST_CASE ("VectorRawCountSmoothIdfEncodingTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the smooth inverse document frequency type.
These parameters are the default ones. The output type is vector<vector<double>>.
TEST_CASE ("RawCountSmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the smooth inverse document frequency type.
These parameters are the default ones.
TEST_CASE ("VectorRawCountSmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the smooth inverse document frequency type.
These parameters are the default ones. The output type is vector<vector<double>>.
TEST_CASE ("VectorTfIdfRawCountEncodingTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm with the raw count term frequency type and the non-smooth inverse document frequency type.
The output type is vector<vector<double>>.
TEST_CASE ("VectorRawCountTfIdfEncodingIndividualCharactersTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm for individual characters with the raw count term frequency type and the non-smooth inverse document frequency type.
The output type is vector<vector<double>>.
TEST_CASE ("VectorBinarySmoothIdfEncodingIndividualCharactersTest", "[StringEncodingTest]")
Test the Tf-Idf encoding algorithm for individual characters with the binary term frequency type and the smooth inverse document frequency type.
The output type is vector<vector<double>>.