42 #ifndef DATA_STEMMER_GERMAN_HPP_ 43 #define DATA_STEMMER_GERMAN_HPP_ 123 if(token.size() == 1) {
201 for(std::size_t n{1}; n < token.size(); ++n) {
203 switch(
binInv & token[n]) {
247 && (n + 1) < token.size()
259 switch(token[n - 1]) {
293 token.erase(n - 1, 1);
337 switch(token.back()) {
425 std::size_t ignore{};
427 for(std::size_t n{1}; n < token.size(); ++n) {
428 if(token[n - 1] ==
'e' && token[n] ==
'i') {
435 else if(token[n - 1] ==
'i' && token[n] ==
'e') {
444 && token[n - 1] ==
's' 446 && token[n + 1] ==
'h' 460 for(
auto& c : token) {
476 const auto indexLast{token.size() - 1};
479 switch(token[indexLast - 1]) {
481 switch(token[indexLast]) {
496 if(token[indexLast] ==
'd') {
512 switch(token[token.size() - 1]) {
531 for(
auto& c : token) {
540 for(std::size_t n{1}; n < token.size(); ++n) {
541 switch(token[n - 1]) {
constexpr auto binInv
Literal for binary inversion.
Definition: German.hpp:63
constexpr auto utf8mb3
First byte of 3-byte UTF-8 character for capital sharp s.
Definition: German.hpp:72
constexpr auto umlautA2sm
Second byte of UTF-8 umlaut ä.
Definition: German.hpp:75
void stemGerman(std::string &token)
Stems a token in German.
Definition: German.hpp:118
constexpr auto minLengthStrip1
Minimum length of a token to strip one letter from the end.
Definition: German.hpp:60
constexpr auto toLowerCase
Number to add to make uppercase ASCII letters lowercase.
Definition: German.hpp:66
constexpr auto sharpS3l
Third byte of UTF-8 capital sharp s.
Definition: German.hpp:99
constexpr auto sharpS2l
Second byte of UTF-8 capital sharp s.
Definition: German.hpp:96
constexpr auto umlautU2sm
Second byte of UTF-8 umlaut ü.
Definition: German.hpp:87
constexpr auto umlautU2l
Second byte of UTF-8 umlaut Ü.
Definition: German.hpp:90
constexpr auto umlautO2l
Second byte of UTF-8 umlaut Ö.
Definition: German.hpp:84
constexpr auto utf8mb2
First byte of 2-byte UTF-8 characters for umlauts and sharp s.
Definition: German.hpp:69
constexpr auto umlautA2l
Second byte of UTF-8 umlaut Ä.
Definition: German.hpp:78
constexpr auto umlautO2sm
Second byte of UTF-8 umlaut ö.
Definition: German.hpp:81
constexpr auto sharpS2sm
Second byte of UTF-8 sharp s.
Definition: German.hpp:93
constexpr auto minLengthStrip2
Minimum length of a token to strip two letters from the end or the beginning.
Definition: German.hpp:57
Namespace for linguistic stemmers.
Definition: English.hpp:44