crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
English.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2021 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * English.hpp
24  *
25  * Simple English stemmer based on porter2_stemmer by Sean Massung.
26  *
27  * Original: https://github.com/smassung/porter2_stemmer
28  *
29  * Implementation of
30  * http://snowball.tartarus.org/algorithms/english/stemmer.html
31  *
32  * Created on: Aug 3, 2020
33  * Author: ans
34  */
35 
36 #ifndef DATA_STEMMER_ENGLISH_HPP_
37 #define DATA_STEMMER_ENGLISH_HPP_
38 
39 #include "../../_extern/porter2_stemmer/porter2_stemmer.h"
40 
41 #include <string> // std::string
42 
45 
46  /*
47  * DECLARATION
48  */
49 
50  void stemEnglish(std::string& token);
51 
52  /*
53  * IMPLEMENTATION
54  */
55 
57 
61  inline void stemEnglish(std::string& token) {
62  Porter2Stemmer::trim(token);
63  Porter2Stemmer::stem(token);
64  }
65 
66 } /* namespace crawlservpp::Data::Stemmer */
67 
68 #endif /* DATA_STEMMER_ENGLISH_HPP_ */
void trim(std::string &stringToTrim)
Removes whitespace around a string.
Definition: Strings.hpp:360
void stemEnglish(std::string &token)
Stems a token in English.
Definition: English.hpp:61
Namespace for linguistic stemmers.
Definition: English.hpp:44