crawlserv++  [under development]
Application for crawling and analyzing textual content of websites.
TokenCorrect.hpp
Go to the documentation of this file.
1 /*
2  *
3  * ---
4  *
5  * Copyright (C) 2021 Anselm Schmidt (ans[ät]ohai.su)
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version in addition to the terms of any
11  * licences already herein identified.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  *
21  * ---
22  *
23  * TokenCorrect.hpp
24  *
25  * Corrects tokens using an aspell dictionary.
26  *
27  * Created on: Feb 28, 2021
28  * Author: ans
29  */
30 
31 #ifndef DATA_TOKENCORRECT_HPP_
32 #define DATA_TOKENCORRECT_HPP_
33 
34 #include "../Wrapper/AspellChecker.hpp"
35 #include "../Wrapper/AspellConfig.hpp"
36 
37 #include <string> // std::string
38 #include <vector> // std::vector
39 
40 namespace crawlservpp::Data {
41 
42  /*
43  * DECLARATION
44  */
45 
47  class TokenCorrect {
48 
49  // for convenience
52 
53  public:
56 
57  explicit TokenCorrect(const std::string& language);
58 
60  virtual ~TokenCorrect() = default;
61 
65 
66  void correct(std::string& token);
67 
69 
72 
75  TokenCorrect(TokenCorrect&) = delete;
76 
78  TokenCorrect(TokenCorrect&&) = default;
79 
82 
84  TokenCorrect& operator=(TokenCorrect&&) = default;
85 
87 
88  private:
89  // configuration and spell checker
90  AspellConfig config;
91  AspellChecker checker;
92  };
93 
94  /*
95  * IMPLEMENTATION
96  */
97 
98  /*
99  * CONSTRUCTION AND DESTRUCTION
100  */
101 
103 
108  inline TokenCorrect::TokenCorrect(const std::string& language) {
109  this->config.setOption("encoding", "utf-8"); /* UTF-8 encoding */
110  this->config.setOption("size", "10"); /* small list size (only first word needed) */
111 
112  if(!language.empty()) {
113  this->config.setOption("lang", language); /* language, if not default */
114  }
115 
116  this->checker.create(this->config);
117  }
118 
119  /*
120  * TOKEN CORRECTION
121  */
122 
124 
129  inline void TokenCorrect::correct(std::string& token) {
130  std::vector<std::string> suggested;
131 
132  if(this->checker.check(token, suggested) || suggested.empty()) {
133  return;
134  }
135 
136  token = suggested.front();
137  }
138 
139 } /* namespace crawlservpp::Data */
140 
141 #endif /* DATA_TOKENCORRECT_HPP_ */
virtual ~TokenCorrect()=default
Default destructor.
RAII wrapper for aspell configurations.
Definition: AspellConfig.hpp:58
TokenCorrect & operator=(TokenCorrect &)=delete
Deleted copy assignment operator.
RAII wrapper for aspell spell checkers.
Definition: AspellChecker.hpp:62
void correct(std::string &token)
Corrects a token, if aspell offers at least one correction proposal.
Definition: TokenCorrect.hpp:129
void setOption(const std::string &name, const std::string &value)
Sets an option in the configuration.
Definition: AspellConfig.hpp:186
void create(AspellConfig &configuration)
Creates the spell checker.
Definition: AspellChecker.hpp:197
bool check(const std::string &token, std::vector< std::string > &suggestionsTo)
Checks whether a token is correctly spelled.
Definition: AspellChecker.hpp:254
TokenCorrect(const std::string &language)
Constructor setting options for the token correction.
Definition: TokenCorrect.hpp:108
Corrects tokens using an aspell dictionary.
Definition: TokenCorrect.hpp:47
Namespace for different types of data.