41 #ifndef MODULE_ANALYZER_ALGO_SENTIMENTOVERTIME_HPP_ 42 #define MODULE_ANALYZER_ALGO_SENTIMENTOVERTIME_HPP_ 44 #include "../Thread.hpp" 46 #include "../../../Data/Data.hpp" 47 #include "../../../Data/Dictionary.hpp" 48 #include "../../../Data/Sentiment.hpp" 49 #include "../../../Helper/DateTime.hpp" 50 #include "../../../Helper/FileSystem.hpp" 51 #include "../../../Helper/Memory.hpp" 52 #include "../../../Main/Database.hpp" 53 #include "../../../Struct/QueryStruct.hpp" 54 #include "../../../Struct/StatusSetter.hpp" 55 #include "../../../Struct/TextMap.hpp" 56 #include "../../../Struct/ThreadOptions.hpp" 57 #include "../../../Struct/ThreadStatus.hpp" 68 #include <string_view> 69 #include <unordered_map> 70 #include <unordered_set> 76 using std::string_view_literals::operator
""sv;
// Aggregated sentiment data; DateData stores a vector of these per string key.
135 struct DateCategoryData {
// Zero-initialized sum of sentiment values — presumably the sentence scores added so far; confirm in the .cpp.
137 double sentimentSum{};
// Zero-initialized counter — presumably the number of values added to sentimentSum; confirm.
140 std::uint64_t sentimentCount{};
// Set of unique strings — presumably identifiers of the articles that contributed; confirm.
143 std::unordered_set<std::string> articles;
// Pair of a double and an unsigned 64-bit integer — presumably (sentiment sum, count); confirm against usage.
156 using DoubleUInt = std::pair<double, std::uint64_t>;
// Pair of two strings.
157 using StringString = std::pair<std::string, std::string>;
// Hash map from a string key to a DoubleUInt — presumably keyed by article identifier; confirm.
159 using ArticleData = std::unordered_map<std::string, DoubleUInt>;
// Ordered map from a string key to a vector of DateCategoryData — presumably keyed by date, one element per category; confirm.
160 using DateData = std::map<std::string, std::vector<DateCategoryData>>;
// Returns the name of the algorithm as a string view; overrides a base-class virtual and does not modify the object.
180 std::string_view
getName()
const override;
// Algorithm options; the brace initializers below are the defaults.
// NOTE(review): presumably set in parseAlgoOption() and validated in checkAlgoOptions() — confirm in SentimentOverTime.cpp.
// Category labels as strings.
206 std::vector<std::string> categoryLabels;
// Unsigned 64-bit values for the categories — presumably the IDs of the queries used to detect each category; confirm.
207 std::vector<std::uint64_t> categoryQueries;
// Defaults to true. Presumably controls whether multiple sources are combined — TODO confirm.
208 bool combineSources{
true};
// Defaults to false. Presumably enables the additional article-based sentiment output — TODO confirm.
210 bool addArticleSentiment{
false};
// Defaults to true. Presumably skips dates for which no data exists — TODO confirm.
211 bool ignoreEmptyDate{
true};
// Defaults to false. Presumably enables ignoring sentiments below a threshold — TODO confirm.
212 bool useThreshold{
false};
// Uniquely-owned Data::Sentiment instance; holds no object until one is assigned.
218 std::unique_ptr<Data::Sentiment> sentimentAnalyzer;
// Query structures for the categories — presumably built by initQueries() from categoryQueries; confirm.
221 std::vector<QueryStruct> queriesCategories;
// String-keyed (double, count) pairs — presumably per-article sentiment data; confirm.
225 ArticleData articleData;
// A date string — presumably the date handled most recently; confirm how it is used.
227 std::string previousDate;
// Zero-initialized index — presumably the corpus currently being processed; confirm.
229 std::size_t currentCorpus{};
// Takes no arguments and returns nothing — presumably writes the calculated sentiments to the target table; confirm in the .cpp.
233 void saveSentiments();
// Override of a base-class virtual — presumably prepares queriesCategories; confirm.
236 void initQueries()
override;
// Override of a base-class virtual — presumably the counterpart that releases what initQueries() set up; confirm.
237 void deleteQueries()
override;
// Takes a date string and returns an iterator into a DateData map; the result must not be discarded.
// NOTE(review): presumably inserts the date if it is missing and returns its entry — confirm.
240 [[nodiscard]] DateData::iterator addDate(
const std::string& date);
// Handles one sentence. Visible parameters:
//   tokens   - token strings,
//   sentence - pair of two sizes (presumably position and length of the sentence within tokens — confirm),
//   dateIt   - iterator to an entry of a DateData map,
//   article  - article string (presumably its identifier — confirm).
241 void processSentence(
242 const std::vector<std::string>& tokens,
243 const std::pair<std::size_t, std::size_t>& sentence,
244 const DateData::iterator& dateIt,
245 const std::string& article
// Returns a float score for one sentence; the result must not be discarded.
//   sentence - pair of two sizes (presumably position and length within tokens — confirm),
//   tokens   - token strings.
// NOTE(review): presumably delegates to sentimentAnalyzer — confirm in the .cpp.
247 [[nodiscard]]
float getSentenceScore(
248 const std::pair<std::size_t, std::size_t>& sentence,
249 const std::vector<std::string>& tokens
// Takes a set of article strings and returns a (double, count) pair; the result must not be discarded.
// NOTE(review): presumably aggregates over the given articles — confirm.
251 [[nodiscard]] DoubleUInt calculateArticleSentiment(
252 const std::unordered_set<std::string>& articles
// Takes a single article string and returns a (double, count) pair; the result must not be discarded.
254 [[nodiscard]] DoubleUInt calculateArticle(
255 const std::string& article
// Takes a table name, a date string and a number of columns; returns nothing.
// NOTE(review): presumably writes a placeholder row for a date without data — confirm in the .cpp.
257 void fillGap(
const std::string& table,
const std::string& date, std::size_t numColumns);
259 const std::string& table,
260 const std::string& date,
261 const std::vector<DateCategoryData>& dataSet,
262 std::size_t numColumns
// Static helper returning bool; numberTo is an in/out reference.
// NOTE(review): the remaining parameters are not visible here — confirm the full signature.
266 static bool selectFirst(
268 std::size_t& numberTo
// Static helper returning bool; takes the start of a sentence and an in/out reference numberFromTo.
// NOTE(review): further parameters are not visible here — confirm the full signature.
270 static bool identifyCurrent(
271 std::size_t sentenceBegin,
272 std::size_t& numberFromTo,
// Static predicate checking against an 8-bit unsigned threshold; the result must not be discarded.
// NOTE(review): presumably false for sentiments below the threshold, which are then ignored — confirm.
276 [[nodiscard]]
static bool meetsThreshold(
278 std::uint8_t threshold
void onAlgoUnpause() override
Does nothing.
Definition: SentimentOverTime.cpp:237
void onAlgoPause() override
Does nothing.
Definition: SentimentOverTime.cpp:234
Namespace for algorithm classes.
Definition: All.cpp:52
SentimentOverTime(Main::Database &dbBase, const ThreadOptions &threadOptions, const ThreadStatus &threadStatus)
Continues a previously interrupted algorithm run.
Definition: SentimentOverTime.cpp:53
constexpr auto sentimentEmojis
The default emoji dictionary to be used.
Definition: SentimentOverTime.hpp:107
void checkAlgoOptions() override
Checks the configuration options for the algorithm.
Definition: SentimentOverTime.cpp:266
Abstract class providing thread functionality to algorithm (child) classes.
Definition: Thread.hpp:84
Thread status containing its ID, status message, pause state, and progress.
Definition: ThreadStatus.hpp:54
constexpr auto sentimentArticleColumnsPerCategory
Number of columns per category if article-based sentiment is activated.
Definition: SentimentOverTime.hpp:98
constexpr auto sentimentDictionary
The default sentiment dictionary to be used.
Definition: SentimentOverTime.hpp:104
Text map entry.
Definition: TextMap.hpp:49
void onAlgoInit() override
Generates the corpus.
Definition: SentimentOverTime.cpp:148
constexpr auto sentimentUpdateCalculateProgressEvery
Indicates, while calculating, after how many sentences the progress of the thread will be updated.
Definition: SentimentOverTime.hpp:86
Thread options containing the name of the module run, as well as the IDs of the website, URL list, and configuration used.
Definition: ThreadOptions.hpp:40
Class handling database access for the command-and-control and its threads.
Definition: Database.hpp:366
Class for analyzer exceptions to be used by algorithms.
Definition: Thread.hpp:242
constexpr auto sentimentMinNumColumns
Number of default columns to be written to the target table.
Definition: SentimentOverTime.hpp:92
Structure containing all the data needed to keep the status of a thread updated.
Definition: StatusSetter.hpp:57
std::vector< TextMapEntry > TextMap
A text map is defined as a vector of text map entries.
Definition: TextMap.hpp:280
void onAlgoClear() override
Does nothing.
Definition: SentimentOverTime.cpp:240
constexpr auto sentimentMinColumnsPerCategory
Number of columns per category if article-based sentiment is deactivated.
Definition: SentimentOverTime.hpp:95
constexpr auto sentimentPercentageFactor
Factor to convert value to percentage.
Definition: SentimentOverTime.hpp:110
Sentiment analysis using the VADER algorithm.
Definition: SentimentOverTime.hpp:133
constexpr auto sentimentDefaultThreshold
The default threshold (sentiments lower than that number will be ignored).
Definition: SentimentOverTime.hpp:101
Structure to identify a query including its type and result type(s).
Definition: QueryStruct.hpp:40
constexpr auto sentimentUpdateSavingProgressEvery
Indicates, while saving, after how many rows the progress of the thread will be updated.
Definition: SentimentOverTime.hpp:89
std::string_view getName() const override
Returns the name of the algorithm.
Definition: SentimentOverTime.cpp:84
void parseAlgoOption() override
Parses a configuration option for the algorithm.
Definition: SentimentOverTime.cpp:247
void onAlgoTick() override
Calculates the sentence-based sentiment scores in the text corpus.
Definition: SentimentOverTime.cpp:219
void onAlgoInitTarget() override
Initializes the target table for the algorithm.
Definition: SentimentOverTime.cpp:100
void resetAlgo() override
Resets the algorithm.
Definition: SentimentOverTime.cpp:335