libime
historybigram.h
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_
7 #define _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_
8 
9 #include <cstddef>
10 #include <cstdint>
11 #include <functional>
12 #include <istream>
13 #include <memory>
14 #include <ostream>
15 #include <string>
16 #include <string_view>
17 #include <unordered_set>
18 #include <utility>
19 #include <vector>
20 #include <fcitx-utils/macros.h>
21 #include <libime/core/lattice.h>
22 #include <libime/core/libimecore_export.h>
23 
24 namespace libime {
25 
26 class HistoryBigramPrivate;
27 
/// Callback type that receives a `const WordNode *` and returns a
/// `std::string`. Going by the alias name, the returned string is the
/// validation code of that node -- TODO confirm against the callers.
/// NOTE(review): WordNode is not declared in this header; presumably it comes
/// from <libime/core/lattice.h>.
28 using ValidationCodeExtractor = std::function<std::string(const WordNode *)>;
29 
/// Exported class declaring a word-history interface: stream load/save,
/// learning (add*), forgetting, unigram/bigram frequency queries, scoring and
/// prediction. All state is held behind `d_ptr` (HistoryBigramPrivate), whose
/// definition is not part of this header.
30 class LIBIMECORE_EXPORT HistoryBigram {
31 public:
    /// Owning pair of strings. Presumably (word, code), mirroring the
    /// forget(word, code) overload -- TODO confirm against the implementation.
32  using WordWithCode = std::pair<std::string, std::string>;
    /// Non-owning counterpart of WordWithCode, made of two string views.
33  using WordWithCodeView = std::pair<std::string_view, std::string_view>;
34 
    /// Default constructor.
35  HistoryBigram();
36 
    /// NOTE(review): macro presumably provided by <fcitx-utils/macros.h>; going
    /// by its name it declares a virtual destructor plus move operations. The
    /// expansion is not visible here -- confirm.
37  FCITX_DECLARE_VIRTUAL_DTOR_MOVE(HistoryBigram);
38 
    /// Read history from \p in. Presumably the counterpart of save() -- TODO
    /// confirm the on-stream format.
39  void load(std::istream &in);
    /// Read history from \p in. Going by the name, the input is textual;
    /// presumably the counterpart of dump() -- TODO confirm.
40  void loadText(std::istream &in);
    /// Write history to \p out. Presumably the counterpart of load() -- TODO
    /// confirm.
41  void save(std::ostream &out);
    /// Write history to \p out. Presumably the counterpart of loadText() --
    /// TODO confirm.
42  void dump(std::ostream &out);
    /// Presumably discards all recorded history -- TODO confirm.
43  void clear();
44 
45  /// Set the unknown probability penalty.
46  /// \param unknown is a log probability.
47  void setUnknownPenalty(float unknown);
    /// Getter paired with setUnknownPenalty(); presumably returns the value
    /// set there (body not visible here).
48  float unknownPenalty() const;
49 
    /// Set the use-only-unigram flag.
    /// NOTE(review): presumably makes scoring ignore bigram data -- confirm.
50  void setUseOnlyUnigram(bool useOnlyUnigram);
    /// Getter paired with setUseOnlyUnigram().
51  bool useOnlyUnigram() const;
52 
    /// Forget \p word. NOTE(review): the exact scope (unigram only, or bigrams
    /// containing it as well) is not visible from this header -- confirm.
53  void forget(std::string_view word);
    /// Forget \p word restricted to the given \p code (presumably only entries
    /// recorded with that code -- TODO confirm).
54  void forget(std::string_view word, std::string_view code);
55 
    /// \return whether \p v is considered unknown. Presumably this means "not
    /// present in the history" -- TODO confirm.
56  bool isUnknown(std::string_view v) const;
    /// Score \p cur given the preceding node \p prev.
    /// NOTE(review): presumably a log probability, like the unknown penalty;
    /// null-pointer handling is not visible here -- confirm.
57  float score(const WordNode *prev, const WordNode *cur) const;
    /// Same as above, but taking the two words as string views.
58  float score(std::string_view prev, std::string_view cur) const;
    /// Score \p cur given \p prev, where both arguments are WordWithCodeView
    /// pairs.
59  float scoreWithCode(WordWithCodeView prev, WordWithCodeView cur) const;
    /// Score \p cur given \p prev for lattice nodes; \p extractor is a callable
    /// that turns a WordNode into a string (see ValidationCodeExtractor).
60  float scoreWithCode(const WordNode *prev, const WordNode *cur,
61  const ValidationCodeExtractor &extractor) const;
    /// Record \p sentence into the history.
62  void add(const SentenceResult &sentence);
    /// Record \p sentence, given as a sequence of words, into the history.
63  void add(const std::vector<std::string> &sentence);
    /// Record \p sentence into the history; \p validationCodeExtractor is
    /// presumably applied to each node to obtain its code -- TODO confirm.
64  void addWithCode(const SentenceResult &sentence,
65  const ValidationCodeExtractor &validationCodeExtractor);
    /// Record a sentence given as a sequence of WordWithCode pairs.
66  void
67  addWithCode(const std::vector<WordWithCode> &sentenceWithValidationCode);
68 
69  /// Fill the prediction based on current sentence.
    /// \param words set passed by non-const reference; presumably receives the
    ///        predicted words -- TODO confirm whether existing entries are kept.
    /// \param sentence the current sentence as a sequence of words.
    /// \param maxSize presumably an upper bound on the number of predictions --
    ///        TODO confirm.
70  void fillPredict(std::unordered_set<std::string> &words,
71  const std::vector<std::string> &sentence,
72  size_t maxSize) const;
73 
    /// \return whether the history contains the bigram (\p prev, \p cur).
74  bool containsBigram(std::string_view prev, std::string_view cur) const;
75 
76  /**
77  * Query the weighted frequency of the unigram.
78  *
79  * @since 1.1.14
80  */
81  float unigramFrequency(WordWithCodeView word) const;
82 
83  /**
84  * Query the weighted frequency of the bigram.
85  *
86  * @since 1.1.14
87  */
88  float bigramFrequency(WordWithCodeView prev, WordWithCodeView cur) const;
89 
90  /**
91  * Query the raw frequency of the unigram.
92  *
93  * @since 1.1.14
94  */
95  int32_t rawUnigramFrequency(WordWithCodeView word) const;
96 
97  /**
98  * Query the raw frequency of the bigram.
99  *
100  * @since 1.1.14
101  */
102  int32_t rawBigramFrequency(WordWithCodeView prev,
103  WordWithCodeView cur) const;
104 
    /// Record \p newSentence into the history, with \p context supplied
    /// alongside it. \p newSentence is taken by value.
    /// NOTE(review): presumably \p context holds the words preceding the new
    /// sentence -- confirm against the implementation.
105  void addWithContext(const std::vector<WordWithCode> &context,
106  std::vector<WordWithCode> newSentence);
107 
108 private:
    /// Owning pointer to the private implementation object.
109  std::unique_ptr<HistoryBigramPrivate> d_ptr;
    /// NOTE(review): macro presumably provided by <fcitx-utils/macros.h>;
    /// presumably declares the accessors for the private implementation
    /// (expansion not visible here).
110  FCITX_DECLARE_PRIVATE(HistoryBigram);
111 };
112 } // namespace libime
113 
114 #endif // _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_