// libime
// languagemodel.h
/*
 * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */
6 #ifndef _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_
7 #define _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_
8 
9 #include <array>
10 #include <cstddef>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <string_view>
15 #include <vector>
16 #include <fcitx-utils/macros.h>
17 #include <libime/core/datrie.h>
18 #include <libime/core/libimecore_export.h>
19 
20 namespace libime {
21 
/// Integer type used to identify a word.
using WordIndex = unsigned int;
/// Reserved WordIndex value (the maximum of the type), named as the "invalid"
/// marker.
constexpr WordIndex InvalidWordIndex = std::numeric_limits<WordIndex>::max();
/// Size in bytes of a State buffer: 20 bytes plus the size of one pointer.
/// NOTE(review): the reason for this exact layout is not visible in this
/// header; presumably it mirrors the backend's state object -- confirm.
constexpr size_t StateSize = 20 + sizeof(void *);
/// Fixed-size byte buffer passed in and out of the scoring functions.
using State = std::array<char, StateSize>;

// Forward declarations of types that this header only uses by reference or
// through a pointer.
class WordNode;
class LatticeNode;
class LanguageModelPrivate;
class LanguageModelResolverPrivate;

33 class LIBIMECORE_EXPORT LanguageModelBase {
34 public:
35  virtual ~LanguageModelBase();
36 
37  virtual WordIndex beginSentence() const = 0;
38  virtual WordIndex endSentence() const = 0;
39  virtual WordIndex unknown() const = 0;
40  virtual const State &beginState() const = 0;
41  virtual const State &nullState() const = 0;
42  virtual WordIndex index(std::string_view view) const = 0;
43  virtual float score(const State &state, const WordNode &word,
44  State &out) const = 0;
45  virtual bool isUnknown(WordIndex idx, std::string_view view) const = 0;
46  bool isNodeUnknown(const LatticeNode &node) const;
47  float singleWordScore(std::string_view word) const;
48  float singleWordScore(const State &state, std::string_view word) const;
49  float wordsScore(const State &state,
50  const std::vector<std::string_view> &word) const;
51 };
52 
54 
55 class LIBIMECORE_EXPORT StaticLanguageModelFile {
56  friend class LanguageModelPrivate;
57 
58 public:
59  explicit StaticLanguageModelFile(const char *file);
60  virtual ~StaticLanguageModelFile();
61 
62  const DATrie<float> &predictionTrie() const;
63 
64 private:
65  std::unique_ptr<StaticLanguageModelFilePrivate> d_ptr;
66  FCITX_DECLARE_PRIVATE(StaticLanguageModelFile);
67 };
68 
69 class LIBIMECORE_EXPORT LanguageModel : public LanguageModelBase {
70 public:
71  explicit LanguageModel(const char *file);
73  std::shared_ptr<const StaticLanguageModelFile> file = nullptr);
74  virtual ~LanguageModel();
75 
76  static size_t maxOrder();
77 
78  std::shared_ptr<const StaticLanguageModelFile> languageModelFile() const;
79 
80  WordIndex beginSentence() const override;
81  WordIndex endSentence() const override;
82  WordIndex unknown() const override;
83  const State &beginState() const override;
84  const State &nullState() const override;
85  WordIndex index(std::string_view word) const override;
86  float score(const State &state, const WordNode &node,
87  State &out) const override;
88  bool isUnknown(WordIndex idx, std::string_view word) const override;
89  void setUnknownPenalty(float unknown);
90  float unknownPenalty() const;
91 
92  unsigned int maxNgramLength(const std::vector<std::string> &words) const;
93 
94 private:
95  std::unique_ptr<LanguageModelPrivate> d_ptr;
96  FCITX_DECLARE_PRIVATE(LanguageModel);
97 };
98 
99 /// \brief a class that provides language model data for different languages.
100 ///
101 /// The resolver will also hold a weak reference to the language model file.
102 /// If the language model file is still alive no new file will be constructed.
103 class LIBIMECORE_EXPORT LanguageModelResolver {
104 public:
106  FCITX_DECLARE_VIRTUAL_DTOR_MOVE(LanguageModelResolver)
107  std::shared_ptr<const StaticLanguageModelFile>
108  languageModelFileForLanguage(const std::string &language);
109 
110 protected:
111  virtual std::string
112  languageModelFileNameForLanguage(const std::string &language) = 0;
113 
114 private:
115  std::unique_ptr<LanguageModelResolverPrivate> d_ptr;
116  FCITX_DECLARE_PRIVATE(LanguageModelResolver);
117 };
118 
119 class LIBIMECORE_EXPORT DefaultLanguageModelResolver
120  : public LanguageModelResolver {
121 public:
122  static DefaultLanguageModelResolver &instance();
123 
124 protected:
125  std::string
126  languageModelFileNameForLanguage(const std::string &language) override;
127 
128 private:
131 };
132 } // namespace libime
133 
134 #endif // _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_
// A class that provides language model data for different languages.
// Provides a DATrie implementation.