libime
lattice.h
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_CORE_LATTICE_H_
7 #define _FCITX_LIBIME_CORE_LATTICE_H_
8 
9 // Workaround a boost missing include bug.
10 #include <boost/type_traits/add_const.hpp>
11 
12 #include <algorithm>
13 #include <cassert>
14 #include <cstddef>
15 #include <memory>
16 #include <ranges>
17 #include <string>
18 #include <string_view>
19 #include <type_traits>
20 #include <unordered_set>
21 #include <utility>
22 #include <vector>
23 #include <boost/iterator/iterator_categories.hpp>
24 #include <boost/range/any_range.hpp>
25 #include <fcitx-utils/macros.h>
26 #include <fcitx-utils/stringutils.h>
27 #include <libime/core/languagemodel.h>
28 #include <libime/core/libimecore_export.h>
29 #include <libime/core/segmentgraph.h>
30 
31 namespace libime {
32 
33 class LatticePrivate;
34 class LatticeNode;
35 
37 public:
38  using Sentence = std::vector<const LatticeNode *>;
39  SentenceResult(Sentence sentence = {}, float score = 0.0F)
40  : sentence_(std::move(sentence)), score_(score) {}
41  FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(SentenceResult)
42 
43  const Sentence &sentence() const { return sentence_; }
44  size_t size() const { return sentence_.size(); }
45 
46  float score() const { return score_; }
47  void adjustScore(float adjust) { score_ += adjust; }
48  void setScore(float score) { score_ = score; }
49 
50  bool operator<(const SentenceResult &rhs) const {
51  return score_ < rhs.score_;
52  }
53 
54  bool operator>(const SentenceResult &rhs) const {
55  return score_ > rhs.score_;
56  }
57 
58  std::string toString() const;
59 
60 private:
61  Sentence sentence_;
62  float score_;
63 };
64 
65 class LIBIMECORE_EXPORT WordNode {
66 public:
67  WordNode(std::string_view word, WordIndex idx) : word_(word), idx_(idx) {}
68 
69  virtual ~WordNode() = default;
70  WordNode(WordNode &&other) noexcept(
71  std::is_nothrow_move_constructible_v<std::string>);
72  WordNode &operator=(WordNode &&other) noexcept(
73  std::is_nothrow_move_assignable_v<std::string>);
74 
75  const std::string &word() const { return word_; }
76  WordIndex idx() const { return idx_; }
77  void setIdx(WordIndex idx) { idx_ = idx; }
78 
79 protected:
80  std::string word_;
81  WordIndex idx_;
82 };
83 
84 class LIBIMECORE_EXPORT LatticeNodeData {
85 public:
86  virtual ~LatticeNodeData() = default;
87 };
88 
89 class LIBIMECORE_EXPORT LatticeNode : public WordNode {
90 public:
91  LatticeNode(std::string_view word, WordIndex idx, SegmentGraphPath path,
92  const State &state, float cost = 0)
93  : WordNode(word, idx), path_(std::move(path)), cost_(cost),
94  state_(state) {
95  assert(path_.size() >= 2);
96  }
97  float cost() const { return cost_; }
98 
99  float score() const { return score_; }
100  void setScore(float score) { score_ = score; }
101 
102  const SegmentGraphNode *from() const { return path_.front(); }
103  const SegmentGraphNode *to() const { return path_.back(); }
104  const SegmentGraphPath &path() const { return path_; }
105 
106  LatticeNode *prev() const { return prev_; }
107  void setPrev(LatticeNode *prev) { prev_ = prev; }
108 
109  template <typename T>
110  requires(std::is_base_of_v<LatticeNode, T>)
111  T &as() {
112  return static_cast<T &>(*this);
113  }
114 
115  template <typename T>
116  requires(std::is_base_of_v<LatticeNode, T>)
117  const T &as() const {
118  return static_cast<const T &>(*this);
119  }
120 
121  /// Return the full word till the begining of the sentence.
122  std::string fullWord() const {
123  size_t length = 0;
124  const auto *pivot = this;
125  while (pivot) {
126  length += pivot->word().size();
127  pivot = pivot->prev();
128  }
129 
130  std::string result;
131  result.resize(length);
132  pivot = this;
133  while (pivot) {
134  const auto &word = pivot->word();
135  length -= word.size();
136  std::copy(word.begin(), word.end(), result.begin() + length);
137  pivot = pivot->prev();
138  }
139 
140  return result;
141  }
142 
143  SentenceResult toSentenceResult(float adjust = 0.0F) const {
144  SentenceResult::Sentence result;
145  const auto *pivot = this;
146  // to skip bos
147  while (pivot->prev()) {
148  if (pivot->to()) {
149  result.emplace_back(pivot);
150  }
151  pivot = pivot->prev();
152  }
153 
154  std::ranges::reverse(result);
155  return {std::move(result), score() + adjust};
156  }
157 
158  State &state() { return state_; }
159 
160 protected:
161  SegmentGraphPath path_;
162  float cost_;
163  float score_ = 0.0F;
164  State state_;
165  LatticeNode *prev_ = nullptr;
166 };
167 
168 inline std::string SentenceResult::toString() const {
169  return fcitx::stringutils::join(
170  sentence_ |
171  std::views::transform([](const auto &item) -> const std::string & {
172  return item->word();
173  }),
174  "");
175 }
176 
177 // Lattice graph is a overlay graph on the SegmentGraph.
178 // Every node in SegmentGraph may have multiple corresponding LatticeNode.
179 // If there is an edge between two lattice nodes, there is a path between their
180 // corresponding SegmentGraphNode.
181 //
182 // For example, pinyin, "xian" has three SegmentGraphNodes.
183 // [0] ---- xian ----- [4]
184 // \- xi -[2] - an -/
185 //
186 // while [0] has only one lattice node, the remaining nodes may has multiple
187 // nodes.
188 // So there is an extra "end" node that links every nodes from [4] to it.
189 class LIBIMECORE_EXPORT Lattice {
190  friend class Decoder;
191  friend class DecoderPrivate;
192  friend class SegmentGraph;
193 
194 public:
195  Lattice();
196  FCITX_DECLARE_VIRTUAL_DTOR_MOVE(Lattice)
197 
198  size_t sentenceSize() const;
199  const SentenceResult &sentence(size_t idx) const;
200  void clear();
201  void discardNode(const std::unordered_set<const SegmentGraphNode *> &node);
202 
203  using NodeRange =
204  boost::any_range<LatticeNode, boost::bidirectional_traversal_tag,
205  const LatticeNode &>;
206 
207  NodeRange nodes(const SegmentGraphNode *node) const;
208 
209 private:
210  std::unique_ptr<LatticePrivate> d_ptr;
211  FCITX_DECLARE_PRIVATE(Lattice);
212 };
213 } // namespace libime
214 
215 #endif // _FCITX_LIBIME_CORE_LATTICE_H_
std::string fullWord() const
Return the full word till the begining of the sentence.
Definition: lattice.h:122