libime
segmentgraph.h
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_
7 #define _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_
8 
9 // Workaround a boost missing include bug.
10 #include <boost/type_traits/add_const.hpp>
11 
12 #include <cassert>
13 #include <cstddef>
14 #include <functional>
15 #include <list>
16 #include <memory>
17 #include <string>
18 #include <string_view>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22 #include <boost/iterator/iterator_categories.hpp>
23 #include <boost/iterator/transform_iterator.hpp>
24 #include <boost/range/any_range.hpp>
25 #include <boost/range/iterator_range_core.hpp>
26 #include <fcitx-utils/element.h>
27 #include <fcitx-utils/macros.h>
28 #include <libime/core/libimecore_export.h>
29 
30 namespace libime {
31 
32 class SegmentGraphBase;
33 class SegmentGraphNode;
34 using SegmentGraphDFSCallback =
35  std::function<bool(const SegmentGraphBase &, const std::vector<size_t> &)>;
36 using SegmentGraphBFSCallback =
37  std::function<bool(const SegmentGraphBase &, const SegmentGraphNode *)>;
38 
39 using SegmentGraphNodeRange =
40  boost::any_range<SegmentGraphNode, boost::bidirectional_traversal_tag>;
41 using SegmentGraphNodeConstRange =
42  boost::any_range<const SegmentGraphNode,
43  boost::bidirectional_traversal_tag>;
44 
45 class LIBIMECORE_EXPORT SegmentGraphNode : public fcitx::Element {
46  friend class SegmentGraph;
47 
48 public:
49  SegmentGraphNode(size_t start) : start_(start) {}
50  SegmentGraphNode(const SegmentGraphNode &node) = delete;
51  virtual ~SegmentGraphNode() {}
52 
53  SegmentGraphNodeConstRange nexts() const {
54  const auto &nexts = childs();
55  return boost::make_iterator_range(
56  boost::make_transform_iterator(nexts.begin(),
57  &SegmentGraphNode::castConst),
58  boost::make_transform_iterator(nexts.end(),
59  &SegmentGraphNode::castConst));
60  }
61 
62  size_t nextSize() const { return childs().size(); }
63 
64  SegmentGraphNodeConstRange prevs() const {
65  const auto &prevs = parents();
66  return boost::make_iterator_range(
67  boost::make_transform_iterator(prevs.begin(),
68  &SegmentGraphNode::castConst),
69  boost::make_transform_iterator(prevs.end(),
70  &SegmentGraphNode::castConst));
71  }
72 
73  size_t prevSize() const { return parents().size(); }
74  size_t index() const { return start_; }
75 
76  bool operator==(const SegmentGraphNode &other) const {
77  return this == &other;
78  }
79  bool operator!=(const SegmentGraphNode &other) const {
80  return !operator==(other);
81  }
82 
83 protected:
84  SegmentGraphNodeRange mutablePrevs() {
85  const auto &prevs = parents();
86  return boost::make_iterator_range(
87  boost::make_transform_iterator(prevs.begin(),
88  &SegmentGraphNode::cast),
89  boost::make_transform_iterator(prevs.end(),
90  &SegmentGraphNode::cast));
91  }
92 
93  SegmentGraphNodeRange mutableNexts() {
94  const auto &nexts = childs();
95  return boost::make_iterator_range(
96  boost::make_transform_iterator(nexts.begin(),
97  &SegmentGraphNode::cast),
98  boost::make_transform_iterator(nexts.end(),
99  &SegmentGraphNode::cast));
100  }
101 
102  void addEdge(SegmentGraphNode &ref) {
103  assert(ref.start_ > start_);
104  addChild(&ref);
105  }
106  void removeEdge(SegmentGraphNode &ref) { removeChild(&ref); }
107 
108  static auto castConst(fcitx::Element *ele) -> const SegmentGraphNode & {
109  return *static_cast<SegmentGraphNode *>(ele);
110  }
111  static auto cast(fcitx::Element *ele) -> SegmentGraphNode & {
112  return *static_cast<SegmentGraphNode *>(ele);
113  }
114 
115 private:
116  size_t start_;
117 };
118 
119 using SegmentGraphPath = std::vector<const SegmentGraphNode *>;
120 using DiscardCallback =
121  std::function<void(const std::unordered_set<const SegmentGraphNode *> &)>;
122 
123 class LIBIMECORE_EXPORT SegmentGraphBase {
124 public:
125  SegmentGraphBase(std::string data) : data_(std::move(data)) {}
126  FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_MOVE_WITHOUT_SPEC(SegmentGraphBase)
127 
128  virtual const SegmentGraphNode &start() const = 0;
129  virtual const SegmentGraphNode &end() const = 0;
130 
131  virtual SegmentGraphNodeConstRange nodes(size_t idx) const = 0;
132  const SegmentGraphNode &node(size_t idx) const {
133  return nodes(idx).front();
134  }
135 
136  // Return the string.
137  const std::string &data() const { return data_; }
138 
139  // Return the size of string.
140  size_t size() const { return data().size(); }
141 
142  std::string_view segment(size_t start, size_t end) const {
143  return {data().data() + start, end - start};
144  }
145 
146  std::string_view segment(const SegmentGraphNode &start,
147  const SegmentGraphNode &end) const {
148  return segment(start.index(), end.index());
149  }
150 
151  bool bfs(const SegmentGraphNode *from,
152  const SegmentGraphBFSCallback &callback) const;
153 
154  bool dfs(const SegmentGraphDFSCallback &callback) const {
155  std::vector<size_t> path;
156  return dfsHelper(path, start(), callback);
157  }
158 
159  // A naive distance, not necessary be the shortest.
160  size_t distanceToEnd(const SegmentGraphNode &node) const {
161  const auto *pNode = &node;
162  const SegmentGraphNode *endNode = &end();
163  size_t distance = 0;
164  while (pNode != endNode) {
165  pNode = &pNode->nexts().front();
166  ++distance;
167  }
168  return distance;
169  }
170 
171  bool isList() const {
172  const SegmentGraphNode *node = &start();
173  const SegmentGraphNode *endNode = &end();
174  while (node != endNode) {
175  if (node->nextSize() != 1) {
176  return false;
177  }
178  node = &node->nexts().front();
179  }
180  return true;
181  }
182 
183  bool checkGraph() const {
184  std::unordered_set<const SegmentGraphNode *> allNodes;
185  for (size_t i = 0, e = size(); i < e; i++) {
186  for (const auto &n : nodes(i)) {
187  if (n.nexts().empty() && n != end()) {
188  return false;
189  }
190  allNodes.insert(&n);
191  }
192  }
193 
194  bfs(&start(), [&allNodes](const SegmentGraphBase &,
195  const SegmentGraphNode *node) {
196  allNodes.erase(node);
197  return true;
198  });
199 
200  return allNodes.empty();
201  }
202 
203  bool checkNodeInGraph(const SegmentGraphNode *node) const {
204  for (size_t i = 0, e = size(); i <= e; i++) {
205  for (const auto &n : nodes(i)) {
206  if (&n == node) {
207  return true;
208  }
209  }
210  }
211  return false;
212  }
213 
214 protected:
215  std::string &mutableData() { return data_; }
216 
217 private:
218  bool dfsHelper(std::vector<size_t> &path, const SegmentGraphNode &start,
219  const SegmentGraphDFSCallback &callback) const {
220  if (start == end()) {
221  return callback(*this, path);
222  }
223  auto nexts = start.nexts();
224  for (const auto &next : nexts) {
225  auto idx = next.index();
226  path.push_back(idx);
227  if (!dfsHelper(path, next, callback)) {
228  return false;
229  }
230  path.pop_back();
231  }
232  return true;
233  }
234 
235  std::string data_;
236 };
237 
238 class LIBIMECORE_EXPORT SegmentGraph : public SegmentGraphBase {
239 public:
240  SegmentGraph(std::string str = {}) : SegmentGraphBase(std::move(str)) {
241  resize(data().size() + 1);
242  if (!data().empty()) {
243  newNode(data().size());
244  }
245  newNode(0);
246  }
247  SegmentGraph(const SegmentGraph &seg) = delete;
248  FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_MOVE_WITHOUT_SPEC(SegmentGraph)
249 
250  const SegmentGraphNode &start() const override { return *graph_[0]; }
251  const SegmentGraphNode &end() const override {
252  return *graph_[data().size()];
253  }
254  void merge(SegmentGraph &graph,
255  const DiscardCallback &discardCallback = {});
256 
257  SegmentGraphNode &ensureNode(size_t idx) {
258  if (nodes(idx).empty()) {
259  newNode(idx);
260  }
261  return mutableNode(idx);
262  }
263 
264  SegmentGraphNodeConstRange nodes(size_t idx) const override {
265  assert(idx < graph_.size());
266  if (graph_[idx]) {
267  return {graph_[idx].get(), graph_[idx].get() + 1};
268  }
269  return {};
270  }
271  using SegmentGraphBase::node;
272 
273  // helper for dag style segments
274  void addNext(size_t from, size_t to) {
275  assert(from < to);
276  assert(to <= data().size());
277  if (nodes(from).empty()) {
278  newNode(from);
279  }
280  if (nodes(to).empty()) {
281  newNode(to);
282  }
283  graph_[from]->addEdge(*graph_[to]);
284  }
285 
286  void appendNewSegment(std::string_view str) {
287  // append empty string is meaningless.
288  if (str.empty()) {
289  return;
290  }
291 
292  size_t oldSize = data().size();
293  mutableData().append(str.data(), str.size());
294  resize(data().size() + 1);
295  auto &newEnd = newNode(data().size());
296  auto &node = mutableNode(oldSize);
297  node.addEdge(newEnd);
298  }
299 
300  void appendToLastSegment(std::string_view str) {
301  // append empty string is meaningless.
302  if (str.empty()) {
303  return;
304  }
305 
306  size_t oldSize = data().size();
307  mutableData().append(str.data(), str.size());
308  resize(data().size() + 1);
309 
310  auto &newEnd = newNode(data().size());
311  auto &node = mutableNode(oldSize);
312  // If old size is 0, then just create an edge from begin to end.
313  if (oldSize == 0) {
314  node.addEdge(newEnd);
315  return;
316  }
317 
318  // Otherwise, iterate the prevs of oldEnd, create a edge between all
319  // prevs and newEnd, and delete old end.
320  for (auto &prev : node.mutablePrevs()) {
321  prev.addEdge(newEnd);
322  }
323  // Remove the old node.
324  graph_[oldSize].reset();
325  }
326 
327  void removeSuffixFrom(size_t idx) {
328  if (idx >= size()) {
329  return;
330  }
331 
332  auto *pNode = &mutableNode(size());
333  const auto *pStart = &start();
334  while (pNode != pStart && pNode->index() > idx) {
335  pNode = &pNode->mutablePrevs().front();
336  }
337 
338  mutableData().erase(idx);
339  resize(data().size() + 1);
340  if (size() == 0) {
341  return;
342  }
343 
344  if (pNode->index() == idx) {
345  return;
346  }
347  auto &newEnd = newNode(size());
348  pNode->addEdge(newEnd);
349  }
350 
351 private:
352  void resize(size_t newSize) {
353  auto oldSize = graph_.size();
354  graph_.resize(newSize);
355  for (; oldSize < newSize; oldSize++) {
356  graph_[oldSize].reset();
357  }
358  }
359 
360  SegmentGraphNode &newNode(size_t idx) {
361  graph_[idx] = std::make_unique<SegmentGraphNode>(idx);
362  return *graph_[idx];
363  }
364 
365  SegmentGraphNode &mutableNode(size_t idx) {
366  return mutableNodes(idx).front();
367  }
368 
369  SegmentGraphNodeRange mutableNodes(size_t idx) {
370  assert(idx < graph_.size());
371  if (graph_[idx]) {
372  return {graph_[idx].get(), graph_[idx].get() + 1};
373  }
374  return {};
375  }
376 
377  size_t check(const SegmentGraph &graph) const;
378  // ptr_vector doesn't have move constructor, G-R-E-A-T
379  std::vector<std::unique_ptr<SegmentGraphNode>> graph_;
380 };
381 } // namespace libime
382 
383 #endif // _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_