libime
tabledecoder.cpp
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 
7 #include "libime/table/tabledecoder.h"
8 #include <algorithm>
9 #include <cstddef>
10 #include <cstdint>
11 #include <iterator>
12 #include <memory>
13 #include <ranges>
14 #include <string>
15 #include <string_view>
16 #include <utility>
17 #include <vector>
18 #include <fcitx-utils/utf8.h>
19 #include "libime/core/languagemodel.h"
20 #include "libime/core/lattice.h"
21 #include "libime/core/segmentgraph.h"
22 #include "libime/table/tablebaseddictionary.h"
23 #include "tabledecoder_p.h"
24 #include "tableoptions.h"
25 #include "tablerule.h"
26 
27 namespace libime {
28 
29 namespace {
30 
31 bool isNotPlaceHolder(const TableRuleEntry &entry) {
32  return !entry.isPlaceHolder();
33 }
34 
35 bool checkRuleCanBeUsedAsAutoRule(const TableRule &rule) {
36  if (rule.flag() != TableRuleFlag::LengthEqual) {
37  return false;
38  }
39 
40  auto range = rule.entries() | std::views::filter(isNotPlaceHolder);
41  auto iter = std::begin(range);
42  auto end = std::end(range);
43  int currentChar = 1;
44  while (iter != end) {
45  int currentIndex = 1;
46  while (iter != end) {
47  if (iter->character() == currentChar) {
48  if (iter->flag() == TableRuleEntryFlag::FromFront &&
49  iter->index() == currentIndex) {
50  currentIndex++;
51  } else {
52  // reset to invalid.
53  currentIndex = 1;
54  break;
55  }
56  } else {
57  break;
58  }
59  ++iter;
60  }
61 
62  if (currentIndex == 1) {
63  return false;
64  }
65  currentChar++;
66  }
67  return currentChar == rule.phraseLength() + 1;
68 }
69 } // namespace
70 
71 uint32_t TableLatticeNode::index() const {
72  return d_ptr ? d_ptr->index_ : 0xFFFFFFFFU;
73 }
74 
75 PhraseFlag TableLatticeNode::flag() const {
76  return d_ptr ? d_ptr->flag_ : PhraseFlag::None;
77 }
78 
79 const std::string &TableLatticeNode::code() const {
80  static const std::string empty;
81  if (!d_ptr) {
82  return empty;
83  }
84  return d_ptr->code_;
85 }
86 
87 size_t TableLatticeNode::codeLength() const {
88  if (!d_ptr) {
89  return 0;
90  }
91  return d_ptr->codeLength_;
92 }
93 
94 TableLatticeNode::TableLatticeNode(
95  std::string_view word, WordIndex idx, SegmentGraphPath path,
96  const State &state, float cost,
97  std::unique_ptr<TableLatticeNodePrivate> data)
98  : LatticeNode(word, idx, std::move(path), state, cost),
99  d_ptr(std::move(data)) {}
100 
101 TableLatticeNode::~TableLatticeNode() = default;
102 
103 LatticeNode *TableDecoder::createLatticeNodeImpl(
104  const SegmentGraphBase & /*graph*/, const LanguageModelBase * /*model*/,
105  std::string_view word, WordIndex idx, SegmentGraphPath path,
106  const State &state, float cost, std::unique_ptr<LatticeNodeData> data,
107  bool /*onlyPath*/) const {
108  std::unique_ptr<TableLatticeNodePrivate> tableData(
109  static_cast<TableLatticeNodePrivate *>(data.release()));
110  return new TableLatticeNode(word, idx, std::move(path), state, cost,
111  std::move(tableData));
112 }
113 
114 bool TableDecoder::needSort(const SegmentGraph &graph,
115  const SegmentGraphNode * /*node*/) const {
116  return graph.start().nextSize() != 1;
117 }
118 
119 SegmentGraph graphForCode(std::string_view s,
120  const TableBasedDictionary &dict) {
121  SegmentGraph graph{std::string(s)};
122  if (s.empty()) {
123  return graph;
124  }
125  graph.addNext(0, graph.size());
126  auto codeLength = fcitx::utf8::length(graph.data());
127  // Rule.
128  if (dict.hasRule() && !dict.tableOptions().autoRuleSet().empty()) {
129  const auto &ruleSet = dict.tableOptions().autoRuleSet();
130  for (const auto &ruleName : ruleSet) {
131  const auto *rule = dict.findRule(ruleName);
132  if (!rule || codeLength != rule->codeLength() ||
133  !checkRuleCanBeUsedAsAutoRule(*rule)) {
134  continue;
135  }
136 
137  std::vector<int> charSizes(rule->phraseLength());
138  for (const auto &entry :
139  rule->entries() | std::views::filter(isNotPlaceHolder)) {
140  auto &charSize = charSizes[entry.character() - 1];
141  charSize = std::max(charSize, entry.index());
142  }
143 
144  int lastIndex = 0;
145  for (auto charSize : charSizes) {
146  graph.addNext(fcitx::utf8::ncharByteLength(graph.data().begin(),
147  lastIndex),
148  fcitx::utf8::ncharByteLength(
149  graph.data().begin(), lastIndex + charSize));
150  lastIndex += charSize;
151  }
152  }
153  }
154 
155  return graph;
156 }
157 } // namespace libime