libime
tablerule.cpp
1 /*
2  * SPDX-FileCopyrightText: 2017-2023 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 
7 #include "tablerule.h"
8 #include <cstdint>
9 #include <cstdlib>
10 #include <istream>
11 #include <ostream>
12 #include <stdexcept>
13 #include <string>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17 #include <fcitx-utils/charutils.h>
18 #include <fcitx-utils/stringutils.h>
19 #include "libime/core/utils_p.h"
20 
21 namespace libime {
22 
namespace {

// First stored (unsigned) value that denotes a negative index. Stored values
// below this are non-negative indexes kept as-is.
constexpr int TAIL_OFFSET = 0x80;

// Decode a stored byte into a signed index.
//   0x00..0x7f -> 0..127
//   0x80..0xff -> -1..-128 (0x80 is -1, 0x81 is -2, ...)
int8_t toIndex(uint8_t index) {
    // Work in int so the intermediate value never depends on narrowing.
    const int stored = index;
    const int decoded =
        (stored < TAIL_OFFSET) ? stored : -(stored - TAIL_OFFSET + 1);
    return static_cast<int8_t>(decoded);
}

// Encode a signed index into its stored byte; exact inverse of toIndex().
//   0..127   -> 0x00..0x7f
//   -1..-128 -> 0x80..0xff
uint8_t fromIndex(int8_t index) {
    const int value = index;
    const int encoded = (value >= 0) ? value : (TAIL_OFFSET - 1 - value);
    return static_cast<uint8_t>(encoded);
}

} // namespace
42 
43 TableRuleEntry::TableRuleEntry(TableRuleEntryFlag flag, uint8_t character,
44  uint8_t encodingIndex)
45  : flag_(flag), character_(character), encodingIndex_(encodingIndex) {}
46 
47 TableRuleEntry::TableRuleEntry(std::istream &in) {
48  throw_if_io_fail(unmarshall(in, flag_));
49  throw_if_io_fail(unmarshall(in, character_));
50  throw_if_io_fail(unmarshall(in, encodingIndex_));
51 }
52 
53 bool TableRuleEntry::isPlaceHolder() const {
54  return character_ == 0 || index() == 0;
55 }
56 
57 int TableRuleEntry::index() const { return toIndex(encodingIndex_); }
58 
59 TableRule::TableRule(const std::string &ruleString, unsigned int maxLength) {
60  if (!ruleString[0]) {
61  throw std::invalid_argument("invalid rule string");
62  }
63 
64  switch (ruleString[0]) {
65  case 'e':
66  case 'E':
67  flag_ = TableRuleFlag::LengthEqual;
68  break;
69 
70  case 'a':
71  case 'A':
72  flag_ = TableRuleFlag::LengthLongerThan;
73  break;
74 
75  default:
76  throw std::invalid_argument("invalid rule string");
77  }
78 
79  auto equalSignPos = ruleString.find('=', 1);
80  if (equalSignPos == std::string::npos) {
81  throw std::invalid_argument("invalid rule string");
82  }
83 
84  auto afterEqualSign = std::string_view(ruleString).substr(equalSignPos + 1);
85  std::vector<std::string> entryStrings =
86  fcitx::stringutils::split(afterEqualSign, "+");
87  if (entryStrings.empty() || entryStrings.size() > maxLength) {
88  throw std::invalid_argument("invalid rule string");
89  }
90 
91  auto beforeEqualSign = std::string_view(ruleString).substr(0, equalSignPos);
92  if (beforeEqualSign.size() != 2 ||
93  !fcitx::charutils::isdigit(beforeEqualSign[1])) {
94  throw std::invalid_argument("invalid rule string");
95  }
96 
97  phraseLength_ = beforeEqualSign[1] - '0';
98  if (phraseLength_ <= 0 || phraseLength_ > maxLength) {
99  throw std::invalid_argument("Invalid phrase length");
100  }
101 
102  for (const auto &entryString : entryStrings) {
103  TableRuleEntryFlag entryFlag;
104  switch (entryString[0]) {
105  case 'p':
106  case 'P':
107  entryFlag = TableRuleEntryFlag::FromFront;
108  break;
109  case 'n':
110  case 'N':
111  entryFlag = TableRuleEntryFlag::FromBack;
112  break;
113  default:
114  throw std::invalid_argument("invalid rule entry flag");
115  }
116 
117  if (entryString.size() != 3 ||
118  !fcitx::charutils::isdigit(entryString[1]) ||
119  !(fcitx::charutils::isdigit(entryString[2]) ||
120  fcitx::charutils::isupper(entryString[2]) ||
121  fcitx::charutils::islower(entryString[2]))) {
122  throw std::invalid_argument("invalid rule entry");
123  }
124 
125  int8_t character = entryString[1] - '0'; // 0 ~ maxLength
126  int8_t index;
127  if (fcitx::charutils::isdigit(entryString[2])) {
128  index = entryString[2] - '0';
129  } else {
130  index = fcitx::charutils::tolower(entryString[2]) - 'z' - 1;
131  }
132  if (character < 0 || character > static_cast<int>(maxLength) ||
133  std::abs(index) > static_cast<int>(maxLength) ||
134  ((character == 0) ^ (index == 0))) {
135  throw std::invalid_argument("invalid rule entry");
136  }
137 
138  entries_.push_back(
139  TableRuleEntry(entryFlag, character, fromIndex(index)));
140  }
141 }
142 TableRule::TableRule(TableRuleFlag _flag, int _phraseLength,
143  std::vector<TableRuleEntry> _entries)
144  : flag_(_flag), phraseLength_(_phraseLength),
145  entries_(std::move(_entries)) {}
146 TableRule::TableRule(std::istream &in) {
147  uint32_t size = 0;
148  throw_if_io_fail(unmarshall(in, flag_));
149  throw_if_io_fail(unmarshall(in, phraseLength_));
150  throw_if_io_fail(unmarshall(in, size));
151  entries_.reserve(size);
152  for (auto i = 0U; i < size; i++) {
153  entries_.emplace_back(in);
154  }
155 }
156 
157 std::string TableRule::name() const {
158  std::string result;
159  result += ((flag_ == TableRuleFlag::LengthEqual) ? 'e' : 'a');
160  result += std::to_string(phraseLength_);
161 
162  return result;
163 }
164 
165 std::string TableRule::toString() const {
166  std::string result;
167 
168  result += name();
169  result += '=';
170  bool first = true;
171  for (const auto &entry : entries_) {
172  if (first) {
173  first = false;
174  } else {
175  result += '+';
176  }
177  result += ((entry.flag() == TableRuleEntryFlag::FromFront) ? 'p' : 'n');
178  result += static_cast<char>('0' + entry.character());
179  auto index = entry.index();
180  if (index >= 0) {
181  result += static_cast<char>('0' + index);
182  } else {
183  result += static_cast<char>('z' + index + 1);
184  }
185  }
186  return result;
187 }
188 
189 size_t TableRule::codeLength() const {
190  size_t sum = 0;
191  for (const auto &entry : entries_) {
192  if (entry.isPlaceHolder()) {
193  continue;
194  }
195  sum += 1;
196  }
197  return sum;
198 }
199 
200 std::ostream &operator<<(std::ostream &out, const TableRuleEntry &r) {
201  if (!marshall(out, r.flag())) {
202  return out;
203  }
204  if (!marshall(out, r.character())) {
205  return out;
206  }
207  if (!marshall(out, r.encodingIndex())) {
208  return out;
209  }
210  return out;
211 }
212 
213 std::ostream &operator<<(std::ostream &out, const TableRule &r) {
214  if (marshall(out, r.flag()) && marshall(out, r.phraseLength()) &&
215  marshall(out, static_cast<uint32_t>(r.entries().size()))) {
216  for (const auto &entry : r.entries()) {
217  if (!(out << entry)) {
218  break;
219  }
220  }
221  }
222  return out;
223 }
224 } // namespace libime