libime
tablecontext.cpp
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #include "tablecontext.h"
7 #include <algorithm>
8 #include <cassert>
9 #include <chrono>
10 #include <cstddef>
11 #include <cstdint>
12 #include <iterator>
13 #include <limits>
14 #include <memory>
15 #include <regex>
16 #include <string>
17 #include <string_view>
18 #include <tuple>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 #include <fcitx-utils/inputbuffer.h>
23 #include <fcitx-utils/log.h>
24 #include <fcitx-utils/macros.h>
25 #include <fcitx-utils/utf8.h>
26 #include "libime/core/historybigram.h"
27 #include "libime/core/inputbuffer.h"
28 #include "libime/core/languagemodel.h"
29 #include "libime/core/lattice.h"
30 #include "libime/core/segmentgraph.h"
31 #include "libime/core/userlanguagemodel.h"
32 #include "libime/table/tablebaseddictionary.h"
33 #include "constants.h"
34 #include "log.h"
35 #include "tablebaseddictionary_p.h"
36 #include "tabledecoder.h"
37 #include "tableoptions.h"
38 
39 namespace libime {
40 
41 namespace {
42 
43 size_t sentenceCodeLength(const SentenceResult &sentence) {
44  const auto *node =
45  static_cast<const TableLatticeNode *>(sentence.sentence()[0]);
46  return node->codeLength();
47 }
48 
/// Compare a length against a limit without mixing signed and unsigned
/// operands. A negative limit means "no limit"; the function then always
/// returns false.
bool lengthLessThanLimit(size_t length, int limit) {
    return limit >= 0 && length < static_cast<size_t>(limit);
}
57 
58 template <OrderPolicy policy>
59 struct TableCandidateCompare {
60  TableCandidateCompare(int noSortInputLength, bool sortByCodeLength)
61  : noSortInputLength_(noSortInputLength),
62  sortByCodeLength_(sortByCodeLength) {}
63 
64  // Larger index should be put ahead.
65  static int64_t index(const SentenceResult &sentence) {
66  const auto *const node =
67  static_cast<const TableLatticeNode *>(sentence.sentence()[0]);
68  if (node->flag() == PhraseFlag::User) {
69  return node->index();
70  }
71  return -static_cast<int64_t>(node->index());
72  }
73 
74  bool operator()(const SentenceResult &lhs,
75  const SentenceResult &rhs) const {
76  const bool lIsAuto = TableContext::isAuto(lhs);
77  const bool rIsAuto = TableContext::isAuto(rhs);
78  if (lIsAuto != rIsAuto) {
79  return lIsAuto < rIsAuto;
80  }
81  // non-auto word
82  if (!lIsAuto) {
83  const bool lIsPinyin = TableContext::isPinyin(lhs);
84  const bool rIsPinyin = TableContext::isPinyin(rhs);
85  const auto lLength = sentenceCodeLength(lhs);
86  const auto rLength = sentenceCodeLength(rhs);
87  const bool lShort =
88  static_cast<int>(lLength) <= noSortInputLength_ && !lIsPinyin;
89  const bool rShort =
90  static_cast<int>(rLength) <= noSortInputLength_ && !rIsPinyin;
91  if (lShort != rShort) {
92  return lShort > rShort;
93  }
94  // Always sort result by code length.
95  if (sortByCodeLength_ && lLength != rLength) {
96  return lLength < rLength;
97  }
98 
99  if (lShort) {
100  return index(lhs) > index(rhs);
101  }
102 
103  if constexpr (policy == OrderPolicy::No ||
104  policy == OrderPolicy::Fast) {
105  return index(lhs) > index(rhs);
106  } else if constexpr (policy == OrderPolicy::Freq) {
107  float lScore = lhs.score();
108  float rScore = rhs.score();
109  if (lScore != rScore) {
110  return lScore > rScore;
111  }
112  return index(lhs) > index(rhs);
113  }
114  return false;
115  }
116 
117  return lhs.score() > rhs.score();
118  }
119 
120 private:
121  const int noSortInputLength_;
122  const bool sortByCodeLength_;
123 };
124 
125 struct SelectedCode {
126  SelectedCode(size_t offset, WordNode word, std::string code,
127  PhraseFlag flag, bool commit = true)
128  : offset_(offset), word_(std::move(word)), code_(std::move(code)),
129  flag_(flag), commit_(commit) {}
130  size_t offset_;
131  WordNode word_;
132  std::string code_;
133  PhraseFlag flag_;
134  bool commit_;
135 };
136 
137 bool shouldReplaceCandidate(const SentenceResult &oldSentence,
138  const SentenceResult &newSentence,
139  OrderPolicy policy) {
140  if (newSentence.sentence().size() != oldSentence.sentence().size()) {
141  return newSentence.sentence().size() < oldSentence.sentence().size();
142  }
143  // sentence size are equal, prefer shorter code.
144  if (newSentence.sentence().size() == 1) {
145  auto oldCode = sentenceCodeLength(newSentence);
146  auto newCode = sentenceCodeLength(oldSentence);
147 
148  if (oldCode != newCode) {
149  return oldCode < newCode;
150  }
151 
152  const auto *newNode =
153  static_cast<const TableLatticeNode *>(newSentence.sentence()[0]);
154  switch (policy) {
155  case OrderPolicy::No:
156  if (newNode->flag() != PhraseFlag::User) {
157  return true;
158  }
159  break;
160  case OrderPolicy::Freq:
161  if (newSentence.score() != oldSentence.score()) {
162  return newSentence.score() > oldSentence.score();
163  }
164  [[fallthrough]];
165  case OrderPolicy::Fast:
166  if (newNode->flag() == PhraseFlag::User) {
167  return true;
168  }
169  break;
170  }
171  }
172 
173  return false;
174 }
175 } // namespace
176 
177 class TableContextPrivate : public fcitx::QPtrHolder<TableContext> {
178 public:
180  UserLanguageModel &model)
181  : QPtrHolder(q), dict_(dict), model_(model), decoder_(&dict, &model) {
182  // Maybe use a better heuristics?
183  candidates_.reserve(2048);
184  model_.setCodeExtractor([](const WordNode *word) -> std::string {
185  if (const auto *node =
186  dynamic_cast<const TableLatticeNode *>(word)) {
187  return node->code();
188  }
189  return "";
190  });
191  }
192 
193  // sort should already happened at this point.
194  bool canDoAutoSelect() const {
195  if (candidates_.empty()) {
196  return false;
197  }
198  return !TableContext::isAuto(candidates_[0]);
199  };
200 
201  // sort should already happened at this point.
202  bool hasOnlyOneAutoselectChoice() const {
203  if (!canDoAutoSelect()) {
204  return false;
205  }
206  if (candidates_.size() != 1) {
207  return false;
208  }
209 
210  if (candidates_[0].sentence().size() != 1) {
211  return false;
212  }
213  FCITX_Q();
214  return libime::TableContext::code(candidates_[0]) == q->currentCode() &&
215  (!dict_.tableOptions().exactMatch() ||
216  dict_.hasOneMatchingWord(q->currentCode()));
217  };
218 
219  State currentState() {
220  State state = model_.nullState();
221  if (selected_.empty()) {
222  return state;
223  }
224  State temp;
225  for (auto &s : selected_) {
226  for (auto &item : s) {
227  if (item.word_.word().empty()) {
228  continue;
229  }
230  model_.score(state, item.word_, temp);
231  state = std::move(temp);
232  }
233  }
234  return state;
235  }
236 
237  void resetMatchingState() {
238  lattice_.clear();
239  candidates_.clear();
240  graph_ = SegmentGraph();
241  }
242 
243  size_t selectedLength() const {
244  if (!selected_.empty()) {
245  return selected_.back().back().offset_;
246  }
247  return 0;
248  }
249 
250  void cancel() {
251  if (!selected_.empty()) {
252  selected_.pop_back();
253  }
254  }
255 
256  bool cancelTill(size_t pos) {
257  bool cancelled = false;
258  while (selectedLength() > pos) {
259  cancel();
260  cancelled = true;
261  }
262  return cancelled;
263  }
264 
265  bool learnWord(const std::vector<SelectedCode> &selection) {
266  if (selection.size() == 1) {
267  const auto &select = selection[0];
268  if (select.flag_ == PhraseFlag::None ||
269  select.flag_ == PhraseFlag::User) {
270  dict_.insert(select.code_, select.word_.word(),
271  PhraseFlag::User);
272  } else if (select.flag_ == PhraseFlag::Auto) {
273  // Remove from auto.
274  dict_.removeWord(select.code_, select.word_.word());
275  dict_.insert(select.code_, select.word_.word(),
276  PhraseFlag::User);
277  }
278 
279  return true;
280  }
281  std::string word;
282  for (const auto &selected : selection) {
283  if (!selected.commit_) {
284  return true;
285  }
286  word += selected.word_.word();
287  }
288  return dict_.insert(word, PhraseFlag::User);
289  }
290 
291  bool checkAutoSelect() const {
292  auto lastSegLength = fcitx::utf8::length(graph_.data());
293  // Check by length
294  if (dict_.tableOptions().autoSelectLength() &&
295  !lengthLessThanLimit(lastSegLength,
296  dict_.tableOptions().autoSelectLength())) {
297  return true;
298  }
299 
300  // Check by regex.
301  return dict_.d_func()->autoSelectRegex_ &&
302  std::regex_match(graph_.data(),
303  *dict_.d_func()->autoSelectRegex_,
304  std::regex_constants::match_default);
305  }
306 
307  bool checkNoMatchAutoSelect() const {
308  auto lastSegLength = fcitx::utf8::length(graph_.data());
309  // Check by length
310  if (dict_.tableOptions().noMatchAutoSelectLength() &&
311  !lengthLessThanLimit(
312  lastSegLength,
313  dict_.tableOptions().noMatchAutoSelectLength())) {
314  return true;
315  }
316 
317  // Check by regex.
318  return dict_.d_func()->noMatchAutoSelectRegex_ &&
319  std::regex_match(graph_.data(),
320  *dict_.d_func()->noMatchAutoSelectRegex_,
321  std::regex_constants::match_default);
322  }
323 
324  TableBasedDictionary &dict_;
325  UserLanguageModel &model_;
326  TableDecoder decoder_;
327  Lattice lattice_;
328  SegmentGraph graph_;
329  std::vector<SentenceResult> candidates_;
330  std::vector<std::vector<SelectedCode>> selected_;
331  size_t autoSelectIndex_ = 0;
332 };
333 
/// Create a context bound to \p dict and \p model. Both are passed on to the
/// private implementation; the input buffer is created with the FixedCursor
/// option.
TableContext::TableContext(TableBasedDictionary &dict, UserLanguageModel &model)
    : InputBuffer(fcitx::InputBufferOption::FixedCursor),
      d_ptr(std::make_unique<TableContextPrivate>(this, dict, model)) {}
337 
338 TableContext::~TableContext() {}
339 
/// Read-only access to the dictionary this context was constructed with.
const TableBasedDictionary &TableContext::dict() const {
    FCITX_D();
    return d->dict_;
}
344 
/// Mutable access to the dictionary this context was constructed with.
TableBasedDictionary &TableContext::mutableDict() {
    FCITX_D();
    return d->dict_;
}
349 
/// Read-only access to the user language model this context was constructed
/// with.
const UserLanguageModel &TableContext::model() const {
    FCITX_D();
    return d->model_;
}
354 
/// Mutable access to the user language model this context was constructed
/// with.
UserLanguageModel &TableContext::mutableModel() {
    FCITX_D();
    return d->model_;
}
359 
360 bool TableContext::isValidInput(uint32_t c) const {
361  FCITX_D();
362  auto matchingKey = d->dict_.tableOptions().matchingKey();
363  return (d->dict_.isInputCode(c) || (matchingKey && matchingKey == c) ||
364  (d->dict_.hasPinyin() && (c <= 'z' && c >= 'a')));
365 }
366 
367 bool TableContext::typeImpl(const char *s, size_t length) {
368  std::string_view view(s, length);
369  auto utf8len = fcitx::utf8::lengthValidated(view);
370  if (utf8len == fcitx::utf8::INVALID_LENGTH) {
371  return false;
372  }
373 
374  bool changed = false;
375  auto range = fcitx::utf8::MakeUTF8CharRange(view);
376  for (auto iter = range.begin(), end = range.end(); iter != end; iter++) {
377  auto pair = iter.charRange();
378  std::string_view chr(&*pair.first,
379  std::distance(pair.first, pair.second));
380  if (!typeOneChar(chr)) {
381  break;
382  }
383  changed = true;
384  }
385  return changed;
386 }
387 
388 void TableContext::erase(size_t from, size_t to) {
389  FCITX_D();
390  if (from == 0 && to >= size()) {
391  d->resetMatchingState();
392  d->selected_.clear();
393  InputBuffer::erase(from, to);
394  } else {
395  d->cancelTill(from);
396  InputBuffer::erase(from, to);
397 
398  auto lastSeg = userInput().substr(selectedLength());
399  d->graph_ = graphForCode(lastSeg, d->dict_);
400  }
401  update();
402 }
403 
404 void TableContext::select(size_t idx) {
405  FCITX_D();
406  assert(idx < d->candidates_.size());
407  auto offset = selectedLength();
408  d->selected_.emplace_back();
409 
410  auto &selection = d->selected_.back();
411  for (const auto &p : d->candidates_[idx].sentence()) {
412  const auto *node = static_cast<const TableLatticeNode *>(p);
413  selection.emplace_back(offset + p->to()->index(),
414  WordNode{p->word(), d->model_.index(p->word())},
415  node->code(), node->flag());
416  }
417 
418  update();
419 }
420 
421 bool TableContext::typeOneChar(std::string_view chr) {
422  FCITX_D();
423  auto lastSeg = userInput().substr(selectedLength());
424  auto lastSegLength = fcitx::utf8::length(lastSeg);
425  // update userInput()
426  if (!InputBuffer::typeImpl(chr.data(), chr.size())) {
427  return false;
428  }
429 
430  const auto &option = d->dict_.tableOptions();
431  // Logic when append a new char:
432  // Auto send disabled:
433  // - keep append to buffer.
434  // Auto send enabled:
435  // - check no match auto select length.
436  bool doAutoSelect = option.autoSelect();
437  if (doAutoSelect) {
438  // No pinyin, because pinyin has no limit on length.
439  // Also, check if it exceeds the code length.
440  doAutoSelect =
441  (!d->dict_.hasPinyin() &&
442  !lengthLessThanLimit(lastSegLength, d->dict_.maxLength()));
443  // Check if it
444  doAutoSelect = doAutoSelect ||
445  (lastSegLength &&
446  d->dict_.isEndKey(fcitx::utf8::getLastChar(lastSeg)));
447  // Check no match auto select.
448  // It means "last segement + chr" has no match, so
449  // we just select lastSeg instead.
450  doAutoSelect =
451  doAutoSelect || (d->checkNoMatchAutoSelect() &&
452  !d->dict_.hasMatchingWords(lastSeg, chr));
453  }
454 
455  if (doAutoSelect) {
456  autoSelect();
457  d->graph_ = graphForCode(chr, d->dict_);
458  } else {
459  lastSeg.append(chr.data(), chr.size());
460  d->graph_ = graphForCode(lastSeg, d->dict_);
461  }
462 
463  update();
464  return true;
465 }
466 
468  FCITX_D();
469  d->autoSelectIndex_ = index;
470 }
471 
472 void TableContext::autoSelect() {
473  FCITX_D();
474  if (selected()) {
475  return;
476  }
477 
478  if (d->canDoAutoSelect()) {
479  auto selectIndex = d->autoSelectIndex_;
480  d->autoSelectIndex_ = 0;
481  if (selectIndex >= candidates().size()) {
482  selectIndex = 0;
483  }
484  select(selectIndex);
485  } else {
486  if (currentCode().empty()) {
487  return;
488  }
489  // Need to calculate this first, otherwise we're breaking the contract
490  // of selected_ (contains no zero-length vec).
491  auto offset = selectedLength();
492  d->selected_.emplace_back();
493  d->selected_.back().emplace_back(
494  offset + d->graph_.data().size(),
495  WordNode{d->graph_.data(), d->model_.unknown()}, d->graph_.data(),
496  PhraseFlag::Invalid, d->dict_.tableOptions().commitRawInput());
497  }
498 
499  update();
500 }
501 
/// Recompute the candidate list for the current unselected code.
///
/// Steps: reset the auto select index, decode the segment graph into the
/// lattice, collect de-duplicated candidates, sort them according to the
/// configured order policy, and finally run a second-pass auto select when
/// exactly one suitable candidate remains.
void TableContext::update() {
    FCITX_D();
    d->autoSelectIndex_ = 0;
    if (empty()) {
        return;
    }

    // Everything typed is already selected: just drop the matching state.
    if (selected()) {
        d->resetMatchingState();
        return;
    }

    d->lattice_.clear();
    // Language model state after the words selected so far.
    State state = d->currentState();

    // t0/t1 are only used for the debug timing output below.
    auto t0 = std::chrono::high_resolution_clock::now();
    decltype(t0) t1;
    d->candidates_.clear();

    constexpr float max = std::numeric_limits<float>::max();
    constexpr float min = -std::numeric_limits<float>::max();
    constexpr int beamSize = 20;
    constexpr int frameSize = 10;
    auto lastSegLength = fcitx::utf8::length(d->graph_.data());
    int nbest = 1;
    // Request more sentences when the code has reached the maximum length and
    // the table has auto rules.
    if (lastSegLength == d->dict_.maxLength() &&
        !d->dict_.tableOptions().autoRuleSet().empty()) {
        nbest = 5;
    }
    if (d->decoder_.decode(d->lattice_, d->graph_, nbest, state, max, min,
                           beamSize, frameSize)) {
        t1 = std::chrono::high_resolution_clock::now();
        LIBIME_TABLE_DEBUG()
            << "Decode: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0)
                   .count();
        t0 = t1;
        // Maps a sentence string to its position in candidates_.
        std::unordered_map<std::string, size_t> dup;

        // Append a candidate, or, when one with the same string already
        // exists, replace it if shouldReplaceCandidate() says so.
        auto insertCandidate = [d, &dup](SentenceResult sentence) {
            auto sentenceString = sentence.toString();
            auto iter = dup.find(sentenceString);
            if (iter != dup.end()) {
                auto idx = iter->second;
                if (shouldReplaceCandidate(
                        d->candidates_[idx], sentence,
                        d->dict_.tableOptions().orderPolicy())) {
                    d->candidates_[idx] = std::move(sentence);
                }
            } else {
                d->candidates_.emplace_back(std::move(sentence));
                dup[sentenceString] = d->candidates_.size() - 1;
            }
        };

        auto &graph = d->graph_;
        const SegmentGraphNode *bos = &graph.start();
        const SegmentGraphNode *eos = &graph.end();
        constexpr float pinyinPenalty = -0.5;
        // First pass: lattice nodes that span the whole graph on their own.
        for (const auto &latticeNode : d->lattice_.nodes(eos)) {
            if (latticeNode.from() == bos && latticeNode.to() == eos) {
                auto sentence = latticeNode.toSentenceResult();
                if (TableContext::isPinyin(sentence)) {
                    sentence.adjustScore(pinyinPenalty);
                }
                insertCandidate(std::move(sentence));
            }
        }

        // Lowest candidate score so far, capped at 0. This local shadows the
        // constexpr `min` passed to decode() above.
        float min = 0;
        for (const auto &cand : d->candidates_) {
            min = std::min(min, cand.score());
        }

        // FIXME: add an option.
        const float minDistance = TABLE_DEFAULT_MIN_DISTANCE;
        // Second pass: the n-best sentences from the lattice.
        for (size_t i = 0, e = d->lattice_.sentenceSize(); i < e; i++) {
            auto sentence = d->lattice_.sentence(i);
            if (TableContext::isPinyin(sentence)) {
                sentence.adjustScore(pinyinPenalty);
            }
            auto score = sentence.score();
            // For a non-empty sentence the score of its last node is used.
            if (!sentence.sentence().empty()) {
                score = sentence.sentence().back()->score();
            }
            // Check the limit, or if there's no candidate.
            if (min - score < minDistance || candidates().empty()) {
                insertCandidate(std::move(sentence));
            }
        }
        t1 = std::chrono::high_resolution_clock::now();
        LIBIME_TABLE_DEBUG()
            << "Insert candidate: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0)
                   .count();
        t0 = t1;
        // The smaller of the current code length and the noSortInputLength
        // option.
        // NOTE(review): this compares the unsigned length with the option
        // value directly; confirm the option can never be negative.
        int noSortLength =
            lastSegLength < d->dict_.tableOptions().noSortInputLength()
                ? lastSegLength
                : d->dict_.tableOptions().noSortInputLength();

        // The comparator is a template on the policy, hence one call per case.
        switch (d->dict_.tableOptions().orderPolicy()) {
        case OrderPolicy::No:
            std::sort(
                d->candidates_.begin(), d->candidates_.end(),
                TableCandidateCompare<OrderPolicy::No>(
                    noSortLength, d->dict_.tableOptions().sortByCodeLength()));
            break;
        case OrderPolicy::Fast:
            std::sort(
                d->candidates_.begin(), d->candidates_.end(),
                TableCandidateCompare<OrderPolicy::Fast>(
                    noSortLength, d->dict_.tableOptions().sortByCodeLength()));
            break;
        case OrderPolicy::Freq:
            std::sort(
                d->candidates_.begin(), d->candidates_.end(),
                TableCandidateCompare<OrderPolicy::Freq>(
                    noSortLength, d->dict_.tableOptions().sortByCodeLength()));
            break;
        }
        if (!d->candidates_.empty() && isPinyin(d->candidates_[0])) {
            auto iter =
                std::find_if(d->candidates_.begin(), d->candidates_.end(),
                             [](const auto &cand) {
                                 return !isAuto(cand) && !isPinyin(cand);
                             });
            // Make sure first is non pinyin/auto candidate.
            if (iter != d->candidates_.end()) {
                std::rotate(d->candidates_.begin(), iter, std::next(iter));
            }
        }

        t1 = std::chrono::high_resolution_clock::now();
        LIBIME_TABLE_DEBUG()
            << "Sort: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0)
                   .count();
        LIBIME_TABLE_DEBUG() << "Number: " << d->candidates_.size();
    };
    // Run auto select for the second pass.
    // if number of candidate is 1, do auto select.
    if (d->dict_.tableOptions().autoSelect()) {
        if (d->hasOnlyOneAutoselectChoice() &&
            lastSegLength <= d->dict_.maxLength() && d->checkAutoSelect()) {
            autoSelect();
        }
    }
}
651 
/// The current candidate list, as last computed by update().
TableContext::CandidateRange TableContext::candidates() const {
    FCITX_D();
    return d->candidates_;
}
656 
/// End offset of the most recent selection in the user input, or 0 when
/// nothing is selected.
size_t TableContext::selectedLength() const {
    FCITX_D();
    return d->selectedLength();
}
661 
662 std::string TableContext::selectedSentence() const {
663  FCITX_D();
664  std::string ss;
665  for (const auto &s : d->selected_) {
666  for (const auto &item : s) {
667  if (item.commit_) {
668  ss += item.word_.word();
669  }
670  }
671  }
672  return ss;
673 }
674 
/// The code that is not selected yet, i.e. the data of the current segment
/// graph.
const std::string &TableContext::currentCode() const {
    FCITX_D();
    return d->graph_.data();
}
679 
680 bool TableContext::selected() const {
681  FCITX_D();
682  if (userInput().empty() || d->selected_.empty()) {
683  return false;
684  }
685  return d->selected_.back().back().offset_ == userInput().size();
686 }
687 
/// Number of selections made so far.
size_t TableContext::selectedSize() const {
    FCITX_D();
    return d->selected_.size();
}
692 
693 std::tuple<std::string, bool> TableContext::selectedSegment(size_t idx) const {
694  FCITX_D();
695  std::string result;
696  bool commit = true;
697  for (const auto &item : d->selected_[idx]) {
698  if (!item.commit_) {
699  commit = false;
700  }
701  result += item.word_.word();
702  }
703  return {std::move(result), commit};
704 }
705 
706 std::string TableContext::selectedCode(size_t idx) const {
707  FCITX_D();
708  std::string result;
709  for (const auto &item : d->selected_[idx]) {
710  result += item.code_;
711  }
712  return result;
713 }
714 
715 size_t TableContext::selectedSegmentLength(size_t idx) const {
716  FCITX_D();
717  size_t prev = 0;
718  if (idx > 0) {
719  prev = d->selected_[idx - 1].back().offset_;
720  }
721  return d->selected_[idx].back().offset_ - prev;
722 }
723 
724 std::string TableContext::preedit() const {
725  std::string result;
726  for (size_t i = 0, e = selectedSize(); i < e; i++) {
727  auto seg = selectedSegment(i);
728  if (std::get<bool>(seg)) {
729  result += std::get<std::string>(seg);
730  } else {
731  result += "(";
732  result += std::get<std::string>(seg);
733  result += ")";
734  }
735  }
736  result += currentCode();
737  return result;
738 }
739 
741  FCITX_D();
742  if (!d->dict_.tableOptions().learning()) {
743  return;
744  }
745 
746  if (d->selected_.empty()) {
747  return;
748  }
749 
750  for (auto &s : d->selected_) {
751  if (!d->learnWord(s)) {
752  return;
753  }
754  }
755  std::vector<libime::HistoryBigram::WordWithCode> newSentence;
756  for (auto &s : d->selected_) {
757  if (s.empty()) {
758  continue;
759  }
760  if (std::ranges::any_of(
761  s, [](const auto &item) { return !item.commit_; })) {
762  continue;
763  }
764  std::string word;
765  std::string code;
766  if (s.size() == 1) {
767  word = s[0].word_.word();
768  code = s[0].code_;
769  } else {
770  for (auto &item : s) {
771  word += item.word_.word();
772  }
773  if (!d->dict_.generate(word, code)) {
774  return;
775  }
776  }
777  if (!word.empty()) {
778  newSentence.emplace_back(std::move(word), std::move(code));
779  }
780  }
781  if (!newSentence.empty()) {
782  d->model_.history().addWithCode(newSentence);
783  }
784 }
785 
787  FCITX_D();
788  if (!d->dict_.tableOptions().learning() || d->selected_.empty()) {
789  return;
790  }
791 
792  if (!d->learnWord(d->selected_.back())) {
793  return;
794  }
795 
796  std::vector<libime::HistoryBigram::WordWithCode> newSentence;
797  const auto &s = d->selected_.back();
798  if (std::ranges::any_of(s,
799  [](const auto &item) { return !item.commit_; })) {
800  return;
801  }
802  std::string word;
803  std::string code;
804  if (s.size() == 1) {
805  word = s[0].word_.word();
806  code = s[0].code_;
807  } else {
808  for (const auto &item : s) {
809  word += item.word_.word();
810  }
811  if (!d->dict_.generate(word, code)) {
812  return;
813  }
814  }
815  if (!word.empty()) {
816  newSentence.emplace_back(std::move(word), std::move(code));
817  }
818  if (!newSentence.empty()) {
819  d->model_.history().addWithCode(newSentence);
820  }
821 }
822 
/// Learn auto phrases from \p history without any hints; forwards to the
/// two-argument overload with an empty hint list.
void TableContext::learnAutoPhrase(std::string_view history) {
    learnAutoPhrase(history, {});
}
826 
827 void TableContext::learnAutoPhrase(std::string_view history,
828  const std::vector<std::string> &hints) {
829  FCITX_D();
830  if (!d->dict_.tableOptions().learning() ||
831  !fcitx::utf8::validate(history) ||
832  d->dict_.tableOptions().autoPhraseLength() <= 1) {
833  return;
834  }
835 
836  auto range = fcitx::utf8::MakeUTF8CharRange(history);
837  std::string code;
838 
839  std::vector<std::string> currentHints;
840  size_t i = 0;
841  for (auto iter = std::begin(range); iter != std::end(range); iter++, i++) {
842  auto charBegin = iter.charRange();
843  auto length = fcitx::utf8::length(charBegin.first, history.end());
844  if (length < 2 ||
845  length > static_cast<size_t>(
846  d->dict_.tableOptions().autoPhraseLength())) {
847  continue;
848  }
849  // Make a substring from current char.
850  auto word =
851  history.substr(std::distance(history.begin(), charBegin.first));
852  auto begin = hints.end();
853  if (hints.size() > i) {
854  begin = std::next(hints.begin(), i);
855  }
856  currentHints.assign(begin, hints.end());
857  if (!d->dict_.generateWithHint(word, currentHints, code)) {
858  continue;
859  }
860  auto wordFlag = d->dict_.wordExists(code, word);
861  if (wordFlag == PhraseFlag::None || wordFlag == PhraseFlag::User) {
862  continue;
863  }
864  auto insertResult = d->dict_.insert(code, word, PhraseFlag::Auto);
865  LIBIME_TABLE_DEBUG() << "learnAutoPhrase " << word << " " << code
866  << " AutoPhraseLength: "
867  << d->dict_.tableOptions().autoPhraseLength()
868  << " success: " << insertResult;
869  }
870 }
871 
872 std::string TableContext::candidateHint(size_t idx, bool custom) const {
873  FCITX_D();
874  if (d->candidates_[idx].sentence().size() == 1) {
875  const auto *p = d->candidates_[idx].sentence()[0];
876  if (!p->word().empty()) {
877  const auto *node = static_cast<const TableLatticeNode *>(p);
878  if (node->flag() == PhraseFlag::Pinyin) {
879  if (fcitx::utf8::length(p->word()) == 1) {
880  auto code = d->dict_.reverseLookup(node->word());
881  if (custom) {
882  return d->dict_.hint(code);
883  }
884  return code;
885  }
886  } else {
887  std::string_view code = node->code();
888  auto matchingKey = d->dict_.tableOptions().matchingKey();
889  // If we're not using matching key remove the prefix.
890  // Otherwise show the full code.
891  if (!matchingKey || (currentCode().find(fcitx::utf8::UCS4ToUTF8(
892  matchingKey)) == std::string::npos)) {
893  code.remove_prefix(currentCode().size());
894  }
895  if (custom) {
896  return d->dict_.hint(code);
897  }
898  return std::string{code};
899  }
900  }
901  }
902  return {};
903 }
904 
905 std::string TableContext::code(const SentenceResult &sentence) {
906  if (sentence.size() == 1) {
907  const auto *node =
908  static_cast<const TableLatticeNode *>(sentence.sentence()[0]);
909  return node->code();
910  }
911  return "";
912 }
913 
914 PhraseFlag TableContext::flag(const SentenceResult &sentence) {
915  if (sentence.size() == 1) {
916  const auto *node =
917  static_cast<const TableLatticeNode *>(sentence.sentence()[0]);
918  return node->flag();
919  }
920  return PhraseFlag::Auto;
921 }
922 
923 bool TableContext::isPinyin(const SentenceResult &sentence) {
924  return sentence.size() == 1 && flag(sentence) == PhraseFlag::Pinyin;
925 }
926 
927 bool TableContext::isAuto(const SentenceResult &sentence) {
928  return sentence.size() != 1 || flag(sentence) == PhraseFlag::Auto;
929 }
930 
931 } // namespace libime
Class that provides input method support for table-based input methods, such as Wubi.
const std::string & currentCode() const
Current unselected code.
void learn()
Save the current selected text.
void setAutoSelectIndex(size_t index)
Set the auto select index; usually this is the candidate cursor index.
Input context for table input method.
Definition: tablecontext.h:37
void learnLast()
Save the last selected text.
std::string selectedSentence() const
The concatenation of all selectedSegment where bool == true.
void learnAutoPhrase(std::string_view history)
Learn auto word from string.
std::string preedit() const
A simple preedit implementation.