6 #include "pinyincontext.h" 18 #include <string_view> 20 #include <unordered_map> 21 #include <unordered_set> 24 #include <boost/container_hash/hash.hpp> 25 #include <fcitx-utils/charutils.h> 26 #include <fcitx-utils/inputbuffer.h> 27 #include <fcitx-utils/keysym.h> 28 #include <fcitx-utils/macros.h> 29 #include <fcitx-utils/signals.h> 30 #include <fcitx-utils/stringutils.h> 31 #include <fcitx-utils/utf8.h> 32 #include "libime/core/historybigram.h" 33 #include "libime/core/inputbuffer.h" 34 #include "libime/core/languagemodel.h" 35 #include "libime/core/lattice.h" 36 #include "libime/core/segmentgraph.h" 37 #include "libime/core/userlanguagemodel.h" 38 #include "libime/pinyin/constants.h" 39 #include "pinyindecoder.h" 40 #include "pinyindecoder_p.h" 41 #include "pinyinencoder.h" 42 #include "pinyinime.h" 43 #include "pinyinmatchstate.h" 48 enum class LearnWordResult {
54 enum class SelectedPinyinType {
// One entry of a selection: an offset (offset_), the chosen word node
// (word_) and the kind of selection (type_), all set by the constructor.
60 struct SelectedPinyin {
// Stores s as offset_, takes ownership of word by move, and records type.
61 SelectedPinyin(
size_t s, PinyinWordNode word, SelectedPinyinType type)
62 : offset_(s), word_(std::move(word)), type_(type) {}
// Convenience accessor forwarding to the word node's encoded pinyin.
64 const std::string &encodedPinyin()
const {
return word_.encodedPinyin(); }
68 SelectedPinyinType type_;
// Key used to recognise duplicate candidates.
71 struct CandidateDedupKey {
// Two keys compare equal only when both text_ and end_ are equal.
75 bool operator==(
const CandidateDedupKey &other)
const {
76 return text_ == other.text_ && end_ == other.end_;
// Hash functor for CandidateDedupKey, for use with unordered containers.
80 struct CandidateDedupKeyHash {
81 size_t operator()(
const CandidateDedupKey &key)
const {
// Start from the hash of the text, then mix in end_.
82 size_t seed = std::hash<std::string>()(key.text_);
83 boost::hash_combine(seed, key.end_);
// Builds the dedup key of a candidate: text_ is candidate.toString(); for a
// non-empty sentence end_ is the index of the node its last word ends at.
88 CandidateDedupKey candidateDedupKey(
const SentenceResult &candidate) {
89 return {.text_ = candidate.toString(),
90 .end_ = candidate.sentence().empty()
92 : candidate.sentence().back()->to()->index()};
100 : QPtrHolder(q), ime_(ime), matchState_(q) {}
102 std::vector<std::vector<SelectedPinyin>> selected_;
105 int maxSentenceLength_ = -1;
110 std::vector<SentenceResult> candidates_;
111 std::unordered_set<std::string> candidatesSet_;
112 mutable bool candidatesToCursorNeedUpdate_ =
true;
113 mutable std::vector<SentenceResult> candidatesToCursor_;
114 mutable std::unordered_set<std::string> candidatesToCursorSet_;
115 std::vector<fcitx::ScopedConnection> conn_;
116 std::list<PinyinWordNode> contextWords_;
// Returns a cursor position derived from q->cursor(): the loop below keeps
// going while the segment graph has no node at (position - start) and the
// position is still before q->size().
118 size_t alignCursorToNextSegment()
const {
120 auto currentCursor = q->cursor();
// Special case for a cursor located before `start`.
122 if (currentCursor < start) {
125 while (segs_.nodes(currentCursor - start).empty() &&
126 currentCursor < q->size()) {
129 return currentCursor;
// Tells whether a separate cursor-limited candidate list is required.
132 bool needCandidatesToCursor()
const {
// Final check: true when the aligned cursor position is not q->size().
138 return alignCursorToNextSegment() != q->size();
// Drops cached candidate data: the cursor-limited list and both text sets.
141 void clearCandidates() {
143 candidatesToCursor_.clear();
// With everything empty there is nothing to recompute, so the flag is
// lowered rather than raised.
144 candidatesToCursorNeedUpdate_ =
false;
145 candidatesSet_.clear();
146 candidatesToCursorSet_.clear();
// Rebuilds the cached cursor-limited candidate list (candidatesToCursor_)
// and its text set (candidatesToCursorSet_). The function is const; the
// members it writes are declared mutable.
149 void updateCandidatesToCursor()
const {
// Nothing to do unless the cache has been flagged as stale.
151 if (!candidatesToCursorNeedUpdate_) {
154 candidatesToCursorNeedUpdate_ =
false;
155 candidatesToCursor_.clear();
156 candidatesToCursorSet_.clear();
// Maps a dedup key to the index of its entry in candidatesToCursor_.
158 std::unordered_map<CandidateDedupKey, size_t, CandidateDedupKeyHash>
160 auto insertCandidate = [
this, &duplicateCandidates](
162 auto key = candidateDedupKey(candidate);
163 auto iter = duplicateCandidates.find(key);
// Key already present: replace the stored candidate only when the new
// one has a higher score.
164 if (iter != duplicateCandidates.end()) {
165 auto &oldCandidate = candidatesToCursor_[iter->second];
166 if (candidate.score() > oldCandidate.score()) {
167 oldCandidate = std::move(candidate);
// New key: append the candidate, remember its index, and record its text.
// key is copied into the map first, so moving key.text_ afterwards is safe.
172 candidatesToCursor_.push_back(std::move(candidate));
173 duplicateCandidates.emplace(key, candidatesToCursor_.size() - 1);
174 candidatesToCursorSet_.insert(std::move(key.text_));
178 auto currentCursor = alignCursorToNextSegment();
// The first lattice node at the aligned cursor, if any, goes in first.
180 auto nodeRange = lattice_.nodes(&segs_.node(currentCursor - start));
181 if (!nodeRange.empty()) {
182 insertCandidate(nodeRange.front().toSentenceResult());
// Then derive entries from the full candidate list.
184 for (
const auto &candidate : candidates_) {
185 const auto &sentence = candidate.sentence();
// Single-word candidates are tested against the cursor position.
186 if (sentence.size() == 1) {
187 if (sentence.back()->to()->index() + start > currentCursor) {
190 insertCandidate(candidate);
191 }
else if (sentence.size() > 1) {
// Multi-word candidates: work on a copy and pop words off the back,
// judged by where the last word ends; what remains is inserted.
192 auto newSentence = sentence;
193 while (!newSentence.empty() &&
194 newSentence.back()->to()->index() + start >
196 newSentence.pop_back();
198 if (!newSentence.empty()) {
200 newSentence.back()->score());
201 insertCandidate(std::move(partial));
// Shared implementation for selections: opens a new selection group and
// lets the fillSentence callback populate it.
207 template <
typename FillSentence>
208 void selectHelper(
const FillSentence &fillSentence) {
210 selected_.emplace_back();
212 auto &selection = selected_.back();
213 fillSentence(selection);
// When `remain` is non-empty and consists solely of '\'' characters, a
// Separator entry with offset q->size() and an empty word is appended.
217 if (!remain.empty()) {
218 if (std::all_of(remain.begin(), remain.end(),
219 [](
char c) {
return c ==
'\''; })) {
220 selection.emplace_back(q->size(), PinyinWordNode({}, 0),
221 SelectedPinyinType::Separator);
231 selectHelper([offset, &sentence,
232 this](std::vector<SelectedPinyin> &selection) {
233 for (
const auto &p : sentence.sentence()) {
234 selection.emplace_back(
235 offset + p->to()->index(),
238 ime_->model()->index(p->word())},
240 SelectedPinyinType::Normal);
// Records a custom selection through selectHelper: a single entry of type
// Custom pairing `segment` with `encodedPinyin`.
245 void selectCustom(
size_t inputLength, std::string_view segment,
246 std::string_view encodedPinyin) {
249 selectHelper([
this, offset, &segment, inputLength,
250 &encodedPinyin](std::vector<SelectedPinyin> &selection) {
// Word index of the segment as reported by the language model.
251 auto index = ime_->model()->index(segment);
// The entry's offset is `offset` advanced by inputLength.
252 selection.emplace_back(
253 offset + inputLength,
254 PinyinWordNode{{segment, encodedPinyin}, index},
255 SelectedPinyinType::Custom);
// Tries to add the current selection to the user dictionary as one word.
// Returns a LearnWordResult together with a string; every early exit
// returns {Ignored, ""}.
259 std::tuple<LearnWordResult, std::string> learnWord() {
// Nothing selected.
262 if (selected_.empty()) {
263 return {LearnWordResult::Ignored,
""};
// A single group holding a single item is not learned.
266 if (selected_.size() == 1 && selected_[0].size() == 1) {
267 return {LearnWordResult::Ignored,
""};
271 bool hasCustom =
false;
272 size_t totalPinyinLength = 0;
273 bool isAllSingleWord =
true;
// First pass: validate the items and gather statistics. A group counts
// towards "single word" when it is empty, or holds exactly one item that
// is a Separator or has a 2-byte encoded pinyin.
274 for (
auto &s : selected_) {
277 (s.empty() || (s.size() == 1 &&
278 (s[0].type_ == SelectedPinyinType::Separator ||
279 s[0].encodedPinyin().size() == 2)));
280 for (
auto &item : s) {
281 if (item.type_ == SelectedPinyinType::Separator) {
284 if (item.type_ == SelectedPinyinType::Custom) {
// Encoded pinyin must be non-empty and of even length.
288 if (item.encodedPinyin().empty() ||
289 item.encodedPinyin().size() % 2 != 0) {
290 return {LearnWordResult::Ignored,
""};
// Length is counted in units of two encoded bytes.
292 totalPinyinLength += item.encodedPinyin().size() / 2;
// Refuse when the selection is not made of single words only and its
// total length exceeds 4.
298 if ((!isAllSingleWord && totalPinyinLength > 4)) {
299 return {LearnWordResult::Ignored,
""};
302 return {LearnWordResult::Ignored,
""};
// Second pass: concatenate the word text into ss and build the full
// pinyin string, with items joined by '\''.
306 for (
auto &s : selected_) {
307 for (
auto &item : s) {
308 if (item.type_ == SelectedPinyinType::Separator) {
// Already guaranteed by the validation in the first pass.
311 assert(!item.encodedPinyin().empty());
312 assert(item.encodedPinyin().size() % 2 == 0);
313 ss += item.word_.word();
314 if (!pinyin.empty()) {
315 pinyin.push_back(
'\'');
317 pinyin += PinyinEncoder::decodeFullPinyin(item.encodedPinyin());
// Consult the user dictionary before adding the word.
321 if (
auto opt = ime_->dict()->lookupWord(PinyinDictionary::UserDict,
323 return {LearnWordResult::Ignored,
""};
326 ime_->dict()->addWord(PinyinDictionary::UserDict, pinyin, ss,
330 pinyin, PinyinFuzzyFlag::VE_UE);
// Custom when any Custom item was seen, otherwise Normal; the string is
// a copy of the bytes held in encodedPinyin.
332 return {hasCustom ? LearnWordResult::Custom : LearnWordResult::Normal,
333 std::string(encodedPinyin.data(), encodedPinyin.size())};
// Transfers upper-case letters from `ref` onto `actualPinyin` in place,
// position by position: byte i of ref corresponds to UTF-8 character i of
// actualPinyin.
337 void matchPinyinCase(std::string_view ref, std::string &actualPinyin) {
// The positional mapping only works when ref has as many bytes as
// actualPinyin has UTF-8 characters.
338 if (ref.size() != fcitx::utf8::length(actualPinyin)) {
342 auto iter = fcitx::utf8::MakeUTF8CharIterator(actualPinyin.begin(),
344 for (
size_t i = 0; i < ref.size(); ++i, ++iter) {
345 if (fcitx::charutils::isupper(ref[i])) {
346 auto charRange = iter.charRange();
// Single-byte lower-case character: upper-case it directly.
347 if (iter.charLength() == 1 &&
348 fcitx::charutils::islower(iter.view()[0])) {
349 *charRange.first = fcitx::charutils::toupper(*charRange.first);
350 }
else if (*iter == 0x00fc) {
// U+00FC: overwrite its two bytes with 0xC3 0x9C, the UTF-8 encoding of
// U+00DC.
351 *charRange.first = 0xc3;
352 *std::next(charRange.first) = 0x9c;
// Creates a context bound to `ime`: the underlying InputBuffer is created
// with the AsciiOnly option and the private data object is allocated here.
358 PinyinContext::PinyinContext(
PinyinIME *ime)
359 :
InputBuffer(fcitx::InputBufferOption::AsciiOnly),
360 d_ptr(std::make_unique<PinyinContextPrivate>(
this, ime)) {
// The context clears itself whenever the IME emits optionChanged; the
// connection object is kept in conn_.
362 d->conn_.emplace_back(
363 ime->connect<PinyinIME::optionChanged>([
this]() { clear(); }));
// A second connection listens to the dictionary's dictionaryChanged signal.
364 d->conn_.emplace_back(
365 ime->dict()->connect<PinyinDictionary::dictionaryChanged>(
368 d->matchState_.clear();
372 PinyinContext::~PinyinContext() {}
// Setter for the use-shuangpin option (`sp`).
374 void PinyinContext::setUseShuangpin(
bool sp) {
// Clears the match state held in the private data.
377 d->matchState_.clear();
380 bool PinyinContext::useShuangpin()
const {
// Stores `length` as the maximum sentence length and clears the match
// state held in the private data.
385 void PinyinContext::setMaxSentenceLength(
int length) {
387 d->maxSentenceLength_ = length;
388 d->matchState_.clear();
// Returns the stored maximum sentence length (the member defaults to -1).
391 int PinyinContext::maxSentenceLength()
const {
393 return d->maxSentenceLength_;
// Handles typed input of `length` bytes starting at `s`.
396 bool PinyinContext::typeImpl(
const char *s,
size_t length) {
// Length limit: only evaluated for a positive limit and when candidates
// exist; it is measured on the first candidate.
398 if (d->maxSentenceLength_ > 0 && !d->candidates_.empty()) {
// NOTE(review): the loop variable `s` shadows the parameter `s` inside
// this loop.
400 for (
const auto &s : d->candidates_[0].sentence()) {
// Each word contributes max(1, path size) - 1, so an empty path cannot
// underflow the unsigned subtraction.
402 auto segLength = s->path().size();
404 std::max(
static_cast<decltype(segLength)
>(1), segLength) - 1;
406 if (size > d->maxSentenceLength_) {
// Undo selections reaching past the cursor, then let the base class
// insert the text; `changed` is true if either step reported a change.
410 auto changed = cancelTill(cursor());
411 changed = InputBuffer::typeImpl(s, length) || changed;
// Erases the range [from, to) from the input buffer.
418 void PinyinContext::erase(
size_t from,
size_t to) {
// The range covers the whole buffer: candidates, selections and match
// state are reset.
424 if (from == 0 && to >= size()) {
426 d->clearCandidates();
427 d->selected_.clear();
429 d->matchState_.clear();
// The actual removal is delegated to the base class.
434 InputBuffer::erase(from, to);
// Moves the cursor to `pos`.
441 void PinyinContext::setCursor(
size_t pos) {
443 auto oldCursor = cursor();
// Undo selections that extend past the new position before moving.
444 auto cancelled = cancelTill(pos);
445 InputBuffer::setCursor(pos);
// A moved cursor invalidates the cursor-limited candidate cache.
449 if (cursor() != oldCursor) {
450 d->candidatesToCursorNeedUpdate_ =
true;
457 auto len = selectedLength();
463 if (!d->candidates_.empty()) {
464 for (
const auto &s : d->candidates_[0].sentence()) {
465 for (
auto iter = s->path().begin(),
466 end = std::prev(s->path().end());
467 iter < end; iter++) {
468 auto from = (*iter)->index();
469 auto to = (*std::next(iter))->index();
481 auto len = selectedLength();
487 if (!d->candidates_.empty()) {
488 for (
const auto &s : d->candidates_[0].sentence()) {
489 for (
auto iter = s->path().begin(),
490 end = std::prev(s->path().end());
491 iter < end; iter++) {
492 auto to = (*std::next(iter))->index();
// Returns a reference to the full candidate list stored in the private data.
502 const std::vector<SentenceResult> &PinyinContext::candidates()
const {
504 return d->candidates_;
509 return d->candidatesSet_;
// Returns the candidates limited to the cursor position.
512 const std::vector<SentenceResult> &PinyinContext::candidatesToCursor()
const {
// When no cursor-specific list is needed the full list is returned as is.
514 if (!d->needCandidatesToCursor()) {
515 return d->candidates_;
// Otherwise refresh the cached list (a no-op when it is current) and
// return it.
517 d->updateCandidatesToCursor();
518 return d->candidatesToCursor_;
521 const std::unordered_set<std::string> &
524 if (!d->needCandidatesToCursor()) {
525 return d->candidatesSet_;
527 d->updateCandidatesToCursor();
528 return d->candidatesToCursorSet_;
// Selects entry `idx` of candidates(). idx must be a valid index; this is
// only checked by an assert.
531 void PinyinContext::select(
size_t idx) {
533 const auto &candidates = this->candidates();
534 assert(idx < candidates.size());
535 d->select(candidates[idx]);
// Selects entry `idx` of candidatesToCursor(). idx must be a valid index;
// this is only checked by an assert.
538 void PinyinContext::selectCandidatesToCursor(
size_t idx) {
540 const auto &candidates = this->candidatesToCursor();
541 assert(idx < candidates.size());
542 d->select(candidates[idx]);
546 std::string_view encodedPinyin) {
548 if (inputLength == 0 && selectedLength() + inputLength > size()) {
549 throw std::out_of_range(
"Invalid input length");
551 if (encodedPinyin.size() % 2 != 0) {
552 throw std::invalid_argument(
"Invalid encoded pinyin");
554 d->selectCustom(inputLength, segment, encodedPinyin);
// Works on the selection for as long as selectedLength() is greater than
// `pos`.
557 bool PinyinContext::cancelTill(
size_t pos) {
// Starts out false, i.e. "nothing cancelled yet".
558 bool cancelled =
false;
559 while (selectedLength() > pos) {
// Undoes the most recent selection.
566 void PinyinContext::cancel() {
// Remove the last selection group, if there is one.
568 if (!d->selected_.empty()) {
569 d->selected_.pop_back();
575 d->matchState_.clear();
583 auto *model = d->ime_->model();
584 State state = model->nullState();
585 for (
const auto &word : d->contextWords_) {
587 model->score(state, word, temp);
588 state = std::move(temp);
590 for (
const auto &s : d->selected_) {
591 for (
const auto &item : s) {
592 if (item.word_.word().empty()) {
596 model->score(state, item.word_, temp);
597 state = std::move(temp);
// Recomputes the candidates for the current input: the input after the last
// selection is parsed, decoded into the lattice, and the resulting
// candidates are collected, sorted and de-duplicated.
603 void PinyinContext::update() {
611 d->clearCandidates();
// Language model state from which decoding starts.
614 State state = this->state();
// Parsing begins at the offset of the most recent selection, if any.
615 if (!d->selected_.empty()) {
616 start = d->selected_.back().back().offset_;
// Parse as shuangpin when the match state provides a profile, otherwise as
// regular pinyin with the IME's correction profile; both use fuzzyFlags().
619 if (
auto spProfile = d->matchState_.shuangpinProfile()) {
620 newGraph = PinyinEncoder::parseUserShuangpin(
621 userInput().substr(start), *spProfile, d->ime_->fuzzyFlags());
623 newGraph = PinyinEncoder::parseUserPinyin(
624 userInput().substr(start), d->ime_->correctionProfile().get(),
625 d->ime_->fuzzyFlags());
// Callback: the given graph nodes are discarded from both the lattice and
// the match state.
629 [d](
const std::unordered_set<const SegmentGraphNode *> &nodes) {
630 d->lattice_.discardNode(nodes);
631 d->matchState_.discardNode(nodes);
633 auto &graph = d->segs_;
// Run the decoder over the segment graph with the IME's search parameters.
635 d->ime_->decoder()->decode(d->lattice_, d->segs_, d->ime_->nbest(),
636 state, d->ime_->maxDistance(),
637 d->ime_->minPath(), d->ime_->beamSize(),
638 d->ime_->frameSize(), &d->matchState_);
// Start from an empty list, then take every sentence the lattice offers.
640 d->clearCandidates();
643 for (
size_t i = 0, e = d->lattice_.sentenceSize(); i < e; i++) {
644 d->candidates_.push_back(d->lattice_.sentence(i));
647 const auto *bos = &graph.start();
// Candidates appended from here on are sorted separately further down.
649 auto beginSize = d->candidates_.size();
// Walk the graph positions from graph.size() down to 1.
650 for (
size_t i = graph.size(); i > 0; i--) {
652 float max = -std::numeric_limits<float>::max();
// Penalty per unit of distance; multiplied below by a node's distance to
// the end of the graph to obtain its score adjustment.
653 auto distancePenalty = d->ime_->model()->unknownPenalty() /
654 PINYIN_DISTANCE_PENALTY_FACTOR;
// Pass 1: lattice nodes whose from() is bos. min/max are updated only
// from nodes the model does not report as unknown.
659 for (
const auto &graphNode : graph.nodes(i)) {
660 auto distance = graph.distanceToEnd(graphNode);
661 auto adjust =
static_cast<float>(distance) * distancePenalty;
662 for (
const auto &latticeNode : d->lattice_.nodes(&graphNode)) {
663 if (latticeNode.from() == bos &&
666 if (!d->ime_->model()->isNodeUnknown(latticeNode)) {
667 min = std::min(latticeNode.score(), min);
668 max = std::max(latticeNode.score(), max);
670 d->candidates_.push_back(
671 latticeNode.toSentenceResult(adjust));
// Pass 2: nodes whose from() is bos again, this time filtered by score
// using min and maxDistance().
677 for (
const auto &graphNode : graph.nodes(i)) {
678 auto distance = graph.distanceToEnd(graphNode);
679 auto adjust =
static_cast<float>(distance) * distancePenalty;
680 for (
const auto &latticeNode : d->lattice_.nodes(&graphNode)) {
681 if (latticeNode.from() == bos &&
684 if ((latticeNode.score() > min &&
685 latticeNode.score() + d->ime_->maxDistance() >
687 static_cast<const PinyinLatticeNode &>(latticeNode)
690 d->candidates_.push_back(
691 latticeNode.toSentenceResult(adjust));
// Pass 3: nodes that do not start at bos; they need a score above min,
// a score within maxDistance() of max, and a check involving
// anyCorrectionOnPath().
698 for (
const auto &graphNode : graph.nodes(i)) {
699 auto distance = graph.distanceToEnd(graphNode);
700 auto adjust =
static_cast<float>(distance) * distancePenalty;
701 for (
const auto &latticeNode : d->lattice_.nodes(&graphNode)) {
702 if (latticeNode.from() != bos &&
703 latticeNode.score() > min &&
704 latticeNode.score() + d->ime_->maxDistance() > max &&
706 .anyCorrectionOnPath()) {
707 d->candidates_.push_back(
708 latticeNode.toSentenceResult(adjust));
// Only the candidates appended after beginSize are sorted.
713 std::sort(d->candidates_.begin() + beginSize, d->candidates_.end(),
718 const auto limit = d->ime_->wordCandidateLimit();
719 std::unordered_set<CandidateDedupKey, CandidateDedupKeyHash>
721 auto &candidatesSet = d->candidatesSet_;
722 candidatesSet.clear();
// Filter pass over all candidates: each key is checked against the keys
// seen so far; for entries at or after beginSize (and a non-zero limit)
// everything except single pinyin words is checked against `limit`; the
// text and key of each candidate that is kept are recorded.
723 std::erase_if(d->candidates_,
724 [&candidatesSet, &duplicateCandidates, &index, &count,
726 bool beforeBeginSize = index++ < beginSize;
727 auto key = candidateDedupKey(candidate);
728 if (duplicateCandidates.contains(key)) {
732 if (!beforeBeginSize && limit) {
733 const bool isSinglePinyinWord =
734 candidate.sentence().size() == 1 &&
740 if (!isSinglePinyinWord) {
741 if (count >= limit) {
748 candidatesSet.insert(key.text_);
749 duplicateCandidates.insert(std::move(key));
// The cursor-limited cache is stale now.
754 d->candidatesToCursorNeedUpdate_ =
true;
// Keep the cursor out of the part that is already selected.
757 if (cursor() < selectedLength()) {
758 setCursor(selectedLength());
764 if (userInput().empty()) {
768 if (!d->selected_.empty()) {
769 if (d->selected_.back().back().offset_ == size()) {
780 for (
const auto &s : d->selected_) {
781 for (
const auto &item : s) {
782 ss += item.word_.word();
790 if (!d->selected_.empty()) {
791 return d->selected_.back().back().offset_;
// Preedit string using the preedit mode configured on the IME.
796 std::string PinyinContext::preedit()
const {
797 return preedit(ime()->preeditMode());
801 return preeditWithCursor(ime()->preeditMode());
// Preedit string for the given mode: the string part of
// preeditWithCursor(mode), with the cursor position discarded.
804 std::string PinyinContext::preedit(PinyinPreeditMode mode)
const {
805 return preeditWithCursor(mode).first;
808 std::pair<std::string, size_t>
811 std::string ss = selectedSentence();
812 const auto len = selectedLength();
814 size_t actualCursor = ss.size();
816 c = std::max(c, len);
818 auto resultSize = ss.size();
820 if (!d->candidates_.empty()) {
822 for (
const auto &node : d->candidates_[0].sentence()) {
823 for (
auto iter = node->path().begin(),
824 end = std::prev(node->path().end());
825 iter < end; iter++) {
832 auto from = (*iter)->index();
833 auto to = (*std::next(iter))->index();
834 size_t cursorInPinyin = c - from - len;
835 const size_t startPivot = resultSize;
836 auto pinyin = d->segs_.segment(from, to);
837 MatchedPinyinSyllables syls;
838 if (mode == PinyinPreeditMode::Pinyin) {
843 syls = useShuangpin()
844 ? PinyinEncoder::shuangpinToSyllables(
845 pinyin, *ime()->shuangpinProfile(),
846 PinyinFuzzyFlag::None)
847 : PinyinEncoder::stringToSyllables(
848 pinyin, PinyinFuzzyFlag::None);
850 std::string actualPinyin;
851 if (!syls.empty() && !syls.front().second.empty()) {
852 std::string_view candidatePinyin =
854 auto nthPinyin = std::distance(node->path().begin(), iter);
855 PinyinInitial bestInitial = syls[0].first;
856 PinyinFinal bestFinal = syls[0].second[0].first;
860 if (static_cast<size_t>((nthPinyin * 2) + 2) <=
861 candidatePinyin.size()) {
862 auto candidateInitial =
static_cast<PinyinInitial
>(
863 candidatePinyin[nthPinyin * 2]);
864 auto candidateFinal =
static_cast<PinyinFinal
>(
865 candidatePinyin[(nthPinyin * 2) + 1]);
868 for (
const auto &initial : syls) {
869 for (
const auto &[
final, fuzzy] : initial.second) {
873 if (candidateInitial == initial.first &&
874 (
final == PinyinFinal::Invalid ||
875 candidateFinal ==
final)) {
876 bestInitial = initial.first;
877 if (
final != PinyinFinal::Invalid) {
890 actualPinyin = PinyinEncoder::initialFinalToPinyinString(
891 bestInitial, bestFinal);
892 if (!useShuangpin()) {
893 matchPinyinCase(pinyin, actualPinyin);
896 if (!actualPinyin.empty()) {
897 if (c > from + len && c <= to + len) {
898 if (useShuangpin()) {
899 switch (cursorInPinyin) {
903 if (pinyin.size() == 2 &&
904 syls[0].first == PinyinInitial::Zero) {
905 actualPinyin = fcitx::stringutils::concat(
909 if (syls[0].first != PinyinInitial::Zero) {
911 PinyinEncoder::initialToString(
917 cursorInPinyin = actualPinyin.size();
922 std::min(actualPinyin.size(), cursorInPinyin);
923 cursorInPinyin = fcitx::utf8::ncharByteLength(
924 actualPinyin.begin(), cursorInPinyin);
927 ss.append(actualPinyin);
928 resultSize += actualPinyin.size();
930 ss.append(pinyin.data(), pinyin.size());
931 resultSize += pinyin.size();
933 if (c > from + len && c <= to + len) {
934 actualCursor = startPivot + cursorInPinyin;
940 actualCursor = resultSize;
942 return {ss, actualCursor};
947 std::vector<std::string> newSentence;
948 for (
const auto &s : d->selected_) {
949 for (
const auto &item : s) {
950 if (item.type_ != SelectedPinyinType::Separator) {
951 newSentence.push_back(item.word_.word());
958 std::vector<HistoryBigram::WordWithCode>
961 std::vector<HistoryBigram::WordWithCode> newSentence;
962 for (
const auto &s : d->selected_) {
963 for (
const auto &item : s) {
964 if (item.type_ != SelectedPinyinType::Separator) {
965 newSentence.emplace_back(item.word_.word(),
966 item.encodedPinyin());
976 for (
const auto &s : d->selected_) {
977 for (
const auto &item : s) {
978 if (!item.encodedPinyin().empty()) {
979 if (!pinyin.empty()) {
980 pinyin.push_back(
'\'');
982 pinyin += PinyinEncoder::decodeFullPinyin(item.encodedPinyin());
991 return candidateFullPinyin(d->candidates_[idx]);
997 for (
const auto &node : candidate.sentence()) {
999 if (!pinyin.empty()) {
1000 pinyin.push_back(
'\'');
1002 pinyin += PinyinEncoder::decodeFullPinyin(
// Words (with their encoded pinyin) that will be handed to the history.
1015 std::vector<HistoryBigram::WordWithCode> newSentence;
// First try to learn the selection as a single word.
1016 if (
auto [result, encodedWordPinyin] = d->learnWord();
1017 result != LearnWordResult::Ignored) {
// Only a Normal result puts the whole sentence into the history entry; a
// Custom result adds nothing here.
1019 if (result == LearnWordResult::Normal) {
1021 newSentence.push_back({sentence(), encodedWordPinyin});
// Otherwise the individual selected words are used.
1026 newSentence = selectedWordsWithPinyin();
// Special handling when any word has no encoded pinyin.
1029 if (std::ranges::any_of(newSentence, [](
const auto &word) {
1030 return word.second.empty();
// NOTE(review): `context` is assigned but never used; the call below
// evaluates contextWordsWithPinyin() a second time instead of passing
// `context` along.
1036 auto context = contextWordsWithPinyin();
1037 d->ime_->model()->history().addWithContext(contextWordsWithPinyin(),
1038 std::move(newSentence));
1042 const std::vector<std::string> &contextWords) {
1044 d->contextWords_.clear();
1045 appendContextWords(contextWords);
1050 d->contextWords_.clear();
1054 const std::vector<std::string> &contextWords) {
1057 size_t needed = LanguageModel::maxOrder() - 1;
1059 for (
const auto &word :
1060 std::span{contextWords}.last(std::min(contextWords.size(), needed))) {
1061 d->contextWords_.push_back(
1062 PinyinWordNode({word,
""}, d->ime_->model()->index(word)));
1064 while (d->contextWords_.size() > needed) {
1065 d->contextWords_.pop_front();
1071 std::vector<std::string> words;
1072 words.reserve(d->contextWords_.size());
1073 for (
const auto &word : d->contextWords_) {
1074 words.push_back(word.word());
1080 const std::vector<HistoryBigram::WordWithCode> &contextWordsWithPinyin) {
1082 d->contextWords_.clear();
1083 appendContextWordsWithPinyin(contextWordsWithPinyin);
1087 const std::vector<HistoryBigram::WordWithCode> &contextWordsWithPinyin) {
1090 size_t needed = LanguageModel::maxOrder() - 1;
1092 for (
const auto &word : std::span{contextWordsWithPinyin}.last(
1093 std::min(contextWordsWithPinyin.size(), needed))) {
1094 d->contextWords_.push_back(
1095 PinyinWordNode(word, d->ime_->model()->index(word.first)));
1097 while (d->contextWords_.size() > needed) {
1098 d->contextWords_.pop_front();
1102 std::vector<HistoryBigram::WordWithCode>
1105 std::vector<HistoryBigram::WordWithCode> words;
1106 words.reserve(d->contextWords_.size());
1107 for (
const auto &word : d->contextWords_) {
1108 words.push_back({word.word(), word.encodedPinyin()});
1113 bool PinyinContext::learnWord() {
return false; }
const std::unordered_set< std::string > & candidateSet() const
Return the set of candidates, useful for deduplication.
const std::unordered_set< std::string > & candidatesToCursorSet() const
Return the set of candidates to current cursor.
std::vector< HistoryBigram::WordWithCode > contextWordsWithPinyin() const
Get context words with pinyin for better prediction.
std::pair< std::string, size_t > preeditWithCursor() const
Mixed preedit (selected hanzi + pinyin).
State state() const
Opaque language model state.
void learn()
Add the selected part to history if selected() == true.
Provides shared data for PinyinContext.
size_t selectedLength() const
Selected pinyin length.
std::string candidateFullPinyin(size_t i) const
Get the full pinyin string of certain candidate.
void appendContextWordsWithPinyin(const std::vector< HistoryBigram::WordWithCode > &contextWordsWithPinyin)
Append context words with pinyin for better prediction.
static std::vector< char > encodeFullPinyinWithFlags(std::string_view pinyin, PinyinFuzzyFlags flags)
Encode a quote separated pinyin string.
bool selected() const
Whether the input is fully selected.
void appendContextWords(const std::vector< std::string > &contextWords)
Append context words for better prediction.
void selectCustom(size_t inputLength, std::string_view segment, std::string_view encodedPinyin="")
Create a custom selection.
void setContextWords(const std::vector< std::string > &contextWords)
Set context words for better prediction.
int pinyinAfterCursor() const
Return the position of the pinyin after the cursor.
std::string selectedSentence() const
Selected hanzi.
std::vector< std::string > selectedWords() const
Selected hanzi segments.
void setContextWordsWithPinyin(const std::vector< HistoryBigram::WordWithCode > &contextWordsWithPinyin)
Set context words with pinyin for better prediction.
void clearContextWords()
Clear context words.
std::vector< std::string > contextWords() const
Get context words for better prediction.
std::string selectedFullPinyin() const
Get the full pinyin string of the selected part.
std::vector< HistoryBigram::WordWithCode > selectedWordsWithPinyin() const
Selected hanzi with encoded pinyin.
int pinyinBeforeCursor() const
Return the position of the pinyin before the cursor.