6 #include "pinyinencoder.h" 13 #include <initializer_list> 19 #include <string_view> 21 #include <unordered_map> 24 #include <boost/bimap.hpp> 25 #include <boost/container/static_vector.hpp> 26 #include <fcitx-utils/charutils.h> 27 #include <fcitx-utils/log.h> 28 #include <fcitx-utils/stringutils.h> 29 #include "libime/core/segmentgraph.h" 30 #include "pinyincorrectionprofile.h" 31 #include "pinyindata.h" 32 #include "pinyindata_p.h" 33 #include "shuangpinprofile.h" 37 static const std::string emptyString;
39 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
40 PinyinFuzzyFlags fuzzy) {
41 log << fuzzy.toInteger();
45 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
46 PinyinInitial initial) {
47 log << PinyinEncoder::initialToString(initial);
51 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
53 log << PinyinEncoder::finalToString(
final);
// Write a syllable to the log using the text produced by its toString().
// NOTE(review): the parameter line declaring `syl` and the closing lines of
// this overload are not present in this listing.
57 fcitx::LogMessageBuilder &operator<<(fcitx::LogMessageBuilder &log,
59 log << syl.toString();
63 template <
typename L,
typename R>
65 makeBimap(std::initializer_list<
typename boost::bimap<L, R>::value_type> list) {
66 return boost::bimap<L, R>(list.begin(), list.end());
69 static const auto initialMap = makeBimap<PinyinInitial, std::string>({
70 {PinyinInitial::B,
"b"}, {PinyinInitial::P,
"p"},
71 {PinyinInitial::M,
"m"}, {PinyinInitial::F,
"f"},
72 {PinyinInitial::D,
"d"}, {PinyinInitial::T,
"t"},
73 {PinyinInitial::N,
"n"}, {PinyinInitial::L,
"l"},
74 {PinyinInitial::G,
"g"}, {PinyinInitial::K,
"k"},
75 {PinyinInitial::H,
"h"}, {PinyinInitial::J,
"j"},
76 {PinyinInitial::Q,
"q"}, {PinyinInitial::X,
"x"},
77 {PinyinInitial::ZH,
"zh"}, {PinyinInitial::CH,
"ch"},
78 {PinyinInitial::SH,
"sh"}, {PinyinInitial::R,
"r"},
79 {PinyinInitial::Z,
"z"}, {PinyinInitial::C,
"c"},
80 {PinyinInitial::S,
"s"}, {PinyinInitial::Y,
"y"},
81 {PinyinInitial::W,
"w"}, {PinyinInitial::Zero,
""},
84 static const auto finalMap = makeBimap<PinyinFinal, std::string>({
85 {PinyinFinal::A,
"a"}, {PinyinFinal::AI,
"ai"},
86 {PinyinFinal::AN,
"an"}, {PinyinFinal::ANG,
"ang"},
87 {PinyinFinal::AO,
"ao"}, {PinyinFinal::E,
"e"},
88 {PinyinFinal::EI,
"ei"}, {PinyinFinal::EN,
"en"},
89 {PinyinFinal::ENG,
"eng"}, {PinyinFinal::ER,
"er"},
90 {PinyinFinal::O,
"o"}, {PinyinFinal::ONG,
"ong"},
91 {PinyinFinal::OU,
"ou"}, {PinyinFinal::I,
"i"},
92 {PinyinFinal::IA,
"ia"}, {PinyinFinal::IE,
"ie"},
93 {PinyinFinal::IAO,
"iao"}, {PinyinFinal::IU,
"iu"},
94 {PinyinFinal::IAN,
"ian"}, {PinyinFinal::IN,
"in"},
95 {PinyinFinal::IANG,
"iang"}, {PinyinFinal::ING,
"ing"},
96 {PinyinFinal::IONG,
"iong"}, {PinyinFinal::U,
"u"},
97 {PinyinFinal::UA,
"ua"}, {PinyinFinal::UO,
"uo"},
98 {PinyinFinal::UAI,
"uai"}, {PinyinFinal::UI,
"ui"},
99 {PinyinFinal::UAN,
"uan"}, {PinyinFinal::UN,
"un"},
100 {PinyinFinal::UANG,
"uang"}, {PinyinFinal::V,
"v"},
101 {PinyinFinal::UE,
"ue"}, {PinyinFinal::VE,
"ve"},
102 {PinyinFinal::NG,
"ng"}, {PinyinFinal::Zero,
""},
103 {PinyinFinal::Letter_A,
"A"}, {PinyinFinal::Letter_B,
"B"},
104 {PinyinFinal::Letter_C,
"C"}, {PinyinFinal::Letter_D,
"D"},
105 {PinyinFinal::Letter_E,
"E"}, {PinyinFinal::Letter_F,
"F"},
106 {PinyinFinal::Letter_G,
"G"}, {PinyinFinal::Letter_H,
"H"},
107 {PinyinFinal::Letter_I,
"I"}, {PinyinFinal::Letter_J,
"J"},
108 {PinyinFinal::Letter_K,
"K"}, {PinyinFinal::Letter_L,
"L"},
109 {PinyinFinal::Letter_M,
"M"}, {PinyinFinal::Letter_N,
"N"},
110 {PinyinFinal::Letter_O,
"O"}, {PinyinFinal::Letter_P,
"P"},
111 {PinyinFinal::Letter_Q,
"Q"}, {PinyinFinal::Letter_R,
"R"},
112 {PinyinFinal::Letter_S,
"S"}, {PinyinFinal::Letter_T,
"T"},
113 {PinyinFinal::Letter_U,
"U"}, {PinyinFinal::Letter_V,
"V"},
114 {PinyinFinal::Letter_W,
"W"}, {PinyinFinal::Letter_X,
"X"},
115 {PinyinFinal::Letter_Y,
"Y"}, {PinyinFinal::Letter_Z,
"Z"},
// Maximum number of characters examined when matching a single pinyin.
118 static const int maxPinyinLength = 6;
// Trailing fields of the aggregate that carries a match result (it is
// initialized elsewhere in this file with .valid, .match, .isCompletePinyin).
// NOTE(review): the struct header and its first field are not present in this
// listing.
// The matched portion of the input.
122 std::string_view match;
// Whether the match counts as a complete pinyin.
123 bool isCompletePinyin;
126 bool hasMatchInMap(
const PinyinMap &map, std::string_view range,
127 PinyinFuzzyFlags flags) {
128 auto iterPair = map.equal_range(range);
129 if (iterPair.first != iterPair.second) {
130 for (
const auto &item :
131 std::ranges::subrange(iterPair.first, iterPair.second)) {
132 if (flags.test(item.flags())) {
// Find the longest prefix of [iter, end) that is a pinyin in `map` under
// `flags`; for prefixes of at most two characters a bare initial is also
// accepted. The result carries validity, the matched view and completeness.
// NOTE(review): the declaration line (name, return type, leading parameters)
// and several closing lines are not present in this listing.
141 template <
typename Iter>
143 const PinyinMap &map) {
// Input starting with i/u/v is rejected up front unless Correction or Letter
// is enabled; the reported match is the whole remaining range.
144 if ((*iter ==
'i' || *iter ==
'u' || *iter ==
'v') &&
145 !flags.testAny(PinyinFuzzyFlags{PinyinFuzzyFlag::Correction,
146 PinyinFuzzyFlag::Letter})) {
147 return {.valid =
false,
148 .match = std::string_view(&*iter, std::distance(iter, end)),
149 .isCompletePinyin =
false};
// Never look at more than maxPinyinLength characters.
151 if (std::distance(iter, end) > maxPinyinLength) {
152 end = iter + maxPinyinLength;
154 auto range = std::string_view(&*iter, std::distance(iter, end));
// Try progressively shorter prefixes, dropping one trailing character a time.
155 for (; !range.empty(); range.remove_suffix(1)) {
156 if (hasMatchInMap(map, range, flags)) {
// "m", "n" and "r" match but are not reported as a complete pinyin.
158 return {.valid =
true,
161 (range !=
"m" && range !=
"n" && range !=
"r")};
// A one- or two-character prefix may still be a bare initial.
163 if (range.size() <= 2) {
// This `iter` shadows the function parameter inside this scope only.
164 auto iter = initialMap.right.find(std::string{range});
165 if (iter != initialMap.right.end()) {
167 .valid =
true, .match = range, .isCompletePinyin =
false};
// Nothing matched: report the first character alone as an invalid match.
172 assert(range.empty());
173 range = std::string_view(&*iter, 1);
175 return {.valid =
false, .match = range, .isCompletePinyin =
false};
178 std::string PinyinSyllable::toString()
const {
179 return PinyinEncoder::initialToString(initial_) +
180 PinyinEncoder::finalToString(final_);
182 SegmentGraph PinyinEncoder::parseUserPinyin(std::string userPinyin,
183 PinyinFuzzyFlags flags) {
184 return parseUserPinyin(std::move(userPinyin),
nullptr, flags);
// Segment user-typed pinyin into a graph: positions are taken from a min-heap
// and, for each position, edges are added for the matches longestMatch()
// finds under every flag set in `flagsToTry`.
// NOTE(review): the return type, the `profile` parameter line, the creation of
// `result` and many closing lines are not present in this listing.
188 PinyinEncoder::parseUserPinyin(std::string userPinyin,
190 PinyinFuzzyFlags flags) {
192 auto pinyin = result.data();
193 const auto end = pinyin.end();
// NOTE(review): presumably guarded by a check that no profile was supplied;
// the guarding line is not visible here.
196 flags = flags.unset(PinyinFuzzyFlag::Correction);
// Flag sets to attempt, from the requested flags to progressively stricter
// ones (without Correction, then without AdvancedTypo and Correction).
198 std::vector<PinyinFuzzyFlags> flagsToTry = {flags};
199 if (flags.test(PinyinFuzzyFlag::Correction)) {
200 flagsToTry.push_back(flags.unset(PinyinFuzzyFlag::Correction));
202 if (flags.test(PinyinFuzzyFlag::AdvancedTypo)) {
203 flagsToTry.push_back(flags.unset(PinyinFuzzyFlag::AdvancedTypo)
204 .unset(PinyinFuzzyFlag::Correction));
// Use the profile's map when a profile is given, the default map otherwise.
207 const auto &pinyinMap = profile ? profile->
pinyinMap() : getPinyinMapV2();
// Min-heap of start offsets still to be processed.
209 std::priority_queue<size_t, std::vector<size_t>, std::greater<>> q;
// Tail of a do/while that discards duplicate copies of the current offset.
216 }
while (!q.empty() && q.top() == top);
217 if (top >= pinyin.size()) {
220 auto iter = std::next(pinyin.begin(), top);
// Handle a run of apostrophes starting at `top` as a single edge.
222 while (iter != pinyin.end() && *iter ==
'\'') {
225 auto next = std::distance(pinyin.begin(), iter);
226 result.addNext(top, next);
227 if (static_cast<size_t>(next) < pinyin.size()) {
// An upper-case character becomes a one-character segment.
232 if (fcitx::charutils::isupper(*iter)) {
233 result.addNext(top, top + 1);
237 for (
const auto fuzzyFlags : flagsToTry) {
238 auto [valid, str, isCompletePinyin] =
239 longestMatch(iter, end, fuzzyFlags, pinyinMap);
// Invalid or incomplete matches are added as-is, with no further splitting.
242 if (!valid || !isCompletePinyin) {
243 result.addNext(top, top + str.size());
244 q.push(top + str.size());
// Lengths of the segments added for this match (at most two alternatives).
253 std::array<size_t, 2> nextSize;
254 size_t nNextSize = 0;
// Ambiguity check: the last character of the match might instead start the
// next syllable. This applies when the match is followed by a non-apostrophe
// character and the match without its last character is itself in the map.
257 if (str.size() > 1 && top + str.size() < pinyin.size() &&
258 pinyin[top + str.size()] !=
'\'' &&
259 (str.back() ==
'a' || str.back() ==
'e' ||
260 str.back() ==
'g' || str.back() ==
'n' ||
261 str.back() ==
'o' || str.back() ==
'r' ||
263 fuzzyFlags.test(PinyinFuzzyFlag::Correction)) &&
264 hasMatchInMap(pinyinMap, str.substr(0, str.size() - 1),
// Match what follows under both splits: after the full match, and after the
// match shortened by one character.
267 auto nextMatch = longestMatch(iter + str.size(), end,
268 fuzzyFlags, pinyinMap);
269 auto nextMatchAlt = longestMatch(iter + str.size() - 1, end,
270 fuzzyFlags, pinyinMap);
272 str.size() - 1 + nextMatchAlt.match.size();
// Rank both splits lexicographically: validity of the following match, then
// whether the alternative reaches further than the full match, then
// completeness of the following match.
282 std::tuple<bool, bool, bool> compare(
283 nextMatch.valid,
true, nextMatch.isCompletePinyin);
284 std::tuple<bool, bool, bool> compareAlt(
285 nextMatchAlt.valid, matchSizeAlt > str.size(),
286 nextMatchAlt.isCompletePinyin);
// Keep every split that is not strictly worse; on a tie both are kept.
288 if (compare >= compareAlt) {
289 result.addNext(top, top + str.size());
290 q.push(top + str.size());
291 nextSize[nNextSize++] = str.size();
293 if (compare <= compareAlt) {
294 result.addNext(top, top + str.size() - 1);
295 q.push(top + str.size() - 1);
296 nextSize[nNextSize++] = str.size() - 1;
// No ambiguity: a single edge for the full match.
299 result.addNext(top, top + str.size());
300 q.push(top + str.size());
301 nextSize[nNextSize++] = str.size();
// Add extra edges that split a recorded segment internally.
304 for (
size_t i = 0; i < nNextSize; i++) {
305 auto nextPinyin = str.substr(0, nextSize[i]);
// Hard-coded splits: 2+1 characters for din/bon/won.
306 if (nextPinyin ==
"din" || nextPinyin ==
"bon" ||
307 nextPinyin ==
"won") {
308 result.addNext(top, top + 2);
309 result.addNext(top + 2, top + 3);
310 }
else if (nextPinyin ==
"bong" || nextPinyin ==
"wong") {
// Hard-coded splits: 2+2 and 2+1 characters for bong/wong.
312 result.addNext(top, top + 2);
313 result.addNext(top + 2, top + 4);
314 result.addNext(top + 2, top + 3);
317 }
else if ((nextPinyin.size() >= 4 &&
318 fuzzyFlags.test(PinyinFuzzyFlag::Inner)) ||
319 (nextPinyin.size() == 3 &&
320 flags.test(PinyinFuzzyFlag::InnerShort))) {
// Table-driven inner segmentation for other segments.
321 const auto &innerSegments = getInnerSegmentV2();
322 auto iter = innerSegments.find(nextPinyin);
323 if (iter != innerSegments.end()) {
324 for (
const auto &innerSeg : iter->second) {
326 top + innerSeg.first.size());
327 result.addNext(top + innerSeg.first.size(),
331 }
else if (nextPinyin.size() == 2 &&
332 flags.test(PinyinFuzzyFlag::InnerShort) &&
333 nextPinyin ==
"ng") {
// "ng" is additionally split into two one-character segments.
337 result.addNext(top, top + 1);
338 result.addNext(top + 1, top + 2);
// Segment user-typed shuangpin input into a graph using the profile's table.
// NOTE(review): the profile parameter line, the creation of `result`, the
// declarations of `i`, `start`, `final` and `py`, and many closing lines are
// not present in this listing.
347 SegmentGraph PinyinEncoder::parseUserShuangpin(std::string userPinyin,
349 PinyinFuzzyFlags flags) {
// AdvancedTypo is always cleared before shuangpin parsing.
350 flags = flags.unset(PinyinFuzzyFlag::AdvancedTypo);
352 auto pinyin = result.data();
357 const auto &table = sp.table();
358 while (i < pinyin.size()) {
// Consume a run of apostrophes and record it as one edge.
360 while (i < pinyin.size() && pinyin[i] ==
'\'') {
364 result.addNext(start, i);
367 auto initial = pinyin[i];
// An upper-case character becomes a one-character segment.
368 if (fcitx::charutils::isupper(initial)) {
369 result.addNext(i, i + 1);
// Take the following character as the second key unless it is an apostrophe.
374 if (i + 1 < pinyin.size() && pinyin[i + 1] !=
'\'') {
375 final = pinyin[i + 1];
378 std::string match{initial};
380 match.push_back(
final);
// Helper that searches table `t` for a key derived from `v` which has an
// entry enabled under `flags`.
383 auto longestMatchInTable = [flags](decltype(table) t,
384 const std::string &v) {
386 while (!py.empty()) {
387 auto iter = t.find(py);
388 if (iter != t.end()) {
389 for (
const auto &p : iter->second) {
390 if (flags.test(p.second)) {
// Advance by the matched key length when a table entry was found.
400 auto iter = longestMatchInTable(table, match);
401 if (iter != table.end()) {
402 result.addNext(i, i + iter->first.size());
403 i = i + iter->first.size();
405 result.addNext(i, i + 1);
// With PartialSp and at least four characters of input, a second pass adds
// one-character edges wherever node i is not already linked to node i + 1.
410 if (pinyin.size() >= 4 && flags.test(PinyinFuzzyFlag::PartialSp)) {
412 while (i < pinyin.size()) {
414 while (i < pinyin.size() && pinyin[i] ==
'\'') {
421 if (!result.ensureNode(i).isChild(&result.ensureNode(i + 1))) {
422 result.addNext(i, i + 1);
// Encode with no fuzzy flags enabled by delegating to
// encodeFullPinyinWithFlags().
// NOTE(review): the signature line of this function is not in this listing.
432 return encodeFullPinyinWithFlags(pinyin, PinyinFuzzyFlag::None);
// Encode an apostrophe-separated pinyin string into two bytes per syllable
// (initial, then final). Throws std::invalid_argument with the message
// "invalid full pinyin: <input>" on the failure paths visible below.
// NOTE(review): the signature's first line, the conditions guarding the
// throws, the declarations of `idx`, `pred` and `iter`, and the final return
// are not present in this listing.
437 PinyinFuzzyFlags flags) {
438 std::vector<std::string> pinyins = fcitx::stringutils::split(pinyin,
"'");
439 std::vector<char> result;
// Two output bytes are reserved for every syllable.
440 result.resize(pinyins.size() * 2);
442 for (
const auto &singlePinyin : pinyins) {
443 const auto &map = getPinyinMapV2();
444 auto [begin, end] = map.equal_range(singlePinyin);
446 throw std::invalid_argument(
"invalid full pinyin: " +
447 std::string{pinyin});
// Predicate body: an entry is acceptable when `flags` enables its flags.
451 return flags.test(entry.flags());
453 begin = std::find_if(begin, end, pred);
455 throw std::invalid_argument(
"invalid full pinyin: " +
456 std::string{pinyin});
// A second acceptable entry after the first one is also an error.
460 begin = std::next(begin);
461 if (!std::none_of(begin, end, pred)) {
462 throw std::invalid_argument(
"invalid full pinyin: " +
463 std::string{pinyin});
466 result[idx++] =
static_cast<char>(iter->initial());
467 result[idx++] =
static_cast<char>(iter->final());
// Encode user pinyin by parsing it with no fuzzy flags, walking the graph
// along the first outgoing edge of each node, and appending the first
// initial and first final of every segment.
// NOTE(review): the empty-input result, the declarations of `node` and
// `prev`, the bodies of the skip branches and the final return are not
// present in this listing.
473 std::vector<char> PinyinEncoder::encodeOneUserPinyin(std::string pinyin) {
474 if (pinyin.empty()) {
477 auto graph = parseUserPinyin(std::move(pinyin), PinyinFuzzyFlag::None);
478 std::vector<char> result;
481 while (node->nextSize()) {
483 node = &node->nexts().front();
484 auto seg = graph.segment(*prev, *node);
// Empty segments and apostrophe separators are treated specially.
485 if (seg.empty() || seg[0] ==
'\'') {
488 auto syls = stringToSyllables(seg, PinyinFuzzyFlag::None);
492 result.push_back(static_cast<char>(syls[0].first));
493 result.push_back(static_cast<char>(syls[0].second[0].first));
// Validate an encoded pinyin buffer by checking the initial byte (even index)
// of every two-byte pair.
// NOTE(review): the checks before the loop and the return statements are not
// present in this listing.
498 bool PinyinEncoder::isValidUserPinyin(
const char *data,
size_t size) {
503 for (
size_t i = 0; i < size / 2; i++) {
504 if (!PinyinEncoder::isValidInitial(data[i * 2])) {
// Decode an encoded pinyin buffer (two bytes per syllable: initial, final)
// back into text by appending each pair's initial and final spellings.
// Throws std::invalid_argument("invalid pinyin key") for rejected input.
// NOTE(review): the condition guarding the throw, the declaration of
// `result`, any separator handling and the final return are not present in
// this listing.
511 std::string PinyinEncoder::decodeFullPinyin(
const char *data,
size_t size) {
513 throw std::invalid_argument(
"invalid pinyin key");
516 for (
size_t i = 0, e = size / 2; i < e; i++) {
520 result += initialToString(static_cast<PinyinInitial>(data[i * 2]));
521 result += finalToString(static_cast<PinyinFinal>(data[(i * 2) + 1]));
526 const std::string &PinyinEncoder::initialToString(PinyinInitial initial) {
527 const static std::vector<std::string> s = []() {
528 std::vector<std::string> s;
529 s.resize(lastInitial - firstInitial + 1);
530 for (
char c = firstInitial; c <= lastInitial; c++) {
531 auto iter = initialMap.left.find(static_cast<PinyinInitial>(c));
532 s[c - firstInitial] = iter->second;
536 auto c =
static_cast<char>(initial);
537 if (c >= firstInitial && c <= lastInitial) {
538 return s[c - firstInitial];
543 PinyinInitial PinyinEncoder::stringToInitial(
const std::string &str) {
544 auto iter = initialMap.right.find(str);
545 if (iter != initialMap.right.end()) {
548 return PinyinInitial::Invalid;
551 const std::string &PinyinEncoder::finalToString(PinyinFinal
final) {
552 const static std::vector<std::string> s = []() {
553 std::vector<std::string> s;
554 s.resize(lastLetter - firstFinal + 1);
555 for (
char c = firstFinal; c <= lastLetter; c++) {
556 auto iter = finalMap.left.find(static_cast<PinyinFinal>(c));
557 s[c - firstFinal] = iter->second;
561 auto c =
static_cast<char>(
final);
562 if (c >= firstFinal && c <= lastLetter) {
563 return s[c - firstFinal];
568 PinyinFinal PinyinEncoder::stringToFinal(
const std::string &str) {
569 auto iter = finalMap.right.find(str);
570 if (iter != finalMap.right.end()) {
573 return PinyinFinal::Invalid;
// Check whether an (initial, final) pair is a valid combination by indexing
// the table returned by getEncodedInitialFinal().
// NOTE(review): the `final` parameter line, the declaration of `encode` and
// the fallback return are not present in this listing.
576 bool PinyinEncoder::isValidInitialFinal(PinyinInitial initial,
578 if (initial != PinyinInitial::Invalid &&
final != PinyinFinal::Invalid) {
// Row-major index: one row per initial, one column per final/letter value.
580 ((
static_cast<int16_t
>(initial) - PinyinEncoder::firstInitial) *
581 (PinyinEncoder::lastLetter - PinyinEncoder::firstFinal + 1)) +
582 (
static_cast<int16_t
>(
final) - PinyinEncoder::firstFinal);
583 const auto &a = getEncodedInitialFinal();
// Indices past the end of the table are treated as not valid.
584 return encode < static_cast<int>(a.size()) && a[encode];
// Build a pinyin string for an (initial, final) pair: the initial's spelling
// followed by a final string that gets special handling in the VE case when
// the initial is N or L.
// NOTE(review): the `final` parameter line, the switch header, the text
// assigned in the special cases and the return are not present in this
// listing.
589 std::string PinyinEncoder::initialFinalToPinyinString(PinyinInitial initial,
591 std::string result = initialToString(initial);
592 std::string finalString;
594 case PinyinFinal::VE:
596 if (initial == PinyinInitial::N || initial == PinyinInitial::L) {
597 if (
final == PinyinFinal::VE) {
// Otherwise the final's regular spelling is used.
606 finalString = finalToString(
final);
609 result.append(finalString);
// Expand one syllable into the fuzzy variants enabled by `flags` and merge
// them into `syls`. Each stored final carries adjuster(flags), where the
// flags are those of the fuzzy rules that produced the combination.
// NOTE(review): the static_vector capacities, the names of the static tables
// (used below as initialFuzzies / finalFuzzies / partialFinals) and several
// closing lines are not present in this listing.
615 template <
typename FuzzyValue,
typename Adjuster>
616 void getFuzzy(FuzzyPinyinSyllables<FuzzyValue> &syls,
PinyinSyllable syl,
617 PinyinFuzzyFlags flags,
bool isSp,
const Adjuster &adjuster) {
// Candidate lists start with the exact initial/final and no fuzzy flag.
619 boost::container::static_vector<std::tuple<PinyinInitial, PinyinFuzzyFlags>,
621 initials{{syl.initial(), PinyinFuzzyFlag::None}};
622 boost::container::static_vector<std::tuple<PinyinFinal, PinyinFuzzyFlags>,
624 finals{{syl.final(), PinyinFuzzyFlag::None}};
// For an initial-only syllable outside shuangpin mode, the c/ch, z/zh and
// s/sh rules are switched on regardless of the caller's flags.
627 if (syl.final() == PinyinFinal::Invalid && !isSp) {
628 if (syl.initial() == PinyinInitial::C) {
629 flags |= PinyinFuzzyFlag::C_CH;
631 if (syl.initial() == PinyinInitial::Z) {
632 flags |= PinyinFuzzyFlag::Z_ZH;
634 if (syl.initial() == PinyinInitial::S) {
635 flags |= PinyinFuzzyFlag::S_SH;
// Interchangeable initial pairs and the flag that enables each pair.
639 const static std::vector<
640 std::tuple<PinyinInitial, PinyinInitial, PinyinFuzzyFlag>>
642 {PinyinInitial::C, PinyinInitial::CH, PinyinFuzzyFlag::C_CH},
643 {PinyinInitial::S, PinyinInitial::SH, PinyinFuzzyFlag::S_SH},
644 {PinyinInitial::Z, PinyinInitial::ZH, PinyinFuzzyFlag::Z_ZH},
645 {PinyinInitial::F, PinyinInitial::H, PinyinFuzzyFlag::F_H},
646 {PinyinInitial::L, PinyinInitial::N, PinyinFuzzyFlag::L_N},
647 {PinyinInitial::L, PinyinInitial::R, PinyinFuzzyFlag::L_R},
// If the initial is either member of an enabled pair, add the other member.
650 for (
const auto &initialFuzzy : initialFuzzies) {
651 if ((syl.initial() == std::get<0>(initialFuzzy) ||
652 syl.initial() == std::get<1>(initialFuzzy)) &&
653 flags.test(std::get<2>(initialFuzzy))) {
654 initials.emplace_back((syl.initial() == std::get<0>(initialFuzzy)
655 ? std::get<1>(initialFuzzy)
656 : std::get<0>(initialFuzzy)),
657 std::get<2>(initialFuzzy));
// Interchangeable final pairs and the flag that enables each pair.
662 const static std::vector<
663 std::tuple<PinyinFinal, PinyinFinal, PinyinFuzzyFlag>>
665 {PinyinFinal::V, PinyinFinal::U, PinyinFuzzyFlag::V_U},
666 {PinyinFinal::AN, PinyinFinal::ANG, PinyinFuzzyFlag::AN_ANG},
667 {PinyinFinal::EN, PinyinFinal::ENG, PinyinFuzzyFlag::EN_ENG},
668 {PinyinFinal::IAN, PinyinFinal::IANG, PinyinFuzzyFlag::IAN_IANG},
669 {PinyinFinal::IN, PinyinFinal::ING, PinyinFuzzyFlag::IN_ING},
670 {PinyinFinal::U, PinyinFinal::OU, PinyinFuzzyFlag::U_OU},
671 {PinyinFinal::UAN, PinyinFinal::UANG, PinyinFuzzyFlag::UAN_UANG},
672 {PinyinFinal::VE, PinyinFinal::UE, PinyinFuzzyFlag::VE_UE},
// Same expansion as for initials, applied to the final.
675 for (
const auto &finalFuzzy : finalFuzzies) {
676 if ((syl.final() == std::get<0>(finalFuzzy) ||
677 syl.final() == std::get<1>(finalFuzzy)) &&
678 flags.test(std::get<2>(finalFuzzy))) {
679 finals.emplace_back((syl.final() == std::get<0>(finalFuzzy)
680 ? std::get<1>(finalFuzzy)
681 : std::get<0>(finalFuzzy)),
682 std::get<2>(finalFuzzy));
// Short finals and the longer finals they may stand for.
689 const static std::vector<std::tuple<PinyinFinal, PinyinFinal>>
691 {PinyinFinal::A, PinyinFinal::AN},
692 {PinyinFinal::A, PinyinFinal::ANG},
693 {PinyinFinal::A, PinyinFinal::AI},
694 {PinyinFinal::A, PinyinFinal::AO},
695 {PinyinFinal::E, PinyinFinal::EI},
696 {PinyinFinal::E, PinyinFinal::EN},
697 {PinyinFinal::E, PinyinFinal::ENG},
698 {PinyinFinal::E, PinyinFinal::ER},
699 {PinyinFinal::O, PinyinFinal::OU},
700 {PinyinFinal::O, PinyinFinal::ONG},
// Partial finals apply only when the sole initial candidate is Zero and the
// PartialFinal flag is set.
702 if (initials.size() == 1 &&
703 std::get<0>(initials[0]) == PinyinInitial::Zero &&
704 flags.test(PinyinFuzzyFlag::PartialFinal)) {
705 for (
const auto &partialFinal : partialFinals) {
706 if (syl.final() == std::get<0>(partialFinal)) {
707 finals.emplace_back(std::get<1>(partialFinal),
708 PinyinFuzzyFlag::PartialFinal);
// Combine every initial candidate with every final candidate.
713 for (
size_t i = 0; i < initials.size(); i++) {
714 for (
size_t j = 0; j < finals.size(); j++) {
715 auto initial = std::get<0>(initials[i]);
716 auto final = std::get<0>(finals[j]);
// Local `flags` shadows the parameter: the flags of this combination only.
717 auto flags = std::get<1>(initials[i]) | std::get<1>(finals[j]);
// The exact pair (0, 0) is always kept; other pairs must have an Invalid
// final or be a valid initial/final combination.
718 if ((i == 0 && j == 0) ||
final == PinyinFinal::Invalid ||
719 PinyinEncoder::isValidInitialFinal(initial,
final)) {
// Find or create the bucket for this initial.
720 auto iter = std::find_if(
721 syls.begin(), syls.end(),
722 [initial](
const auto &p) {
return p.first == initial; });
723 if (iter == syls.end()) {
724 syls.emplace_back(std::piecewise_construct,
725 std::forward_as_tuple(initial),
726 std::forward_as_tuple());
727 iter = std::prev(syls.end());
// Local `finals` shadows the candidate list: this bucket's stored finals.
729 auto &finals = iter->second;
// Add the final only if the bucket does not contain it yet.
730 if (std::find_if(finals.begin(), finals.end(),
732 return p.first ==
final;
733 }) == finals.end()) {
734 finals.emplace_back(
final, adjuster(flags));
// Resolve one full-pinyin string into candidate syllables: entries of `map`
// enabled by `flags`, a bare initial, and (with the Letter flag) a single
// lower-case letter, each expanded through getFuzzy().
// NOTE(review): several argument lines, closing lines and the final return
// are not present in this listing.
741 template <
typename FuzzyValue,
typename Adjuster>
742 FuzzyPinyinSyllables<FuzzyValue>
743 stringToSyllablesImpl(std::string_view pinyinView,
const PinyinMap &map,
744 PinyinFuzzyFlags flags,
const Adjuster &adjuster) {
745 FuzzyPinyinSyllables<FuzzyValue> result;
746 std::string pinyin(pinyinView);
// "m", "n" and "r" skip the map lookup.
749 if (pinyin !=
"m" && pinyin !=
"n" && pinyin !=
"r") {
750 auto iterPair = map.equal_range(pinyin);
751 for (
const auto &item :
752 std::ranges::subrange(iterPair.first, iterPair.second)) {
753 if (flags.test(item.flags())) {
// The entry's own flags are merged into what the adjuster receives.
754 getFuzzy(result, {item.initial(), item.final()}, flags,
756 [&adjuster, &item](PinyinFuzzyFlags flags) {
757 return adjuster(item.flags() | flags);
// The input may also be a bare initial; its final is then Invalid.
763 auto iter = initialMap.right.find(pinyin);
764 if (initialMap.right.end() != iter) {
765 getFuzzy(result, {iter->second, PinyinFinal::Invalid}, flags,
// A single lower-case letter with the Letter flag: Zero initial plus the
// letter's pseudo final, reported with the Letter flag added.
769 if (pinyin.size() == 1 && fcitx::charutils::islower(pinyin[0]) &&
770 flags.test(PinyinFuzzyFlag::Letter)) {
772 {PinyinInitial::Zero, PinyinEncoder::letterToFinal(pinyin[0])},
774 false, [&adjuster](PinyinFuzzyFlags flags) {
775 return adjuster(flags | PinyinFuzzyFlag::Letter);
// When nothing matched, an Invalid/Invalid placeholder entry is constructed.
779 if (result.empty()) {
781 std::piecewise_construct,
782 std::forward_as_tuple(PinyinInitial::Invalid),
783 std::forward_as_tuple(
784 1, std::make_pair(PinyinFinal::Invalid,
785 adjuster(PinyinFuzzyFlag::None))));
// NOTE(review): this loop compares p.second[0] with a PinyinFinal and pushes
// bare finals, unlike the pairs stored above; possibly disabled code whose
// preprocessor lines are not visible here. Confirm against the real file.
791 for (
auto &p : result) {
792 if (p.second.size() == 1 && p.second[0] == PinyinFinal::Invalid) {
794 for (
char test = PinyinEncoder::firstFinal;
795 test <= PinyinEncoder::lastFinal; test++) {
796 auto final =
static_cast<PinyinFinal
>(test);
797 if (PinyinEncoder::isValidInitialFinal(p.first,
final)) {
798 p.second.push_back(
final);
811 MatchedPinyinSyllables
812 PinyinEncoder::stringToSyllables(std::string_view pinyinView,
813 PinyinFuzzyFlags flags) {
814 auto adjuster = [](
const PinyinFuzzyFlags &flags) {
815 return flags != PinyinFuzzyFlag::None;
817 return stringToSyllablesImpl<bool>(pinyinView, getPinyinMapV2(), flags,
// Variant of stringToSyllables() that keeps the full fuzzy flags per match
// (identity adjuster) and uses the profile's pinyin map when `profile` is
// non-null, the default map otherwise.
// NOTE(review): the parameter line declaring `pinyinView` and `profile`, and
// the trailing argument line of the call, are not present in this listing.
821 MatchedPinyinSyllablesWithFuzzyFlags
822 PinyinEncoder::stringToSyllablesWithFuzzyFlags(
824 PinyinFuzzyFlags flags) {
825 auto identity = [](
const PinyinFuzzyFlags &flags) {
return flags; };
826 return stringToSyllablesImpl<PinyinFuzzyFlags>(
827 pinyinView, profile ? profile->
pinyinMap() : getPinyinMapV2(), flags,
// Resolve a shuangpin key sequence of at most two characters into candidate
// syllables using the profile table, expanding matches through getFuzzy().
// NOTE(review): the parameter line declaring `sp` and `flags`, several
// argument lines, closing lines and the final return are not present in this
// listing.
833 template <
typename FuzzyValue,
typename Adjuster>
834 FuzzyPinyinSyllables<FuzzyValue>
835 shuangpinToSyllablesImpl(std::string_view pinyinView,
837 const Adjuster &adjuster) {
838 assert(pinyinView.size() <= 2);
839 std::string pinyin(pinyinView);
841 const auto &table = sp.table();
842 auto iter = table.find(pinyin);
// PartialFinal is cleared for input longer than one key.
845 if (pinyinView.size() > 1) {
847 flags = flags.unset(PinyinFuzzyFlag::PartialFinal);
850 FuzzyPinyinSyllables<FuzzyValue> result;
851 if (iter != table.end()) {
852 for (
const auto &p : iter->second) {
853 if (flags.test(p.second)) {
// The table entry's own flags are merged into what the adjuster receives.
854 getFuzzy(result, {p.first.initial(), p.first.final()}, flags,
856 [base = p.second, &adjuster](PinyinFuzzyFlags flags) {
857 return adjuster(flags | base);
// A single letter also maps to a pseudo final: lower case only with the
// Letter flag, upper case always.
863 if (pinyin.length() == 1 && ((fcitx::charutils::islower(pinyin[0]) &&
864 flags.test(PinyinFuzzyFlag::Letter)) ||
865 fcitx::charutils::isupper(pinyin[0]))) {
866 bool isLower = fcitx::charutils::islower(pinyin[0]);
868 {PinyinInitial::Zero, PinyinEncoder::letterToFinal(pinyin[0])},
870 true, [&adjuster, isLower](PinyinFuzzyFlags flags) {
// NOTE(review): presumably applied only when isLower is true; the guarding
// line is not visible here.
872 flags |= PinyinFuzzyFlag::Letter;
874 return adjuster(flags);
// When nothing matched, an Invalid/Invalid placeholder entry is constructed.
878 if (result.empty()) {
880 std::piecewise_construct,
881 std::forward_as_tuple(PinyinInitial::Invalid),
882 std::forward_as_tuple(
883 1, std::make_pair(PinyinFinal::Invalid,
884 adjuster(PinyinFuzzyFlag::None))));
// Resolve a shuangpin key sequence; each stored value is a bool telling
// whether any fuzzy flag was involved in the match.
// NOTE(review): the parameter line declaring `sp` is not present in this
// listing.
892 MatchedPinyinSyllables
893 PinyinEncoder::shuangpinToSyllables(std::string_view pinyinView,
895 PinyinFuzzyFlags flags) {
// Collapses the flags to "was anything fuzzy applied".
896 auto adjuster = [](
const PinyinFuzzyFlags &flags) {
897 return flags != PinyinFuzzyFlag::None;
899 return shuangpinToSyllablesImpl<bool>(pinyinView, sp, flags, adjuster);
// Variant of shuangpinToSyllables() that keeps the full fuzzy flags for each
// match by using an identity adjuster.
// NOTE(review): the parameter line declaring `sp` and the trailing argument
// line of the call are not present in this listing.
902 MatchedPinyinSyllablesWithFuzzyFlags
903 PinyinEncoder::shuangpinToSyllablesWithFuzzyFlags(std::string_view pinyinView,
905 PinyinFuzzyFlags flags) {
906 auto identity = [](
const PinyinFuzzyFlags &flags) {
return flags; };
907 return shuangpinToSyllablesImpl<PinyinFuzzyFlags>(pinyinView, sp, flags,
// Convert a shuangpin key sequence of at most two characters to pinyin text
// using the first candidate syllable, provided it was matched with no fuzzy
// flag.
// NOTE(review): the return type, the parameter line declaring `sp` and the
// fallback return are not present in this listing.
912 PinyinEncoder::shuangpinToPinyin(std::string_view pinyinView,
914 assert(pinyinView.size() <= 2);
915 auto syls = shuangpinToSyllables(pinyinView, sp, PinyinFuzzyFlag::None);
// The stored bool is true for fuzzy matches, so only exact matches pass.
916 if (!syls.empty() && !syls[0].second.empty() && !syls[0].second[0].second) {
917 auto initial = syls[0].first;
918 auto final = syls[0].second[0].first;
919 return initialToString(initial) + finalToString(
final);
// True when `final` lies within the Letter_A..Letter_Z range (inclusive).
// NOTE(review): the enclosing signature line is not present in this listing.
925 return final >= PinyinFinal::Letter_A &&
final <= PinyinFinal::Letter_Z;
928 PinyinFinal PinyinEncoder::letterToFinal(
char c) {
929 if (c >=
'a' && c <=
'z') {
930 return static_cast<PinyinFinal
>(
931 static_cast<char>(PinyinFinal::Letter_A) + (c -
'a'));
933 if (c >=
'A' && c <=
'Z') {
934 return static_cast<PinyinFinal
>(
935 static_cast<char>(PinyinFinal::Letter_A) + (c -
'A'));
937 return PinyinFinal::Invalid;
static bool isFinalLetter(PinyinFinal final)
Check if the final is a letter.
static std::vector< char > encodeFullPinyinWithFlags(std::string_view pinyin, PinyinFuzzyFlags flags)
Encode a quote separated pinyin string.
const PinyinMap & pinyinMap() const
Return the updated pinyin map.
Class that holds the pinyin map updated according to a correction mapping.
static std::vector< char > encodeFullPinyin(std::string_view pinyin)
Encode a quote separated pinyin string.