6 #include "shuangpinprofile.h" 17 #include <string_view> 19 #include <unordered_map> 22 #include <fcitx-utils/charutils.h> 23 #include <fcitx-utils/macros.h> 24 #include <fcitx-utils/stringutils.h> 25 #include "pinyincorrectionprofile.h" 26 #include "pinyindata.h" 27 #include "pinyinencoder.h" 28 #include "shuangpindata.h" 37 std::string zeroS_ =
"o";
38 std::unordered_multimap<char, PinyinFinal> finalMap_;
39 std::unordered_multimap<char, PinyinInitial> initialMap_;
40 std::unordered_multimap<std::string, std::pair<PinyinInitial, PinyinFinal>>
42 std::set<PinyinFinal> finalSet_;
43 ShuangpinProfile::ValidInputSetType validInputs_;
44 ShuangpinProfile::ValidInputSetType validInitials_;
45 ShuangpinProfile::TableType spTable_;
49 for (
char c =
'a'; c <=
'z'; c++) {
50 validInputs_.insert(c);
52 for (
const auto &p : initialMap_) {
53 validInputs_.insert(p.first);
55 std::unordered_map<PinyinFinal, char> singleCharFinal;
56 for (
const auto &p : finalMap_) {
57 validInputs_.insert(p.first);
58 if (PinyinEncoder::finalToString(p.second).size() == 1) {
59 singleCharFinal[p.second] = p.first;
63 for (
const auto &p : initialFinalMap_) {
64 for (
auto c : p.first) {
65 validInputs_.insert(c);
69 std::set<char> initialChars;
70 for (
auto zero : zeroS_) {
72 validInputs_.insert(zero);
73 initialChars.insert(zero);
79 for (
auto c = PinyinEncoder::firstInitial;
80 c <= PinyinEncoder::lastInitial; c++) {
81 const auto &initialString =
82 PinyinEncoder::initialToString(static_cast<PinyinInitial>(c));
83 if (initialString.size() == 1) {
84 initialChars.insert(initialString[0]);
88 for (
auto &p : initialMap_) {
89 initialChars.insert(p.first);
94 std::set<char> finalChars;
95 for (
auto c = PinyinEncoder::firstFinal; c <= PinyinEncoder::lastFinal;
97 auto f =
static_cast<PinyinFinal
>(c);
98 const auto &finalString = PinyinEncoder::finalToString(f);
99 if (finalString.size() == 1 && !singleCharFinal.contains(f)) {
100 finalChars.insert(finalString[0]);
101 singleCharFinal[f] = finalString[0];
105 for (
auto &p : finalMap_) {
106 finalChars.insert(p.first);
109 for (
const auto &[
final, chr] : singleCharFinal) {
110 auto [begin, end] = finalMap_.equal_range(chr);
111 if (std::find_if(begin, end, [
final =
final](
const auto &item) {
112 return item.second ==
final;
114 finalMap_.emplace(chr,
final);
118 auto addPinyinToList =
119 [](std::multimap<PinyinSyllable, PinyinFuzzyFlags> &pys,
120 PinyinInitial i, PinyinFinal f, PinyinFuzzyFlags flags) {
122 if (flags == PinyinFuzzyFlag::None) {
124 auto iter = pys.find(s);
126 if (iter != pys.end() &&
127 iter->second != PinyinFuzzyFlag::None) {
131 if (iter == pys.end()) {
132 pys.emplace(s, flags);
135 auto iterPair = pys.equal_range(s);
137 if (iterPair.first != iterPair.second) {
138 if (iterPair.first->second == PinyinFuzzyFlag::None) {
142 for (
auto i = iterPair.first; i != iterPair.second;
144 if (i->second == flags) {
150 pys.emplace(s, flags);
156 std::multimap<PinyinSyllable, PinyinFuzzyFlags> &pys,
157 const std::string &py) {
158 const auto &map = getPinyinMapV2();
159 auto iterPair = map.equal_range(py);
160 if (iterPair.first != iterPair.second) {
161 for (
const auto &item : std::ranges::subrange(
162 iterPair.first, iterPair.second)) {
166 if (item.flags().test(PinyinFuzzyFlag::AdvancedTypo)) {
169 addPinyinToList(pys, item.initial(), item.final(),
176 if (zeroS_.find(
'*') != std::string::npos) {
180 for (
auto c : finalChars) {
182 auto finalIterPair = finalMap_.equal_range(c);
183 for (
auto &item : std::ranges::subrange(finalIterPair.first,
184 finalIterPair.second)) {
185 if (PinyinEncoder::isValidInitialFinal(PinyinInitial::Zero,
188 const auto &finalString =
189 PinyinEncoder::finalToString(item.second);
190 if (finalString.size() == 1) {
191 input = std::string{c, c};
193 auto final = PinyinEncoder::stringToFinal(
194 std::string{finalString[0]});
195 if (
final != PinyinFinal::Invalid) {
196 auto singleCharFinalIter =
197 singleCharFinal.find(
final);
198 if (singleCharFinalIter !=
199 singleCharFinal.end()) {
201 singleCharFinalIter->second, c};
205 spTable_[input].emplace(
207 PinyinFuzzyFlag::None);
214 for (
auto c1 : initialChars) {
215 for (
auto c2 : finalChars) {
216 std::string input{c1, c2};
217 auto &pys = spTable_[input];
219 std::vector<PinyinInitial> initials;
220 std::vector<PinyinFinal> finals;
221 auto initialIterPair = initialMap_.equal_range(c1);
222 if (initialIterPair.first != initialIterPair.second) {
223 for (
auto &item : std::ranges::subrange(
224 initialIterPair.first, initialIterPair.second)) {
225 initials.push_back(item.second);
228 auto initial = PinyinEncoder::stringToInitial(std::string{c1});
229 if (initial != PinyinInitial::Invalid) {
230 initials.push_back(initial);
233 if (zeroS_.find(c1) != std::string::npos) {
234 initials.push_back(PinyinInitial::Zero);
237 auto finalIterPair = finalMap_.equal_range(c2);
238 for (
auto &item : std::ranges::subrange(finalIterPair.first,
239 finalIterPair.second)) {
240 finals.push_back(item.second);
243 for (
auto i : initials) {
244 for (
auto f : finals) {
245 auto py = PinyinEncoder::initialToString(i) +
246 PinyinEncoder::finalToString(f);
252 spTable_.erase(input);
258 for (
const auto &p : initialFinalMap_) {
259 auto &pys = spTable_[p.first];
260 auto py = PinyinEncoder::initialToString(p.second.first) +
261 PinyinEncoder::finalToString(p.second.second);
266 for (
const auto &p : getPinyinMapV2()) {
268 if (p.pinyin() ==
"ng") {
272 if (p.pinyin().size() == 2 && p.initial() == PinyinInitial::Zero &&
273 (!spTable_.contains(p.pinyin()) ||
274 zeroS_.find(
'*') != std::string::npos)) {
275 auto &pys = spTable_[p.pinyin()];
281 for (
char c : validInputs_) {
282 std::string input{c};
283 auto &pys = spTable_[input];
284 auto initial = PinyinEncoder::stringToInitial(std::string{c});
285 if (initial != PinyinInitial::Invalid) {
286 addPinyinToList(pys, initial, PinyinFinal::Invalid,
287 PinyinFuzzyFlag::None);
289 auto initialIterPair = initialMap_.equal_range(c);
290 for (
auto &item : std::ranges::subrange(initialIterPair.first,
291 initialIterPair.second)) {
292 addPinyinToList(pys, item.second, PinyinFinal::Invalid,
293 PinyinFuzzyFlag::None);
297 auto [begin, end] = finalMap_.equal_range(c);
298 for (
auto &item : std::ranges::subrange(begin, end)) {
299 const auto final = item.second;
300 if (PinyinEncoder::finalToString(
final).size() == 1 &&
301 PinyinEncoder::isValidInitialFinal(PinyinInitial::Zero,
304 addPinyinToList(pys, PinyinInitial::Zero,
final,
305 PinyinFuzzyFlag::None);
310 spTable_.erase(input);
314 std::vector<std::tuple<std::string, PinyinSyllable, PinyinFuzzyFlags>>
317 if (correctionProfile !=
nullptr) {
318 const auto &correctionMap = correctionProfile->
correctionMap();
319 for (
const auto &[input, pys] : spTable_) {
321 if (input.size() < 2) {
324 for (
size_t i = 0; i < input.size(); i++) {
326 auto swap = correctionMap.find(chr);
327 if (swap == correctionMap.end() || swap->second.empty()) {
330 std::string newInput = input;
331 for (
auto sub : swap->second) {
333 for (
const auto &x : pys) {
334 newEntries.emplace_back(
336 x.second | PinyinFuzzyFlag::Correction);
344 for (
const auto &[input, syllable, flags] : newEntries) {
345 auto &pys = spTable_[input];
346 pys.emplace(syllable, flags);
349 for (
const auto &sp : spTable_) {
350 assert(!sp.first.empty() && sp.first.size() <= 2);
351 validInitials_.insert(sp.first[0]);
356 ShuangpinProfile::ShuangpinProfile(ShuangpinBuiltinProfile profile)
357 : ShuangpinProfile::ShuangpinProfile(profile,
nullptr) {}
359 ShuangpinProfile::ShuangpinProfile(std::istream &in)
360 : ShuangpinProfile::ShuangpinProfile(in,
nullptr) {}
362 ShuangpinProfile::ShuangpinProfile(
363 ShuangpinBuiltinProfile profile,
365 : d_ptr(std::make_unique<ShuangpinProfilePrivate>()) {
367 const SP_C *c =
nullptr;
368 const SP_S *s =
nullptr;
370 case ShuangpinBuiltinProfile::Ziranma:
375 case ShuangpinBuiltinProfile::MS:
379 case ShuangpinBuiltinProfile::Ziguang:
383 case ShuangpinBuiltinProfile::ABC:
387 case ShuangpinBuiltinProfile::Zhongwenzhixing:
388 c = SPMap_C_Zhongwenzhixing;
389 s = SPMap_S_Zhongwenzhixing;
391 case ShuangpinBuiltinProfile::PinyinJiajia:
392 c = SPMap_C_PinyinJiaJia;
393 s = SPMap_S_PinyinJiaJia;
396 case ShuangpinBuiltinProfile::Xiaohe:
401 case ShuangpinBuiltinProfile::GB:
407 throw std::invalid_argument(
"Invalid profile");
410 for (
auto i = 0; c[i].cJP; i++) {
411 auto final = PinyinEncoder::stringToFinal(c[i].strQP);
412 d->finalMap_.emplace(c[i].cJP,
final);
413 d->finalSet_.insert(
final);
416 for (
auto i = 0; s[i].cJP; i++) {
417 d->initialMap_.emplace(s[i].cJP,
418 PinyinEncoder::stringToInitial(s[i].strQP));
421 d->buildShuangpinTable(correctionProfile);
424 ShuangpinProfile::ShuangpinProfile(
426 : d_ptr(std::make_unique<ShuangpinProfilePrivate>()) {
429 bool isDefault =
false;
430 while (std::getline(in, lineBuf)) {
431 auto line = fcitx::stringutils::trimView(lineBuf);
432 if (line.empty() || line.starts_with(
'#')) {
436 std::string_view option(
"方案名称=");
437 if (fcitx::stringutils::consumePrefix(line, option)) {
438 isDefault = (line ==
"自然码" || line ==
"微软" || line ==
"紫光" ||
439 line ==
"拼音加加" || line ==
"中文之星" ||
440 line ==
"智能ABC" || line ==
"小鹤");
448 auto tolowerInPlace = [](std::string &s) {
449 std::transform(s.begin(), s.end(), s.begin(),
450 [](
char c) {
return fcitx::charutils::tolower(c); });
453 if (line[0] ==
'=' && line.size() > 1) {
454 d->zeroS_ = std::string(line.substr(1));
455 tolowerInPlace(d->zeroS_);
459 auto equal = line.find(
'=');
461 if (equal == std::string_view::npos || equal == 0) {
465 if (equal + 2 == line.size()) {
466 std::string pinyin{line.substr(0, equal)};
467 auto key = fcitx::charutils::tolower(line[equal + 1]);
468 if (
auto final = PinyinEncoder::stringToFinal(pinyin);
469 final != PinyinFinal::Invalid) {
470 d->finalMap_.emplace(key,
final);
471 }
else if (
auto initial = PinyinEncoder::stringToInitial(pinyin);
472 initial != PinyinInitial::Invalid) {
473 d->initialMap_.emplace(key, initial);
475 }
else if (equal + 3 == line.size()) {
476 std::string_view pinyin = line.substr(0, equal);
477 std::string key{line.substr(equal + 1)};
481 if (result.size() != 2) {
484 d->initialFinalMap_.emplace(
485 key, std::make_pair(static_cast<PinyinInitial>(result[0]),
486 static_cast<PinyinFinal>(result[1])));
492 d->buildShuangpinTable(correctionProfile);
498 void ShuangpinProfile::buildShuangpinTable() {}
500 const ShuangpinProfile::TableType &ShuangpinProfile::table()
const {
505 const ShuangpinProfile::ValidInputSetType &
506 ShuangpinProfile::validInput()
const {
508 return d->validInputs_;
511 const ShuangpinProfile::ValidInputSetType &
512 ShuangpinProfile::validInitial()
const {
514 return d->validInitials_;
Class that holds an updated Pinyin correction mapping based on a correction mapping.
static std::vector< char > encodeFullPinyin(std::string_view pinyin)
Encode a quote-separated pinyin string.
const std::unordered_map< char, std::vector< char > > & correctionMap() const
Return the correction mapping.