7 #include "prediction.h" 12 #include <unordered_set> 15 #include <fcitx-utils/macros.h> 17 #include "historybigram.h" 18 #include "languagemodel.h" 28 Prediction::Prediction() : d_ptr(std::make_unique<PredictionPrivate>()) {} // Default constructor: d_ptr is initialised with a newly allocated PredictionPrivate.
// Destructor, defaulted here in the implementation file rather than in the class body.
// NOTE(review): presumably placed here so PredictionPrivate is a complete type when
// d_ptr is destroyed - confirm against the header.
30 Prediction::~Prediction() =
default;
// Setter taking a raw pointer to a const LanguageModel.
// NOTE(review): presumably stores `model` as d->model_, which the predict functions
// dereference - confirm; ownership and null handling are not established here.
32 void Prediction::setLanguageModel(
const LanguageModel *model) {
// Setter taking a raw pointer to a const HistoryBigram.
// NOTE(review): presumably stores `bigram` as d->bigram_, which predictWithScore
// calls fillPredict on - confirm; ownership and null handling are not established here.
37 void Prediction::setHistoryBigram(
const HistoryBigram *bigram) {
// Predicts candidate words following `sentence`, returning the words only.
//
// Starts from the language model's null state, scores every word of `sentence`
// in order, and then delegates to the State-based predict() overload with the
// same sentence and `realMaxSize`.
52 std::vector<std::string>
53 Prediction::predict(
const std::vector<std::string> &sentence,
60 State state = d->model_->nullState();
// One WordNode is built per input word; capacity is reserved up front.
62 std::vector<WordNode> nodes;
63 nodes.reserve(sentence.size());
64 for (
const auto &word : sentence) {
// Resolve the word to its language-model index and wrap both in a WordNode.
65 auto idx = d->model_->index(word);
66 nodes.emplace_back(word, idx);
// NOTE(review): presumably score() writes the successor state into outState and
// `state` is then advanced to it before the next word - confirm.
67 d->model_->score(state, nodes.back(), outState);
// Hand the resulting state to the State-based overload.
70 return predict(state, sentence, realMaxSize);
// Collects candidate words and pairs each with its language-model score.
//
// Candidates are gathered into a set (so duplicates collapse) from two places:
//   1. the prediction trie of the language model file, queried with the last
//      word of `sentence`, or with "<unk>" when `sentence` is empty;
//   2. d->bigram_->fillPredict(words, sentence, maxSize).
// Each candidate is scored with d->model_->singleWordScore(state, word). The
// pairs are sorted by descending score, ties broken by ascending word, and the
// list is cut to `realMaxSize` entries when `realMaxSize` is non-zero.
73 std::vector<std::pair<std::string, float>>
74 Prediction::predictWithScore(
const State &state,
75 const std::vector<std::string> &sentence,
// Candidate budget is twice the requested result size; 0 stays 0.
82 size_t maxSize = realMaxSize * 2;
83 std::unordered_set<std::string> words;
// The trie lookup only runs when the model exposes a language model file.
85 if (
auto file = d->model_->languageModelFile()) {
// Fall back to "<unk>" as the lookup key when there is no preceding word.
86 std::string search =
"<unk>";
87 if (!sentence.empty()) {
88 search = sentence.back();
// Visit the trie entries for `search`; each visited entry's suffix is added to `words`.
91 const auto &trie = file->predictionTrie();
92 trie.foreach(search, [&trie, &words,
96 trie.suffix(buf, len, pos);
97 words.emplace(std::move(buf));
// True while maxSize is 0 or fewer than maxSize words were collected.
// NOTE(review): presumably a false return stops the trie iteration - confirm.
99 return maxSize <= 0 || words.size() < maxSize;
// Let the history bigram contribute candidates to the same set.
104 d->bigram_->fillPredict(words, sentence, maxSize);
// Score every collected candidate.
107 std::vector<std::pair<std::string, float>> temps;
// `word` is taken by value so the copy can be moved into `temps`.
108 for (
auto word : words) {
109 auto score = d->model_->singleWordScore(state, word);
110 temps.emplace_back(std::move(word), score);
// Order: higher score first; equal scores fall back to ascending word.
112 std::sort(temps.begin(), temps.end(), [](
auto &lhs,
auto &rhs) {
113 if (lhs.second != rhs.second) {
114 return lhs.second > rhs.second;
116 return lhs.first < rhs.first;
// realMaxSize == 0 leaves the list untruncated.
119 if (realMaxSize && temps.size() > realMaxSize) {
120 temps.resize(realMaxSize);
// State-based prediction that yields words only.
//
// Calls predictWithScore(state, sentence, realMaxSize) and moves the word of
// each (word, score) pair into `result`, keeping the order predictWithScore
// produced; the scores are dropped.
125 std::vector<std::string>
126 Prediction::predict(
const State &state,
127 const std::vector<std::string> &sentence,
128 size_t realMaxSize) {
130 auto temps = predictWithScore(state, sentence, realMaxSize);
131 std::vector<std::string> result;
132 result.reserve(temps.size());
// `temps` is a local, so its strings can be moved out rather than copied.
133 for (
auto &temp : temps) {
134 result.emplace_back(std::move(temp.first));
Provides a DATrie (double-array trie) implementation.
The trie is based on cedar <www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/>.