libime
pinyindictionary.h
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_
7 #define _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_
8 
9 #include <cstddef>
10 #include <functional>
11 #include <istream>
12 #include <memory>
13 #include <optional>
14 #include <ostream>
15 #include <string_view>
16 #include <unordered_set>
17 #include <fcitx-utils/flags.h>
18 #include <fcitx-utils/macros.h>
19 #include <libime/core/dictionary.h>
20 #include <libime/core/segmentgraph.h>
21 #include <libime/core/triedictionary.h>
22 #include <libime/pinyin/libimepinyin_export.h>
23 #include <libime/pinyin/pinyinencoder.h>
24 
25 namespace libime {
26 
/// On-disk representation used when loading or saving a pinyin dictionary.
enum class PinyinDictFormat {
    /// Textual dictionary format.
    Text,
    /// Binary dictionary format.
    Binary
};
28 
// Private implementation type of PinyinDictionary; only forward declared in
// this header.
class PinyinDictionaryPrivate;

/**
 * Callback type accepted by PinyinDictionary::matchWords and
 * PinyinDictionary::matchWordsPrefix.
 *
 * It is called with two string views and a float and returns a bool.
 * NOTE(review): presumably the arguments are the matched encoded pinyin, the
 * word and its cost, and the return value tells the matcher whether to
 * continue - confirm against the implementation.
 */
using PinyinMatchCallback =
    std::function<bool(std::string_view, std::string_view, float)>;
33 
34 using PinyinTrie = typename TrieDictionary::TrieType;
35 
/**
 * Per sub dictionary flag used by PinyinDictionary.
 *
 * The values are individual bits. Bit 0 is not assigned to any flag.
 */
enum class PinyinDictFlag {
    /** No flag is set. */
    NoFlag = 0,
    /** The dictionary can only be used to search the whole match sentence. */
    FullMatch = 1 << 1,
    /**
     * The dictionary is disabled and should be skipped for matching.
     * @since 1.0.10
     */
    Disabled = 1 << 2
};
50 
51 using PinyinDictFlags = fcitx::Flags<PinyinDictFlag>;
52 
53 /**
54  * PinyinDictionary is a set of dictionaries for Pinyin.
55  */
56 class LIBIMEPINYIN_EXPORT PinyinDictionary : public TrieDictionary {
57 public:
58  explicit PinyinDictionary();
60 
61  // Load dicitonary for a specific dict.
62  void load(size_t idx, std::istream &in, PinyinDictFormat format);
63  void load(size_t idx, const char *filename, PinyinDictFormat format);
64 
65  // Match the word by encoded pinyin.
66  void matchWords(const char *data, size_t size,
67  PinyinMatchCallback callback) const;
68  // Match the word by encoded pinyin.
69  void matchWordsPrefix(const char *data, size_t size,
70  PinyinMatchCallback callback) const;
71 
72  void save(size_t idx, const char *filename, PinyinDictFormat format);
73  void save(size_t idx, std::ostream &out, PinyinDictFormat format);
74 
75  void addWord(size_t idx, std::string_view fullPinyin,
76  std::string_view hanzi, float cost = 0.0F);
77  bool removeWord(size_t idx, std::string_view fullPinyin,
78  std::string_view hanzi);
79  std::optional<float> lookupWord(size_t idx, std::string_view fullPinyin,
80  std::string_view hanzi) const;
81 
82  void setFlags(size_t idx, PinyinDictFlags flags);
83 
84  /**
85  * Load text format into the Trie
86  *
87  * @param in input stream
88  * @param format dict format.
89  * @see TrieDictionary::setTrie
90  * @since 1.1.7
91  */
92  static TrieType load(std::istream &in, PinyinDictFormat format);
93 
94  using dictionaryChanged = TrieDictionary::dictionaryChanged;
95 
96 protected:
97  void
98  matchPrefixImpl(const SegmentGraph &graph,
99  const GraphMatchCallback &callback,
100  const std::unordered_set<const SegmentGraphNode *> &ignore,
101  void *helper) const override;
102 
103 private:
104  void loadText(size_t idx, std::istream &in);
105  void loadBinary(size_t idx, std::istream &in);
106  void saveText(size_t idx, std::ostream &out);
107 
108  std::unique_ptr<PinyinDictionaryPrivate> d_ptr;
109  FCITX_DECLARE_PRIVATE(PinyinDictionary);
110 };
111 } // namespace libime
112 
113 #endif // _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_
PinyinDictionary is a set of dictionaries for Pinyin.