libime
datrie.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: 2015-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 
7 #ifndef _FCITX_LIBIME_CORE_DATRIE_H_
8 #define _FCITX_LIBIME_CORE_DATRIE_H_
9 
10 /// \file
11 /// \brief Provide a DATrie implementation.
12 
13 #include <libime/core/libimecore_export.h>
14 
15 #include <cstddef>
16 #include <cstdint>
17 #include <functional>
18 #include <istream>
19 #include <memory>
20 #include <ostream>
21 #include <string>
22 #include <string_view>
23 #include <tuple>
24 #include <vector>
25 #include <fcitx-utils/macros.h>
26 
27 namespace libime {
28 
29 template <typename V, bool ORDERED = true, int MAX_TRIAL = 1>
30 class DATriePrivate;
31 
32 template <typename T>
33 struct NaN {
34  static constexpr auto N1 = -1;
35  static constexpr auto N2 = -2;
36 };
37 template <>
38 struct NaN<float> {
39  static constexpr auto N1 = 0x7fc00001;
40  static constexpr auto N2 = 0x7fc00002;
41 };
42 
43 /**
44  * This is a trie based on cedar<www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/>.
45  *
46  * comparing with original version, this version takes care of endianness when
47  * doing serialization, and handle the possible unaligned memory access on some
48  * platform.
49  *
50  * It also exploits the C++11 feature which can let you implement update easily
51  * with lambda, comparing with the original version's add/set.
52  *
53  */
54 template <typename T>
55 class DATrie {
56 public:
57  using value_type = T;
58  using position_type = uint64_t;
59  using callback_type =
60  std::function<bool(value_type, size_t, position_type)>;
61  using updater_type = std::function<value_type(value_type)>;
62 
63  /*
64  * The actual value may be different on different CPU, use isNoValue
65  * isNoPath, or isValid instead.
66  */
67  enum {
68  NO_VALUE LIBIMECORE_DEPRECATED = NaN<value_type>::N1,
69  NO_PATH LIBIMECORE_DEPRECATED = NaN<value_type>::N2
70  };
71  DATrie();
72  DATrie(const char *filename);
73  DATrie(std::istream &in);
74 
75  FCITX_DECLARE_VIRTUAL_DTOR_COPY_AND_MOVE(DATrie)
76 
77  void load(std::istream &in);
78  void save(const char *filename);
79  void save(std::ostream &stream);
80 
81  size_t size() const;
82  bool empty() const;
83 
84  // retrieve the string via len and pos
85  void suffix(std::string &s, size_t len, position_type pos) const;
86 
87  // result will be NO_VALUE
88  value_type exactMatchSearch(const char *key, size_t len) const;
89  value_type exactMatchSearch(std::string_view key) const {
90  return exactMatchSearch(key.data(), key.size());
91  }
92 
93  int32_t exactMatchSearchRaw(const char *key, size_t len) const;
94  int32_t exactMatchSearchRaw(std::string_view key) const {
95  return exactMatchSearchRaw(key.data(), key.size());
96  }
97 
98  bool hasExactMatch(std::string_view key) const;
99 
100  DATrie<T>::value_type traverse(std::string_view key,
101  position_type &from) const {
102  return traverse(key.data(), key.size(), from);
103  }
104  DATrie<T>::value_type traverse(const char *key, size_t len,
105  position_type &from) const;
106 
107  int32_t traverseRaw(std::string_view key, position_type &from) const {
108  return traverseRaw(key.data(), key.size(), from);
109  }
110  int32_t traverseRaw(const char *key, size_t len, position_type &from) const;
111 
112  // set value
113  void set(std::string_view key, value_type val) {
114  return set(key.data(), key.size(), val);
115  }
116  void set(const char *key, size_t len, value_type val);
117 
118  void update(std::string_view key, updater_type updater) {
119  update(key.data(), key.size(), updater);
120  }
121  void update(const char *key, size_t len, updater_type updater);
122 
123  void dump(value_type *data, std::size_t size) const;
124  void dump(std::vector<value_type> &data) const;
125  void dump(
126  std::vector<std::tuple<value_type, size_t, position_type>> &data) const;
127 
128  // remove key
129  bool erase(std::string_view key, position_type from = 0) {
130  return erase(key.data(), key.size(), from);
131  }
132  bool erase(const char *key, size_t len, position_type from = 0);
133  bool erase(position_type from = 0);
134 
135  // call callback on each key
136  bool foreach(callback_type func, position_type pos = 0) const;
137  // search by prefix
138  bool foreach(const char *prefix, size_t size, callback_type func,
139  position_type pos = 0) const;
140  bool foreach(std::string_view prefix, callback_type func,
141  position_type pos = 0) const {
142  return foreach(prefix.data(), prefix.size(), func, pos);
143  }
144  void clear();
145  void shrink_tail();
146 
147  static bool isValid(value_type v);
148  static bool isNoPath(value_type v);
149  static bool isNoValue(value_type v);
150 
151  static bool isValidRaw(int32_t v);
152  static bool isNoPathRaw(int32_t v);
153  static bool isNoValueRaw(int32_t v);
154 
155  static value_type noPath();
156  static value_type noValue();
157 
158  static value_type decode(int32_t raw);
159 
160  size_t mem_size() const;
161 
162 private:
163  std::unique_ptr<DATriePrivate<value_type>> d;
164 };
165 
166 extern template class LIBIMECORE_EXPORT DATrie<float>;
167 extern template class LIBIMECORE_EXPORT DATrie<int32_t>;
168 extern template class LIBIMECORE_EXPORT DATrie<uint32_t>;
169 } // namespace libime
170 
171 #endif // _FCITX_LIBIME_CORE_DATRIE_H_
This is a trie based on cedar<www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/>.
Definition: datrie.h:55