Fcitx
inputbuffer.cpp
1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  *
6  */
7 #include "inputbuffer.h"
8 #include <cstddef>
9 #include <cstdint>
10 #include <iterator>
11 #include <memory>
12 #include <stdexcept>
13 #include <string>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17 #include "macros.h"
18 #include "utf8.h"
19 
20 namespace fcitx {
21 
23 public:
24  InputBufferPrivate(InputBufferOptions options) : options_(options) {}
25 
26  // make sure acc_[i] is valid, i \in [0, size()]
27  // acc_[i] = sum(j \in 0..i-1 | sz_[j])
28  void ensureAccTill(size_t i) const {
29  if (accDirty_ <= i) {
30  if (accDirty_ == 0) {
31  // acc_[0] is always 0
32  accDirty_++;
33  }
34  for (auto iter = std::next(sz_.begin(), accDirty_ - 1),
35  e = std::next(sz_.begin(), i);
36  iter < e; iter++) {
37  acc_[accDirty_] = acc_[accDirty_ - 1] + *iter;
38  accDirty_++;
39  }
40  }
41  }
42 
43  inline bool isAsciiOnly() const {
44  return options_.test(InputBufferOption::AsciiOnly);
45  }
46 
47  inline bool isFixedCursor() const {
48  return options_.test(InputBufferOption::FixedCursor);
49  }
50 
51  const InputBufferOptions options_;
52  std::string input_;
53  size_t cursor_ = 0;
54  std::vector<size_t> sz_; // utf8 lengthindex helper
55  size_t maxSize_ = 0;
56  mutable std::vector<size_t> acc_ = {0};
57  mutable size_t accDirty_ = 0;
58 };
59 
61  : d_ptr(std::make_unique<InputBufferPrivate>(options)) {}
62 
63 InputBuffer::~InputBuffer() = default;
64 
66  FCITX_D();
67  return d->options_;
68 }
69 
70 bool InputBuffer::type(uint32_t unicode) {
71  return type(fcitx::utf8::UCS4ToUTF8(unicode));
72 }
73 
74 const std::string &InputBuffer::userInput() const {
75  FCITX_D();
76  return d->input_;
77 }
78 
79 bool InputBuffer::typeImpl(const char *s, size_t length) {
80  FCITX_D();
81  std::string_view view(s, length);
82  auto utf8Length = fcitx::utf8::lengthValidated(view);
83  if (utf8Length == fcitx::utf8::INVALID_LENGTH) {
84  throw std::invalid_argument("Invalid UTF-8 string");
85  }
86  if (d->isAsciiOnly() && utf8Length != view.size()) {
87  throw std::invalid_argument(
88  "ascii only buffer only accept ascii only string");
89  }
90  if (d->maxSize_ && (utf8Length + size() > d->maxSize_)) {
91  return false;
92  }
93  d->input_.insert(std::next(d->input_.begin(), cursorByChar()), view.begin(),
94  view.end());
95  if (!d->isAsciiOnly()) {
96  auto pos = d->cursor_;
97  for (auto chrView : utf8::MakeUTF8StringViewRange(view)) {
98  d->sz_.insert(std::next(d->sz_.begin(), pos), chrView.size());
99  pos++;
100  }
101 
102  d->acc_.resize(d->sz_.size() + 1);
103  auto newDirty = d->cursor_ > 0 ? d->cursor_ - 1 : 0;
104  if (d->accDirty_ > newDirty) {
105  d->accDirty_ = newDirty;
106  }
107  }
108  d->cursor_ += utf8Length;
109  return true;
110 }
111 
113  FCITX_D();
114  if (d->isAsciiOnly()) {
115  return d->cursor_;
116  }
117  if (d->cursor_ == size()) {
118  return d->input_.size();
119  }
120  d->ensureAccTill(d->cursor_);
121  return d->acc_[d->cursor_];
122 }
123 
124 size_t InputBuffer::cursor() const {
125  FCITX_D();
126  return d->cursor_;
127 }
128 
129 size_t InputBuffer::size() const {
130  FCITX_D();
131  return d->isAsciiOnly() ? d->input_.size() : d->sz_.size();
132 }
133 
135  FCITX_D();
136  if (d->isFixedCursor()) {
137  if (cursor != size()) {
138  throw std::out_of_range(
139  "only valid position of cursor is size() for fixed cursor");
140  }
141  return;
142  }
143 
144  if (d->cursor_ > size()) {
145  throw std::out_of_range("cursor position out of range");
146  }
147  d->cursor_ = cursor;
148 }
149 
150 void InputBuffer::setMaxSize(size_t s) {
151  FCITX_D();
152  d->maxSize_ = s;
153 }
154 
155 size_t InputBuffer::maxSize() const {
156  FCITX_D();
157  return d->maxSize_;
158 }
159 
160 void InputBuffer::erase(size_t from, size_t to) {
161  FCITX_D();
162  if (from < to && to <= size()) {
163  if (d->isFixedCursor() && to != size()) {
164  return;
165  }
166 
167  size_t fromByChar;
168  size_t lengthByChar;
169  if (d->isAsciiOnly()) {
170  fromByChar = from;
171  lengthByChar = to - from;
172  } else {
173  d->ensureAccTill(to);
174  fromByChar = d->acc_[from];
175  lengthByChar = d->acc_[to] - fromByChar;
176  d->sz_.erase(std::next(d->sz_.begin(), from),
177  std::next(d->sz_.begin(), to));
178  d->accDirty_ = from;
179  d->acc_.resize(d->sz_.size() + 1);
180  }
181  if (d->cursor_ > from) {
182  if (d->cursor_ <= to) {
183  d->cursor_ = from;
184  } else {
185  d->cursor_ -= to - from;
186  }
187  }
188  d->input_.erase(fromByChar, lengthByChar);
189  }
190 }
191 
192 std::pair<size_t, size_t> InputBuffer::rangeAt(size_t i) const {
193  FCITX_D();
194  if (i >= size()) {
195  throw std::out_of_range("out of range");
196  }
197  if (d->isAsciiOnly()) {
198  return {i, i + 1};
199  }
200  d->ensureAccTill(i);
201  return {d->acc_[i], d->acc_[i] + d->sz_[i]};
202 }
203 
204 std::string_view InputBuffer::viewAt(size_t i) const {
205  auto [start, end] = rangeAt(i);
206  return std::string_view(userInput()).substr(start, end - start);
207 }
208 
209 uint32_t InputBuffer::charAt(size_t i) const {
210  FCITX_D();
211  if (i >= size()) {
212  throw std::out_of_range("out of range");
213  }
214  if (d->isAsciiOnly()) {
215  return d->input_[i];
216  }
217  d->ensureAccTill(i);
218  return utf8::getChar(d->input_.begin() + d->acc_[i],
219  d->input_.begin() + d->acc_[i] + d->sz_[i]);
220 }
221 
222 size_t InputBuffer::sizeAt(size_t i) const {
223  FCITX_D();
224  if (d->isAsciiOnly()) {
225  return 1;
226  }
227  return d->sz_[i];
228 }
229 
231  FCITX_D();
232  d->input_.shrink_to_fit();
233  d->sz_.shrink_to_fit();
234  d->acc_.shrink_to_fit();
235 }
236 } // namespace fcitx
bool type(const char *s, size_t length)
Type a C-String with length into buffer.
Definition: inputbuffer.h:52
Whether the input buffer only supports cursor at the end of buffer.
size_t size() const
Size of buffer, by number of utf8 character.
std::string UCS4ToUTF8(uint32_t code)
Convert UCS4 to UTF8 string.
Definition: utf8.cpp:19
size_t length(Iter start, Iter end)
Return the number of UTF-8 characters in the string iterator range.
Definition: utf8.h:33
size_t cursorByChar() const
Cursor position by char (byte).
virtual void erase(size_t from, size_t to)
Erase a range of character.
Definition: action.cpp:17
Definition: matchrule.h:78
InputBuffer(InputBufferOptions options=InputBufferOption::NoOption)
Create an input buffer with options.
Definition: inputbuffer.cpp:60
void shrinkToFit()
Save memory by calling shrink-to-fit on the internal buffers.
C++ Utility functions for handling utf8 strings.
const std::string & userInput() const
Utf8 string in the buffer.
Definition: inputbuffer.cpp:74
InputBufferOptions options() const
Get the buffer option.
Definition: inputbuffer.cpp:65
virtual bool typeImpl(const char *s, size_t length)
Type a certain length of utf8 character to the buffer.
Definition: inputbuffer.cpp:79
size_t lengthValidated(Iter start, Iter end)
Validate and return the number of UTF-8 characters in the string iterator range.
Definition: utf8.h:60
constexpr size_t INVALID_LENGTH
Possible return value of lengthValidated if the string is not valid.
Definition: utf8.h:53
uint32_t getChar(Iter iter, Iter end)
Get next UCS4 char from iter, do not cross end.
Definition: utf8.h:104
uint32_t charAt(size_t i) const
UCS-4 char in the buffer. Will raise exception if i is out of range.
size_t sizeAt(size_t i) const
Byte size at position i.
std::pair< size_t, size_t > rangeAt(size_t i) const
Byte range for character at position i.
Generic InputBuffer to be used to handle user's preedit.
virtual void setCursor(size_t cursor)
Set cursor position, by character.
Class provides bit flag support for Enum.
Definition: flags.h:33
size_t maxSize() const
Get the max size of the buffer.
size_t cursor() const
Cursor position by utf8 character.
void setMaxSize(size_t s)
Set max size of the buffer.
The input buffer is ascii character only, non ascii char will raise exception.