Fcitx
stringutils.cpp
1 /*
2  * SPDX-FileCopyrightText: 2015-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  *
6  */
7 #include "stringutils.h"
8 #include <cassert>
9 #include <climits>
10 #include <cstring>
11 #include <initializer_list>
12 #include <optional>
13 #include <string>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17 #include <fcitx-utils/fcitxutils_export.h>
18 #include "charutils.h"
19 #include "macros.h"
20 
21 namespace fcitx::stringutils {
22 namespace details {
23 
// Concatenate every (pointer, length) piece of list into one string, in order.
// The total length is computed up front so the result is allocated only once.
std::string
concatPieces(std::initializer_list<std::pair<const char *, std::size_t>> list) {
    std::size_t total = 0;
    for (const auto &piece : list) {
        total += piece.second;
    }

    std::string joined;
    joined.reserve(total);
    for (const auto &[data, length] : list) {
        joined.append(data, length);
    }

    // Sanity check: the appended bytes must match the precomputed length.
    assert(joined.size() == total);
    return joined;
}
38 
// Join the (pointer, length) pieces of list with '/' separators.
//
// If the first piece already ends with '/', no separator is inserted between
// the first and the second piece. An empty list yields an empty string.
// An empty first piece is treated as "does not end with '/'" rather than
// being indexed at position -1.
std::string concatPathPieces(
    std::initializer_list<std::pair<const char *, std::size_t>> list) {
    if (list.size() == 0) {
        return {};
    }

    const auto &head = *list.begin();
    // Only look at the last character when the first piece is non-empty;
    // otherwise head.second - 1 would wrap around and read out of bounds.
    const bool headEndsWithSlash =
        head.second > 0 && head.first[head.second - 1] == '/';

    // One separator between each adjacent pair of pieces ...
    std::size_t size = list.size() - 1;
    for (const auto &piece : list) {
        size += piece.second;
    }
    // ... except that the first separator is dropped when the first piece
    // already supplies the slash.
    if (list.size() > 1 && headEndsWithSlash) {
        size -= 1;
    }

    std::string result;
    result.reserve(size);
    bool skipNextSeparator = headEndsWithSlash;
    for (const auto *it = list.begin(); it != list.end(); ++it) {
        if (it != list.begin()) {
            if (skipNextSeparator) {
                skipNextSeparator = false;
            } else {
                result += '/';
            }
        }
        result.append(it->first, it->second);
    }
    assert(result.size() == size);
    return result;
}
79 } // namespace details
80 
81 FCITXUTILS_DEPRECATED_EXPORT bool startsWith(const std::string &str,
82  const std::string &prefix) {
83  return startsWith(std::string_view(str), std::string_view(prefix));
84 }
85 
// Return true when str begins with prefix. An empty prefix always matches.
bool startsWith(std::string_view str, std::string_view prefix) {
    return str.size() >= prefix.size() &&
           str.substr(0, prefix.size()) == prefix;
}
93 
94 FCITXUTILS_DEPRECATED_EXPORT bool endsWith(const std::string &str,
95  const std::string &suffix) {
96  return endsWith(std::string_view(str), std::string_view(suffix));
97 }
98 
// Return true when str finishes with suffix. An empty suffix always matches.
bool endsWith(std::string_view str, std::string_view suffix) {
    return str.size() >= suffix.size() &&
           str.substr(str.size() - suffix.size()) == suffix;
}
107 
108 inline std::pair<std::string::size_type, std::string::size_type>
109 trimInplaceImpl(std::string_view str) {
110  auto start = str.find_first_not_of(FCITX_WHITESPACE);
111  if (start == std::string::npos) {
112  return {str.size(), str.size()};
113  }
114 
115  auto end = str.size();
116  while (end > start && charutils::isspace(str[end - 1])) {
117  --end;
118  }
119 
120  return {start, end};
121 }
122 
123 FCITXUTILS_DEPRECATED_EXPORT
124 std::pair<std::string::size_type, std::string::size_type>
125 trimInplace(const std::string &str) {
126  return trimInplaceImpl(str);
127 }
128 
129 std::pair<std::string::size_type, std::string::size_type>
130 trimInplace(std::string_view str) {
131  return trimInplaceImpl(str);
132 }
133 
134 FCITXUTILS_DEPRECATED_EXPORT
135 std::string trim(const std::string &str) { return trim(std::string_view(str)); }
136 
137 std::string trim(std::string_view str) {
138  auto pair = trimInplaceImpl(str);
139  return {str.begin() + pair.first, str.begin() + pair.second};
140 }
141 
142 std::string_view trimView(std::string_view str) {
143  auto pair = trimInplace(str);
144  return str.substr(pair.first, pair.second - pair.first);
145 }
146 
147 FCITXUTILS_DEPRECATED_EXPORT
148 std::vector<std::string> split(const std::string &str, const std::string &delim,
149  SplitBehavior behavior) {
150  return split(std::string_view(str), std::string_view(delim), behavior);
151 }
152 
153 std::vector<std::string> split(std::string_view str, std::string_view delim,
154  SplitBehavior behavior) {
155  std::vector<std::string> strings;
156  std::string::size_type lastPos;
157  std::string::size_type pos;
158  if (behavior == SplitBehavior::SkipEmpty) {
159  lastPos = str.find_first_not_of(delim, 0);
160  } else {
161  lastPos = 0;
162  }
163  pos = str.find_first_of(delim, lastPos);
164 
165  while (std::string::npos != pos || std::string::npos != lastPos) {
166  strings.push_back(std::string(str.substr(lastPos, pos - lastPos)));
167  if (behavior == SplitBehavior::SkipEmpty) {
168  lastPos = str.find_first_not_of(delim, pos);
169  } else {
170  if (pos == std::string::npos) {
171  break;
172  }
173  lastPos = pos + 1;
174  }
175  pos = str.find_first_of(delim, lastPos);
176  }
177 
178  return strings;
179 }
180 
181 FCITXUTILS_DEPRECATED_EXPORT std::vector<std::string>
182 split(const std::string &str, const std::string &delim) {
183  return split(std::string_view(str), std::string_view(delim));
184 }
185 
186 std::vector<std::string> split(std::string_view str, std::string_view delim) {
187  return split(str, delim, SplitBehavior::SkipEmpty);
188 }
189 
// Return a copy of str in which every non-overlapping occurrence of before
// (scanning left to right) is replaced by after.
//
// If before is empty, or does not occur in str, str is returned unchanged.
// A result that becomes empty (for example removing every character) is
// returned as an empty string.
std::string replaceAll(std::string str, const std::string &before,
                       const std::string &after) {
    if (before.empty()) {
        return str;
    }

    std::string::size_type match = str.find(before);
    if (match == std::string::npos) {
        // Nothing to replace; hand the input back without building a copy.
        return str;
    }

    std::string result;
    result.reserve(str.size());

    // copiedUpTo marks the end of the part of str already transferred to
    // result (either copied verbatim or replaced).
    std::string::size_type copiedUpTo = 0;
    do {
        result.append(str, copiedUpTo, match - copiedUpTo);
        result.append(after);
        copiedUpTo = match + before.size();
        match = str.find(before, copiedUpTo);
    } while (match != std::string::npos);

    // Tail after the last occurrence.
    result.append(str, copiedUpTo, std::string::npos);
    return result;
}
268 
// Search backwards for the last occurrence of needle (length ol) in haystack
// (length l) that starts at or before index from.
//
// Returns a pointer to the first character of the match, or nullptr when there
// is no match, when the needle is longer than the haystack, or when from > l.
// from is clamped to the last position at which the needle could still fit.
// An empty needle matches at the (clamped) from position.
//
// A rolling hash H(p) = sum(haystack[p + i] * 2^i) modulo 2^bits is kept for
// the current window, so most positions are rejected without a memcmp.
const char *backwardSearch(const char *haystack, size_t l, const char *needle,
                           size_t ol, size_t from) {
    if (ol > l) {
        return nullptr;
    }
    const size_t delta = l - ol;
    if (from > l) {
        return nullptr;
    }
    if (from > delta) {
        from = delta;
    }
    if (ol == 0) {
        // Empty needle: matches everywhere, nothing to hash or compare.
        return haystack + from;
    }

    constexpr size_t hashBits = sizeof(unsigned int) * CHAR_BIT;
    // Hash on unsigned values so that shifts never operate on signed ints.
    const auto toHash = [](char c) {
        return static_cast<unsigned int>(static_cast<unsigned char>(c));
    };

    unsigned int hashNeedle = 0;
    unsigned int hashHaystack = 0;
    for (size_t idx = ol; idx-- > 0;) {
        hashNeedle = (hashNeedle << 1) + toHash(needle[idx]);
        hashHaystack = (hashHaystack << 1) + toHash(haystack[from + idx]);
    }

    for (size_t pos = from;; --pos) {
        if (hashHaystack == hashNeedle &&
            std::memcmp(needle, haystack + pos, ol) == 0) {
            return haystack + pos;
        }
        if (pos == 0) {
            break;
        }
        // Slide the window one character to the left: remove the contribution
        // of the character leaving on the right, double the rest, and add the
        // character entering on the left. When ol - 1 >= hashBits the leaving
        // character's term is already 0 modulo 2^hashBits after doubling.
        if (ol - 1 < hashBits) {
            hashHaystack -= toHash(haystack[pos + ol - 1]) << (ol - 1);
        }
        hashHaystack <<= 1;
        hashHaystack += toHash(haystack[pos - 1]);
    }
    return nullptr;
}
310 
311 char *backwardSearch(char *haystack, size_t l, const char *needle, size_t ol,
312  size_t from) {
313  return const_cast<char *>(backwardSearch(
314  static_cast<const char *>(haystack), l, needle, ol, from));
315 }
316 
317 size_t backwardSearch(const std::string &haystack, const std::string &needle,
318  size_t from) {
319  const auto *cstr = haystack.c_str();
320  const auto *result = backwardSearch(cstr, haystack.size(), needle.c_str(),
321  needle.size(), from);
322  if (result) {
323  return result - cstr;
324  }
325  return std::string::npos;
326 }
327 
// Parser state for unescape(): NORMAL copies characters, ESCAPE means the
// previous character was a backslash.
enum class UnescapeState { NORMAL, ESCAPE };

// Unescape str in place.
//
// Recognized sequences: "\\\\" -> backslash, "\\n" -> newline, and, only when
// unescapeQuote is true, "\\\"" -> double quote.
//
// Returns false on an unknown escape sequence or on a trailing lone backslash;
// in that case str is left partially rewritten and not resized. Returns true
// otherwise, with str shrunk to the unescaped length. The whole string is
// processed according to its size(), so embedded NUL characters are kept.
bool unescape(std::string &str, bool unescapeQuote) {
    size_t writePos = 0;
    UnescapeState state = UnescapeState::NORMAL;

    for (size_t readPos = 0; readPos < str.size(); ++readPos) {
        const char current = str[readPos];

        if (state == UnescapeState::NORMAL) {
            if (current == '\\') {
                state = UnescapeState::ESCAPE;
            } else {
                // writePos never overtakes readPos, so this is safe in place.
                str[writePos++] = current;
            }
            continue;
        }

        // state == ESCAPE: decode the character following the backslash.
        char decoded;
        if (current == '\\') {
            decoded = '\\';
        } else if (current == 'n') {
            decoded = '\n';
        } else if (current == '\"' && unescapeQuote) {
            decoded = '\"';
        } else {
            return false;
        }
        str[writePos++] = decoded;
        state = UnescapeState::NORMAL;
    }

    if (state == UnescapeState::ESCAPE) {
        // Input ended right after a backslash.
        return false;
    }

    str.resize(writePos);
    return true;
}
368 
369 std::optional<std::string> unescapeForValue(std::string_view str) {
370  bool unescapeQuote = false;
371  // having quote at beginning and end, escape
372  if (str.size() >= 2 && str.front() == '"' && str.back() == '"') {
373  unescapeQuote = true;
374  str = str.substr(1, str.size() - 2);
375  }
376  if (str.empty()) {
377  return std::string();
378  }
379 
380  std::string value(str);
381  if (!stringutils::unescape(value, unescapeQuote)) {
382  return std::nullopt;
383  }
384  return value;
385 }
386 
// Escape str so that it can be stored as a value.
//
// Backslash, newline and double quote are written as "\\\\", "\\n" and
// "\\\"". The whole result is wrapped in double quotes when str contains a
// form feed, carriage return, tab, vertical tab, space or double quote.
std::string escapeForValue(std::string_view str) {
    const bool quoted = str.find_first_of("\f\r\t\v \"") != std::string::npos;

    std::string escaped;
    escaped.reserve(str.size());
    if (quoted) {
        escaped += '"';
    }

    for (const char ch : str) {
        if (ch == '\\') {
            escaped += "\\\\";
        } else if (ch == '\n') {
            escaped += "\\n";
        } else if (ch == '"') {
            escaped += "\\\"";
        } else {
            escaped += ch;
        }
    }

    if (quoted) {
        escaped += '"';
    }
    return escaped;
}
417 
418 bool consumePrefix(std::string_view &str, std::string_view prefix) {
419  if (stringutils::startsWith(str, prefix)) {
420  str = str.substr(prefix.size());
421  return true;
422  }
423  return false;
424 }
425 
426 } // namespace fcitx::stringutils
std::vector< std::string > split(std::string_view str, std::string_view delim)
Split the string by delim.
std::optional< std::string > unescapeForValue(std::string_view str)
unescape a string, that is potentially quoted.
bool unescape(std::string &str, bool unescapeQuote)
In-place unescape a string that contains backslash, new line, and optionally quote.
bool endsWith(std::string_view str, std::string_view suffix)
Check if a string ends with a suffix.
Definition: stringutils.cpp:99
bool consumePrefix(std::string_view &str, std::string_view prefix)
Return a substring of input str if str starts with given prefix.
size_t length(Iter start, Iter end)
Return the number of UTF-8 characters in the string iterator range.
Definition: utf8.h:33
std::pair< std::string::size_type, std::string::size_type > trimInplace(std::string_view str)
Trim the whitespace by returning start and end of first and last non whitespace character position...
std::string_view trimView(std::string_view str)
Trim the white space in string view.
std::string replaceAll(std::string str, const std::string &before, const std::string &after)
Replace all substring appearance of before with after.
bool startsWith(std::string_view str, std::string_view prefix)
Check if a string starts with a prefix.
Definition: stringutils.cpp:86
String handle utilities.
std::string escapeForValue(std::string_view str)
escape a string, add quote if needed.
size_t backwardSearch(const std::string &haystack, const std::string &needle, size_t from)
Fast backward substring search.
std::string trim(std::string_view str)
Trim the white space in str.
Local independent API to detect character type.