Fcitx
stringutils.cpp
1 /*
2  * SPDX-FileCopyrightText: 2015-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  *
6  */
7 #include "stringutils.h"
8 #include <cassert>
9 #include <climits>
10 #include <cstring>
11 #include <initializer_list>
12 #include <optional>
13 #include <string>
14 #include <string_view>
15 #include <utility>
16 #include <vector>
17 #include <fcitx-utils/fcitxutils_export.h>
18 #include "charutils.h"
19 #include "macros.h"
20 
21 namespace fcitx::stringutils {
22 namespace details {
23 
// Concatenate every (pointer, length) piece of `list`, in order, into one
// string. The total length is computed up front so the result buffer is
// reserved once.
std::string
concatPieces(std::initializer_list<std::pair<const char *, std::size_t>> list) {
    std::size_t total = 0;
    for (const auto &piece : list) {
        total += piece.second;
    }

    std::string joined;
    joined.reserve(total);
    for (const auto &piece : list) {
        joined.append(piece.first, piece.second);
    }
    // Every piece contributes exactly its declared length.
    assert(joined.size() == total);
    return joined;
}
38 
// Join the (pointer, length) pieces of `list` into a path, inserting a single
// '/' between consecutive pieces. When the first piece already ends with '/',
// no separator is inserted between the first and the second piece.
//
// Returns an empty string for an empty list.
std::string concatPathPieces(
    std::initializer_list<std::pair<const char *, std::size_t>> list) {
    if (list.size() == 0) {
        return {};
    }

    const auto *iter = list.begin();
    // Check the length before looking at the last character: a zero-length
    // first piece would otherwise index with (size_t)-1.
    const bool firstPieceIsSlash =
        iter->second > 0 && iter->first[iter->second - 1] == '/';

    // One separator per piece after the first, minus the one that the
    // trailing slash of the first piece makes unnecessary.
    std::size_t size = list.size() - 1;
    for (const auto &pair : list) {
        size += pair.second;
    }
    if (list.size() > 1 && firstPieceIsSlash) {
        size -= 1;
    }

    std::string result;
    result.reserve(size);
    result.append(iter->first, iter->second);

    bool needSeparator = !firstPieceIsSlash;
    for (++iter; iter != list.end(); ++iter) {
        if (needSeparator) {
            result += '/';
        }
        // Only the separator directly after the first piece can be skipped.
        needSeparator = true;
        result.append(iter->first, iter->second);
    }
    assert(result.size() == size);
    return result;
}
79 } // namespace details
80 
81 FCITXUTILS_DEPRECATED_EXPORT bool startsWith(const std::string &str,
82  const std::string &prefix) {
83  return str.starts_with(prefix);
84 }
85 
// True when `str` begins with `prefix`; an empty prefix always matches.
bool startsWith(std::string_view str, std::string_view prefix) {
    // substr() clamps the length, so a prefix longer than str never matches.
    return str.substr(0, prefix.size()) == prefix;
}
89 
90 FCITXUTILS_DEPRECATED_EXPORT bool endsWith(const std::string &str,
91  const std::string &suffix) {
92  return str.ends_with(suffix);
93 }
94 
// True when `str` ends with `suffix`; an empty suffix always matches.
bool endsWith(std::string_view str, std::string_view suffix) {
    if (suffix.size() > str.size()) {
        return false;
    }
    return str.substr(str.size() - suffix.size()) == suffix;
}
98 
99 inline std::pair<std::string::size_type, std::string::size_type>
100 trimInplaceImpl(std::string_view str) {
101  auto start = str.find_first_not_of(FCITX_WHITESPACE);
102  if (start == std::string::npos) {
103  return {str.size(), str.size()};
104  }
105 
106  auto end = str.size();
107  while (end > start && charutils::isspace(str[end - 1])) {
108  --end;
109  }
110 
111  return {start, end};
112 }
113 
114 FCITXUTILS_DEPRECATED_EXPORT
115 std::pair<std::string::size_type, std::string::size_type>
116 trimInplace(const std::string &str) {
117  return trimInplaceImpl(str);
118 }
119 
120 std::pair<std::string::size_type, std::string::size_type>
121 trimInplace(std::string_view str) {
122  return trimInplaceImpl(str);
123 }
124 
125 FCITXUTILS_DEPRECATED_EXPORT
126 std::string trim(const std::string &str) { return trim(std::string_view(str)); }
127 
128 std::string trim(std::string_view str) {
129  auto pair = trimInplaceImpl(str);
130  return {str.begin() + pair.first, str.begin() + pair.second};
131 }
132 
133 std::string_view trimView(std::string_view str) {
134  auto pair = trimInplace(str);
135  return str.substr(pair.first, pair.second - pair.first);
136 }
137 
138 FCITXUTILS_DEPRECATED_EXPORT
139 std::vector<std::string> split(const std::string &str, const std::string &delim,
140  SplitBehavior behavior) {
141  return split(std::string_view(str), std::string_view(delim), behavior);
142 }
143 
144 std::vector<std::string> split(std::string_view str, std::string_view delim,
145  SplitBehavior behavior) {
146  std::vector<std::string> strings;
147  std::string::size_type lastPos;
148  std::string::size_type pos;
149  if (behavior == SplitBehavior::SkipEmpty) {
150  lastPos = str.find_first_not_of(delim, 0);
151  } else {
152  lastPos = 0;
153  }
154  pos = str.find_first_of(delim, lastPos);
155 
156  while (std::string::npos != pos || std::string::npos != lastPos) {
157  strings.push_back(std::string(str.substr(lastPos, pos - lastPos)));
158  if (behavior == SplitBehavior::SkipEmpty) {
159  lastPos = str.find_first_not_of(delim, pos);
160  } else {
161  if (pos == std::string::npos) {
162  break;
163  }
164  lastPos = pos + 1;
165  }
166  pos = str.find_first_of(delim, lastPos);
167  }
168 
169  return strings;
170 }
171 
172 FCITXUTILS_DEPRECATED_EXPORT std::vector<std::string>
173 split(const std::string &str, const std::string &delim) {
174  return split(std::string_view(str), std::string_view(delim));
175 }
176 
177 std::vector<std::string> split(std::string_view str, std::string_view delim) {
178  return split(str, delim, SplitBehavior::SkipEmpty);
179 }
180 
// Replace every non-overlapping occurrence of `before` in `str` with `after`,
// scanning from left to right, and return the resulting string.
//
// An empty `before` matches nothing, so `str` is returned unchanged. The
// result may legitimately be empty (e.g. replacing "a" with "" in "aaa").
std::string replaceAll(std::string str, const std::string &before,
                       const std::string &after) {
    if (before.empty()) {
        return str;
    }

    size_t match = str.find(before);
    if (match == std::string::npos) {
        // Nothing to replace: hand the input back without copying.
        return str;
    }

    std::string result;
    result.reserve(str.size());

    // `copied` is the offset in str up to which content was already emitted.
    size_t copied = 0;
    do {
        result.append(str, copied, match - copied);
        result.append(after);
        // Continue after the match so replacements never overlap.
        copied = match + before.size();
        match = str.find(before, copied);
    } while (match != std::string::npos);

    // Tail following the last occurrence.
    result.append(str, copied, std::string::npos);
    return result;
}
259 
// Find the last occurrence of `needle` (length `ol`) in `haystack` (length
// `l`) that starts at an index not greater than `from`.
//
// `from` is clamped to `l - ol`. Returns a pointer to the first character of
// the match, or nullptr when the needle is longer than the haystack, when
// `from` is greater than `l`, or when there is no match. An empty needle
// matches at the (clamped) start position.
//
// The scan keeps a rolling hash of the current window (character k of the
// window weighted by 2^k, modulo the width of unsigned int) and only calls
// memcmp when the hash equals the needle's hash.
const char *backwardSearch(const char *haystack, size_t l, const char *needle,
                           size_t ol, size_t from) {
    if (ol > l || from > l) {
        return nullptr;
    }
    const size_t delta = l - ol;
    if (from > delta) {
        from = delta;
    }
    if (ol == 0) {
        // Nothing to compare; return before `ol - 1` can wrap and before any
        // haystack byte is read.
        return haystack + from;
    }

    constexpr size_t hashBits = sizeof(unsigned int) * CHAR_BIT;
    const size_t top = ol - 1;
    const auto weight = [](char c) { return static_cast<unsigned int>(c); };

    unsigned int hashNeedle = 0;
    unsigned int hashWindow = 0;
    for (size_t idx = 0; idx < ol; ++idx) {
        hashNeedle = (hashNeedle << 1) + weight(needle[top - idx]);
        hashWindow = (hashWindow << 1) + weight(haystack[from + top - idx]);
    }

    for (size_t pos = from;; --pos) {
        if (hashWindow == hashNeedle &&
            std::memcmp(needle, haystack + pos, ol) == 0) {
            return haystack + pos;
        }
        if (pos == 0) {
            break;
        }
        // Slide the window one character to the left: drop the contribution
        // of its last character (unless it was already shifted out of the
        // hash), double every remaining weight, then add the new first one.
        if (top < hashBits) {
            hashWindow -= weight(haystack[pos + top]) << top;
        }
        hashWindow <<= 1;
        hashWindow += weight(haystack[pos - 1]);
    }
    return nullptr;
}
301 
302 char *backwardSearch(char *haystack, size_t l, const char *needle, size_t ol,
303  size_t from) {
304  return const_cast<char *>(backwardSearch(
305  static_cast<const char *>(haystack), l, needle, ol, from));
306 }
307 
308 size_t backwardSearch(const std::string &haystack, const std::string &needle,
309  size_t from) {
310  const auto *cstr = haystack.c_str();
311  const auto *result = backwardSearch(cstr, haystack.size(), needle.c_str(),
312  needle.size(), from);
313  if (result) {
314  return result - cstr;
315  }
316  return std::string::npos;
317 }
318 
// Parser state for unescape(): ESCAPE means the previous character was a
// backslash that still needs its second character.
enum class UnescapeState { NORMAL, ESCAPE };

// Unescape `str` in place.
//
// Recognized sequences: "\\\\" -> backslash, "\\n" -> newline and, only when
// `unescapeQuote` is true, "\\\"" -> double quote. Any other character after a
// backslash, or a backslash at the very end, is an error.
//
// Returns true on success, with `str` shrunk to the unescaped content.
// Returns false on error; `str` keeps its size but may already be partially
// rewritten.
//
// The loop is bounded by str.size(), so embedded NUL characters are copied
// through like any other character.
bool unescape(std::string &str, bool unescapeQuote) {
    size_t out = 0;
    UnescapeState state = UnescapeState::NORMAL;
    for (size_t in = 0; in < str.size(); ++in) {
        const char c = str[in];
        if (state == UnescapeState::NORMAL) {
            if (c == '\\') {
                state = UnescapeState::ESCAPE;
            } else {
                // out <= in always holds, so this never clobbers unread input.
                str[out++] = c;
            }
            continue;
        }

        // state == ESCAPE: translate the character following the backslash.
        if (c == '\\') {
            str[out++] = '\\';
        } else if (c == 'n') {
            str[out++] = '\n';
        } else if (c == '\"' && unescapeQuote) {
            str[out++] = '\"';
        } else {
            return false;
        }
        state = UnescapeState::NORMAL;
    }

    if (state == UnescapeState::ESCAPE) {
        // Dangling backslash at the end of the input.
        return false;
    }
    str.resize(out);
    return true;
}
359 
360 std::optional<std::string> unescapeForValue(std::string_view str) {
361  bool unescapeQuote = false;
362  // having quote at beginning and end, escape
363  if (str.size() >= 2 && str.front() == '"' && str.back() == '"') {
364  unescapeQuote = true;
365  str = str.substr(1, str.size() - 2);
366  }
367  if (str.empty()) {
368  return std::string();
369  }
370 
371  std::string value(str);
372  if (!stringutils::unescape(value, unescapeQuote)) {
373  return std::nullopt;
374  }
375  return value;
376 }
377 
// Escape `str` so that it can be stored as a value.
//
// Backslash, newline and double quote are written as "\\\\", "\\n" and "\\\"".
// The whole result is wrapped in double quotes when the input contains a form
// feed, carriage return, tab, vertical tab, space or double quote.
std::string escapeForValue(std::string_view str) {
    const bool quoted =
        str.find_first_of("\f\r\t\v \"") != std::string::npos;

    std::string escaped;
    escaped.reserve(str.size());
    if (quoted) {
        escaped += '"';
    }
    for (const char ch : str) {
        if (ch == '\\') {
            escaped += "\\\\";
        } else if (ch == '\n') {
            escaped += "\\n";
        } else if (ch == '"') {
            escaped += "\\\"";
        } else {
            escaped += ch;
        }
    }
    if (quoted) {
        escaped += '"';
    }

    return escaped;
}
408 
// If `str` begins with `prefix`, remove the prefix from `str` and return
// true; otherwise leave `str` untouched and return false.
bool consumePrefix(std::string_view &str, std::string_view prefix) {
    // substr() clamps the length, so a prefix longer than str never matches.
    if (str.substr(0, prefix.size()) != prefix) {
        return false;
    }
    str.remove_prefix(prefix.size());
    return true;
}
416 
417 } // namespace fcitx::stringutils
std::vector< std::string > split(std::string_view str, std::string_view delim)
Split the string by delim.
std::optional< std::string > unescapeForValue(std::string_view str)
unescape a string, that is potentially quoted.
bool unescape(std::string &str, bool unescapeQuote)
Unescape, in place, a string that contains escaped backslash, newline and, optionally, quote.
bool consumePrefix(std::string_view &str, std::string_view prefix)
Return true and remove the prefix from str in place if str starts with the given prefix; otherwise leave str unchanged and return false.
size_t length(Iter start, Iter end)
Return the number of UTF-8 characters in the string iterator range.
Definition: utf8.h:33
std::pair< std::string::size_type, std::string::size_type > trimInplace(std::string_view str)
Trim the whitespace by returning the start and end positions of the first and last non-whitespace characters...
std::string_view trimView(std::string_view str)
Trim the white space in string view.
bool endsWith(std::string_view str, std::string_view suffix)
Check if a string ends with a suffix.
Definition: stringutils.cpp:95
std::string replaceAll(std::string str, const std::string &before, const std::string &after)
Replace all substring appearance of before with after.
bool startsWith(std::string_view str, std::string_view prefix)
Check if a string starts with a prefix.
Definition: stringutils.cpp:86
String handle utilities.
std::string escapeForValue(std::string_view str)
escape a string, add quote if needed.
size_t backwardSearch(const std::string &haystack, const std::string &needle, size_t from)
Fast backward substring search.
std::string trim(std::string_view str)
Trim the white space in str.
Local independent API to detect character type.