kodi
RegExp.h
1 /*
2  * Copyright (C) 2005-2018 Team Kodi
3  * This file is part of Kodi - https://kodi.tv
4  *
5  * SPDX-License-Identifier: GPL-2.0-or-later
6  * See LICENSES/README.md for more information.
7  */
8 
9 #pragma once
10 
12 
13 #include <string>
14 #include <vector>
15 
16 /* make sure stdlib.h is included before including pcre.h inside the
17  namespace; this works around stdlib.h definitions also living in
18  the PCRE namespace */
19 #include <stdlib.h>
20 
21 namespace PCRE {
22 struct real_pcre_jit_stack; // forward declaration for PCRE without JIT
23 typedef struct real_pcre_jit_stack pcre_jit_stack;
24 #include <pcre.h>
25 }
26 
27 class CRegExp
28 {
29 public:
30  enum studyMode
31  {
32  NoStudy = 0, // do not study expression
33  StudyRegExp = 1, // study expression (slower compilation, faster find)
34  StudyWithJitComp // study expression and JIT-compile it, if possible (heavyweight optimization)
35  };
36  enum utf8Mode
37  {
38  autoUtf8 = -1, // analyze regexp for UTF-8 multi-byte chars, for Unicode codes > 0xFF
39  // or explicit Unicode properties (\p, \P and \X), enable UTF-8 mode if any of them are found
40  asciiOnly = 0, // process regexp and strings as single-byte encoded strings
41  forceUtf8 = 1 // enable UTF-8 mode (with Unicode properties)
42  };
43 
44  static const int m_MaxNumOfBackrefrences = 20;
50  CRegExp(bool caseless = false, utf8Mode utf8 = asciiOnly);
61  CRegExp(bool caseless, utf8Mode utf8, const char *re, studyMode study = NoStudy);
62 
63  CRegExp(const CRegExp& re);
64  ~CRegExp();
65 
73  bool RegComp(const char *re, studyMode study = NoStudy);
74 
82  bool RegComp(const std::string& re, studyMode study = NoStudy)
83  { return RegComp(re.c_str(), study); }
84 
93  int RegFind(const char* str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
102  int RegFind(const std::string& str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1)
103  { return PrivateRegFind(str.length(), str.c_str(), startoffset, maxNumberOfCharsToTest); }
104  std::string GetReplaceString(const std::string& sReplaceExp) const;
105  int GetFindLen() const
106  {
107  if (!m_re || !m_bMatched)
108  return 0;
109 
110  return (m_iOvector[1] - m_iOvector[0]);
111  };
112  int GetSubCount() const { return m_iMatchCount - 1; } // PCRE returns the number of sub-patterns + 1
113  int GetSubStart(int iSub) const;
114  int GetSubStart(const std::string& subName) const;
115  int GetSubLength(int iSub) const;
116  int GetSubLength(const std::string& subName) const;
117  int GetCaptureTotal() const;
118  std::string GetMatch(int iSub = 0) const;
119  std::string GetMatch(const std::string& subName) const;
120  const std::string& GetPattern() const { return m_pattern; }
121  bool GetNamedSubPattern(const char* strName, std::string& strMatch) const;
122  int GetNamedSubPatternNumber(const char* strName) const;
123  void DumpOvector(int iLog);
128  inline bool IsCompiled(void) const
129  { return !m_pattern.empty(); }
130  CRegExp& operator= (const CRegExp& re);
131  static bool IsUtf8Supported(void);
132  static bool AreUnicodePropertiesSupported(void);
133  static bool LogCheckUtf8Support(void);
134  static bool IsJitSupported(void);
135 
136 private:
137  int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
138  void InitValues(bool caseless = false, CRegExp::utf8Mode utf8 = asciiOnly);
139  static bool requireUtf8(const std::string& regexp);
140  static int readCharXCode(const std::string& regexp, size_t& pos);
141  static bool isCharClassWithUnicode(const std::string& regexp, size_t& pos);
142 
143  void Cleanup();
144  inline bool IsValidSubNumber(int iSub) const;
145 
146  PCRE::pcre* m_re;
147  PCRE::pcre_extra* m_sd;
148  static const int OVECCOUNT=(m_MaxNumOfBackrefrences + 1) * 3;
149  unsigned int m_offset;
150  int m_iOvector[OVECCOUNT];
151  utf8Mode m_utf8Mode;
152  int m_iMatchCount;
153  int m_iOptions;
154  bool m_jitCompiled;
155  bool m_bMatched;
156  PCRE::pcre_jit_stack* m_jitStack;
157  std::string m_subject;
158  std::string m_pattern;
159  static int m_Utf8Supported;
160  static int m_UcpSupported;
161  static int m_JitSupported;
162 };
163 
164 typedef std::vector<CRegExp> VECCREGEXP;
165 
bool IsCompiled(void) const
Check if the RegExp object is ready for matching.
Definition: RegExp.h:128
Definition: RegExp.h:27
Definition: RegExp.h:21
int RegFind(const std::string &str, unsigned int startoffset=0, int maxNumberOfCharsToTest=-1)
Find first match of regular expression in given string.
Definition: RegExp.h:102
Definition: LibInputPointer.h:13
bool RegComp(const std::string &re, studyMode study=NoStudy)
Compile (prepare) regular expression.
Definition: RegExp.h:82