doxygen
utf8.h
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * Copyright (C) 1997-2021 by Dimitri van Heesch.
4  *
5  * Permission to use, copy, modify, and distribute this software and its
6  * documentation under the terms of the GNU General Public License is hereby
7  * granted. No representations are made about the suitability of this software
8  * for any purpose. It is provided "as is" without express or implied warranty.
9  * See the GNU General Public License for more details.
10  *
11  * Documents produced by Doxygen are derivative works derived from the
12  * input used in their production; they are not affected by this license.
13  *
14  */
15 
16 #ifndef UTF8_H
17 #define UTF8_H
18 
19 #include <cstdint>
20 #include <string>
21 
22 class TextStream;
23 
34 std::string convertUTF8ToLower(const std::string &input);
35 
39 std::string convertUTF8ToUpper(const std::string &input);
40 
44 std::string getUTF8CharAt(const std::string &input,size_t pos);
45 
49 uint32_t getUnicodeForUTF8CharAt(const std::string &input,size_t pos);
50 
54 uint8_t getUTF8CharNumBytes(char firstByte);
55 
59 const char *writeUTF8Char(TextStream &t,const char *s);
60 
62 bool lastUTF8CharIsMultibyte(const std::string &input);
63 
65 bool isUTF8CharUpperCase(const std::string &input,size_t pos);
66 
70 int isUTF8NonBreakableSpace(const char *input);
71 
72 #endif
uint32_t getUnicodeForUTF8CharAt(const std::string &input, size_t pos)
Returns the 32-bit Unicode value matching character at byte position pos in the UTF8 encoded input...
Definition: utf8.cpp:135
bool isUTF8CharUpperCase(const std::string &input, size_t pos)
Returns true iff the input string at byte position pos holds an upper case character.
Definition: utf8.cpp:218
uint8_t getUTF8CharNumBytes(char firstByte)
Returns the number of bytes making up a single UTF8 character given the first byte in the sequence...
Definition: utf8.cpp:23
int isUTF8NonBreakableSpace(const char *input)
Check if the first character pointed at by input is a non-breakable whitespace character.
Definition: utf8.cpp:228
Text streaming class that buffers data.
Definition: textstream.h:33
std::string getUTF8CharAt(const std::string &input, size_t pos)
Returns the UTF8 character found at byte position pos in the input string.
Definition: utf8.cpp:127
std::string convertUTF8ToLower(const std::string &input)
Converts the input string into a lower case version, also taking into account non-ASCII characters th...
Definition: utf8.cpp:187
bool lastUTF8CharIsMultibyte(const std::string &input)
Returns true iff the last character in input is a multibyte character.
Definition: utf8.cpp:212
std::string convertUTF8ToUpper(const std::string &input)
Converts the input string into an upper case version, also taking into account non-ASCII characters th...
Definition: utf8.cpp:192
const char * writeUTF8Char(TextStream &t, const char *s)
Writes the UTF8 character pointed to by s to stream t and returns a pointer to the next character...
Definition: utf8.cpp:197