|
| void | usage (char *filename) |
| | Tell the user how to use this program. More...
|
| |
| void | serialise (const std::string &operation, const std::vector< size_t > &list) |
| | Walk through each of the lists turning it into C code. More...
|
| |
| unsigned | process_unicodedata (const char *line, unsigned last_codepoint) |
| | Given a line from the UnicodeData.txt file, work out which functions should know about this codepoint. More...
|
| |
| void | process_proplist (const char *line) |
| | Given a line from the PropList.txt file, work out which functions should know about this codepoint. More...
|
| |
|
void | make_codepoint_isalnum (void) |
| | Construct the codepoint_isalnum[] map for determining whether or not a given codepoint is alphanumeric.
|
| |
|
void | foldcase (std::vector< int > &destination, int codepoint) |
| |
| void | process_normalisation_recursively (std::vector< int > &answer, int head_codepoint) |
| | Given a codepoint, apply the normalisation rules recursively to get an expansion. More...
|
| |
| void | process_JASS_normalization (const char *line) |
| | Process a single line of UnicodeData.txt and extract the normalisation of that codepoint. More...
|
| |
| void | normalize (void) |
| |
| void | process_casefolding (const char *line) |
| | Process a single line of CaseFolding.txt and extract the full case folding data (that is, the "C+F" subset). More...
|
| |
|
int | main_event (int argc, char *argv[]) |
| |
|
int | main (int argc, char *argv[]) |
| |
|
|
static const size_t | MAX_CODEPOINT = 0x10FFFF |
| |
|
std::vector< size_t > | alpha |
| | list of alphabetical characters
|
| |
|
std::vector< size_t > | uppercase |
| | list of uppercase characters
|
| |
|
std::vector< size_t > | lowercase |
| | list of lowercase characters
|
| |
|
std::vector< size_t > | digit |
| | list of digits
|
| |
|
std::vector< size_t > | alphanumeric |
| | list of alphanumeric characters
|
| |
|
std::vector< size_t > | punc |
| | list of punctuation symbols
|
| |
|
std::vector< size_t > | space |
| | list of Unicode space characters (not a superset of C's isspace())
|
| |
|
std::vector< size_t > | whitespace |
| | list of space characters (is a superset of C's isspace())
|
| |
|
std::vector< size_t > | mark |
| | list of diacritic marks
|
| |
|
std::vector< size_t > | symbol |
| | list of symbols
|
| |
|
std::vector< size_t > | control |
| | list of control characters
|
| |
|
std::vector< size_t > | graphical |
| | list of graphical (printable) characters
|
| |
|
std::vector< size_t > | xdigit |
| | list of Unicode defined hexadecimal characters
|
| |
| std::map< int, std::vector< int > > | JASS_normalisation |
| | JASS normalisation rules (one codepoint can become more than one codepoint) More...
|
| |
|
std::map< int, std::vector< int > > | casefold |
| | The casefolded version of the codepoint.
|
| |
|
std::map< size_t, bool > | codepoint_isalnum |
| |
|
std::vector< size_t > | xmlnamestartchar |
| |
|
std::vector< size_t > | xmlnamechar |
| |
Generate C source code for is() methods for Unicode UTF-8.
- Author
- Andrew Trotman
- Copyright
- 2016 Andrew Trotman