OpenMiner  0.0.1a
Voxel game engine
Classes | Enumerations | Functions
sol::unicode Namespace Reference

Classes

struct  decoded_result
 
struct  encoded_result
 
struct  unicode_detail
 

Enumerations

enum  error_code {
  error_code::ok = 0, error_code::invalid_code_point, error_code::invalid_code_unit, error_code::invalid_leading_surrogate,
  error_code::invalid_trailing_surrogate, error_code::sequence_too_short, error_code::overlong_sequence
}
 

Functions

const string_view & to_string (error_code ec)
 
encoded_result< char > code_point_to_utf8 (char32_t codepoint)
 
encoded_result< char16_t > code_point_to_utf16 (char32_t codepoint)
 
encoded_result< char32_t > code_point_to_utf32 (char32_t codepoint)
 
template<typename It >
decoded_result< It > utf8_to_code_point (It it, It last)
 
template<typename It >
decoded_result< It > utf16_to_code_point (It it, It last)
 
template<typename It >
decoded_result< It > utf32_to_code_point (It it, It last)
 

Enumeration Type Documentation

§ error_code

Function Documentation

§ code_point_to_utf16()

/// Encodes one code point as UTF-16 (member of sol::unicode).
///
/// Code points up to unicode_detail::last_bmp_value are emitted as a single
/// code unit; anything larger is split into a lead/trail surrogate pair.
/// The input is not validated here, so the reported error is always
/// error_code::ok.
///
/// @param codepoint  Code point to encode.
/// @return           The code units, how many of them are in use (1 or 2),
///                   and error_code::ok.
inline encoded_result<char16_t> code_point_to_utf16(char32_t codepoint) {
	encoded_result<char16_t> encoded;
	encoded.error = error_code::ok;

	// Single-unit case: the value is stored as-is.
	if (codepoint <= unicode_detail::last_bmp_value) {
		encoded.code_units = std::array<char16_t, 4>{ { static_cast<char16_t>(codepoint) } };
		encoded.code_units_size = 1;
		return encoded;
	}

	// Two-unit case: subtract the normalizing offset, then distribute the
	// remaining bits over the lead (upper part) and trail (lower part) surrogates.
	const auto offset = codepoint - unicode_detail::normalizing_value;
	const auto upper_bits = (offset & unicode_detail::lead_surrogate_bitmask) >> unicode_detail::lead_shifted_bits;
	const auto lower_bits = offset & unicode_detail::trail_surrogate_bitmask;
	const auto lead_unit = unicode_detail::first_lead_surrogate + upper_bits;
	const auto trail_unit = unicode_detail::first_trail_surrogate + lower_bits;

	encoded.code_units = std::array<char16_t, 4>{ {
		static_cast<char16_t>(lead_unit),
		static_cast<char16_t>(trail_unit)
	} };
	encoded.code_units_size = 2;
	return encoded;
}

§ code_point_to_utf32()

/// Encodes one code point as UTF-32 (member of sol::unicode).
///
/// The code point is copied unchanged into the first code unit; only that
/// element of code_units is written.
///
/// @param codepoint  Code point to encode.
/// @return           Result with code_units[0] == codepoint,
///                   code_units_size == 1 and error_code::ok.
inline encoded_result<char32_t> code_point_to_utf32(char32_t codepoint) {
	encoded_result<char32_t> encoded;
	encoded.error = error_code::ok;
	encoded.code_units[0] = codepoint;
	encoded.code_units_size = 1;
	return encoded;
}

§ code_point_to_utf8()

/// Encodes one code point as UTF-8 (member of sol::unicode).
///
/// The sequence length is chosen by comparing the code point against
/// unicode_detail::last_1byte_value / last_2byte_value / last_3byte_value;
/// every larger value is written as a four-byte sequence. The input is not
/// validated here, so the reported error is always error_code::ok.
///
/// @param codepoint  Code point to encode.
/// @return           The encoded bytes, how many of them are in use (1 to 4),
///                   and error_code::ok.
inline encoded_result<char> code_point_to_utf8(char32_t codepoint) {
	// Builds a continuation byte (10xxxxxx) from the six bits of the code
	// point that start at bit position `shift`.
	const auto continuation = [codepoint](unsigned shift) {
		return static_cast<char>(0x80 | ((codepoint >> shift) & 0x3F));
	};

	encoded_result<char> encoded;
	encoded.error = error_code::ok;

	if (codepoint <= unicode_detail::last_1byte_value) {
		encoded.code_units = std::array<char, 4>{ { static_cast<char>(codepoint) } };
		encoded.code_units_size = 1;
		return encoded;
	}

	if (codepoint <= unicode_detail::last_2byte_value) {
		// 110xxxxx 10xxxxxx
		encoded.code_units = std::array<char, 4>{ {
			static_cast<char>(0xC0 | ((codepoint >> 6) & 0x1F)),
			continuation(0),
		} };
		encoded.code_units_size = 2;
		return encoded;
	}

	if (codepoint <= unicode_detail::last_3byte_value) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		encoded.code_units = std::array<char, 4>{ {
			static_cast<char>(0xE0 | ((codepoint >> 12) & 0x0F)),
			continuation(6),
			continuation(0),
		} };
		encoded.code_units_size = 3;
		return encoded;
	}

	// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	encoded.code_units = std::array<char, 4>{ {
		static_cast<char>(0xF0 | ((codepoint >> 18) & 0x07)),
		continuation(12),
		continuation(6),
		continuation(0),
	} };
	encoded.code_units_size = 4;
	return encoded;
}

§ to_string()

/// Returns a human-readable description of a sol::unicode::error_code.
///
/// The table holds exactly one entry per enumerator, in declaration order
/// (ok = 0, invalid_code_point, invalid_code_unit, invalid_leading_surrogate,
/// invalid_trailing_surrogate, sequence_too_short, overlong_sequence), so the
/// enumerator's integer value indexes it directly. Keep the table in sync
/// with the enumeration when adding new error codes.
///
/// @param ec  Error code to describe; must be one of the declared enumerators.
/// @return    Reference to an element of a function-local static table.
inline const string_view& to_string(error_code ec) {
	static const string_view messages[7] = {
		"ok",
		"invalid code points",
		"invalid code unit",
		"invalid leading surrogate",
		"invalid trailing surrogate",
		"sequence too short",
		"overlong sequence"
	};
	return messages[static_cast<std::size_t>(ec)];
}
basic_string_view< char > string_view
Definition: sol.hpp:1186

§ utf16_to_code_point()

/// Decodes one code point from a UTF-16 code unit range (member of sol::unicode).
///
/// @tparam It    Iterator type whose dereferenced value is a UTF-16 code unit.
/// @param it     Position of the first code unit to decode.
/// @param last   End of the input range.
/// @return       On success: error_code::ok, the decoded code point, and `next`
///               pointing just past the consumed unit(s).
///               On failure, `codepoint` is not assigned and `error` is one of:
///               - sequence_too_short: the range is empty (`next` == `it`) or
///                 ends right after a lead surrogate (`next` == `last`);
///               - invalid_leading_surrogate: the first unit is a surrogate
///                 but not a lead surrogate (`next` points at that unit);
///               - invalid_trailing_surrogate: the unit following a lead
///                 surrogate is not a trail surrogate (`next` points at it).
template <typename It>
inline decoded_result<It> utf16_to_code_point(It it, It last) {
	decoded_result<It> dr;
	if (it == last) {
		dr.next = it;
		dr.error = error_code::sequence_too_short;
		return dr;
	}

	char16_t lead = static_cast<char16_t>(*it);

	// A non-surrogate unit is a complete code point on its own.
	if (!unicode_detail::is_surrogate(lead)) {
		++it;
		dr.codepoint = static_cast<char32_t>(lead);
		dr.next = it;
		dr.error = error_code::ok;
		return dr;
	}
	// A surrogate that cannot start a pair.
	if (!unicode_detail::is_lead_surrogate(lead)) {
		dr.error = error_code::invalid_leading_surrogate;
		dr.next = it;
		return dr;
	}

	++it;
	// The input may end right after the lead surrogate; never dereference
	// `last` looking for the trail unit.
	if (it == last) {
		dr.error = error_code::sequence_too_short;
		dr.next = it;
		return dr;
	}
	auto trail = *it;
	if (!unicode_detail::is_trail_surrogate(trail)) {
		dr.error = error_code::invalid_trailing_surrogate;
		dr.next = it;
		return dr;
	}

	dr.codepoint = unicode_detail::combine_surrogates(lead, trail);
	dr.next = ++it;
	dr.error = error_code::ok;
	return dr;
}

§ utf32_to_code_point()

/// Decodes one code point from a UTF-32 code unit range (member of sol::unicode).
///
/// The code unit is converted to char32_t as-is; no range or surrogate
/// validation is performed here.
///
/// @tparam It    Iterator type whose dereferenced value is a UTF-32 code unit.
/// @param it     Position of the code unit to decode.
/// @param last   End of the input range.
/// @return       error_code::ok with the code point and `next` one past `it`,
///               or error_code::sequence_too_short with `next` == `it` when
///               the range is empty.
template <typename It>
inline decoded_result<It> utf32_to_code_point(It it, It last) {
	decoded_result<It> result;
	if (it != last) {
		result.codepoint = static_cast<char32_t>(*it);
		++it;
		result.next = it;
		result.error = error_code::ok;
	}
	else {
		result.next = it;
		result.error = error_code::sequence_too_short;
	}
	return result;
}

§ utf8_to_code_point()

template<typename It >
decoded_result<It> sol::unicode::utf8_to_code_point ( It  it,
It  last 
)
inline
9069  {
9070  decoded_result<It> dr;
9071  if (it == last) {
9072  dr.next = it;
9073  dr.error = error_code::sequence_too_short;
9074  return dr;
9075  }
9076 
9077  unsigned char b0 = *it;
9078  std::size_t length = unicode_detail::sequence_length(b0);
9079 
9080  if (length == 1) {
9081  dr.codepoint = static_cast<char32_t>(b0);
9082  dr.error = error_code::ok;
9083  ++it;
9084  dr.next = it;
9085  return dr;
9086  }
9087 
9088  auto is_invalid = [](unsigned char b) { return b == 0xC0 || b == 0xC1 || b > 0xF4; };
9089  auto is_continuation = [](unsigned char b) {
9090  return (b & unicode_detail::continuation_mask) == unicode_detail::continuation_signature;
9091  };
9092 
9093  if (is_invalid(b0) || is_continuation(b0)) {
9094  dr.error = error_code::invalid_code_unit;
9095  dr.next = it;
9096  return dr;
9097  }
9098 
9099  ++it;
9100  std::array<unsigned char, 4> b;
9101  b[0] = b0;
9102  for (std::size_t i = 1; i < length; ++i) {
9103  b[i] = *it;
9104  if (!is_continuation(b[i])) {
9105  dr.error = error_code::invalid_code_unit;
9106  dr.next = it;
9107  return dr;
9108  }
9109  ++it;
9110  }
9111 
9112  char32_t decoded;
9113  switch (length) {
9114  case 2:
9115  decoded = unicode_detail::decode(b[0], b[1]);
9116  break;
9117  case 3:
9118  decoded = unicode_detail::decode(b[0], b[1], b[2]);
9119  break;
9120  default:
9121  decoded = unicode_detail::decode(b[0], b[1], b[2], b[3]);
9122  break;
9123  }
9124 
9125  auto is_overlong = [](char32_t u, std::size_t bytes) {
9126  return u <= unicode_detail::last_1byte_value
9127  || (u <= unicode_detail::last_2byte_value && bytes > 2)
9128  || (u <= unicode_detail::last_3byte_value && bytes > 3);
9129  };
9130  if (is_overlong(decoded, length)) {
9131  dr.error = error_code::overlong_sequence;
9132  return dr;
9133  }
9134  if (unicode_detail::is_surrogate(decoded) || decoded > unicode_detail::last_code_point) {
9135  dr.error = error_code::invalid_code_point;
9136  return dr;
9137  }
9138 
9139  // then everything is fine
9140  dr.codepoint = decoded;
9141  dr.error = error_code::ok;
9142  dr.next = it;
9143  return dr;
9144  }