|
|
@ -20,12 +20,12 @@ namespace
|
|
|
|
{
|
|
|
|
{
|
|
|
|
size_t i = 0;
|
|
|
|
size_t i = 0;
|
|
|
|
unsigned long unicode;
|
|
|
|
unsigned long unicode;
|
|
|
|
size_t todo;
|
|
|
|
size_t numbytes;
|
|
|
|
unsigned char ch = utf8[i++];
|
|
|
|
unsigned char ch = utf8[i++];
|
|
|
|
if (ch <= 0x7F)
|
|
|
|
if (ch <= 0x7F)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
unicode = ch;
|
|
|
|
unicode = ch;
|
|
|
|
todo = 0;
|
|
|
|
numbytes = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ch <= 0xBF)
|
|
|
|
else if (ch <= 0xBF)
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -34,23 +34,23 @@ namespace
|
|
|
|
else if (ch <= 0xDF)
|
|
|
|
else if (ch <= 0xDF)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
unicode = ch&0x1F;
|
|
|
|
unicode = ch&0x1F;
|
|
|
|
todo = 1;
|
|
|
|
numbytes = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ch <= 0xEF)
|
|
|
|
else if (ch <= 0xEF)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
unicode = ch&0x0F;
|
|
|
|
unicode = ch&0x0F;
|
|
|
|
todo = 2;
|
|
|
|
numbytes = 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ch <= 0xF7)
|
|
|
|
else if (ch <= 0xF7)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
unicode = ch&0x07;
|
|
|
|
unicode = ch&0x07;
|
|
|
|
todo = 3;
|
|
|
|
numbytes = 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
else
|
|
|
|
{
|
|
|
|
{
|
|
|
|
throw std::logic_error("not a UTF-8 string");
|
|
|
|
throw std::logic_error("not a UTF-8 string");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (size_t j = 0; j < todo; ++j)
|
|
|
|
for (size_t j = 0; j < numbytes; ++j)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
unsigned char ch = utf8[i++];
|
|
|
|
unsigned char ch = utf8[i++];
|
|
|
|
if (ch < 0x80 || ch > 0xBF)
|
|
|
|
if (ch < 0x80 || ch > 0xBF)
|
|
|
|