[multiple] fix json encoding

(thx helmut)

fix json encoding to operate on unicode code points
personal/stbuehler/tests-path
Glenn Strauss 2022-06-10 12:13:11 -04:00
parent 9ac5da2720
commit ca407dca5d
2 changed files with 20 additions and 2 deletions

View File

@@ -764,6 +764,11 @@ buffer_append_bs_escaped (buffer * const restrict b,
}
else { /* BS_ESCAPE_JSON */
/*(technically do not have to escape DEL (\127) or higher)*/
/*(would be faster if handled in tighter do/while loop above)*/
if (c >= 127) {
buffer_append_char(b, (char)c);
break;
}
d = buffer_extend(b, 6);
d[0] = '\\';
d[1] = 'u';

View File

@@ -1096,15 +1096,28 @@ magnet_buffer_append_bsdec (buffer * const restrict b,
unsigned char lo = hex2int(((unsigned char *)s)[4]);
if (0xFF == hi || 0xFF == lo)
break;
c = (hi << 4) | lo;
if (__builtin_expect( (s[1] != '0'), 0)
|| __builtin_expect( (s[2] != '0'), 0)) {
unsigned char hhi = hex2int(((unsigned char *)s)[1]);
unsigned char hlo = hex2int(((unsigned char *)s)[2]);
if (0xFF == hhi || 0xFF == hlo)
break;
*d++ = (hhi << 4) | hlo;
c |= (int)((hhi << 12) | (hlo << 8));
if ((unsigned int)c - 0xd800u < 0x800)
break; /* 0xD800 - 0xDFFF ill-formed UTF-8 */
}
/* adapted from
* https://stackoverflow.com/questions/4607413/is-there-a-c-library-to-convert-unicode-code-points-to-utf-8 */
if (__builtin_expect( (c > 0x7F), 0)) {
if (c < 0x800)
*d++ = 0xC0 | (c >> 6);
else {
*d++ = 0xE0 | (c >> 12);
*d++ = 0x80 | ((c >> 6) & 0x3F);
}
c = 0x80 | (c & 0x3F);
}
c = (hi << 4) | lo;
s += 4;
}
break;