Browse Source

fix UTF-8 JSON encoding for code points outside the Basic Multilingual Plane

master
Felix von Leitner 5 years ago
parent
commit
13896db585
  1. 4
      textcode/fmt_jsonescape.c
  2. 2
      textcode/scan_jsonescape.c

4
textcode/fmt_jsonescape.c

@ -47,7 +47,7 @@ escape:
if (dest) {
dest[written ]='\\';
dest[written+1]='u';
fmt_xlong(dest+written+2,0xd800 + ((u>>10) & 0x3ff));
fmt_xlong(dest+written+2,0xd800 + ((u>>10) & 0x3bf));
dest[written+6]='\\';
dest[written+7]='u';
fmt_xlong(dest+written+8,0xdc00 + (u & 0x3ff));
@ -82,6 +82,6 @@ int main() {
/* test escaping of unprintable characters */
assert(fmt_jsonescape(buf,"\001x",2)==7 && !memcmp(buf,"\\u0001x",7));
/* test conversion of large UTF-8 chars to UTF-16 surrogate pairs (poop emoji) */
assert(fmt_jsonescape(buf,"\xf0\x9f\x92\xa9x",5)==13 && !memcmp(buf,"\\ud87d\\udca9x",13));
assert(fmt_jsonescape(buf,"\xf0\x9f\x92\xa9x",5)==13 && !memcmp(buf,"\\ud83d\\udca9x",13));
}
#endif

2
textcode/scan_jsonescape.c

@ -99,7 +99,7 @@ int main() {
assert(scan_jsonescape("a\\udafd0",buf,&l)==1);
/* correct surrogate pair */
assert(scan_jsonescape("a\\ud834\\udd1eb",buf,&l)==14 && l==6 && !memcmp(buf,"a\xf0\x9d\x84\x9e""b",6));
assert(scan_jsonescape("\\ud87d\\udca9x",buf,&l)==13 && l==5 && !memcmp(buf,"\xf0\x9f\x92\xa9x",5));
assert(scan_jsonescape("\\ud83d\\udca9x",buf,&l)==13 && l==5 && !memcmp(buf,"\xf0\x9f\x92\xa9x",5));
/* how about some incorrect UTF-8? */
assert(scan_jsonescape("a\xc0\xaf",buf,&l)==1 && l==1 && !memcmp(buf,"a",1));
return 0;

Loading…
Cancel
Save