Mirror of :pserver:cvs@cvs.fefe.de:/cvs libowfat https://www.fefe.de/libowfat/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

107 lines
3.5 KiB

  1. #include "fmt.h"
  2. #include "textcode.h"
  3. #include "scan.h"
  /*
   * Decode the contents of a JSON string literal.
   *
   * Reads from src until an unescaped double quote or the terminating NUL,
   * resolving backslash escapes and \uXXXX escapes (including UTF-16
   * surrogate pairs, which are emitted as a single UTF-8 sequence).
   * Unescaped input is expected to be valid UTF-8 and is copied verbatim.
   *
   *   src      NUL-terminated input, positioned after the opening quote.
   *   dest     output buffer, or NULL to only compute the output length.
   *   destlen  receives the number of output bytes; must not be NULL.
   *
   * Returns the number of source bytes consumed.  On success this is the
   * offset of the closing quote or of the terminating NUL.  On invalid
   * input, parsing stops and the offset of the first byte that could not
   * be decoded is returned; if a lead surrogate was still waiting for its
   * trail surrogate, the offset of that lead surrogate's backslash is
   * returned instead, so a half-decoded pair is never reported as consumed.
   */
  size_t scan_jsonescape(const char *src,char *dest,size_t *destlen) {
    const unsigned char* s=(const unsigned char*) src;
    const unsigned int nolead=(unsigned int)-1;	/* "no pending lead surrogate" */
    unsigned int lead=nolead;	/* pending UTF-16 lead surrogate, if any */
    size_t leadpos=0;		/* source offset of that lead surrogate's backslash */
    size_t written=0,i=0;

    while (s[i] && s[i]!='"') {
      size_t from=i;		/* first source byte to copy verbatim */
      size_t n,k;

      if (s[i]=='\\' && s[i+1]=='u') {
        unsigned int cur=0,cp;
        size_t j;
        for (j=0; j<4; ++j) {
          /* int, not char: plain char may be unsigned, which would hide
           * the negative "not a hex digit" result.  A NUL inside the four
           * digits is rejected here too, so we never read past the end. */
          int x=scan_fromhex(s[i+2+j]);
          if (x<0) goto abort;
          cur=(cur<<4) | (unsigned int)x;
        }
        if (cur>=0xd800 && cur<=0xdbff) {
          /* lead surrogate: remember it and wait for the trail surrogate */
          if (lead!=nolead) goto abort;	/* two lead surrogates in a row */
          lead=cur;
          leadpos=i;
          i+=6;
          continue;			/* write nothing yet */
        }
        if (cur>=0xdc00 && cur<=0xdfff) {
          if (lead==nolead) goto abort;	/* trail surrogate without lead */
          /* must be an addition: the shifted lead bits can overlap 0x10000 */
          cp=0x10000 + ((lead&0x3ff)<<10) + (cur&0x3ff);
        } else {
          if (lead!=nolead) goto abort;	/* lead not followed by a trail */
          cp=cur;
        }
        written+=fmt_utf8(dest?dest+written:dest,cp);
        lead=nolead;
        i+=6;
        continue;
      }

      /* Anything other than a \u escape cannot complete a surrogate pair. */
      if (lead!=nolead) goto abort;

      if (s[i]=='\\') {
        char c;
        switch (s[i+1]) {
        case 0: goto abort;		/* lone backslash at end of input */
        case 'n': c='\n'; break;
        case 'r': c='\r'; break;
        case 'b': c='\b'; break;
        case 'f': c='\f'; break;
        case 't': c='\t'; break;
        default: c=0; break;		/* \\ \" \/ etc.: next character is taken literally */
        }
        if (c) {
          if (dest) dest[written]=c;
          ++written;
          i+=2;
          continue;
        }
        from=i+1;			/* drop the backslash, copy what follows */
      }

      /* We expect UTF-8 incoming.  Make sure it is valid, and copy the whole
       * sequence at once so continuation bytes are never looked at on their
       * own.  scan_utf8 yields the sequence length, or 0 if it is invalid. */
      n=scan_utf8(src+from,4,NULL);
      if (!n) goto abort;
      if (dest)
        for (k=0; k<n; ++k) dest[written+k]=src[from+k];
      written+=n;
      i=from+n;
    }

    /* Closing quote or end of input while a lead surrogate is still open. */
    if (lead!=nolead) goto abort;
    *destlen=written;
    return i;

  abort:
    if (lead!=nolead) i=leadpos;	/* cancel the open surrogate pair */
    *destlen=written;
    return i;
  }
  71. #ifdef UNITTEST
  72. #include <assert.h>
  73. #include <string.h>
  /*
   * Unit test driver for scan_jsonescape.  Each table row is one call; rows
   * whose expected output is NULL only check the return value.
   */
  int main() {
    static const struct {
      const char *input;	/* string handed to scan_jsonescape */
      size_t consumed;		/* expected return value */
      size_t outlen;		/* expected *destlen (ignored if output==NULL) */
      const char *output;	/* expected decoded bytes, or NULL to skip */
    } cases[] = {
      { "fnord", 5, 5, "fnord" },
      /* are the single-character escapes properly unescaped? */
      { "a\\nb", 4, 3, "a\nb" },
      { "a\\rb", 4, 3, "a\rb" },
      { "a\\bb", 4, 3, "a\bb" },
      { "a\\fb", 4, 3, "a\fb" },
      { "a\\tb", 4, 3, "a\tb" },
      { "a\\\\b", 4, 3, "a\\b" },
      { "a\\/b", 4, 3, "a/b" },
      { "a\\\"b", 4, 3, "a\"b" },
      /* a double quote ends the string */
      { "a\"b", 1, 1, "a" },
      /* unicode escape */
      { "a\\u005cb", 8, 3, "a\\b" },
      /* trail surrogate with no lead before it */
      { "a\\udead\"", 1, 0, NULL },
      /* lead surrogate with no trail behind it */
      { "a\\udafd\"", 1, 0, NULL },
      { "a\\udafd\\udafd", 1, 0, NULL },
      { "a\\udafd0", 1, 0, NULL },
      /* correct surrogate pair */
      { "a\\ud834\\udd1eb", 14, 6, "a\xf0\x9d\x84\x9e""b" },
      /* incorrect (overlong) UTF-8 */
      { "a\xc0\xaf", 1, 1, "a" },
    };
    char buf[100];
    size_t idx;

    for (idx=0; idx<sizeof(cases)/sizeof(cases[0]); ++idx) {
      size_t len;
      size_t got=scan_jsonescape(cases[idx].input,buf,&len);
      assert(got==cases[idx].consumed);
      if (cases[idx].output) {
        assert(len==cases[idx].outlen);
        assert(!memcmp(buf,cases[idx].output,cases[idx].outlen));
      }
    }
    return 0;
  }
  103. #endif