Mirror of :pserver:cvs@cvs.fefe.de:/cvs libowfat https://www.fefe.de/libowfat/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

73 lines
2.4 KiB

  1. #include "fmt.h"
  /* This is NOT fmt_escapexml, which will escape everything, whether it
   * needs escaping or not. This will only escape what needs escaping, and
   * reject invalid inputs.
   *
   * Writes the representation of code point ch in XML text to dest and
   * returns the number of bytes it occupies. No terminating 0 byte is
   * written. If dest is NULL, nothing is written by this function itself
   * and only the length is returned.
   *
   *   - 0, surrogates (U+D800..U+DFFF), U+FFFE, U+FFFF and anything above
   *     U+10FFFF are rejected: returns 0 and dest is left untouched.
   *   - C0 controls other than \t, \n and \r become "&#xN;" (hex).
   *   - '&' becomes "&amp;", '<' becomes "&lt;".
   *   - everything else is handed to fmt_utf8(dest,ch) and its return
   *     value is passed through (presumably the UTF-8 encoding of ch).
   *
   * NOTE(review): XML 1.0 does not allow character references to C0
   * controls either (XML 1.1 does); confirm that callers are fine with
   * the "&#xN;" output for those. */
  size_t fmt_xmlescape(char* dest,uint32_t ch) {
    const char* x;	/* the escape sequence to copy out */
    char buf[6];	/* "&#x" + at most two hex digits (ch<0x20) + ";" */
    size_t n;	/* length of the sequence at x */
    /*
    From http://en.wikipedia.org/wiki/XML#Valid_characters

    Unicode code points in the following ranges are valid in XML 1.0 documents:

      U+0009, U+000A, U+000D: these are the only C0 controls accepted in XML 1.0;
      U+0020–U+D7FF, U+E000–U+FFFD: this excludes some (not all) non-characters in the BMP (all surrogates, U+FFFE and U+FFFF are forbidden);
      U+10000–U+10FFFF: this includes all code points in supplementary planes, including non-characters.
    */
    /* The surrogate block starts at U+D800; U+D780..U+D7FF are ordinary,
     * valid characters and must not be rejected. */
    if (ch==0 || (ch>=0xd800 && ch<=0xdfff) || ch==0xfffe || ch==0xffff || ch>0x10ffff) return 0;
    if (ch<0x20 && ch!=9 && ch!=0xa && ch!=0xd) {
      /* disallowed control character: emit a numeric character reference */
      buf[0]='&';
      buf[1]='#';
      buf[2]='x';
      n=3+fmt_xlong(buf+3,ch);
      buf[n++]=';';
      x=buf;
    } else {
      switch (ch) {
      case '&':
        x="&amp;"; n=5;
        break;
      case '<':
        x="&lt;"; n=4;
        break;
      default:
        /* needs no escaping */
        return fmt_utf8(dest,ch);
      }
    }
    if (dest) {
      size_t i;
      for (i=0; i<n; ++i)
        dest[i]=x[i];
    }
    return n;
  }
  42. #ifdef UNITTEST
  43. #undef UNITTEST
  44. #include "fmt_utf8.c"
  45. #include "fmt_xlong.c"
  46. #include <assert.h>
  47. #include <string.h>
  /* Unit test driver for fmt_xmlescape. Bytes just past the expected output
   * are pre-filled with sentinels so that the asserts also catch writes
   * beyond the returned length. */
  int main() {
    /* code points that must be refused (return 0, buffer untouched) */
    static const uint32_t refused[] = {
      0,		/* 0 not allowed */
      0xd800, 0xdfff,	/* surrogate pairs not allowed */
      0xfffe, 0xffff,	/* 0xfffe and 0xffff forbidden */
      0x110000		/* too large */
    };
    /* control characters that pass through unescaped: \t \n \r */
    static const uint32_t verbatim[] = { 9, 10, 13 };
    char out[100];
    size_t k;

    out[0]=0x78;
    for (k=0; k<sizeof refused/sizeof refused[0]; ++k) {
      assert(fmt_xmlescape(out,refused[k]) == 0);
      assert(out[0]==0x78);
    }

    out[1]=0x79;
    for (k=0; k<sizeof verbatim/sizeof verbatim[0]; ++k) {
      assert(fmt_xmlescape(out,verbatim[k]) == 1);
      assert((uint32_t)out[0]==verbatim[k]);
      assert(out[1]==0x79);
    }

    out[5]=0x77;
    /* other control chars are turned into numeric references */
    assert(fmt_xmlescape(out,14) == 5);
    assert(!memcmp(out,"&#xe;\x77",6));
    /* & -> &amp; */
    assert(fmt_xmlescape(out,'&') == 5);
    assert(!memcmp(out,"&amp;\x77",6));
    /* < -> &lt; (the ';' left over from "&amp;" stays at out[4]) */
    assert(fmt_xmlescape(out,'<') == 4);
    assert(!memcmp(out,"&lt;;\x77",6));
    return 0;
  }
  66. #endif