summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Felix von Leitner <felix-libowfat@fefe.de> 2018-09-30 19:18:35 +0000
committer Felix von Leitner <felix-libowfat@fefe.de> 2018-09-30 19:18:35 +0000
commit 133aa01439c80094aca30e3ba76042e8aa3eff99 (patch)
tree b505a8dc363e2f095f0063a3f78a3afda92c22ce
parent cc06de3f94cec2cc039e7b7ead67f46374e51296 (diff)
download libowfat-133aa01439c80094aca30e3ba76042e8aa3eff99.tar.gz
libowfat-133aa01439c80094aca30e3ba76042e8aa3eff99.zip
fix unit test for fmt_escapecharc
add unit tests for fmt_xmlescape
-rw-r--r-- fmt/fmt_escapecharc.c 2
-rw-r--r-- fmt/fmt_xmlescape.c 36
2 files changed, 35 insertions, 3 deletions
diff --git a/fmt/fmt_escapecharc.c b/fmt/fmt_escapecharc.c
index 99fb779..f67f12d 100644
--- a/fmt/fmt_escapecharc.c
+++ b/fmt/fmt_escapecharc.c
@@ -77,7 +77,7 @@ int main() {
assert(fmt_escapecharc(buf,'\t')==2 && !memcmp(buf,"\\t",2));
assert(fmt_escapecharc(buf,'\v')==2 && !memcmp(buf,"\\v",2));
assert(fmt_escapecharc(buf,'\\')==2 && !memcmp(buf,"\\\\",2));
- assert(fmt_escapecharc(buf,'1')==4 && !memcmp(buf,"\\001",2));
+ assert(fmt_escapecharc(buf,'1')==4 && !memcmp(buf,"\\061",4));
assert(fmt_escapecharc(buf,0xfefe)==6 && !memcmp(buf,"\\ufefe",6));
assert(fmt_escapecharc(buf,0xfefec0de)==10 && !memcmp(buf,"\\Ufefec0de",10));
return 0;
diff --git a/fmt/fmt_xmlescape.c b/fmt/fmt_xmlescape.c
index b3733f8..6b47d84 100644
--- a/fmt/fmt_xmlescape.c
+++ b/fmt/fmt_xmlescape.c
@@ -1,7 +1,11 @@
#include "fmt.h"
+/* This is NOT fmt_escapexml, which will escape everything, whether it
+ * needs escaping or not. This will only escape what needs escaping, and
+ * reject invalid inputs */
size_t fmt_xmlescape(char* dest,uint32_t ch) {
char* x;
+ char buf[6];
size_t n;
/*
From http://en.wikipedia.org/wiki/XML#Valid_characters
@@ -12,8 +16,7 @@ Unicode code points in the following ranges are valid in XML 1.0 documents:
U+10000–U+10FFFF: this includes all code points in supplementary planes, including non-characters.
*/
if (ch==0 || (ch>=0xd780 && ch<=0xdfff) || ch==0xfffe || ch==0xffff || ch>0x10ffff) return 0;
- if ((ch&0x7f)<20 && ch!=9 && ch!=0xa && ch!=0xd && ch!=0x85) {
- char buf[6];
+ if (ch<0x20 && ch!=9 && ch!=0xa && ch!=0xd) {
buf[0]='&';
buf[1]='#';
buf[2]='x';
@@ -38,3 +41,32 @@ Unicode code points in the following ranges are valid in XML 1.0 documents:
}
return n;
}
+
+#ifdef UNITTEST
+#undef UNITTEST
+
+#include "fmt_utf8.c"
+#include "fmt_xlong.c"
+
+#include <assert.h>
+#include <string.h>
+
+int main() {
+ char buf[100];
+ buf[0]=0x78;
+ assert(fmt_xmlescape(buf,0) == 0 && buf[0]==0x78); // 0 not allowed
+ assert(fmt_xmlescape(buf,0xd800) == 0 && buf[0]==0x78); // surrogate pairs not allowed
+ assert(fmt_xmlescape(buf,0xdfff) == 0 && buf[0]==0x78); // surrogate pairs not allowed
+ assert(fmt_xmlescape(buf,0xfffe) == 0 && buf[0]==0x78); // 0xfffe and 0xffff forbidden
+ assert(fmt_xmlescape(buf,0xffff) == 0 && buf[0]==0x78);
+ assert(fmt_xmlescape(buf,0x110000) == 0 && buf[0]==0x78); // too large
+ buf[1]=0x79;
+ assert(fmt_xmlescape(buf,9) == 1 && buf[0]==9 && buf[1]==0x79); // \t OK
+ assert(fmt_xmlescape(buf,10) == 1 && buf[0]==10 && buf[1]==0x79); // \n OK
+ assert(fmt_xmlescape(buf,13) == 1 && buf[0]==13 && buf[1]==0x79); // \r OK
+ buf[5]=0x77;
+ assert(fmt_xmlescape(buf,14) == 5 && !memcmp(buf,"&#xe;\x77",6)); // other control chars not OK
+ assert(fmt_xmlescape(buf,'&') == 5 && !memcmp(buf,"&amp;\x77",6)); // & -> &amp;
+ assert(fmt_xmlescape(buf,'<') == 4 && !memcmp(buf,"&lt;;\x77",6)); // < -> &lt;
+}
+#endif