diff options
authorFelix von Leitner <>2014-09-17 13:58:59 +0000
committerFelix von Leitner <>2014-09-17 13:58:59 +0000
commitb3b2c14973713ce33c4d3eaabffe8620dad3d81c (patch)
parent95999b640a48b6cfa1390a3237a44db8173aa906 (diff)
add scan_html_tagarg
4 files changed, 34 insertions, 17 deletions
diff --git a/CHANGES b/CHANGES
index 30fb65f..4c3d9a3 100644
@@ -16,7 +16,7 @@
switch epoll from level triggering to edge triggering
introduce io_eagain_read and io_eagain_write (discontinue using io_eagain plz)
fix buffer_get
- add fmt_html_tagarg, fmt_xml
+ add fmt_html_tagarg, fmt_xml, scan_html_tagarg
save 8 bytes in taia.h for 64-bit systems
diff --git a/textcode.h b/textcode.h
index 336668a..2bb1132 100644
--- a/textcode.h
+++ b/textcode.h
@@ -57,19 +57,24 @@ size_t fmt_base85(char* dest,const char* src,size_t len);
/* These read one line from src, decode it, and write the result to
* dest. The number of decoded bytes is written to destlen. dest
* should be able to hold strlen(src) bytes as a rule of thumb. */
-size_t scan_uuencoded(const char *src,char *dest,size_t *destlen);
-size_t scan_base64(const char *src,char *dest,size_t *destlen);
-size_t scan_quotedprintable(const char *src,char *dest,size_t *destlen);
-size_t scan_urlencoded(const char *src,char *dest,size_t *destlen);
-size_t scan_urlencoded2(const char *src,char *dest,size_t *destlen);
-size_t scan_yenc(const char *src,char *dest,size_t *destlen);
-size_t scan_hexdump(const char *src,char *dest,size_t *destlen);
-size_t scan_html(const char *src,char *dest,size_t *destlen);
-size_t scan_cescape(const char *src,char *dest,size_t *destlen);
-size_t scan_ldapescape(const char* src,char* dest,size_t *destlen);
-size_t scan_jsonescape(const char* src,char* dest,size_t *destlen);
-size_t scan_base85(const char* src,char* dest,size_t *destlen);
+size_t scan_uuencoded(const char* src,char* dest,size_t* destlen);
+size_t scan_base64(const char* src,char* dest,size_t* destlen);
+size_t scan_quotedprintable(const char* src,char* dest,size_t* destlen);
+size_t scan_urlencoded(const char* src,char* dest,size_t* destlen);
+size_t scan_urlencoded2(const char* src,char* dest,size_t* destlen);
+size_t scan_yenc(const char* src,char* dest,size_t* destlen);
+size_t scan_hexdump(const char* src,char* dest,size_t* destlen);
+/* decodes all html5-standardized &foo; escapes, and also
+ * "<br>" to "\n" and "<p>" to "\n\n", leaves the rest of the tags alone */
+size_t scan_html(const char* src,char* dest,size_t* destlen);
+/* decodes all html5-standardized &foo; escapes, but leaves all tags
+ * alone */
+size_t scan_html_tagarg(const char* src,char* dest,size_t* destlen);
+size_t scan_cescape(const char* src,char* dest,size_t* destlen);
+size_t scan_ldapescape(const char* src,char* dest,size_t* destlen);
+size_t scan_jsonescape(const char* src,char* dest,size_t* destlen);
+size_t scan_base85(const char* src,char* dest,size_t* destlen);
/* WARNING: these functions _append_ to the stralloc, not overwrite! */
diff --git a/textcode/fmt_html.c b/textcode/fmt_html.c
index bd3e238..4847d90 100644
--- a/textcode/fmt_html.c
+++ b/textcode/fmt_html.c
@@ -13,7 +13,11 @@ size_t fmt_html(char* dest,const char* src,size_t len) {
case '<': seq="&lt;"; goto doit;
case '>': seq="&gt;"; goto doit;
case '\n':
- seq="<br>";
+ /* Peek at s[i+1] only while i+1 is still below len; testing just
+  * i<len does not bound that look-ahead read. Two consecutive
+  * newlines are emitted as one "<p>" and the second newline is
+  * skipped via ++i; a single newline is emitted as "<br>". */
+ if (i+1<len && s[i+1]=='\n') {
+   seq="<p>";
+   ++i;
+ } else
+   seq="<br>";
diff --git a/textcode/scan_html.c b/textcode/scan_html.c
index 89fee30..97e9122 100644
--- a/textcode/scan_html.c
+++ b/textcode/scan_html.c
@@ -25,7 +25,7 @@ static const char* lookup(size_t ofs,const char* t) {
return NULL;
-size_t scan_html(const char *src,char *dest,size_t *destlen) {
+static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,int flag) {
register const unsigned char* s=(const unsigned char*) src;
size_t written=0,i;
for (i=0; s[i]; ++i) {
@@ -58,7 +58,7 @@ size_t scan_html(const char *src,char *dest,size_t *destlen) {
} else
- } else if (s[i]=='<') {
+ } else if (flag && s[i]=='<') {
if (case_starts((const char*)s+i+1,"br>")) {
@@ -75,3 +75,11 @@ size_t scan_html(const char *src,char *dest,size_t *destlen) {
return i;
+/* Tag-argument variant: passes flag 0, so the "flag && s[i]=='<'"
+ * branch of scan_html_inner is never taken and '<' gets no tag
+ * handling, matching the "leaves all tags alone" contract in
+ * textcode.h. */
+size_t scan_html_tagarg(const char *src,char *dest,size_t *destlen) {
+ return scan_html_inner(src,dest,destlen,0);
+/* Full variant: passes a non-zero flag, enabling the '<' branch of
+ * scan_html_inner (the "<br>" handling), as textcode.h documents for
+ * scan_html. */
+size_t scan_html(const char *src,char *dest,size_t *destlen) {
+ return scan_html_inner(src,dest,destlen,1);