Browse Source

add man page and unit tests for scan_base64url

master
Felix von Leitner 4 years ago
parent
commit
efafd510b8
  1. 8
      textcode/scan_base64.3
  2. 4
      textcode/scan_base64.c
  3. 32
      textcode/scan_base64url.3
  4. 31
      textcode/scan_base64url.c
  5. 63
      textcode/scan_html.c

8
textcode/scan_base64.3

@ -16,10 +16,6 @@ Note that real world base64 encoded data is sometimes permitted to
contain whitespace characters or new lines. This function will not allow
those and return the decoded data until then.
base64 works by taking 3 bytes of binary input and converting them into
4 bytes of printable ASCII. If the input ends in the middle of a base64
4-byte-tuple, scan_base64 will disregard the whole tuple.
Many base64 variants demand padding in the last block. Some don't. This
implementation will consume padding if it is there, but will not
complain if it is not.
@ -30,7 +26,7 @@ dest can be NULL. destlen can be NULL.
scan_base64 returns the number of bytes successfully scanned and
processed from src.
.SH EXAMPLES
scan_base64("%9FYO<F0`",buf,&i) -> return 8, i=5, buf="fnord"
scan_base64("Zm5vcmQ=",buf,&i) -> return 8, i=5, buf="fnord"
.SH "SEE ALSO"
scan_xlong(3), scan_8long(3), fmt_ulong(3)
scan_base64url(3), scan_xlong(3), scan_8long(3), fmt_ulong(3)

4
textcode/scan_base64.c

@ -48,8 +48,10 @@ int main() {
char buf[100];
size_t i,l;
memset(buf,0,10); assert(scan_base64("Zm5vcmQ=",buf,&l)==8 && l==5 && !memcmp(buf,"fnord",6));
/* check that we don't insist on the padding */
memset(buf,0,10); assert(scan_base64("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
memset(buf,0,10); assert(scan_base64("//8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xff",3));
/* check the special non-isalnum chars :) */
memset(buf,0,10); assert(scan_base64("/+8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xef",3));
return 0;
}
#endif

32
textcode/scan_base64url.3

@ -0,0 +1,32 @@
.TH scan_base64url 3
.SH NAME
scan_base64url \- decode base64url encoded data
.SH SYNTAX
.B #include <libowfat/textcode.h>
size_t \fBscan_base64url\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR);
.SH DESCRIPTION
base64url is a variant of base64 for use in URLs (standard base64 uses /
and +, which can cause problems in URLs, so base64url uses - and _
instead; also base64url does not use = padding at the end).
scan_base64url decodes base64url encoded data from src into dest.
It will stop when it encounters any invalid input character.
It will then write the number of decoded bytes in dest into *destlen,
and return the number of bytes decoded from src.
Many base64 variants demand padding in the last block. base64url does
not, so this implementation neither requires nor consumes = padding:
scanning stops at the first = character.
dest can be NULL. destlen can be NULL.
.SH "RETURN VALUE"
scan_base64url returns the number of bytes successfully scanned and
processed from src.
.SH EXAMPLES
scan_base64url("Zm5vcmQ",buf,&i) -> return 7, i=5, buf="fnord"
.SH "SEE ALSO"
scan_base64(3), scan_xlong(3), scan_8long(3), fmt_ulong(3)

31
textcode/scan_base64url.c

@ -15,17 +15,36 @@ static inline int dec(unsigned char x) {
/*
 * Decode base64url text from src.
 *
 * Scanning stops at the first byte that dec() rejects (negative return
 * value); base64url has no '=' padding, so a '=' ends the scan and is
 * not consumed.  Each accepted character contributes 6 bits; a decoded
 * byte is emitted whenever at least 8 bits have been collected.
 *
 * dest may be NULL, in which case nothing is stored but decoded bytes
 * are still counted.  destlen may be NULL; otherwise *destlen receives
 * the number of decoded bytes.
 *
 * Returns the number of bytes consumed from src.
 */
size_t scan_base64url(const char *src,char *dest,size_t *destlen) {
  unsigned short tmp=0,bits=0;
  register const unsigned char* s=(const unsigned char*) src;
  size_t i=0;	/* number of decoded bytes so far */
  for (;;) {
    int a=dec(*s);
    if (a<0) break;	/* base64url does not have padding */
    tmp=(tmp<<6)|a; bits+=6;
    ++s;
    if (bits>=8) {
      bits-=8;
      /* the low `bits` bits of tmp belong to the next output byte */
      if (dest) dest[i]=(tmp>>bits);
      ++i;
    }
  }
  if (destlen) *destlen=i;
  return (const char*)s-src;
}
#ifdef UNITTEST
#include <assert.h>
#include <string.h>
#include <stdio.h>
/*
 * Unit test driver for scan_base64url.
 *
 * Each assertion checks three things: the return value (bytes consumed
 * from the input), the decoded length stored through the third
 * argument, and the decoded bytes.  buf is zeroed before every call so
 * that comparing one byte more than the decoded length also verifies
 * that nothing was written past the decoded data.
 */
int main() {
  char buf[100];
  size_t l;
  /* check that we don't consume padding */
  memset(buf,0,10); assert(scan_base64url("Zm5vcmQ=",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
  /* check that we don't insist on the padding */
  memset(buf,0,10); assert(scan_base64url("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
  /* check the special non-isalnum chars :) */
  memset(buf,0,10); assert(scan_base64url("_-8=",buf,&l)==3 && l==2 && !memcmp(buf,"\xff\xef",3));
  return 0;
}
#endif

63
textcode/scan_html.c

@ -26,8 +26,8 @@ static const char* lookup(size_t ofs,const char* t) {
}
/*
 * Scanning context for scan_html_inner.  Illustrated with the markup
 *   <a href="http://example.com/&quot;foo">libowfat&lt;home</a>
 */
enum htmlmode {
  OUTSIDE,	/* text outside of tags: libowfat&lt;home -> libowfat<home */
  TAGARG,	/* tag argument value: http://example.com/&quot;foo -> http://example.com/"foo */
};
static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum htmlmode mode) {
@ -42,49 +42,41 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum ht
size_t j;
if ((s[i+2]&~32)=='X') {
j=scan_xlong(src+i+3,&l);
if (!j) j+=3;
if (j) j+=3;
} else {
j=scan_ulong(src+i+2,&l);
if (!j) j+=3;
if (j) j+=2;
}
if (s[i+j]==';') {
i+=j;
written+=fmt_utf8(dest+written,l);
written+=fmt_utf8(dest?dest+written:0,l);
} else {
dest[written++]='&';
if (dest) dest[written]='&';
++written;
}
continue;
}
utf8=lookup(1,src+i+1);
if (utf8) {
size_t l=strlen(utf8);
memcpy(dest+written,utf8,l);
if (dest) memcpy(dest+written,utf8,l);
written+=l;
i+=2+str_chr(src+i+2,';');
continue;
} else
dest[written]='&';
if (dest) dest[written]='&';
} else if (s[i]=='<') {
if (mode == OUTSIDE) break;
if (case_starts((const char*)s+i+1,"br>")) {
dest[written]='\n';
i+=3;
} else if (case_starts((const char*)s+i+1,"p>")) {
dest[written]='\n'; ++written;
dest[written]='\n';
i+=3;
} else
dest[written]=s[i];
break;
} else if (s[i]=='"' && mode==TAGARG) {
if (i==0) { dq=1; continue; }
break;
} else if (mode==TAGARG && !dq && (s[i]==' ' || s[i]=='\t' || s[i]=='\n'))
break;
else
dest[written]=s[i];
if (dest) dest[written]=s[i];
++written;
}
*destlen=written;
if (destlen) *destlen=written;
return i;
}
@ -98,13 +90,40 @@ size_t scan_html(const char *src,char *dest,size_t *destlen) {
#ifdef UNITTEST
#include <assert.h>
#undef UNITTEST
#include <scan/scan_fromhex.c>
#include <scan/scan_xlongn.c>
#include <scan/scan_xlong.c>
#include <scan/scan_ulongn.c>
#include <scan/scan_ulong.c>
#include <str/str_chr.c>
#include <fmt/fmt_utf8.c>
#include <stdio.h>
/*
 * Unit test driver for scan_html and scan_html_tagarg.
 *
 * Before most checks the output buffer is filled with '?' so that the
 * byte right after the decoded data can be verified to be untouched.
 */
int main() {
  char* sample="<a href=\"http://example.com/&quot;foo\">libowfat&lt;home</a>";
  const char* text=strchr(sample,'>')+1;	/* just past the opening tag */
  const char* arg=strchr(sample,'"')+1;	/* just past the opening quote of href */
  char out[100];
  size_t n;

  /* check that we stop at < */
  assert(scan_html(sample,out,&n)==0);
  assert(n==0);

  assert(scan_html(text,out,&n)==16);
  assert(n==13);
  assert(!memcmp(out,"libowfat<home",13));

  assert(scan_html_tagarg(arg,out,&n)==28);
  assert(n==23);
  assert(!memcmp(out,"http://example.com/\"foo",23));

  /* check that we properly decode &lt; */
  memset(out,'?',sizeof(out));
  assert(scan_html(text,out,&n)==16);
  assert(n==13);
  assert(!memcmp(out,"libowfat<home?",14));

  /* check that we stop at " and properly decode &quot; */
  memset(out,'?',sizeof(out));
  assert(scan_html_tagarg(arg,out,&n)==28);
  assert(n==23);
  assert(!memcmp(out,"http://example.com/\"foo?",24));

  /* check that we pass through invalid escapes */
  memset(out,'?',sizeof(out));
  assert(scan_html("&fnord;",out,&n)==7);
  assert(n==7);
  assert(!memcmp(out,"&fnord;?",8));

  memset(out,'?',sizeof(out));
  assert(scan_html("&#x;",out,&n)==4);
  assert(n==4);
  assert(!memcmp(out,"&#x;?",5));

  memset(out,'?',sizeof(out));
  assert(scan_html("&#;",out,&n)==3);
  assert(n==3);
  assert(!memcmp(out,"&#;?",4));

  /* check that &#x[hex]; is decoded properly */
  memset(out,'?',sizeof(out));
  assert(scan_html("&#x1;",out,&n)==5);
  assert(n==1);
  assert(out[0]==1);
  assert(out[1]=='?');

  /* check that &#[decimal]; is decoded properly */
  memset(out,'?',sizeof(out));
  assert(scan_html("&#1;",out,&n)==4);
  assert(n==1);
  assert(out[0]==1);
  assert(out[1]=='?');
}
#endif

Loading…
Cancel
Save