diff --git a/.gitignore b/.gitignore index fc6317cf..c27120cf 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ sconsbuild/ stamp-h1 test_base64 test_buffer +test_burl test_configfile test_request versionstamp.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f876fbf7..9d5d6c26 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -553,7 +553,7 @@ add_definitions(-DHAVE_CONFIG_H) include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) set(COMMON_SRC - base64.c buffer.c log.c + base64.c buffer.c burl.c log.c http_kv.c keyvalue.c chunk.c http_chunk.c stream.c fdevent.c gw_backend.c stat_cache.c plugin.c joblist.c etag.c array.c @@ -650,6 +650,13 @@ add_executable(test_buffer ) add_test(NAME test_buffer COMMAND test_buffer) +add_executable(test_burl + t/test_burl.c + burl.c + buffer.c +) +add_test(NAME test_burl COMMAND test_burl) + add_executable(test_base64 t/test_base64.c buffer.c @@ -871,6 +878,8 @@ if(WITH_LIBUNWIND) target_link_libraries(test_buffer ${LIBUNWIND_LDFLAGS}) add_target_properties(test_buffer COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) + target_link_libraries(test_burl ${LIBUNWIND_LDFLAGS}) + add_target_properties(test_burl COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) target_link_libraries(test_base64 ${LIBUNWIND_LDFLAGS}) add_target_properties(test_base64 COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) target_link_libraries(test_configfile ${PCRE_LDFLAGS} ${LIBUNWIND_LDFLAGS}) diff --git a/src/Makefile.am b/src/Makefile.am index 3160388f..28fd4dfa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,11 +1,18 @@ AM_CFLAGS = $(FAM_CFLAGS) $(LIBUNWIND_CFLAGS) -noinst_PROGRAMS=t/test_buffer t/test_base64 t/test_configfile t/test_request +noinst_PROGRAMS=\ + t/test_buffer \ + t/test_burl \ + t/test_base64 \ + t/test_configfile \ + t/test_request + sbin_PROGRAMS=lighttpd lighttpd-angel LEMON=$(top_builddir)/src/lemon$(BUILD_EXEEXT) TESTS=\ t/test_buffer$(EXEEXT) \ + t/test_burl$(EXEEXT) \ t/test_base64$(EXEEXT) \ 
t/test_configfile$(EXEEXT) \ t/test_request$(EXEEXT) @@ -56,7 +63,7 @@ BUILT_SOURCES = parsers versionstamp MAINTAINERCLEANFILES = configparser.c configparser.h mod_ssi_exprparser.c mod_ssi_exprparser.h CLEANFILES = versionstamp.h versionstamp.h.tmp lemon$(BUILD_EXEEXT) -common_src=base64.c buffer.c log.c \ +common_src=base64.c buffer.c burl.c log.c \ http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ stat_cache.c plugin.c joblist.c etag.c array.c \ @@ -384,7 +391,7 @@ mod_wstunnel_la_LDFLAGS = $(common_module_ldflags) mod_wstunnel_la_LIBADD = $(common_libadd) $(CRYPTO_LIB) -hdr = server.h base64.h buffer.h network.h log.h http_kv.h keyvalue.h \ +hdr = server.h base64.h buffer.h burl.h network.h log.h http_kv.h keyvalue.h \ response.h request.h fastcgi.h chunk.h \ first.h settings.h http_chunk.h \ algo_sha1.h md5.h http_auth.h http_vhostdb.h stream.h \ @@ -517,6 +524,9 @@ t_test_buffer_LDADD = $(LIBUNWIND_LIBS) t_test_base64_SOURCES = t/test_base64.c base64.c buffer.c t_test_base64_LDADD = $(LIBUNWIND_LIBS) +t_test_burl_SOURCES = t/test_burl.c burl.c buffer.c +t_test_burl_LDADD = $(LIBUNWIND_LIBS) + t_test_configfile_SOURCES = t/test_configfile.c buffer.c array.c data_config.c data_string.c http_kv.c vector.c log.c sock_addr.c t_test_configfile_LDADD = $(PCRE_LIB) $(LIBUNWIND_LIBS) diff --git a/src/SConscript b/src/SConscript index ffcbb3df..4c7d96d5 100644 --- a/src/SConscript +++ b/src/SConscript @@ -55,7 +55,7 @@ def GatherLibs(env, *libs): libs = RemoveDuplicateLibs(env['LIBS'] + list(libs) + [env['APPEND_LIBS']]) return WorkaroundFreeBSDLibOrder(libs) -common_src = Split("base64.c buffer.c log.c \ +common_src = Split("base64.c buffer.c burl.c log.c \ http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ stat_cache.c plugin.c joblist.c etag.c array.c \ diff --git a/src/base.h b/src/base.h index 615d5584..ea040f46 100644 --- a/src/base.h +++ b/src/base.h @@ -360,6 +360,7 @@ typedef struct { unsigned short 
http_header_strict; unsigned short http_host_strict; unsigned short http_host_normalize; + unsigned short http_url_normalize; unsigned short high_precision_timestamps; time_t loadts; double loadavg[3]; diff --git a/src/burl.c b/src/burl.c new file mode 100644 index 00000000..3eadb6ce --- /dev/null +++ b/src/burl.c @@ -0,0 +1,357 @@ +#include "first.h" +#include "burl.h" + +#include + +#include "buffer.h" + +static const char hex_chars_uc[] = "0123456789ABCDEF"; + +/* everything except: ! $ & ' ( ) * + , - . / 0-9 : ; = ? @ A-Z _ a-z ~ */ +static const char encoded_chars_http_uri_reqd[] = { + /* + 0 1 2 3 4 5 6 7 8 9 A B C D E F + */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ + 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 2F space " # % */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 30 - 3F < > */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 50 - 5F [ \ ] ^ */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 70 - 7F { | } DEL */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */ +}; + + +/* c (char) and n (nibble) MUST be unsigned integer types */ +#define li_cton(c,n) \ + (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? 
((n) += 10) : 0)) + +/* b (byte) MUST be unsigned integer type + * https://en.wikipedia.org/wiki/UTF-8 + * reject overlong encodings of 7-bit ASCII and invalid UTF-8 + * (but does not detect other overlong multi-byte encodings) */ +#define li_utf8_invalid_byte(b) ((b) >= 0xF5 || ((b)|0x1) == 0xC1) + + +static int burl_is_unreserved (const int c) +{ + return (light_isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'); +} + + +static int burl_normalize_basic_unreserved_fix (buffer *b, buffer *t, int i, int qs) +{ + int j = i; + const int used = (int)buffer_string_length(b); + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char * const p = + (unsigned char *)buffer_string_prepare_copy(t,i+(used-i)*3+1); + unsigned int n1, n2; + memcpy(p, s, (size_t)i); + for (; i < used; ++i, ++j) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = j; + p[j] = s[i]; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) { + const unsigned int x = (n1 << 4) | n2; + if (burl_is_unreserved(x)) { + p[j] = x; + } + else { + p[j] = '%'; + p[++j] = hex_chars_uc[n1]; /*(s[i+1] & 0xdf)*/ + p[++j] = hex_chars_uc[n2]; /*(s[i+2] & 0xdf)*/ + if (li_utf8_invalid_byte(x)) qs = -2; + } + i+=2; + } + else if (s[i] == '#') break; /* ignore fragment */ + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + if (li_utf8_invalid_byte(s[i])) qs = -2; + } + } + buffer_commit(t, (size_t)j); + buffer_copy_buffer(b, t); + return qs; +} + + +static int burl_normalize_basic_unreserved (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2, x; + int qs = -1; + + for (int i = 0; i < used; ++i) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' 
&& -1 == qs) qs = i; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2) + && !burl_is_unreserved((x = (n1 << 4) | n2))) { + if (li_utf8_invalid_byte(x)) qs = -2; + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + else if (s[i] == '#') { /* ignore fragment */ + buffer_string_set_length(b, (size_t)i); + break; + } + else { + qs = burl_normalize_basic_unreserved_fix(b, t, i, qs); + break; + } + } + + return qs; +} + + +static int burl_normalize_basic_required_fix (buffer *b, buffer *t, int i, int qs) +{ + int j = i; + const int used = (int)buffer_string_length(b); + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char * const p = + (unsigned char *)buffer_string_prepare_copy(t,i+(used-i)*3+1); + unsigned int n1, n2; + memcpy(p, s, (size_t)i); + for (; i < used; ++i, ++j) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = j; + p[j] = s[i]; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) { + const unsigned int x = (n1 << 4) | n2; + if (!encoded_chars_http_uri_reqd[x] + && (qs < 0 ? 
(x!='/'&&x!='?') : (x!='&'&&x!='='&&x!=';'))) { + p[j] = x; + } + else { + p[j] = '%'; + p[++j] = hex_chars_uc[n1]; /*(s[i+1] & 0xdf)*/ + p[++j] = hex_chars_uc[n2]; /*(s[i+2] & 0xdf)*/ + if (li_utf8_invalid_byte(x)) qs = -2; + } + i+=2; + } + else if (s[i] == '#') break; /* ignore fragment */ + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + if (li_utf8_invalid_byte(s[i])) qs = -2; + } + } + buffer_commit(t, (size_t)j); + buffer_copy_buffer(b, t); + return qs; +} + + +static int burl_normalize_basic_required (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2, x; + int qs = -1; + + for (int i = 0; i < used; ++i) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = i; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2) + && (encoded_chars_http_uri_reqd[(x = (n1 << 4) | n2)] + ||(qs < 0 ? 
(x=='/'||x=='?') : (x=='&'||x=='='||x==';')))){ + if (li_utf8_invalid_byte(x)) qs = -2; + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + else if (s[i] == '#') { /* ignore fragment */ + buffer_string_set_length(b, (size_t)i); + break; + } + else { + qs = burl_normalize_basic_required_fix(b, t, i, qs); + break; + } + } + + return qs; +} + + +static int burl_contains_ctrls (const buffer *b) +{ + const char * const s = b->ptr; + const int used = (int)buffer_string_length(b); + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && (s[i+1] < '2' || (s[i+1] == '7' && s[i+2] == 'F'))) + return 1; + } + return 0; +} + + +static void burl_normalize_qs20_to_plus_fix (buffer *b, int i) +{ + char * const s = b->ptr; + const int used = (int)buffer_string_length(b); + int j = i; + for (; i < used; ++i, ++j) { + s[j] = s[i]; + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == '0') { + s[j] = '+'; + i+=2; + } + } + buffer_string_set_length(b, j); +} + + +static void burl_normalize_qs20_to_plus (buffer *b, int qs) +{ + const char * const s = b->ptr; + const int used = qs < 0 ? 0 : (int)buffer_string_length(b); + int i; + if (qs < 0) return; + for (i = qs+1; i < used; ++i) { + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == '0') break; + } + if (i != used) burl_normalize_qs20_to_plus_fix(b, i); +} + + +static int burl_normalize_2F_to_slash_fix (buffer *b, int qs, int i) +{ + char * const s = b->ptr; + const int blen = (int)buffer_string_length(b); + const int used = qs < 0 ? 
blen : qs; + int j = i; + for (; i < used; ++i, ++j) { + s[j] = s[i]; + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == 'F') { + s[j] = '/'; + i+=2; + } + } + if (qs >= 0) { + memmove(s+j, s+qs, blen - qs); + j += blen - qs; + } + buffer_string_set_length(b, j); + return qs; +} + + +static int burl_normalize_2F_to_slash (buffer *b, int qs, int flags) +{ + /*("%2F" must already have been uppercased during normalization)*/ + const char * const s = b->ptr; + const int used = qs < 0 ? (int)buffer_string_length(b) : qs; + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == 'F') { + return (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE) + ? burl_normalize_2F_to_slash_fix(b, qs, i) + : -2; /*(flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)*/ + } + } + return qs; +} + + +static int burl_normalize_path (buffer *b, buffer *t, int qs, int flags) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + int path_simplify = 0; + for (int i = 0, len = qs < 0 ? used : qs; i < len; ++i) { + if (s[i] == '.' && (s[i+1] != '.' || ++i) + && (s[i+1] == '/' || s[i+1] == '?' 
|| s[i+1] == '\0')) { + path_simplify = 1; + break; + } + do { ++i; } while (i < len && s[i] != '/'); + if (s[i] == '/' && s[i+1] == '/') { /*(s[len] != '/')*/ + path_simplify = 1; + break; + } + } + + if (path_simplify) { + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT) return -2; + if (qs >= 0) { + buffer_copy_string_len(t, b->ptr+qs, used - qs); + buffer_string_set_length(b, qs); + } + + buffer_path_simplify(b, b); + + if (qs >= 0) { + qs = (int)buffer_string_length(b); + buffer_append_string_len(b, CONST_BUF_LEN(t)); + } + } + + return qs; +} + + +int burl_normalize (buffer *b, buffer *t, int flags) +{ + int qs; + + #if defined(__WIN32) || defined(__CYGWIN__) + /* Windows and Cygwin treat '\\' as '/' if '\\' is present in path; + * convert to '/' for consistency before percent-encoding + * normalization which will convert '\\' to "%5C" in the URL. + * (Clients still should not be sending '\\' unencoded in requests.) */ + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS) { + for (char *p = b->ptr; *p != '?' && *p != '\0'; ++p) { + if (*p == '\\') *p = '/'; + } + } + #endif + + qs = (flags & HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED) + ? 
burl_normalize_basic_required(b, t) + : burl_normalize_basic_unreserved(b, t); + if (-2 == qs) return -2; + + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) { + if (burl_contains_ctrls(b)) return -2; + } + + if (flags & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) { + qs = burl_normalize_2F_to_slash(b, qs, flags); + if (-2 == qs) return -2; + } + + if (flags & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) { + qs = burl_normalize_path(b, t, qs, flags); + if (-2 == qs) return -2; + } + + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS) { + if (qs >= 0) burl_normalize_qs20_to_plus(b, qs); + } + + return qs; +} diff --git a/src/burl.h b/src/burl.h new file mode 100644 index 00000000..d81cc572 --- /dev/null +++ b/src/burl.h @@ -0,0 +1,25 @@ +#ifndef INCLUDED_BURL_H +#define INCLUDED_BURL_H +#include "first.h" + +#include "buffer.h" + +enum burl_opts_e { + HTTP_PARSEOPT_HEADER_STRICT = 0x1 + ,HTTP_PARSEOPT_HOST_STRICT = 0x2 + ,HTTP_PARSEOPT_HOST_NORMALIZE = 0x4 + ,HTTP_PARSEOPT_URL_NORMALIZE = 0x8/*normalize chars %-encoded, uppercase hex*/ + ,HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED =0x10 /* decode unreserved */ + ,HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED =0x20 /* decode (un)reserved*/ + ,HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT =0x40 + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS=0x80 /* "\\" -> "/" Cygwin */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE =0x100/* "%2F"-> "/" */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT =0x200 + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE =0x400/* "." ".." 
"//" */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT =0x800 + ,HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS =0x1000 +}; + +int burl_normalize (buffer *b, buffer *t, int flags); + +#endif diff --git a/src/configfile.c b/src/configfile.c index c3405439..7ce44dfb 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -1,13 +1,14 @@ #include "first.h" #include "base.h" +#include "burl.h" #include "fdevent.h" +#include "keyvalue.h" #include "log.h" #include "stream.h" #include "configparser.h" #include "configfile.h" -#include "request.h" #include "stat_cache.h" #include @@ -63,10 +64,103 @@ static void config_warn_openssl_module (server *srv) { } #endif +static int config_http_parseopts (server *srv, array *a) { + unsigned short int opts = srv->srvconf.http_url_normalize; + unsigned short int decode_2f = 1; + int rc = 1; + if (!array_is_kvstring(a)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "unexpected value for server.http-parseopts; " + "expected list of \"key\" => \"[enable|disable]\""); + return 0; + } + for (size_t i = 0; i < a->used; ++i) { + const data_string * const ds = (data_string *)a->data[i]; + unsigned short int opt; + if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize-unreserved"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize-required"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-ctrls-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-backslash-trans"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-2f-decode"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + else if (buffer_is_equal_string(ds->key, 
CONST_STR_LEN("url-path-2f-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-dotseg-remove"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-dotseg-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-query-20-plus"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; + else { + log_error_write(srv, __FILE__, __LINE__, "sb", + "unrecognized key for server.http-parseopts:", + ds->key); + rc = 0; + continue; + } + if (buffer_is_equal_string(ds->value, CONST_STR_LEN("enable"))) + opts |= opt; + else if (buffer_is_equal_string(ds->value, CONST_STR_LEN("disable"))) { + opts &= ~opt; + if (opt == HTTP_PARSEOPT_URL_NORMALIZE) { + opts = 0; + break; + } + if (opt == HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE) { + decode_2f = 0; + } + } + else { + log_error_write(srv, __FILE__, __LINE__, "sbsbs", + "unrecognized value for server.http-parseopts:", + ds->key, "=>", ds->value, + "(expect \"[enable|disable]\")"); + rc = 0; + } + } + if (opts != 0) { + opts |= HTTP_PARSEOPT_URL_NORMALIZE; + if ((opts & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) + == (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "conflicting options in server.http-parseopts:" + "url-path-2f-decode, url-path-2f-reject"); + rc = 0; + } + if ((opts & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) + == (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "conflicting options in server.http-parseopts:" + "url-path-dotseg-remove, url-path-dotseg-reject"); + rc = 0; + } + if (!(opts & 
(HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED + |HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED))) { + opts |= HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; + if (decode_2f + && !(opts & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) + opts |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + } + } + srv->srvconf.http_url_normalize = opts; + return rc; +} + static int config_insert(server *srv) { size_t i; int ret = 0; buffer *stat_cache_string; + array *http_parseopts; config_values_t cv[] = { { "server.bind", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 0 */ @@ -164,6 +258,7 @@ static int config_insert(server *srv) { { "server.error-intercept", NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_CONNECTION }, /* 79 */ { "server.syslog-facility", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 80 */ { "server.socket-perms", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION }, /* 81 */ + { "server.http-parseopts", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_SERVER }, /* 82 */ { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET } }; @@ -204,6 +299,8 @@ static int config_insert(server *srv) { cv[74].destination = &(srv->srvconf.http_host_normalize); cv[78].destination = &(srv->srvconf.max_request_field_size); cv[80].destination = srv->srvconf.syslog_facility; + http_parseopts = array_init(); + cv[82].destination = http_parseopts; srv->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *)); @@ -212,7 +309,7 @@ static int config_insert(server *srv) { -analyzer */ for (i = 0; i < srv->config_context->used; i++) { - data_config const* config = (data_config const*)srv->config_context->data[i]; + data_config * const config = (data_config *)srv->config_context->data[i]; specific_config *s; s = calloc(1, sizeof(specific_config)); @@ -363,6 +460,35 @@ static int config_insert(server *srv) { } } + if (0 == i) { + if (!config_http_parseopts(srv, http_parseopts)) { + ret = HANDLER_ERROR; + break; + } + } + + if (srv->srvconf.http_url_normalize + && COMP_HTTP_QUERY_STRING == config->comp) { + 
switch(config->cond) { + case CONFIG_COND_NE: + case CONFIG_COND_EQ: + /* (can use this routine as long as it does not perform + * any regex-specific normalization of first arg) */ + pcre_keyvalue_burl_normalize_key(config->string, srv->tmp_buf); + break; + case CONFIG_COND_NOMATCH: + case CONFIG_COND_MATCH: + pcre_keyvalue_burl_normalize_key(config->string, srv->tmp_buf); + if (!data_config_pcre_compile(config)) { + ret = HANDLER_ERROR; + } + break; + default: + break; + } + if (HANDLER_ERROR == ret) break; + } + #if !(defined HAVE_LIBSSL && defined HAVE_OPENSSL_SSL_H) if (s->ssl_enabled) { log_error_write(srv, __FILE__, __LINE__, "s", @@ -372,6 +498,7 @@ static int config_insert(server *srv) { } #endif } + array_free(http_parseopts); { specific_config *s = srv->config_storage[0]; @@ -380,6 +507,7 @@ static int config_insert(server *srv) { |(srv->srvconf.http_host_strict ?(HTTP_PARSEOPT_HOST_STRICT |HTTP_PARSEOPT_HOST_NORMALIZE):0) |(srv->srvconf.http_host_normalize ?(HTTP_PARSEOPT_HOST_NORMALIZE):0); + s->http_parseopts |= srv->srvconf.http_url_normalize; } if (0 != stat_cache_choose_engine(srv, stat_cache_string)) { diff --git a/src/keyvalue.c b/src/keyvalue.c index a2def34c..69a4a9f1 100644 --- a/src/keyvalue.c +++ b/src/keyvalue.c @@ -200,3 +200,122 @@ handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ return HANDLER_GO_ON; } #endif + + +/* modified from burl_normalize_basic() to handle %% extra encoding layer */ + +/* c (char) and n (nibble) MUST be unsigned integer types */ +#define li_cton(c,n) \ + (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? 
((n) += 10) : 0)) + +static void pcre_keyvalue_burl_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i]=='%' && li_cton(s[i+1],n1) && li_cton(s[i+2],n2)) { + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + } +} + +static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && s[i+1]=='%' + && li_cton(s[i+2],n1) && li_cton(s[i+3],n2)) { + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + if (s[i+3] >= 'a') b->ptr[i+3] &= 0xdf; /* uppercase hex */ + i+=3; + } + } +} + +static const char hex_chars_uc[] = "0123456789ABCDEF"; + +static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char *)buffer_string_prepare_copy(t, used+(count*2)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char 
*)buffer_string_prepare_copy(t, used+(count*3)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +/* Basic normalization of regex and regex replacement to mirror some of + * the normalizations performed on request URI (for better compatibility). + * Note: not currently attempting to replace unnecessary percent-encoding + * (would need to know if regex was intended to match url-path or + * query-string or both, and then would have to regex-escape if those + * chars were special regex chars such as . * + ? ( ) [ ] | and more) + * Not attempting to percent-encode chars which should be encoded, again + * since regex might target url-path, query-string, or both, and we would + * have to avoid percent-encoding special regex chars. + * Also not attempting to detect unnecessary regex-escapes in, e.g. %\x\x + * Preserve improper %-encoded sequences which are not %XX (using hex chars) + * Intentionally not performing path simplification (e.g. ./ ../) + * If regex-specific normalizations begin to be made to k here, + * must revisit callers, e.g. one configfile.c use on non-regex string. + * "%%" (percent_percent) is used in regex replacement strings since + * otherwise "%n" is used to indicate regex backreference where n is number. 
+ */ + +void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t) +{ + pcre_keyvalue_burl_percent_toupper(k); + pcre_keyvalue_burl_percent_high_UTF8(k, t); +} + +void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t) +{ + pcre_keyvalue_burl_percent_percent_toupper(v); + pcre_keyvalue_burl_percent_percent_high_UTF8(v, t); +} diff --git a/src/keyvalue.h b/src/keyvalue.h index b175b6af..b50e7615 100644 --- a/src/keyvalue.h +++ b/src/keyvalue.h @@ -23,5 +23,7 @@ pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void); int pcre_keyvalue_buffer_append(struct server *srv, pcre_keyvalue_buffer *kvb, buffer *key, buffer *value); void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb); handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, buffer *input, buffer *result); +void pcre_keyvalue_burl_normalize_key(buffer *k, buffer *t); +void pcre_keyvalue_burl_normalize_value(buffer *v, buffer *t); #endif diff --git a/src/meson.build b/src/meson.build index 7a47d078..18a7ddbc 100644 --- a/src/meson.build +++ b/src/meson.build @@ -539,6 +539,7 @@ common_src = [ 'array.c', 'base64.c', 'buffer.c', + 'burl.c', 'chunk.c', 'configfile-glue.c', 'connections-glue.c', @@ -692,6 +693,12 @@ test('test_buffer', executable('test_buffer', build_by_default: false, )) +test('test_burl', executable('test_burl', + sources: ['t/test_burl.c', 'burl.c', 'buffer.c'], + dependencies: common_flags + libunwind, + build_by_default: false, +)) + test('test_base64', executable('test_base64', sources: ['t/test_base64.c', 'buffer.c', 'base64.c'], dependencies: common_flags + libunwind, diff --git a/src/mod_redirect.c b/src/mod_redirect.c index f5e1b4f2..6790929a 100644 --- a/src/mod_redirect.c +++ b/src/mod_redirect.c @@ -98,6 +98,10 @@ SETDEFAULTS_FUNC(mod_redirect_set_defaults) { for (j = 0; j < da->value->used; j++) { data_string *ds = (data_string *)da->value->data[j]; + if (srv->srvconf.http_url_normalize) { + pcre_keyvalue_burl_normalize_key(ds->key, 
srv->tmp_buf); + pcre_keyvalue_burl_normalize_value(ds->value, srv->tmp_buf); + } if (0 != pcre_keyvalue_buffer_append(srv, s->redirect, ds->key, ds->value)) { log_error_write(srv, __FILE__, __LINE__, "sb", "pcre-compile failed for", ds->key); diff --git a/src/mod_rewrite.c b/src/mod_rewrite.c index ae6592f9..07e03f7a 100644 --- a/src/mod_rewrite.c +++ b/src/mod_rewrite.c @@ -85,6 +85,10 @@ static int parse_config_entry(server *srv, array *ca, pcre_keyvalue_buffer *kvb, for (j = 0; j < da->value->used; j++) { data_string *ds = (data_string *)da->value->data[j]; + if (srv->srvconf.http_url_normalize) { + pcre_keyvalue_burl_normalize_key(ds->key, srv->tmp_buf); + pcre_keyvalue_burl_normalize_value(ds->value, srv->tmp_buf); + } if (0 != pcre_keyvalue_buffer_append(srv, kvb, ds->key, ds->value)) { log_error_write(srv, __FILE__, __LINE__, "sb", "pcre-compile failed for", ds->key); diff --git a/src/request.c b/src/request.c index ac7141d4..4e7b1792 100644 --- a/src/request.c +++ b/src/request.c @@ -2,6 +2,7 @@ #include "request.h" #include "base.h" +#include "burl.h" #include "http_kv.h" #include "log.h" #include "sock_addr.h" @@ -650,7 +651,9 @@ int http_request_parse(server *srv, connection *con) { /* check uri for invalid characters */ jlen = buffer_string_length(con->request.uri); - if (http_header_strict) { + if ((con->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) { + j = jlen; /* URI will be checked in http_response_prepare() */ + } else if (http_header_strict) { for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ; } else { char *z = memchr(con->request.uri->ptr, '\0', jlen); diff --git a/src/request.h b/src/request.h index 7f3ac6e0..ac5f1a14 100644 --- a/src/request.h +++ b/src/request.h @@ -5,12 +5,6 @@ #include "base_decls.h" #include "buffer.h" -typedef enum { - HTTP_PARSEOPT_HEADER_STRICT = 1 - ,HTTP_PARSEOPT_HOST_STRICT = 2 - ,HTTP_PARSEOPT_HOST_NORMALIZE = 4 -} http_parseopts_e; - int 
http_request_parse(server *srv, connection *con); int http_request_host_normalize(buffer *b, int scheme_port); int http_request_host_policy(connection *con, buffer *b, const buffer *scheme); diff --git a/src/response.c b/src/response.c index 1f4e1500..4ad3ad54 100644 --- a/src/response.c +++ b/src/response.c @@ -2,6 +2,7 @@ #include "response.h" #include "base.h" +#include "burl.h" #include "fdevent.h" #include "http_kv.h" #include "log.h" @@ -277,12 +278,6 @@ handler_t http_response_prepare(server *srv, connection *con) { /* no decision yet, build conf->filename */ if (con->mode == DIRECT && buffer_is_empty(con->physical.path)) { - - if (!con->async_callback) { - - - char *qstr; - /* we only come here when we have the parse the full request again * * a HANDLER_COMEBACK from mod_rewrite and mod_fastcgi might be a @@ -295,6 +290,8 @@ handler_t http_response_prepare(server *srv, connection *con) { * * */ + if (!con->async_callback) { + config_cond_cache_reset(srv, con); config_setup_connection(srv, con); /* Perhaps this could be removed at other places. 
*/ @@ -306,7 +303,7 @@ handler_t http_response_prepare(server *srv, connection *con) { * prepare strings * * - uri.path_raw - * - uri.path (secure) + * - uri.path * - uri.query * */ @@ -331,36 +328,79 @@ handler_t http_response_prepare(server *srv, connection *con) { buffer_copy_buffer(con->uri.authority, con->request.http_host); buffer_to_lower(con->uri.authority); - /** their might be a fragment which has to be cut away */ - if (NULL != (qstr = strchr(con->request.uri->ptr, '#'))) { - buffer_string_set_length(con->request.uri, qstr - con->request.uri->ptr); - } - - /** extract query string from request.uri */ - if (NULL != (qstr = strchr(con->request.uri->ptr, '?'))) { - buffer_copy_string (con->uri.query, qstr + 1); - buffer_copy_string_len(con->uri.path_raw, con->request.uri->ptr, qstr - con->request.uri->ptr); - } else { - buffer_reset (con->uri.query); + if (con->request.http_method == HTTP_METHOD_CONNECT + || (con->request.http_method == HTTP_METHOD_OPTIONS + && con->request.uri->ptr[0] == '*' + && con->request.uri->ptr[1] == '\0')) { + /* CONNECT ... (or) OPTIONS * ... */ buffer_copy_buffer(con->uri.path_raw, con->request.uri); - } + buffer_copy_buffer(con->uri.path, con->uri.path_raw); + buffer_reset(con->uri.query); + } else { + char *qstr; + if (con->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { + /*size_t len = buffer_string_length(con->request.uri);*/ + int qs = burl_normalize(con->request.uri, srv->tmp_buf, con->conf.http_parseopts); + if (-2 == qs) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "invalid character in URI -> 400", + con->request.uri); + con->keep_alive = 0; + con->http_status = 400; /* Bad Request */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + qstr = (-1 == qs) ? 
NULL : con->request.uri->ptr+qs; + #if 0 /* future: might enable here, or below for all requests */ + /* (Note: total header size not recalculated on HANDLER_COMEBACK + * even if other request headers changed during processing) + * (If (0 != con->loops_per_request), then the generated request + * is too large. Should a different error be returned?) */ + con->header_len -= len; + len = buffer_string_length(con->request.uri); + con->header_len += len; + if (len > MAX_HTTP_REQUEST_URI) { + con->keep_alive = 0; + con->http_status = 414; /* Request-URI Too Long */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + if (con->header_len > MAX_HTTP_REQUEST_HEADER) { + log_error_write(srv, __FILE__, __LINE__, "sds", + "request header fields too large:", con->header_len, "-> 431"); + con->keep_alive = 0; + con->http_status = 431; /* Request Header Fields Too Large */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + #endif + } else { + qstr = strchr(con->request.uri->ptr, '#');/* discard fragment */ + if (qstr) buffer_string_set_length(con->request.uri, qstr - con->request.uri->ptr); + qstr = strchr(con->request.uri->ptr, '?'); + } - /* decode url to path - * - * - decode url-encodings (e.g. %20 -> ' ') - * - remove path-modifiers (e.g. /../) - */ + /** extract query string from request.uri */ + if (NULL != qstr) { + const char * const pstr = con->request.uri->ptr; + const size_t plen = (size_t)(qstr - pstr); + const size_t rlen = buffer_string_length(con->request.uri); + buffer_copy_string_len(con->uri.query, qstr + 1, rlen - plen - 1); + buffer_copy_string_len(con->uri.path_raw, pstr, plen); + } else { + buffer_reset(con->uri.query); + buffer_copy_buffer(con->uri.path_raw, con->request.uri); + } + + /* decode url to path + * + * - decode url-encodings (e.g. %20 -> ' ') + * - remove path-modifiers (e.g. 
/../) + */ - if (con->request.http_method == HTTP_METHOD_OPTIONS && - con->uri.path_raw->ptr[0] == '*' && con->uri.path_raw->ptr[1] == '\0') { - /* OPTIONS * ... */ - buffer_copy_buffer(con->uri.path, con->uri.path_raw); - } else if (con->request.http_method == HTTP_METHOD_CONNECT) { buffer_copy_buffer(con->uri.path, con->uri.path_raw); - } else { - buffer_copy_buffer(srv->tmp_buf, con->uri.path_raw); - buffer_urldecode_path(srv->tmp_buf); - buffer_path_simplify(con->uri.path, srv->tmp_buf); + buffer_urldecode_path(con->uri.path); + buffer_path_simplify(con->uri.path, con->uri.path); } con->conditional_is_valid[COMP_SERVER_SOCKET] = 1; /* SERVERsocket */ diff --git a/src/server.c b/src/server.c index 8a53ac55..404fd1c7 100644 --- a/src/server.c +++ b/src/server.c @@ -2,14 +2,12 @@ #include "server.h" #include "buffer.h" +#include "burl.h" #include "network.h" #include "log.h" #include "rand.h" -#include "response.h" -#include "request.h" #include "chunk.h" #include "http_auth.h" -#include "http_chunk.h" #include "http_vhostdb.h" #include "fdevent.h" #include "connections.h" @@ -287,6 +285,13 @@ static server *server_init(void) { srv->srvconf.http_header_strict = 1; srv->srvconf.http_host_strict = 1; /*(implies http_host_normalize)*/ srv->srvconf.http_host_normalize = 0; + srv->srvconf.http_url_normalize = HTTP_PARSEOPT_URL_NORMALIZE + | HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED + | HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + srv->srvconf.http_url_normalize = 0; /* temporary; change in future */ srv->srvconf.high_precision_timestamps = 0; srv->srvconf.max_request_field_size = 8192; srv->srvconf.loadavg[0] = 0.0; diff --git a/src/t/test_burl.c b/src/t/test_burl.c new file mode 100644 index 00000000..e83bebea --- /dev/null +++ b/src/t/test_burl.c @@ -0,0 +1,142 @@ +#include "first.h" + +#include <stdio.h> +#include <stdlib.h> + +#include 
"burl.h" + +static void run_burl_normalize (buffer *psrc, buffer *ptmp, int flags, int line, const char *in, size_t in_len, const char *out, size_t out_len) { + int qs; + buffer_copy_string_len(psrc, in, in_len); + qs = burl_normalize(psrc, ptmp, flags); + if (out_len == (size_t)-2) { + if (-2 == qs) return; + fprintf(stderr, + "%s.%d: %s('%s') failed: expected error, got '%s'\n", + __FILE__, line, __func__+4, in, psrc->ptr); + } + else { + if (buffer_is_equal_string(psrc, out, out_len)) return; + fprintf(stderr, + "%s.%d: %s('%s') failed: expected '%s', got '%s'\n", + __FILE__, line, __func__+4, in, out, psrc->ptr); + } + fflush(stderr); + abort(); +} + +static void test_burl_normalize (void) { + buffer *psrc = buffer_init(); + buffer *ptmp = buffer_init(); + int flags; + + flags = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/"), CONST_STR_LEN("/abc/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/def"), CONST_STR_LEN("/abc/def")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?"), CONST_STR_LEN("/abc?")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d"), CONST_STR_LEN("/abc?d")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d="), CONST_STR_LEN("/abc?d=")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e"), CONST_STR_LEN("/abc?d=e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&"), CONST_STR_LEN("/abc?d=e&")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f"), CONST_STR_LEN("/abc?d=e&f")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, 
flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#any"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2F"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2f"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%20"), CONST_STR_LEN("/%20")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2b"), CONST_STR_LEN("/%2B")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2B"), CONST_STR_LEN("/%2B")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3a"), CONST_STR_LEN("/%3A")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3A"), CONST_STR_LEN("/%3A")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/~test%20ä_"), CONST_STR_LEN("/~test%20%C3%A4_")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\375"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\376"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), "", (size_t)-2); + + flags = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/"), CONST_STR_LEN("/abc/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/def"), CONST_STR_LEN("/abc/def")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?"), CONST_STR_LEN("/abc?")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d"), CONST_STR_LEN("/abc?d")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d="), 
CONST_STR_LEN("/abc?d=")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e"), CONST_STR_LEN("/abc?d=e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&"), CONST_STR_LEN("/abc?d=e&")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f"), CONST_STR_LEN("/abc?d=e&f")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#any"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2F"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2f"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%20"), CONST_STR_LEN("/%20")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2b"), CONST_STR_LEN("/+")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2B"), CONST_STR_LEN("/+")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3a"), CONST_STR_LEN("/:")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3A"), CONST_STR_LEN("/:")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/~test%20ä_"), CONST_STR_LEN("/~test%20%C3%A4_")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\375"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\376"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), "", (size_t)-2); + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\a"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\t"), "", (size_t)-2); + 
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\r"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\177"), "", (size_t)-2); + + #if defined(__WIN32) || defined(__CYGWIN__) + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a\\b"), CONST_STR_LEN("/a/b")); + #endif + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=/"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2Fb"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb?c=/"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2Fb"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("./a/b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("../a/b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/./b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b"), CONST_STR_LEN("/b")); + 
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/."), CONST_STR_LEN("/a/b/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/.."), CONST_STR_LEN("/a/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b/.."), CONST_STR_LEN("/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("./a/b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("../a/b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/./b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/."), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/.."), "", (size_t)-2); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=d+e"), CONST_STR_LEN("/a/b?c=d+e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=d%20e"), CONST_STR_LEN("/a/b?c=d+e")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; + + buffer_free(psrc); + buffer_free(ptmp); +} + +int main (void) { + test_burl_normalize(); + return 0; +} diff --git a/src/t/test_request.c b/src/t/test_request.c index b73127e1..43903464 100644 --- a/src/t/test_request.c +++ b/src/t/test_request.c @@ -5,6 +5,7 @@ #include #include "base.h" +#include "burl.h" static void test_request_connection_reset(connection *con) {