From 3eb7902e10ba75b3f2eb159e244d0d8e5037ccd2 Mon Sep 17 00:00:00 2001 From: Glenn Strauss Date: Tue, 1 May 2018 00:20:26 -0400 Subject: [PATCH] [core] server.http-parseopts URL normalization opt (fixes #1720) server.http-parseopts = ( ... ) URL normalization options Note: *not applied* to CONNECT method Note: In a future release, URL normalization likely enabled by default (normalize URL, reject control chars, remove . and .. path segments) To prepare for this change, lighttpd.conf configurations should explicitly select desired behavior by enabling or disabling: server.http-parseopts = ( "url-normalize" => "enable", ... ) server.http-parseopts = ( "url-normalize" => "disable" ) x-ref: "lighttpd ... compares URIs to patterns in the (1) url.redirect and (2) url.rewrite configuration settings before performing URL decoding, which might allow remote attackers to bypass intended access restrictions, and obtain sensitive information or possibly modify data." https://www.cvedetails.com/cve/CVE-2008-4359/ "Rewrite/redirect rules and URL encoding" https://redmine.lighttpd.net/issues/1720 --- .gitignore | 1 + src/CMakeLists.txt | 11 +- src/Makefile.am | 16 +- src/SConscript | 2 +- src/base.h | 1 + src/burl.c | 357 +++++++++++++++++++++++++++++++++++++++++++ src/burl.h | 25 +++ src/configfile.c | 132 +++++++++++++++- src/keyvalue.c | 119 +++++++++++++++ src/keyvalue.h | 2 + src/meson.build | 7 + src/mod_redirect.c | 4 + src/mod_rewrite.c | 4 + src/request.c | 5 +- src/request.h | 6 - src/response.c | 106 +++++++++---- src/server.c | 11 +- src/t/test_burl.c | 142 +++++++++++++++++ src/t/test_request.c | 1 + 19 files changed, 902 insertions(+), 50 deletions(-) create mode 100644 src/burl.c create mode 100644 src/burl.h create mode 100644 src/t/test_burl.c diff --git a/.gitignore b/.gitignore index fc6317cf..c27120cf 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ sconsbuild/ stamp-h1 test_base64 test_buffer +test_burl test_configfile test_request versionstamp.h 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f876fbf7..9d5d6c26 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -553,7 +553,7 @@ add_definitions(-DHAVE_CONFIG_H) include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) set(COMMON_SRC - base64.c buffer.c log.c + base64.c buffer.c burl.c log.c http_kv.c keyvalue.c chunk.c http_chunk.c stream.c fdevent.c gw_backend.c stat_cache.c plugin.c joblist.c etag.c array.c @@ -650,6 +650,13 @@ add_executable(test_buffer ) add_test(NAME test_buffer COMMAND test_buffer) +add_executable(test_burl + t/test_burl.c + burl.c + buffer.c +) +add_test(NAME test_burl COMMAND test_burl) + add_executable(test_base64 t/test_base64.c buffer.c @@ -871,6 +878,8 @@ if(WITH_LIBUNWIND) target_link_libraries(test_buffer ${LIBUNWIND_LDFLAGS}) add_target_properties(test_buffer COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) + target_link_libraries(test_burl ${LIBUNWIND_LDFLAGS}) + add_target_properties(test_burl COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) target_link_libraries(test_base64 ${LIBUNWIND_LDFLAGS}) add_target_properties(test_base64 COMPILE_FLAGS ${LIBUNWIND_CFLAGS}) target_link_libraries(test_configfile ${PCRE_LDFLAGS} ${LIBUNWIND_LDFLAGS}) diff --git a/src/Makefile.am b/src/Makefile.am index 3160388f..28fd4dfa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,11 +1,18 @@ AM_CFLAGS = $(FAM_CFLAGS) $(LIBUNWIND_CFLAGS) -noinst_PROGRAMS=t/test_buffer t/test_base64 t/test_configfile t/test_request +noinst_PROGRAMS=\ + t/test_buffer \ + t/test_burl \ + t/test_base64 \ + t/test_configfile \ + t/test_request + sbin_PROGRAMS=lighttpd lighttpd-angel LEMON=$(top_builddir)/src/lemon$(BUILD_EXEEXT) TESTS=\ t/test_buffer$(EXEEXT) \ + t/test_burl$(EXEEXT) \ t/test_base64$(EXEEXT) \ t/test_configfile$(EXEEXT) \ t/test_request$(EXEEXT) @@ -56,7 +63,7 @@ BUILT_SOURCES = parsers versionstamp MAINTAINERCLEANFILES = configparser.c configparser.h mod_ssi_exprparser.c mod_ssi_exprparser.h CLEANFILES = versionstamp.h 
versionstamp.h.tmp lemon$(BUILD_EXEEXT) -common_src=base64.c buffer.c log.c \ +common_src=base64.c buffer.c burl.c log.c \ http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ stat_cache.c plugin.c joblist.c etag.c array.c \ @@ -384,7 +391,7 @@ mod_wstunnel_la_LDFLAGS = $(common_module_ldflags) mod_wstunnel_la_LIBADD = $(common_libadd) $(CRYPTO_LIB) -hdr = server.h base64.h buffer.h network.h log.h http_kv.h keyvalue.h \ +hdr = server.h base64.h buffer.h burl.h network.h log.h http_kv.h keyvalue.h \ response.h request.h fastcgi.h chunk.h \ first.h settings.h http_chunk.h \ algo_sha1.h md5.h http_auth.h http_vhostdb.h stream.h \ @@ -517,6 +524,9 @@ t_test_buffer_LDADD = $(LIBUNWIND_LIBS) t_test_base64_SOURCES = t/test_base64.c base64.c buffer.c t_test_base64_LDADD = $(LIBUNWIND_LIBS) +t_test_burl_SOURCES = t/test_burl.c burl.c buffer.c +t_test_burl_LDADD = $(LIBUNWIND_LIBS) + t_test_configfile_SOURCES = t/test_configfile.c buffer.c array.c data_config.c data_string.c http_kv.c vector.c log.c sock_addr.c t_test_configfile_LDADD = $(PCRE_LIB) $(LIBUNWIND_LIBS) diff --git a/src/SConscript b/src/SConscript index ffcbb3df..4c7d96d5 100644 --- a/src/SConscript +++ b/src/SConscript @@ -55,7 +55,7 @@ def GatherLibs(env, *libs): libs = RemoveDuplicateLibs(env['LIBS'] + list(libs) + [env['APPEND_LIBS']]) return WorkaroundFreeBSDLibOrder(libs) -common_src = Split("base64.c buffer.c log.c \ +common_src = Split("base64.c buffer.c burl.c log.c \ http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ stat_cache.c plugin.c joblist.c etag.c array.c \ diff --git a/src/base.h b/src/base.h index 615d5584..ea040f46 100644 --- a/src/base.h +++ b/src/base.h @@ -360,6 +360,7 @@ typedef struct { unsigned short http_header_strict; unsigned short http_host_strict; unsigned short http_host_normalize; + unsigned short http_url_normalize; unsigned short high_precision_timestamps; time_t loadts; double loadavg[3]; diff --git a/src/burl.c 
b/src/burl.c new file mode 100644 index 00000000..3eadb6ce --- /dev/null +++ b/src/burl.c @@ -0,0 +1,357 @@ +#include "first.h" +#include "burl.h" + +#include + +#include "buffer.h" + +static const char hex_chars_uc[] = "0123456789ABCDEF"; + +/* everything except: ! $ & ' ( ) * + , - . / 0-9 : ; = ? @ A-Z _ a-z ~ */ +static const char encoded_chars_http_uri_reqd[] = { + /* + 0 1 2 3 4 5 6 7 8 9 A B C D E F + */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ + 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 2F space " # % */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 30 - 3F < > */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 50 - 5F [ \ ] ^ */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 70 - 7F { | } DEL */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */ +}; + + +/* c (char) and n (nibble) MUST be unsigned integer types */ +#define li_cton(c,n) \ + (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? 
((n) += 10) : 0)) + +/* b (byte) MUST be unsigned integer type + * https://en.wikipedia.org/wiki/UTF-8 + * reject overlong encodings of 7-bit ASCII and invalid UTF-8 + * (but does not detect other overlong multi-byte encodings) */ +#define li_utf8_invalid_byte(b) ((b) >= 0xF5 || ((b)|0x1) == 0xC1) + + +static int burl_is_unreserved (const int c) +{ + return (light_isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'); +} + + +static int burl_normalize_basic_unreserved_fix (buffer *b, buffer *t, int i, int qs) +{ + int j = i; + const int used = (int)buffer_string_length(b); + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char * const p = + (unsigned char *)buffer_string_prepare_copy(t,i+(used-i)*3+1); + unsigned int n1, n2; + memcpy(p, s, (size_t)i); + for (; i < used; ++i, ++j) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = j; + p[j] = s[i]; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) { + const unsigned int x = (n1 << 4) | n2; + if (burl_is_unreserved(x)) { + p[j] = x; + } + else { + p[j] = '%'; + p[++j] = hex_chars_uc[n1]; /*(s[i+1] & 0xdf)*/ + p[++j] = hex_chars_uc[n2]; /*(s[i+2] & 0xdf)*/ + if (li_utf8_invalid_byte(x)) qs = -2; + } + i+=2; + } + else if (s[i] == '#') break; /* ignore fragment */ + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + if (li_utf8_invalid_byte(s[i])) qs = -2; + } + } + buffer_commit(t, (size_t)j); + buffer_copy_buffer(b, t); + return qs; +} + + +static int burl_normalize_basic_unreserved (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2, x; + int qs = -1; + + for (int i = 0; i < used; ++i) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' 
&& -1 == qs) qs = i; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2) + && !burl_is_unreserved((x = (n1 << 4) | n2))) { + if (li_utf8_invalid_byte(x)) qs = -2; + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + else if (s[i] == '#') { /* ignore fragment */ + buffer_string_set_length(b, (size_t)i); + break; + } + else { + qs = burl_normalize_basic_unreserved_fix(b, t, i, qs); + break; + } + } + + return qs; +} + + +static int burl_normalize_basic_required_fix (buffer *b, buffer *t, int i, int qs) +{ + int j = i; + const int used = (int)buffer_string_length(b); + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char * const p = + (unsigned char *)buffer_string_prepare_copy(t,i+(used-i)*3+1); + unsigned int n1, n2; + memcpy(p, s, (size_t)i); + for (; i < used; ++i, ++j) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = j; + p[j] = s[i]; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) { + const unsigned int x = (n1 << 4) | n2; + if (!encoded_chars_http_uri_reqd[x] + && (qs < 0 ? 
(x!='/'&&x!='?') : (x!='&'&&x!='='&&x!=';'))) { + p[j] = x; + } + else { + p[j] = '%'; + p[++j] = hex_chars_uc[n1]; /*(s[i+1] & 0xdf)*/ + p[++j] = hex_chars_uc[n2]; /*(s[i+2] & 0xdf)*/ + if (li_utf8_invalid_byte(x)) qs = -2; + } + i+=2; + } + else if (s[i] == '#') break; /* ignore fragment */ + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + if (li_utf8_invalid_byte(s[i])) qs = -2; + } + } + buffer_commit(t, (size_t)j); + buffer_copy_buffer(b, t); + return qs; +} + + +static int burl_normalize_basic_required (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2, x; + int qs = -1; + + for (int i = 0; i < used; ++i) { + if (!encoded_chars_http_uri_reqd[s[i]]) { + if (s[i] == '?' && -1 == qs) qs = i; + } + else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2) + && (encoded_chars_http_uri_reqd[(x = (n1 << 4) | n2)] + ||(qs < 0 ? 
(x=='/'||x=='?') : (x=='&'||x=='='||x==';')))){ + if (li_utf8_invalid_byte(x)) qs = -2; + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + else if (s[i] == '#') { /* ignore fragment */ + buffer_string_set_length(b, (size_t)i); + break; + } + else { + qs = burl_normalize_basic_required_fix(b, t, i, qs); + break; + } + } + + return qs; +} + + +static int burl_contains_ctrls (const buffer *b) +{ + const char * const s = b->ptr; + const int used = (int)buffer_string_length(b); + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && (s[i+1] < '2' || (s[i+1] == '7' && s[i+2] == 'F'))) + return 1; + } + return 0; +} + + +static void burl_normalize_qs20_to_plus_fix (buffer *b, int i) +{ + char * const s = b->ptr; + const int used = (int)buffer_string_length(b); + int j = i; + for (; i < used; ++i, ++j) { + s[j] = s[i]; + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == '0') { + s[j] = '+'; + i+=2; + } + } + buffer_string_set_length(b, j); +} + + +static void burl_normalize_qs20_to_plus (buffer *b, int qs) +{ + const char * const s = b->ptr; + const int used = qs < 0 ? 0 : (int)buffer_string_length(b); + int i; + if (qs < 0) return; + for (i = qs+1; i < used; ++i) { + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == '0') break; + } + if (i != used) burl_normalize_qs20_to_plus_fix(b, i); +} + + +static int burl_normalize_2F_to_slash_fix (buffer *b, int qs, int i) +{ + char * const s = b->ptr; + const int blen = (int)buffer_string_length(b); + const int used = qs < 0 ? 
blen : qs; + int j = i; /* j: write cursor; i: read cursor; both start at first "%2F" */ + for (; i < used; ++i, ++j) { + s[j] = s[i]; + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == 'F') { + s[j] = '/'; + i+=2; + } + } + if (qs >= 0) { /* slide "?query" down so it directly follows the shortened path */ + memmove(s+j, s+qs, blen - qs); + j += blen - qs; + } + buffer_string_set_length(b, j); + return qs < 0 ? qs : j - (blen - qs); /*(new offset of '?': it moved left with the path; returning the old qs would index past it)*/ +} + + +static int burl_normalize_2F_to_slash (buffer *b, int qs, int flags) +{ + /*("%2F" must already have been uppercased during normalization)*/ + const char * const s = b->ptr; + const int used = qs < 0 ? (int)buffer_string_length(b) : qs; + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && s[i+1] == '2' && s[i+2] == 'F') { + return (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE) + ? burl_normalize_2F_to_slash_fix(b, qs, i) + : -2; /*(flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)*/ + } + } + return qs; +} + + +static int burl_normalize_path (buffer *b, buffer *t, int qs, int flags) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + int path_simplify = 0; + for (int i = 0, len = qs < 0 ? used : qs; i < len; ++i) { + if (s[i] == '.' && (s[i+1] != '.' || ++i) + && (s[i+1] == '/' || s[i+1] == '?' 
|| s[i+1] == '\0')) { + path_simplify = 1; + break; + } + while (i < len && s[i] != '/') ++i; /*(stop on s[i] == '/' itself; unconditionally skipping s[i] first hid a leading "//", "/./" or "/../")*/ + if (s[i] == '/' && s[i+1] == '/') { /*(s[len] != '/')*/ + path_simplify = 1; + break; + } + } + + if (path_simplify) { + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT) return -2; + if (qs >= 0) { /* park "?query" in t so only the path is simplified */ + buffer_copy_string_len(t, b->ptr+qs, used - qs); + buffer_string_set_length(b, qs); + } + + buffer_path_simplify(b, b); + + if (qs >= 0) { /* re-attach query; '?' now sits at the simplified path length */ + qs = (int)buffer_string_length(b); + buffer_append_string_len(b, CONST_BUF_LEN(t)); + } + } + + return qs; +} + + +int burl_normalize (buffer *b, buffer *t, int flags) +{ + int qs; + + #if defined(__WIN32) || defined(__CYGWIN__) + /* Windows and Cygwin treat '\\' as '/' if '\\' is present in path; + * convert to '/' for consistency before percent-encoding + * normalization which will convert '\\' to "%5C" in the URL. + * (Clients still should not be sending '\\' unencoded in requests.) */ + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS) { + for (char *p = b->ptr; *p != '?' && *p != '\0'; ++p) { + if (*p == '\\') *p = '/'; + } + } + #endif + + qs = (flags & HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED) + ? 
burl_normalize_basic_required(b, t) + : burl_normalize_basic_unreserved(b, t); + if (-2 == qs) return -2; + + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) { + if (burl_contains_ctrls(b)) return -2; + } + + if (flags & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) { + qs = burl_normalize_2F_to_slash(b, qs, flags); + if (-2 == qs) return -2; + } + + if (flags & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) { + qs = burl_normalize_path(b, t, qs, flags); + if (-2 == qs) return -2; + } + + if (flags & HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS) { + if (qs >= 0) burl_normalize_qs20_to_plus(b, qs); + } + + return qs; +} diff --git a/src/burl.h b/src/burl.h new file mode 100644 index 00000000..d81cc572 --- /dev/null +++ b/src/burl.h @@ -0,0 +1,25 @@ +#ifndef INCLUDED_BURL_H +#define INCLUDED_BURL_H +#include "first.h" + +#include "buffer.h" + +enum burl_opts_e { + HTTP_PARSEOPT_HEADER_STRICT = 0x1 + ,HTTP_PARSEOPT_HOST_STRICT = 0x2 + ,HTTP_PARSEOPT_HOST_NORMALIZE = 0x4 + ,HTTP_PARSEOPT_URL_NORMALIZE = 0x8/*normalize chars %-encoded, uppercase hex*/ + ,HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED =0x10 /* decode unreserved */ + ,HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED =0x20 /* decode (un)reserved*/ + ,HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT =0x40 + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS=0x80 /* "\\" -> "/" Cygwin */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE =0x100/* "%2F"-> "/" */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT =0x200 + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE =0x400/* "." ".." 
"//" */ + ,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT =0x800 + ,HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS =0x1000 +}; + +int burl_normalize (buffer *b, buffer *t, int flags); + +#endif diff --git a/src/configfile.c b/src/configfile.c index c3405439..7ce44dfb 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -1,13 +1,14 @@ #include "first.h" #include "base.h" +#include "burl.h" #include "fdevent.h" +#include "keyvalue.h" #include "log.h" #include "stream.h" #include "configparser.h" #include "configfile.h" -#include "request.h" #include "stat_cache.h" #include @@ -63,10 +64,103 @@ static void config_warn_openssl_module (server *srv) { } #endif +static int config_http_parseopts (server *srv, array *a) { + unsigned short int opts = srv->srvconf.http_url_normalize; + unsigned short int decode_2f = 1; + int rc = 1; + if (!array_is_kvstring(a)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "unexpected value for server.http-parseopts; " + "expected list of \"key\" => \"[enable|disable]\""); + return 0; + } + for (size_t i = 0; i < a->used; ++i) { + const data_string * const ds = (data_string *)a->data[i]; + unsigned short int opt; + if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize-unreserved"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-normalize-required"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-ctrls-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-backslash-trans"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-2f-decode"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + else if (buffer_is_equal_string(ds->key, 
CONST_STR_LEN("url-path-2f-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-dotseg-remove"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-path-dotseg-reject"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; + else if (buffer_is_equal_string(ds->key, CONST_STR_LEN("url-query-20-plus"))) + opt = HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; + else { + log_error_write(srv, __FILE__, __LINE__, "sb", + "unrecognized key for server.http-parseopts:", + ds->key); + rc = 0; + continue; + } + if (buffer_is_equal_string(ds->value, CONST_STR_LEN("enable"))) + opts |= opt; + else if (buffer_is_equal_string(ds->value, CONST_STR_LEN("disable"))) { + opts &= ~opt; + if (opt == HTTP_PARSEOPT_URL_NORMALIZE) { + opts = 0; + break; + } + if (opt == HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE) { + decode_2f = 0; + } + } + else { + log_error_write(srv, __FILE__, __LINE__, "sbsbs", + "unrecognized value for server.http-parseopts:", + ds->key, "=>", ds->value, + "(expect \"[enable|disable]\")"); + rc = 0; + } + } + if (opts != 0) { + opts |= HTTP_PARSEOPT_URL_NORMALIZE; + if ((opts & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) + == (HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "conflicting options in server.http-parseopts:" + "url-path-2f-decode, url-path-2f-reject"); + rc = 0; + } + if ((opts & (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) + == (HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE + |HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT)) { + log_error_write(srv, __FILE__, __LINE__, "s", + "conflicting options in server.http-parseopts:" + "url-path-dotseg-remove, url-path-dotseg-reject"); + rc = 0; + } + if (!(opts & 
(HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED + |HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED))) { + opts |= HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; + if (decode_2f + && !(opts & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT)) + opts |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + } + } + srv->srvconf.http_url_normalize = opts; + return rc; +} + static int config_insert(server *srv) { size_t i; int ret = 0; buffer *stat_cache_string; + array *http_parseopts; config_values_t cv[] = { { "server.bind", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 0 */ @@ -164,6 +258,7 @@ static int config_insert(server *srv) { { "server.error-intercept", NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_CONNECTION }, /* 79 */ { "server.syslog-facility", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 80 */ { "server.socket-perms", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION }, /* 81 */ + { "server.http-parseopts", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_SERVER }, /* 82 */ { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET } }; @@ -204,6 +299,8 @@ static int config_insert(server *srv) { cv[74].destination = &(srv->srvconf.http_host_normalize); cv[78].destination = &(srv->srvconf.max_request_field_size); cv[80].destination = srv->srvconf.syslog_facility; + http_parseopts = array_init(); + cv[82].destination = http_parseopts; srv->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *)); @@ -212,7 +309,7 @@ static int config_insert(server *srv) { -analyzer */ for (i = 0; i < srv->config_context->used; i++) { - data_config const* config = (data_config const*)srv->config_context->data[i]; + data_config * const config = (data_config *)srv->config_context->data[i]; specific_config *s; s = calloc(1, sizeof(specific_config)); @@ -363,6 +460,35 @@ static int config_insert(server *srv) { } } + if (0 == i) { + if (!config_http_parseopts(srv, http_parseopts)) { + ret = HANDLER_ERROR; + break; + } + } + + if (srv->srvconf.http_url_normalize + && COMP_HTTP_QUERY_STRING == config->comp) { + 
switch(config->cond) { + case CONFIG_COND_NE: + case CONFIG_COND_EQ: + /* (can use this routine as long as it does not perform + * any regex-specific normalization of first arg) */ + pcre_keyvalue_burl_normalize_key(config->string, srv->tmp_buf); + break; + case CONFIG_COND_NOMATCH: + case CONFIG_COND_MATCH: + pcre_keyvalue_burl_normalize_key(config->string, srv->tmp_buf); + if (!data_config_pcre_compile(config)) { + ret = HANDLER_ERROR; + } + break; + default: + break; + } + if (HANDLER_ERROR == ret) break; + } + #if !(defined HAVE_LIBSSL && defined HAVE_OPENSSL_SSL_H) if (s->ssl_enabled) { log_error_write(srv, __FILE__, __LINE__, "s", @@ -372,6 +498,7 @@ static int config_insert(server *srv) { } #endif } + array_free(http_parseopts); { specific_config *s = srv->config_storage[0]; @@ -380,6 +507,7 @@ static int config_insert(server *srv) { |(srv->srvconf.http_host_strict ?(HTTP_PARSEOPT_HOST_STRICT |HTTP_PARSEOPT_HOST_NORMALIZE):0) |(srv->srvconf.http_host_normalize ?(HTTP_PARSEOPT_HOST_NORMALIZE):0); + s->http_parseopts |= srv->srvconf.http_url_normalize; } if (0 != stat_cache_choose_engine(srv, stat_cache_string)) { diff --git a/src/keyvalue.c b/src/keyvalue.c index a2def34c..69a4a9f1 100644 --- a/src/keyvalue.c +++ b/src/keyvalue.c @@ -200,3 +200,122 @@ handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ return HANDLER_GO_ON; } #endif + + +/* modified from burl_normalize_basic() to handle %% extra encoding layer */ + +/* c (char) and n (nibble) MUST be unsigned integer types */ +#define li_cton(c,n) \ + (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? 
((n) += 10) : 0)) + +static void pcre_keyvalue_burl_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i]=='%' && li_cton(s[i+1],n1) && li_cton(s[i+2],n2)) { + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + } +} + +static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && s[i+1]=='%' + && li_cton(s[i+2],n1) && li_cton(s[i+3],n2)) { + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + if (s[i+3] >= 'a') b->ptr[i+3] &= 0xdf; /* uppercase hex */ + i+=3; + } + } +} + +static const char hex_chars_uc[] = "0123456789ABCDEF"; + +static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char *)buffer_string_prepare_copy(t, used+(count*2)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char 
*)buffer_string_prepare_copy(t, used+(count*3)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +/* Basic normalization of regex and regex replacement to mirror some of + * the normalizations performed on request URI (for better compatibility). + * Note: not currently attempting to replace unnecessary percent-encoding + * (would need to know if regex was intended to match url-path or + * query-string or both, and then would have to regex-escape if those + * chars were special regex chars such as . * + ? ( ) [ ] | and more) + * Not attempting to percent-encode chars which should be encoded, again + * since regex might target url-path, query-string, or both, and we would + * have to avoid percent-encoding special regex chars. + * Also not attempting to detect unnecessary regex-escaping in, e.g. %\x\x + * Preserve improper %-encoded sequences which are not %XX (using hex chars) + * Intentionally not performing path simplification (e.g. ./ ../) + * If regex-specific normalizations begin to be made to k here, + * must revisit callers, e.g. one configfile.c use on non-regex string. + * "%%" (percent_percent) is used in regex replacement strings since + * otherwise "%n" is used to indicate regex backreference where n is number. 
+ */ + +void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t) +{ + pcre_keyvalue_burl_percent_toupper(k); + pcre_keyvalue_burl_percent_high_UTF8(k, t); +} + +void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t) +{ + pcre_keyvalue_burl_percent_percent_toupper(v); + pcre_keyvalue_burl_percent_percent_high_UTF8(v, t); +} diff --git a/src/keyvalue.h b/src/keyvalue.h index b175b6af..b50e7615 100644 --- a/src/keyvalue.h +++ b/src/keyvalue.h @@ -23,5 +23,7 @@ pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void); int pcre_keyvalue_buffer_append(struct server *srv, pcre_keyvalue_buffer *kvb, buffer *key, buffer *value); void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb); handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, buffer *input, buffer *result); +void pcre_keyvalue_burl_normalize_key(buffer *k, buffer *t); +void pcre_keyvalue_burl_normalize_value(buffer *v, buffer *t); #endif diff --git a/src/meson.build b/src/meson.build index 7a47d078..18a7ddbc 100644 --- a/src/meson.build +++ b/src/meson.build @@ -539,6 +539,7 @@ common_src = [ 'array.c', 'base64.c', 'buffer.c', + 'burl.c', 'chunk.c', 'configfile-glue.c', 'connections-glue.c', @@ -692,6 +693,12 @@ test('test_buffer', executable('test_buffer', build_by_default: false, )) +test('test_burl', executable('test_burl', + sources: ['t/test_burl.c', 'burl.c', 'buffer.c'], + dependencies: common_flags + libunwind, + build_by_default: false, +)) + test('test_base64', executable('test_base64', sources: ['t/test_base64.c', 'buffer.c', 'base64.c'], dependencies: common_flags + libunwind, diff --git a/src/mod_redirect.c b/src/mod_redirect.c index f5e1b4f2..6790929a 100644 --- a/src/mod_redirect.c +++ b/src/mod_redirect.c @@ -98,6 +98,10 @@ SETDEFAULTS_FUNC(mod_redirect_set_defaults) { for (j = 0; j < da->value->used; j++) { data_string *ds = (data_string *)da->value->data[j]; + if (srv->srvconf.http_url_normalize) { + pcre_keyvalue_burl_normalize_key(ds->key, 
srv->tmp_buf); + pcre_keyvalue_burl_normalize_value(ds->value, srv->tmp_buf); + } if (0 != pcre_keyvalue_buffer_append(srv, s->redirect, ds->key, ds->value)) { log_error_write(srv, __FILE__, __LINE__, "sb", "pcre-compile failed for", ds->key); diff --git a/src/mod_rewrite.c b/src/mod_rewrite.c index ae6592f9..07e03f7a 100644 --- a/src/mod_rewrite.c +++ b/src/mod_rewrite.c @@ -85,6 +85,10 @@ static int parse_config_entry(server *srv, array *ca, pcre_keyvalue_buffer *kvb, for (j = 0; j < da->value->used; j++) { data_string *ds = (data_string *)da->value->data[j]; + if (srv->srvconf.http_url_normalize) { /* when http_url_normalize is set, run the rule key and value through the burl normalize helpers before pcre_keyvalue_buffer_append() receives them */ + pcre_keyvalue_burl_normalize_key(ds->key, srv->tmp_buf); + pcre_keyvalue_burl_normalize_value(ds->value, srv->tmp_buf); + } if (0 != pcre_keyvalue_buffer_append(srv, kvb, ds->key, ds->value)) { log_error_write(srv, __FILE__, __LINE__, "sb", "pcre-compile failed for", ds->key); diff --git a/src/request.c b/src/request.c index ac7141d4..4e7b1792 100644 --- a/src/request.c +++ b/src/request.c @@ -2,6 +2,7 @@ #include "request.h" #include "base.h" +#include "burl.h" #include "http_kv.h" #include "log.h" #include "sock_addr.h" @@ -650,7 +651,9 @@ int http_request_parse(server *srv, connection *con) { /* check uri for invalid characters */ jlen = buffer_string_length(con->request.uri); - if (http_header_strict) { + if ((con->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) { + j = jlen; /* URI will be checked in http_response_prepare() */ + } else if (http_header_strict) { for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ; } else { char *z = memchr(con->request.uri->ptr, '\0', jlen); diff --git a/src/request.h b/src/request.h index 7f3ac6e0..ac5f1a14 100644 --- a/src/request.h +++ b/src/request.h @@ -5,12 +5,6 @@ #include "base_decls.h" #include "buffer.h" -typedef enum { - HTTP_PARSEOPT_HEADER_STRICT = 1 - ,HTTP_PARSEOPT_HOST_STRICT = 2 - ,HTTP_PARSEOPT_HOST_NORMALIZE = 4 -} http_parseopts_e; - int 
http_request_parse(server *srv, connection *con); int http_request_host_normalize(buffer *b, int scheme_port); int http_request_host_policy(connection *con, buffer *b, const buffer *scheme); diff --git a/src/response.c b/src/response.c index 1f4e1500..4ad3ad54 100644 --- a/src/response.c +++ b/src/response.c @@ -2,6 +2,7 @@ #include "response.h" #include "base.h" +#include "burl.h" #include "fdevent.h" #include "http_kv.h" #include "log.h" @@ -277,12 +278,6 @@ handler_t http_response_prepare(server *srv, connection *con) { /* no decision yet, build conf->filename */ if (con->mode == DIRECT && buffer_is_empty(con->physical.path)) { - - if (!con->async_callback) { - - - char *qstr; - /* we only come here when we have the parse the full request again * * a HANDLER_COMEBACK from mod_rewrite and mod_fastcgi might be a @@ -295,6 +290,8 @@ handler_t http_response_prepare(server *srv, connection *con) { * * */ + if (!con->async_callback) { + config_cond_cache_reset(srv, con); config_setup_connection(srv, con); /* Perhaps this could be removed at other places. 
*/ @@ -306,7 +303,7 @@ handler_t http_response_prepare(server *srv, connection *con) { * prepare strings * * - uri.path_raw - * - uri.path (secure) + * - uri.path * - uri.query * */ @@ -331,36 +328,79 @@ handler_t http_response_prepare(server *srv, connection *con) { buffer_copy_buffer(con->uri.authority, con->request.http_host); buffer_to_lower(con->uri.authority); - /** their might be a fragment which has to be cut away */ - if (NULL != (qstr = strchr(con->request.uri->ptr, '#'))) { - buffer_string_set_length(con->request.uri, qstr - con->request.uri->ptr); - } - - /** extract query string from request.uri */ - if (NULL != (qstr = strchr(con->request.uri->ptr, '?'))) { - buffer_copy_string (con->uri.query, qstr + 1); - buffer_copy_string_len(con->uri.path_raw, con->request.uri->ptr, qstr - con->request.uri->ptr); - } else { - buffer_reset (con->uri.query); + if (con->request.http_method == HTTP_METHOD_CONNECT + || (con->request.http_method == HTTP_METHOD_OPTIONS + && con->request.uri->ptr[0] == '*' + && con->request.uri->ptr[1] == '\0')) { + /* CONNECT ... (or) OPTIONS * ... */ buffer_copy_buffer(con->uri.path_raw, con->request.uri); - } + buffer_copy_buffer(con->uri.path, con->uri.path_raw); + buffer_reset(con->uri.query); /* request-target is kept verbatim in this branch: path_raw and path are plain copies and the query is emptied */ + } else { + char *qstr; + if (con->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE) { + /*size_t len = buffer_string_length(con->request.uri);*/ + int qs = burl_normalize(con->request.uri, srv->tmp_buf, con->conf.http_parseopts); /* qs as used below: -2 means the URI was rejected, -1 means no query string, any other value is the offset of the query delimiter inside request.uri */ + if (-2 == qs) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "invalid character in URI -> 400", + con->request.uri); + con->keep_alive = 0; + con->http_status = 400; /* Bad Request */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + qstr = (-1 == qs) ? 
NULL : con->request.uri->ptr+qs; + #if 0 /* future: might enable here, or below for all requests */ + /* (Note: total header size not recalculated on HANDLER_COMEBACK + * even if other request headers changed during processing) + * (If (0 != con->loops_per_request), then the generated request + * is too large. Should a different error be returned?) */ + con->header_len -= len; /* len is only declared in the commented-out line before the burl_normalize call; that declaration has to be restored if this disabled block is ever enabled */ + len = buffer_string_length(con->request.uri); + con->header_len += len; + if (len > MAX_HTTP_REQUEST_URI) { + con->keep_alive = 0; + con->http_status = 414; /* Request-URI Too Long */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + if (con->header_len > MAX_HTTP_REQUEST_HEADER) { + log_error_write(srv, __FILE__, __LINE__, "sds", + "request header fields too large:", con->header_len, "-> 431"); + con->keep_alive = 0; + con->http_status = 431; /* Request Header Fields Too Large */ + con->file_finished = 1; + return HANDLER_FINISHED; + } + #endif + } else { + qstr = strchr(con->request.uri->ptr, '#');/* discard fragment */ + if (qstr) buffer_string_set_length(con->request.uri, qstr - con->request.uri->ptr); + qstr = strchr(con->request.uri->ptr, '?'); + } - /* decode url to path - * - * - decode url-encodings (e.g. %20 -> ' ') - * - remove path-modifiers (e.g. /../) - */ + /** extract query string from request.uri */ + if (NULL != qstr) { + const char * const pstr = con->request.uri->ptr; + const size_t plen = (size_t)(qstr - pstr); + const size_t rlen = buffer_string_length(con->request.uri); + buffer_copy_string_len(con->uri.query, qstr + 1, rlen - plen - 1); /* query is everything after the delimiter: rlen - plen - 1 bytes, sized from the buffer length */ + buffer_copy_string_len(con->uri.path_raw, pstr, plen); + } else { + buffer_reset(con->uri.query); + buffer_copy_buffer(con->uri.path_raw, con->request.uri); + } + + /* decode url to path + * + * - decode url-encodings (e.g. %20 -> ' ') + * - remove path-modifiers (e.g. 
/../) + */ - if (con->request.http_method == HTTP_METHOD_OPTIONS && - con->uri.path_raw->ptr[0] == '*' && con->uri.path_raw->ptr[1] == '\0') { - /* OPTIONS * ... */ - buffer_copy_buffer(con->uri.path, con->uri.path_raw); - } else if (con->request.http_method == HTTP_METHOD_CONNECT) { buffer_copy_buffer(con->uri.path, con->uri.path_raw); - } else { - buffer_copy_buffer(srv->tmp_buf, con->uri.path_raw); - buffer_urldecode_path(srv->tmp_buf); - buffer_path_simplify(con->uri.path, srv->tmp_buf); + buffer_urldecode_path(con->uri.path); + buffer_path_simplify(con->uri.path, con->uri.path); /* decoding and simplifying now operate directly on con->uri.path; the removed lines staged the copy in srv->tmp_buf first */ } con->conditional_is_valid[COMP_SERVER_SOCKET] = 1; /* SERVERsocket */ diff --git a/src/server.c b/src/server.c index 8a53ac55..404fd1c7 100644 --- a/src/server.c +++ b/src/server.c @@ -2,14 +2,12 @@ #include "server.h" #include "buffer.h" +#include "burl.h" #include "network.h" #include "log.h" #include "rand.h" -#include "response.h" -#include "request.h" #include "chunk.h" #include "http_auth.h" -#include "http_chunk.h" #include "http_vhostdb.h" #include "fdevent.h" #include "connections.h" @@ -287,6 +285,13 @@ static server *server_init(void) { srv->srvconf.http_header_strict = 1; srv->srvconf.http_host_strict = 1; /*(implies http_host_normalize)*/ srv->srvconf.http_host_normalize = 0; + srv->srvconf.http_url_normalize = HTTP_PARSEOPT_URL_NORMALIZE + | HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED + | HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE + | HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; /* NOTE: the next statement overwrites this with 0, so this flag set is not the effective initial value */ + srv->srvconf.http_url_normalize = 0; /* temporary; change in future */ srv->srvconf.high_precision_timestamps = 0; srv->srvconf.max_request_field_size = 8192; srv->srvconf.loadavg[0] = 0.0; diff --git a/src/t/test_burl.c b/src/t/test_burl.c new file mode 100644 index 00000000..e83bebea --- /dev/null +++ b/src/t/test_burl.c @@ -0,0 +1,142 @@ +#include "first.h" + +#include +#include + +#include 
"burl.h" + +static void run_burl_normalize (buffer *psrc, buffer *ptmp, int flags, int line, const char *in, size_t in_len, const char *out, size_t out_len) { /* test helper: copies in into psrc, runs burl_normalize(psrc, ptmp, flags) and checks the outcome; out_len == (size_t)-2 means the call must return -2, otherwise psrc must equal out afterwards; on mismatch it prints a message carrying the caller line to stderr and aborts */ + int qs; + buffer_copy_string_len(psrc, in, in_len); + qs = burl_normalize(psrc, ptmp, flags); + if (out_len == (size_t)-2) { + if (-2 == qs) return; + fprintf(stderr, + "%s.%d: %s('%s') failed: expected error, got '%s'\n", + __FILE__, line, __func__+4, in, psrc->ptr); /* __func__+4 skips the run_ prefix so the message names burl_normalize */ + } + else { + if (buffer_is_equal_string(psrc, out, out_len)) return; + fprintf(stderr, + "%s.%d: %s('%s') failed: expected '%s', got '%s'\n", + __FILE__, line, __func__+4, in, out, psrc->ptr); + } + fflush(stderr); + abort(); +} + +static void test_burl_normalize (void) { /* drives run_burl_normalize through one group of cases per flag combination; psrc and ptmp are reused for every case */ + buffer *psrc = buffer_init(); + buffer *ptmp = buffer_init(); + int flags; + + flags = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED; /* in this group the percent escapes %2f, %2b and %3a are expected to stay encoded and only get upper-case hex digits */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/"), CONST_STR_LEN("/abc/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/def"), CONST_STR_LEN("/abc/def")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?"), CONST_STR_LEN("/abc?")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d"), CONST_STR_LEN("/abc?d")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d="), CONST_STR_LEN("/abc?d=")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e"), CONST_STR_LEN("/abc?d=e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&"), CONST_STR_LEN("/abc?d=e&")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f"), CONST_STR_LEN("/abc?d=e&f")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, 
flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#any"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2F"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2f"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%20"), CONST_STR_LEN("/%20")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2b"), CONST_STR_LEN("/%2B")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2B"), CONST_STR_LEN("/%2B")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3a"), CONST_STR_LEN("/%3A")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3A"), CONST_STR_LEN("/%3A")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/~test%20ä_"), CONST_STR_LEN("/~test%20%C3%A4_")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\375"), "", (size_t)-2); /* (size_t)-2 as out_len marks an input that burl_normalize is expected to reject */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\376"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), "", (size_t)-2); + + flags = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED; /* same inputs as the previous group; the expectations differ only for %2b and %3a, which are expected decoded to + and : here */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/"), CONST_STR_LEN("/abc/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/def"), CONST_STR_LEN("/abc/def")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?"), CONST_STR_LEN("/abc?")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d"), CONST_STR_LEN("/abc?d")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d="), 
CONST_STR_LEN("/abc?d=")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e"), CONST_STR_LEN("/abc?d=e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&"), CONST_STR_LEN("/abc?d=e&")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f"), CONST_STR_LEN("/abc?d=e&f")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc?d=e&f=g#any"), CONST_STR_LEN("/abc?d=e&f=g")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2F"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2f"), CONST_STR_LEN("/%2F")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%20"), CONST_STR_LEN("/%20")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2b"), CONST_STR_LEN("/+")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%2B"), CONST_STR_LEN("/+")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3a"), CONST_STR_LEN("/:")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3A"), CONST_STR_LEN("/:")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/~test%20ä_"), CONST_STR_LEN("/~test%20%C3%A4_")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\375"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\376"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), "", (size_t)-2); + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT; /* added on top of REQUIRED and never cleared below; the control bytes in this group are expected to be rejected */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\a"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\t"), "", (size_t)-2); + 
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\r"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\177"), "", (size_t)-2); + + #if defined(__WIN32) || defined(__CYGWIN__) + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_BACKSLASH_TRANS; /* only set on these platforms and not cleared afterwards, so it stays in effect for the later groups there */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a\\b"), CONST_STR_LEN("/a/b")); + #endif + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; /* from here on each optional flag is set, exercised, then cleared again with &= ~ before the next group */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=/"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2Fb"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb?c=/"), CONST_STR_LEN("/a/b?c=/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; /* an encoded slash in the path is expected to be rejected, while the query-only case in this group is still expected to normalize */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2fb"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a%2Fb"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=%2f"), CONST_STR_LEN("/a/b?c=/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; /* dot segments are expected to be resolved; the relative inputs in this group come back rooted at / */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("./a/b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("../a/b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/./b"), CONST_STR_LEN("/a/b")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b"), CONST_STR_LEN("/b")); + 
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/."), CONST_STR_LEN("/a/b/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/.."), CONST_STR_LEN("/a/")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b/.."), CONST_STR_LEN("/")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; /* with the reject variant every input in this group containing a dot segment is expected to fail */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("./a/b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("../a/b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/./b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/../b"), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/."), "", (size_t)-2); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b/.."), "", (size_t)-2); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT; + + flags |= HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; /* %20 in the query is expected to come back as +, and an existing + is expected unchanged */ + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=d+e"), CONST_STR_LEN("/a/b?c=d+e")); + run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/a/b?c=d%20e"), CONST_STR_LEN("/a/b?c=d+e")); + flags &= ~HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS; + + buffer_free(psrc); + buffer_free(ptmp); +} + +int main (void) { /* runs the burl_normalize cases; a failing case aborts inside run_burl_normalize, so reaching return 0 means every case passed */ + test_burl_normalize(); + return 0; +} diff --git a/src/t/test_request.c b/src/t/test_request.c index b73127e1..43903464 100644 --- a/src/t/test_request.c +++ b/src/t/test_request.c @@ -5,6 +5,7 @@ #include #include "base.h" +#include "burl.h" static void test_request_connection_reset(connection *con) {