From 80638252dcdd3ed8aea276245a902f2ec47f2b4f Mon Sep 17 00:00:00 2001 From: Glenn Strauss Date: Sun, 25 Nov 2018 19:07:53 -0500 Subject: [PATCH] [multiple] validate UTF-8 in url-decoded paths validate UTF-8 in url-decoded paths obtained elsewhere than from request (burl_normalize(), if enabled with server.http-parseopts, checks url for overlong encodings of ASCII chars in the HTTP request-line) --- src/buffer.c | 33 +++++++++++++++++++++++++++++++++ src/buffer.h | 1 + src/http-header-glue.c | 15 +++++++++++++++ src/mod_ssi.c | 20 +++++++++++++++----- src/mod_webdav.c | 5 +++++ 5 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/buffer.c b/src/buffer.c index da65a775..77d76a8e 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -772,6 +772,39 @@ void buffer_urldecode_query(buffer *url) { buffer_urldecode_internal(url, 1); } +int buffer_is_valid_UTF8(const buffer *b) { + /* https://www.w3.org/International/questions/qa-forms-utf-8 ; returns 1 if the bytes at b->ptr up to the first NUL byte are well-formed UTF-8, else 0; b->ptr is dereferenced unconditionally and the scan is ended only by a NUL byte, so b->ptr must be non-NULL and NUL-terminated; accepted forms (hex): 01..7f | c2..df 80..bf | e0 a0..bf 80..bf | e1..ec,ee..ef 80..bf 80..bf | ed 80..9f 80..bf | f0 90..bf 80..bf 80..bf | f1..f3 80..bf 80..bf 80..bf | f4 80..8f 80..bf 80..bf ; the narrowed second-byte ranges reject overlong encodings (e0, f0), UTF-16 surrogates (ed) and code points above U+10FFFF (f4); a NUL where a continuation byte is expected fails its range test and the && chain short-circuits, so no byte beyond the terminator is read; NOTE(review): the buffer length is never consulted, so if a caller can pass data with an embedded NUL the bytes after it go unchecked -- confirm the url-decoder cannot produce one */ + const unsigned char *c = (unsigned char *)b->ptr; + while (*c) { + + /*(note: includes ctrls; any single byte 0x01..0x7f is accepted as-is)*/ + if ( c[0] < 0x80 ) { ++c; continue; } + + if ( 0xc2 <= c[0] && c[0] <= 0xdf + && 0x80 <= c[1] && c[1] <= 0xbf ) { c+=2; continue; } + + if ( ( ( 0xe0 == c[0] + && 0xa0 <= c[1] && c[1] <= 0xbf) + || ( 0xe1 <= c[0] && c[0] <= 0xef && c[0] != 0xed + && 0x80 <= c[1] && c[1] <= 0xbf) + || ( 0xed == c[0] + && 0x80 <= c[1] && c[1] <= 0x9f) ) + && 0x80 <= c[2] && c[2] <= 0xbf ) { c+=3; continue; } + + if ( ( ( 0xf0 == c[0] + && 0x90 <= c[1] && c[1] <= 0xbf) + || ( 0xf1 <= c[0] && c[0] <= 0xf3 + && 0x80 <= c[1] && c[1] <= 0xbf) + || ( 0xf4 == c[0] + && 0x80 <= c[1] && c[1] <= 0x8f) ) + && 0x80 <= c[2] && c[2] <= 0xbf + && 0x80 <= c[3] && c[3] <= 0xbf ) { c+=4; continue; } + + return 0; /* invalid: no accepted form starts at c (stray continuation byte, 0xc0/0xc1 or 0xf5..0xff lead byte, overlong form, surrogate, value above U+10FFFF, or a sequence cut short by the NUL terminator) */ + } + return 1; /* valid: reached the NUL terminator with every sequence accepted (an empty string is valid) */ +} + /* - special case: empty string returns empty string * - on windows or cygwin: replace \ with / * - strip leading spaces diff --git a/src/buffer.h b/src/buffer.h index 189d526c..733cb43a 100644 --- 
a/src/buffer.h +++ b/src/buffer.h @@ -134,6 +134,7 @@ void buffer_copy_string_encoded_cgi_varnames(buffer *b, const char *s, size_t s_ void buffer_urldecode_path(buffer *url); void buffer_urldecode_query(buffer *url); +int buffer_is_valid_UTF8(const buffer *b); void buffer_path_simplify(buffer *dest, buffer *src); void buffer_to_lower(buffer *b); diff --git a/src/http-header-glue.c b/src/http-header-glue.c index f7c39b1d..7ae7ddba 100644 --- a/src/http-header-glue.c +++ b/src/http-header-glue.c @@ -554,6 +554,15 @@ static void http_response_xsendfile (server *srv, connection *con, buffer *path, } buffer_urldecode_path(path); + if (!buffer_is_valid_UTF8(path)) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "X-Sendfile invalid UTF-8 after url-decode:", path); + if (con->http_status < 400) { + con->http_status = 502; + con->mode = DIRECT; + } + return; + } buffer_path_simplify(path, path); if (con->conf.force_lowercase_filenames) { buffer_to_lower(path); @@ -627,6 +636,12 @@ static void http_response_xsendfile2(server *srv, connection *con, const buffer for (pos = ++range; *pos && *pos != ' ' && *pos != ','; pos++) ; buffer_urldecode_path(b); + if (!buffer_is_valid_UTF8(b)) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "X-Sendfile2 invalid UTF-8 after url-decode:", b); + con->http_status = 502; + break; + } buffer_path_simplify(b, b); if (con->conf.force_lowercase_filenames) { buffer_to_lower(b); diff --git a/src/mod_ssi.c b/src/mod_ssi.c index cc032ee3..20984aa7 100644 --- a/src/mod_ssi.c +++ b/src/mod_ssi.c @@ -511,6 +511,11 @@ static int process_ssi_stmt(server *srv, connection *con, handler_ctx *p, const buffer_copy_string(srv->tmp_buf, file_path); buffer_urldecode_path(srv->tmp_buf); + if (!buffer_is_valid_UTF8(srv->tmp_buf)) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "SSI invalid UTF-8 after url-decode:", srv->tmp_buf); + break; + } buffer_path_simplify(srv->tmp_buf, srv->tmp_buf); buffer_append_string_buffer(p->stat_fn, srv->tmp_buf); } 
else { @@ -518,17 +523,22 @@ static int process_ssi_stmt(server *srv, connection *con, handler_ctx *p, const size_t remain; if (virt_path[0] == '/') { - buffer_copy_string(p->stat_fn, virt_path); + buffer_copy_string(srv->tmp_buf, virt_path); } else { /* there is always a / */ sl = strrchr(con->uri.path->ptr, '/'); - buffer_copy_string_len(p->stat_fn, con->uri.path->ptr, sl - con->uri.path->ptr + 1); - buffer_append_string(p->stat_fn, virt_path); + buffer_copy_string_len(srv->tmp_buf, con->uri.path->ptr, sl - con->uri.path->ptr + 1); + buffer_append_string(srv->tmp_buf, virt_path); } - buffer_urldecode_path(p->stat_fn); - buffer_path_simplify(srv->tmp_buf, p->stat_fn); + buffer_urldecode_path(srv->tmp_buf); + if (!buffer_is_valid_UTF8(srv->tmp_buf)) { + log_error_write(srv, __FILE__, __LINE__, "sb", + "SSI invalid UTF-8 after url-decode:", srv->tmp_buf); + break; + } + buffer_path_simplify(srv->tmp_buf, srv->tmp_buf); /* we have an uri */ diff --git a/src/mod_webdav.c b/src/mod_webdav.c index aa3e9106..1fde022e 100644 --- a/src/mod_webdav.c +++ b/src/mod_webdav.c @@ -1996,6 +1996,11 @@ static handler_t mod_webdav_copymove(server *srv, connection *con, plugin_data * } buffer_urldecode_path(p->uri.path); + if (!buffer_is_valid_UTF8(p->uri.path)) { + /* invalid UTF-8 after url-decode */ + con->http_status = 400; + return HANDLER_FINISHED; + } buffer_path_simplify(p->uri.path, p->uri.path); if (buffer_string_is_empty(p->uri.path) || p->uri.path->ptr[0] != '/') {