diff --git a/src/base.h b/src/base.h index f3e5c7d3..871e68a6 100644 --- a/src/base.h +++ b/src/base.h @@ -300,6 +300,7 @@ typedef struct { unsigned short etag_use_mtime; unsigned short etag_use_size; unsigned short force_lowercase_filenames; /* if the FS is case-insensitive, force all files to lower-case */ + unsigned int http_parseopts; unsigned int max_request_size; int listen_backlog; @@ -550,6 +551,10 @@ typedef struct { unsigned short enable_cores; unsigned short reject_expect_100_with_417; buffer *xattr_name; + + unsigned short http_header_strict; + unsigned short http_host_strict; + unsigned short http_host_normalize; } server_config; typedef struct server_socket { diff --git a/src/configfile.c b/src/configfile.c index 7198b964..11f1cc7f 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -8,6 +8,7 @@ #include "configparser.h" #include "configfile.h" #include "proc_open.h" +#include "request.h" #include @@ -114,6 +115,9 @@ static int config_insert(server *srv) { { "mimetype.xattr-name", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 69 */ { "server.listen-backlog", NULL, T_CONFIG_INT, T_CONFIG_SCOPE_CONNECTION }, /* 70 */ { "server.error-handler-404", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION }, /* 71 */ + { "server.http-parseopt-header-strict",NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 72 */ + { "server.http-parseopt-host-strict", NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 73 */ + { "server.http-parseopt-host-normalize",NULL,T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 74 */ { "server.host", "use server.bind instead", @@ -178,6 +182,9 @@ static int config_insert(server *srv) { cv[68].destination = &(srv->srvconf.upload_temp_file_size); cv[69].destination = srv->srvconf.xattr_name; + cv[72].destination = &(srv->srvconf.http_header_strict); + cv[73].destination = &(srv->srvconf.http_host_strict); + cv[74].destination = &(srv->srvconf.http_host_normalize); srv->config_storage = calloc(1, srv->config_context->used * 
sizeof(specific_config *)); @@ -300,6 +307,15 @@ static int config_insert(server *srv) { } } + { + specific_config *s = srv->config_storage[0]; + s->http_parseopts= /*(global, but stored in con->conf.http_parseopts)*/ + (srv->srvconf.http_header_strict ?(HTTP_PARSEOPT_HEADER_STRICT) :0) + |(srv->srvconf.http_host_strict ?(HTTP_PARSEOPT_HOST_STRICT + |HTTP_PARSEOPT_HOST_NORMALIZE):0) + |(srv->srvconf.http_host_normalize ?(HTTP_PARSEOPT_HOST_NORMALIZE):0); + } + if (buffer_string_is_empty(stat_cache_string)) { srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; } else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("simple"))) { @@ -391,6 +407,8 @@ static int config_insert(server *srv) { int config_setup_connection(server *srv, connection *con) { specific_config *s = srv->config_storage[0]; + PATCH(http_parseopts); + PATCH(allow_http11); PATCH(mimetypes); PATCH(document_root); diff --git a/src/request.c b/src/request.c index 08047b55..77b90fea 100644 --- a/src/request.c +++ b/src/request.c @@ -12,7 +12,7 @@ #include #include -static int request_check_hostname(server *srv, connection *con, buffer *host) { +static int request_check_hostname(buffer *host) { enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL; size_t i; int label_len = 0; @@ -21,9 +21,6 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */ int level = 0; - UNUSED(srv); - UNUSED(con); - /* * hostport = host [ ":" port ] * host = hostname | IPv4address | IPv6address @@ -35,9 +32,6 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { * port = *digit */ - /* no Host: */ - if (buffer_is_empty(host)) return 0; - host_len = buffer_string_length(host); /* IPv6 adress */ @@ -209,6 +203,124 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { return 0; } +int http_request_host_normalize(buffer *b) { + /* rewrites b in place; returns 0 on success, -1 if the host or port is invalid + * check for and canonicalize numeric IP address and 
portnum (optional) + * (IP address may be followed by ":portnum" (optional)) + * - IPv6: "[...]" + * - IPv4: "x.x.x.x" + * - IPv4: 12345678 (32-bit decimal number) + * - IPv4: 012345678 (32-bit octal number) + * - IPv4: 0x12345678 (32-bit hex number) + * + * allow any chars (except ':' and '\0' and stray '[' or ']') + * (other code may check chars more strictly or more pedantically) + * ':' delimits (optional) port at end of string + * "[]" wraps IPv6 address literal + * '\0' should have been rejected earlier were it present + * + * any chars includes, but is not limited to: + * - allow '-' any where, even at beginning of word + * (security caution: might be confused for cmd flag if passed to shell) + * - allow all-digit TLDs + * (might be mistaken for IPv4 addr by inet_aton() + * unless non-digits appear in subdomain) + */ + + /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 + * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. + * Not using inet_pton() (when available) on IPv4 for similar reasons. */ + + const char * const p = b->ptr; + const size_t blen = buffer_string_length(b); + long port = 0; + + if (*p != '[') { + char * const colon = (char *)memchr(p, ':', blen); + if (colon) { + if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ + if (colon[1] != '\0') { + char *e; + port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ + if (0 < port && port <= USHRT_MAX && *e == '\0') { + /* valid port */ + } else { + return -1; + } + } /*(else ignore stray colon at string end)*/ + buffer_string_set_length(b, (size_t)(colon - p)); /*(truncate at ':' to remove port str; port is re-appended below)*/ + } + + if (light_isdigit(*p)) { + /* (IPv4 address literal or domain starting w/ digit (e.g. 
3com))*/ + struct in_addr addr; + #if defined(HAVE_INET_ATON) /*(Windows does not provide inet_aton())*/ + if (0 != inet_aton(p, &addr)) + #else + if ((addr.s_addr = inet_addr(p)) != INADDR_NONE) + #endif + { + #if defined(HAVE_INET_PTON)/*(expect inet_ntop() if inet_pton())*/ + #ifndef INET_ADDRSTRLEN + #define INET_ADDRSTRLEN 16 + #endif + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, (const void *)&addr, buf, sizeof(buf)); + buffer_copy_string(b, buf); + #else + buffer_copy_string(b, inet_ntoa(addr)); /*(not thread-safe)*/ + #endif + } + } + } else { /* IPv6 addr */ + #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) + + struct in6_addr addr; + char *bracket = b->ptr+blen-1; + int rc; + char buf[INET6_ADDRSTRLEN]; + if (blen == 2) return -1; /*(invalid "[]")*/ + if (*bracket != ']') { + bracket = (char *)memchr(b->ptr+1, ']', blen-1); + if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ + return -1; + } + if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ + char *e; + port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ + if (0 < port && port <= USHRT_MAX && *e == '\0') { + /* valid port */ + } else { + return -1; + } + } + } + + *bracket = '\0';/*(terminate IPv6 string)*/ + rc = inet_pton(AF_INET6, b->ptr+1, &addr); + *bracket = ']'; /*(restore bracket)*/ + if (1 != rc) return -1; + + inet_ntop(AF_INET6,(const void *)&addr, buf, sizeof(buf)); + buffer_string_set_length(b, 1); /* truncate after '[' */ + buffer_append_string(b, buf); + buffer_append_string_len(b, CONST_STR_LEN("]")); + + #else + + return -1; + + #endif + } + + if (port) { + buffer_append_string_len(b, CONST_STR_LEN(":")); + buffer_append_int(b, (int)port); + } + + return 0; +} + #if 0 #define DUMP_HEADER #endif @@ -302,6 +414,7 @@ int http_request_parse(server *srv, connection *con) { size_t i, first, ilen; int done = 0; + const unsigned int http_header_strict = (con->conf.http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); /* * Request: "^(GET|POST|HEAD) ([^ ]+(\\?[^ 
]+|)) (HTTP/1\\.[01])$" @@ -478,13 +591,18 @@ int http_request_parse(server *srv, connection *con) { /* check uri for invalid characters */ jlen = buffer_string_length(con->request.uri); - for (j = 0; j < jlen; j++) { - if (!request_uri_is_valid_char(con->request.uri->ptr[j])) { - unsigned char buf[2]; + if (http_header_strict) { + for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ; + } else { + char *z = memchr(con->request.uri->ptr, '\0', jlen); + j = (NULL == z) ? jlen : (size_t)(z - con->request.uri->ptr); + } + if (j < jlen) { con->http_status = 400; con->keep_alive = 0; if (srv->srvconf.log_request_header_on_error) { + unsigned char buf[2]; buf[0] = con->request.uri->ptr[j]; buf[1] = '\0'; @@ -507,7 +625,6 @@ int http_request_parse(server *srv, connection *con) { } return 0; - } } buffer_copy_buffer(con->request.orig_uri, con->request.uri); @@ -705,7 +822,7 @@ int http_request_parse(server *srv, connection *con) { } break; default: - if (*cur < 32 || ((unsigned char)*cur) >= 127) { + if (http_header_strict ? (*cur < 32 || ((unsigned char)*cur) >= 127) : *cur == '\0') { con->http_status = 400; con->keep_alive = 0; con->response.keep_alive = 0; @@ -1028,9 +1145,9 @@ int http_request_parse(server *srv, connection *con) { case '\t': /* strip leading WS */ if (value == cur) value = cur+1; - /* fallthrough */ + break; default: - if (*cur >= 0 && *cur < 32 && *cur != '\t') { + if (http_header_strict ? 
(*cur >= 0 && *cur < 32) : *cur == '\0') { if (srv->srvconf.log_request_header_on_error) { log_error_write(srv, __FILE__, __LINE__, "sds", "invalid char in header", (int)*cur, "-> 400"); @@ -1087,8 +1204,11 @@ int http_request_parse(server *srv, connection *con) { } /* check hostname field if it is set */ - if (NULL != con->request.http_host && - 0 != request_check_hostname(srv, con, con->request.http_host)) { + if (!buffer_is_empty(con->request.http_host) && + (((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_STRICT) && + 0 != request_check_hostname(con->request.http_host)) + || ((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) && + 0 != http_request_host_normalize(con->request.http_host)))) { if (srv->srvconf.log_request_header_on_error) { log_error_write(srv, __FILE__, __LINE__, "s", diff --git a/src/request.h b/src/request.h index bb05bcd7..a3aeccdf 100644 --- a/src/request.h +++ b/src/request.h @@ -4,7 +4,14 @@ #include "server.h" +typedef enum { /* bit flags OR-ed together into http_parseopts (read via con->conf.http_parseopts) */ + HTTP_PARSEOPT_HEADER_STRICT = 1 /* strict char checks on request URI and headers; when unset only '\0' is rejected */ + ,HTTP_PARSEOPT_HOST_STRICT = 2 /* validate Host with request_check_hostname(); config_insert() sets NORMALIZE along with it */ + ,HTTP_PARSEOPT_HOST_NORMALIZE = 4 /* canonicalize Host with http_request_host_normalize() */ +} http_parseopts_e; + int http_request_parse(server *srv, connection *con); int http_request_header_finished(server *srv, connection *con); +int http_request_host_normalize(buffer *b); #endif diff --git a/src/server.c b/src/server.c index fe88799f..b3f36a75 100644 --- a/src/server.c +++ b/src/server.c @@ -277,6 +277,9 @@ static server *server_init(void) { srv->srvconf.upload_tempdirs = array_init(); srv->srvconf.reject_expect_100_with_417 = 1; srv->srvconf.xattr_name = buffer_init_string("Content-Type"); + srv->srvconf.http_header_strict = 1; + srv->srvconf.http_host_strict = 1; /*(implies http_host_normalize)*/ + srv->srvconf.http_host_normalize = 0; /* use syslog */ srv->errorlog_fd = STDERR_FILENO;