lighttpd 1.4.x https://www.lighttpd.net/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1310 lines
36 KiB

#include "first.h"
#include "request.h"
#include "keyvalue.h"
#include "log.h"
#include <sys/stat.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
[config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hyphen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' (CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016
6 years ago
/**
 * Strictly validate a "host [ ':' port ]" string.
 *
 * Bracketed IPv6 literals are only checked for shape: at most 7 colons,
 * only hex digits, ':' and '.' between the brackets, and nothing but an
 * all-digit ":port" after the closing bracket.
 *
 * Anything else is scanned right-to-left as a dotted hostname or a dotted
 * IPv4 address.  If the right-most label consists solely of digits the
 * whole name is treated as an IPv4 address and must have exactly 4
 * all-digit parts.
 *
 * @param host  buffer holding the string to check; if the hostname part
 *              ends in a '.', that dot is removed from the buffer in place
 *              (any ":port" suffix is shifted left by one)
 * @return 0 if the string is acceptable, -1 otherwise
 */
static int request_check_hostname(buffer *host) {
	enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL;
	size_t i;
	int label_len = 0;
	size_t host_len, hostport_len;
	char *colon;
	int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */
	int level = 0;

	/*
	 *       hostport      = host [ ":" port ]
	 *       host          = hostname | IPv4address | IPv6address
	 *       hostname      = *( domainlabel "." ) toplabel [ "." ]
	 *       domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
	 *       toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
	 *       IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
	 *       IPv6address   = "[" ... "]"
	 *       port          = *digit
	 */

	/* Reject an empty host before host->ptr[0] is read below.
	 * (An empty string was already rejected further down via host_len == 0;
	 *  NOTE(review): this assumes buffer_string_is_empty() is also true for
	 *  a buffer whose ptr has not been allocated yet -- confirm.) */
	if (buffer_string_is_empty(host)) return -1;

	/* IPv6 address */
	if (host->ptr[0] == '[') {
		char *c = host->ptr + 1;
		int colon_cnt = 0;

		/* check the address inside [...] */
		for (; *c && *c != ']'; c++) {
			if (*c == ':') {
				if (++colon_cnt > 7) {
					return -1;
				}
			} else if (!light_isxdigit(*c) && '.' != *c) {
				return -1;
			}
		}

		/* missing ] */
		if (!*c) {
			return -1;
		}

		/* check port */
		if (*(c+1) == ':') {
			for (c += 2; *c; c++) {
				if (!light_isdigit(*c)) {
					return -1;
				}
			}
		}
		else if ('\0' != *(c+1)) {
			/* only a port is allowed to follow [...] */
			return -1;
		}
		return 0;
	}

	hostport_len = host_len = buffer_string_length(host);

	if (NULL != (colon = memchr(host->ptr, ':', host_len))) {
		char *c = colon + 1;

		/* check portnumber */
		for (; *c; c++) {
			if (!light_isdigit(*c)) return -1;
		}

		/* remove the port from the host-len */
		host_len = colon - host->ptr;
	}

	/* Host is empty */
	if (host_len == 0) return -1;

	/* if the hostname ends in a "." strip it */
	if (host->ptr[host_len-1] == '.') {
		/* shift port info one left
		 * (hostport_len - host_len is the length of ":port") */
		if (NULL != colon) memmove(colon-1, colon, hostport_len - host_len);
		buffer_string_set_length(host, --hostport_len);
		if (--host_len == 0) return -1;
	}

	/* scan from the right and skip the \0 */
	for (i = host_len; i-- > 0; ) {
		const char c = host->ptr[i];

		switch (stage) {
		case TOPLABEL:
			if (c == '.') {
				/* only switch stage, if this is not the last character */
				if (i != host_len - 1) {
					if (label_len == 0) {
						return -1;
					}

					/* check the first character at right of the dot */
					if (is_ip == 0) {
						if (!light_isalnum(host->ptr[i+1])) {
							return -1;
						}
					} else {
						/* is_ip is still -1 here, which means no
						 * non-digit was seen in the top label (any
						 * non-digit would have set is_ip = 0 below),
						 * so the label is made of digits only */
						is_ip = 1;
					}

					stage = DOMAINLABEL;

					label_len = 0;
					level++;
				} else if (i == 0) {
					/* just a dot and nothing else is evil */
					return -1;
				}
			} else if (i == 0) {
				/* the first character of the hostname */
				if (!light_isalnum(c)) {
					return -1;
				}
				label_len++;
			} else {
				if (c != '-' && !light_isalnum(c)) {
					return -1;
				}
				if (is_ip == -1) {
					if (!light_isdigit(c)) is_ip = 0;
				}
				label_len++;
			}

			break;
		case DOMAINLABEL:
			if (is_ip == 1) {
				/* IPv4: every remaining part must be non-empty, all digits */
				if (c == '.') {
					if (label_len == 0) {
						return -1;
					}

					label_len = 0;
					level++;
				} else if (!light_isdigit(c)) {
					return -1;
				} else {
					label_len++;
				}
			} else {
				if (c == '.') {
					if (label_len == 0) {
						return -1;
					}

					/* the label to the right of this dot
					 * must not start with '-' */
					if ('-' == host->ptr[i+1]) {
						return -1;
					}

					label_len = 0;
					level++;
				} else if (i == 0) {
					/* the first character of the hostname */
					if (!light_isalnum(c)) {
						return -1;
					}
					label_len++;
				} else {
					if (c != '-' && !light_isalnum(c)) {
						return -1;
					}
					label_len++;
				}
			}

			break;
		}
	}

	/* a IP has to consist of 4 parts */
	if (is_ip == 1 && level != 3) {
		return -1;
	}

	/* the left-most label must not be empty (e.g. leading '.') */
	if (label_len == 0) {
		return -1;
	}

	return 0;
}
[config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hyphen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' (CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016
6 years ago
/**
 * Canonicalize a numeric IP address and optional port in a host string.
 *
 * The buffer is rewritten in place: a numeric IPv4 or bracketed IPv6
 * address is replaced by the text produced by inet_ntop()/inet_ntoa(),
 * and a port (parsed with strtol() base 0) is re-appended after a ':'
 * via buffer_append_int().  A stray ':' at the very end of the string is
 * dropped.  Host strings that do not start with '[' or a digit are left
 * untouched apart from the port handling.
 *
 * @param b  buffer holding "host", "host:port", "[v6]" or "[v6]:port"
 * @return 0 on success, -1 if the string is rejected (empty host before
 *         the ':', port outside 1..USHRT_MAX or with trailing garbage,
 *         malformed or unsupported IPv6 literal, address formatting
 *         failure)
 */
int http_request_host_normalize(buffer *b) {
	/*
	 * check for and canonicalize numeric IP address and portnum (optional)
	 * (IP address may be followed by ":portnum" (optional))
	 * - IPv6: "[...]"
	 * - IPv4: "x.x.x.x"
	 * - IPv4: 12345678   (32-bit decimal number)
	 * - IPv4: 012345678  (32-bit octal number)
	 * - IPv4: 0x12345678 (32-bit hex number)
	 *
	 * allow any chars (except ':' and '\0' and stray '[' or ']')
	 *   (other code may check chars more strictly or more pedantically)
	 * ':'  delimits (optional) port at end of string
	 * "[]" wraps IPv6 address literal
	 * '\0' should have been rejected earlier were it present
	 *
	 * any chars includes, but is not limited to:
	 * - allow '-' any where, even at beginning of word
	 *     (security caution: might be confused for cmd flag if passed to shell)
	 * - allow all-digit TLDs
	 *     (might be mistaken for IPv4 addr by inet_aton()
	 *      unless non-digits appear in subdomain)
	 */

	/* Note: not using getaddrinfo() since it does not support "[]" around IPv6
	 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
	 * Not using inet_pton() (when available) on IPv4 for similar reasons. */

	const char * const p = b->ptr;
	const size_t blen = buffer_string_length(b);
	long port = 0;

	if (*p != '[') {
		char * const colon = (char *)memchr(p, ':', blen);
		if (colon) {
			if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
			if (colon[1] != '\0') {
				char *e;
				port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
				if (0 < port && port <= USHRT_MAX && *e == '\0') {
					/* valid port */
				} else {
					return -1;
				}
			} /*(else ignore stray colon at string end)*/
			buffer_string_set_length(b, (size_t)(colon - p)); /*(remove port str)*/
		}

		if (light_isdigit(*p)) {
			/* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
			struct in_addr addr;
#if defined(HAVE_INET_ATON) /*(Windows does not provide inet_aton())*/
			if (0 != inet_aton(p, &addr))
#else
			if ((addr.s_addr = inet_addr(p)) != INADDR_NONE)
#endif
			{
#if defined(HAVE_INET_PTON)/*(expect inet_ntop() if inet_pton())*/
#ifndef INET_ADDRSTRLEN
#define INET_ADDRSTRLEN 16
#endif
				char buf[INET_ADDRSTRLEN];
				/* do not copy from buf unless inet_ntop() filled it */
				if (NULL == inet_ntop(AF_INET, (const void *)&addr, buf, sizeof(buf))) {
					return -1;
				}
				buffer_copy_string(b, buf);
#else
				buffer_copy_string(b, inet_ntoa(addr)); /*(not thread-safe)*/
#endif
			}
			/*(else: not parseable as IPv4; leave the host string as-is)*/
		}
	} else { /* IPv6 addr */
#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
		struct in6_addr addr;
		char *bracket = b->ptr+blen-1;
		char *percent = strchr(b->ptr+1, '%');
		size_t len;
		int rc;
		char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
		if (blen <= 2) return -1; /*(invalid "[]")*/
		if (*bracket != ']') {
			/* string does not end in ']': expect "[...]:port" */
			bracket = (char *)memchr(b->ptr+1, ']', blen-1);
			if (NULL == bracket || bracket[1] != ':'  || bracket - b->ptr == 1){
				return -1;
			}
			if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
				char *e;
				port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
				if (0 < port && port <= USHRT_MAX && *e == '\0') {
					/* valid port */
				} else {
					return -1;
				}
			}
		}

		/* temporarily cut the string so inet_pton() sees only the address */
		*bracket = '\0';/*(terminate IPv6 string)*/
		if (percent) *percent = '\0'; /*(remove %interface from address)*/
		rc = inet_pton(AF_INET6, b->ptr+1, &addr);
		if (percent) *percent = '%'; /*(restore %interface)*/
		*bracket = ']'; /*(restore bracket)*/
		if (1 != rc) return -1;

		/* do not strlen() buf unless inet_ntop() filled it */
		if (NULL == inet_ntop(AF_INET6, (const void *)&addr, buf, sizeof(buf))) {
			return -1;
		}
		len = strlen(buf);
		if (percent) {
			/* carry the "%interface" suffix over verbatim,
			 * provided it sits inside the brackets and fits in buf */
			if (percent > bracket) return -1;
			if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
			memcpy(buf+len, percent, (size_t)(bracket - percent));
			len += (size_t)(bracket - percent);
		}
		buffer_string_set_length(b, 1); /* truncate after '[' */
		buffer_append_string_len(b, buf, len);
		buffer_append_string_len(b, CONST_STR_LEN("]"));
#else
		return -1;
#endif
	}

	if (port) {
		buffer_append_string_len(b, CONST_STR_LEN(":"));
		buffer_append_int(b, (int)port);
	}

	return 0;
}
#if 0
#define DUMP_HEADER
#endif
/**
 * Split a comma-separated header value ("val1, val2, val3") into tokens
 * and insert each token as a string element into vals.
 *
 * Blanks and tabs around a token are dropped, blanks and tabs inside a
 * token are kept, and empty tokens (consecutive commas) are skipped.
 *
 * @param vals  array receiving one data_string per token
 * @param b     buffer holding the value to split; not modified
 * @return always 0
 */
static int http_request_split_value(array *vals, buffer *b) {
	const char *tok_first = NULL; /* first character of the current token */
	const char *tok_last = NULL;  /* last non-blank character seen in it */
	const char *p;
	const char *stop;
	int in_token = 0;

	if (buffer_string_is_empty(b)) return 0;

	p = b->ptr;
	/* the walk deliberately includes the byte at offset length (the string
	 * terminator) so that the final token gets flushed */
	stop = p + buffer_string_length(b);

	for (; p <= stop; ++p) {
		const char ch = *p;

		if (!in_token) {
			/* looking for the start of a token */
			if ('\0' == ch) return 0;                          /* end of string */
			if (' ' == ch || '\t' == ch || ',' == ch) continue; /* blank / empty token */

			tok_first = tok_last = p;
			in_token = 1;
			continue;
		}

		/* inside a token: blanks do not move tok_last, so trailing
		 * blanks end up trimmed while inner blanks stay included */
		if (' ' == ch || '\t' == ch) continue;

		if (',' != ch && '\0' != ch) {
			tok_last = p;
			continue;
		}

		/* ',' or '\0' closes the token: store it and look for the next
		 * (note: unlike above, a '\0' here does not end the walk) */
		{
			data_string *ds = (data_string *)array_get_unused_element(vals, TYPE_STRING);
			if (NULL == ds) {
				ds = data_string_init();
			}

			buffer_copy_string_len(ds->value, tok_first, tok_last-tok_first+1);
			array_insert_unique(vals, (data_unset *)ds);
		}
		in_token = 0;
	}

	return 0;
}
/**
 * Tell whether a byte may appear in a request URI.
 *
 * Rejected are all bytes up to and including 32 (control characters and
 * the blank), 127 and 255; every other byte value is accepted.
 *
 * @param c  byte to test
 * @return 1 if the byte is accepted, 0 otherwise
 */
static int request_uri_is_valid_char(unsigned char c) {
	return (c > 32 && c != 127 && c != 255) ? 1 : 0;
}
int http_request_parse(server *srv, connection *con) {
char *uri = NULL, *proto = NULL, *method = NULL, con_length_set;
int is_key = 1, key_len = 0, is_ws_after_key = 0, in_folding;
char *value = NULL, *key = NULL;
char *reqline_host = NULL;
int reqline_hostlen = 0;
enum { HTTP_CONNECTION_UNSET, HTTP_CONNECTION_KEEPALIVE, HTTP_CONNECTION_CLOSE } keep_alive_set = HTTP_CONNECTION_UNSET;
int line = 0;
int request_line_stage = 0;
size_t i, first, ilen;
int done = 0;
[config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hypen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' (CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016
6 years ago
const unsigned int http_header_strict = (con->conf.http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
/*
* Request: "^(GET|POST|HEAD) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
* Option : "^([-a-zA-Z]+): (.+)$"
* End : "^$"
*/
if (con->conf.log_request_header) {
log_error_write(srv, __FILE__, __LINE__, "sdsdSb",
"fd:", con->fd,
"request-len:", buffer_string_length(con->request.request),
"\n", con->request.request);
}
if (con->request_count > 1 &&
con->request.request->ptr[0] == '\r' &&
con->request.request->ptr[1] == '\n') {
/* we are in keep-alive and might get \r\n after a previous POST request.*/
buffer_copy_string_len(con->parse_request, con->request.request->ptr + 2, buffer_string_length(con->request.request) - 2);
} else {
/* fill the local request buffer */
fix buffer, chunk and http_chunk API * remove unused structs and functions (buffer_array, read_buffer) * change return type from int to void for many functions, as the return value (indicating error/success) was never checked, and the function would only fail on programming errors and not on invalid input; changed functions to use force_assert instead of returning an error. * all "len" parameters now are the real size of the memory to be read. the length of strings is given always without the terminating 0. * the "buffer" struct still counts the terminating 0 in ->used, provide buffer_string_length() to get the length of a string in a buffer. unset config "strings" have used == 0, which is used in some places to distinguish unset values from "" (empty string) values. * most buffer usages should now use it as string container. * optimise some buffer copying by "moving" data to other buffers * use (u)intmax_t for generic int-to-string functions * remove unused enum values: UNUSED_CHUNK, ENCODING_UNSET * converted BUFFER_APPEND_SLASH to inline function (no macro feature needed) * refactor: create chunkqueue_steal: moving (partial) chunks into another queue * http_chunk: added separate function to terminate chunked body instead of magic handling in http_chunk_append_mem(). http_chunk_append_* now handle empty chunks, and never terminate the chunked body. From: Stefan Bühler <stbuehler@web.de> git-svn-id: svn://svn.lighttpd.net/lighttpd/branches/lighttpd-1.4.x@2975 152afb58-edef-0310-8abb-c4023f1b3aa9
7 years ago
buffer_copy_buffer(con->parse_request, con->request.request);
}
keep_alive_set = 0;
con_length_set = 0;
/* parse the first line of the request
*
* should be:
*
* <method> <uri> <protocol>\r\n
* */
ilen = buffer_string_length(con->parse_request);
for (i = 0, first = 0; i < ilen && line == 0; i++) {
switch(con->parse_request->ptr[i]) {
case '\r':
if (con->parse_request->ptr[i+1] == '\n') {
http_method_t r;
char *nuri = NULL;
size_t j, jlen;
/* \r\n -> \0\0 */
con->parse_request->ptr[i] = '\0';
con->parse_request->ptr[i+1] = '\0';
buffer_copy_string_len(con->request.request_line, con->parse_request->ptr, i);
if (request_line_stage != 2) {
con->http_status = 400;
con->response.keep_alive = 0;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "incomplete request line -> 400");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
proto = con->parse_request->ptr + first;
*(uri - 1) = '\0';
*(proto - 1) = '\0';
/* we got the first one :) */
if (HTTP_METHOD_UNSET == (r = get_http_method_key(method))) {
con->http_status = 501;
con->response.keep_alive = 0;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "unknown http-method -> 501");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
con->request.http_method = r;
/*
* RFC2616 says:
*
* HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT
*
* */
if (0 == strncmp(proto, "HTTP/", sizeof("HTTP/") - 1)) {
char * major = proto + sizeof("HTTP/") - 1;
char * minor = strchr(major, '.');
char *err = NULL;
int major_num = 0, minor_num = 0;
int invalid_version = 0;
if (NULL == minor || /* no dot */
minor == major || /* no major */
*(minor + 1) == '\0' /* no minor */) {
invalid_version = 1;
} else {
*minor = '\0';
major_num = strtol(major, &err, 10);
if (*err != '\0') invalid_version = 1;
*minor++ = '.';
minor_num = strtol(minor, &err, 10);
if (*err != '\0') invalid_version = 1;
}
if (invalid_version) {
con->http_status = 400;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "unknown protocol -> 400");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
if (major_num == 1 && minor_num == 1) {
con->request.http_version = con->conf.allow_http11 ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
} else if (major_num == 1 && minor_num == 0) {
con->request.http_version = HTTP_VERSION_1_0;
} else {
con->http_status = 505;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "unknown HTTP version -> 505");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
} else {
con->http_status = 400;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "unknown protocol -> 400");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
if (0 == strncmp(uri, "http://", 7) &&
NULL != (nuri = strchr(uri + 7, '/'))) {
reqline_host = uri + 7;
reqline_hostlen = nuri - reqline_host;
buffer_copy_string_len(con->request.uri, nuri, proto - nuri - 1);
} else if (0 == strncmp(uri, "https://", 8) &&
NULL != (nuri = strchr(uri + 8, '/'))) {
reqline_host = uri + 8;
reqline_hostlen = nuri - reqline_host;
buffer_copy_string_len(con->request.uri, nuri, proto - nuri - 1);
} else {
/* everything looks good so far */
buffer_copy_string_len(con->request.uri, uri, proto - uri - 1);
}
/* check uri for invalid characters */
jlen = buffer_string_length(con->request.uri);
[config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hypen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' (CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016
6 years ago
if (http_header_strict) {
for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ;
} else {
char *z = memchr(con->request.uri->ptr, '\0', jlen);
j = (NULL == z) ? jlen : (size_t)(z - con->request.uri->ptr);
}
if (j < jlen) {
con->http_status = 400;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
[config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hyphen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' (CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016
6 years ago
unsigned char buf[2];
buf[0] = con->request.uri->ptr[j];
buf[1] = '\0';
if (con->request.uri->ptr[j] > 32 &&
con->request.uri->ptr[j] != 127) {
/* the character is printable -> print it */
log_error_write(srv, __FILE__, __LINE__, "ss",
"invalid character in URI -> 400",
buf);
} else {
/* a control-character, print ascii-code */
log_error_write(srv, __FILE__, __LINE__, "sd",
"invalid character in URI -> 400",
con->request.uri->ptr[j]);
}
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
fix buffer, chunk and http_chunk API * remove unused structs and functions (buffer_array, read_buffer) * change return type from int to void for many functions, as the return value (indicating error/success) was never checked, and the function would only fail on programming errors and not on invalid input; changed functions to use force_assert instead of returning an error. * all "len" parameters now are the real size of the memory to be read. the length of strings is given always without the terminating 0. * the "buffer" struct still counts the terminating 0 in ->used, provide buffer_string_length() to get the length of a string in a buffer. unset config "strings" have used == 0, which is used in some places to distinguish unset values from "" (empty string) values. * most buffer usages should now use it as string container. * optimise some buffer copying by "moving" data to other buffers * use (u)intmax_t for generic int-to-string functions * remove unused enum values: UNUSED_CHUNK, ENCODING_UNSET * converted BUFFER_APPEND_SLASH to inline function (no macro feature needed) * refactor: create chunkqueue_steal: moving (partial) chunks into another queue * http_chunk: added separate function to terminate chunked body instead of magic handling in http_chunk_append_mem(). http_chunk_append_* now handle empty chunks, and never terminate the chunked body. From: Stefan Bühler <stbuehler@web.de> git-svn-id: svn://svn.lighttpd.net/lighttpd/branches/lighttpd-1.4.x@2975 152afb58-edef-0310-8abb-c4023f1b3aa9
7 years ago
buffer_copy_buffer(con->request.orig_uri, con->request.uri);
con->http_status = 0;
i++;
line++;
first = i+1;
}
break;
case ' ':
switch(request_line_stage) {
case 0:
/* GET|POST|... */
method = con->parse_request->ptr + first;
first = i + 1;
break;
case 1:
/* /foobar/... */
uri = con->parse_request->ptr + first;
first = i + 1;
break;
default:
/* ERROR, one space too many */
con->http_status = 400;
con->response.keep_alive = 0;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "overlong request line -> 400");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
request_line_stage++;
break;
}
}
in_folding = 0;
if (buffer_string_is_empty(con->request.uri)) {
con->http_status = 400;
con->response.keep_alive = 0;
con->keep_alive = 0;
if (srv->srvconf.log_request_header_on_error) {
log_error_write(srv, __FILE__, __LINE__, "s", "no uri specified -> 400");
log_error_write(srv, __FILE__, __LINE__, "Sb",
"request-header:\n",
con->request.request);
}
return 0;
}
if (reqline_host) {
/* Insert as host header */
data_string *ds;
if (NULL == (ds = (data_string *)array_get_unused_element(con->request.headers, TYPE_STRING))) {
ds = data_string_init();
}
buffer_copy_string_len(ds->key, CONST_STR_LEN("Host"));
buffer_copy_string_len(ds->value, reqline_host, reqline_hostlen);
array_insert_unique(con->request.headers, (data_unset *)ds);
con->request.http_host = ds->value;
}
for (; i <= ilen && !done; i++) {
char *cur = con->parse_request->ptr + i;
if (is_key) {
size_t j;
int got_colon = 0;