From 255269d799185186bda64c04bf7a2efeeffbb8d6 Mon Sep 17 00:00:00 2001
From: Glenn Strauss
Date: Sat, 30 Jun 2018 20:44:47 -0400
Subject: [PATCH] [mod_redirect,mod_rewrite] encoding options (fixes #443, fixes #911)

Provide means to encode redirect and rewrite backreference substitutions

In addition to $1 and %1, the following modifiers are now supported,
followed by the number for the backreference, e.g. ${esc:1}
  ${noesc:...}    no escaping
  ${esc:...}      escape all non-alphanumeric - . _ ~ incl double-escape %
  ${escape:...}   escape all non-alphanumeric - . _ ~ incl double-escape %
  ${escnde:...}   escape all non-alphanumeric - . _ ~ but no double-esc %
  ${tolower:...}
  ${toupper:...}
  %{noesc:...}
  %{esc:...}
  %{escape:...}
  %{escnde:...}
  %{tolower:...}
  %{toupper:...}

Provide means to substitute URI parts without needing a regex match
(and can be preceded by encoding modifier, e.g. ${tolower:url.authority})
  ${url.scheme}
  ${url.authority}
  ${url.port}
  ${url.path}
  ${url.query}
  ${qsa}          appends query string, if not empty

x-ref:
  "[PATCH] mod_redirect: Add support for url-encoding backreferences, map %%n->%n, $$n->$n"
  https://redmine.lighttpd.net/issues/443
  "Need for URL encoding in mod_redirect and possibly mod_rewrite"
  https://redmine.lighttpd.net/issues/911
---
 src/burl.c         | 159 +++++++++++++++++++++++++++++++++++++
 src/burl.h         |  12 ++++
 src/keyvalue.c     | 181 +++++++++++++++++++++++++++++++++++++++------
 src/mod_redirect.c |   1 +
 src/mod_rewrite.c  |   1 +
 5 files changed, 331 insertions(+), 23 deletions(-)

diff --git a/src/burl.c b/src/burl.c
index 3eadb6ce..c3650498 100644
--- a/src/burl.c
+++ b/src/burl.c
@@ -355,3 +355,162 @@ int burl_normalize (buffer *b, buffer *t, int flags)
 
     return qs;
 }
+
+
+static void burl_append_encode_nde (buffer * const b, const char * const str, const size_t len)
+{
+    /* percent-encodes everything except unreserved  - . 0-9 A-Z _ a-z ~
+     * unless already percent-encoded (does not double-encode) */
+    /* Note: not checking for invalid UTF-8 */
+    char * const p = buffer_string_prepare_append(b, len*3);
+    unsigned int n1, n2;
+    int j = 0;
+    for (unsigned int i = 0; i < len; ++i, ++j) {
+        if (str[i]=='%' && len - i > 2 /*(do not read past str[len-1])*/
+            && li_cton(str[i+1], n1) && li_cton(str[i+2], n2)) {
+            const unsigned int x = (n1 << 4) | n2;
+            if (burl_is_unreserved((int)x)) {
+                p[j] = (char)x;
+            }
+            else { /* leave UTF-8, control chars, and required chars encoded */
+                p[j]   = '%';
+                p[++j] = str[i+1];
+                p[++j] = str[i+2];
+            }
+            i+=2;
+        }
+        else if (burl_is_unreserved(str[i])) {
+            p[j] = str[i];
+        }
+        else {
+            p[j]   = '%';
+            p[++j] = hex_chars_uc[(str[i] >> 4) & 0xF];
+            p[++j] = hex_chars_uc[str[i] & 0xF];
+        }
+    }
+    buffer_commit(b, j);
+}
+
+
+static void burl_append_encode_psnde (buffer * const b, const char * const str, const size_t len)
+{
+    /* percent-encodes everything except unreserved  - . 0-9 A-Z _ a-z ~ plus /
+     * unless already percent-encoded (does not double-encode) */
+    /* Note: not checking for invalid UTF-8 */
+    char * const p = buffer_string_prepare_append(b, len*3);
+    unsigned int n1, n2;
+    int j = 0;
+    for (unsigned int i = 0; i < len; ++i, ++j) {
+        if (str[i]=='%' && len - i > 2 /*(do not read past str[len-1])*/
+            && li_cton(str[i+1], n1) && li_cton(str[i+2], n2)) {
+            const unsigned int x = (n1 << 4) | n2;
+            if (burl_is_unreserved((int)x)) {
+                p[j] = (char)x;
+            }
+            else { /* leave UTF-8, control chars, and required chars encoded */
+                p[j]   = '%';
+                p[++j] = str[i+1];
+                p[++j] = str[i+2];
+            }
+            i+=2;
+        }
+        else if (burl_is_unreserved(str[i]) || str[i] == '/') {
+            p[j] = str[i];
+        }
+        else {
+            p[j]   = '%';
+            p[++j] = hex_chars_uc[(str[i] >> 4) & 0xF];
+            p[++j] = hex_chars_uc[str[i] & 0xF];
+        }
+    }
+    buffer_commit(b, j);
+}
+
+
+static void burl_append_encode_all (buffer * const b, const char * const str, const size_t len)
+{
+    /* percent-encodes everything except unreserved  - . 0-9 A-Z _ a-z ~
+     * Note: double-encodes any existing '%') */
+    /* Note: not checking for invalid UTF-8 */
+    char * const p = buffer_string_prepare_append(b, len*3);
+    int j = 0;
+    for (unsigned int i = 0; i < len; ++i, ++j) {
+        if (burl_is_unreserved(str[i])) {
+            p[j] = str[i];
+        }
+        else {
+            p[j]   = '%';
+            p[++j] = hex_chars_uc[(str[i] >> 4) & 0xF];
+            p[++j] = hex_chars_uc[str[i] & 0xF];
+        }
+    }
+    buffer_commit(b, j);
+}
+
+
+static void burl_offset_tolower (buffer * const b, const size_t off)
+{
+    /*(skips over all percent-encodings, including encoding of alpha chars)*/
+    for (char *p = b->ptr+off; p[0]; ++p) {
+        if (p[0] >= 'A' && p[0] <= 'Z') p[0] |= 0x20;
+        else if (p[0]=='%' && light_isxdigit(p[1]) && light_isxdigit(p[2]))
+            p+=2;
+    }
+}
+
+
+static void burl_offset_toupper (buffer * const b, const size_t off)
+{
+    /*(skips over all percent-encodings, including encoding of alpha chars)*/
+    for (char *p = b->ptr+off; p[0]; ++p) {
+        if (p[0] >= 'a' && p[0] <= 'z') p[0] &= 0xdf;
+        else if (p[0]=='%' && light_isxdigit(p[1]) && light_isxdigit(p[2]))
+            p+=2;
+    }
+}
+
+
+/* Append str (len bytes) to b, recoding per flags (enum burl_recoding_e):
+ * - one encoding is applied, checked in this order: BURL_ENCODE_NONE,
+ *   BURL_ENCODE_ALL, BURL_ENCODE_NDE, BURL_ENCODE_PSNDE; when none of these
+ *   is set (flags == 0 or only a case flag), str is appended verbatim
+ * - then BURL_TOLOWER (takes precedence) or BURL_TOUPPER folds the case of
+ *   the newly appended bytes, skipping over %XX percent-encoded triplets
+ * Appends nothing when len is 0. */
+void burl_append (buffer * const b, const char * const str, const size_t len, const int flags)
+{
+    size_t off = 0;
+
+    if (0 == len) return;
+
+    if (0 == flags) {
+        buffer_append_string_len(b, str, len);
+        return;
+    }
+
+    if (flags & (BURL_TOUPPER|BURL_TOLOWER)) off = buffer_string_length(b);
+
+    if (flags & BURL_ENCODE_NONE) {
+        buffer_append_string_len(b, str, len);
+    }
+    else if (flags & BURL_ENCODE_ALL) {
+        burl_append_encode_all(b, str, len);
+    }
+    else if (flags & BURL_ENCODE_NDE) {
+        burl_append_encode_nde(b, str, len);
+    }
+    else if (flags & BURL_ENCODE_PSNDE) {
+        burl_append_encode_psnde(b, str, len);
+    }
+    else { /* only a case flag given: append verbatim so there is text to fold */
+        buffer_append_string_len(b, str, len);
+    }
+
+    /* note: not normalizing str, which could come from arbitrary header,
+     * so it is possible that alpha chars are percent-encoded upper/lowercase */
+    if (flags & (BURL_TOLOWER|BURL_TOUPPER)) {
+        (flags & BURL_TOLOWER)
+          ? burl_offset_tolower(b, off)  /*(flags & BURL_TOLOWER)*/
+          : burl_offset_toupper(b, off); /*(flags & BURL_TOUPPER)*/
+    }
+}
diff --git a/src/burl.h b/src/burl.h
index a7d45563..8d870c5f 100644
--- a/src/burl.h
+++ b/src/burl.h
@@ -7,6 +7,7 @@
 struct burl_parts_t {
   buffer *scheme;
   buffer *authority;
+  unsigned short port;
   buffer *path;
   buffer *query;
 };
@@ -29,4 +30,15 @@ enum burl_opts_e {
 
 int burl_normalize (buffer *b, buffer *t, int flags);
 
+enum burl_recoding_e {
+  BURL_TOLOWER         = 0x0001
+ ,BURL_TOUPPER         = 0x0002
+ ,BURL_ENCODE_NONE     = 0x0004
+ ,BURL_ENCODE_ALL      = 0x0008
+ ,BURL_ENCODE_NDE      = 0x0010 /* encode delims, but no-double-encode (NDE) */
+ ,BURL_ENCODE_PSNDE    = 0x0020 /* similar to NDE, but preserve literal slash */
+};
+
+void burl_append (buffer * const b, const char * const str, const size_t len, const int flags);
+
 #endif
diff --git a/src/keyvalue.c b/src/keyvalue.c
index 69a4a9f1..9664736f 100644
--- a/src/keyvalue.c
+++ b/src/keyvalue.c
@@ -2,6 +2,7 @@
 
 #include "keyvalue.h"
 #include "base.h"
+#include "burl.h"
 #include "log.h"
 
 #include <stdlib.h>
@@ -112,6 +113,152 @@ void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) {
 }
 
 #ifdef HAVE_PCRE_H
+static void pcre_keyvalue_buffer_append_match(buffer *b, const char **list, int n, unsigned int num, int flags) {
+    if (num < (unsigned int)n) { /* n is always > 0 */
+        burl_append(b, list[num], strlen(list[num]), flags);
+    }
+}
+
+static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
+    const struct cond_cache_t * const cache = ctx->cache;
+    if (!cache) return; /* no enclosing match context */
+    if (num < (unsigned int)cache->patterncount) {
+        const int off = cache->matches[(num <<= 1)]; /*(num *= 2)*/
+        const int len = cache->matches[num+1] - off;
+        burl_append(b, cache->comp_value->ptr + off, (size_t)len, flags);
+    }
+}
+
+/* Expand one "${...}" or "%{...}" token into b; pattern points at the '$' or
+ * '%' and pattern[1] is '{'.  Leading modifiers (esc: escape: escnde:
+ * escpsnde: noesc: noescape: tolower: toupper:) accumulate recoding flags;
+ * the token then names a backreference number, url.<part>, or qsa.
+ * Returns the amount the caller adds to its pattern index (offset of the
+ * char preceding the closing '}'), or -1 if the token is malformed. */
+static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const char **list, int n, pcre_keyvalue_ctx *ctx) {
+    const unsigned char *p = (unsigned char *)pattern+2;/* +2 past ${} or %{} */
+    int flags = 0;
+    while (!light_isdigit(*p) && *p != '}' && *p != '\0') {
+        if (0) {
+        }
+        else if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') {
+            p+=3;
+            if (p[0] == ':') {
+                flags |= BURL_ENCODE_ALL;
+                p+=1;
+            }
+            else if (0 == strncmp((const char *)p, "ape:", 4)) {
+                flags |= BURL_ENCODE_ALL;
+                p+=4;
+            }
+            else if (0 == strncmp((const char *)p, "nde:", 4)) {
+                flags |= BURL_ENCODE_NDE;
+                p+=4;
+            }
+            else if (0 == strncmp((const char *)p, "psnde:", 6)) {
+                flags |= BURL_ENCODE_PSNDE;
+                p+=6;
+            }
+            else { /* skip unrecognized esc... */
+                p = (const unsigned char *)strchr((const char *)p, ':');
+                if (NULL == p) return -1;
+                ++p;
+            }
+        }
+        else if (p[0] == 'n' && p[1] == 'o') {
+            p+=2;
+            if (0 == strncmp((const char *)p, "esc:", 4)) {
+                flags |= BURL_ENCODE_NONE;
+                p+=4;
+            }
+            else if (0 == strncmp((const char *)p, "escape:", 7)) {
+                flags |= BURL_ENCODE_NONE;
+                p+=7;
+            }
+            else { /* skip unrecognized no... */
+                p = (const unsigned char *)strchr((const char *)p, ':');
+                if (NULL == p) return -1;
+                ++p;
+            }
+        }
+        else if (p[0] == 't' && p[1] == 'o') {
+            p+=2;
+            if (0 == strncmp((const char *)p, "lower:", 6)) {
+                flags |= BURL_TOLOWER;
+                p+=6;
+            }
+            else if (0 == strncmp((const char *)p, "upper:", 6)) {
+                flags |= BURL_TOUPPER;
+                p+=6;
+            }
+            else { /* skip unrecognized to... */
+                p = (const unsigned char *)strchr((const char *)p, ':');
+                if (NULL == p) return -1;
+                ++p;
+            }
+        }
+        else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') {
+            p+=4;
+            if (0 == strncmp((const char *)p, "scheme}", 7)) {
+                burl_append(b, CONST_BUF_LEN(ctx->burl->scheme), flags);
+                p+=6;
+            }
+            else if (0 == strncmp((const char *)p, "authority}", 10)) {
+                burl_append(b, CONST_BUF_LEN(ctx->burl->authority), flags);
+                p+=9;
+            }
+            else if (0 == strncmp((const char *)p, "port}", 5)) {
+                buffer_append_int(b, (int)ctx->burl->port);
+                p+=4;
+            }
+            else if (0 == strncmp((const char *)p, "path}", 5)) {
+                burl_append(b, CONST_BUF_LEN(ctx->burl->path), flags);
+                p+=4;
+            }
+            else if (0 == strncmp((const char *)p, "query}", 6)) {
+                burl_append(b, CONST_BUF_LEN(ctx->burl->query), flags);
+                p+=5;
+            }
+            else { /* skip unrecognized url.* */
+                p = (const unsigned char *)strchr((const char *)p, '}');
+                if (NULL == p) return -1;
+            }
+            break;
+        }
+        else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') {
+            const buffer *qs = ctx->burl->query;
+            if (!buffer_is_empty(qs)) {
+                if (NULL != strchr(b->ptr, '?')) {
+                    if (!buffer_string_is_empty(qs))
+                        buffer_append_string_len(b, CONST_STR_LEN("&"));
+                }
+                else {
+                    buffer_append_string_len(b, CONST_STR_LEN("?"));
+                }
+                burl_append(b, CONST_BUF_LEN(qs), flags);
+            }
+            p+=3;
+            break;
+        }
+        else ++p;  /* skip unrecognized char */
+    }
+    if (*p == '\0') return -1;
+    if (*p != '}') { /* light_isdigit(*p) */
+        unsigned int num = *p - '0';
+        ++p;
+        if (light_isdigit(*p)) num = num * 10 + (*p++ - '0');
+        if (*p != '}') {
+            p = (const unsigned char *)strchr((const char *)p, '}');
+            if (NULL == p) return -1;
+        }
+        if (0 == flags) flags = BURL_ENCODE_PSNDE; /* default */
+        pattern[0] == '$' /*(else '%')*/
+          ? pcre_keyvalue_buffer_append_match(b, list, n, num, flags)
+          : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags);
+    }
+    return (int)(p + 1 - (unsigned char *)pattern - 2);
+}
+
 static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const char **list, int n, pcre_keyvalue_ctx *ctx) {
 	const char *pattern = patternb->ptr;
 	const size_t pattern_len = buffer_string_length(patternb);
@@ -119,37 +266,25 @@ static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const
 
 	/* search for $... or %... pattern substitutions */
 
-	buffer_reset(b);
+	buffer_string_set_length(b, 0);
 
 	for (size_t k = 0; k + 1 < pattern_len; ++k) {
 		if (pattern[k] == '$' || pattern[k] == '%') {
-			size_t num = pattern[k + 1] - '0';
 
 			buffer_append_string_len(b, pattern + start, k - start);
 
-			if (!light_isdigit((unsigned char)pattern[k + 1])) {
+			if (pattern[k + 1] == '{') {
+				int num = pcre_keyvalue_buffer_subst_ext(b, pattern+k, list, n, ctx);
+				if (num < 0) return; /* error; truncate result */
+				k += (size_t)num;
+			} else if (light_isdigit(((unsigned char *)pattern)[k + 1])) {
+				unsigned int num = (unsigned int)pattern[k + 1] - '0';
+				pattern[k] == '$' /*(else '%')*/
+				  ? pcre_keyvalue_buffer_append_match(b, list, n, num, 0)
+				  : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0);
+			} else {
 				/* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */
 				buffer_append_string_len(b, pattern+k, pattern[k] == pattern[k+1] ? 1 : 2);
-			} else if (pattern[k] == '$') {
-				/* n is always > 0 */
-				if (num < (size_t)n) {
-					buffer_append_string(b, list[num]);
-				}
-			} else if (ctx->cache) {
-				const struct cond_cache_t * const cache = ctx->cache;
-				if (num < (size_t)cache->patterncount) {
-					num <<= 1; /* n *= 2 */
-					buffer_append_string_len(b,
-						cache->comp_value->ptr + cache->matches[num],
-						cache->matches[num + 1] - cache->matches[num]);
-				}
-			} else {
-				#if 0
-				/* we have no context, we are global */
-				log_error_write(srv, __FILE__, __LINE__, "ss",
-						"used a redirect/rewrite containing a %[0-9]+ in the global scope, ignored:",
-						pattern);
-				#endif
 			}
 
 			k++;
diff --git a/src/mod_redirect.c b/src/mod_redirect.c
index 3d863649..69c4c37a 100644
--- a/src/mod_redirect.c
+++ b/src/mod_redirect.c
@@ -160,6 +160,7 @@ URIHANDLER_FUNC(mod_redirect_uri_handler) {
 	ctx.burl = &burl;
 	burl.scheme    = con->uri.scheme;
 	burl.authority = con->uri.authority;
+	burl.port      = sock_addr_get_port(&con->srv_socket->addr);
 	burl.path      = con->uri.path_raw;
 	burl.query     = con->uri.query;
 
diff --git a/src/mod_rewrite.c b/src/mod_rewrite.c
index 39441bcb..10bc5f09 100644
--- a/src/mod_rewrite.c
+++ b/src/mod_rewrite.c
@@ -253,6 +253,7 @@ static handler_t process_rewrite_rules(server *srv, connection *con, plugin_data
 	ctx.burl = &burl;
 	burl.scheme    = con->uri.scheme;
 	burl.authority = con->uri.authority;
+	burl.port      = sock_addr_get_port(&con->srv_socket->addr);
 	burl.path      = con->uri.path_raw;
 	burl.query     = con->uri.query;
 