From 7ea2d407344046c81e5c6410f8a4f447efe8fdfc Mon Sep 17 00:00:00 2001 From: Glenn Strauss Date: Fri, 19 Nov 2021 15:50:02 -0500 Subject: [PATCH] [core] save config regex captures only if used save config regex captures separately only if used by url.redirect or url.rewrite replacement directives within the condition (or for conditions containing directives from any other module which calls config_capture() for its directives during init) keep pointer to match data (cond_match_t *) in r->cond_match[] rather than cond_match_t to reduce data copying in h2_init_stream(). h2_init_stream() copies the results for already-evaluated conditions to avoid re-evaluating connection-level conditions for each and every stream. When conditions are reset, then the pointer in r->cond_match[] is updated when the condition is re-evaluated. (This all assumes that HTTP/2 connection-level conditions are not unset or re-evaluated once HTTP/2 streams begin.) --- src/base.h | 1 + src/configfile-glue.c | 12 +++++++--- src/configfile.c | 15 ++++++++++++ src/configfile.h | 1 + src/h2.c | 5 ++-- src/keyvalue.h | 5 ++-- src/mod_redirect.c | 19 +++++++++++---- src/mod_rewrite.c | 56 +++++++++++++++++++++++++++---------------- src/plugin_config.h | 3 +++ src/reqpool.c | 10 +++++--- src/request.h | 3 ++- 11 files changed, 94 insertions(+), 36 deletions(-) diff --git a/src/base.h b/src/base.h index 5ed28f68..d0cc761a 100644 --- a/src/base.h +++ b/src/base.h @@ -146,6 +146,7 @@ typedef struct { struct server { void *plugin_slots; array *config_context; + int config_captures; struct fdevents *ev; int (* network_backend_write)(int fd, chunkqueue *cq, off_t max_bytes, log_error_st *errh); diff --git a/src/configfile-glue.c b/src/configfile-glue.c index bba5dcc8..bcf8cd22 100644 --- a/src/configfile-glue.c +++ b/src/configfile-glue.c @@ -39,6 +39,13 @@ void config_get_config_cond_info(config_cond_info * const cfginfo, uint32_t idx) cfginfo->comp_key = dc->comp_key; } +int config_capture(server *srv, int idx) { + data_config * const dc = (data_config *)config_reference.data[idx]; + return (dc->capture_idx) + ? dc->capture_idx + : (dc->capture_idx = ++srv->config_captures); +} + int config_feature_bool (const server *srv, const char *feature, int default_value) { return srv->srvconf.feature_flags ? config_plugin_value_tobool( @@ -543,7 +550,8 @@ static cond_result_t config_check_cond_nocache(request_st * const r, const data_ break; case CONFIG_COND_NOMATCH: case CONFIG_COND_MATCH: { - cond_match_t *cond_match = r->cond_match + dc->context_ndx; + cond_match_t * const cond_match = + r->cond_match[dc->capture_idx] = r->cond_match_data+dc->capture_idx; match = (dc->cond == CONFIG_COND_MATCH); match ^= (data_config_pcre_exec(dc, cache, l, cond_match) > 0); break; @@ -596,8 +604,6 @@ static void config_cond_clear_node(cond_cache_t * const cond_cache, const data_c /** * reset the config-cache for a named item - * - * if the item is COND_LAST_ELEMENT we reset all items */ void config_cond_cache_reset_item(request_st * const r, comp_key_t item) { cond_cache_t * const cond_cache = r->cond_cache; diff --git a/src/configfile.c b/src/configfile.c index bbf55838..f2295b09 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -1228,6 +1228,21 @@ int config_finalize(server *srv, const buffer *default_server_tag) { return 0; } + /* adjust cond_match_data list size if regex config conditions present */ + if (srv->config_context->used > 1) { + const data_config *dcrx = NULL; + for (uint32_t i = 1; i < srv->config_context->used; ++i) { + const data_config * const dc = + (data_config *)srv->config_context->data[i]; + if ((dc->cond==CONFIG_COND_MATCH || dc->cond==CONFIG_COND_NOMATCH) + && 0 == dc->capture_idx) { + dcrx = dc; + break; + } + } + if (dcrx || srv->config_captures) ++srv->config_captures; + } + return 1; } diff --git a/src/configfile.h b/src/configfile.h index f38263c6..ae75e713 100644 --- a/src/configfile.h +++ b/src/configfile.h @@ -37,6 +37,7 @@ struct data_config { void *regex; struct pcre_extra *regex_study; #endif + int capture_idx; int ext; buffer comp_tag; const char *comp_key; diff --git a/src/h2.c b/src/h2.c index f48da03b..fd4b8046 100644 --- a/src/h2.c +++ b/src/h2.c @@ -2571,8 +2571,9 @@ h2_init_stream (request_st * const h2r, connection * const con) r->conditional_is_valid = h2r->conditional_is_valid; memcpy(r->cond_cache, h2r->cond_cache, used * sizeof(cond_cache_t)); #ifdef HAVE_PCRE_H - if (used > 1) /*(save 128b per con if no conditions)*/ - memcpy(r->cond_match, h2r->cond_match, used * sizeof(cond_match_t)); + if (srv->config_captures > 1) + memcpy(r->cond_match, h2r->cond_match, + srv->config_captures * sizeof(cond_match_t)); #endif /*(see request_config_reset() and request_reset_ex())*/ r->server_name = h2r->server_name; diff --git a/src/keyvalue.h b/src/keyvalue.h index ec0f9f2b..88fc32a1 100644 --- a/src/keyvalue.h +++ b/src/keyvalue.h @@ -19,8 +19,9 @@ typedef struct pcre_keyvalue_ctx { typedef struct { struct pcre_keyvalue *kv; uint32_t used; - uint16_t x0; - uint16_t x1; + int x0; + int x1; + int cfgidx; } pcre_keyvalue_buffer; __attribute_cold__ diff --git a/src/mod_redirect.c b/src/mod_redirect.c index 638b60af..e36c34fc 100644 --- a/src/mod_redirect.c +++ b/src/mod_redirect.c @@ -78,14 +78,23 @@ static void mod_redirect_patch_config(request_st * const r, plugin_data * const static pcre_keyvalue_buffer * mod_redirect_parse_list(server *srv, const array *a, const int condidx) { const int pcre_jit = config_feature_bool(srv, "server.pcre_jit", 1); pcre_keyvalue_buffer * const kvb = pcre_keyvalue_buffer_init(); - kvb->x0 = (unsigned short)condidx; + kvb->cfgidx = condidx; buffer * const tb = srv->tmp_buf; + int percent = 0; for (uint32_t j = 0; j < a->used; ++j) { data_string *ds = (data_string *)a->data[j]; if (srv->srvconf.http_url_normalize) { pcre_keyvalue_burl_normalize_key(&ds->key, tb); pcre_keyvalue_burl_normalize_value(&ds->value, tb); } + for (const char *s = ds->value.ptr; (s = strchr(s, '%')); ++s) { + if (s[1] == '%') + ++s; + else if (light_isdigit(s[1]) || s[1] == '{') { + percent |= 1; + break; + } + } if (!pcre_keyvalue_buffer_append(srv->errh, kvb, &ds->key, &ds->value, pcre_jit)) { log_error(srv->errh, __FILE__, __LINE__, @@ -94,6 +103,8 @@ static pcre_keyvalue_buffer * mod_redirect_parse_list(server *srv, const array * return NULL; } } + if (percent) + kvb->x0 = config_capture(srv, condidx); return kvb; } @@ -157,10 +168,10 @@ URIHANDLER_FUNC(mod_redirect_uri_handler) { if (!p->conf.redirect || !p->conf.redirect->used) return HANDLER_GO_ON; ctx.cache = NULL; - if (p->conf.redirect->x0) { /*(p->conf.redirect->x0 is context_idx)*/ + if (p->conf.redirect->x0) { /*(p->conf.redirect->x0 is capture_idx)*/ ctx.cond_match_count = - r->cond_cache[p->conf.redirect->x0].patterncount; - ctx.cache = r->cond_match + p->conf.redirect->x0; + r->cond_cache[p->conf.redirect->cfgidx].patterncount; + ctx.cache = r->cond_match[p->conf.redirect->x0]; } ctx.burl = &burl; burl.scheme = &r->uri.scheme; diff --git a/src/mod_rewrite.c b/src/mod_rewrite.c index 79c44683..a547f0bd 100644 --- a/src/mod_rewrite.c +++ b/src/mod_rewrite.c @@ -99,16 +99,25 @@ static pcre_keyvalue_buffer * mod_rewrite_parse_list(server *srv, const array *a if (NULL == kvb) { allocated = 1; kvb = pcre_keyvalue_buffer_init(); - kvb->x0 = (unsigned short)condidx; + kvb->cfgidx = condidx; } buffer * const tb = srv->tmp_buf; + int percent = 0; for (uint32_t j = 0; j < a->used; ++j) { data_string *ds = (data_string *)a->data[j]; if (srv->srvconf.http_url_normalize) { pcre_keyvalue_burl_normalize_key(&ds->key, tb); pcre_keyvalue_burl_normalize_value(&ds->value, tb); } + for (const char *s = ds->value.ptr; (s = strchr(s, '%')); ++s) { + if (s[1] == '%') + ++s; + else if (light_isdigit(s[1]) || s[1] == '{') { + percent |= 1; + break; + } + } if (!pcre_keyvalue_buffer_append(srv->errh, kvb, &ds->key, &ds->value, pcre_jit)) { log_error(srv->errh, __FILE__, __LINE__, @@ -117,6 +126,8 @@ static pcre_keyvalue_buffer * mod_rewrite_parse_list(server *srv, const array *a return NULL; } } + if (percent && 0 == kvb->x0) + kvb->x0 = config_capture(srv, condidx); return kvb; } @@ -207,7 +218,7 @@ SETDEFAULTS_FUNC(mod_rewrite_set_defaults) { kvb = cpv->v.v; } - if (kvb) kvb->x1 = (unsigned short)kvb->used; /* repeat_idx */ + if (kvb) kvb->x1 = (int)kvb->used; /* repeat_idx */ if ((cpv = rewrite_repeat)) { cpv->v.v = mod_rewrite_parse_list(srv, cpv->v.a, kvb, condidx); @@ -223,7 +234,7 @@ SETDEFAULTS_FUNC(mod_rewrite_set_defaults) { kvb_NF = cpv->v.v; } - if (kvb_NF) kvb_NF->x1 = (unsigned short)kvb_NF->used; /* repeat_idx */ + if (kvb_NF) kvb_NF->x1 = (int)kvb_NF->used; /* repeat_idx */ if ((cpv = rewrite_repeat_NF)) { cpv->v.v = mod_rewrite_parse_list(srv, cpv->v.a, kvb_NF, condidx); @@ -243,6 +254,23 @@ SETDEFAULTS_FUNC(mod_rewrite_set_defaults) { return HANDLER_GO_ON; } +__attribute_cold__ +static handler_t process_rewrite_rules_loop_error(request_st * const r, const int cfgidx) { + if (0 != cfgidx) { + config_cond_info cfginfo; + config_get_config_cond_info(&cfginfo, (uint32_t)cfgidx); + log_error(r->conf.errh, __FILE__, __LINE__, + "ENDLESS LOOP IN rewrite-rule DETECTED ... aborting request, " + "perhaps you want to use url.rewrite-once instead of " + "url.rewrite-repeat (%s)", cfginfo.comp_key); + } + else + log_error(r->conf.errh, __FILE__, __LINE__, + "ENDLESS LOOP IN rewrite-rule DETECTED ... aborting request"); + + return HANDLER_ERROR; +} + URIHANDLER_FUNC(mod_rewrite_con_reset) { r->plugin_ctx[((plugin_data *)p_d)->id] = NULL; return HANDLER_GO_ON; @@ -255,31 +283,17 @@ static handler_t process_rewrite_rules(request_st * const r, plugin_data *p, con if (r->plugin_ctx[p->id]) { uintptr_t * const hctx = (uintptr_t *)(r->plugin_ctx + p->id); - if (((++*hctx) & 0x1FF) > 100) { - if (0 != kvb->x0) { - config_cond_info cfginfo; - config_get_config_cond_info(&cfginfo, kvb->x0); - log_error(r->conf.errh, __FILE__, __LINE__, - "ENDLESS LOOP IN rewrite-rule DETECTED ... aborting request, " - "perhaps you want to use url.rewrite-once instead of " - "url.rewrite-repeat (%s)", cfginfo.comp_key); - return HANDLER_ERROR; - } - - log_error(r->conf.errh, __FILE__, __LINE__, - "ENDLESS LOOP IN rewrite-rule DETECTED ... aborting request"); - return HANDLER_ERROR; + return process_rewrite_rules_loop_error(r, kvb->cfgidx); } - if (*hctx & REWRITE_STATE_FINISHED) return HANDLER_GO_ON; } ctx.cache = NULL; - if (kvb->x0) { /*(kvb->x0 is context_idx)*/ + if (kvb->x0) { /*(kvb->x0 is capture_idx)*/ ctx.cond_match_count = - r->cond_cache[kvb->x0].patterncount; - ctx.cache = r->cond_match + kvb->x0; + r->cond_cache[kvb->cfgidx].patterncount; + ctx.cache = r->cond_match[kvb->x0]; } ctx.burl = &burl; burl.scheme = &r->uri.scheme; diff --git a/src/plugin_config.h b/src/plugin_config.h index 4b96f679..48c8079e 100644 --- a/src/plugin_config.h +++ b/src/plugin_config.h @@ -49,6 +49,9 @@ typedef struct { __attribute_cold__ void config_get_config_cond_info(config_cond_info *cfginfo, uint32_t idx); +__attribute_cold__ +int config_capture(server *srv, int idx); + __attribute_cold__ void config_init(server *srv); diff --git a/src/reqpool.c b/src/reqpool.c index 4c76bf97..dab31702 100644 --- a/src/reqpool.c +++ b/src/reqpool.c @@ -61,10 +61,11 @@ request_init_data (request_st * const r, connection * const con, server * const force_assert(NULL != r->cond_cache); #ifdef HAVE_PCRE_H - if (srv->config_context->used > 1) {/*(save 128b per con if no conditions)*/ - r->cond_match = - calloc(srv->config_context->used, sizeof(cond_match_t)); + if (srv->config_captures) {/*(save 128b per con if no regex conditions)*/ + r->cond_match = calloc(srv->config_captures, sizeof(cond_match_t *)); force_assert(NULL != r->cond_match); + r->cond_match_data = calloc(srv->config_captures, sizeof(cond_match_t)); + force_assert(NULL != r->cond_match_data); } #endif @@ -224,7 +225,10 @@ request_free_data (request_st * const r) free(r->plugin_ctx); free(r->cond_cache); + #ifdef HAVE_PCRE_H free(r->cond_match); + free(r->cond_match_data); + #endif /* note: r is not zeroed here and r is not freed here */ } diff --git a/src/request.h b/src/request.h index e3a987de..6a0525b5 100644 --- a/src/request.h +++ b/src/request.h @@ -138,7 +138,8 @@ struct request_st { /* config conditions (internal) */ uint32_t conditional_is_valid; struct cond_cache_t *cond_cache; - struct cond_match_t *cond_match; + struct cond_match_t **cond_match; + struct cond_match_t *cond_match_data; request_config conf;