From b700a8ca09b31cfc00ea6a3b6592b233761d5643 Mon Sep 17 00:00:00 2001 From: Glenn Strauss Date: Fri, 25 Dec 2020 03:56:39 -0500 Subject: [PATCH] [multiple] etag.[ch] -> http_etag.[ch]; better imp more efficient implementation of HTTP ETag generation and comparison modify dekhash() to take hash value to allow for incremental hashing --- src/CMakeLists.txt | 2 +- src/Makefile.am | 4 +- src/SConscript | 2 +- src/algo_md.h | 11 +-- src/configfile.c | 2 +- src/etag.c | 183 ----------------------------------------- src/etag.h | 20 ----- src/http-header-glue.c | 7 +- src/http_etag.c | 86 +++++++++++++++++++ src/http_etag.h | 27 ++++++ src/meson.build | 2 +- src/mod_deflate.c | 2 +- src/mod_magnet.c | 6 +- src/mod_ssi.c | 6 +- src/mod_webdav.c | 20 ++--- src/stat_cache.c | 4 +- 16 files changed, 142 insertions(+), 242 deletions(-) delete mode 100644 src/etag.c delete mode 100644 src/etag.h create mode 100644 src/http_etag.c create mode 100644 src/http_etag.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b9949da5..983e1c7a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -734,7 +734,7 @@ set(COMMON_SRC base64.c buffer.c burl.c log.c http_header.c http_kv.c keyvalue.c chunk.c http_chunk.c stream.c fdevent.c gw_backend.c - stat_cache.c plugin.c etag.c array.c + stat_cache.c plugin.c http_etag.c array.c data_string.c data_array.c data_integer.c algo_md5.c algo_sha1.c algo_splaytree.c diff --git a/src/Makefile.am b/src/Makefile.am index 035b719c..724648fa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -72,7 +72,7 @@ CLEANFILES = versionstamp.h versionstamp.h.tmp lemon$(BUILD_EXEEXT) common_src=base64.c buffer.c burl.c log.c \ http_header.c http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ - stat_cache.c plugin.c etag.c array.c \ + stat_cache.c plugin.c http_etag.c array.c \ data_string.c data_array.c \ data_integer.c \ algo_md5.c algo_sha1.c algo_splaytree.c \ @@ -461,7 +461,7 @@ hdr = base64.h buffer.h burl.h network.h log.h http_kv.h keyvalue.h \ http_auth.h http_date.h http_header.h http_vhostdb.h stream.h \ fdevent.h gw_backend.h connections.h base.h base_decls.h stat_cache.h \ plugin.h plugin_config.h \ - etag.h array.h vector.h \ + http_etag.h array.h vector.h \ fdevent_impl.h network_write.h configfile.h \ mod_ssi.h mod_ssi_expr.h \ sock_addr_cache.h \ diff --git a/src/SConscript b/src/SConscript index 4a20c3e6..6e60a3a3 100644 --- a/src/SConscript +++ b/src/SConscript @@ -58,7 +58,7 @@ def GatherLibs(env, *libs): common_src = Split("base64.c buffer.c burl.c log.c \ http_header.c http_kv.c keyvalue.c chunk.c \ http_chunk.c stream.c fdevent.c gw_backend.c \ - stat_cache.c plugin.c etag.c array.c \ + stat_cache.c plugin.c http_etag.c array.c \ data_string.c data_array.c \ data_integer.c \ algo_md5.c algo_sha1.c algo_splaytree.c \ diff --git a/src/algo_md.h b/src/algo_md.h index f3fac6eb..06d9734e 100644 --- a/src/algo_md.h +++ b/src/algo_md.h @@ -28,14 +28,15 @@ static inline uint32_t djbhash(const char *str, const uint32_t len, uint32_t has /* Donald E. Knuth * The Art Of Computer Programming Volume 3 * Chapter 6.4, Topic: Sorting and Search */ +/*(len should be passed as initial hash value. + * On subsequent calls, pass intermediate hash value for incremental hashing)*/ __attribute_pure__ -static inline uint32_t dekhash (const char *str, const uint32_t len); -static inline uint32_t dekhash (const char *str, const uint32_t len) +static inline uint32_t dekhash (const char *str, const uint32_t len, uint32_t hash); +static inline uint32_t dekhash (const char *str, const uint32_t len, uint32_t hash) { const unsigned char * const s = (const unsigned char *)str; - uint32_t h = len; - for (uint32_t i = 0; i < len; ++i) h = (h << 5) ^ (h >> 27) ^ s[i]; - return h; + for (uint32_t i = 0; i < len; ++i) hash = (hash << 5) ^ (hash >> 27) ^ s[i]; + return hash; } diff --git a/src/configfile.c b/src/configfile.c index 29d967b4..63ff5fdb 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -2,8 +2,8 @@ #include "base.h" #include "burl.h" -#include "etag.h" #include "fdevent.h" +#include "http_etag.h" #include "keyvalue.h" #include "log.h" #include "stream.h" diff --git a/src/etag.c b/src/etag.c deleted file mode 100644 index 9d1ff713..00000000 --- a/src/etag.c +++ /dev/null @@ -1,183 +0,0 @@ -#include "first.h" - -#include "algo_md.h" -#include "buffer.h" -#include "etag.h" - -#include - -int etag_is_equal(const buffer *etag, const char *line, int weak_ok) { - enum { - START = 0, - CHECK, - CHECK_QUOTED, - SKIP, - SKIP_QUOTED, - TAIL - } state = START; - - const char *current; - const char *tok_start; - const char *tok = NULL; - int matched; - - if ('*' == line[0] && '\0' == line[1]) { - return 1; - } - - if (!etag || buffer_string_is_empty(etag)) return 0; - tok_start = etag->ptr; - - if ('W' == tok_start[0]) { - if (!weak_ok || '/' != tok_start[1]) return 0; /* bad etag */ - tok_start = tok_start + 2; - } - - if ('"' != tok_start[0]) return 0; /* bad etag */ - /* we start comparing after the first '"' */ - ++tok_start; - - for (current = line; *current; ++current) { - switch (state) { - case START: - /* wait for etag to start; ignore whitespace and ',' */ - switch (*current) { - case 'W': - /* weak etag always starts with 'W/"' */ - if ('/' != *++current) return 0; /* bad etag list */ - if ('"' != *++current) return 0; /* bad etag list */ - if (!weak_ok) { - state = SKIP; - } else { - state = CHECK; - tok = tok_start; - } - break; - case '"': - /* strong etag starts with '"' */ - state = CHECK; - tok = tok_start; - break; - case ' ': - case ',': - case '\t': - case '\r': - case '\n': - break; - default: - return 0; /* bad etag list */ - } - break; - case CHECK: - /* compare etags (after the beginning '"') - * quoted-pairs must match too (i.e. quoted in both strings): - * > (RFC 2616:) both validators MUST be identical in every way - */ - matched = *tok && *tok == *current; - ++tok; - switch (*current) { - case '\\': - state = matched ? CHECK_QUOTED : SKIP_QUOTED; - break; - case '"': - if (*tok) { - /* bad etag - string should end after '"' */ - return 0; - } - if (matched) { - /* matching etag: strings were equal */ - return 1; - } - - state = TAIL; - break; - default: - if (!matched) { - /* strings not matching, skip remainder of etag */ - state = SKIP; - } - break; - } - break; - case CHECK_QUOTED: - if (!*tok || *tok != *current) { - /* strings not matching, skip remainder of etag */ - state = SKIP; - break; - } - ++tok; - state = CHECK; - break; - case SKIP: - /* wait for final (not quoted) '"' */ - switch (*current) { - case '\\': - state = SKIP_QUOTED; - break; - case '"': - state = TAIL; - break; - } - break; - case SKIP_QUOTED: - state = SKIP; - break; - case TAIL: - /* search for ',', ignore white space */ - switch (*current) { - case ',': - state = START; - break; - case ' ': - case '\t': - case '\r': - case '\n': - break; - default: - return 0; /* bad etag list */ - } - break; - } - } - /* no matching etag found */ - return 0; -} - -int etag_create(buffer *etag, const struct stat *st, int flags) { - if (0 == flags) return 0; - - buffer_clear(etag); - - if (flags & ETAG_USE_INODE) { - buffer_append_int(etag, st->st_ino); - buffer_append_string_len(etag, CONST_STR_LEN("-")); - } - - if (flags & ETAG_USE_SIZE) { - buffer_append_int(etag, st->st_size); - buffer_append_string_len(etag, CONST_STR_LEN("-")); - } - - if (flags & ETAG_USE_MTIME) { - buffer_append_int(etag, st->st_mtime); - #ifdef st_mtime /* use high-precision timestamp if available */ - #if defined(__APPLE__) && defined(__MACH__) - buffer_append_int(etag, st->st_mtimespec.tv_nsec); - #else - buffer_append_int(etag, st->st_mtim.tv_nsec); - #endif - #endif - } - - return 0; -} - - -void -etag_mutate (buffer * const mut, const buffer * const etag) { - /* mut and etag may be the same, so calculate hash before modifying mut */ - const uint32_t h = dekhash(CONST_BUF_LEN(etag)); - buffer_copy_string_len(mut, CONST_STR_LEN("\"")); - buffer_append_int(mut, h); - buffer_append_string_len(mut, CONST_STR_LEN("\"")); -} diff --git a/src/etag.h b/src/etag.h deleted file mode 100644 index ecceacb3..00000000 --- a/src/etag.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef ETAG_H -#define ETAG_H -#include "first.h" - -#include "buffer.h" - -#ifdef _AIX -#include -#else -struct stat; /* declaration */ -#endif - -typedef enum { ETAG_USE_INODE = 1, ETAG_USE_MTIME = 2, ETAG_USE_SIZE = 4 } etag_flags_t; - -int etag_is_equal(const buffer *etag, const char *matches, int weak_ok); -int etag_create(buffer *etag, const struct stat *st, int flags); -void etag_mutate(buffer *mut, const buffer *etag); - - -#endif diff --git a/src/http-header-glue.c b/src/http-header-glue.c index 7a3bba78..6f7c1d7e 100644 --- a/src/http-header-glue.c +++ b/src/http-header-glue.c @@ -8,9 +8,9 @@ #include "chunk.h" #include "fdevent.h" #include "log.h" -#include "etag.h" #include "http_chunk.h" #include "http_date.h" +#include "http_etag.h" #include "http_header.h" #include "response.h" #include "sock_addr.h" @@ -200,7 +200,7 @@ int http_response_handle_cachable(request_st * const r, const buffer * const lmo CONST_STR_LEN("If-None-Match")))) { /*(weak etag comparison must not be used for ranged requests)*/ int range_request = (0 != light_btst(r->rqst_htags, HTTP_HEADER_RANGE)); - if (etag_is_equal(&r->physical.etag, vb->ptr, !range_request)) { + if (http_etag_matches(&r->physical.etag, vb->ptr, !range_request)) { if (http_method_get_or_head(r->http_method)) { r->http_status = 304; return HANDLER_FINISHED; @@ -635,8 +635,7 @@ void http_response_send_file (request_st * const r, buffer * const path) { const buffer *etag = stat_cache_etag_get(sce, r->conf.etag_flags); if (!buffer_string_is_empty(etag)) { - /* generate e-tag */ - etag_mutate(&r->physical.etag, etag); + buffer_copy_buffer(&r->physical.etag, etag); http_header_response_set(r, HTTP_HEADER_ETAG, CONST_STR_LEN("ETag"), CONST_BUF_LEN(&r->physical.etag)); diff --git a/src/http_etag.c b/src/http_etag.c new file mode 100644 index 00000000..3ebd51b9 --- /dev/null +++ b/src/http_etag.c @@ -0,0 +1,86 @@ +/* + * http_etag - HTTP ETag manipulation + * + * Copyright(c) 2015,2020 Glenn Strauss gstrauss()gluelogic.com All rights reserved + * License: BSD 3-clause (same as lighttpd) + */ +#include "first.h" + +#include "http_etag.h" + +#include +#include + +#include "algo_md.h" +#include "buffer.h" + +int +http_etag_matches (const buffer * const etag, const char *s, const int weak_ok) +{ + if ('*' == s[0] && '\0' == s[1]) return 1; + if (buffer_string_is_empty(etag)) return 0; + + uint32_t etag_sz = buffer_string_length(etag); + const char *etag_ptr = etag->ptr; + + if (etag_ptr[0] == 'W' && etag_ptr[1] == '/') { + if (!weak_ok) return 0; + etag_ptr += 2; + etag_sz -= 2; + } + + while (*s) { + while (*s == ' ' || *s == '\t' || *s == ',') ++s; + if (s[0] == 'W' && s[1] == '/' ? (s+=2, weak_ok) : 1) { + if (0 == strncmp(s, etag_ptr, etag_sz) || *s == '*') { + s += (*s != '*' ? etag_sz : 1); + if (*s == '\0' || *s == ' ' || *s == '\t' || *s == ',') + return 1; + } + } + while (*s != '\0' && *s != ',') ++s; + } + return 0; +} + +static void +http_etag_remix (buffer * const etag, const char * const str, const uint32_t len) +{ + uint32_t h = dekhash(str, len, len); /*(pass len as initial hash value)*/ + uint32_t elen = buffer_string_length(etag); + if (elen > 2) /*(expect "..." if set)*/ + h = dekhash(etag->ptr+1, elen-2, h); + buffer_string_set_length(etag, 1); + etag->ptr[0] = '\"'; + buffer_append_int(etag, h); + buffer_append_string_len(etag, CONST_STR_LEN("\"")); +} + +void +http_etag_create (buffer * const etag, const struct stat * const st, const int flags) +{ + if (0 == flags) return; + + uint64_t x[4]; + uint32_t len = 0; + + if (flags & ETAG_USE_INODE) + x[len++] = (uint64_t)st->st_ino; + + if (flags & ETAG_USE_SIZE) + x[len++] = (uint64_t)st->st_size; + + if (flags & ETAG_USE_MTIME) { + x[len++] = (uint64_t)st->st_mtime; + #ifdef st_mtime /* use high-precision timestamp if available */ + #if defined(__APPLE__) && defined(__MACH__) + x[len++] = (uint64_t)st->st_mtimespec.tv_nsec; + #else + x[len++] = (uint64_t)st->st_mtim.tv_nsec; + #endif + #endif + } + + buffer_clear(etag); + http_etag_remix(etag, (char *)x, len << 3); +} diff --git a/src/http_etag.h b/src/http_etag.h new file mode 100644 index 00000000..d200bf48 --- /dev/null +++ b/src/http_etag.h @@ -0,0 +1,27 @@ +/* + * http_etag - HTTP ETag manipulation + * + * Copyright(c) 2015,2020 Glenn Strauss gstrauss()gluelogic.com All rights reserved + * License: BSD 3-clause (same as lighttpd) + */ +#ifndef INCLUDED_HTTP_ETAG_H +#define INCLUDED_HTTP_ETAG_H +#include "first.h" + +#include "buffer.h" + +#ifdef _AIX +#include +#else +struct stat; /* declaration */ +#endif + +typedef enum { ETAG_USE_INODE = 1, ETAG_USE_MTIME = 2, ETAG_USE_SIZE = 4 } etag_flags_t; + +__attribute_pure__ +int http_etag_matches (const buffer *etag, const char *matches, int weak_ok); + +void http_etag_create (buffer *etag, const struct stat *st, int flags); + + +#endif diff --git a/src/meson.build b/src/meson.build index b7ed481f..1ad451b7 100644 --- a/src/meson.build +++ b/src/meson.build @@ -702,7 +702,7 @@ common_src = [ 'data_array.c', 'data_integer.c', 'data_string.c', - 'etag.c', + 'http_etag.c', 'fdevent_freebsd_kqueue.c', 'fdevent_libev.c', 'fdevent_linux_sysepoll.c', diff --git a/src/mod_deflate.c b/src/mod_deflate.c index 6bc506fc..d3e2f3b1 100644 --- a/src/mod_deflate.c +++ b/src/mod_deflate.c @@ -113,8 +113,8 @@ #include "fdevent.h" #include "log.h" #include "buffer.h" -#include "etag.h" #include "http_chunk.h" +#include "http_etag.h" #include "http_header.h" #include "response.h" #include "stat_cache.h" diff --git a/src/mod_magnet.c b/src/mod_magnet.c index 992ebc88..d9cbfdcd 100644 --- a/src/mod_magnet.c +++ b/src/mod_magnet.c @@ -13,7 +13,6 @@ #include "sock_addr.h" #include "stat_cache.h" #include "status_counter.h" -#include "etag.h" #include #include @@ -345,10 +344,7 @@ static int magnet_stat(lua_State *L) { request_st * const r = magnet_get_request(L); const buffer *etag = stat_cache_etag_get(sce, r->conf.etag_flags); if (!buffer_string_is_empty(etag)) { - /* we have to mutate the etag */ - buffer * const tb = r->tmp_buf; - etag_mutate(tb, etag); - lua_pushlstring(L, CONST_BUF_LEN(tb)); + lua_pushlstring(L, CONST_BUF_LEN(etag)); } else { lua_pushnil(L); } diff --git a/src/mod_ssi.c b/src/mod_ssi.c index cbc8cde1..a6abbb38 100644 --- a/src/mod_ssi.c +++ b/src/mod_ssi.c @@ -4,6 +4,7 @@ #include "fdevent.h" #include "log.h" #include "buffer.h" +#include "http_etag.h" #include "http_header.h" #include "stat_cache.h" @@ -37,8 +38,6 @@ # include #endif -#include "etag.h" - static handler_ctx * handler_ctx_init(plugin_data *p, log_error_st *errh) { handler_ctx *hctx = calloc(1, sizeof(*hctx)); force_assert(hctx); @@ -1204,8 +1203,7 @@ static int mod_ssi_handle_request(request_st * const r, handler_ctx * const p) { if (st.st_mtime < include_file_last_mtime) st.st_mtime = include_file_last_mtime; - etag_create(&r->physical.etag, &st, r->conf.etag_flags); - etag_mutate(&r->physical.etag, &r->physical.etag); + http_etag_create(&r->physical.etag, &st, r->conf.etag_flags); http_header_response_set(r, HTTP_HEADER_ETAG, CONST_STR_LEN("ETag"), CONST_BUF_LEN(&r->physical.etag)); const buffer * const mtime = http_response_set_last_modified(r, st.st_mtime); diff --git a/src/mod_webdav.c b/src/mod_webdav.c index d8e289ac..7372023b 100644 --- a/src/mod_webdav.c +++ b/src/mod_webdav.c @@ -231,8 +231,8 @@ #include "chunk.h" #include "fdevent.h" #include "http_date.h" +#include "http_etag.h" #include "http_header.h" -#include "etag.h" #include "log.h" #include "request.h" #include "response.h" /* http_response_redirect_to_directory() */ @@ -2203,12 +2203,11 @@ webdav_if_match_or_unmodified_since (request_st * const r, struct stat *st) buffer *etagb = &r->physical.etag; if (NULL != st && (NULL != im || NULL != inm)) { - etag_create(etagb, st, r->conf.etag_flags); - etag_mutate(etagb, etagb); + http_etag_create(etagb, st, r->conf.etag_flags); } if (NULL != im) { - if (NULL == st || !etag_is_equal(etagb, im->ptr, 0)) + if (NULL == st || !http_etag_matches(etagb, im->ptr, 0)) return 412; /* Precondition Failed */ } @@ -2216,7 +2215,7 @@ webdav_if_match_or_unmodified_since (request_st * const r, struct stat *st) if (NULL == st ? !buffer_is_equal_string(inm,CONST_STR_LEN("*")) || (errno != ENOENT && errno != ENOTDIR) - : etag_is_equal(etagb, inm->ptr, 1)) + : http_etag_matches(etagb, inm->ptr, 1)) return 412; /* Precondition Failed */ } @@ -2236,9 +2235,8 @@ webdav_response_etag (request_st * const r, struct stat *st) { if (0 != r->conf.etag_flags) { buffer *etagb = &r->physical.etag; - etag_create(etagb, st, r->conf.etag_flags); + http_etag_create(etagb, st, r->conf.etag_flags); stat_cache_update_entry(CONST_BUF_LEN(&r->physical.path), st, etagb); - etag_mutate(etagb, etagb); http_header_response_set(r, HTTP_HEADER_ETAG, CONST_STR_LEN("ETag"), CONST_BUF_LEN(etagb)); @@ -3162,8 +3160,7 @@ webdav_propfind_live_props (const webdav_propfind_bufs * const restrict pb, case WEBDAV_PROP_GETETAG: if (0 != pb->r->conf.etag_flags) { buffer *etagb = &pb->r->physical.etag; - etag_create(etagb, &pb->st, pb->r->conf.etag_flags); - etag_mutate(etagb, etagb); + http_etag_create(etagb, &pb->st, pb->r->conf.etag_flags); buffer_append_string_len(b, CONST_STR_LEN( "")); buffer_append_string_buffer(b, etagb); @@ -3788,10 +3785,9 @@ webdav_has_lock (request_st * const r, } if (S_ISDIR(st.st_mode)) continue;/*we ignore etag if dir*/ buffer *etagb = &r->physical.etag; - etag_create(etagb, &st, r->conf.etag_flags); - etag_mutate(etagb, etagb); + http_etag_create(etagb, &st, r->conf.etag_flags); *p = '\0'; - int ematch = etag_is_equal(etagb, etag, 0); + int ematch = http_etag_matches(etagb, etag, 0); *p = ']'; if (!ematch) { http_status_set_error(r, 412); /* Precondition Failed */ diff --git a/src/stat_cache.c b/src/stat_cache.c index 941eb8b4..be595a5d 100644 --- a/src/stat_cache.c +++ b/src/stat_cache.c @@ -3,7 +3,7 @@ #include "stat_cache.h" #include "log.h" #include "fdevent.h" -#include "etag.h" +#include "http_etag.h" #include "algo_splaytree.h" #include @@ -1058,7 +1058,7 @@ const buffer * stat_cache_etag_get(stat_cache_entry *sce, int flags) { if (S_ISREG(sce->st.st_mode) || S_ISDIR(sce->st.st_mode)) { if (0 == flags) return NULL; - etag_create(&sce->etag, &sce->st, flags); + http_etag_create(&sce->etag, &sce->st, flags); return &sce->etag; }