[core] buffer_append_bs_escaped_json()

separate func from buffer_append_bs_escaped() so that both
buffer_append_bs_escaped() and buffer_append_bs_escaped_json()
can be slightly more specialized and optimized
This commit is contained in:
Glenn Strauss 2022-06-10 13:15:18 -04:00
parent ca407dca5d
commit 91ce3b0857
6 changed files with 125 additions and 52 deletions

View File

@ -715,12 +715,10 @@ void buffer_append_string_c_escaped(buffer * const restrict b, const char * cons
void
buffer_append_bs_escaped (buffer * const restrict b,
const char * restrict s, const size_t len,
const buffer_bs_escape_t esc)
const char * restrict s, const size_t len)
{
/* replaces non-printable chars with escaped string
* default: \xHH where HH is the hex representation of the byte
* json: \u00HH where HH is the hex representation of the byte
* exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
/* Intended for use escaping string to be surrounded by double-quotes */
/* Performs single pass over string and is optimized for ASCII;
@ -755,29 +753,61 @@ buffer_append_bs_escaped (buffer * const restrict b,
d[1] = c;
break;
default:
if (0 == esc) { /* BS_ESCAPE_DEFAULT */
/* non printable char => \xHH */
d = buffer_extend(b, 4);
d[0] = '\\';
d[1] = 'x';
d += 2;
}
else { /* BS_ESCAPE_JSON */
/*(technically do not have to escape DEL (\127) or higher)*/
/*(would be faster if handled in tighter do/while loop above)*/
if (c >= 127) {
buffer_append_char(b, (char)c);
break;
}
d = buffer_extend(b, 6);
d[0] = '\\';
d[1] = 'u';
d[2] = '0';
d[3] = '0';
d += 4;
}
d[0] = hex_chars_uc[c >> 4];
d[1] = hex_chars_uc[c & 0xF];
/* non printable char => \xHH */
d = buffer_extend(b, 4);
d[0] = '\\';
d[1] = 'x';
d[2] = hex_chars_uc[c >> 4];
d[3] = hex_chars_uc[c & 0xF];
break;
}
}
}
/* Append s (len bytes) to b, escaped for placement inside a double-quoted
 * JSON string:
 *   '"' => \"    '\\' => \\
 *   \b \t \n \f \r        => the two-character escape of the same name
 *   any other byte < 0x20 => \u00HH (HH is the hex representation of the byte)
 *   bytes >= 0x20 (DEL and bytes with the high bit set included) are copied
 *   unchanged */
void
buffer_append_bs_escaped_json (buffer * const restrict b,
                               const char * restrict s, const size_t len)
{
    buffer_string_prepare_append(b, len);
    for (const char * const end = s+len; s < end; ++s) {
        unsigned int c;
        const char * const ptr = s;
        /* single pass: skip over the run of bytes which need no escaping
         * and append that run in one call */
        do {
            c = *(const unsigned char *)s;
        } while (c >= ' ' && c != '"' && c != '\\' && ++s < end);
        if (s - ptr) buffer_append_string_len(b, ptr, s - ptr);

        if (s == end)
            return;

        /* ('\a' and '\v' have no two-character escape in JSON (RFC 8259);
         *  strict parsers reject "\a" and "\v", so both are emitted as \u00HH)*/
        /* ('\0' is also omitted due to the possibility of string corruption if
         *  the receiver supports decoding octal escapes (\000) and the escaped
         *  string contains \0 followed by two digits not part of escaping)*/

        char *d;
        switch (c) {
          case '\b':case '\t':case '\n':case '\f':case '\r':
            /* table indexed by byte value: 8 => b, 9 => t, 10 => n,
             * 12 => f, 13 => r; remaining slots are unused filler */
            c = "00000000btn0fr"[c];
            __attribute_fallthrough__
          case '"': case '\\':
            d = buffer_extend(b, 2);
            d[0] = '\\';
            d[1] = c;
            break;
          default:
            /* remaining control chars => \u00HH */
            d = buffer_extend(b, 6);
            d[0] = '\\';
            d[1] = 'u';
            d[2] = '0';
            d[3] = '0';
            d[4] = hex_chars_uc[c >> 4];
            d[5] = hex_chars_uc[c & 0xF];
            break;
        }
    }

View File

@ -183,13 +183,9 @@ void buffer_append_string_encoded(buffer * restrict b, const char * restrict s,
__attribute_nonnull__()
void buffer_append_string_c_escaped(buffer * restrict b, const char * restrict s, size_t s_len);
typedef enum {
BS_ESCAPE_DEFAULT
,BS_ESCAPE_JSON
} buffer_bs_escape_t;
/* escape non-printable chars, '"', '\\', and chars which high bit set
 * (non-printable chars are written as \xHH) */
void buffer_append_bs_escaped (buffer * restrict b, const char * restrict s, size_t len, buffer_bs_escape_t esc);
void buffer_append_bs_escaped (buffer * restrict b, const char * restrict s, size_t len);
/* JSON flavor: chars below 0x20 are written as a short escape (e.g. \n, \t)
 * or as \u00HH; '"' and '\\' are backslash-escaped; all other bytes,
 * including those with the high bit set, are copied unchanged */
void buffer_append_bs_escaped_json (buffer * restrict b, const char * restrict s, size_t len);
__attribute_nonnull__()
void buffer_urldecode_path(buffer *b);

View File

@ -164,6 +164,13 @@ typedef struct {
format_fields *default_format;/* allocated if default format */
} plugin_data;
/* function type of the escaping routine handed to the log-record helpers;
 * both buffer_append_bs_escaped() and buffer_append_bs_escaped_json()
 * are assigned to pointers of this type */
typedef void(esc_fn_t)(buffer * restrict b, const char * restrict s, size_t len);
/* escaping flavors
 * NOTE(review): presumably the values held in conf.escaping -- the log writer
 * selects the default escaper when conf.escaping is 0 and the JSON escaper
 * otherwise; confirm against the config parsing code */
typedef enum {
BS_ESCAPE_DEFAULT
,BS_ESCAPE_JSON
} buffer_bs_escape_t;
INIT_FUNC(mod_accesslog_init) {
return calloc(1, sizeof(plugin_data));
}
@ -584,14 +591,12 @@ TRIGGER_FUNC(log_access_periodic_flush) {
return HANDLER_GO_ON;
}
#define accesslog_append_escaped buffer_append_bs_escaped
/* Append the contents of b to dest, passed through the escaping routine
 * esc_fn.  When buffer_string_is_empty(b) reports b as empty, a single '-'
 * is appended instead. */
static void
accesslog_append_buffer (buffer * const restrict dest,
const buffer * const restrict b, const int esc)
const buffer * const restrict b, esc_fn_t esc_fn)
{
if (!buffer_string_is_empty(b))
accesslog_append_escaped(dest, BUF_PTR_LEN(b), esc);
esc_fn(dest, BUF_PTR_LEN(b));
else
buffer_append_char(dest, '-');
}
@ -610,7 +615,8 @@ __attribute_noinline__
static void
accesslog_append_cookie (buffer * const restrict dest,
const request_st * const restrict r,
const buffer * const restrict name, const int esc)
const buffer * const restrict name,
esc_fn_t esc_fn)
{
const buffer * const vb =
http_header_request_get(r, HTTP_HEADER_COOKIE, CONST_STR_LEN("Cookie"));
@ -625,7 +631,7 @@ accesslog_append_cookie (buffer * const restrict dest,
for (str = v; *str != '\0' && *str != ';'; ++str) ;
if (str == v) break;
do { --str; } while (str > v && (*str == ' ' || *str == '\t'));
accesslog_append_escaped(dest, v, str - v + 1, esc);
esc_fn(dest, v, str - v + 1);
break;
}
else {
@ -758,7 +764,7 @@ __attribute_cold__
__attribute_noinline__
static void
log_access_record_cold (buffer * const b, const request_st * const r,
const format_field * const f, const int esc)
const format_field * const f, esc_fn_t esc_fn)
{
connection * const con = r->con;
switch (f->field) {
@ -797,16 +803,15 @@ log_access_record_cold (buffer * const b, const request_st * const r,
{
const uint32_t len = buffer_clen(&r->target);
const char * const qmark = memchr(r->target.ptr, '?', len);
accesslog_append_escaped(b, r->target.ptr,
qmark ? (uint32_t)(qmark - r->target.ptr)
: len, esc);
esc_fn(b, r->target.ptr,
qmark ? (uint32_t)(qmark - r->target.ptr) : len);
}
break;
case FORMAT_QUERY_STRING:
accesslog_append_escaped(b, BUF_PTR_LEN(&r->uri.query), esc);
esc_fn(b, BUF_PTR_LEN(&r->uri.query));
break;
case FORMAT_FILENAME:
accesslog_append_buffer(b, &r->physical.path, esc);
accesslog_append_buffer(b, &r->physical.path, esc_fn);
break;
case FORMAT_CONNECTION_STATUS:
buffer_append_char(b, (r->state == CON_STATE_RESPONSE_END)
@ -829,7 +834,7 @@ log_access_record_cold (buffer * const b, const request_st * const r,
}
}
static int log_access_record (const request_st * const r, buffer * const b, format_fields * const parsed_format, const buffer_bs_escape_t esc) {
static int log_access_record (const request_st * const r, buffer * const b, format_fields * const parsed_format, esc_fn_t esc) {
const buffer *vb;
unix_timespec64_t ts = { 0, 0 };
int flush = 0;
@ -880,7 +885,7 @@ static int log_access_record (const request_st * const r, buffer * const b, form
/*(attempt to reconstruct request line)*/
http_method_append(b, r->http_method);
buffer_append_char(b, ' ');
accesslog_append_escaped(b, BUF_PTR_LEN(&r->target_orig), esc);
esc(b, BUF_PTR_LEN(&r->target_orig));
buffer_append_char(b, ' ');
http_version_append(b, r->http_version);
break;
@ -930,8 +935,11 @@ REQUESTDONE_FUNC(log_access_write) {
? (buffer_clear(r->tmp_buf), r->tmp_buf)
: &fdlog->b;
esc_fn_t * const esc_fn = !p->conf.escaping
? buffer_append_bs_escaped
: buffer_append_bs_escaped_json;
const int flush =
log_access_record(r, b, p->conf.parsed_format, p->conf.escaping);
log_access_record(r, b, p->conf.parsed_format, esc_fn);
#ifdef HAVE_SYSLOG_H
if (p->conf.use_syslog) {

View File

@ -1098,7 +1098,7 @@ static int http_read_directory(handler_ctx * const p) {
p->jcomma = 1;
buffer_append_string_len(p->jb, CONST_STR_LEN( "{\"name\":\""));
}
buffer_append_bs_escaped(p->jb, d_name, dsz, BS_ESCAPE_JSON);
buffer_append_bs_escaped_json(p->jb, d_name, dsz);
const char *t;
size_t tlen;

View File

@ -1162,7 +1162,7 @@ static int magnet_bsdec(lua_State *L) {
return 1;
}
static int magnet_bsenc(lua_State *L, const buffer_bs_escape_t esc) {
static int magnet_bsenc(lua_State *L, const int esc_json) {
/* Push onto the Lua stack the backslash-escaped form of the value at the top
 * of the stack (an empty string if that value is none/nil).
 * esc_json: non-zero => JSON escaping via buffer_append_bs_escaped_json()
 *           zero     => default escaping via buffer_append_bs_escaped()
 * Returns 1 (one result on the Lua stack). */
if (lua_isnoneornil(L, -1)) {
lua_pushlstring(L, "", 0);
return 1;
@ -1173,18 +1173,21 @@ static int magnet_bsenc(lua_State *L, const buffer_bs_escape_t esc) {
return 1;
}
buffer * const b = magnet_tmpbuf_acquire(L);
buffer_append_bs_escaped(b, s.ptr, s.len, esc);
/* magnet_bsenc_json() passes 1 and magnet_bsenc_default() passes 0,
 * so a non-zero flag must select the JSON escaper */
if (esc_json)
buffer_append_bs_escaped_json(b, s.ptr, s.len);
else
buffer_append_bs_escaped(b, s.ptr, s.len);
lua_pushlstring(L, BUF_PTR_LEN(b));
magnet_tmpbuf_release(b);
return 1;
}
/* Lua-callable wrapper: calls magnet_bsenc() with esc_json = 0 */
static int magnet_bsenc_default(lua_State *L) {
return magnet_bsenc(L, BS_ESCAPE_DEFAULT);
return magnet_bsenc(L, 0);
}
/* Lua-callable wrapper: calls magnet_bsenc() with esc_json = 1 */
static int magnet_bsenc_json(lua_State *L) {
return magnet_bsenc(L, BS_ESCAPE_JSON);
return magnet_bsenc(L, 1);
}
static int magnet_xmlenc(lua_State *L) {

View File

@ -144,6 +144,41 @@ static void test_buffer_append_path_len(void) {
buffer_free(b);
}
/* Exercise buffer_append_bs_escaped_json() and buffer_append_bs_escaped()
 * against a scratch buffer which is cleared between cases. */
static void test_buffer_append_bs_escaped(void) {
    buffer *b = buffer_init();

    /* printable ASCII is copied unchanged */
    buffer_append_bs_escaped_json(b, CONST_STR_LEN(" "));
    assert(buffer_eq_slen(b, CONST_STR_LEN(" ")));

    /* control chars without a short escape => \u00HH */
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("\0"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\u0000")));
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("\1"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\u0001")));

    /* whitespace control chars use the short escape */
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("\n"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\n")));

    /* bytes with the high bit set pass through untouched */
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("é"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("é")));
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("ö"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("ö")));

    /* '"' and '\\' are backslash-escaped */
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("\""));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\\"")));
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("\\"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\\\")));

    /* escapes embedded between runs of bytes needing no escaping */
    buffer_clear(b);
    buffer_append_bs_escaped_json(b, CONST_STR_LEN("a\tb\"c"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("a\\tb\\\"c")));

    /* default (non-JSON) flavor: non-printable char => \xHH */
    buffer_clear(b);
    buffer_append_bs_escaped(b, CONST_STR_LEN("\1"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("\\x01")));

  #if 0
    buffer_clear(b);
    magnet_buffer_append_bsdec(b, CONST_STR_LEN("\\u00E9"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("é")));
    buffer_clear(b);
    magnet_buffer_append_bsdec(b, CONST_STR_LEN("\\u00F6"));
    assert(buffer_eq_slen(b, CONST_STR_LEN("ö")));
  #endif

    /* TODO: more */

    buffer_free(b);
}
void test_buffer (void);
void test_buffer (void)
{
@ -151,4 +186,5 @@ void test_buffer (void)
test_buffer_to_lower_upper();
test_buffer_string_space();
test_buffer_append_path_len();
test_buffer_append_bs_escaped();
}