[core] RFC 7233 Range handling for non-streaming

RFC 7233 Range handling for all non-streaming responses,
including (non-streaming) dynamic responses

(previously, Range requests were handled only for static files)
This commit is contained in:
Glenn Strauss 2021-02-25 21:42:59 -05:00
parent d68e639b71
commit cc35c03c3c
3 changed files with 20 additions and 286 deletions

View File

@ -305,267 +305,9 @@ handler_t http_response_reqbody_read_error (request_st * const r, int http_statu
}
/**
 * Merge byte ranges that overlap or nearly touch.
 *
 * ranges  flat array of (first, last) pairs: ranges[0..1], ranges[2..3], ...
 * n       number of array elements in use (twice the number of ranges)
 *
 * returns the number of array elements still in use after merging;
 *         surviving pairs are packed at the front of the array
 */
static int http_response_coalesce_ranges (off_t * const ranges, int n)
{
    /* Two ranges are merged when they overlap or when the gap between them
     * is smaller than the overhead of sending a separate multipart part
     * (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content).
     * Range values are known to be positive, so 80 is subtracted rather
     * than added to rule out integer overflow.
     *
     * The scan is quadratic and restarts after every merge.  The caller is
     * expected to cap the number of ranges, so a hostile request cannot make
     * this expensive; sorting first would be cheaper but is not needed for
     * the small counts a sane client sends.
     *
     * A merged range stays at the position of the earlier pair, which keeps
     * the order recommended by [RFC7233] 4.1: parts SHOULD be sent in the
     * order the byte-range-specs appeared in the Range header, excluding
     * those that were unsatisfiable or coalesced into other ranges.
     */
    int i = 0;
    while (i+2 < n) {
        const off_t lo = ranges[i];
        const off_t hi = ranges[i+1];
        int merged = 0;

        for (int j = i+2; j < n; j += 2) {
            const off_t jlo = ranges[j];
            const off_t jhi = ranges[j+1];

            /* common case: the two ranges are far enough apart */
            const int apart = (lo <= jlo) ? (hi < jlo-80) : (jhi < lo-80);
            if (apart)
                continue;

            /* fold pair j into pair i, then close the hole left by pair j */
            ranges[i]   = (lo <= jlo) ? lo : jlo;
            ranges[i+1] = (hi >= jhi) ? hi : jhi;
            memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
            n -= 2;
            merged = 1;
            break;
        }

        /* after a merge, start over from the first pair */
        i = merged ? 0 : i+2;
    }
    return n;
}
/**
 * Parse the byte-range-set of a Range request header (the text following
 * "bytes=") and, if every range is valid and satisfiable, queue the selected
 * parts of the file as the response body.
 *
 * r      request; on success body chunks are appended and response headers
 *        are set (Content-Range for a single range, or a
 *        multipart/byteranges Content-Type for several ranges);
 *        r->http_status is set to 416 for unsatisfiable ranges and when the
 *        request contains more ranges than fit in the local ranges[] array
 * sce    stat cache entry of the file; sce->st.st_size is the entity length
 * range  range-set string, e.g. "0-99,200-" or "-500"
 *
 * returns 0 when the response body has been set up,
 *        -1 on any error; nothing is appended to the response body then
 */
static int http_response_parse_range(request_st * const r, stat_cache_entry * const sce, const char * const range) {
    int n = 0;
    int error;
    off_t start, end;
    const off_t st_size = sce->st.st_size;
    const char *s, *minus;
    static const char boundary[] = "fkj49sn38dcn3";
    /* Content-Type at entry; repeated in each part of a multipart response */
    const buffer *content_type =
      http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
                               CONST_STR_LEN("Content-Type"));
    off_t ranges[16]; /* (first, last) pairs; at most 8 ranges per request */

    start = 0;
    end = st_size - 1;

    for (s = range, error = 0;
         !error && *s && NULL != (minus = strchr(s, '-')); ) {
        char *err;
        off_t la = 0, le;
        *((const char **)&err) = s; /*(quiet clang --analyze)*/

        if (s != minus) {
            la = strtoll(s, &err, 10);
            if (err != minus) {
                /* should not have multiple range-unit in Range, but
                 * handle just in case multiple Range headers merged */
                while (*s == ' ' || *s == '\t') ++s;
                if (0 != strncmp(s, "bytes=", 6)) return -1;
                s += 6;
                if (s != minus) {
                    la = strtoll(s, &err, 10);
                    if (err != minus) return -1;
                }
            }
        }

        if (s == minus) {
            /* -<stop> */
            /* suffix range: strtoll() consumes the '-', so le is negative */
            le = strtoll(s, &err, 10);

            if (le == 0) {
                /* RFC 2616 - 14.35.1 */
                r->http_status = 416;
                error = 1;
            } else if (*err == '\0') {
                /* end */
                s = err;

                end = st_size - 1;
                start = st_size + le;
            } else if (*err == ',') {
                s = err + 1;

                end = st_size - 1;
                start = st_size + le;
            } else {
                error = 1;
            }
        } else if (*(minus+1) == '\0' || *(minus+1) == ',') {
            /* <start>- */
            /* (err points at the '-' here, so err + 1 is what follows it) */

            /* ok */
            if (*(err + 1) == '\0') {
                s = err + 1;

                end = st_size - 1;
                start = la;
            } else if (*(err + 1) == ',') {
                s = err + 2;

                end = st_size - 1;
                start = la;
            } else {
                error = 1;
            }
        } else {
            /* <start>-<stop> */

            le = strtoll(minus+1, &err, 10);

            /* RFC 2616 - 14.35.1 */
            if (la > le) {
                error = 1;
            }

            if (*err == '\0') {
                /* ok, end*/
                s = err;

                end = le;
                start = la;
            } else if (*err == ',') {
                s = err + 1;

                end = le;
                start = la;
            } else {
                /* error */
                error = 1;
            }
        }

        if (!error) {
            /* a suffix longer than the file selects the whole file */
            if (start < 0) start = 0;

            /* RFC 2616 - 14.35.1 */
            if (end > st_size - 1) end = st_size - 1;

            if (start > st_size - 1) {
                error = 1;

                r->http_status = 416;
            }
        }

        if (!error) {
            if (n < (int)(sizeof(ranges)/sizeof(*ranges))) {
                ranges[n] = start;
                ranges[n+1] = end;
                n += 2;
            }
            else { /* excessive num ranges in request */
                error = 1;
                r->http_status = 416;
            }
        }
    }

    /* something went wrong */
    if (error) return -1;

    /* No byte-range-spec was parsed at all (empty range-set, or a value
     * without any '-').  Treat this like any other malformed range-set
     * instead of reporting success with a Content-Range header but no
     * body chunks. */
    if (0 == n) return -1;

    if (n > 2) n = http_response_coalesce_ranges(ranges, n);

    for (int i = 0; i < n; i += 2) {
        start = ranges[i];
        end = ranges[i+1];
        if (n > 2) {
            /* write boundary-header */
            buffer *b = r->tmp_buf;
            buffer_copy_string_len(b, CONST_STR_LEN("\r\n--"));
            buffer_append_string_len(b, boundary, sizeof(boundary)-1);

            /* write Content-Range */
            buffer_append_string_len(b, CONST_STR_LEN("\r\nContent-Range: bytes "));
            buffer_append_int(b, start);
            buffer_append_string_len(b, CONST_STR_LEN("-"));
            buffer_append_int(b, end);
            buffer_append_string_len(b, CONST_STR_LEN("/"));
            buffer_append_int(b, st_size);

            if (content_type) {
                buffer_append_string_len(b, CONST_STR_LEN("\r\nContent-Type: "));
                buffer_append_string_buffer(b, content_type);
            }

            /* write END-OF-HEADER */
            buffer_append_string_len(b, CONST_STR_LEN("\r\n\r\n"));

            http_chunk_append_mem(r, CONST_BUF_LEN(b));
        }

        /* both ends of the range are inclusive, hence the + 1 */
        http_chunk_append_file_ref_range(r, sce, start, end - start + 1);
    }

    buffer * const tb = r->tmp_buf;

    if (n > 2) {
        /* add boundary end */
        buffer_copy_string_len(tb, "\r\n--", 4);
        buffer_append_string_len(tb, boundary, sizeof(boundary)-1);
        buffer_append_string_len(tb, "--\r\n", 4);

        http_chunk_append_mem(r, CONST_BUF_LEN(tb));

        /* set header-fields */
        buffer_copy_string_len(tb, CONST_STR_LEN("multipart/byteranges; boundary="));
        buffer_append_string_len(tb, boundary, sizeof(boundary)-1);

        /* overwrite content-type */
        http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
                                 CONST_STR_LEN("Content-Type"),
                                 CONST_BUF_LEN(tb));
    } else {
        /* add Content-Range-header */
        /* (start and end still hold the single remaining range) */
        buffer_copy_string_len(tb, CONST_STR_LEN("bytes "));
        buffer_append_int(tb, start);
        buffer_append_string_len(tb, CONST_STR_LEN("-"));
        buffer_append_int(tb, end);
        buffer_append_string_len(tb, CONST_STR_LEN("/"));
        buffer_append_int(tb, st_size);

        http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
                                 CONST_STR_LEN("Content-Range"),
                                 CONST_BUF_LEN(tb));
    }

    /* ok, the file is set-up */
    return 0;
}
/**
 * Check whether an If-Range request header permits a Range response.
 *
 * r      request whose If-Range header and r->physical.etag are examined
 * mtime  Last-Modified value to compare against, or NULL if none
 *
 * returns 1 if there is no If-Range header or if its value equals the
 *         ETag (value starting with '"') or equals mtime; otherwise 0
 */
__attribute_pure__
static int http_response_match_if_range(request_st * const r, const buffer * const mtime) {
    const buffer * const if_range =
      http_header_request_get(r, HTTP_HEADER_IF_RANGE,
                              CONST_STR_LEN("If-Range"));

    /* no precondition sent: the range request is unconditional */
    if (NULL == if_range)
        return 1;

    /* a value starting with a double-quote is compared as an ETag ("...") */
    if (if_range->ptr[0] == '"')
        return 0 != buffer_is_equal(if_range, &r->physical.etag);

    /* anything else is compared against Last-Modified */
    return mtime && buffer_is_equal(if_range, mtime);
}
void http_response_send_file (request_st * const r, buffer * const path) {
stat_cache_entry * const sce = stat_cache_get_entry_open(path, r->conf.follow_symlink);
const buffer *mtime = NULL;
const buffer *vb;
int allow_caching = (0 == r->http_status || 200 == r->http_status);
if (NULL == sce) {
@ -630,15 +372,6 @@ void http_response_send_file (request_st * const r, buffer * const path) {
}
}
if (!http_method_get_or_head(r->http_method)
|| r->http_version < HTTP_VERSION_1_1)
r->conf.range_requests = 0;
if (r->conf.range_requests) {
http_header_response_append(r, HTTP_HEADER_ACCEPT_RANGES,
CONST_STR_LEN("Accept-Ranges"),
CONST_STR_LEN("bytes"));
}
if (allow_caching) {
if (!light_btst(r->resp_htags, HTTP_HEADER_ETAG)
&& 0 != r->conf.etag_flags) {
@ -674,19 +407,6 @@ void http_response_send_file (request_st * const r, buffer * const path) {
return;
}
if (r->conf.range_requests
&& (200 == r->http_status || 0 == r->http_status)
&& NULL != (vb = http_header_request_get(r, HTTP_HEADER_RANGE,
CONST_STR_LEN("Range")))
&& !light_btst(r->resp_htags, HTTP_HEADER_CONTENT_ENCODING)
&& http_response_match_if_range(r, mtime) /* "If-Range" */
&& !buffer_string_is_empty(vb) && 0 == strncmp(vb->ptr, "bytes=", 6)) {
r->resp_body_finished = 1;
if (0 == http_response_parse_range(r, sce, vb->ptr+6))
r->http_status = 206;
return;
}
/* if we are still here, prepare body */
/* we add it here for all requests

View File

@ -11,6 +11,7 @@
#include "chunk.h"
#include "http_chunk.h"
#include "http_date.h"
#include "http_range.h"
#include "plugin.h"
@ -868,6 +869,10 @@ http_response_write_prepare(request_st * const r)
}
if (r->resp_body_finished) {
/* check for Range request (current impl requires resp_body_finished) */
if (r->conf.range_requests && http_range_rfc7233(r) >= 400)
http_response_static_errdoc(r); /* 416 Range Not Satisfiable */
/* set content-length if length is known and not already set */
if (!(r->resp_htags
& (light_bshift(HTTP_HEADER_CONTENT_LENGTH)

View File

@ -8,7 +8,7 @@ BEGIN {
use strict;
use IO::Socket;
use Test::More tests => 53;
use Test::More tests => 54;
use LightyTest;
my $tf = LightyTest->new();
@ -300,15 +300,14 @@ Range: bytes=0-1,97-98
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => <<EOF
\r
--fkj49sn38dcn3\r
Content-Range: bytes 0-1/100\r
Content-Type: text/plain\r
Content-Range: bytes 0-1/100\r
\r
12\r
--fkj49sn38dcn3\r
Content-Range: bytes 97-98/100\r
Content-Type: text/plain\r
Content-Range: bytes 97-98/100\r
\r
hi\r
--fkj49sn38dcn3--\r
@ -316,6 +315,16 @@ EOF
} ];
ok($tf->handle_http($t) == 0, 'GET, Range 0-1,97-98 (ranges not merged)');
# Open-ended range "bytes=0-": expect a 206 response whose Content-Range
# header is 'bytes 0-5/6'.
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0-
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'Content-Range' => 'bytes 0-5/6' } ];
ok($tf->handle_http($t) == 0, 'GET, Range 0-');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.1
Host: 123.example.org
@ -323,7 +332,7 @@ Connection: close
Range: bytes=0--
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 416 } ];
ok($tf->handle_http($t) == 0, 'GET, Range 0--');
$t->{REQUEST} = ( <<EOF
@ -333,7 +342,7 @@ Connection: close
Range: bytes=-2-3
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 416 } ];
ok($tf->handle_http($t) == 0, 'GET, Range -2-3');
$t->{REQUEST} = ( <<EOF