[core] coalesce nearby ranges in Range requests

Range requests must be HTTP/1.1 or later (not HTTP/1.0)
personal/stbuehler/tests-path
Glenn Strauss 2020-10-10 19:59:55 -04:00
parent 2f7839e88f
commit 96abd9cfb8
6 changed files with 111 additions and 35 deletions

View File

@ -185,10 +185,7 @@ int http_response_handle_cachable(request_st * const r, const buffer * const mti
if ((vb = http_header_request_get(r, HTTP_HEADER_IF_NONE_MATCH,
CONST_STR_LEN("If-None-Match")))) {
/*(weak etag comparison must not be used for ranged requests)*/
int range_request =
(light_btst(r->rqst_htags, HTTP_HEADER_RANGE)
&& r->conf.range_requests
&& (200 == r->http_status || 0 == r->http_status));
int range_request = (0 != light_btst(r->rqst_htags, HTTP_HEADER_RANGE));
if (etag_is_equal(&r->physical.etag, vb->ptr, !range_request)) {
if (http_method_get_or_head(r->http_method)) {
r->http_status = 304;
@ -328,6 +325,46 @@ handler_t http_response_reqbody_read_error (request_st * const r, int http_statu
}
static int http_response_coalesce_ranges (off_t * const ranges, int n)
{
    /* Merge ranges that overlap or that lie close enough together that
     * sending them as separate parts would cost more than sending the gap.
     *
     * ranges: flat array of (start, end) pairs; pair k lives in
     *         ranges[2*k] and ranges[2*k+1].  Modified in place.
     * n:      number of off_t elements in use (two per range)
     * returns the number of off_t elements still in use after merging
     *
     * Two ranges are merged when the gap between them is no larger than
     * about 80 bytes, the typical overhead of emitting one more part in a
     * multipart response ([RFC7233] 4.1 206 Partial Content).  The
     * comparison subtracts 80 from the later offset rather than adding 80
     * to the earlier one; offsets are known to be non-negative, so the
     * subtraction cannot overflow.
     *
     * A merged range keeps the slot of the earlier of the two pairs and the
     * later pair is removed, so surviving ranges stay in the order in which
     * they were received, as [RFC7233] 4.1 recommends:
     *   When a multipart response payload is generated, the server SHOULD
     *   send the parts in the same order that the corresponding
     *   byte-range-spec appeared in the received Range header field,
     *   excluding those ranges that were deemed unsatisfiable or that were
     *   coalesced into other ranges
     *
     * The scan is a simple pairwise comparison which starts over after
     * every merge, so the cost grows much faster than linearly with n.
     * Sorting first would be cheaper, but clients are not expected to send
     * many ranges and the caller should cap n to limit abuse.
     */
    int i = 0;
    while (i+2 < n) {
        const off_t lo = ranges[i];
        const off_t hi = ranges[i+1];
        int merged = 0;

        for (int j = i+2; j < n; j += 2) {
            off_t * const other = ranges+j;

            /* common case: the two ranges are far enough apart */
            const int disjoint = (lo <= other[0])
              ? (hi < other[0]-80)
              : (other[1] < lo-80);
            if (disjoint)
                continue;

            /* widen the earlier pair so that it covers both ranges */
            if (other[0] < lo) ranges[i]   = other[0];
            if (other[1] > hi) ranges[i+1] = other[1];

            /* drop the later pair by shifting the tail down over it */
            memmove(other, other+2, (n-j-2)*sizeof(off_t));
            n -= 2;
            merged = 1;
            break;
        }

        /* a widened range may now reach pairs that were already examined,
         * so start the whole scan again from the first pair */
        i = merged ? 0 : i+2;
    }
    return n;
}
static int http_response_parse_range(request_st * const r, const buffer * const path, const int fd, const stat_cache_entry * const sce, const char * const range) {
int n = 0;
int error;
@ -465,6 +502,8 @@ static int http_response_parse_range(request_st * const r, const buffer * const
/* something went wrong */
if (error) return -1;
if (n > 2) n = http_response_coalesce_ranges(ranges, n);
for (int i = 0; i < n; i += 2) {
start = ranges[i];
end = ranges[i+1];
@ -601,6 +640,9 @@ void http_response_send_file (request_st * const r, buffer * const path) {
}
}
if (!http_method_get_or_head(r->http_method)
|| r->http_version < HTTP_VERSION_1_1)
r->conf.range_requests = 0;
if (r->conf.range_requests) {
http_header_response_append(r, HTTP_HEADER_ACCEPT_RANGES,
CONST_STR_LEN("Accept-Ranges"),

View File

@ -163,12 +163,14 @@ EOF
}
$t->{REQUEST} = ( <<EOF
GET / HTTP/1.0
GET / HTTP/1.1
Host: www.example.org
If-None-Match: W/$etag
Connection: close
Range: bytes=0-0
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206, 'HTTP-Content' => '<' } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => '<' } ];
ok($tf->handle_http($t) == 0, 'A weak etag does not match for ranged requests');
$t->{REQUEST} = ( <<EOF

10
tests/docroot/123/100.txt Normal file
View File

@ -0,0 +1,10 @@
123456789
123456789
123456789
123456789
123456789
123456789
123456789
123456789
123456789
abcdefghi

View File

@ -1 +1 @@
EXTRA_DIST=12345.html 12345.txt dummyfile.bla phpinfo.php
EXTRA_DIST=100.txt 12345.html 12345.txt dummyfile.bla phpinfo.php

View File

@ -54,7 +54,7 @@ touch "${tmpdir}/servers/www.example.org/pages/image.jpg" \
"${tmpdir}/servers/www.example.org/pages/Foo.txt" \
"${tmpdir}/servers/www.example.org/pages/a" \
"${tmpdir}/servers/www.example.org/pages/index.html~"
echo "12345" > "${tmpdir}/servers/www.example.org/pages/range.pdf"
echo "12345" > "${tmpdir}/servers/123.example.org/pages/range.pdf"
printf "%-40s" "preparing infrastructure"

View File

@ -8,7 +8,7 @@ BEGIN {
use strict;
use IO::Socket;
use Test::More tests => 51;
use Test::More tests => 52;
use LightyTest;
my $tf = LightyTest->new();
@ -253,80 +253,97 @@ ok($tf->handle_http($t) == 0, 'POST via Transfer-Encoding: chunked; chunked head
## ranges
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0-3
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206, 'HTTP-Content' => '1234' } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => '1234' } ];
ok($tf->handle_http($t) == 0, 'GET, Range 0-3');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=-3
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206, 'HTTP-Content' => '45'."\n" } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => '45'."\n" } ];
ok($tf->handle_http($t) == 0, 'GET, Range -3');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=3-
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206, 'HTTP-Content' => '45'."\n" } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => '45'."\n" } ];
ok($tf->handle_http($t) == 0, 'GET, Range 3-');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0-1,3-4
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206, 'HTTP-Content' => <<EOF
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => '12345' } ];
ok($tf->handle_http($t) == 0, 'GET, Range 0-1,3-4 (ranges merged)');
$t->{REQUEST} = ( <<EOF
GET /100.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0-1,97-98
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206, 'HTTP-Content' => <<EOF
\r
--fkj49sn38dcn3\r
Content-Range: bytes 0-1/6\r
Content-Range: bytes 0-1/100\r
Content-Type: text/plain\r
\r
12\r
--fkj49sn38dcn3\r
Content-Range: bytes 3-4/6\r
Content-Range: bytes 97-98/100\r
Content-Type: text/plain\r
\r
45\r
hi\r
--fkj49sn38dcn3--\r
EOF
} ];
ok($tf->handle_http($t) == 0, 'GET, Range 0-1,3-4');
ok($tf->handle_http($t) == 0, 'GET, Range 0-1,97-98 (ranges not merged)');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0--
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 200 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200 } ];
ok($tf->handle_http($t) == 0, 'GET, Range 0--');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=-2-3
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 200 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200 } ];
ok($tf->handle_http($t) == 0, 'GET, Range -2-3');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=-0
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 416, 'HTTP-Content' => <<EOF
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 416, 'HTTP-Content' => <<EOF
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@ -343,12 +360,13 @@ EOF
ok($tf->handle_http($t) == 0, 'GET, Range -0');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=25-
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 416, 'HTTP-Content' => <<EOF
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 416, 'HTTP-Content' => <<EOF
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@ -384,31 +402,35 @@ ok($tf->handle_http($t) == 0, 'larger headers');
$t->{REQUEST} = ( <<EOF
GET /range.pdf HTTP/1.0
GET /range.pdf HTTP/1.1
Host: 123.example.org
Range: bytes=0-
Connection: close
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 200 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200 } ];
ok($tf->handle_http($t) == 0, 'GET, Range with range-requests-disabled');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: 0
Range: bytes=0-3
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 200, 'HTTP-Content' => "12345\n" } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 200, 'HTTP-Content' => "12345\n" } ];
ok($tf->handle_http($t) == 0, 'GET, Range invalid range-unit (first)');
$t->{REQUEST} = ( <<EOF
GET /12345.txt HTTP/1.0
GET /12345.txt HTTP/1.1
Host: 123.example.org
Connection: close
Range: bytes=0-3
Range: 0
EOF
);
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.0', 'HTTP-Status' => 206 } ];
$t->{RESPONSE} = [ { 'HTTP-Protocol' => 'HTTP/1.1', 'HTTP-Status' => 206 } ];
ok($tf->handle_http($t) == 0, 'GET, Range ignore invalid range (second)');
$t->{REQUEST} = ( <<EOF