Browse Source

Our scanner routine for the URI query string

master
Dirk Engling 15 years ago
parent
commit
d6b963d80a
  1. 57
      scan_urlencoded_query.c
  2. 20
      scan_urlencoded_query.h
  3. 4
      trackerlogic.c

57
scan_urlencoded_query.c

@ -0,0 +1,57 @@
#include "scan.h"
// Bit flags passed in the `flags` argument of scan_urlencoded_query().
// Each one names a class of terminator character the scan may stop at.
// terminator '?'
#define BREAK_AT_QUESTIONMARK (1<<0)
// terminators NUL, '\r', '\n' and ' '
#define BREAK_AT_WHITESPACE (1<<1)
// terminator '&'
#define BREAK_AT_AMPERSAND (1<<2)
// terminator '='
#define BREAK_AT_EQUALSIGN (1<<3)
// Ready-made flag combinations. Judging by their names they are meant for
// the path, a parameter name and a parameter value of a request URI
// (NOTE(review): confirm against the callers).
#define SCAN_PATH ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE )
#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN )
#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE )
// The idea is to allow an in-place replacement, or else to require at least
// strlen( string ) bytes in deststring.
// See http://www.ietf.org/rfc/rfc2396.txt
// unreserved = alphanum | mark
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
// We add '%' to the matrix so that the scan does not stop at encoded chars.
// Bitmap over the ASCII range 32..127. Bit ( c & 7 ) of byte ( c - 32 ) >> 3
// is set when character c may appear literally inside a token: alphanumerics,
// the RFC 2396 marks, and '%' (so the scanner keeps going at escapes).
// Each row lists the characters whose bit is set.
static const unsigned char reserved_matrix[] = {
  0xA2, //  32.. 39: ! % and the apostrophe
  0x67, //  40.. 47: ( ) * - .
  0xFF, //  48.. 55: 0-7
  0x03, //  56.. 63: 8 9
  0xFE, //  64.. 71: A-G
  0xFF, //  72.. 79: H-O
  0xFF, //  80.. 87: P-W
  0x87, //  88.. 95: X Y Z _
  0xFE, //  96..103: a-g
  0xFF, // 104..111: h-o
  0xFF, // 112..119: p-w
  0x47  // 120..127: x y z ~
};

// Returns 1 if c is an unreserved character (or '%'), 0 otherwise.
// Control characters, the space and everything from 127 upwards are
// rejected before the table lookup, so the index stays within the table.
// static: a plain `inline` definition provides no external symbol in C99.
static inline int is_unreserved( unsigned char c ) {
  if( ( c <= 32 ) || ( c >= 127 ) ) return 0;
  // 32 is a multiple of 8, hence ( c & 7 ) == ( ( c - 32 ) & 7 ).
  return 1 & ( reserved_matrix[ ( c - 32 ) >> 3 ] >> ( c & 7 ) );
}
// Decodes one URL-encoded token.
//
// string      in:  address of the pointer to the text to scan
//             out: on success, advanced to the byte after the terminator
//                  (for a NUL terminator that is one past the end of the
//                  string, so the caller must not keep scanning)
// deststring  receives the decoded bytes, not NUL-terminated; may be the
//             same buffer as *string because output never outruns input
// flags       OR-ed BREAK_AT_* bits naming the terminators that are accepted
//
// Returns the number of bytes written to deststring, or (size_t)-1 if a
// %-escape is malformed or the token ends at a terminator that flags does
// not allow. On failure *string is left untouched.
size_t scan_urlencoded_query(char **string, char *deststring, int flags) {
  const unsigned char *s = (const unsigned char *) *string;
  unsigned char *d = (unsigned char *) deststring;
  int c;

  while ( is_unreserved( c = *s++ ) ) {
    if ( c == '%' ) {
      // Keep the digits in ints: an unsigned char can never compare < 0,
      // which would hide the error result of scan_fromhex (assumed to be
      // negative for a non-hex character, as the checks below imply).
      // Bailing out on the first bad digit also stops us from reading
      // beyond a NUL that directly follows the '%'.
      int hi, lo;
      if ( ( hi = scan_fromhex( *s++ ) ) < 0 ) return (size_t) -1;
      if ( ( lo = scan_fromhex( *s++ ) ) < 0 ) return (size_t) -1;
      c = ( hi << 4 ) | lo;
    }
    *d++ = (unsigned char) c;
  }

  // c now holds the character that ended the token. The parentheses matter:
  // `flags & X == 0` would parse as `flags & ( X == 0 )`.
  switch ( c ) {
  case 0: case '\r': case '\n': case ' ':
    if ( ( flags & BREAK_AT_WHITESPACE ) == 0 ) return (size_t) -1;
    break;
  case '?':
    if ( ( flags & BREAK_AT_QUESTIONMARK ) == 0 ) return (size_t) -1;
    break;
  case '=':
    if ( ( flags & BREAK_AT_EQUALSIGN ) == 0 ) return (size_t) -1;
    break;
  case '&':
    if ( ( flags & BREAK_AT_AMPERSAND ) == 0 ) return (size_t) -1;
    break;
  default:
    return (size_t) -1;
  }

  *string = (char *) s;
  return (size_t) ( d - (unsigned char *) deststring );
}

20
scan_urlencoded_query.h

@ -0,0 +1,20 @@
// Include guard: must be #ifndef, otherwise the whole header is skipped
// the first time it is included.
#ifndef __SCAN_URLENCODED_QUERY_H__
#define __SCAN_URLENCODED_QUERY_H__

#include <stddef.h> // size_t

// Bit flags for the `flags` argument: which terminator characters the scan
// is allowed to stop at.
#define BREAK_AT_QUESTIONMARK (1<<0)
#define BREAK_AT_WHITESPACE (1<<1)
#define BREAK_AT_AMPERSAND (1<<2)
#define BREAK_AT_EQUALSIGN (1<<3)

// Ready-made flag combinations.
#define SCAN_PATH ( BREAK_AT_QUESTIONMARK | BREAK_AT_WHITESPACE )
#define SCAN_SEARCHPATH_PARAM ( BREAK_AT_EQUALSIGN )
#define SCAN_SEARCHPATH_VALUE ( BREAK_AT_AMPERSAND | BREAK_AT_WHITESPACE )

// string      pointer to the source pointer; points after the terminator
//             on return
// deststring  pointer to the destination buffer
// flags       determines which terminators are accepted
// returns     the number of valid converted characters in deststring,
//             or (size_t)-1 on a parse error
size_t scan_urlencoded_query(char **string, char *deststring, int flags);

#endif

4
trackerlogic.c

@ -162,7 +162,7 @@ void return_peers_for_torrent( ot_torrent torrent, unsigned long amount, char *r
// Compacts a torrents peer list
// * torrents older than OT_TIMEOUT are being kicked
// * is rather expansive
// * is rather expensive
// * if this fails, torrent file is invalid, should add flag
//
void heal_torrent( ot_torrent torrent ) {
@ -269,7 +269,7 @@ int init_logic( char *directory ) {
// Scan directory for filenames in the form [0-9A-F]{20}
// * I know this looks ugly, but I've seen A-F to match umlauts as well in strange locales
// * lower case for .. better being safe than sorry, this is not expansive here :)
// * lower case for .. better being safe than sorry, this is not expensive here :)
if( !glob(
"[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]"
"[0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef][0-9ABCDEFabcdef]"

Loading…
Cancel
Save