An open and free bittorrent tracker https://erdgeist.org/gitweb/opentracker
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

143 lines
4.7 KiB

13 years ago
15 years ago
13 years ago
15 years ago
15 years ago
15 years ago
  1. /* This software was written by Dirk Engling <erdgeist@erdgeist.org>
  2. It is considered beerware. Prost. Skol. Cheers or whatever.
  3. $id$ */
/* Opentracker */
#include "scan_urlencoded_query.h"

/* Libowfat */
#include "scan.h"

/* System */
#include <limits.h>
#include <string.h>
/* The idea is to decode in place, or else to have the caller guarantee at
   least strlen( string ) bytes in deststring.
   See http://www.ietf.org/rfc/rfc2396.txt
     unreserved = alphanum | mark
     mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
   We add '%' to the matrix so that scanning does not stop at encoded chars.
   After losing too many requests to being too strict, the following
   characters were added to the matrix of accepted characters as well:
     relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
*/
  19. /* This matrix holds for each ascii character the information,
  20. whether it is a non-terminating character for on of the three
  21. scan states we are in, that is 'path', 'param' and 'value' from
  22. /path?param=value&param=value, it is encoded in bit 0, 1 and 2
  23. respectively
  24. The top bit of lower nibble indicates, whether this character is
  25. a hard terminator, ie. \0, \n or \s, where the whole scanning
  26. process should terminate
  27. */
  28. static const unsigned char is_unreserved[256] = {
  29. 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  30. 8,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
  31. 4,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,7,
  32. 8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,7,0,
  33. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  34. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  35. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  36. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  37. };
  38. /* Do a fast nibble to hex representation conversion */
  39. static unsigned char fromhex(unsigned char x) {
  40. x-='0'; if( x<=9) return x;
  41. x&=~0x20; x-='A'-'0';
  42. if( x<6 ) return x+10;
  43. return 0xff;
  44. }
  45. /* Skip the value of a param=value pair */
  46. void scan_urlencoded_skipvalue( char **string ) {
  47. const unsigned char* s=*(const unsigned char**) string;
  48. unsigned char f;
  49. /* Since we are asked to skip the 'value', we assume to stop at
  50. terminators for a 'value' string position */
  51. while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE );
  52. /* If we stopped at a hard terminator like \0 or \n, make the
  53. next scan_urlencoded_query encounter it again */
  54. if( f & SCAN_SEARCHPATH_TERMINATOR ) --s;
  55. *string = (char*)s;
  56. }
  57. int scan_find_keywords( const ot_keywords * keywords, char **string, SCAN_SEARCHPATH_FLAG flags) {
  58. char *deststring = *string;
  59. ssize_t match_length = scan_urlencoded_query(string, deststring, flags );
  60. if( match_length < 0 ) return match_length;
  61. if( match_length == 0 ) return -3;
  62. while( keywords->key ) {
  63. if( !strncmp( keywords->key, deststring, match_length ) && !keywords->key[match_length] )
  64. return keywords->value;
  65. keywords++;
  66. }
  67. return -3;
  68. }
  69. ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags) {
  70. const unsigned char* s=*(const unsigned char**) string;
  71. unsigned char *d = (unsigned char*)deststring;
  72. unsigned char b, c;
  73. /* This is the main decoding loop.
  74. 'flag' determines, which characters are non-terminating in current context
  75. (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path )
  76. */
  77. while( is_unreserved[ c = *s++ ] & flags ) {
  78. /* When encountering an url escaped character, try to decode */
  79. if( c=='%') {
  80. if( ( b = fromhex(*s++) ) == 0xff ) return -1;
  81. if( ( c = fromhex(*s++) ) == 0xff ) return -1;
  82. c|=(b<<4);
  83. }
  84. /* Write (possibly decoded) character to output */
  85. *d++ = c;
  86. }
  87. switch( c ) {
  88. case 0: case '\r': case '\n': case ' ':
  89. /* If we started scanning on a hard terminator, indicate we've finished */
  90. if( d == (unsigned char*)deststring ) return -2;
  91. /* Else make the next call to scan_urlencoded_param encounter it again */
  92. --s;
  93. break;
  94. case '?':
  95. if( flags != SCAN_PATH ) return -1;
  96. break;
  97. case '=':
  98. if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
  99. break;
  100. case '&':
  101. if( flags == SCAN_PATH ) return -1;
  102. if( flags == SCAN_SEARCHPATH_PARAM ) --s;
  103. break;
  104. default:
  105. return -1;
  106. }
  107. *string = (char *)s;
  108. return d - (unsigned char*)deststring;
  109. }
  110. ssize_t scan_fixed_int( char *data, size_t len, int *tmp ) {
  111. int minus = 0;
  112. *tmp = 0;
  113. if( *data == '-' ) --len, ++data, ++minus;
  114. while( (len > 0) && (*data >= '0') && (*data <= '9') ) { --len; *tmp = 10**tmp + *data++-'0'; }
  115. if( minus ) *tmp = -*tmp;
  116. return len;
  117. }
  118. const char *g_version_scan_urlencoded_query_c = "$Source: /home/cvsroot/opentracker/scan_urlencoded_query.c,v $: $Revision: 1.34 $\n";