An open and free bittorrent tracker https://erdgeist.org/gitweb/opentracker
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

144 lines
4.7 KiB

  1. /* This software was written by Dirk Engling <erdgeist@erdgeist.org>
  2. It is considered beerware. Prost. Skol. Cheers or whatever.
  3. $id$ */
  /* Opentracker */
  #include "scan_urlencoded_query.h"
  /* Libowfat */
  #include "scan.h"
  /* System */
  #include <limits.h>
  #include <string.h>
  10. /* Idea is to do an in-place replacement or guarantee at least
  11. strlen( string ) bytes in deststring
  12. watch http://www.ietf.org/rfc/rfc2396.txt
  13. unreserved = alphanum | mark
  14. mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
  15. we add '%' to the matrix to not stop at encoded chars.
  16. After losing too many requests to being too strict, add the following characters to reserved matrix
  17. relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
  18. */
  /* Character classification table, indexed by the raw byte value.

     For /path?param=value&param=value there are three scan states. A set
     bit means "this byte does NOT terminate a token in that state":
       bit 0 (1)  scanning the path
       bit 1 (2)  scanning a param name
       bit 2 (4)  scanning a value
     Bit 3 (8) marks a hard terminator, where the whole scanning process
     should stop. It is set for \0, \n, \r and ' ', and also for
     " # $ [ \ ] ^ ` { | }.

     Entries 0x80..0xff are not listed and are therefore zero-initialised,
     i.e. such bytes terminate a token in every state.
  */
  static const unsigned char is_unreserved[256] = {
    /* 0x00: NUL at 0, \n at 10, \r at 13 */
    8,0,0,0,0,0,0,0, 0,0,8,0,0,8,0,0,
    /* 0x10 */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x20: space ! " # $ % & '   ( ) * + , - . / */
    8,7,8,8,8,7,0,7, 7,7,7,7,7,7,7,7,
    /* 0x30: 0 1 2 3 4 5 6 7   8 9 : ; < = > ? */
    7,7,7,7,7,7,7,7, 7,7,7,7,7,4,7,6,
    /* 0x40: @ A B C D E F G   H I J K L M N O */
    4,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,
    /* 0x50: P Q R S T U V W   X Y Z [ \ ] ^ _ */
    7,7,7,7,7,7,7,7, 7,7,7,8,8,8,8,7,
    /* 0x60: ` a b c d e f g   h i j k l m n o */
    8,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,
    /* 0x70: p q r s t u v w   x y z { | } ~ DEL */
    7,7,7,7,7,7,7,7, 7,7,7,8,8,8,7,0
  };
  /* Convert one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
     0..15. Any other byte yields 0xff. */
  static unsigned char fromhex(unsigned char x) {
    unsigned char lowered;

    if( x >= '0' && x <= '9' )
      return (unsigned char)( x - '0' );

    /* Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' */
    lowered = (unsigned char)( x | 0x20 );
    if( lowered >= 'a' && lowered <= 'f' )
      return (unsigned char)( lowered - 'a' + 10 );

    return 0xff;
  }
  45. /* Skip the value of a param=value pair */
  46. void scan_urlencoded_skipvalue( char **string ) {
  47. const unsigned char* s=*(const unsigned char**) string;
  48. unsigned char f;
  49. /* Since we are asked to skip the 'value', we assume to stop at
  50. terminators for a 'value' string position */
  51. while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE );
  52. /* If we stopped at a hard terminator like \0 or \n, make the
  53. next scan_urlencoded_query encounter it again */
  54. if( f & SCAN_SEARCHPATH_TERMINATOR ) --s;
  55. *string = (char*)s;
  56. }
  57. int scan_find_keywords( const ot_keywords * keywords, char **string, SCAN_SEARCHPATH_FLAG flags) {
  58. char *deststring = *string;
  59. ssize_t match_length = scan_urlencoded_query(string, deststring, flags );
  60. if( match_length < 0 ) return match_length;
  61. if( match_length == 0 ) return -3;
  62. while( keywords->key ) {
  63. if( !strncmp( keywords->key, deststring, match_length ) && !keywords->key[match_length] )
  64. return keywords->value;
  65. keywords++;
  66. }
  67. return -3;
  68. }
  69. ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags) {
  70. const unsigned char* s=*(const unsigned char**) string;
  71. unsigned char *d = (unsigned char*)deststring;
  72. unsigned char b, c;
  73. /* This is the main decoding loop.
  74. 'flag' determines, which characters are non-terminating in current context
  75. (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path )
  76. */
  77. while( is_unreserved[ c = *s++ ] & flags ) {
  78. /* When encountering an url escaped character, try to decode */
  79. if( c=='%') {
  80. if( ( b = fromhex(*s++) ) == 0xff ) return -1;
  81. if( ( c = fromhex(*s++) ) == 0xff ) return -1;
  82. c|=(b<<4);
  83. }
  84. /* Write (possibly decoded) character to output */
  85. *d++ = c;
  86. }
  87. switch( c ) {
  88. case 0: case '\r': case '\n': case ' ':
  89. /* If we started scanning on a hard terminator, indicate we've finished */
  90. if( d == (unsigned char*)deststring ) return -2;
  91. /* Else make the next call to scan_urlencoded_param encounter it again */
  92. --s;
  93. break;
  94. case '?':
  95. if( flags != SCAN_PATH ) return -1;
  96. break;
  97. case '=':
  98. if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
  99. break;
  100. case '&':
  101. if( flags == SCAN_PATH ) return -1;
  102. if( flags == SCAN_SEARCHPATH_PARAM ) --s;
  103. break;
  104. default:
  105. return -1;
  106. }
  107. *string = (char *)s;
  108. return d - (unsigned char*)deststring;
  109. }
  /* Parse an optionally '-'-prefixed run of decimal digits from a buffer
     of exactly len bytes (no NUL terminator required).

     data  start of the buffer
     len   number of readable bytes at data
     tmp   receives the parsed value; 0 if no digit was consumed

     Returns the number of bytes left unconsumed, so 0 means the whole
     buffer was parsed. Parsing stops at the first non-digit and also
     before a digit that would push the magnitude beyond INT_MAX; in
     both cases the result is non-zero and *tmp holds the value
     accumulated so far. As a consequence INT_MIN itself is not
     parseable. */
  ssize_t scan_fixed_int( char *data, size_t len, int *tmp ) {
    int negative = 0;
    int value = 0;

    *tmp = 0;

    /* Never touch data[0] of an empty buffer. Without this guard a
       leading '-' would also wrap len around to SIZE_MAX. */
    if( len == 0 ) return 0;

    if( *data == '-' ) {
      negative = 1;
      ++data;
      --len;
    }

    while( len > 0 && *data >= '0' && *data <= '9' ) {
      int digit = *data - '0';

      /* Stop before 10*value + digit would overflow a signed int */
      if( value > ( INT_MAX - digit ) / 10 ) break;

      value = 10 * value + digit;
      ++data;
      --len;
    }

    *tmp = negative ? -value : value;
    return (ssize_t)len;
  }
  /* Version string for this source file, built from CVS keywords */
  const char *g_version_scan_urlencoded_query_c =
    "$Source: /home/cvsroot/opentracker/scan_urlencoded_query.c,v $: $Revision: 1.35 $\n";