Browse Source

add asciiz parsing

master
Felix von Leitner 1 year ago
parent
commit
04f6a50952
  1. 95
      buffer/bs_peek.c
  2. 66
      buffer/prs_asciiz.c
  3. 58
      buffer/prs_asciiz_fixedlen.c
  4. 117
      parse.h

95
buffer/bs_peek.c

@ -0,0 +1,95 @@
#include "parse.h"
/* Return the next byte of the stream without consuming it.
 *
 * Returns 0 and puts bs into the error state (cur=1, max=0) if
 *   - bs is already at its limit or in error state (cur>=max),
 *   - the backing I/O buffer cannot deliver a byte, or
 *   - the parent bytestream is in error state (BSTREAM only).
 *
 * A return value of 0 is therefore ambiguous; callers tell a real 0 byte
 * from a failure by checking the error state afterwards. */
unsigned char bs_peek(struct bytestream* bs) {
  unsigned char r;
  char c;
  if (bs->cur>=bs->max) {	// EOF or already error state?
    bs->max=0;			// signal error
    bs->cur=1;
    return 0;			// return 0
  }
  switch (bs->type) {

  case MEMBUF:
    r=bs->u.base[bs->cur];
    break;

  case IOBUF:
    if (buffer_peekc(bs->u.b, &c)==1) {
      r=c;
    } else {
      bs->max=0;
      bs->cur=1;
      return 0;
    }
    break;

  case BSTREAM:
    r=bs_peek(bs->u.bs);
    /* The parent reports failure only through its own cur/max. Copy
     * that state down, otherwise this stream would keep handing out 0
     * bytes while still looking healthy to its user. */
    if (bs->u.bs->cur > bs->u.bs->max) {
      bs->max=0;
      bs->cur=1;
      return 0;
    }
    break;

  default:
    r=0;	// cannot happen
  }
  return r;
}
#ifdef UNITTEST
#include <assert.h>
int main() {
struct bytestream bs = BS_FROM_MEMBUF("fx", 1);
/* first test: membuf.
* See if we get all the bytes we put in and then error is signaled */
assert(bs_peek(&bs) == 'f');
assert(bs_peek(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_get(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_peek(&bs) == 0);
assert(bs_err(&bs));
/* second test: iobuf with no limit. Otherwise the same. */
struct buffer b;
buffer_init_staticcontents(&b, "fx", 1);
bs_init_iobuf(&bs, &b);
assert(bs_peek(&bs) == 'f');
assert(bs_peek(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_get(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_peek(&bs) == 0);
assert(bs_err(&bs));
/* third test: iobuf with limit. Otherwise the same. */
buffer_init_staticcontents(&b, "fx", 2);
bs_init_iobuf_size(&bs, &b, 1);
assert(bs_peek(&bs) == 'f');
assert(bs_peek(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_get(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_peek(&bs) == 0);
assert(bs_err(&bs));
/* fourth test: iobuf with EOF */
buffer_init_staticcontents(&b, "fx", 1);
bs_init_iobuf(&bs, &b); // bytestream has no limit but will hit EOF in backing buffer
assert(bs_peek(&bs) == 'f');
assert(bs_peek(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_get(&bs) == 'f');
assert(!bs_err(&bs));
assert(bs_peek(&bs) == 0);
assert(bs_err(&bs));
return 0;
}
#endif

66
buffer/prs_asciiz.c

@ -0,0 +1,66 @@
#include "parse.h"
/* Largest value representable in ssize_t (half the range of size_t). */
static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1;

/* Read an asciiz string from the byte stream into dest.
 *
 * len is the size of dest, i.e. at most len bytes are taken from the
 * stream, including the 0 terminator. len is clamped to the maximum
 * value representable in ssize_t.
 *
 * Returns the number of bytes stored before the 0 terminator, i.e.
 * strlen(dest); the terminator itself is consumed from the stream.
 *
 * Returns -1 and leaves the stream in error state if
 *   - len is 0 (dest is not touched),
 *   - the stream ends or fails before a 0 byte was seen, or
 *   - there is no 0 byte within the first len bytes.
 * In the last two cases dest is still 0-terminated. */
ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t len) {
  size_t i;
  if (len>max_ssize_t) len=max_ssize_t;
  if (len==0) {
    bs->cur = 1;	// mark bytestream state as erroneous
    bs->max = 0;
    return -1;
  }
  for (i=0; i+1<len; ++i)
    if ((dest[i] = bs_get(bs)) == 0) {
      /* bs_get returns 0 both for a real 0 byte and on EOF/error, so
       * look at the stream state to tell the two apart. */
      return (bs->cur<=bs->max) ? (ssize_t)i : -1;
    }
  /* We stored len-1 non-zero bytes; i==len-1 is the last slot in dest.
   * Only a 0 terminator may follow, so peek before consuming. */
  if ((dest[i] = bs_peek(bs))) {
    // the next byte was not 0, so signal error
    bs->cur = 1;
    bs->max = 0;
    // but still write 0 terminator to dest
    dest[i] = 0;
    return -1;
  }
  /* bs_peek also returns 0 when the stream is exhausted, and marks it as
   * erroneous in that case. That is "no 0 byte found", not a terminator. */
  if (bs->cur > bs->max)
    return -1;
  bs_get(bs);	// the next byte really was 0, so consume it
  return (ssize_t)i;
}
#ifdef UNITTEST
#include <assert.h>
#undef UNITTEST
#include "buffer/bs_init_membuf.c"
#include "buffer/bs_get.c"
#include "buffer/buffer_peekc.c"
#include "buffer/buffer_getc.c"
#include "buffer/bs_peek.c"
#include "buffer/buffer_feed.c"
#include "buffer/buffer_stubborn2.c"
int main() {
struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
char buf[100];
assert(prs_asciiz(&bs, buf, sizeof buf) == 6); // return value should be strlen("fnord\n")
assert(!memcmp(buf,"fnord\n",7)); // returned string should be "fnord\n" with 0 terminator
assert(bs_get(&bs) == 'x'); // should have consumed the 0 terminator from bytestream
bs_init_membuf(&bs, "fnord\n\0x", 8);
assert(prs_asciiz(&bs, buf, 5) == -1); // no 0 terminator in first 5 bytes, expect error
assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf
assert(bs_err(&bs)); // bytestream should be in error state now
}
#endif

58
buffer/prs_asciiz_fixedlen.c

@ -0,0 +1,58 @@
#include "parse.h"
/* Largest value representable in ssize_t (half the range of size_t). */
static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1;

/* Read a string from a fixed-length field.
 *
 * Some protocols have a fixed field length for a string. If the string
 * is shorter than the field, the rest is filled with 0 bytes, but it is
 * not an error if there are no 0 bytes at all (the filename field in
 * the tar file header is an example of this).
 *
 * len is the size of dest: exactly len-1 bytes are consumed from the
 * stream and a 0 terminator is appended. For a field of length 8, pass
 * len as 9. len is clamped to the maximum value representable in ssize_t.
 *
 * Returns strlen(dest), or -1 if len is 0 or the stream is in error
 * state after reading the field. */
ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t len) {
  size_t i;
  size_t n;		// strlen(dest): index of the first 0 byte
  int seen0=0;		// did we find a 0 byte inside the field yet?
  if (len>max_ssize_t) len=max_ssize_t;
  if (len==0) {
    bs->cur = 1;	// mark bytestream state as erroneous
    bs->max = 0;
    return -1;
  }
  n=len-1;		// no 0 byte in the field: the string fills it completely
  for (i=0; i+1<len; ++i)
    if ((dest[i] = bs_get(bs)) == 0 && !seen0) {
      /* Use a separate flag rather than n==0 as the "not found yet"
       * marker, so that a field starting with a 0 byte yields 0 and is
       * not overwritten by the position of a later 0 byte. */
      n=i;
      seen0=1;
    }
  dest[i] = 0;		// add 0 terminator
  return bs_err(bs) ? -1 : (ssize_t)n;
}
#ifdef UNITTEST
#include <assert.h>
#undef UNITTEST
#include "buffer/bs_init_membuf.c"
#include "buffer/bs_get.c"
#include "buffer/buffer_peekc.c"
#include "buffer/buffer_getc.c"
#include "buffer/bs_peek.c"
#include "buffer/buffer_feed.c"
#include "buffer/buffer_stubborn2.c"
int main() {
struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
char buf[100];
assert(prs_asciiz_fixedlen(&bs, buf, 8) == 6); // return value should be strlen("fnord\n")
assert(!memcmp(buf,"fnord\n\0\0",8)); // returned string should be "fnord\n" and the rest filled with 0 bytes
assert(!bs_err(&bs));
bs_init_membuf(&bs, "fnord\n\0x", 8);
assert(prs_asciiz_fixedlen(&bs, buf, 5) == 4); // no 0 terminator in first 4 bytes
assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf
assert(bs_get(&bs) == 'd'); // 0 terminator in buf was artificial, 'd' was not consumed
assert(!bs_err(&bs)); // bytestream should be ok
}
#endif

117
parse.h

@ -0,0 +1,117 @@
/* this header file comes from libowfat, http://www.fefe.de/libowfat/ */
#ifndef PARSE_H
#define PARSE_H
/* for size_t: */
#include <stddef.h>
/* for uint32_t: */
#include <stdint.h>
#include <libowfat/buffer.h>
#include <libowfat/uint16.h>
#include <libowfat/uint32.h>
#include <libowfat/uint64.h>
#ifdef __cplusplus
extern "C" {
#endif
/* This file declares an API for decoding binary messages.
Goals:
- You say in advance if there is a message size limit
- The object can be bound to a memory buffer or an I/O buffer
- After having set up the object, you get two APIs:
1. read bytes
2. was there an error?
If you parse a memory buffer manually, you need to do range
checking for every byte. If you parse from an I/O buffer
manually, you need to check for end of file or I/O error after
every byte.
This API will return 0 bytes and set the error flag when you read
past the limit. That way you don't have to check after every
byte, but only once at the end of each sub-message.
- Many binary protocols have a message length and then sub-packets.
For example, an IPv4 packet has a header with a length and the IP
options. With this abstraction here, you would have one
bytestream for the packet (with size limit set to how many bytes
came in from the network) and then you would make a new
bytestream for the IP header and another one for the option
headers. Each would only let you read bytes from that subregion,
and would do bounds checking at instantiation time to make sure
it physically fits into the space of the upper layer.
*/
/* State of one byte stream that is being parsed.
 * type says where the bytes come from and thereby which member of u is
 * in use. */
struct bytestream {
enum {
MEMBUF, /* bytes are read from memory at u.base, indexed by cur */
IOBUF, /* bytes are read from the struct buffer u.b */
BSTREAM /* reads are forwarded to the bytestream u.bs */
} type;
/* cur is compared against max before each read; cur>=max means end of
 * stream. bs_peek and the prs_asciiz functions signal an error by
 * setting cur=1 and max=0. */
size_t cur, max;
union {
const unsigned char* base; /* used when type==MEMBUF */
struct buffer* b; /* used when type==IOBUF */
struct bytestream* bs; /* used when type==BSTREAM */
} u;
};
/* Initializers for struct bytestream; the run-time counterparts of the
 * BS_FROM_* initializer macros. */
/* Read from the len bytes at membuf. */
void bs_init_membuf(struct bytestream* bs,const unsigned char* membuf,size_t len);
/* Read from the I/O buffer b, with no size limit on the bytestream. */
void bs_init_iobuf(struct bytestream* bs,struct buffer* b);
/* Read from the I/O buffer b, with a size limit of maxlen on the bytestream. */
void bs_init_iobuf_size(struct bytestream* bs,struct buffer* b,size_t maxlen);
/* NOTE(review): implementation not visible here; presumably makes bs a
 * sub-stream of parent that is limited to maxlen bytes -- confirm. */
void bs_init_bstream_size(struct bytestream* bs,struct bytestream* parent,size_t maxlen);
/* Static initializers for struct bytestream. Members that are not named
 * (in particular cur) are zero-initialized.
 * Usage: struct bytestream bs = BS_FROM_MEMBUF(ptr, len); */
/* read from the len bytes at buf */
#define BS_FROM_MEMBUF(buf,len) { .type=MEMBUF, .max=(len), .u.base=(const unsigned char*)(buf) }
/* read from the struct buffer* given, without a size limit */
#define BS_FROM_BUFFER(buffer) { .type=IOBUF, .max=(size_t)-1, .u.b=(buffer) }
/* read at most len bytes from the struct buffer* given */
#define BS_FROM_BUFFER_SIZE(buffer,len) { .type=IOBUF, .max=(len), .u.b=(buffer) }
/* return next byte from stream or 0 if EOF or read error. */
unsigned char bs_get(struct bytestream* bs);
/* like bs_get but do not advance position in stream. */
unsigned char bs_peek(struct bytestream* bs);
/* was there a read error or did we attempt to read more than maxlen bytes? */
int bs_err(struct bytestream* bs);
/* Can we read this many more bytes from the bytestream? */
int bs_capacitycheck(struct bytestream* bs,size_t capacity);
/* Fixed-width integer parsers.
 * NOTE(review): the implementations are not visible here. Judging by the
 * names and return types these read a 16/32/64 bit integer from the
 * stream, the _big variants presumably in big-endian and the plain ones
 * in little-endian byte order -- confirm against the implementation. */
uint16_t prs_u16(struct bytestream* bs);
uint16_t prs_u16_big(struct bytestream* bs);
uint32_t prs_u32(struct bytestream* bs);
uint32_t prs_u32_big(struct bytestream* bs);
uint64_t prs_u64(struct bytestream* bs);
uint64_t prs_u64_big(struct bytestream* bs);
/* Read an asciiz string from the byte stream, up to destsize bytes (including the 0 terminator). */
/* Return number of bytes consumed (excluding the 0 terminator), i.e. strlen(dest) */
/* If there is no 0 byte in these destsize bytes, set error flag in stream and return -1. */
/* Calling this function with destsize==0 is an error. */
/* destsize will be clamped to the maximum number representable in ssize_t */
ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t destsize);
/* Some protocols have a fixed field length for a string,
* If the string is shorter than the field, the rest is filled with 0
* bytes. But it is not an error if there are no 0 bytes.
* This function is for those cases (the filename field in the tar file
* header is an example of this).
* For a field of length 8, you need to pass destsize as 9 so we can add
* a 0 terminator. This function will consume the 8 bytes and add a 0 byte.
* The return value is strlen(dest). */
ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t destsize);
#ifdef __cplusplus
}
#endif
#endif
Loading…
Cancel
Save