#include #include #include #include #include "log.h" #include "http_req.h" #include "http_req_parser.h" /* declare prototypes for the parser */ void *http_req_parserAlloc(void *(*mallocProc)(size_t)); void http_req_parserFree(void *p, void (*freeProc)(void*)); void http_req_parserTrace(FILE *TraceFILE, char *zTracePrompt); void http_req_parser(void *, int, buffer *, http_req_ctx_t *); typedef struct { chunkqueue *cq; chunk *c; /* current chunk in the chunkqueue */ size_t offset; /* current offset in current chunk */ chunk *lookup_c; size_t lookup_offset; int last_token_id; int is_key; int is_statusline; } http_req_tokenizer_t; http_req *http_request_init(void) { http_req *req = calloc(1, sizeof(*req)); req->uri_raw = buffer_init(); req->headers = array_init(); return req; } void http_request_reset(http_req *req) { if (!req) return; buffer_reset(req->uri_raw); array_reset(req->headers); } void http_request_free(http_req *req) { if (!req) return; buffer_free(req->uri_raw); array_free(req->headers); free(req); } static int http_req_get_next_char(http_req_tokenizer_t *t, unsigned char *c) { if (t->c->mem->used == 0) { TRACE("chunk-len: %zd", t->c->mem->used); } if (t->offset == t->c->mem->used - 1) { /* end of chunk, open next chunk */ if (!t->c->next) return -1; t->c = t->c->next; /* skip empty chunks */ while (t->c && t->c->mem->used == 0) t->c = t->c->next; if (!t->c) return -1; t->offset = 0; } *c = t->c->mem->ptr[t->offset++]; t->lookup_offset = t->offset; t->lookup_c = t->c; #if 0 fprintf(stderr, "%s.%d: get: %c (%d) at offset: %d\r\n", __FILE__, __LINE__, *c > 31 ? *c : ' ', *c, t->offset - 1); #endif return 0; } static int http_req_lookup_next_char(http_req_tokenizer_t *t, unsigned char *c) { if (t->lookup_c->mem->used == 0) { TRACE("chunk-len: %zd", t->lookup_c->mem->used); } if (t->lookup_offset == t->lookup_c->mem->used - 1) { /* end of chunk, open next chunk */ if (!t->lookup_c->next) return -1; t->lookup_c = t->lookup_c->next; /* skip empty chunks */ while (t->lookup_c && t->lookup_c->mem->used == 0) t->lookup_c = t->lookup_c->next; if (!t->lookup_c) return -1; t->lookup_offset = 0; } *c = t->lookup_c->mem->ptr[t->lookup_offset++]; #if 0 fprintf(stderr, "%s.%d: lookup: %c (%d) at offset: %d\r\n", __FILE__, __LINE__, *c > 31 ? *c : ' ', *c, t->lookup_offset - 1); #endif return 0; } typedef enum { PARSER_UNSET, PARSER_OK, PARSER_ERROR, PARSER_EOF } http_req_parser_t; static http_req_parser_t http_req_tokenizer( http_req_tokenizer_t *t, int *token_id, buffer *token ) { unsigned char c; int tid = 0; /* push the token to the parser */ while (tid == 0 && 0 == http_req_get_next_char(t, &c)) { switch (c) { case ':': tid = TK_COLON; t->is_key = 0; break; case ' ': case '\t': if (t->last_token_id == TK_CRLF) { /* WS as the start of a line */ tid = TK_TAB; t->is_key = 0; } /* ignore the rest of the WS-chars */ break; case '\r': if (0 != http_req_lookup_next_char(t, &c)) return PARSER_EOF; if (c == '\n') { tid = TK_CRLF; t->c = t->lookup_c; t->offset = t->lookup_offset; t->is_statusline = 0; t->is_key = 1; } else { ERROR("CR with out LF at pos: %zu", t->offset); return PARSER_ERROR; } break; case '\n': tid = TK_CRLF; t->is_statusline = 0; t->is_key = 1; break; default: while (c >= 32 && c != 127 && c != 255) { if (t->is_statusline) { if (c == 32) break; /* the space is a splitter in the statusline */ } else { if (t->is_key) { if (c == ':') break; /* the : is the splitter between key and value */ if (c == ' ') break; /* no spaces in keys */ } } if (0 != http_req_lookup_next_char(t, &c)) return PARSER_EOF; } if (t->c == t->lookup_c && t->offset == t->lookup_offset + 1) { ERROR("invalid char (%d) at pos: %zu", c, t->offset); return PARSER_ERROR; } tid = TK_STRING; /* the lookup points to the first invalid char */ t->lookup_offset--; /* no overlapping string */ if (t->c == t->lookup_c) { buffer_copy_string_len(token, t->c->mem->ptr + t->offset - 1, t->lookup_offset - t->offset + 1); } else { /* first chunk */ buffer_copy_string_len(token, t->c->mem->ptr + t->offset - 1, t->c->mem->used - t->offset); /* chunks in the middle */ for (t->c = t->c->next; t->c != t->lookup_c; t->c = t->c->next) { buffer_append_string_buffer(token, t->c->mem); t->offset = t->c->mem->used - 1; } /* last chunk */ buffer_append_string_len(token, t->c->mem->ptr, t->lookup_offset); } t->offset = t->lookup_offset; break; } } if (tid) { *token_id = tid; return PARSER_OK; } return PARSER_EOF; } parse_status_t http_request_parse_cq(chunkqueue *cq, http_req *req) { http_req_tokenizer_t t; void *pParser = NULL; int token_id = 0; buffer *token = NULL; http_req_ctx_t context; parse_status_t ret = PARSE_UNSET; http_req_parser_t parser_ret; t.cq = cq; t.c = cq->first; t.offset = t.c->offset; t.is_key = 0; t.is_statusline = 1; t.last_token_id = 0; context.ok = 1; context.errmsg = buffer_init(); context.req = req; context.unused_buffers = buffer_pool_init(); pParser = http_req_parserAlloc( malloc ); token = buffer_init(); array_reset(req->headers); while((PARSER_OK == (parser_ret = http_req_tokenizer(&t, &token_id, token))) && context.ok) { http_req_parser(pParser, token_id, token, &context); token = buffer_pool_get(context.unused_buffers); /* CRLF CRLF ... the header end sequence */ if (t.last_token_id == TK_CRLF && token_id == TK_CRLF) break; t.last_token_id = token_id; } // Tokenizer failed if (parser_ret == PARSER_ERROR) { ret = PARSE_ERROR; } /* oops, the parser failed */ if (context.ok == 0) { ret = PARSE_ERROR; if (!buffer_is_empty(context.errmsg)) { TRACE("parsing failed: %s", SAFE_BUF_STR(context.errmsg)); } else { chunk *c; buffer *hdr = buffer_init(); for (c = cq->first; c; c = c->next) { if (c == cq->first) { buffer_append_string_len(hdr, c->mem->ptr + t.c->offset, c->mem->used - 1 - t.c->offset); } else { buffer_append_string_buffer(hdr, c->mem); } } TRACE("parsing failed at token (%s [%d]), header: %s", SAFE_BUF_STR(token), token_id, SAFE_BUF_STR(hdr)); buffer_free(hdr); } } http_req_parser(pParser, 0, token, &context); http_req_parserFree(pParser, free); if (context.ok == 0) { /* we are missing the some tokens */ if (!buffer_is_empty(context.errmsg)) { TRACE("parsing failed: %s", SAFE_BUF_STR(context.errmsg)); } if (ret == PARSE_UNSET) { ret = buffer_is_empty(context.errmsg) ? PARSE_NEED_MORE : PARSE_ERROR; } } else if (parser_ret == PARSER_EOF) { // didn't see CRLF CRLF, no other error till now ret = PARSE_NEED_MORE; } else { chunk *c; for (c = cq->first; c != t.c; c = c->next) { c->offset = c->mem->used - 1; } c->offset = t.offset; ret = PARSE_SUCCESS; } buffer_pool_append(context.unused_buffers, token); buffer_pool_free(context.unused_buffers); buffer_free(context.errmsg); return ret; }