And that's done. This HTTP parser seems stable now.

This commit is contained in:
Alcaro 2015-01-23 00:25:49 +01:00
parent e3c6d8286b
commit 06697aa197

View File

@ -33,6 +33,10 @@ int main()
{ {
struct http* http3=http_new("http://www.wikipedia.org/"); struct http* http3=http_new("http://www.wikipedia.org/");
while (!http_poll(http3, NULL, NULL)) {} while (!http_poll(http3, NULL, NULL)) {}
size_t q;
char*w=http_data(http3,&q,false);
printf("%.*s\n",(int)q,w);
//struct http* http1=http_new("http://floating.muncher.se:22/"); //struct http* http1=http_new("http://floating.muncher.se:22/");
//struct http* http2=http_new("http://floating.muncher.se/sepulcher/"); //struct http* http2=http_new("http://floating.muncher.se/sepulcher/");
//struct http* http3=http_new("http://www.wikipedia.org/"); //struct http* http3=http_new("http://www.wikipedia.org/");
@ -81,10 +85,19 @@ int main()
struct http { struct http {
int fd; int fd;
int status; int status;
uint8_t* data;
char part;
char bodytype;
bool error;
//char padding[5];
size_t pos; size_t pos;
size_t len; size_t len;
size_t buflen;
char * data;
}; };
enum { p_header_top, p_header, p_body, p_body_chunklen, p_done, p_error };
enum { t_full, t_len, t_chunk };
static bool http_parse_url(char * url, char* * domain, int* port, char* * location) static bool http_parse_url(char * url, char* * domain, int* port, char* * location)
{ {
@ -234,6 +247,13 @@ struct http* http_new(const char * url)
state->fd=fd; state->fd=fd;
state->status=-1; state->status=-1;
state->data=NULL; state->data=NULL;
state->part=p_header_top;
state->bodytype=t_full;
state->error=false;
state->pos=0;
state->len=0;
state->buflen=512;
state->data=malloc(state->buflen);
return state; return state;
fail: fail:
@ -249,171 +269,147 @@ int http_fd(struct http* state)
bool http_poll(struct http* state, size_t* progress, size_t* total) bool http_poll(struct http* state, size_t* progress, size_t* total)
{ {
//TODO: these variables belong in struct http - they're here for now because that's easier. ssize_t newlen=0;
enum { p_header_top, p_header, p_body, p_body_chunklen, p_done, p_error };
enum { t_full, t_len, t_chunk };
char part = p_header_top;
char bodytype = t_full;
bool error=false;
size_t pos=0;
size_t len=0;
size_t buflen=512;
char * data=malloc(buflen);
//end of struct http
ssize_t newlen; if (state->error) goto fail;
again:
newlen=0;
if (part < p_body) if (state->part < p_body)
{ {
//newlen=http_recv(state->fd, &error, data+pos, buflen-pos); newlen=http_recv(state->fd, &state->error, state->data + state->pos, state->buflen - state->pos);
newlen=http_recv(state->fd, &error, data+pos, 1); //newlen=http_recv(state->fd, &state->error, state->data + state->pos, 1);
if (newlen<0) goto fail; if (newlen<0) goto fail;
if (newlen==0) goto again; if (newlen==0) return false;
if (pos+newlen >= buflen-64) if (state->pos + newlen >= state->buflen - 64)
{ {
buflen*=2; state->buflen *= 2;
data=realloc(data, buflen); state->data = realloc(state->data, state->buflen);
} }
pos+=newlen; state->pos += newlen;
while (part < p_body) while (state->part < p_body)
{ {
char * dataend=data+pos; char * dataend = state->data + state->pos;
//printf("%li\n",pos); char * lineend = memchr(state->data, '\n', state->pos);
char * lineend=memchr(data, '\n', pos);
//printf("%i '%s'\n",pos,data);
if (!lineend) break; if (!lineend) break;
*lineend='\0'; *lineend='\0';
if (lineend!=data && lineend[-1]=='\r') lineend[-1]='\0'; if (lineend != state->data && lineend[-1]=='\r') lineend[-1]='\0';
//puts(data); if (state->part == p_header_top)
if (part==p_header_top)
{ {
if (strncmp(data, "HTTP/1.", strlen("HTTP/1."))!=0) goto fail; if (strncmp(state->data, "HTTP/1.", strlen("HTTP/1."))!=0) goto fail;
state->status=strtoul(data+strlen("HTTP/1.1 "), NULL, 10); state->status=strtoul(state->data + strlen("HTTP/1.1 "), NULL, 10);
part=p_header; state->part = p_header;
} }
else else
{ {
if (!strncmp(data, "Content-Length: ", strlen("Content-Length: "))) if (!strncmp(state->data, "Content-Length: ", strlen("Content-Length: ")))
{ {
bodytype=t_len; state->bodytype = t_len;
len=strtol(data+strlen("Content-Length: "), NULL, 10); state->len = strtol(state->data + strlen("Content-Length: "), NULL, 10);
} }
if (!strcmp(data, "Transfer-Encoding: chunked")) if (!strcmp(state->data, "Transfer-Encoding: chunked"))
{ {
bodytype=t_chunk; state->bodytype=t_chunk;
} }
//TODO: save headers somewhere //TODO: save headers somewhere
if (*data=='\0') if (state->data[0]=='\0')
{ {
part=p_body; state->part = p_body;
if (bodytype==t_chunk) part=p_body_chunklen; if (state->bodytype == t_chunk) state->part = p_body_chunklen;
} }
} }
memmove(data, lineend+1, dataend-(lineend+1)); memmove(state->data, lineend+1, dataend-(lineend+1));
pos=(dataend-(lineend+1)); state->pos = (dataend-(lineend+1));
//printf("[%s] %li\n",data,pos);
} }
if (part>=p_body) if (state->part >= p_body)
{ {
newlen=pos; newlen = state->pos;
pos=0; state->pos = 0;
} }
} }
if (part >= p_body && part < p_done) if (state->part >= p_body && state->part < p_done)
{ {
//printf("%li/%li %.*s\n",pos,len,pos,data);
if (!newlen) if (!newlen)
{ {
//newlen=http_recv(state->fd, &error, data+pos, buflen-pos); newlen=http_recv(state->fd, &state->error, state->data + state->pos, state->buflen - state->pos);
newlen=http_recv(state->fd, &error, data+pos, 1); //newlen=http_recv(state->fd, &state->error, state->data + state->pos, 1);
if (newlen<0) if (newlen<0)
{ {
if (bodytype==t_full) part=p_done; if (state->bodytype==t_full) state->part=p_done;
else goto fail; else goto fail;
newlen=0; newlen=0;
} }
if (newlen==0) goto again; if (newlen==0) return false;
//printf("%lu+%lu >= %lu-64\n",pos,newlen,buflen); if (state->pos + newlen >= state->buflen - 64)
if (pos+newlen >= buflen-64)
{ {
buflen*=2; state->buflen *= 2;
data=realloc(data, buflen); state->data = realloc(state->data, state->buflen);
} }
} }
parse_again: parse_again:
if (bodytype==t_chunk) if (state->bodytype == t_chunk)
{ {
if (part==p_body_chunklen) if (state->part == p_body_chunklen)
{ {
pos+=newlen; state->pos += newlen;
if (pos-len >= 2) if (state->pos - state->len >= 2)
{ {
//len=start of chunk including \r\n //len=start of chunk including \r\n
//pos=end of data //pos=end of data
char * fullend=data+pos; char * fullend = state->data + state->pos;
char * end=memchr(data+len+2, '\n', pos-len-2); char * end=memchr(state->data + state->len + 2, '\n', state->pos - state->len - 2);
if (end) if (end)
{ {
size_t chunklen=strtoul(data+len, NULL, 16); size_t chunklen = strtoul(state->data+state->len, NULL, 16);
pos=len; state->pos = state->len;
end++; end++;
memmove(data+len, end, fullend-end); memmove(state->data+state->len, end, fullend-end);
len=chunklen; state->len = chunklen;
newlen=(fullend-end); newlen = (fullend-end);
//len=num bytes //len=num bytes
//newlen=unparsed bytes after \n //newlen=unparsed bytes after \n
//pos=start of chunk including \r\n //pos=start of chunk including \r\n
part=p_body; state->part = p_body;
if (len==0) if (state->len == 0)
{ {
part=p_done; state->part = p_done;
len=pos; state->len = state->pos;
} }
goto parse_again; goto parse_again;
} }
} }
} }
else if (part==p_body) else if (state->part == p_body)
{ {
if (newlen >= len) if (newlen >= state->len)
{ {
pos+=len; state->pos += state->len;
newlen-=len; newlen -= state->len;
len=pos; state->len = state->pos;
part=p_body_chunklen; state->part = p_body_chunklen;
goto parse_again; goto parse_again;
} }
else else
{ {
pos+=newlen; state->pos += newlen;
len-=newlen; state->len -= newlen;
} }
} }
} }
else else
{ {
pos+=newlen; state->pos += newlen;
if (pos==len) part=p_done; if (state->pos == state->len) state->part=p_done;
if (pos>len) goto fail; if (state->pos > state->len) goto fail;
} }
} }
return (state->part==p_done);
if (part==p_done)
{
data[len]='\0';
puts(data);
return true;
}
else goto again;
fail: fail:
puts("ERR"); state->error = true;
part=p_error; state->part = p_error;
state->status = -1;
return true; return true;
} }
@ -424,6 +420,11 @@ int http_status(struct http* state)
uint8_t* http_data(struct http* state, size_t* len, bool accept_error) uint8_t* http_data(struct http* state, size_t* len, bool accept_error)
{ {
if (!accept_error && state->error)
{
if (len) *len=0;
return NULL;
}
if (len) *len=state->len; if (len) *len=state->len;
return state->data; return state->data;
} }