Overhauled header parsing

This commit is contained in:
Neale Pickett 2012-02-17 17:50:05 -07:00
parent 23123e1cac
commit f8c2f7604b
2 changed files with 333 additions and 264 deletions

260
eris.c
View File

@ -34,11 +34,12 @@
#endif #endif
#define DUMP() DUMPf("") #define DUMP() DUMPf("")
#define DUMP_d(v) DUMPf("%s = %d", #v, v) #define DUMP_d(v) DUMPf("%s = %d", #v, v)
#define DUMP_u(v) DUMPf("%s = %u", #v, v)
#define DUMP_x(v) DUMPf("%s = 0x%x", #v, v) #define DUMP_x(v) DUMPf("%s = 0x%x", #v, v)
#define DUMP_s(v) DUMPf("%s = %s", #v, v) #define DUMP_s(v) DUMPf("%s = %s", #v, v)
#define DUMP_c(v) DUMPf("%s = %c", #v, v) #define DUMP_c(v) DUMPf("%s = %c", #v, v)
#define DUMP_p(v) DUMPf("%s = %p", #v, v) #define DUMP_p(v) DUMPf("%s = %p", #v, v)
#define DUMP_buf(v, l) DUMPf("%s = %.*s", #v, l, v) #define DUMP_buf(v, l) DUMPf("%s = %.*s", #v, (int)(l), v)
/* /*
* the following is the time in seconds that fnord should wait for a valid * the following is the time in seconds that fnord should wait for a valid
@ -158,10 +159,10 @@ badrequest(long code, const char *httpcomment, const char *message)
retcode = code; retcode = code;
dolog(0); dolog(0);
printf("HTTP/1.0 %ld %s\r\nConnection: close\r\n", code, httpcomment); printf("HTTP/1.0 %ld %s\r\nConnection: close\r\n", code, httpcomment);
if (message && message[0]) { if (message) {
printf("Content-Length: %lu\r\nContent-Type: text/html\r\n\r\n", printf("Content-Length: %lu\r\nContent-Type: text/html\r\n\r\n",
(unsigned long) strlen(message)); (unsigned long) (strlen(message) * 2) + 15);
fputs(message, stdout); printf("<title>%s</title>%s", message, message);
} else { } else {
fputs("\r\n", stdout); fputs("\r\n", stdout);
} }
@ -216,44 +217,61 @@ elen(register const char *const *e)
return i; return i;
} }
static ssize_t /*
read_header(int fd, char *buf, size_t buflen, size_t *headerlen) * Read header block. Try to read from stdin until \r?\n\r?\n is
* encountered. Read no more than *buflen bytes. Convert bare \n to
* \r\n. Preserve state across calls, provided buf is the same. Returns:
* 1 found 0 not found, call again -1 EOF or other error
*/
static int
read_header(FILE * f, char *buf, size_t * buflen)
{ {
size_t len = 0; static char *lastbuf = NULL;
int found = 0; static int found = 0;
size_t p = 0; static int bare = 1; /* LF here would be bare */
int bufsize = *buflen;
while (found < 2) { if (lastbuf != buf) {
int tmp; lastbuf = buf;
bare = 1;
found = 0;
}
*buflen = 0;
tmp = read(fd, buf + len, buflen - len); while (*buflen + bare < bufsize) {
if (tmp < 0) { int c = getchar();
switch (c) {
case EOF:
if (errno == EWOULDBLOCK) {
return 0;
} else {
return -1; return -1;
} }
if (tmp == 0) {
break;
}
len += tmp;
for (; (found < 2) && (p < len); p += 1) {
switch (buf[p]) {
case '\n':
found += 1;
break; break;
case '\r': case '\r':
bare = 0;
break;
case '\n':
if (bare) {
buf[(*buflen)++] = '\r';
bare = 1;
}
found += 1;
break; break;
default: default:
found = 0; found = 0;
bare = 1;
break; break;
} }
buf[(*buflen)++] = c;
if (found == 2) {
return 1;
} }
} }
if (headerlen) { return 0;
*headerlen = p;
}
return len;
} }
char * char *
@ -407,8 +425,9 @@ cgi_child(int sig)
signal(SIGCHLD, cgi_child); signal(SIGCHLD, cgi_child);
} }
/* Convert bare \n to \r\n in header. Return 0 if /*
* header is over. */ * Convert bare \n to \r\n in header. Return 0 if header is over.
*/
static int static int
cgi_send_correct_http(const char *s, unsigned int sl) cgi_send_correct_http(const char *s, unsigned int sl)
{ {
@ -489,12 +508,16 @@ start_cgi(int nph, const char *pathinfo, const char *const *envp)
size_t len; size_t len;
if (startup) { if (startup) {
/* XXX: could block :< */ /*
len = read_header(fd[0], ibuf, sizeof ibuf, NULL); * XXX: could block :<
*/
// len = read_header(fd[0], ibuf, sizeof ibuf,
// NULL);
len = 0;
} else { } else {
len = read(fd[0], ibuf, sizeof ibuf); len = read(fd[0], ibuf, sizeof ibuf);
} }
DUMP_d(len); DUMP_u((unsigned int) len);
if (0 == len) { if (0 == len) {
break; break;
@ -509,7 +532,7 @@ start_cgi(int nph, const char *pathinfo, const char *const *envp)
if (startup) { if (startup) {
if (nph) { /* NPH-CGI */ if (nph) { /* NPH-CGI */
startup = 0; startup = 0;
printf("%.*s", len, ibuf); printf("%.*s", (int) len, ibuf);
/* /*
* skip HTTP/x.x * skip HTTP/x.x
*/ */
@ -741,6 +764,9 @@ matchcommalist(const char *needle, const char *haystack)
* return nonzero if match was found * return nonzero if match was found
*/ */
int len = strlen(needle); int len = strlen(needle);
DUMP_s(needle);
DUMP_s(haystack);
if (strncmp(needle, haystack, len)) if (strncmp(needle, haystack, len))
return 0; return 0;
switch (haystack[len]) { switch (haystack[len]) {
@ -758,6 +784,9 @@ static int
findincommalist(const char *needle, const char *haystack) findincommalist(const char *needle, const char *haystack)
{ {
const char *accept; const char *accept;
DUMP_s(needle);
DUMP_s(haystack);
for (accept = haystack; accept;) { for (accept = haystack; accept;) {
/* /*
* format: foo/bar, * format: foo/bar,
@ -772,6 +801,7 @@ findincommalist(const char *needle, const char *haystack)
++tmp; ++tmp;
} }
final = (*tmp == 0 || *tmp == ';'); final = (*tmp == 0 || *tmp == ';');
DUMP_s(accept);
if (matchcommalist("*/*", accept)) if (matchcommalist("*/*", accept))
break; break;
if (matchcommalist(haystack, accept)) if (matchcommalist(haystack, accept))
@ -971,7 +1001,7 @@ static struct stat st;
* try to return a file * try to return a file
*/ */
static int static int
doit(char *buf, int buflen, char *url, int explicit) doit(char *headerbuf, size_t headerlen, char *url, int explicit)
{ {
int fd = -1; int fd = -1;
char *accept; char *accept;
@ -979,23 +1009,32 @@ doit(char *buf, int buflen, char *url, int explicit)
++url; ++url;
getmimetype(url, explicit); getmimetype(url, explicit);
{ {
char *b = buf; char *b = headerbuf;
int l = buflen; int l = headerlen;
for (;;) { for (;;) {
char *h = header(b, l, "Accept"); char *h = header(b, l, "Accept");
DUMP_p(h);
DUMP_s(h);
DUMP_s(mimetype);
if (!h) if (!h)
goto ok; goto ok;
if (findincommalist(mimetype, h)) if (findincommalist(mimetype, h)) {
DUMP();
goto ok; goto ok;
}
l -= (h - b) + 1; l -= (h - b) + 1;
b = h + 1; b = h + 1;
} }
DUMP();
retcode = 406; retcode = 406;
goto bad; goto bad;
} }
ok: ok:
DUMP();
if (encoding) { /* see if client accepts the encoding */ if (encoding) { /* see if client accepts the encoding */
char *tmp = header(buf, buflen, "Accept-Encoding"); char *tmp =
header(headerbuf, headerlen, "Accept-Encoding");
if (!tmp || !strstr(tmp, "gzip")) { if (!tmp || !strstr(tmp, "gzip")) {
retcode = 406; retcode = 406;
goto bad; goto bad;
@ -1017,7 +1056,7 @@ doit(char *buf, int buflen, char *url, int explicit)
*/ */
{ {
char *field = char *field =
header(buf, buflen, "If-Modified-Since"); header(headerbuf, headerlen, "If-Modified-Since");
if (field) { if (field) {
time_t ims; time_t ims;
@ -1031,7 +1070,7 @@ doit(char *buf, int buflen, char *url, int explicit)
} }
rangestart = 0; rangestart = 0;
rangeend = st.st_size; rangeend = st.st_size;
if ((accept = header(buf, buflen, "Range"))) { if ((accept = header(headerbuf, headerlen, "Range"))) {
/* /*
* format: "bytes=17-23", "bytes=23-" * format: "bytes=17-23", "bytes=23-"
*/ */
@ -1460,15 +1499,14 @@ serve_static_data(int fd)
int int
main(int argc, char *argv[], const char *const *envp) main(int argc, char *argv[], const char *const *envp)
{ {
char buf[MAXHEADERLEN]; char headerbuf[MAXHEADERLEN];
char *nurl, char *nurl,
*origurl; *origurl;
int docgi = 0; int docgi = 0;
int dirlist = 0; int dirlist = 0;
int redirect = 0; int redirect = 0;
int portappend = 0; int portappend = 0;
size_t len; size_t headerlen;
ssize_t in;
{ {
int opt; int opt;
@ -1488,7 +1526,8 @@ main(int argc, char *argv[], const char *const *envp)
portappend = 1; portappend = 1;
break; break;
default: default:
fprintf(stderr, "Usage: %s [-c] [-d] [-r] [-p]\n", argv[0]); fprintf(stderr, "Usage: %s [-c] [-d] [-r] [-p]\n",
argv[0]);
return 69; return 69;
} }
} }
@ -1501,40 +1540,56 @@ main(int argc, char *argv[], const char *const *envp)
handlenext: handlenext:
encoding = 0; encoding = 0;
alarm(READTIMEOUT); alarm(READTIMEOUT);
in = read_header(0, buf, sizeof buf, &len); headerlen = sizeof headerbuf;
if (len < 10) switch (read_header(stdin, headerbuf, &headerlen)) {
badrequest(400, "Bad Request", case -1:
"<title>Bad Request</title>That does not look like HTTP to me..."); return 0;
buf[len] = 0; break;
case 0:
badrequest(400, "Bad Request", "Header too long");
break;
}
if (!strncasecmp(buf, "GET /", 5)) { alarm(0);
if (headerlen < 10)
badrequest(400, "Bad Request", "That does not look like HTTP");
if (!memcmp(headerbuf, "GET /", 5)) {
method = GET; method = GET;
url = buf + 4; url = headerbuf + 4;
} else if (!strncasecmp(buf, "POST /", 6)) { } else if (!memcmp(headerbuf, "POST /", 6)) {
method = POST; method = POST;
url = buf + 5; url = headerbuf + 5;
} else if (!strncasecmp(buf, "HEAD /", 6)) { } else if (!memcmp(headerbuf, "HEAD /", 6)) {
method = HEAD; method = HEAD;
url = buf + 5; url = headerbuf + 5;
} else } else
badrequest(400, "Bad Request", badrequest(405, "Method Not Allowed", "Unsupported HTTP method.");
"<title>Bad Request</title>Unsupported HTTP method.");
origurl = url; origurl = url;
{ {
char *nl = strchr(buf, '\r'); /*
char *space = strchr(url, ' '); * If we got here we are *guaranteed* (by read_header) to have at
if (space >= nl) * least one \r
badrequest(400, "Bad Request", */
"<title>Bad Request</title>HTTP/0.9 not supported"); char *nl = memchr(headerbuf, '\r', headerlen);
if (strncmp(space + 1, "HTTP/1.", 7)) char *space = memchr(url, ' ', nl - url);
badrequest(400, "Bad Request",
"<title>Bad Request</title>Only HTTP 1.x supported"); if (!space) {
badrequest(400, "Bad Request", "HTTP/0.9 not supported");
}
if (memcmp(space + 1, "HTTP/1.", 7))
badrequest(400, "Bad Request", "Only HTTP/1.x supported");
*space = 0; *space = 0;
httpversion = space[8] - '0'; httpversion = space[8] - '0';
if (httpversion > 1) {
badrequest(400, "Bad Request", "HTTP Version not supported");
}
keepalive = 0; keepalive = 0;
/* /*
@ -1572,39 +1627,42 @@ main(int argc, char *argv[], const char *const *envp)
++d; ++d;
} }
*d = 0; *d = 0;
/*
* not good enough, we need a second pass
*/
} }
/*
* XXX: check use of uri to see if it needs to be duplicated
*/
uri = strdup(url); uri = strdup(url);
} }
{ {
char *tmp; char *tmp;
ua = header(buf, len, "User-Agent"); ua = header(headerbuf, headerlen, "User-Agent");
refer = header(buf, len, "Referer"); refer = header(headerbuf, headerlen, "Referer");
accept_enc = header(buf, len, "Accept-Encoding"); accept_enc = header(headerbuf, headerlen, "Accept-Encoding");
if ((tmp = header(buf, len, "Connection"))) { /* see if it's if ((tmp = header(headerbuf, headerlen, "Connection"))) { /* see
* if
* it's
* *
* "keep-alive" * "keep-alive"
* or * "close" */ * * or "close" */
if (!strcasecmp(tmp, "keep-alive")) if (!strcasecmp(tmp, "keep-alive"))
keepalive = 1; keepalive = 1;
else if (!strcasecmp(tmp, "close")) else if (!strcasecmp(tmp, "close"))
keepalive = -1; keepalive = -1;
} }
cookie = header(buf, len, "Cookie"); cookie = header(headerbuf, headerlen, "Cookie");
auth_type = header(buf, len, "Authorization"); auth_type = header(headerbuf, headerlen, "Authorization");
if (method == POST) { if (method == POST) {
content_type = header(buf, len, "Content-Type"); content_type = header(headerbuf, headerlen, "Content-Type");
content_len = header(buf, len, "Content-Length"); content_len = header(headerbuf, headerlen, "Content-Length");
if (content_len) {
post_len = strtoul(content_len, NULL, 10); if ((!content_type) || (!content_len)) {
post_miss = buf + len + 1; badrequest(411, "Length Required",
post_mlen = in - len - 1; "POST missing Content-Type or Content-Length");
if (post_len <= post_mlen)
post_mlen = post_len;
} }
post_len = strtoul(content_len, NULL, 10);
} }
} }
@ -1614,7 +1672,7 @@ main(int argc, char *argv[], const char *const *envp)
{ {
char *Buf; char *Buf;
int i; int i;
host = header(buf, len, "Host"); host = header(headerbuf, headerlen, "Host");
if (!host) if (!host)
i = 100; i = 100;
else else
@ -1645,8 +1703,7 @@ main(int argc, char *argv[], const char *const *envp)
for (i = strlen(host); i >= 0; --i) for (i = strlen(host); i >= 0; --i)
if ((host[i] = tolower(host[i])) == '/') if ((host[i] = tolower(host[i])) == '/')
hostb0rken: hostb0rken:
badrequest(400, "Bad Request", badrequest(400, "Bad Request", "Invalid host header");
"<title>Bad Request</title>Bullshit Host header");
if (host[0] == '.') if (host[0] == '.')
goto hostb0rken; goto hostb0rken;
if (keepalive > 0) { if (keepalive > 0) {
@ -1680,7 +1737,7 @@ main(int argc, char *argv[], const char *const *envp)
} }
if (chdir("default") && argc < 2) { if (chdir("default") && argc < 2) {
badrequest(404, "Not Found", badrequest(404, "Not Found",
"<title>Not Found</title>This host is not served here."); "This host is not served here.");
} }
} }
} }
@ -1693,7 +1750,7 @@ main(int argc, char *argv[], const char *const *envp)
pid_t child; pid_t child;
const char *authorization; const char *authorization;
authorization = header(buf, len, "Authorization"); authorization = header(headerbuf, headerlen, "Authorization");
child = fork(); child = fork();
if (child < 0) { if (child < 0) {
badrequest(500, "Internal Server Error", badrequest(500, "Internal Server Error",
@ -1745,16 +1802,19 @@ main(int argc, char *argv[], const char *const *envp)
if (docgi) { if (docgi) {
char *tmp, char *tmp,
*pathinfo; *pathinfo;
pathinfo = 0; pathinfo = 0;
for (tmp = url; tmp < nurl; ++tmp) for (tmp = url; tmp < nurl; ++tmp) {
if (findcgi(tmp)) { if (findcgi(tmp)) {
nurl = tmp; nurl = tmp;
if (tmp[4] == '/') if (tmp[4] == '/')
pathinfo = tmp + 4; pathinfo = tmp + 4;
break; break;
} }
}
if (pathinfo) { if (pathinfo) {
int len = strlen(pathinfo) + 1; int len = strlen(pathinfo) + 1;
tmp = alloca(len); tmp = alloca(len);
memcpy(tmp, pathinfo, len); memcpy(tmp, pathinfo, len);
*pathinfo = 0; *pathinfo = 0;
@ -1762,6 +1822,7 @@ main(int argc, char *argv[], const char *const *envp)
} }
if (findcgi(nurl)) { if (findcgi(nurl)) {
int i; int i;
if ((method == HEAD)) if ((method == HEAD))
badrequest(400, "Bad Request", badrequest(400, "Bad Request",
"Illegal HTTP method for Gateway call."); "Illegal HTTP method for Gateway call.");
@ -1786,7 +1847,9 @@ main(int argc, char *argv[], const char *const *envp)
{ {
int fd; int fd;
if ((fd = doit(buf, len, url, 1)) >= 0) { /* file was there */ if ((fd = doit(headerbuf, headerlen, url, 1)) >= 0) { /* file
* was
* there */
/* /*
* look if file.gz is also there and acceptable * look if file.gz is also there and acceptable
*/ */
@ -1797,7 +1860,7 @@ main(int argc, char *argv[], const char *const *envp)
strcpy(fnord, url); strcpy(fnord, url);
strcpy(fnord + ul, ".gz"); strcpy(fnord + ul, ".gz");
fd2 = doit(buf, len, fnord, 0); fd2 = doit(headerbuf, headerlen, fnord, 0);
if (fd2 >= 0) { /* yeah! */ if (fd2 >= 0) { /* yeah! */
url = fnord; url = fnord;
close(fd); close(fd);
@ -1837,11 +1900,10 @@ main(int argc, char *argv[], const char *const *envp)
/* /*
* "Sun, 06 Nov 1994 08:49:37 GMT" * "Sun, 06 Nov 1994 08:49:37 GMT"
*/ */
printf("Last-Modified: %.3s, %02d %.3s %d %02d:%02d:%02d GMT\r\n", printf
days + (3 * x->tm_wday), ("Last-Modified: %.3s, %02d %.3s %d %02d:%02d:%02d GMT\r\n",
x->tm_mday, days + (3 * x->tm_wday), x->tm_mday,
months + (3 * x->tm_mon), months + (3 * x->tm_mon), x->tm_year + 1900,
x->tm_year + 1900,
x->tm_hour, x->tm_min, x->tm_sec); x->tm_hour, x->tm_min, x->tm_sec);
} }
if (rangestart || rangeend != st.st_size) { if (rangestart || rangeend != st.st_size) {
@ -1892,12 +1954,10 @@ main(int argc, char *argv[], const char *const *envp)
if (dirlist) { if (dirlist) {
handledirlist(origurl); handledirlist(origurl);
} }
badrequest(404, "Not Found", badrequest(404, "Not Found", "No such file or directory.");
"<title>Not Found</title>No such file or directory.");
} }
case 406: case 406:
badrequest(406, "Not Acceptable", badrequest(406, "Not Acceptable", "Nothing acceptable found.");
"<title>Not Acceptable</title>Nothing acceptable found.");
case 416: case 416:
badrequest(416, "Requested Range Not Satisfiable", ""); badrequest(416, "Requested Range Not Satisfiable", "");
case 304: case 304:

View File

@ -15,6 +15,15 @@ class LinesTests(unittest.TestCase):
def assertLinesEqual(self, a, b): def assertLinesEqual(self, a, b):
self.assertSequenceEqual(a.split(b'\n'), b.split(b'\n')) self.assertSequenceEqual(a.split(b'\n'), b.split(b'\n'))
class NewlineTests(LinesTests):
def testBareNL(self):
p = eris()
so, se = p.communicate(b'GET / HTTP/1.0\n\n')
self.assertRegexpMatches(so, b'HTTP/1.0 200 OK\r\nServer: eris/2\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 6\r\nLast-Modified: (Mon|Tue|Wed|Thu|Fri|Sat|Sun), .. (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) 2... ..:..:.. GMT\r\n\r\njames\n')
self.assertLinesEqual(se, b'10.1.2.3 200 6 127.0.0.1 (null) (null) /index.html\n')
class ArgTests(LinesTests): class ArgTests(LinesTests):
def check_index(self, *args): def check_index(self, *args):
p = eris(*args) p = eris(*args)