CGI beginning to work again

This commit is contained in:
Neale Pickett 2012-02-23 22:53:26 -07:00
parent 688d7c9b7e
commit 03d532240a
4 changed files with 154 additions and 199 deletions

View File

@ -1,4 +1,7 @@
2.0: 2.0:
Replace poll with select, which is more portable and may be
slightly faster; however, it's only called for CGI and by
that point you've lost quite a bit in terms of speed
Remove Accept header parsing: it was broken and the result was Remove Accept header parsing: it was broken and the result was
that the Accept header had no effect that the Accept header had no effect
Remove the .gz trick: I never used it, but I would not be averse Remove the .gz trick: I never used it, but I would not be averse

View File

@ -98,6 +98,13 @@ title "Second MIME-Type"
ls / > /dev/null # Delay required to work around test #3 ls / > /dev/null # Delay required to work around test #3
printf 'GET /a HTTP/1.1\r\nHost: a\r\nConnection: keep-alive\r\n\r\n') | $HTTPD 2>/dev/null | grep -q 'text/plain\|application/octet-stream' && pass || fail printf 'GET /a HTTP/1.1\r\nHost: a\r\nConnection: keep-alive\r\n\r\n') | $HTTPD 2>/dev/null | grep -q 'text/plain\|application/octet-stream' && pass || fail
# 6. Should consume POST data; instead tries to read POST data as second request
title "POST to static HTML"
(printf 'POST / HTTP/1.1\r\nHost: a\r\nConnection: keep-alive\r\nContent-Type: text/plain\r\nContent-Length: 1\r\n\r\n';
ls / > /dev/null
printf 'aPOST / HTTP/1.1\r\nHost: a\r\nConnection: keep-alive\r\nContent-Type: text/plain\r\nContent-Length: 1\r\n\r\na') | $HTTPD 2>/dev/null | grep -c '^HTTP/1.' | grep -q 2 && pass || fail
cat <<EOD cat <<EOD
----------------------------------------- -----------------------------------------
$successes of $tests tests passed ($failures failed). $successes of $tests tests passed ($failures failed).

307
eris.c
View File

@ -14,7 +14,7 @@
#include <sys/wait.h> #include <sys/wait.h>
#include <grp.h> #include <grp.h>
#include <errno.h> #include <errno.h>
#include <sys/poll.h> #include <sys/select.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netinet/tcp.h> #include <netinet/tcp.h>
@ -22,16 +22,14 @@
#include <sys/mman.h> #include <sys/mman.h>
#include <limits.h> #include <limits.h>
#ifndef min
#define min(a,b) ((a)<(b)?(a):(b))
#endif
/* /*
* Some things I use for debugging * Some things I use for debugging
*/ */
#define XXNODUMP
#ifndef NODUMP
#define DUMPf(fmt, args...) fprintf(stderr, "%s:%s:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##args) #define DUMPf(fmt, args...) fprintf(stderr, "%s:%s:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##args)
#else
#define DUMPf(fmt, args...)
#endif
#define DUMP() DUMPf("") #define DUMP() DUMPf("")
#define DUMP_d(v) DUMPf("%s = %d", #v, v) #define DUMP_d(v) DUMPf("%s = %d", #v, v)
#define DUMP_u(v) DUMPf("%s = %u", #v, v) #define DUMP_u(v) DUMPf("%s = %u", #v, v)
@ -55,33 +53,39 @@
#define CGI_TIMEOUT (5*60) /* 5 minutes time-out for CGI to complete */ #define CGI_TIMEOUT (5*60) /* 5 minutes time-out for CGI to complete */
#ifdef __linux__
/* /*
* defining USE_SENDFILE enables zero-copy TCP on Linux for static files. * defining USE_SENDFILE enables zero-copy TCP on Linux for static files.
* I measured over 320 meg per second with apache bench over localhost * I (Fefe) measured over 320 meg per second with apache bench over localhost
* with sendfile and keep-alive. However, sendfile does not work with * with sendfile and keep-alive.
* large files and may be considered cheating ;-) Also, sendfile is a
* blocking operation. Thus, no timeout handling.
*/ */
#define USE_SENDFILE #define USE_SENDFILE
#ifndef __linux__
#undef USE_SENDFILE
#endif
#ifdef USE_SENDFILE
#include <sys/sendfile.h> #include <sys/sendfile.h>
#endif #endif
/*
* Memory-mapping may result in a performance boost. thttpd does it,
* but for a different reason (to cache frequently-accessed files).
* XXX: Some performance testing is in order here.
*/
#ifdef _POSIX_MAPPED_FILES
#define USE_MMAP #define USE_MMAP
#ifndef _POSIX_MAPPED_FILES
#undef USE_MMAP
#endif #endif
enum { UNKNOWN, GET, HEAD, POST } method; /*
* TCP_CORK is a Linux extension to work around a TCP problem.
* http://www.baus.net/on-tcp_cork has a good description.
* XXX: Since we do our own buffering, TCP_CORK may not be helping
* with anything. This needs testing.
*/
#ifdef TCP_CORK #ifdef TCP_CORK
static int corked; static int corked;
#endif #endif
enum { UNKNOWN, GET, HEAD, POST } method;
static long retcode = 404; /* used for logging code */ static long retcode = 404; /* used for logging code */
char *host = "?"; /* Host: header */ char *host = "?"; /* Host: header */
char *port; /* also Host: header, :80 part */ char *port; /* also Host: header, :80 part */
@ -100,8 +104,6 @@ char *uri; /* copy of url before demangling */
char *content_type; char *content_type;
char *content_len; char *content_len;
char *auth_type; char *auth_type;
char *post_miss;
unsigned long post_mlen;
unsigned long post_len = 0; unsigned long post_len = 0;
#if _FILE_OFFSET_BITS == 64 #if _FILE_OFFSET_BITS == 64
@ -123,8 +125,8 @@ char *remote_ip;
char *remote_port; char *remote_port;
char *remote_ident; char *remote_ident;
#define BUFFER_OUTSIZE 8192 #define BUFFER_SIZE 8192
char stdout_buf[BUFFER_OUTSIZE]; char stdout_buf[BUFFER_SIZE];
static void static void
sanitize(char *ua) sanitize(char *ua)
@ -224,22 +226,33 @@ elen(register const char *const *e)
* 1 found 0 not found, call again -1 EOF or other error * 1 found 0 not found, call again -1 EOF or other error
*/ */
static int static int
read_header(FILE * f, char *buf, size_t * buflen) read_header(FILE *f, char *buf, size_t * buflen)
{ {
/*
* I'm not crazy about all these static variables. But the idea,
* which seems to work, is that you pass in things like it was
* a read call, and then just keep passing that same stuff in over
* and over for as long as it returns 0 (header not complete yet).
*
* Further down this winds up looking pretty nice. In here, sort
* of gross.
*/
static char *lastbuf = NULL; static char *lastbuf = NULL;
static int found = 0; static int found = 0;
static int bare = 1; /* LF here would be bare */ static int bare = 1; /* LF here would be bare */
int bufsize = *buflen; static int bufsize = 0;
if (lastbuf != buf) { if (lastbuf != buf) {
lastbuf = buf; lastbuf = buf;
bare = 1; bare = 1;
found = 0; found = 0;
} bufsize = *buflen;
*buflen = 0; *buflen = 0;
}
while (*buflen + bare < bufsize) { while (*buflen + bare < bufsize) {
int c = getchar(); int c = fgetc(f);
switch (c) { switch (c) {
case EOF: case EOF:
@ -384,10 +397,12 @@ do_cgi(const char *pathinfo, const char *const *envp)
char tmp[PATH_MAX]; char tmp[PATH_MAX];
i = strrchr(url, '/') - url; i = strrchr(url, '/') - url;
if (i) {
strncpy(tmp, url + 1, i); strncpy(tmp, url + 1, i);
tmp[i] = 0; tmp[i] = 0;
chdir(tmp); chdir(tmp);
} }
}
{ {
char tmp[PATH_MAX]; char tmp[PATH_MAX];
@ -425,175 +440,117 @@ cgi_child(int sig)
signal(SIGCHLD, cgi_child); signal(SIGCHLD, cgi_child);
} }
/*
 * Copy a chunk of CGI output to stdout, normalizing header line endings.
 *
 * Within the header, both a bare "\n" and a "\r\n" pair are emitted as
 * "\r\n"; every other byte is passed through unchanged.  The header ends
 * at the first empty line, i.e. two line endings in a row.  Once that is
 * seen, the remainder of the chunk is written verbatim.
 *
 *   s   buffer holding CGI output (need not be NUL-terminated)
 *   sl  number of valid bytes in s
 *
 * Returns 0 if the header is over, 1 if it continues past this chunk.
 *
 * NOTE: the "previous line was empty" state is local to one call, so a
 * blank line that straddles two chunks is not recognized.
 */
static int
cgi_send_correct_http(const char *s, unsigned int sl)
{
    unsigned int i;
    int newline = 0;            /* last thing emitted was a line ending */

    for (i = 0; i < sl; i += 1) {
        int eol = 0;

        if (s[i] == '\n') {
            eol = 1;
        } else if (s[i] == '\r' && i + 1 < sl && s[i + 1] == '\n') {
            /*
             * Only look at s[i + 1] when it is inside the buffer; a
             * trailing '\r' is passed through like any other byte.
             */
            i += 1;
            eol = 1;
        }

        if (!eol) {
            newline = 0;
            putchar(s[i]);
            continue;
        }

        fputs("\r\n", stdout);
        if (newline) {
            /* Empty line: header is done, the rest is body. */
            size_t rest = sl - i - 1;

            if (rest > 0) {
                fwrite(s + i + 1, 1, rest, stdout);
            }
            return 0;
        }
        newline = 1;
    }

    return 1;
}
static void static void
start_cgi(int nph, const char *pathinfo, const char *const *envp) start_cgi(int nph, const char *pathinfo, const char *const *envp)
{ {
// XXX: Is it safe to reuse headerbuf from main?
size_t size = 0; size_t size = 0;
int n;
int pid; int pid;
char ibuf[8192], char cgiheader[BUFFER_SIZE];
obuf[8192]; size_t cgiheaderlen = BUFFER_SIZE;
int fd[2], int fd[2],
df[2]; df[2];
FILE *cin;
if (pipe(fd) || pipe(df)) { if (pipe(fd) || pipe(df) || !(cin = fdopen(fd[0], "r"))) {
badrequest(500, "Internal Server Error", badrequest(500, "Internal Server Error",
"Server Resource problem."); "Server Resource problem.");
} }
if ((pid = fork())) { if ((pid = fork())) {
if (pid > 0) { if (pid > 0) {
struct pollfd pfd[2]; int nfds;
int nr = 1; fd_set rfds, wfds;
int startup = 1; int passthru = nph;
fcntl(fd[0], F_SETFL, O_NONBLOCK);
signal(SIGCHLD, cgi_child); signal(SIGCHLD, cgi_child);
signal(SIGPIPE, SIG_IGN); /* NO! no signal! */ signal(SIGPIPE, SIG_IGN); /* NO! no signal! */
close(df[0]); close(df[0]);
close(fd[1]); close(fd[1]);
pfd[0].fd = fd[0]; FD_ZERO(&rfds);
pfd[0].events = POLLIN; FD_ZERO(&wfds);
pfd[0].revents = 0; FD_SET(fd[0], &rfds);
nfds = fd[0];
pfd[1].fd = df[1]; if (post_len) {
pfd[1].events = POLLOUT; /* have post data */
pfd[1].revents = 0; FD_SET(df[0], &wfds);
if (df[0] > nfds) {
if (post_len) nfds = df[0];
++nr; /* have post data */ }
else } else if (df[1] >= 0) {
close(df[1]); /* no post data */ close(df[1]); /* no post data */
df[1] = -1;
}
while (poll(pfd, nr, -1) != -1) { while (select(nfds+1, &rfds, &wfds, NULL, NULL) != -1) {
/* if (FD_ISSET(fd[0], &rfds)) {
* read from cgi if (passthru) {
*/
if (pfd[0].revents & POLLIN) {
size_t len; size_t len;
if (startup) { /* Re-use this big buffer */
/* len = fread(cgiheader, 1, sizeof cgiheader, cin);
* XXX: could block :<
*/
// len = read_header(fd[0], ibuf, sizeof ibuf,
// NULL);
len = 0;
} else {
len = read(fd[0], ibuf, sizeof ibuf);
}
DUMP_u((unsigned int) len);
if (0 == len) { if (0 == len) {
/* CGI is done */
break; break;
} }
if (len == -1) { fwrite(cgiheader, 1, len, stdout);
goto cgi_500; size += len;
} } else {
int ret;
/* ret = read_header(cin, cgiheader, &cgiheaderlen);
* startup if (0 == ret) {
/* Call read_header again */
} else if (-1 == ret) {
/* EOF or error */
badrequest(500, "CGI Error",
"CGI output too weird");
} else {
/* Entire header is in memory now */
passthru = 1;
/* XXX: I think we need to look for Location:
* anywhere, but fnord got away with checking
* only the first header field, so I will too.
*/ */
if (startup) { if (memcmp(cgiheader, "Location: ", 10) == 0) {
if (nph) { /* NPH-CGI */
startup = 0;
printf("%.*s", (int) len, ibuf);
/*
* skip HTTP/x.x
*/
retcode = strtoul(ibuf + 9, NULL, 10);
} else { /* CGI */
if (memcmp(ibuf, "Location: ", 10) == 0) {
retcode = 302; retcode = 302;
printf printf
("HTTP/1.0 302 CGI-Redirect\r\nConnection: close\r\n"); ("HTTP/1.0 302 CGI-Redirect\r\nConnection: close\r\n");
signal(SIGCHLD, SIG_IGN); fwrite(cgiheader, 1, cgiheaderlen, stdout);
cgi_send_correct_http(ibuf, n);
fflush(stdout);
dolog(0); dolog(0);
exit(0); exit(0);
} else { }
retcode = 200; retcode = 200;
printf("HTTP/1.0 200 OK\r\nServer: " printf("HTTP/1.0 200 OK\r\nServer: "
FNORD FNORD
"\r\nPragma: no-cache\r\nConnection: close\r\n"); "\r\nPragma: no-cache\r\nConnection: close\r\n");
signal(SIGCHLD, SIG_IGN); signal(SIGCHLD, SIG_IGN);
cgi_send_correct_http(ibuf, len); fwrite(cgiheader, 1, cgiheaderlen, stdout);
startup = 0;
} }
} }
} } else if (FD_ISSET(df[1], &wfds)) {
/*
* non startup
*/
else {
fwrite(ibuf, len, 1, stdout);
}
size += len;
if (pfd[0].revents & POLLHUP)
break;
}
/* /*
* write to cgi the post data * write to cgi the post data
*/ */
else if (nr > 1 && pfd[1].revents & POLLOUT) { if (post_len) {
if (post_miss) { size_t len;
write(df[1], post_miss, post_mlen); char buf[BUFFER_SIZE];
post_miss = 0; size_t nmemb = min(BUFFER_SIZE, post_len);
} else if (post_mlen < post_len) {
n = read(0, obuf, sizeof(obuf)); len = fread(buf, 1, nmemb, stdin);
if (n < 1) if (len < 1) {
goto cgi_500; break;
post_mlen += n;
write(df[1], obuf, n);
} else {
--nr;
close(df[1]);
} }
} else if (pfd[0].revents & POLLHUP) post_len -= len;
break; write(df[1], buf, len);
else { } else {
cgi_500:if (startup) close(df[1]);
badrequest(500, "Internal Server Error",
"Looks like the CGI crashed.");
else {
printf("\n\nLooks like the CGI crashed.\n\n");
break;
} }
} }
} }
@ -1204,36 +1161,25 @@ static int
serve_read_write(int fd) serve_read_write(int fd)
{ {
char tmp[4096]; char tmp[4096];
struct pollfd duh;
time_t now,
fini;
char *tmp2; char *tmp2;
int len; int len;
off_t todo = rangeend - rangestart; off_t todo = rangeend - rangestart;
duh.fd = 1;
duh.events = POLLOUT;
if (rangestart) if (rangestart)
lseek(fd, rangestart, SEEK_SET); lseek(fd, rangestart, SEEK_SET);
while (todo > 0) { while (todo > 0) {
int olen; int olen;
fini = time(&now) + WRITETIMEOUT;
len = read(fd, tmp, todo > 4096 ? 4096 : todo);
olen = len; olen = len;
tmp2 = tmp; tmp2 = tmp;
while (len > 0) { while (len > 0) {
int written; int written;
switch (poll(&duh, 1, (fini - now) * 1000)) {
case 0: if ((written = write(1, tmp2, len)) < 0) {
if (now < fini)
continue; /* fall through */
case -1:
return 1; /* timeout or error */
}
if ((written = write(1, tmp2, len)) < 0)
return -1; return -1;
}
len -= written; len -= written;
tmp2 += written; tmp2 += written;
time(&now);
} }
todo -= olen; todo -= olen;
} }
@ -1248,9 +1194,7 @@ serve_mmap(int fd)
unsigned long mapofs; unsigned long mapofs;
char *map, char *map,
*tmp2; *tmp2;
struct pollfd duh;
time_t now,
fini;
mapstart = rangestart & (~(off_t) 0xfff); /* round down to 4k page */ mapstart = rangestart & (~(off_t) 0xfff); /* round down to 4k page */
maplen = rangeend - mapstart; maplen = rangeend - mapstart;
mapofs = rangestart - mapstart; mapofs = rangestart - mapstart;
@ -1274,27 +1218,21 @@ serve_mmap(int fd)
} else } else
return serve_read_write(fd); return serve_read_write(fd);
} }
duh.fd = 1;
duh.events = POLLOUT;
while (rangestart < rangeend) { while (rangestart < rangeend) {
int len; int len;
fini = time(&now) + WRITETIMEOUT;
len = maplen - mapofs; len = maplen - mapofs;
tmp2 = map + mapofs; tmp2 = map + mapofs;
while (len > 0) { while (len > 0) {
int written; int written;
switch (poll(&duh, 1, (fini - now) * 1000)) {
case 0: if ((written = write(1, tmp2, len)) < 0) {
if (now < fini) alarm(0);
continue; /* fall through */
case -1:
return 1; /* timeout or error */
}
if ((written = write(1, tmp2, len)) < 0)
return -1; return -1;
}
len -= written; len -= written;
tmp2 += written; tmp2 += written;
time(&now);
} }
rangestart += maplen - mapofs; rangestart += maplen - mapofs;
mapstart += maplen; mapstart += maplen;
@ -1433,6 +1371,7 @@ main(int argc, char *argv[], const char *const *envp)
alarm(READTIMEOUT); alarm(READTIMEOUT);
headerlen = sizeof headerbuf; headerlen = sizeof headerbuf;
// XXX: I need a way to signal that this is a new read with the same buffer.
switch (read_header(stdin, headerbuf, &headerlen)) { switch (read_header(stdin, headerbuf, &headerlen)) {
case -1: case -1:
return 0; return 0;
@ -1781,7 +1720,13 @@ main(int argc, char *argv[], const char *const *envp)
} }
fputs("\r\n", stdout); fputs("\r\n", stdout);
if (method == GET || method == POST) { if (method == GET || method == POST) {
switch (serve_static_data(fd)) { int ret;
alarm(WRITETIMEOUT);
ret = serve_static_data(fd);
alarm(0);
switch (ret) {
case 0: case 0:
break; break;
case -1: case -1:

View File

@ -96,18 +96,18 @@ class CGITests(BasicTests):
def testSet(self): def testSet(self):
so, se = self.get('/cgi/set.cgi', 'default') so, se = self.get('/cgi/set.cgi', 'default')
self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:GET\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\n') self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2.0\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:GET\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\n')
self.assertLinesEqual(se, b'10.1.2.3 200 242 default (null) (null) /cgi/set.cgi\n') self.assertLinesEqual(se, b'10.1.2.3 200 218 default (null) (null) /cgi/set.cgi\n')
def testSetArgs(self): def testSetArgs(self):
so, se = self.get('/cgi/set.cgi?a=1&b=2&c=3', 'default') so, se = self.get('/cgi/set.cgi?a=1&b=2&c=3', 'default')
self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:GET\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\nQUERY_STRING:a=1&b=2&c=3\n') self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2.0\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:GET\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\nQUERY_STRING:a=1&b=2&c=3\n')
self.assertLinesEqual(se, b'10.1.2.3 200 267 default (null) (null) /cgi/set.cgi\n') self.assertLinesEqual(se, b'10.1.2.3 200 243 default (null) (null) /cgi/set.cgi\n')
def testPost(self): def testPost(self):
so, se = self.post('/cgi/set.cgi', 'default', 'a=1&b=2&c=3') so, se = self.post('/cgi/set.cgi', 'default', 'a=1&b=2&c=3')
self.assertLinesEqual(se, b'10.1.2.3 200 330 default (null) (null) /cgi/set.cgi\n') self.assertLinesEqual(se, b'10.1.2.3 200 330 default (null) (null) /cgi/set.cgi\n')
self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:POST\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\nCONTENT_TYPE:application/x-www-form-urlencoded\nCONTENT_LENGTH:11\nForm data: a=1&b=2&c=3') self.assertLinesEqual(so, b'HTTP/1.0 200 OK\r\nServer: eris/2.0\r\nPragma: no-cache\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nGATEWAY_INTERFACE:CGI/1.1\nSERVER_PROTOCOL:HTTP/1.0\nSERVER_SOFTWARE:eris/2.0\nSERVER_NAME:default\nSERVER_PORT:80\nREQUEST_METHOD:POST\nREQUEST_URI:/cgi/set.cgi\nSCRIPT_NAME:/cgi/set.cgi\nREMOTE_ADDR:10.1.2.3\nREMOTE_PORT:5858\nCONTENT_TYPE:application/x-www-form-urlencoded\nCONTENT_LENGTH:11\nForm data: a=1&b=2&c=3')
# XXX: Test posting to static html with keepalive # XXX: Test posting to static html with keepalive
# (it probably won't discard content-length octets) # (it probably won't discard content-length octets)