From b58da7211df6ec98b42346b638d18a99fd5b4938 Mon Sep 17 00:00:00 2001 From: Neale Pickett Date: Tue, 16 Aug 2011 14:36:11 -0600 Subject: [PATCH] Import upstream 1.10 --- CHANGES | 101 +++ COPYING | 339 ++++++++ Makefile | 66 ++ README | 72 ++ README.auth | 20 + SPEED | 32 + TODO | 2 + buffer.h | 70 ++ buffer_1.c | 7 + buffer_2.c | 7 + buffer_flush.c | 10 + buffer_put.c | 17 + buffer_putflush.c | 7 + buffer_puts.c | 6 + buffer_putspace.c | 7 + buffer_putulong.c | 8 + buffer_putulonglong.c | 8 + buffer_stubborn.c | 15 + byte.h | 38 + byte_copy.c | 28 + byte_diff.c | 21 + cgi-post.c | 55 ++ contrib/fnord-auth-0.1.tgz | Bin 0 -> 3361 bytes convert.pl | 40 + fmt.h | 76 ++ fmt_ulong.c | 11 + fmt_ulonglong.c | 12 + fnord-conf | 33 + http | 2 + httpd.c | 1677 ++++++++++++++++++++++++++++++++++++ scan.h | 61 ++ scan_ulong.c | 13 + scan_ulonglong.c | 13 + str.h | 43 + str_chr.c | 13 + str_copy.c | 13 + str_diff.c | 20 + str_diffn.c | 21 + str_len.c | 12 + str_start.c | 14 + 40 files changed, 3010 insertions(+) create mode 100644 CHANGES create mode 100644 COPYING create mode 100644 Makefile create mode 100644 README create mode 100644 README.auth create mode 100644 SPEED create mode 100644 TODO create mode 100644 buffer.h create mode 100644 buffer_1.c create mode 100644 buffer_2.c create mode 100644 buffer_flush.c create mode 100644 buffer_put.c create mode 100644 buffer_putflush.c create mode 100644 buffer_puts.c create mode 100644 buffer_putspace.c create mode 100644 buffer_putulong.c create mode 100644 buffer_putulonglong.c create mode 100644 buffer_stubborn.c create mode 100644 byte.h create mode 100644 byte_copy.c create mode 100644 byte_diff.c create mode 100644 cgi-post.c create mode 100644 contrib/fnord-auth-0.1.tgz create mode 100644 convert.pl create mode 100644 fmt.h create mode 100644 fmt_ulong.c create mode 100644 fmt_ulonglong.c create mode 100755 fnord-conf create mode 100755 http create mode 100644 httpd.c create mode 100644 scan.h create mode 100644 
scan_ulong.c create mode 100644 scan_ulonglong.c create mode 100644 str.h create mode 100644 str_chr.c create mode 100644 str_copy.c create mode 100644 str_diff.c create mode 100644 str_diffn.c create mode 100644 str_len.c create mode 100644 str_start.c diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..449bcf7 --- /dev/null +++ b/CHANGES @@ -0,0 +1,101 @@ +1.10: + have fallback in case sendfile fails + +1.9: + chdir to cgi's base dir (Kuba Winnicki) + set HTTP_ACCEPT_ENCODING environment variable (Kuba Winnicki) + We actually should export all HTTP headers as HTTP_[header] + Any takers? + Try not to send error message HTTP headers if we already sent the + headers from the CGI (Kuba Winnicki) + <=ims (Gerrit Pape) + 64-bit file I/O cleanliness + fix HTTP ranges (Joachim Berdal Haga via Gerrit Pape) + +1.8: + keep current environment in CGI (Laurent Bercot) + make fnord-conf use the UID and not the user name (Fridtjof Busse) + fix typo in buffer_putulonglong (Gerrit Pape) + fix CGI POST off-by-two typo (Mark Hopf) + fix gif->png conversion (Thomas Seck) + remove == bashism from fnord-conf (Thomas Seck) + add bittorrent mime type + make authorization data available to CGIs for GET, too (Paul Jarc) + fix conversion of host name to lower case (Gerrit Pape) + add small test cgi: cgi-post.c + fix CGI POST bug (Moe Wibble) + fix CGI PATH_TRANSLATED bug (Nicolas George) + add optional authentication support (Nicolas George, see README.auth) + make sure error messages are text/html + move /. -> /: conversion before demangling so it can actually be + used as security measure for installations that don't use chroot + +1.7: + add .mov and .qt for quicktime, .mpg for video/mpeg and .wav for audio/x-wav + add mmap based file serving (should do zero-copy tcp just like sendfile) + add Pragma: no-cache to CGI responses + fix (apparently not exploitable) buffer overrun in do_cgi + This bug was found by Ralf Wildenhues. 
To my knowledge it is + impossible to exploit this bug on any platform known to me. + fix (harmless) access to uninitialized data + +1.6: + add support for $PATH_INFO in CGI environment. + add .pac for netscape proxy autoconfig + add .sig for application/pgp-signature + +1.5: + fix write timeout handling (found by Lukas Beeler) + fix fnord-conf to use the symbolic account name in run script + (Sebastian D.B. Krause) + +1.4: + add dangling symlink based whole-host redirection (see README). This + has the advantage that it can serve normal sites and redirect sites + on the same IP. + add support for non-TCP UCSPI environments (like ucspi-ssl). Please + get the latest version of my ucspi-tcp IPv6 patch as I violated the + UCSPI spec with all versions before 0.88-diff11. + change logging from "127.0.0.1 200 23 Links_(0.96;_Unix) none /index.html" + to "127.0.0.1 200 23 localhost Links_(0.96;_Unix) none /index.html" + (i.e. include the Host: header). Suggested by Thomas Bader. + add "immediate mode". If you give fnord a command line argument, it + will change to that directory and if no "default" directory is + given, it will assume there are no virtual hosts and serve from the + current directory. I have a shell script called "http" that does + tcpserver -RHl localhost 0 8000 /home/leitner/bin/fnord-idx . + to share some directory on my hard drive with some poor Windows + users without npoll (http://www.fefe.de/ncp/). fnord-idx is a new + target (a fnord with DIR_LIST) that is auto-built by make. + +1.3: + make directory listings use non-proportional fonts (thanks, Antonio Dias) + fnord will now optionally (default: enabled) normalize the incoming + host name, i.e. "www.domain.com" -> "www.domain.com:80". That + should cut down on the number of symbolic links. ;) + remove timeout error message. fnord will not drop the connection + without error message. 
Mozilla used to display the error message + when the user caused another request on the connection with the + timeout. + Uwe Ohse found two more compilation problems. + +1.2: + Olaf: I changed my initial CGI-interface to NOT use the filesystem but + two pipes. + Add whole-host redirect (see README) + Olaf: added directory-lists and "index.cgi" support (normal CGI only ! + "nph-index.cgi" is not supported). Fixed some problematic parts in the + CGI-interface (\n -> \r\n converter for http-header and CGI crash + handling) + Fix gzip encoding bug that only happened with keep-alive + +1.1: + ship with the parts from libowfat that we actually use + minor speed-up. sendfile is a drag for very small files, so those are + now sent through the same buffer the header is sent through. That + sends the whole answer in one TCP packet if you are lucky, even + without the TCP_CORK magic from Linux. Major speed-up for + benchmarks ;) + +1.0: + initial release diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8ea86d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,66 @@ +CC=gcc +CXX=g++ + +#LIBOWFAT=../libowfat/ +DIET=diet -Os + +CFLAGS=-Os -fomit-frame-pointer +#CFLAGS=-g + +all: fnord fnord-cgi fnord-idx + +fnord: httpd + cp -p $^ $@ + -strip -R .note -R .comment $@ + +httpd: httpd.o libowfat.a + $(DIET) $(CC) -o $@ $^ $(CFLAGS) + +fnord-cgi: httpd-cgi.o libowfat.a + $(DIET) $(CC) -o $@ $^ $(CFLAGS) + -strip -R .note -R .comment $@ + +fnord-idx: httpd-idx.o libowfat.a + $(DIET) $(CC) -o $@ $^ $(CFLAGS) + -strip -R .note -R .comment $@ + +libowfat.a: httpd.o buffer_1.o buffer_puts.o buffer_flush.o buffer_put.o \ +buffer_putulong.o buffer_2.o buffer_putspace.o buffer_stubborn.o \ +buffer_putflush.o str_copy.o fmt_ulong.o byte_diff.o byte_copy.o \ +str_len.o str_diff.o str_chr.o str_diffn.o str_start.o scan_ulong.o + ar cru $@ $^ + -ranlib $@ + +httpd.o: httpd.c + $(DIET) $(CC) -pipe $(CFLAGS) -c $^ -DFNORD=\"fnord/$(shell head -n 1 CHANGES|sed 's/://')\" + +httpd-cgi.o: httpd.c + $(DIET) $(CC) -pipe $(CFLAGS) -c httpd.c -o $@ -DCGI -DFNORD=\"fnord/$(shell head -n 1 CHANGES|sed 's/://')\" + +httpd-idx.o: httpd.c + $(DIET) $(CC) -pipe $(CFLAGS) -c httpd.c -o $@ -DDIR_LIST -DFNORD=\"fnord/$(shell head -n 1 CHANGES|sed 's/://')\" + +%.o: %.c + $(DIET) $(CC) -pipe $(CFLAGS) -c $^ + +%.o: %.cpp + $(DIET) $(CXX) -pipe $(CFLAGS) -c $^ + +.PHONY: all rename clean install server tar +server: fnord + tcpserver -v -RHl localhost 0 8000 ./fnord + +clean: + rm -f *.[oa] httpd fnord fnord-cgi fnord-idx + +install: + test -d /command || mkdir /command + +CURNAME=$(notdir $(shell pwd)) +VERSION=fnord-$(shell head -n 1 CHANGES|sed 's/://') + +tar: rename + cd .. && tar cvvf $(VERSION).tar.bz2 --use=bzip2 --exclude CVS --exclude bin-* --exclude .cvsignore --exclude default $(VERSION) + +rename: + if test $(CURNAME) != $(VERSION); then cd .. 
&& mv $(CURNAME) $(VERSION); fi diff --git a/README b/README new file mode 100644 index 0000000..39b5b02 --- /dev/null +++ b/README @@ -0,0 +1,72 @@ +Usage: + + tcpserver -v -RHl localhost -u 1234 -g 1234 0 80 ./httpd + +Will log to stderr in the form + + 127.0.0.1 200 23 localhost Links_(0.96;_Unix) none /index.html + +where 127.0.0.1 is the client IP, 200 is the HTTP exit code, 23 is the +size of the content that was served (or 0 for unsuccessful exit codes), +localhost is the Host: header (the virtual host), the next token is the +user agent with spaces replaced by underscores, the next token (none) is +the Referer HTTP header or "none" if none was given, and the rest of +each line is the decoded requested URL. + +fnord-httpd does simple virtual hosting. If the Host: HTTP header is +there, fnord will try to chdir to a directory of that name, i.e. if the +client asks for "/" on host "www.fefe.de:80", fnord will look for +"www.fefe.de:80/index.html". Fnord will also try the directory +"default" if no specific directory for the virtual host was there. If +the directory is a dangling symlink and fnord was compiled with +-DREDIRECT (default), fnord will redirect the whole site. Examples: + + lrwxrwxrwx 1 leitner users 19 May 5 01:09 www.foo.de:80 -> http://www.baz.de/ + lrwxrwxrwx 1 leitner users 20 May 5 01:12 www.bar.de:80 -> =http://www.baz.de/ + +http://www.foo.de/blub.html will be redirected to http://www.baz.de/blub.html. +http://www.bar.de/blub.html will be redirected to http://www.baz.de/. + +fnord implements el-cheapo HTTP ranges (only byte ranges and only of the +form x-y, not multiple ranges). + +fnord implements content type matching and Accepts: parsing, but the +content type table is compiled in, i.e. to change it, you have to change +the source code. Shouldn't be a problem because you _have_ the source +code ;) + +fnord implements HTTP redirection. 
If a file is not found, but a +dangling symlink is there under the same name, fnord will issue a +redirection to the contents of that symlink. To be RFC compliant, the +symlink must point to a full URL, i.e. + + ln -s ftp://foobar.math.fu-berlin.de/pub/dietlibc/dietlibc-0.11.tar.bz2 dietlibc-0.11.tar.bz2 + +fnord implements in-place substitution of .gif to .png and of * to *.gz +if the file is available and the client supports the mime-type and +content-encoding. That means you can save substantial bandwidth by +having an index.html.gz for each index.html, as most clients can +transparently decode gzipped files. + +fnord will change dots at the start of file or directory names to colons +in the query before trying to answer them. + +fnord understands and implements keep-alive connections. + +fnord can use sendfile on Linux to enable zero-copy TCP. + +If fnord is compiled with CGI support enabled, it will regard files +whose names end with ".cgi" as CGI programs and try to execute them. +CGI programs starting with "nph-" will be handled as no-parse-header +CGIs. Please see http://hoohoo.ncsa.uiuc.edu/cgi/interface.html for the +CGI specification. + +fnord 1.3 introduced a different method of whole-host redirection, which +can still be enabled by compiling fnord with -DOLD_STYLE_REDIRECT. +Files that are not found locally will be redirected to $REDIRECT_HOST +with the current URI stripped of leading slashes, i.e. if +http://your.server/foo/bar.html is requested but not found, and +$REDIRECT_HOST is set to http://www.yahoo.com/, a redirect to +http://www.yahoo.com/foo/bar.html will be issued. If $REDIRECT_HOST is +unset but $REDIRECT_URI is set, the URI part will be discarded and all +URLs will be redirected to $REDIRECT_URI. diff --git a/README.auth b/README.auth new file mode 100644 index 0000000..a49f9c0 --- /dev/null +++ b/README.auth @@ -0,0 +1,20 @@ +The auth patch was contributed by Nicolas George. 
Here is the excerpt from his +email describing how to use it. Firstly, #define AUTH or add -DAUTH to CFLAGS. + + Then, I have implemented some basic handling of HTTP authentication. The + concept is that if there is a file .http-auth in the root of one site, + then it is an executable (most likely a script) that handles + authorizations. It is called with first argument the virtual host name, + second argument the path to the requested file, and third argument the + value of the Authorization header, if present. If it exits + successfully, access is granted, else 401. + + An example of .http-auth that grants the access only to the user Cigaes + with password foober is: + + #!/bin/sh + [ x"$3" = x"Basic Q2lnYWVzOmZvb2Jhcg==" ] + + The format of the Authorization is not very convenient. The mangled + string is the base64 encoding of "$username:$password". + diff --git a/SPEED b/SPEED new file mode 100644 index 0000000..f6742da --- /dev/null +++ b/SPEED @@ -0,0 +1,32 @@ +fnord is meant to be used under Linux with the diet libc +(http://www.fefe.de/dietlibc/). These are actual apache bench results, +all on localhost on a 100k JPEG test file, 1000 requests with a +concurrency of 10. + +To be fair, I linked thttpd, mini_httpd and fnord against the diet libc. +I did not try this with apache, though. Since apache does not exec +anything, it should not matter much, though. + +mini_httpd forks for each request, apparently does not support +keep-alive and compared to fnord does not incur the overhead of execve +for each request. + +thttpd is the fastest web server known to me. + +Values are time in seconds for the whole transaction (1000 downloads, 10 +parallel connections). 
+ +server software keep-alive no keep-alive +---------------------------------------------------------------- +mini_httpd 1.15c 1.690 0.943 +apache 1.3.22 1.236 1.178 +thttpd 2.21b 0.896 0.839 +fnord 1.008 1.331 +fnord w/ sendfile 0.316 0.912 + +Please note that fnord actually plays in the same league as others even +without keep-alive and sendfile support. That is surprising since fnord +has one fork() _and_ one execve() as overhead for each request! As the +difference between keep-alive and non-keep-alive shows, that difference +is not very large. That is the achievement of the diet libc, which +reduces the normally significant libc overhead to zero. diff --git a/TODO b/TODO new file mode 100644 index 0000000..aea8a33 --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +investigate CGI keep-alive + diff --git a/buffer.h b/buffer.h new file mode 100644 index 0000000..ad414fd --- /dev/null +++ b/buffer.h @@ -0,0 +1,70 @@ +#ifndef BUFFER_H +#define BUFFER_H + +typedef struct buffer { + char *x; + unsigned int p; + unsigned int n; + unsigned int a; + int fd; + int (*op)(); +} buffer; + +#define BUFFER_INIT(op,fd,buf,len) { (buf), 0, 0, (len), (fd), (op) } +#define BUFFER_INIT_READ(op,fd,buf,len) BUFFER_INIT(op,fd,buf,len) /*obsolete*/ +#define BUFFER_INSIZE 8192 +#define BUFFER_OUTSIZE 8192 + +extern void buffer_init(buffer* b,int (*op)(),int fd,char* y,unsigned int ylen); + +extern int buffer_flush(buffer* b); +extern int buffer_put(buffer* b,const char* x,unsigned int len); +extern int buffer_putalign(buffer* b,const char* x,unsigned int len); +extern int buffer_putflush(buffer* b,const char* x,unsigned int len); +extern int buffer_puts(buffer* b,const char* x); +extern int buffer_putsalign(buffer* b,const char* x); +extern int buffer_putsflush(buffer* b,const char* x); + +extern int buffer_putspace(buffer* b); + +#define buffer_PUTC(s,c) \ + ( ((s)->a != (s)->p) \ + ? 
( (s)->x[(s)->p++] = (c), 0 ) \ + : buffer_put((s),&(c),1) \ + ) + +extern int buffer_get(buffer* b,char* x,unsigned int len); +extern int buffer_bget(buffer* b,char* x,unsigned int len); +extern int buffer_feed(buffer* b); +extern int buffer_getc(buffer* b,char* x); +extern int buffer_getn(buffer* b,char* x,unsigned int len); +extern int buffer_get_token(buffer* b,char* x,unsigned int len,const char* charset,unsigned int setlen); +#define buffer_getline(b,x,len) buffer_get_token((b),(x),(len),"\n",1) + +extern char *buffer_peek(buffer* b); +extern void buffer_seek(buffer* b,unsigned int len); + +#define buffer_PEEK(s) ( (s)->x + (s)->p ) +#define buffer_SEEK(s,len) ( (s)->p += (len) ) + +#define buffer_GETC(s,c) \ + ( ((s)->p < (s>->n) \ + ? ( *(c) = *buffer_PEEK(s), buffer_SEEK((s),1), 1 ) \ + : buffer_get((s),(c),1) \ + ) + +extern int buffer_copy(buffer* out,buffer* in); + +extern int buffer_putulong(buffer *b,unsigned long l); +extern int buffer_put8long(buffer *b,unsigned long l); +extern int buffer_putxlong(buffer *b,unsigned long l); +extern int buffer_putlong(buffer *b,unsigned long l); +extern int buffer_putulonglong(buffer *b,unsigned long long l); + +extern buffer *buffer_0; +extern buffer *buffer_0small; +extern buffer *buffer_1; +extern buffer *buffer_1small; +extern buffer *buffer_2; + +#endif diff --git a/buffer_1.c b/buffer_1.c new file mode 100644 index 0000000..e1d9538 --- /dev/null +++ b/buffer_1.c @@ -0,0 +1,7 @@ +#include +#include "buffer.h" + +char buffer_1_space[BUFFER_INSIZE]; +static buffer it = BUFFER_INIT(write,1,buffer_1_space,sizeof buffer_1_space); +buffer *buffer_1 = ⁢ + diff --git a/buffer_2.c b/buffer_2.c new file mode 100644 index 0000000..ea31d21 --- /dev/null +++ b/buffer_2.c @@ -0,0 +1,7 @@ +#include +#include "buffer.h" + +char buffer_2_space[BUFFER_INSIZE]; +static buffer it = BUFFER_INIT(write,2,buffer_2_space,sizeof buffer_2_space); +buffer *buffer_2 = ⁢ + diff --git a/buffer_flush.c b/buffer_flush.c new file mode 100644 
index 0000000..6db8c7b --- /dev/null +++ b/buffer_flush.c @@ -0,0 +1,10 @@ +#include "buffer.h" + +extern int buffer_stubborn(int (*op)(),int fd,const char* buf, unsigned int len); + +extern int buffer_flush(buffer* b) { + register int p; + if (!(p=b->p)) return 0; /* buffer already empty */ + b->p=0; + return buffer_stubborn(b->op,b->fd,b->x,p); +} diff --git a/buffer_put.c b/buffer_put.c new file mode 100644 index 0000000..d53b6cc --- /dev/null +++ b/buffer_put.c @@ -0,0 +1,17 @@ +#include "byte.h" +#include "buffer.h" + +extern int buffer_stubborn(int (*op)(),int fd,const char* buf, unsigned int len); + +int buffer_put(buffer* b,const char* buf,unsigned int len) { + if (len>b->a-b->p) { /* doesn't fit */ + if (buffer_flush(b)==-1) return -1; + if (len>b->a) { + if (buffer_stubborn(b->op,b->fd,buf,len)<0) return -1; + return 0; + } + } + byte_copy(b->x+b->p, len, buf); + b->p+=len; + return 0; +} diff --git a/buffer_putflush.c b/buffer_putflush.c new file mode 100644 index 0000000..9e8e87e --- /dev/null +++ b/buffer_putflush.c @@ -0,0 +1,7 @@ +#include "buffer.h" + +int buffer_putflush(buffer* b,const char* x,unsigned int len) { + if (buffer_put(b,x,len)<0) return -1; + if (buffer_flush(b)<0) return -1; + return 0; +} diff --git a/buffer_puts.c b/buffer_puts.c new file mode 100644 index 0000000..e53f675 --- /dev/null +++ b/buffer_puts.c @@ -0,0 +1,6 @@ +#include "str.h" +#include "buffer.h" + +int buffer_puts(buffer* b,const char* x) { + return buffer_put(b,x,str_len(x)); +} diff --git a/buffer_putspace.c b/buffer_putspace.c new file mode 100644 index 0000000..48fea2a --- /dev/null +++ b/buffer_putspace.c @@ -0,0 +1,7 @@ +#include "str.h" +#include "buffer.h" + +int buffer_putspace(buffer* b) { + static char space=' '; + return buffer_put(b,&space,1); +} diff --git a/buffer_putulong.c b/buffer_putulong.c new file mode 100644 index 0000000..0e6da90 --- /dev/null +++ b/buffer_putulong.c @@ -0,0 +1,8 @@ +#include "buffer.h" +#include "fmt.h" + +int 
buffer_putulong(buffer *b,unsigned long l) { + char buf[FMT_ULONG]; + return buffer_put(b,buf,fmt_ulong(buf,l)); +} + diff --git a/buffer_putulonglong.c b/buffer_putulonglong.c new file mode 100644 index 0000000..861acf5 --- /dev/null +++ b/buffer_putulonglong.c @@ -0,0 +1,8 @@ +#include "buffer.h" +#include "fmt.h" + +int buffer_putulonglong(buffer *b,unsigned long long l) { + char buf[FMT_ULONG]; + return buffer_put(b,buf,fmt_ulonglong(buf,l)); +} + diff --git a/buffer_stubborn.c b/buffer_stubborn.c new file mode 100644 index 0000000..c94a1d0 --- /dev/null +++ b/buffer_stubborn.c @@ -0,0 +1,15 @@ +#include +#include "buffer.h" + +int buffer_stubborn(int (*op)(),int fd,const char* buf, unsigned int len) { + int w; + while (len) { + if ((w=op(fd,buf,len))<0) { + if (errno == EINTR) continue; + return -1; + }; + buf+=w; + len-=w; + } + return 0; +} diff --git a/byte.h b/byte.h new file mode 100644 index 0000000..e598124 --- /dev/null +++ b/byte.h @@ -0,0 +1,38 @@ +#ifndef BYTE_H +#define BYTE_H + +#include + +#ifndef __pure__ +#define __pure__ +#endif + +/* byte_chr returns the smallest integer i between 0 and len-1 + * inclusive such that one[i] equals needle, or len it not found. */ +unsigned int byte_chr(const void* haystack, unsigned int len, char needle) __pure__; + +/* byte_rchr returns the largest integer i between 0 and len-1 inclusive + * such that one[i] equals needle, or len if not found. */ +unsigned int byte_rchr(const void* haystack,unsigned int len,char needle) __pure__; + +/* byte_copy copies in[0] to out[0], in[1] to out[1], ... and in[len-1] + * to out[len-1]. */ +void byte_copy(void* out, unsigned int len, const void* in); + +/* byte_copyr copies in[len-1] to out[len-1], in[len-2] to out[len-2], + * ... 
and in[0] to out[0] */ +void byte_copyr(void* out, unsigned int len, const void* in); + +/* byte_diff returns negative, 0, or positive, depending on whether the + * string a[0], a[1], ..., a[len-1] is lexicographically smaller + * than, equal to, or greater than the string b[0], b[1], ..., + * b[len-1]. When the strings are different, byte_diff does not read + * bytes past the first difference. */ +int byte_diff(const void* a, unsigned int len, const void* b) __pure__; + +/* byte_zero sets the bytes out[0], out[1], ..., out[len-1] to 0 */ +void byte_zero(void* out, unsigned len); + +#define byte_equal(s,n,t) (!byte_diff((s),(n),(t))) + +#endif diff --git a/byte_copy.c b/byte_copy.c new file mode 100644 index 0000000..26c01a8 --- /dev/null +++ b/byte_copy.c @@ -0,0 +1,28 @@ +#include "byte.h" + +/* byte_copy copies in[0] to out[0], in[1] to out[1], ... and in[len-1] + * to out[len-1]. */ +void byte_copy(void* out, unsigned int len, const void* in) { + register char* s=out; + register const char* t=in; + register const char* u=t+len; + if (len>127) { + while ((unsigned long)s&(sizeof(unsigned long)-1)) { + if (t==u) break; *s=*t; ++s; ++t; + } + /* s (destination) is now unsigned long aligned */ +#ifndef __i386__ + if (!((unsigned long)t&(sizeof(unsigned long)-1))) +#endif + while (t+sizeof(unsigned long)<=u) { + *(unsigned long*)s=*(unsigned long*)t; + s+=sizeof(unsigned long); t+=sizeof(unsigned long); + } + } + for (;;) { + if (t==u) break; *s=*t; ++s; ++t; + if (t==u) break; *s=*t; ++s; ++t; + if (t==u) break; *s=*t; ++s; ++t; + if (t==u) break; *s=*t; ++s; ++t; + } +} diff --git a/byte_diff.c b/byte_diff.c new file mode 100644 index 0000000..b7c0864 --- /dev/null +++ b/byte_diff.c @@ -0,0 +1,21 @@ +#include "byte.h" + +/* byte_diff returns negative, 0, or positive, depending on whether the + * string one[0], one[1], ..., one[len-1] is lexicographically smaller + * than, equal to, or greater than the string one[0], one[1], ..., + * one[len-1]. 
When the strings are different, byte_diff does not read + * bytes past the first difference. */ +int byte_diff(const void* a, unsigned int len, const void* b) { + register const char* s=a; + register const char* t=b; + register const char* u=t+len; + register int j; + j=0; + for (;;) { + if (t==u) break; if ((j=(*s-*t))) break; ++s; ++t; + if (t==u) break; if ((j=(*s-*t))) break; ++s; ++t; + if (t==u) break; if ((j=(*s-*t))) break; ++s; ++t; + if (t==u) break; if ((j=(*s-*t))) break; ++s; ++t; + } + return j; +} diff --git a/cgi-post.c b/cgi-post.c new file mode 100644 index 0000000..2a30cc2 --- /dev/null +++ b/cgi-post.c @@ -0,0 +1,55 @@ +#include +#include + +int main() { + char* method=getenv("REQUEST_METHOD"); + if (!method) { + __write1("Content-Type: text/plain\r\n\r\nFatal: $REQUEST_METHOD not set!\n"); + return 1; + } + if (!strcmp(method,"GET")) { + char* c=getenv("QUERY_STRING"); + __write1("Content-Type: text/plain\r\n\r\n"); + if (c) + write(1,c,strlen(c)); + else { + __write1("Fatal: $QUERY_STRING not set!\n"); + return 1; + } + } else if (!strcmp(method,"POST")) { + char* c=getenv("CONTENT_TYPE"); + char* d=getenv("CONTENT_LENGTH"); + int l; + if (!c) { + __write1("Content-Type: text/plain\r\n\r\nFatal: $CONTENT_TYPE not set!\n"); + return 1; + } + if (!d) { + __write1("Content-Type: text/plain\r\n\r\nFatal: $CONTENT_LENGTH not set!\n"); + return 1; + } + { + char* e; + l=strtoul(d,&e,10); + if (e==d || *e) { + __write1("Content-Type: text/plain\r\n\r\nFatal: $CONTENT_LENGTH not a number: "); + __write1(d); + __write1("\n"); + return 1; + } + } + __write1("Content-Type: "); __write1(c); __write1("\r\n"); + __write1("Content-Length: "); __write1(d); __write1("\r\n\r\n"); + while (l>0) { + char buf[2048]; + int r; + r=read(0,buf,sizeof(buf)); + if (r==-1) return 1; + if (r==0) break; + l-=r; + write(1,buf,r); + } + } else { + puts("Content-Type: text/plain\r\n\r\nFatal: $REQUEST_METHOD is neither GET nor POST!\n"); + } +} diff --git 
a/contrib/fnord-auth-0.1.tgz b/contrib/fnord-auth-0.1.tgz new file mode 100644 index 0000000000000000000000000000000000000000..8f805b691c4ab4981a1d90497ee7ac2d21ed1f4b GIT binary patch literal 3361 zcmV++4c_t}iwFRVd5=^4l2mzkNqv&3|W%Ykku#5A* zd;H@sCx<^AoUM|Ff|J|R^*iTZ*EZAn?{>RA#P2AZn@>Plr4V<)|M~o%jhqM~-;Sq7 z$N=Ai$8qhUfW~wOPGd!%pfM>w&Sukz)l3N?_Gis!HqwGki~}M2@KDV9g|V9WEvh=K1@eO z%swD@LY*333{xi_3G67I*mfHZZJ06`GZv%xV+NLS(*(n_coPXRz4tkM>ek1y$F1&!9bu zd5Kk%c78rPxcG7B^z`lCpz>=)9-(c@ICipy_Uy`;y2+;8=4vNe{#vPgf&f$=Q3Y>l zIbkFi?j4?-ej+<+B$z!#*Wd@hEGH(~nX{6l-Pe1kgRHT1da|4UZky8atA79%L z>*;M@I@miYRE?0{?%>ii@%Q_qogYpc185mu1WAHHWFb6>Y< zCGTodMen!L(c!DpLCZD6n;@1Xq}Yd?=u-CHhi4y2?px`bXCHTWZ}C$Igv5P#_FYh@L(wU|Ge6+jK@;{ zuPLSVpN=75?f)MG-E}}^uc#5 zfD0CeRVsse6K6HX3h{-@UBhydMv?P7yP!JpLbb;9LIkx7Wa(S1I>DfI>EYHOrB*7y z)?Zqm*YGg7+y9#@j1fh$*=nGYUx?ynjmNk&!hsQ9nrWD$ zj}JBJqlD8f0wDsmyP%ohDinw8w}}(73$D%sP>Qv-MgQlD9mp5+Ku1uIMDd%SkB&+; zW-z^@CLRZO)VLJ&7gGzFh1TJq?JPz>jdsinLMtGYVl(e^fjRG`U4<%kd;osHyy|QL z)P&=UlfC0NN59l+unh{-BrzI%F;TGzJKKa;ns9MJtDK7qTmlg0bcDKCrSt&rRlt!J8uG!Yf`a`!qP`=A)WcDR=2V$J7i2W z&J_+(#P{jtcx`SiX&dCb+QmW2#=j^j>RC#ST|04A zNd^=V`6zVG)Bes68)__$ix@W=prk=UAsyJFJ|zpo+SqV$ty`t`4Nz;mXJS;dF%sPq zn0)dVcP(i!X5)y(d3H@unvk*B$`H-qrihq`-lPuA5ChyG#CEl!0$27vJdb|$kaw`QW=N>?=%F*+ zM^@%2tfrqrgcto6S%*vgO~@@7f!REzu7kVnVA^PG;R{Y%^xg@GPokSd^<9-hcMY*x z;*I-Q^<2cCyF9ko3or56+-2v`=Z)6MTt|K^{*+4>bRu^ac#?8nLZs5sO3QO@UwHfC zv^cd=143#HoQ9e=rfA+tiXj)4;*0~A(UKF!R#%QjM&=!o5hfALaZu$+RZ^Es4jvB{ zh9d60i(x2JY=N&_Yn@W7tpYaJrpPUo-WPOcFc*E26vCDy`^mSESF4oO_ysw(`VHb%t zazVE!ed0s$-4D^H^N?4PI*qm5a|&WJmZ{w$&v#ekN+_IM@6_OrKW0S~U{GoisrWj+MqRM5cqNZ5K zBx7qQF+YlNlN4e!J~%u3aj(U9h5IKb$0uOK+!W!tHCqq3I7y=ad^TIjbbjfr_E!m| z{Ljh$&fe?&74uk{|KHR~`5#U1Y3uyYW59#5J@oA=crd%)3lnl8?*KDVaHWhDcn` zi7B}og2yNygPyqor3*mYAB$ptH&4jw>XkOl0NQmo486eLt47C?`>K?sc(#VhVSf-zQLx zJD16$Gw=-*oCg=aR1#5=22MnUjlY>+E7^Z`xZR2j+9j`o$1?j@^tt;Vo89&Ok4FI> z(9;0_JHA=|)N_5qk`>Fik^g4eyoQIuLi-o%$d!43<@T?a?7!Pl*Z2P)1wLhe@}*^5 z!(Rs_`%f-0ukP*-!4muL_LQ>yca%_a?! 
zr*(UjQ0D2|R;LLq^|K#N;`5t-4vC&t`tEQNT`cZmXSzB*U3_}@Y~u3|eEx}O3O;3W z+ME=Wo(KBHDnN38|MNDh#kig+sGsfec{o~m;aDx#{0sv@e2s4Ajrh^ir~ zhNyIYq9IAGd+%6iNJ2vr8j{eELmXhS@pOfg&=FNf zR2@-uMAZ>hM^qhAbwt$>wTmRXU(KYhNqqezq}we?dcfsl$D6x;>`&qvh;yS^jkv}Y zvS{MQbj;O+>jk%?U^ub)b(4C4NJKg?tU-F>jwI?V3=3DoL|S52l*k?UQQArzyKNwy z>_9l0I$OxSwRo6|gYXW; zIG>4DKANkhV_ZYD{0Xbzs@=;Kwm~Z&a3d@h{QsXA;$@j)nKrY7a`&_Z4BVW z;KhRa`^5P)iJ))*E`St7TQH~yq$=8io|DQY2cHE>`6Kbd5X^N2Dc>=wT%8*@K%YeZ zGt@wxQv;##cTE1m0a!lug|D*Y7PAfPW>JvTgbJ20Clc?2(qAzBG>;0+<27t=FBrZB za^y&=_mKQ!0BG1RUxGfbP6?KJPWQhAYaJfQTf2+5n&YkA$y>|u?md)u=PuqF=Y74{ z&{8GsEUI2mWcy+D8vQ3+Z+BX#`}opmEVgi%IXZNn+?l?<}7(}Pw!F16Du!c3P;gR5fF1qBP08jt`Rrafs literal 0 HcmV?d00001 diff --git a/convert.pl b/convert.pl new file mode 100644 index 0000000..3fd803e --- /dev/null +++ b/convert.pl @@ -0,0 +1,40 @@ +#!/usr/bin/perl + +use Compress::Zlib; + +$PWD=`pwd`; +chomp $PWD; +push @dirs,$PWD; + +while ($#dirs>=0) { + my $x=shift @dirs; + opendir DIR,$x || die "can't chdir to $x\n"; + foreach $i (readdir DIR) { + next if (substr($i,0,1) eq "."); + if (-d "$x/$i") { + push @dirs,"$x/$i"; + } elsif (-f "$x/$i") { + next if ($i =~ m/\.gz$/); + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat("$x/$i"); + my $gzmtime=(stat("$x/$i.gz"))[9]; + if (not defined $gzmtime or $gzmtime<$mtime) { + print "gzipping $x/$i...\n"; + if ($#ARGV<0) { + open FILE,"$x/$i" || die "can't open $x/$i\n"; + my $gz = gzopen("$x/$i.gz","wb") + or die "can't open $x/$i.gz: $gzerrno\n"; + while () { + $gz->gzwrite($_) + or die "error writing: $gzerrno\n"; + } + $gz->gzclose; + close FILE; + utime $atime, $mtime, "$x/$i.gz" or die "can't utime $x/$i.gz\n"; + my $gzsize=(stat("$x/$i.gz"))[7]; + unlink "$x/$i.gz" if ($gzsize>=$size); + } + } + } + } + closedir DIR; +} diff --git a/fmt.h b/fmt.h new file mode 100644 index 0000000..03290d6 --- /dev/null +++ b/fmt.h @@ -0,0 +1,76 @@ +#ifndef FMT_H +#define FMT_H + +#include "str.h" + +#define FMT_ULONG 40 
/* enough space to hold 2^128 - 1 in decimal, plus \0 */ +#define FMT_8LONG 44 /* enough space to hold 2^128 - 1 in octal, plus \0 */ +#define FMT_XLONG 33 /* enough space to hold 2^128 - 1 in hexadecimal, plus \0 */ +#define FMT_LEN ((char *) 0) /* convenient abbreviation */ + +/* The formatting routines do not append \0! + * Use them like this: buf[fmt_ulong(buf,number)]=0; */ + +/* convert signed src integer -23 to ASCII '-','2','3', return length. + * If dest is not NULL, write result to dest */ +unsigned int fmt_long(char *dest,signed long src); + +/* convert unsigned src integer 23 to ASCII '2','3', return length. + * If dest is not NULL, write result to dest */ +unsigned int fmt_ulong(char *dest,unsigned long src); + +/* same for long long */ +unsigned int fmt_ulonglong(char *dest,unsigned long long src); + +/* convert unsigned src integer 0x23 to ASCII '2','3', return length. + * If dest is not NULL, write result to dest */ +unsigned int fmt_xlong(char *dest,unsigned long src); + +/* convert unsigned src integer 023 to ASCII '2','3', return length. + * If dest is not NULL, write result to dest */ +unsigned int fmt_8long(char *dest,unsigned long src); + +#define fmt_uint(dest,src) fmt_ulong(dest,src) +#define fmt_int(dest,src) fmt_long(dest,src) +#define fmt_xint(dest,src) fmt_xlong(dest,src) +#define fmt_8int(dest,src) fmt_8long(dest,src) + +/* Like fmt_ulong, but prepend '0' while length is smaller than padto. + * Does not truncate! */ +unsigned int fmt_ulong0(char *,unsigned long src,unsigned int padto); + +#define fmt_uint0(buf,src,padto) fmt_ulong0(buf,src,padto) + +/* convert src double 1.7 to ASCII '1','.','7', return length. + * If dest is not NULL, write result to dest */ +unsigned int fmt_double(char *dest, double d,int max,int prec); + +/* if src is negative, write '-' and return 1. + * if src is positive, write '+' and return 1. 
+ * otherwise return 0 */ +unsigned int fmt_plusminus(char *dest,int src); + +/* if src is negative, write '-' and return 1. + * otherwise return 0. */ +unsigned int fmt_minus(char *dest,int src); + +/* copy str to dest until \0 byte, return number of copied bytes. */ +unsigned int fmt_str(char *dest,const char *src); + +/* copy str to dest until \0 byte or limit bytes copied. + * return number of copied bytes. */ +unsigned int fmt_strn(char *dest,const char *src,unsigned int limit); + +/* "foo" -> " foo" + * write padlen-srclen spaces, if that is >= 0. Then copy srclen + * characters from src. Truncate only if total length is larger than + * maxlen. Return number of characters written. */ +unsigned int fmt_pad(char* dest,const char* src,unsigned int srclen,unsigned int padlen,unsigned int maxlen); + +/* "foo" -> "foo " + * append padlen-srclen spaces after dest, if that is >= 0. Truncate + * only if total length is larger than maxlen. Return number of + * characters written. */ +unsigned int fmt_fill(char* dest,unsigned int srclen,unsigned int padlen,unsigned int maxlen); + +#endif diff --git a/fmt_ulong.c b/fmt_ulong.c new file mode 100644 index 0000000..712e502 --- /dev/null +++ b/fmt_ulong.c @@ -0,0 +1,11 @@ +#include "fmt.h" + +unsigned int fmt_ulong(char *dest,unsigned long i) { + register unsigned long len,tmp,len2; + /* first count the number of bytes needed */ + for (len=1, tmp=i; tmp>9; ++len) tmp/=10; + if (dest) + for (tmp=i, dest+=len, len2=len+1; --len2; tmp/=10) + *--dest = (tmp%10)+'0'; + return len; +} diff --git a/fmt_ulonglong.c b/fmt_ulonglong.c new file mode 100644 index 0000000..6148761 --- /dev/null +++ b/fmt_ulonglong.c @@ -0,0 +1,12 @@ +#include "fmt.h" + +unsigned int fmt_ulonglong(char *dest,unsigned long long i) { + register unsigned long len,len2; + register unsigned long long tmp; + /* first count the number of bytes needed */ + for (len=1, tmp=i; tmp>9; ++len) tmp/=10; + if (dest) + for (tmp=i, dest+=len, len2=len+1; --len2; tmp/=10) 
+ *--dest = (tmp%10)+'0'; + return len; +} diff --git a/fnord-conf b/fnord-conf new file mode 100755 index 0000000..697422c --- /dev/null +++ b/fnord-conf @@ -0,0 +1,33 @@ +#!/bin/sh + +set -e + +acct=$1 +logacct=$2 +dir=$3 +webroot=$4 +myip=$5 + +if test x$webroot = x; then + echo "fnord-conf: usage: fnord-conf acct logacct /fnord /webroot [ myip ]" + exit 1; +fi + +# install stuff +mkdir -p $dir +echo "#!/bin/sh" > $dir/run +echo "cd $webroot" >> $dir/run +echo "exec envuidgid $acct tcpserver -RHl localhost ${myip:-0} 80 fnord 2>&1" >> $dir/run +chmod 755 $dir/run + +# tell old daemontools that it should spawn a log process, too +chmod a+t $dir + +# install logging stuff +mkdir $dir/log +echo "#!/bin/sh" > $dir/log/run +echo "exec setuidgid $logacct multilog t ./main" >> $dir/log/run +chmod 755 $dir/log/run + +mkdir $dir/log/main +chown $logacct $dir/log/main diff --git a/http b/http new file mode 100755 index 0000000..f971002 --- /dev/null +++ b/http @@ -0,0 +1,2 @@ +#!/bin/sh +tcpserver -RHl localhost 0 80 fnord-idx . 
diff --git a/httpd.c b/httpd.c new file mode 100644 index 0000000..c4e6fde --- /dev/null +++ b/httpd.c @@ -0,0 +1,1677 @@ +/* simple httpd to be started from tcpserver */ +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fmt.h" +#include "buffer.h" +#include "byte.h" +#include "scan.h" + +/* uncomment the following line to enable support for CGI */ +// #define CGI + +#ifdef CGI +/* uncomment the following line to enable support for "index.cgi" + * That is: if "index.html" is not present then look for "index.cgi" */ +#define INDEX_CGI +#endif + +/* the following switch will make fnord normalize the Host: HTTP header + * from "foo" to "foo:80" */ +#define NORMALIZE_HOST + +/* uncomment the following line to enable support for autogenerated + * directory-listings for directories without index */ +/* #define DIR_LIST */ + +#ifdef DIR_LIST +/* uncomment the following line to enable support for system symlink + * dereferencingr. + * HOPE YOU KNOW WHAT YOU'RE LINKING ! + * + * e.g.: if a file foo is a symlink to /etc/passwd and you don't have a + * chroot enviroment then the system-wide /etc/passwd is provided !!! + * + * If the symlink is dangling OR this option is not active the symlink is + * provided as a new http-uri. + * + * e.g.: is foo a symlink to /etc/passwd than the clinet gets a href to + * http:///etc/passwd */ +/* #define SYSTEM_SYMLINK_DEREF */ +#endif + +/* uncomment the following line to get full-host redirection. + * If a file is not found locally, and $REDIRECT_HOST is set, fnord will + * issue a redirect to strcat($REDIRECT_HOST,uri). Otherwise, if + * $REDIRECT_URI is set, fnord will issue a redirect to $REDIRECT_URI. + * Only if those fail will a 404 error be returned. */ +#define OLD_STYLE_REDIRECT + +/* uncomment the following line to get full-host redirection. 
+ * if the virtual host directory/symlink is a broken symlink, fnord will + * issue a redirect. If the contents of the symlink starts with an + * equal sign ('='), fnord will throw away the URI part. */ +#define REDIRECT + +/* uncomment the following line to make fnord tarpit queries from + * EmailSiphon (an email harvester for spammers) */ +#define TARPIT + +/* uncomment the following line to make fnord chroot to the current + * working directory and drop privileges */ +#define CHROOT + +/* uncomment the following line to make fnord support connection + * keep-alive */ +#define KEEPALIVE + +/* the following is the time in seconds that fnord should wait for a + * valid HTTP request */ +#define READTIMEOUT 20 + +/* the following is the time in seconds that fnord should wait before + * aborting a request when trying to write the answer */ +#define WRITETIMEOUT 20 + +#define CGI_TIMEOUT (5*60) /* 5 minutes time-out for CGI to complete */ + +/* defining USE_SENDFILE enables zero-copy TCP on Linux for static + * files. I measured over 320 meg per second with apache bench over + * localhost with sendfile and keep-alive. However, sendfile does not + * work with large files and may be considered cheating ;-) + * Also, sendfile is a blocking operation. Thus, no timeout handling. */ +#define USE_SENDFILE + +#ifndef __linux__ +#undef USE_SENDFILE +#endif + +#ifdef USE_SENDFILE +#include +#endif + +#ifndef O_NDELAY +#define O_NDELAY O_NONBLOCK +#endif + +#define USE_MMAP +#ifndef _POSIX_MAPPED_FILES +#undef USE_MMAP +#endif + +enum { UNKNOWN, GET, HEAD, POST } method; + +#ifdef TCP_CORK +static int corked; +#endif +static long retcode=404; /* used for logging code */ +char *host="?"; /* Host: header */ +char *port; /* also Host: header, :80 part */ +char *args; /* URL behind ? 
(for CGIs) */ +char *url; /* string between GET and HTTP/1.0, demangled */ +char *ua="?"; /* user-agent */ +char *refer; /* Referrer: header */ +char *accept_enc; /* Accept-Encoding */ +int httpversion; /* 0 == 1.0, 1 == 1.1 */ +#ifdef KEEPALIVE +int keepalive=0; /* should we keep the connection alive? */ +int rootdir; /* fd of root directory, so we can fchdir back for keep-alive */ +#endif +#ifdef CGI +char *cookie; /* Referrer: header */ +char *uri; /* copy of url before demangling */ +char *content_type; +char *content_len; +char *auth_type; +char *post_miss; +unsigned long post_mlen; +unsigned long post_len=0; +#endif + +#if _FILE_OFFSET_BITS == 64 +static unsigned long long rangestart, rangeend; /* for ranged queries */ +#define scan_range scan_ulonglong +#define buffer_putrange buffer_putulonglong +#include "scan_ulonglong.c" +#include "fmt_ulonglong.c" +#include "buffer_putulonglong.c" +#else +static unsigned long rangestart, rangeend; /* for ranged queries */ +#define scan_range scan_ulong +#define buffer_putrange buffer_putulong +#endif + +static const char days[] = "SunMonTueWedThuFriSat"; +static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec"; + +#define MAXHEADERLEN 8192 + +char* remote_ip; +#ifdef CGI +char* remote_port; +char* remote_ident; +#endif + +static void sanitize(char* ua) { /* replace strings with underscores for logging */ + int j; + for (j=0; ua[j]; ++j) if (isspace(ua[j])) ua[j]='_'; +} + +static int buffer_put2digits(buffer* b,unsigned int i) { + char x[2]; + x[0]=(i/10)+'0'; + x[1]=(i%10)+'0'; + return buffer_put(b,x,2); +} + +static void dolog(off_t len) { /* write a log line to stderr */ +#ifdef COLF + time_t t=time(0); + struct tm* x=localtime(&t); + int l=-(timezone/60); + buffer_puts(buffer_2,remote_ip?remote_ip:"0.0.0.0"); + buffer_puts(buffer_2," - - ["); + + buffer_put2digits(buffer_2,x->tm_mday); + buffer_puts(buffer_2,"/"); + buffer_put(buffer_2,months+3*x->tm_mon,3); + buffer_puts(buffer_2,"/"); + 
buffer_put2digits(buffer_2,(x->tm_year+1900)/100); + buffer_put2digits(buffer_2,(x->tm_year+1900)%100); + buffer_puts(buffer_2,":"); + buffer_put2digits(buffer_2,x->tm_hour); + buffer_puts(buffer_2,":"); + buffer_put2digits(buffer_2,x->tm_min); + buffer_puts(buffer_2,":"); + buffer_put2digits(buffer_2,x->tm_sec); + buffer_puts(buffer_2,l>=0?" +":" -"); + if (l<0) l=-l; + buffer_put2digits(buffer_2,l/60); + buffer_put2digits(buffer_2,l%60); + buffer_puts(buffer_2,"] \""); + switch (method) { + case GET: buffer_puts(buffer_2,"GET "); break; + case POST: buffer_puts(buffer_2,"POST "); break; + case HEAD: buffer_puts(buffer_2,"HEAD "); break; + default: buffer_puts(buffer_2,"? "); break; + } + buffer_puts(buffer_2,url); + buffer_puts(buffer_2,httpversion?" HTTP/1.1\" ":" HTTP/1.0\" "); + buffer_putulong(buffer_2,retcode); + buffer_putspace(buffer_2); + buffer_putrange(buffer_2,len); + +#else + buffer_puts(buffer_2,remote_ip?remote_ip:"0.0.0.0"); + buffer_putspace(buffer_2); + buffer_putulong(buffer_2,retcode); + buffer_putspace(buffer_2); + buffer_putrange(buffer_2,len); + buffer_putspace(buffer_2); + sanitize(host); + buffer_puts(buffer_2,host); + buffer_putspace(buffer_2); + sanitize(ua); + buffer_puts(buffer_2,ua); + buffer_putspace(buffer_2); + if (!refer) refer="none"; + sanitize(refer); + buffer_puts(buffer_2,refer); + buffer_putspace(buffer_2); + if (url) + buffer_puts(buffer_2,url); + else + buffer_puts(buffer_2,"(null)"); +#endif + buffer_puts(buffer_2,"\n"); + buffer_flush(buffer_2); +} + +/* output an error message and exit */ +static void badrequest(long code,const char *httpcomment,const char *message) { + retcode=code; + dolog(0); + buffer_puts(buffer_1,"HTTP/1.0 "); + buffer_putulong(buffer_1,code); + buffer_putspace(buffer_1); + buffer_puts(buffer_1,httpcomment); + buffer_puts(buffer_1,"\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n"); + buffer_puts(buffer_1,message); + buffer_flush(buffer_1); + exit(0); +} + +#ifdef CGI +#define CGIENVLEN 21 
+ +static const char *cgivars[CGIENVLEN] = { + "GATEWAY_INTERFACE=", + "SERVER_PROTOCOL=", + "SERVER_SOFTWARE=", + "SERVER_NAME=", + "SERVER_PORT=", + "REQUEST_METHOD=", + "REQUEST_URI=", + "SCRIPT_NAME=", + "REMOTE_ADDR=", + "REMOTE_PORT=", + "REMOTE_IDENT=", + "HTTP_USER_AGENT=", + "HTTP_COOKIE=", + "HTTP_REFERER=", + "HTTP_ACCEPT_ENCODING=", + "AUTH_TYPE=", + "CONTENT_TYPE=", + "CONTENT_LENGTH=", + "QUERY_STRING=", + "PATH_INFO=", + "PATH_TRANSLATED=" +}; + +static int iscgivar(const char *s) { + register unsigned int i=0; + for (;i0) { + if (WIFSIGNALED(n)) { + if (WTERMSIG(n)==SIGALRM) + badrequest(504,"Gateway Time-out","Gateway has hit the Time-out."); + else + badrequest(502,"Bad Gateway","Gateway broken or unavailable."); + } + } + signal(SIGCHLD,cgi_child); +} + +static void cgi_send_correct_http(const char*s,unsigned int sl) { + unsigned int i; + char ch=0; + for (i=0;i0) { + struct pollfd pfd[2]; + int nr=1; + int startup=1; + + signal(SIGCHLD,cgi_child); + signal(SIGPIPE,SIG_IGN); /* NO! no signal! 
*/ + + close(df[0]); + close(fd[1]); + + pfd[0].fd=fd[0]; + pfd[0].events=POLLIN; + pfd[0].revents=0; + + pfd[1].fd=df[1]; + pfd[1].events=POLLOUT; + pfd[1].revents=0; + + if (post_len) ++nr; /* have post data */ + else close(df[1]); /* no post data */ + + while(poll(pfd,nr,-1)!=-1) { + /* read from cgi */ + if (pfd[0].revents&POLLIN) { + if (!(n=read(fd[0],ibuf,sizeof(ibuf)))) break; + if (n<0) goto cgi_500; + /* startup */ + if (startup) { + startup=0; + if (nph) { /* NPH-CGI */ + buffer_put(buffer_1,ibuf,n); + scan_ulong(ibuf+9,&retcode); /* only get error code / str_len("HTTP/x.x ")==9 */ + } + else { /* CGI */ + if (byte_diff(ibuf,10,"Location: ")==0) { + retcode=302; + buffer_puts(buffer_1,"HTTP/1.0 302 CGI-Redirect\r\nConnection: close\r\n"); + signal(SIGCHLD,SIG_IGN); + cgi_send_correct_http(ibuf,n); + buffer_flush(buffer_1); + dolog(0); + exit(0); + } + else { + retcode=200; + buffer_puts(buffer_1,"HTTP/1.0 200 OK\r\nServer: "FNORD"\r\nPragma: no-cache\r\nConnection: close\r\n"); + signal(SIGCHLD,SIG_IGN); + cgi_send_correct_http(ibuf,n); + } + } + } + /* non startup */ + else { + buffer_put(buffer_1,ibuf,n); + } + size+=n; + if (pfd[0].revents&POLLHUP) break; + } + /* write to cgi the post data */ + else if (nr>1 && pfd[1].revents&POLLOUT) { + if (post_miss) { + write(df[1],post_miss,post_mlen); + post_miss=0; + } + else if (post_mlen='0' && c<='9') return c-'0'; + else { c|=' '; + if (c>='a' && c<='f') return c-'a'+10; + } + return -1; +} + +/* header(buf,buflen,"User-Agent")="Mozilla" */ +static char* header(char* buf,int buflen,const char* hname) { + int slen=str_len(hname); + int i; + char* c; +// printf("buflen %d, slen %d\n",buflen,slen); + for (i=0; i0 && url[ext]!='.' 
&& url[ext]!='/') --ext; + if (url[ext]=='.') { + ++ext; + if (str_equal(url+ext,"bz2")) goto octetstream; + if (str_equal(url+ext,"gz")) { + if (!encoding) { + if (explicit) goto octetstream; + encoding="gzip"; + save=url[ext-1]; + url[ext-1]=0; + getmimetype(url,explicit); + url[ext-1]=save; + } else +octetstream: + mimetype="application/octet-stream"; + } else { + int i; + for (i=0; mimetab[i].name; ++i) + if (str_equal(mimetab[i].name,url+ext)) { + mimetype=(char*)mimetab[i].type; + break; + } + } + } +} + +static int matchcommalist(const char* needle,const char* haystack) { + /* needle: "text/html", + * haystack: the accept header, "text/html, text/plain\r\n" */ + /* return nonzero if match was found */ + int len=str_len(needle); + if (!byte_equal(needle,len,haystack)) return 0; + switch (haystack[len]) { + case ';': case ',': case '\r': case '\n': case 0: return 1; + } + return 0; +} + +static int findincommalist(const char* needle,const char* haystack) { + const char* accept; + for (accept=haystack; accept;) { + /* format: foo/bar, */ + const char *tmp=accept; + int final; + while (*tmp) { + if (*tmp==';') break; else + if (*tmp==',') break; + ++tmp; + } + final=(*tmp==0 || *tmp==';'); + if (matchcommalist("*/*",accept)) break; + if (matchcommalist(haystack,accept)) break; + accept=tmp+1; + if (final) return 0; + } + return 1; +} + +static int parsetime(const char*c,struct tm* x) { + unsigned long tmp; + c+=scan_ulong(c,&tmp); x->tm_hour=tmp; + if (*c!=':') return -1; ++c; + c+=scan_ulong(c,&tmp); x->tm_min=tmp; + if (*c!=':') return -1; ++c; + c+=scan_ulong(c,&tmp); x->tm_sec=tmp; + if (*c!=' ') return -1; + return 0; +} + +static time_t parsedate(const char*c) { + struct tm x; + int i; + unsigned long tmp; + if (!c) return (time_t)-1; + /* "Sun, 06 Nov 1994 08:49:37 GMT", + * "Sunday, 06-Nov-94 08:49:37 GMT" and + * "Sun Nov 6 08:49:37 1994" */ + if (c[3]==',') c+=5; else + if (c[6]==',') c+=8; else { + c+=4; + for (i=0; i<12; ++i) { +// 
fprintf(stderr,"comparing %s to %.3s\n",c,months+i*3); + if (!strncasecmp(c,months+i*3,3)) { + x.tm_mon=i; break; + } + } + c+=4; if (*c==' ') ++c; + c+=scan_ulong(c,&tmp); x.tm_mday=tmp; + ++c; + if (parsetime(c,&x)) return (time_t)-1; + c+=9; + c+=scan_ulong(c,&tmp); x.tm_year=tmp-1900; + goto done; + } + c+=scan_ulong(c,&tmp); x.tm_mday=tmp; + ++c; + for (i=0; i<12; ++i) + if (!strncasecmp(c,months+i*3,3)) { + x.tm_mon=i; break; + } + c+=4; + c+=scan_ulong(c,&tmp); + if (tmp>1000) x.tm_year=tmp-1900; else + if (tmp<70) x.tm_year=tmp+100; else + x.tm_year=tmp; + ++c; + if (parsetime(c,&x)) return (time_t)-1; +done: + x.tm_wday=x.tm_yday=x.tm_isdst=0; + return mktime(&x); +} + +static struct stat st; + +/* try to return a file */ +static int doit(char* buf,int buflen,char* url,int explicit) { + int fd=-1; + char* accept; + time_t ims; + while (url[0]=='/') ++url; + getmimetype(url,explicit); + { + char *b=buf; + int l=buflen; + for (;;) { + char *h=header(b,l,"Accept"); + if (!h) goto ok; + if (findincommalist(mimetype,h)) goto ok; + l-=(h-b)+1; + b=h+1; + } + retcode=406; goto bad; + } +ok: + if (encoding) { /* see if client accepts the encoding */ + char *tmp=header(buf,buflen,"Accept-Encoding"); + if (!tmp || !strstr(tmp,"gzip")) + { retcode=406; goto bad; } + } + if ((fd=open(url,O_RDONLY))>=0) { + if (fstat(fd,&st)) goto bad; + /* no directories */ + if (S_ISDIR(st.st_mode)) goto bad; + /* see if the peer accepts MIME type */ + /* see if the document has been changed */ + ims=parsedate(header(buf,buflen,"If-Modified-Since")); + if (ims!=(time_t)-1 && st.st_mtime<=ims) { retcode=304; goto bad; } + rangestart=0; rangeend=st.st_size; + if ((accept=header(buf,buflen,"Range"))) { + /* format: "bytes=17-23", "bytes=23-" */ + if (!strncmp(accept,"bytes=",6)) { + int i; + accept+=6; + i=scan_range(accept,&rangestart); + if (i) { + accept+=i; + if (*accept=='-') { + ++accept; + if (*accept) { + i=scan_range(accept,&rangeend); + if (!i) rangeend=st.st_size; else 
++rangeend; + } + } + } + } + if (rangestart>rangeend || rangeend>st.st_size) { retcode=416; goto bad; } + } + return fd; +bad: + if (fd>=0) close(fd); + } + return -1; +} + +static void redirectboilerplate() { + buffer_puts(buffer_1,"HTTP/1.0 301 Go Away\r\nConnection: close\r\nLocation: "); +} + +static void handleredirect(const char *url,const char* origurl) { + char symlink[1024]; + int len; +#ifdef OLD_STYLE_REDIRECT + char* env; +#endif + while (*url=='/') ++url; + if ((len=readlink(url,symlink,1023))>0) { + /* el-cheapo redirection */ + redirectboilerplate(); + buffer_put(buffer_1,symlink,len); +#ifdef OLD_STYLE_REDIRECT +fini: +#endif + retcode=301; + buffer_puts(buffer_1,"\r\n\r\n"); + dolog(0); + buffer_flush(buffer_1); + exit(0); + } +#ifdef OLD_STYLE_REDIRECT + if ((env=getenv("REDIRECT_HOST"))) { + redirectboilerplate(); + buffer_puts(buffer_1,env); + while (*origurl=='/') ++origurl; + buffer_puts(buffer_1,origurl); + goto fini; + } else if ((env=getenv("REDIRECT_URI"))) { + redirectboilerplate(); + buffer_puts(buffer_1,env); + goto fini; + } +#endif +} + +#ifdef DIR_LIST +static void hdl_encode_html(const char*s,unsigned int sl) { + int i; + for (i=0;i159) { +encode_dec: + buffer_puts(buffer_1,"&#"); + buffer_putulong(buffer_1,ch); + buffer_puts(buffer_1,";"); + } + else if ((ch>128)||(ch<32)) { + buffer_put(buffer_1," ",1); + } + else if (ch=='"') buffer_puts(buffer_1,"""); + else if (ch=='&') buffer_puts(buffer_1,"&"); + else if (ch=='<') buffer_puts(buffer_1,"<"); + else if (ch=='>') buffer_puts(buffer_1,">"); + else buffer_put(buffer_1,&ch,1); + } +} +static int buffer_puthex(unsigned int i) { + unsigned int t; + char x[4]; + t='0'|(i>>4)&0xf; + if (t>'9') t+=39; + i='0'|(i&0xf); + if (i>'9') i+=39; + x[0]='%'; + x[1]=t; + x[2]=i; + return buffer_put(buffer_1,x,3); +} +static void hdl_encode_uri(const char*s,unsigned int sl) { + int i; + for (i=0;i32)&&(ch<127)) + buffer_put(buffer_1,&ch,1); + else + buffer_puthex(ch); + } +} +static void 
handledirlist(const char*origurl) { + DIR*dir; + unsigned int nl=str_len(origurl); + const char*nurl=origurl; + url=(char*)origurl; + while (nurl[0]=='/') ++nurl; + if (nurl<=origurl) return; + nl=str_len(nurl); + if (nurl[nl-1]!='/') return; + if (!stat(nl?nurl:".",&st) && (S_ISDIR(st.st_mode)) && ((st.st_mode&S_IRWXO)==5)) { + if (nl) chdir(nurl); + if (dir=opendir(".")) { + struct dirent*de; + unsigned int i,size=32+nl; + buffer_puts(buffer_1,"HTTP/1.0 200 OK\r\nServer: "FNORD"\r\nConnection: close\r\n"); + buffer_puts(buffer_1,"Content-Type: text/html\r\n"); + buffer_puts(buffer_1,"\r\n

Directory Listing: /"); + hdl_encode_html(nurl,nl); + buffer_puts(buffer_1,"

\n
\n");
+      if (nl!=0) {
+	for (i=nl-2;i>0;--i) if (nurl[i]=='/') break;
+	buffer_puts(buffer_1,"0) buffer_puts(buffer_1,"/");
+	buffer_puts(buffer_1,"\">Parent directory");
+	buffer_puts(buffer_1,"\n");
+	size+=40+i;
+      }
+      while(de=readdir(dir)) {
+	char symlink[1024];
+	char*p=de->d_name;
+	unsigned int pl,dl=str_len(de->d_name);
+	pl=dl;
+	if (de->d_name[0]=='.') continue;	/* hidden files -> skip */
+	if (lstat(de->d_name,&st)) continue;	/* can't stat -> skip */
+	if (S_ISDIR(st.st_mode)) buffer_puts(buffer_1,"[DIR] ");
+	else if (S_ISLNK(st.st_mode)) {
+#ifdef SYSTEM_SYMLINK_DEREF
+	  if (stat(de->d_name,&st))			/* dangling symlink */
+#endif
+	  {
+	    if ((pl=readlink(de->d_name,symlink,1023))<1) continue;
+	    p=symlink;
+	  }
+	  buffer_puts(buffer_1,"[LNK] ");	/* a symlink to something ... */
+	}
+	else if (S_ISREG(st.st_mode)) buffer_puts(buffer_1,"[TXT] ");
+	else continue;				/* not a file we can provide -> skip */
+	/* write a href */
+	buffer_puts(buffer_1,"");
+	if (de->d_name[0]==':') de->d_name[0]='.';	/* fnord special ... */
+	hdl_encode_html(de->d_name,dl);
+	buffer_puts(buffer_1,"\n");
+	size+=22+(dl<<1);
+      }
+      closedir(dir);
+      buffer_puts(buffer_1,"
\n"); + buffer_flush(buffer_1); + retcode=200; + dolog(size); + exit(0); + } + } +} +#endif + +#ifdef INDEX_CGI +static int handleindexcgi(const char *testurl,const char* origurl,char* space) { + unsigned int ul,ol=str_len(origurl); + char*test; + while (testurl[0]=='/') ++testurl,--ol; + ul=str_len(testurl); + if (str_diff(testurl+ol,"index.html")) return 0; /* no request for index.html */ + test=space; + ++test; + ul-=4; + byte_copy(test,ul,testurl); + test[ul]='c'; + test[++ul]='g'; + test[++ul]='i'; + test[++ul]=0; + if (stat(test,&st)) return 0; /* no index.cgi present */ + ul=1; + if (st.st_gid==getegid()) ul=010; + if (st.st_uid==geteuid()) ul=0100; + if (!(st.st_mode&ul)) return 0; /* should be executable */ + *(--test)='/'; + url=test; + return 1; /* Wow... now start "index.cgi" */ +} +#endif + +static void get_ucspi_env(void) { + char* ucspi=getenv("PROTO"); + if (ucspi) { + char* buf=alloca(str_len(ucspi)+20); + unsigned int tmp=str_copy(buf,ucspi); + buf[tmp+str_copy(buf+tmp,"REMOTEIP")]=0; + remote_ip=getenv(buf); +#ifdef CGI + buf[tmp+str_copy(buf+tmp,"REMOTEPORT")]=0; + remote_port=getenv(buf); + buf[tmp+str_copy(buf+tmp,"REMOTEINFO")]=0; + remote_ident=getenv(buf); +#endif + } +} + +#ifdef CGI +static int findcgi(const char* c) { + return (c[0]=='.' 
&& c[1]=='c' && + c[2]=='g' && c[3]=='i' && + (c[4]=='/' || c[4]==0)); +} +#endif + +static int serve_read_write(int fd) { + char tmp[4096]; + struct pollfd duh; + time_t now,fini; + char* tmp2; + int len; + off_t todo=rangeend-rangestart; + duh.fd=1; + duh.events=POLLOUT; + if (rangestart) lseek(fd,rangestart,SEEK_SET); + while (todo>0) { + int olen; + fini=time(&now)+WRITETIMEOUT; + len=read(fd,tmp,todo>4096?4096:todo); + olen=len; + tmp2=tmp; + while (len>0) { + int written; + switch (poll(&duh,1,(fini-now)*1000)) { + case 0: if (now64*1024*1024) maplen=64*1024*1024; + map=mmap(0,maplen,PROT_READ,MAP_PRIVATE,fd,mapstart); + if (map==MAP_FAILED) { + if (errno==EINVAL && mapstart) { + /* try rounded to 64k pages */ + mapstart=rangestart&0xffff; + maplen=rangeend-mapstart; + mapofs=rangestart-mapstart; + map=mmap(0,maplen,PROT_READ,MAP_PRIVATE,fd,mapstart); + if (map==MAP_FAILED) + /* didn't work, use read/write instead. */ + return serve_read_write(fd); + } else return serve_read_write(fd); + } + duh.fd=1; + duh.events=POLLOUT; + while (rangestart0) { + int written; + switch (poll(&duh,1,(fini-now)*1000)) { + case 0: if (now64*1024*1024) maplen=64*1024*1024; + map=mmap(0,maplen,PROT_READ,MAP_SHARED,fd,mapstart); + if (map==MAP_FAILED) + /* can't happen, really */ + return serve_read_write(fd); + } + } + return 0; +} + +/* write from offset "rangestart" to offset "rangeend" to fd #1 */ +static int serve_static_data(int fd) { + off_t len=rangeend-rangestart; +#ifdef TCP_CORK + corked=0; +#endif + if (len<4096) { /* for small files, sendfile is actually slower */ + char tmp[4096]; + if (rangestart) lseek(fd,rangestart,SEEK_SET); + read(fd,tmp,len); /* if read fails, we can't back down now. 
+ We already committed on the content-length */ + buffer_put(buffer_1,tmp,len); + buffer_flush(buffer_1); + return 0; + } +#ifdef USE_SENDFILE + { + off_t offset=rangestart; +#ifdef TCP_CORK + { + int one=1; + setsockopt(1,IPPROTO_TCP,TCP_CORK,&one,sizeof(one)); + corked=1; + } +#endif + buffer_flush(buffer_1); + { + off_t l=rangeend-rangestart; + do { + off_t c; + c=(l>(1ul<<31))?1ul<<31:l; + if (sendfile(1,fd,&offset,c)==-1) +#ifdef USE_MMAP + return serve_mmap(fd); +#else + return serve_read_write(fd); +#endif + l-=c; + } while (l); + } + return 0; + } +#else + buffer_flush(buffer_1); +#ifdef TCP_CORK + { + int one=1; + setsockopt(1,IPPROTO_TCP,TCP_CORK,&one,sizeof(one)); + corked=1; + } +#endif +#ifdef USE_MMAP + return serve_mmap(fd); +#else + return serve_read_write(fd); +#endif +#endif +} + +int main(int argc,char *argv[],const char *const *envp) { + char buf[MAXHEADERLEN]; +#if 0 + char buf2[MAXHEADERLEN]; +#endif + char *nurl,*origurl; + int len; + int in; + + if (argc>1) chdir(argv[1]); + +#ifdef CHROOT + if (chroot(".")) { + if (errno!=EPERM) + goto error500; + /* else fnord was called with uid!=0, i.e. 
it already is chroot */ + } else { + char *tmp; + if (chdir("/")) goto error500; + if ((tmp=getenv("GID"))) { + long gid; + if (tmp[scan_ulong(tmp,&gid)]==0) { + gid_t gi=gid; + if (setgroups(1,&gi)) goto error500; + } else goto error500; + } + if ((tmp=getenv("UID"))) { + long uid; + if (tmp[scan_ulong(tmp,&uid)]==0) { + if (setuid(uid)) goto error500; + } else goto error500; + } + } +#endif + signal(SIGPIPE,SIG_IGN); + get_ucspi_env(); + +#ifdef KEEPALIVE +handlenext: + encoding=0; +#endif +// alarm(20); + + { + int found=0; + time_t fini,now; + struct pollfd duh; + + fini=time(&now)+READTIMEOUT; + duh.fd=0; + duh.events=POLLIN; + for (in=len=0;found<2;) { + int tmp; + switch (poll(&duh,1,READTIMEOUT*1000)) { + case 0: if (time(&now)1) break; + } + } + } + if (len<10) badrequest(400,"Bad Request","Bad RequestThat does not look like HTTP to me..."); + buf[len]=0; + + if (!strncasecmp(buf,"GET /",5)) { + method=GET; + url=buf+4; + } else if (!strncasecmp(buf,"POST /",6)) { + method=POST; + url=buf+5; + } else if (!strncasecmp(buf,"HEAD /",6)) { + method=HEAD; + url=buf+5; + } else + badrequest(400,"Bad Request","Bad RequestUnsupported HTTP method."); + + origurl=url; + + { + int nl=str_chr(buf,'\r'); + int space=str_chr(url,' '); + if (space>=nl) + badrequest(400,"Bad Request","Bad RequestHTTP/0.9 not supported"); + if (str_diffn(url+space+1,"HTTP/1.",7)) + badrequest(400,"Bad Request","Bad RequestOnly HTTP 1.x supported"); + url[space]=0; + httpversion=url[space+8]-'0'; +#ifdef KEEPALIVE + keepalive=0; +#endif + + /* demangle path in-place */ + { + register char *tmp,*d; + for (tmp=d=url; *tmp; ++tmp) { + if (*tmp=='?') { args=tmp+1; break; } + if (*tmp==' ') break; + if (*tmp=='%') { + int a,b; + a=fromhex(tmp[1]); + b=fromhex(tmp[2]); + if (a>=0 && b>=0) { + *d=(a<<4)+b; + tmp+=2; + } else + *d=*tmp; + } else + *d=*tmp; + if (d>url+1 && *d=='/' && d[-1]==':' && d[-2]=='/') d-=2; + if (d>url && *d=='/' && d[-1]=='/') --d; + if (d>url && *d=='.' 
&& d[-1]=='/') *d=':'; + ++d; + } + *d=0; + /* not good enough, we need a second pass */ + } + +#ifdef CGI + uri=alloca(space+1); + byte_copy(uri,space+1,url); +#endif + } + + { + char *tmp; + if ((tmp=header(buf,len,"User-Agent"))) ua=tmp; + if ((tmp=header(buf,len,"Referer"))) refer=tmp; + if ((tmp=header(buf,len,"Accept-Encoding"))) accept_enc=tmp; +#ifdef KEEPALIVE + if ((tmp=header(buf,len,"Connection"))) { /* see if it's "keep-alive" or "close" */ + if (!strcasecmp(tmp,"keep-alive")) + keepalive=1; + else if (!strcasecmp(tmp,"close")) + keepalive=-1; + } +#endif +#ifdef CGI + if ((tmp=header(buf,len,"Cookie"))) cookie=tmp; + if ((tmp=header(buf,len,"Authorization"))) auth_type=tmp; + if (method==POST) { + if ((tmp=header(buf,len,"Content-Type"))) content_type=tmp; + if ((tmp=header(buf,len,"Content-Length"))) content_len=tmp; + if (tmp) { + scan_ulong(tmp,&post_len); + post_miss=buf+len+1; + post_mlen=in-len-1; + if (post_len<=post_mlen) post_mlen=post_len; + } + } +#endif + } + +#ifdef TARPIT + if (str_equal(ua,"EmailSiphon")) { sleep(120); exit(0); } +#endif + + port=getenv("TCPLOCALPORT"); + if (!port) port="80"; + { + char *Buf; + int i; + host=header(buf,len,"Host"); + if (!host) i=100; else i=str_len(host)+7; + Buf=alloca(i); + if (!host) { + char *ip=getenv("TCPLOCALIP"); + if (!ip) ip="127.0.0.1"; + if (str_len(ip)+str_len(port)>90) exit(101); + host=Buf; + i=str_copy(Buf,ip); + i+=str_copy(Buf+i,":"); + i+=str_copy(Buf+i,port); +#ifdef NORMALIZE_HOST + } else { + int colon=str_chr(host,':'); + if (host[colon]==0) { + i=str_copy(Buf,host); + i+=str_copy(Buf+i,":"); + i+=str_copy(Buf+i,port); + host=Buf; + } +#endif + } + for (i=str_len(host); i >= 0; --i) + if ((host[i]=tolower(host[i]))=='/') +hostb0rken: + badrequest(400,"Bad Request","Bad RequestBullshit Host header"); + if (host[0]=='.') goto hostb0rken; +// fprintf(stderr,"host %s\n",host); +#ifdef KEEPALIVE + if (keepalive>0) { + if ((rootdir=open(".",O_RDONLY))<0) + keepalive=-1; + } +#endif + 
if (chdir(host)) { +#ifdef REDIRECT + char symlink[1024]; + int linklen; + if ((linklen=readlink(host,symlink,sizeof symlink))>0) { + /* it is a broken symlink. Do a redirection */ + redirectboilerplate(); + if (symlink[0]=='=') { + buffer_put(buffer_1,symlink+1,linklen-1); + } else { + buffer_put(buffer_1,symlink,linklen); + while (url[0]=='/') ++url; + buffer_puts(buffer_1,url); + } + retcode=301; + buffer_puts(buffer_1,"\r\n\r\n"); + dolog(0); + buffer_flush(buffer_1); + exit(0); + } +#endif + if (chdir("default") && argc<2) { + badrequest(404,"Not Found","Not FoundThis host is not served here."); + } + } + } +#ifdef AUTH + { + char *auth_script = ".http-auth"; + struct stat st; + + if(!stat(auth_script, &st)) { + pid_t child; + const char *authorization; + + authorization = header(buf, len, "Authorization"); + child = fork(); + if(child < 0) { + badrequest(500, "Internal Server Error", "Server Resource problem."); + } else if(child == 0) { + const char *argv[5] = { auth_script, host, url, authorization, NULL }; + + dup2(2, 1); + execve(auth_script, argv, envp); + _exit(1); + } else { + int status; + pid_t childr; + + while((childr = waitpid(child, &status, 0)) < 0 && errno == EINTR); + if(childr != child) + badrequest(500, "Internal Server Error", "Server system problem."); + if(!WIFEXITED(status) || WEXITSTATUS(status)) { + retcode = 401; + dolog(0); + buffer_puts(buffer_1,"HTTP/1.0 401 Authorization Required\r\n" + "WWW-Authenticate: Basic realm=\""); + buffer_puts(buffer_1, host); + buffer_puts(buffer_1,"\"\r\nConnection: close\r\n\r\n" + "Access to this site is restricted.\r\n" + "Please provide credentials.\r\n"); + buffer_flush(buffer_1); + exit(0); + } + } + } + } +#endif /* AUTH */ + nurl=url+str_len(url); + if (nurl>url) --nurl; + if (*nurl=='/') { + int i; + nurl=alloca(str_len(url)+12); + i=str_copy(nurl,url); + i+=str_copy(nurl+i,"index.html"); + nurl[i]=0; + url=nurl; + nurl=url+i; + } +#ifdef CGI + nurl-=3; + { + char* tmp,* pathinfo; + 
pathinfo=0; + for (tmp=url; tmp-1;--i) { + if ((nurl[0]=='/')&&(nurl[1]=='n')&&(nurl[2]=='p')&&(nurl[3]=='h')&&(nurl[4]=='-')) + start_cgi(1,pathinfo,envp); /* start a NPH-CGI */ + --nurl; + } +#ifdef INDEX_CGI + indexcgi: +#endif + start_cgi(0,pathinfo,envp); /* start a CGI */ + } + } +#endif + + { + int fd; + if ((fd=doit(buf,len,url,1))>=0) { /* file was there */ + /* look if file.gz is also there and acceptable */ + char *fnord=alloca(str_len(url)+4); + int i,fd2,trypng=0; + char *oldencoding=encoding; + char *oldmimetype=mimetype; + i=str_copy(fnord,url); + if (i>4 && str_equal(fnord+i-4,".gif")) { + trypng=1; + str_copy(fnord+i-3,"png"); + } else + str_copy(fnord+i,".gz"); + fd2=doit(buf,len,fnord,0); + if (fd2>=0) { /* yeah! */ + url=fnord; + close(fd); + fd=fd2; + } else { + encoding=oldencoding; + if (trypng) mimetype=oldmimetype; + } + retcode=200; + dolog(st.st_size); + if (rangestart || rangeend!=st.st_size) + buffer_puts(buffer_1,"HTTP/1.0 206 Partial Content\r\nServer: "FNORD"\r\nContent-Type: "); + else + buffer_puts(buffer_1,"HTTP/1.0 200 OK\r\nServer: "FNORD"\r\nContent-Type: "); + buffer_puts(buffer_1,mimetype); + buffer_puts(buffer_1,"\r\n"); +#ifdef KEEPALIVE + switch (keepalive) { + case -1: buffer_puts(buffer_1,"Connection: close\r\n"); break; + case 1: buffer_puts(buffer_1,"Connection: Keep-Alive\r\n"); break; + } +#endif + if (encoding) { + buffer_puts(buffer_1,"Content-Encoding: "); + buffer_puts(buffer_1,encoding); + buffer_puts(buffer_1,"\r\n"); + } + buffer_puts(buffer_1,"Content-Length: "); + buffer_putrange(buffer_1,rangeend-rangestart); + buffer_puts(buffer_1,"\r\nLast-Modified: "); + { + struct tm* x=gmtime(&st.st_mtime); + /* "Sun, 06 Nov 1994 08:49:37 GMT" */ + buffer_put(buffer_1,days+3*x->tm_wday,3); + buffer_puts(buffer_1,", "); + buffer_put2digits(buffer_1,x->tm_mday); + buffer_puts(buffer_1," "); + buffer_put(buffer_1,months+3*x->tm_mon,3); + buffer_puts(buffer_1," "); + buffer_put2digits(buffer_1,(x->tm_year+1900)/100); + 
buffer_put2digits(buffer_1,(x->tm_year+1900)%100); + buffer_puts(buffer_1," "); + buffer_put2digits(buffer_1,x->tm_hour); + buffer_puts(buffer_1,":"); + buffer_put2digits(buffer_1,x->tm_min); + buffer_puts(buffer_1,":"); + buffer_put2digits(buffer_1,x->tm_sec); + buffer_puts(buffer_1," GMT\r\n"); + } + if (rangestart || rangeend!=st.st_size) { + buffer_puts(buffer_1,"Accept-Ranges: bytes\r\nContent-Range: bytes "); + buffer_putrange(buffer_1,rangestart); + buffer_puts(buffer_1,"-"); + buffer_putrange(buffer_1,rangeend-1); + buffer_puts(buffer_1,"/"); + buffer_putrange(buffer_1,st.st_size); + buffer_puts(buffer_1,"\r\n"); + } + buffer_puts(buffer_1,"\r\n"); + if (method==GET || method==POST) { + switch (serve_static_data(fd)) { + case 0: break; + case -1: goto error500; + case 1: return 1; + } +#ifdef KEEPALIVE +#ifdef TCP_CORK + if (corked) { + int zero=0; + setsockopt(1,IPPROTO_TCP,TCP_CORK,&zero,sizeof(zero)); + } +#endif + if (keepalive>0) { + close(fd); + fchdir(rootdir); close(rootdir); + goto handlenext; + } +#endif + exit(0); +error500: + retcode=500; + } else + buffer_flush(buffer_1); + } + } +#ifdef CHROOT +tuttikaputti: +#endif + switch (retcode) { + case 404: + { + char* space=alloca(strlen(url)+2); +#ifdef INDEX_CGI + if (handleindexcgi(url,origurl,space)) goto indexcgi; +#endif + handleredirect(url,origurl); +#ifdef DIR_LIST + handledirlist(origurl); +#endif + badrequest(404,"Not Found","Not FoundNo such file or directory."); + } + case 406: badrequest(406,"Not Acceptable","Not AcceptableNothing acceptable found."); + case 416: badrequest(416,"Requested Range Not Satisfiable",""); + case 304: badrequest(304,"Not Changed",""); + case 500: badrequest(500,"Internal Server Error",""); + } + return 1; +} diff --git a/scan.h b/scan.h new file mode 100644 index 0000000..b6a00a6 --- /dev/null +++ b/scan.h @@ -0,0 +1,61 @@ +#ifndef SCAN_H +#define SCAN_H + +#include +#ifndef __pure__ +#define __pure__ +#endif + +/* interpret src as ASCII decimal number, write 
number to dest and + * return the number of bytes that were parsed */ +extern unsigned int scan_ulong(const char *src,unsigned long *dest); + +/* same, for long long */ +extern unsigned int scan_ulonglong(const char *src,unsigned long long *dest); + +/* interpret src as ASCII hexadecimal number, write number to dest and + * return the number of bytes that were parsed */ +extern unsigned int scan_xlong(const char *src,unsigned long *dest); + +/* interpret src as ASCII octal number, write number to dest and + * return the number of bytes that were parsed */ +extern unsigned int scan_8long(const char *src,unsigned long *dest); + +/* interpret src as signed ASCII decimal number, write number to dest + * and return the number of bytes that were parsed */ +extern unsigned int scan_long(const char *src,signed long *dest); + +extern unsigned int scan_uint(const char *src,unsigned int *dest); +extern unsigned int scan_xint(const char *src,unsigned int *dest); +extern unsigned int scan_8int(const char *src,unsigned int *dest); +extern unsigned int scan_int(const char *src,signed int *dest); + +extern unsigned int scan_ushort(const char *src,unsigned short *dest); +extern unsigned int scan_xshort(const char *src,unsigned short *dest); +extern unsigned int scan_8short(const char *src,unsigned short *dest); +extern unsigned int scan_short(const char *src,signed short *dest); + +/* interpret src as double precision floating point number, + * write number to dest and return the number of bytes that were parsed */ +extern unsigned int scan_double(const char *in, double *dest); + +/* if *src=='-', set *dest to -1 and return 1. + * if *src=='+', set *dest to 1 and return 1. + * otherwise set *dest to 1 return 0. 
*/
+extern unsigned int scan_plusminus(const char *src,signed int *dest);
+
+/* return the highest integer n<=limit so that isspace(in[i]) for all 0<=i<=n */
+extern unsigned int scan_whitenskip(const char *in,unsigned int limit) __pure__;
+
+/* return the highest integer n<=limit so that !isspace(in[i]) for all 0<=i<=n */
+extern unsigned int scan_nonwhitenskip(const char *in,unsigned int limit) __pure__;
+
+/* return the highest integer n<=limit so that in[i] is element of
+ * charset (ASCIIZ string) for all 0<=i<=n */
+extern unsigned int scan_charsetnskip(const char *in,const char *charset,unsigned int limit) __pure__;
+
+/* return the highest integer n<=limit so that in[i] is not element of
+ * charset (ASCIIZ string) for all 0<=i<=n */
+extern unsigned int scan_noncharsetnskip(const char *in,const char *charset,unsigned int limit) __pure__;
+
+#endif
diff --git a/scan_ulong.c b/scan_ulong.c
new file mode 100644
index 0000000..02d3f2b
--- /dev/null
+++ b/scan_ulong.c
@@ -0,0 +1,19 @@
+#include "scan.h"
+
+/* Parse the leading run of ASCII decimal digits in src.
+ * Stores the value in *dest (0 if src does not start with a digit) and
+ * returns the number of bytes consumed.  No sign and no leading
+ * whitespace are accepted.  The value is accumulated in an unsigned
+ * long, so a number that does not fit wraps modulo ULONG_MAX+1 instead
+ * of overflowing a signed int. */
+unsigned int scan_ulong(const char *src,unsigned long *dest) {
+  register const char *tmp=src;
+  register unsigned long l=0;
+  register unsigned char c;
+  while ((c=*tmp-'0')<10) {	/* bytes below '0' wrap to >=10 and stop the loop */
+    l=l*10+c;
+    ++tmp;
+  }
+  *dest=l;
+  return tmp-src;
+}
diff --git a/scan_ulonglong.c b/scan_ulonglong.c
new file mode 100644
index 0000000..c28594d
--- /dev/null
+++ b/scan_ulonglong.c
@@ -0,0 +1,17 @@
+#include "scan.h"
+
+/* Parse the leading run of ASCII decimal digits in src as an unsigned
+ * long long.  Stores the value in *dest (0 if src does not start with
+ * a digit) and returns the number of bytes consumed.  A number that
+ * does not fit wraps modulo ULLONG_MAX+1. */
+unsigned int scan_ulonglong(const char *src,unsigned long long *dest) {
+  register const char *tmp=src;
+  register unsigned long long int l=0;
+  register unsigned char c;
+  while ((c=*tmp-'0')<10) {	/* bytes below '0' wrap to >=10 and stop the loop */
+    l=l*10+c;
+    ++tmp;
+  }
+  *dest=l;
+  return tmp-src;
+}
diff --git a/str.h b/str.h
new file mode 100644
index 0000000..5d59345
--- /dev/null
+++ b/str.h
@@ -0,0 +1,43 @@
+#ifndef STR_H
+#define STR_H
+
+#include <sys/cdefs.h>
+#ifndef __pure__
+#define __pure__
+#endif
+
+/* str_copy copies leading bytes from in to out until \0.
+ * return number of copied bytes.
*/
+extern unsigned int str_copy(char *out,const char *in);
+
+/* str_diff returns negative, 0, or positive, depending on whether the
+ * string a[0], a[1], ..., a[n]=='\0' is lexicographically smaller than,
+ * equal to, or greater than the string b[0], b[1], ..., b[m-1]=='\0'.
+ * If the strings are different, str_diff does not read bytes past the
+ * first difference. */
+extern int str_diff(const char *a,const char *b) __pure__;
+
+/* str_diffn returns negative, 0, or positive, depending on whether the
+ * string a[0], a[1], ..., a[n]=='\0' is lexicographically smaller than,
+ * equal to, or greater than the string b[0], b[1], ..., b[m-1]=='\0'.
+ * If the strings are different, str_diffn does not read bytes past the
+ * first difference. The strings will be considered equal if the first
+ * limit characters match. */
+extern int str_diffn(const char *a,const char *b,unsigned int limit) __pure__;
+
+/* str_len returns the index of \0 in s */
+extern unsigned int str_len(const char *s) __pure__;
+
+/* str_chr returns the index of the first occurrence of needle or \0 in haystack */
+extern unsigned int str_chr(const char *haystack,char needle) __pure__;
+
+/* str_rchr returns the index of the last occurrence of needle or \0 in haystack */
+extern unsigned int str_rchr(const char *haystack,char needle) __pure__;
+
+/* str_start returns 1 if b is a prefix of a, 0 otherwise */
+extern int str_start(const char *a,const char *b) __pure__;
+
+/* convenience shortcut to test for string equality */
+#define str_equal(s,t) (!str_diff((s),(t)))
+
+#endif
diff --git a/str_chr.c b/str_chr.c
new file mode 100644
index 0000000..347ef00
--- /dev/null
+++ b/str_chr.c
@@ -0,0 +1,11 @@
+#include "str.h"
+
+/* Return the index of the first byte in `in` that equals needle; if
+ * there is none, return the index of the terminating \0. */
+unsigned int str_chr(const char *in, char needle) {
+  const char *p;
+
+  for (p=in; *p && *p!=needle; ++p)
+    ;
+  return p-in;
+}
diff --git a/str_copy.c b/str_copy.c
new file mode 100644
index 0000000..48b22c0
--- /dev/null
+++ b/str_copy.c
@@ -0,0 +1,14 @@
+#include "str.h"
+
+/* Copy the \0-terminated string in to out, terminator included.
+ * Returns the number of bytes copied, not counting the terminator,
+ * so out needs room for the return value plus one byte. */
+unsigned int str_copy(char *out,const char *in) {
+  char *d=out;
+
+  while ((*d=*in)) {
+    ++d;
+    ++in;
+  }
+  return d-out;
+}
diff --git a/str_diff.c b/str_diff.c
new file mode 100644
index 0000000..fe74e80
--- /dev/null
+++ b/str_diff.c
@@ -0,0 +1,19 @@
+#include "str.h"
+
+/* str_diff returns negative, 0, or positive, depending on whether the
+ * string a is lexicographically smaller than, equal to, or greater than
+ * the string b.  Bytes are compared as unsigned char, like strcmp, so
+ * the sign of the result does not depend on whether plain char is
+ * signed.  str_diff does not read past the first difference or past
+ * the first \0. */
+int str_diff(const char* a, const char* b) {
+  register const unsigned char* s=(const unsigned char*)a;
+  register const unsigned char* t=(const unsigned char*)b;
+  register int j;
+  for (;;) {
+    if ((j=(*s-*t))) break;
+    if (!*t) break;
+    ++s; ++t;
+  }
+  return j;
+}
diff --git a/str_diffn.c b/str_diffn.c
new file mode 100644
index 0000000..a54904c
--- /dev/null
+++ b/str_diffn.c
@@ -0,0 +1,21 @@
+#include "str.h"
+
+/* str_diffn compares at most limit bytes of the strings a and b.  It
+ * returns negative, 0, or positive, depending on whether a is
+ * lexicographically smaller than, equal to, or greater than b within
+ * those bytes; strings whose first limit bytes match compare equal,
+ * and limit==0 always yields 0.  Bytes are compared as unsigned char.
+ * str_diffn does not read past the first difference, the first \0, or
+ * the limit. */
+int str_diffn(const char* a, const char* b, unsigned int limit) {
+  register const unsigned char* s=(const unsigned char*)a;
+  register const unsigned char* t=(const unsigned char*)b;
+  register const unsigned char* u=t+limit;
+  register int j=0;
+  while (t<u) {
+    if ((j=(*s-*t))) break;
+    if (!*t) break;
+    ++s; ++t;
+  }
+  return j;
+}
diff --git a/str_len.c b/str_len.c
new file mode 100644
index 0000000..cf1541a
--- /dev/null
+++ b/str_len.c
@@ -0,0 +1,11 @@
+#include "str.h"
+
+/* Return the length of the \0-terminated string in, i.e. the index of
+ * its terminating \0. */
+unsigned int str_len(const char *in) {
+  const char *p=in;
+
+  while (*p)
+    ++p;
+  return p-in;
+}
diff --git a/str_start.c b/str_start.c
new file mode 100644
index 0000000..c49ea19
--- /dev/null
+++ b/str_start.c
@@ -0,0 +1,12 @@
+#include "str.h"
+
+/* Return 1 if t is a prefix of s (the empty string is a prefix of
+ * everything), 0 otherwise.  s is not read past the first mismatch. */
+int str_start (register const char *s, register const char *t)
+{
+  register char x ;
+
+  while ((x = *t++))
+    if (x != *s++) return 0;
+  return 1;
+}