First stab at automated conversion

2009-07-09 01:03:38 -06:00 · 2009-07-09 01:03:38 -06:00 · 4e1fa0174d
commit 4e1fa0174d
4 changed files with 5440 additions and 0 deletions
--- a/9
+++ b/9
@ -0,0 +1,9 @@
 # Per-page TeX sources; the wildcard is expanded by make when the variable is
 # used in the prerequisite lists below.
 PAGES = page*.tex
 # Build the DVI.  $< is the first prerequisite (horrors2.ltx).  latex is run
 # twice, presumably so the second pass picks up the table of contents written
 # by the first.
 horrors2.dvi: horrors2.ltx $(PAGES)
 	latex $<
 	latex $<
 # Build the PDF with pdflatex, using the same two-pass scheme as the DVI rule.
 horrors2.pdf: horrors2.ltx $(PAGES)
 	pdflatex $<
 	pdflatex $<
--- a/horrors2.ltx
+++ b/horrors2.ltx
@ -0,0 +1,60 @@
 \documentclass{book}
 %\usepackage[T1]{fontenc}
 %\usepackage{anttor}
 %\usepackage{tgothic}
 \title{Horrors 2: Bavarious Reasons}
 \newcommand{\by}[1]{{\large By {\sf #1}}\\\bigskip}
 \begin{document}
 \tableofcontents
 \part{Bavarious Reasons}
 \include{page1}
 \part{You're no Hakan}
 %\include{page2}
 \part{The Chronicles Of Biddick}
 %\include{page3}
 \part{My God, It's Full Of Stars}
 %\include{page4}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % Appendices
 \appendix
 \part{Inflamed Appendices}
 \chapter{Colophon}
 This book is a collection of stories from the Something Awful Forums,
 inspired by the 1991 book {\em Horrors} by Ben Biddick.  Most were written
 as a response to a challenge by forums moderator AYBraham:
 \begin{quotation}
  Your goal: Write a short story in the theme of {\em Horrors}, the collection
  of short stories written by Mr. Ben Beddick when he was 13 years
  old. Each story is encouraged to feature the tragic protagonist Luke
  Bavarius, and must be more than 500 words each. Each story must show
  that ``kids need to be respected and listened to.''
 \end{quotation}
 This book was typeset by the \LaTeX{} document preparation system, using
 the {\sf book} class.
 It was set in the Antykwa Toru\'nska (``Antiqua of Torun'') typeface,
 the most horrifying of all the standard \TeX{}live typefaces.  The face
 was designed by Zygfryd Gardzielewski (1914--2001), and first cast in
 metal in 1960 in the Grafmasz typefoundry in Warsaw.  The variant used
 in this document was digitized by Janusz Marian Nowacki, a tremendously
 cool dude from Poland, based off of photocopies of the original design.
 \end{document}
--- a/page1.tex
+++ b/page1.tex
--- a/sa2ltx.py
+++ b/sa2ltx.py
@ -0,0 +1,76 @@
 #! /usr/bin/python3
 import optparse
 import xml.dom.minidom
 import xml.etree.ElementTree
 import re
 # Any remaining markup tag: '<', one or more non-'>' characters, '>'.
 tag_re = re.compile(r'<[^>]+>')
 # An "edited by" paragraph.  DOTALL lets '.' cross newlines, and the greedy
 # '.*' runs through to the last '</p>' in the string.
 crap_re = re.compile(r'<p class="editedby">.*</p>', flags=re.DOTALL)
 # A run of three or more periods, to be typeset as an ellipsis.
 dots_re = re.compile(r'\.\.\.+')
 # A non-empty span between two straight double quotes; group 1 is the
 # quoted text.
 quotes_re = re.compile(r'"([^"]+)"')
 def by_class(e, classname):
    """Yield every element in the subtree of *e* whose class is *classname*.

    The walk covers *e* itself and all of its descendants in document
    (pre-order, depth-first) order.  An element matches when its ``class``
    attribute is exactly equal to *classname*; multi-valued class
    attributes are not split.
    """
    # Element.iter() performs the same pre-order walk the hand-rolled work
    # list used to, without Element.getchildren(), which was removed in
    # Python 3.9.
    for node in e.iter():
        if node.get('class') == classname:
            yield node
 def first_by_class(e, classname):
    """Return the first element by_class() yields for *e*, or None if none match."""
    return next(by_class(e, classname), None)
 def table_to_ltx(t):
    """Print one forum post as LaTeX on standard output.

    *t* is the element for a single post.  The poster's name is taken from
    the first descendant with class ``author`` and the text from the first
    descendant with class ``postbody``.  The output is a comment rule, a
    ``\\by{...}`` line naming the poster, and then the post body with its
    markup converted to LaTeX.

    Raises AttributeError if either the ``author`` or the ``postbody``
    element is missing.
    """
    dt = first_by_class(t, 'author')
    username = dt.text
    if not username:
        # Moderators: the name is the tail text of the last child element.
        # list(dt) replaces Element.getchildren(), removed in Python 3.9.
        username = list(dt)[-1].tail
    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
    print('\\by{%s}' % username)
    body = first_by_class(t, 'postbody')
    # encoding='unicode' makes tostring() return str; the default returns
    # bytes, on which the str-based replacements below raise TypeError.
    s = xml.etree.ElementTree.tostring(body, encoding='unicode')
    s = s.replace('<br />', '\n')
    s = s.replace('<i>', '{\\em ')
    s = s.replace('</i>', '}')
    s = s.replace('<b>', '{\\bf ')
    s = s.replace('</b>', '}')
    # Drop the "edited by" paragraph first, then every remaining tag.
    s = crap_re.sub('', s)
    s = tag_re.sub('', s)
    # The backslash is doubled because re.sub() interprets escapes in the
    # replacement template; a bare "\l" is rejected as a bad escape.
    s = dots_re.sub(r'{\\ldots}', s)
    s = quotes_re.sub(r"``\1''", s)
    print(s)
 def doc_to_ltx(doc):
    """Convert every post table in *doc* to LaTeX, in document order.

    *doc* is a parsed tree (an ElementTree or an Element).  Each ``table``
    element whose ``class`` attribute is exactly ``post`` is handed to
    table_to_ltx(); all other tables are skipped.
    """
    # iter() replaces getiterator(), which was removed in Python 3.9.
    for table in doc.iter('table'):
        if table.get('class') == 'post':
            table_to_ltx(table)
 def main():
    """Convert each file named on the command line to LaTeX on stdout.

    Every positional argument is opened as ISO-8859-1 text, parsed as XML
    and passed to doc_to_ltx().  No command-line options are defined; the
    option parser only provides ``--help`` and argument splitting.
    """
    p = optparse.OptionParser()
    (_opts, args) = p.parse_args()
    # LaTeX replacements for the HTML named entities the XML parser does
    # not know about.
    entities = dict(nbsp=" ",
                    rsaquo=">",
                    lsquo="`",
                    rsquo="'",
                    ldquo="``",
                    rdquo="''",
                    hellip="{\\ldots}",
                    ndash="---",
                    mdash="---",
                    iexcl="{\\!`}",
                    copy="{\\copyright}",
                    )
    for a in args:
        # A fresh parser per file.  XMLParser replaces XMLTreeBuilder,
        # which was removed in Python 3.9.
        parser = xml.etree.ElementTree.XMLParser()
        parser.entity.update(entities)
        # The with-block closes the input file once it has been parsed.
        with open(a, encoding='iso-8859-1') as f:
            doc = xml.etree.ElementTree.parse(f, parser)
        doc_to_ltx(doc)
 # Run the converter only when executed as a script, so that importing this
 # module does not parse sys.argv or start a conversion.
 if __name__ == '__main__':
    main()