cruft
·
2009-11-18
toxhtml.py
1#! /usr/bin/env python3
2
3import re
4
# Brace-scoped commands of the form ``{\cmd text}``; ``cmd`` is the command
# name and ``txt`` the brace-free text that follows the single space.
block1_re = re.compile(r'{\\(?P<cmd>[\w*]+) (?P<txt>[^{}]+)}')
# Argument-style commands of the form ``\cmd{text}`` or ``\cmd[opt]{text}``;
# the optional bracketed part is matched but not given a named group.
block2_re = re.compile(r'\\(?P<cmd>[\w*]+)(\[[^]]+\])?{(?P<txt>[^{}]*)}')


# Converter state shared between texsub() and outline().
# chapauth / chapimg: pending chapter author and chapter image, stored by
#     outline() and emitted (then cleared) with the next chapter heading.
# display: False while inside a ``textblock`` environment (output suppressed).
# pfx: prefix prepended to plain text lines ('> ' inside quotations).
chapauth, chapimg = None, None
display, pfx = True, ''
13
# Inline font commands and the HTML wrapper each one maps to.
_INLINE_TEMPLATES = {
    'em': '<em>%s</em>',
    'bf': '<strong>%s</strong>',
    'sf': '<samp>%s</samp>',
    'sc': '<span class="sc">%s</span>',
    'rm': '<span class="rm">%s</span>',
}

# Commands rewritten to a ``#cmd text`` marker line for outline() to handle.
_DIRECTIVE_COMMANDS = frozenset((
    'include',
    'chapter',
    'chapimg',
    'chapauth',
    'illustration',
    'scriptsize',
    'section*',
    'part',
))

# Commands that produce no output at all.
_DROPPED_COMMANDS = frozenset((
    'pagenumbering',
    'includegraphics',
    'newcommand',
    'hbox',
))


def texsub(m):
    """Return the replacement text for one matched LaTeX command.

    Intended as the ``repl`` callback of ``re.sub`` for patterns that
    expose ``cmd`` and ``txt`` named groups.

    Side effects: ``\\begin``/``\\end`` of ``quotation``/``quote`` set and
    clear the module-level ``pfx``; ``\\begin``/``\\end`` of ``textblock``
    clear and set the module-level ``display`` flag.

    Args:
        m: match object with ``cmd`` (command name) and ``txt`` (argument).

    Returns:
        The replacement string. This is always a ``str``; environment
        markers yield ``''`` instead of the ``None`` they used to return.

    Raises:
        TypeError: for an unknown command or unknown ``\\begin`` environment
            (the offending command and text are printed first).
    """
    global pfx, display

    cmd = m.group('cmd')
    txt = m.group('txt')

    template = _INLINE_TEMPLATES.get(cmd)
    if template is not None:
        return template % txt

    if cmd == 'url':
        return '<a href="%s">%s</a>' % (txt, txt)

    if cmd == 'begin':
        if txt == 'center':
            pass  # centering is ignored
        elif txt in ('quotation', 'quote'):
            pfx = '> '
        elif txt == 'textblock':
            display = False
        else:
            print(cmd, txt)
            raise TypeError(cmd)
        # Explicit empty string: re.sub callbacks are documented to return
        # the replacement string, not None.
        return ''

    if cmd == 'end':
        if txt == 'textblock':
            display = True
        else:
            pfx = ''
        return ''

    if cmd in _DIRECTIVE_COMMANDS:
        return '#%s %s' % (cmd, txt)
    if cmd in _DROPPED_COMMANDS:
        return ''
    if cmd in ('TeX', 'LaTeX'):
        return cmd

    print(cmd, txt)
    raise TypeError(cmd)
67
# HTML tag names opened by decorsub() and not yet closed (innermost last).
decor_stack = []
# Either an opening ``{\xx`` with a two-character command (plus an optional
# trailing space), or a bare closing brace.
decor_re = re.compile(r'({\\(?P<cmd>\w\w) ?|})')


def decorsub(m):
    """Translate one font-group opener or closing brace into an HTML tag.

    Openers push the HTML tag name they emit onto ``decor_stack``; a
    closing brace pops the most recent name and emits its end tag. A
    closing brace seen while the stack is empty is returned unchanged.

    Args:
        m: match object produced by ``decor_re``.

    Returns:
        The HTML start tag, end tag, or the original matched text.

    Raises:
        TypeError: if the opener is not one of ``bf``, ``em`` or ``sc``.
    """
    tag = m.group('cmd')

    if tag:
        # Small caps has no element of its own: use a classed span.
        if tag == 'sc':
            decor_stack.append('span')
            return '<span class="sc">'
        if tag == 'bf':
            tag = 'strong'
        elif tag != 'em':
            raise TypeError(tag)
        decor_stack.append(tag)
        return '<%s>' % tag

    # Closing brace: close the innermost open tag, if there is one.
    if decor_stack:
        return '</%s>' % decor_stack.pop()
    return m.group(0)
90
def art(artist, url, title=None):
    """Print an HTML ``<div class="art">`` block for one piece of artwork.

    Args:
        artist: name credited in the caption.
        url: value used for the image ``src`` attribute.
        title: optional title; when given it becomes the ``alt`` text and
            the caption reads ``<em>title</em> by artist``. Otherwise both
            the ``alt`` text and the caption are ``Artwork by artist``.
    """
    if title:
        alt_text = title
        caption = '<em>%s</em> by %s' % (title, artist)
    else:
        alt_text = "Artwork by %s" % artist
        caption = alt_text

    html_lines = (
        '<div class="art">',
        '<img src="%s" alt="%s" />' % (url, alt_text),
        '<p class="artist">%s</p>' % (caption),
        '</div>',
    )
    for html_line in html_lines:
        print(html_line)
101
outbuf = ''

# Literal TeX -> text substitutions, applied in this exact order before the
# regex passes. Order matters: accent sequences such as \'e must be handled
# before the bare apostrophe, doubled quotes before single ones, and '---'
# before '--'. Backslash sequences are raw strings so that none of them is
# an invalid escape sequence.
_TEX_REPLACEMENTS = (
    (r'\'e', 'é'),
    (r'\,c', 'ç'),
    (r'\'n', 'ń'),
    ("''", '”'),
    ("``", '“'),
    ("'", '’'),
    ("`", '‘'),
    ('---', '—'),
    ('--', '–'),
    ('\\\\', '<br />'),
    (r'\_', '_'),
    (r'\#', '#'),
    (r'\$', '$'),
    (r'\ ', ' '),
    (r'\-', ''),
    (r'\~n', 'ñ'),
    (r'{\ldots}', '…'),
    (r'\ldots', '…'),
    (r'\copyright', '©'),
)


def outline(l):
    """Convert one line of LaTeX input and print the resulting HTML/text.

    The line is stripped, literal TeX sequences are replaced, commands are
    rewritten through ``texsub`` and font groups through ``decorsub``.
    What remains is then dispatched on its first character:

    * ``#`` -- a marker produced by ``texsub`` (include, chapter, part,
      illustration, ...); unknown markers are emitted as an HTML comment.
    * ``\\`` -- a leftover raw command, recognised by its first four
      letters; unknown ones are printed as a ``=================`` line.
    * anything else -- printed with the current ``pfx`` unless ``display``
      is false.

    Blank input lines print ``outbuf``; lines starting with ``%`` (TeX
    comments) are ignored.

    Args:
        l: one raw input line.
    """
    global chapimg, chapauth

    l = l.strip()
    if not l:
        print(outbuf)
        return
    if l[0] == '%':
        return

    for tex, replacement in _TEX_REPLACEMENTS:
        l = l.replace(tex, replacement)
    l = block1_re.sub(texsub, l)
    l = block2_re.sub(texsub, l)
    l = l.replace('{}', '')
    l = decor_re.sub(decorsub, l)

    if not l:
        return
    if l[0] == '#':
        if l.startswith('#include'):
            include(l[9:] + '.tex')
        elif l.startswith('#chapimg'):
            # Stored until the next chapter heading; the final character is
            # dropped before splitting on '{'.
            chapimg = l[9:-1].split('{')
        elif l.startswith('#chapauth'):
            chapauth = l[10:]
        elif l.startswith('#chapter'):
            print('<h1 class="chapter">%s</h1>' % l[9:])
            if chapauth:
                print('<h2 class="author">by %s</h2>' % chapauth)
                chapauth = None
            if chapimg:
                art(chapimg[0], chapimg[1])
                chapimg = None
        elif l.startswith('#part'):
            print('<h1 class="part">%s</h1>' % l[6:])
        elif l.startswith('#illustration'):
            artist, title, url = l[14:].split('{')
            title = title[:-1]
            url = url[:-1]
            art(artist, url, title)
        elif l.startswith('#section*'):
            print('<h2 class="section">%s</h2>' % l[10:])
        else:
            # Was '<--! ... -->', which is not a valid HTML comment.
            print('<!-- %s -->' % l)
    elif l[0] == '\\':
        what = l[1:5].lower()
        if what in ('bigs', 'vfil'):
            print('<br class="bigskip"/>')
        elif what == 'newp':
            print('<br class="pagebreak"/>')
        elif what == 'noin':
            print(l[10:])
        elif what in ('hbox',
                      'inde',
                      'tabl',
                      'appe',
                      'page',
                      'list'):
            pass
        elif l[1:9] == 'maketitl':
            print('<h1>Horrors 2</h1>')
            print('<h2>The Something Awful Forums</h2>')
        else:
            print('================= %r' % what)
    elif display:
        print('%s%s' % (pfx, l))
186
def include(fn):
    """Feed every line of the file *fn* through ``outline``.

    The file named ``praise.tex`` is skipped entirely.

    Args:
        fn: path of the file to read.
    """
    if fn == 'praise.tex':
        return

    # Context manager so the handle is closed even if outline() raises.
    with open(fn) as f:
        for l in f:
            outline(l)
194
# Context manager so the input file is closed when conversion finishes or
# fails part-way through.
with open('horrors2.ltx') as f:
    # skip LaTeX crap
    for l in f:
        if l.startswith('\\begin{document'):
            break

    # Same iterator as above, so this resumes on the line right after
    # \begin{document...}.
    for l in f:
        outline(l)