#! /usr/bin/python3 import optparse import xml.dom.minidom import xml.etree.ElementTree import re quotes_re = re.compile(r'"([^"]+)"') dots_re = re.compile(r'\.\.\.+') crap_re = re.compile(r'
.*
', re.DOTALL) tag_re = re.compile(r'<[^>]+>') def by_class(e, classname): todo = [e] while todo: i = todo.pop(0) if i.get('class') == classname: yield i todo = i.getchildren() + todo def first_by_class(e, classname): for i in by_class(e, classname): return i def table_to_ltx(t): dt = first_by_class(t, 'author') username = dt.text if not username: # Moderators username = dt.getchildren()[-1].tail print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%') print('\\by{%s}' % username) body = first_by_class(t, 'postbody') s = xml.etree.ElementTree.tostring(body) s = s.replace('