Merge pull request #1 from dirtbags/py3

Python 3
2018-10-11 08:27:28 -06:00 · 2018-10-11 08:27:28 -06:00 · ad8d412f73
parent 2ab42e81ff 0180c9caf0
commit ad8d412f73
10 changed files with 467 additions and 443 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,16 @@
+Dirtbags Netarch Library
+========================
+
+This is a library for advanced
+[network archaeology](https://sites.google.com/view/cyberfire/foundry/classes/network-archaeology).
+
+It provides a heavily field-tested framework for
+exploring unknown TCP-based protocols,
+and room to grow these explorations into full-blown decoders.
+
+Get going
+=========
+
+Documentation sucks, sorry.
+The way we go about things is to copy `dumbdecode.py` to a new file,
+and start hacking onto it.
--- a/TODO.md
+++ b/TODO.md
@ -0,0 +1,7 @@
+Things We Need To Do
+====================
+
+* documentation
+* remove lingering py2-isms
+* more logical way to chain together dispatcher, tcp resequencer
+* some way to parallelize work
--- a/dumbdecode.py
+++ b/dumbdecode.py
@ -0,0 +1,13 @@
+#! /usr/bin/python3
+
+import sys
+from netarch import ip
+from netarch import *
+
+# Feed every burst that ip.Dispatch yields for the capture files named on
+# the command line into a single ip.Session.
+session = None
+dispatcher = ip.Dispatch(*sys.argv[1:])
+for _header, (srv, first, chunk) in dispatcher:
+    if not session:
+        # The session is built lazily, from the frame of the first burst seen
+        session = ip.Session(first)
+    # dispatcher.last is handed along as the `lastpos` argument
+    session.handle(srv, first, chunk, dispatcher.last)
--- a/gapstr.py
+++ b/gapstr.py
@ -1,246 +0,0 @@
-#! /usr/bin/python
-
-## 2008 Massive Blowout
-
-"""Functions to treat a list as a byte array with gaps.
-
-Lists should have only byte and numeric items.
-
-"""
-
-import __init__
-import sys
-
-class GapString:
-    def __init__(self, init=None, drop='?'):
-        self.contents = []
-        self.length = 0
-        self.drop = drop
-
-        if init:
-            self.append(init)
-
-    def __len__(self):
-        return int(self.length)
-
-    def loss(self):
-        ret = 0
-        for i in self.contents:
-            try:
-                ret += i
-            except TypeError:
-                pass
-        return ret
-
-    def __repr__(self):
-        return '<GapString of length %d>' % self.length
-
-    def append(self, i):
-        try:
-            self.length += len(i)
-            self.contents.append(i)
-        except TypeError:
-            self.length += i
-            self.contents.append(i)
-
-    def pop(self, idx=-1):
-        item = self.contents.pop(idx)
-        try:
-            self.length -= item
-        except TypeError:
-            self.length -= len(item)
-        return GapString(item)
-        
-
-    def __str__(self):
-        ret = []
-        for i in self.contents:
-            try:
-                ret.append(self.drop * i)
-            except TypeError:
-                ret.append(i)
-        return ''.join(ret)
-
-    def __iter__(self):
-        for i in self.contents:
-            try:
-                for c in i:
-                    yield c
-            except TypeError:
-                for j in range(i):
-                    yield self.drop
-
-    def __nonzero__(self):
-        return self.length > 0
-
-    def hasgaps(self):
-        for i in self.contents:
-            try:
-                len(i)
-            except TypeError:
-                return True
-        return False
-
-    def hexdump(self, fd=sys.stdout):
-        offset = 0
-
-        d = __init__.HexDumper(fd)
-        for i in self.contents:
-            try:
-                for j in xrange(i):
-                    d.dump_drop()
-            except TypeError:
-                for c in i:
-                    d.dump_chr(c)
-        d.finish()
-
-    def extend(self, other):
-        self.contents += other.contents
-        self.length += other.length
-
-    def __getslice__(self, start, end):
-        end = min(self.length, end)
-        start = min(self.length, start)
-
-        new = self.__class__(drop=self.drop)
-        new.length = max(end - start, 0)
-        if new.length == 0:
-            new.contents = []
-            return new
-        new.contents = self.contents[:]
-
-        l = self.length - new.length - start
-
-        # Trim off the beginning
-        while start >= 0:
-            i = new.contents.pop(0)
-            try:
-                start -= i
-                if start < 0:
-                    new.contents.insert(0, -start)
-            except TypeError:
-                start -= len(i)
-                if start < 0:
-                    new.contents.insert(0, i[start:])
-
-        # Trim off the end
-        while l >= 0:
-            i = new.contents.pop()
-            try:
-                l -= i
-                if l < 0:
-                    new.contents.append(-l)
-            except TypeError:
-                l -= len(i)
-                if l < 0:
-                    new.contents.append(i[:-l])
-
-        return new
-
-    def __getitem__(self, idx):
-        if False:
-            c = self[idx:idx+1]
-            if c.hasgaps():
-                return self.drop[0]
-            else:
-                return c.contents[0][0]
-        else:
-            l = 0
-            for i in self.contents:
-                try:
-                    l += len(i)
-                except TypeError:
-                    l += i
-                if l > idx:
-                    offs = idx - l
-                    try:
-                        return i[offs]
-                    except:
-                        return self.drop[0]
-            raise IndexError('Out of bounds')
-
-    def __add__(self, other):
-        if isinstance(other, str):
-            self.append(other)
-        else:
-            new = self.__class__(drop=self.drop)
-            new.extend(self)
-            new.extend(other)
-            return new
-
-    def __xor__(self, mask):
-        try:
-            mask = [ord(c) for c in mask]
-        except TypeError:
-            pass
-        try:
-            masklen = len(mask)
-        except TypeError:
-            masklen = 1
-            mask = [mask]
-
-        new = self.__class__(drop=self.drop)
-        for i in self.contents:
-            try:
-                r = []
-                offset = len(new) % masklen
-                for c in i:
-                    o = ord(c)
-                    r.append(chr(o ^ mask[offset]))
-                    offset = (offset + 1) % masklen
-                new.append(''.join(r))
-            except TypeError:
-                new.append(i)
-        return new
-
-    def index(self, needle):
-        pos = 0
-        for i in self.contents:
-            try:
-                return pos + i.index(needle)
-            except AttributeError:
-                pos += i
-            except ValueError:
-                pos += len(i)
-        raise ValueError('substring not found')
-
-    def split(self, pivot=' ', times=None):
-        ret = []
-        cur = self
-        while (not times) or (len(ret) < times):
-            try:
-                pos = cur.index(pivot)
-            except ValueError:
-                break
-            ret.append(cur[:pos])
-            cur = cur[pos+len(pivot):]
-        ret.append(cur)
-        return ret
-
-    def startswith(self, what):
-        return (what == str(self[:len(what)]))
-
-    def endswith(self, what):
-        return (what == str(self[-len(what):]))
-
-
-if __name__ == '__main__':
-    gs = GapString()
-    gs.append('hi')
-    assert str(gs) == 'hi'
-    assert str(gs[:40]) == 'hi'
-    gs.append(3)
-    assert str(gs) == 'hi???'
-    assert str(gs[:40]) == 'hi???'
-    assert str(gs[:3]) == 'hi?'
-    assert str(gs[-4:]) == 'i???'
-    assert str(gs + gs) == 'hi???hi???'
-    assert str(gs ^ 1) == 'ih???'
-
-    gs = GapString()
-    gs.append('123456789A')
-    assert str(gs[:4]) == '1234'
-    assert len(gs[:4]) == 4
-    assert len(gs[6:]) == 4
-    assert str(gs[:0]) == ''
-
--- a/netarch/init.py
+++ b/netarch/init.py
@ -1,156 +1,156 @@
-#! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-## 2008 Massive Blowout
+#! /usr/bin/python3

+import binascii
 import sys
 import struct

-stdch = (u'␀·········␊··␍··'
-         u'················'
-         u' !"#$%&\'()*+,-./'
-         u'0123456789:;<=>?'
-         u'@ABCDEFGHIJKLMNO'
-         u'PQRSTUVWXYZ[\]^_'
-         u'`abcdefghijklmno'
-         u'pqrstuvwxyz{|}~·'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················')
+stdch = (
+    '␀·········␊··␍··'
+    '················'
+    ' !"#$%&\'()*+,-./'
+    '0123456789:;<=>?'
+    '@ABCDEFGHIJKLMNO'
+    'PQRSTUVWXYZ[\]^_'
+    '`abcdefghijklmno'
+    'pqrstuvwxyz{|}~·'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+)

-decch = (u'␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏'
-         u'␐␑␒␓␔␕␖␗␘␙␚·····'
-         u'␠!"#$%&\'()*+,-./'
-         u'0123456789:;<=>?'
-         u'@ABCDEFGHIJKLMNO'
-         u'PQRSTUVWXYZ[\]^_'
-         u'`abcdefghijklmno'
-         u'pqrstuvwxyz{|}~␡'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················'
-         u'················')
+decch = (
+    '␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏'
+    '␐␑␒␓␔␕␖␗␘␙␚·····'
+    '␠!"#$%&\'()*+,-./'
+    '0123456789:;<=>?'
+    '@ABCDEFGHIJKLMNO'
+    'PQRSTUVWXYZ[\]^_'
+    '`abcdefghijklmno'
+    'pqrstuvwxyz{|}~␡'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+    '················'
+)

-cgach = (u'·☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
-         u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
-         u' !"#$%&\'()*+,-./'
-         u'0123456789:;<=>?'
-         u'@ABCDEFGHIJKLMNO'
-         u'PQRSTUVWXYZ[\]^_'
-         u'`abcdefghijklmno'
-         u'pqrstuvwxyz{|}~⌂'
-         u'ÇüéâäàåçêëèïîìÄÅ'
-         u'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ'
-         u'áíóúñÑªº¿⌐¬½¼¡«»'
-         u'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐'
-         u'└┴┬├─┼╞╟╚╔╩╦╠═╬╧'
-         u'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀'
-         u'αßΓπΣσµτΦΘΩδ∞φε∩'
-         u'≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤')
-
-shpch = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
-         u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
-         u'␣!"#$%&\'()*+,-./'
-         u'0123456789:;<=>?'
-         u'@ABCDEFGHIJKLMNO'
-         u'PQRSTUVWXYZ[\]^_'
-         u'`abcdefghijklmno'
-         u'pqrstuvwxyz{|}~⌂'
-         u'ÇüéâäàåçêëèïîìÄÅ'
-         u'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ'
-         u'áíóúñÑªº¿⌐¬½¼¡«»'
-         u'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐'
-         u'└┴┬├─┼╞╟╚╔╩╦╠═╬╧'
-         u'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀'
-         u'αßΓπΣσµτΦΘΩδ∞φε∩'
-         u'≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤')
+cgach = (
+    '□☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
+	'►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
+	' !"#$%&\'()*+,-./'
+	'0123456789:;<=>?'
+	'@ABCDEFGHIJKLMNO'
+	'PQRSTUVWXYZ[\]^_'
+	'`abcdefghijklmno'
+	'pqrstuvwxyz{|}~⌂'
+	'ÇüéâäàåçêëèïîìÄÅ'
+	'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ'
+	'áíóúñÑªº¿⌐¬½¼¡«»'
+	'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐'
+	'└┴┬├─┼╞╟╚╔╩╦╠═╬╧'
+	'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀'
+	'αßΓπΣσµτΦΘΩδ∞φε∩'
+	'≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤'
+)

+fluffych = (
+    '·☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
+	'►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
+	' !"#$%&\'()*+,-./'
+	'0123456789:;<=>?'
+	'@ABCDEFGHIJKLMNO'
+	'PQRSTUVWXYZ[\]^_'
+	'`abcdefghijklmno'
+	'pqrstuvwxyz{|}~⌂'
+	'ÇüéâäàåçêëèïîìÄÅ'
+	'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ'
+	'áíóúñÑªº¿⌐¬½¼¡«»'
+	'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐'
+	'└┴┬├─┼╞╟╚╔╩╦╠═╬╧'
+	'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀'
+	'αßΓπΣσµτΦΘΩδ∞φε∩'
+	'≡±≥≤⌠⌡÷≈°∀∃√ⁿ²■¤'
+)


 def unpack(fmt, buf):
-    """Unpack buf based on fmt, return the rest as a string."""
+    """Unpack buf based on fmt, return the remainder."""

    size = struct.calcsize(fmt)
-    vals = struct.unpack(fmt, str(buf[:size]))
+    vals = struct.unpack(fmt, bytes(buf[:size]))
    return vals + (buf[size:],)


 class HexDumper:
-    def __init__(self, fd=sys.stdout):
-        self.fd = fd
+    def __init__(self, output, charset=fluffych):
        self.offset = 0
-        self.buf = []
+        self.last = None
+        self.elided = False
+        self.hexes = []
+        self.chars = []
+        self.charset = charset
+        self.output = output

-    def _to_printable(self, c):
-        if not c:
-            return u'◌'
-        else:
-            return cgach[ord(c)]
-
-
-    def write(self, what):
-        self.fd.write(what.encode('utf-8'))
-
-    def _flush(self):
-        if not self.buf:
+    def _spit(self):
+        if self.chars == self.last:
+            if not self.elided:
+                self.output.write('*\n')
+                self.elided = True
+            self.hexes = []
+            self.chars = []
            return
+        self.last = self.chars[:]
+        self.elided = False

-        o = []
-        for c in self.buf:
-            if c:
-                o.append(u'%02x' % ord(c))
+        pad = 16 - len(self.chars)
+        self.hexes += ['  '] * pad
+
+        self.output.write('{:08x}  '.format(self.offset - len(self.chars)))
+        self.output.write(' '.join(self.hexes[:8]))
+        self.output.write('  ')
+        self.output.write(' '.join(self.hexes[8:]))
+        self.output.write('  ')
+        self.output.write(''.join(self.chars))
+        self.output.write('\n')
+
+        self.hexes = []
+        self.chars = []
+
+    def add(self, b):
+        if self.offset and self.offset % 16 == 0:
+            self._spit()
+
+        if b is None:
+            h = '⬜'
+            c = '<EFBFBD>'
        else:
-                o.append(u'--')
-        o +=  ([u'  '] * (16 - len(self.buf)))
-        p = [self._to_printable(c) for c in self.buf]
+            h = '{:02x}'.format(b)
+            c = self.charset[b]
+        self.chars.append(c)
+        self.hexes.append(h)

-        self.write(u'%08x  ' % self.offset)
+        self.offset += 1

-        self.write(u' '.join(o[:8]))
-        self.write(u'  ')
-        self.write(u' '.join(o[8:]))
-
-        self.write(u'  ┆')
-
-        self.write(u''.join(p))
-
-        self.write(u'┆\n')
-
-        self.offset += len(self.buf)
-        self.buf = []
-
-    def dump_chr(self, c):
-        self.buf.append(c)
-        if len(self.buf) == 16:
-            self._flush()
-
-    def dump_drop(self):
-        self.buf.append(None)
-        if len(self.buf) == 16:
-            self._flush()
-
-    def finish(self):
-        self._flush()
-        self.write('%08x\n' % self.offset)
+    def done(self):
+        self._spit()
+        self.output.write('{:08x}\n'.format(self.offset))


-def hexdump(buf, f=sys.stdout):
+def hexdump(buf, f=sys.stdout, charset=fluffych):
    "Print a hex dump of buf"

-    d = HexDumper()
-
-    for c in buf:
-        d.dump_chr(c)
-    d.finish()
+    h = HexDumper(output=f, charset=charset)
+    for b in buf:
+        h.add(b)
+    h.done()


 def cstring(buf):
@ -266,7 +266,7 @@ def bin(i, bits=None):
 def unhex(s):
    """Decode a string as hex, stripping whitespace first"""

-    return [ord(i) for i in s.replace(' ', '').decode('hex')]
+    return binascii.unhexlify(s.replace(' ', ''))


 def pp(value, bits=16):
--- a/netarch/crypto.py
+++ b/netarch/crypto.py
--- a/netarch/ip.py
+++ b/netarch/ip.py
@ -1,29 +1,28 @@
-#! /usr/bin/python
+#! /usr/bin/python3

 ## IP resequencing + protocol reversing skeleton
-## 2008 Massive Blowout
+## 2008, 2018 Neale Pickett

-import StringIO
 import struct
 import socket
 import warnings
 import heapq
-import gapstr
 import time
+import io
 try:
    import pcap
 except ImportError:
-    import py_pcap as pcap
+    warnings.warn("Using slow pure-python pcap library")
+    import netarch.py_pcap as pcap
 import os
 import cgi
-import urllib
-import UserDict
-from __init__ import *
+import urllib.parse
+from netarch import unpack, hexdump
+from netarch.trilobytes import TriloBytes

 def unpack_nybbles(byte):
    return (byte >> 4, byte & 0x0F)

-
 transfers = os.environ.get('TRANSFERS', 'transfers')

 IP = 0x0800
@ -43,6 +42,7 @@ class Frame:
    def __init__(self, pkt):
        ((self.time, self.time_usec, _), frame) = pkt

+        
        # Ethernet
        (self.eth_dhost,
         self.eth_shost,
@ -132,15 +132,17 @@ class Frame:


    def get_src_addr(self):
+        if not hasattr(self, "_src_addr"):
            saddr = struct.pack('!i', self.saddr)
-        self.src_addr = socket.inet_ntoa(saddr)
-        return self.src_addr
+            self._src_addr = socket.inet_ntoa(saddr)
+        return self._src_addr
    src_addr = property(get_src_addr)

    def get_dst_addr(self):
+        if not hasattr(self, "_dst_addr"):
            daddr = struct.pack('!i', self.daddr)
-        self.dst_addr = socket.inet_ntoa(daddr)
-        return self.dst_addr
+            self._dst_addr = socket.inet_ntoa(daddr)
+        return self._dst_addr
    dst_addr = property(get_dst_addr)

    def __repr__(self):
@ -231,7 +233,7 @@ class TCP_Recreate:



-        return ethhdr + iphdr + tcphdr + str(payload)
+        return ethhdr + iphdr + tcphdr + bytes(payload)

    def write_pkt(self, timestamp, cli, payload, flags=0):
        p = self.packet(cli, payload, flags)
@ -309,16 +311,14 @@ class TCP_Resequence:

        pending = self.pending[xdi]
        # Get a sorted list of sequence numbers
-        keys = pending.keys()
-        keys.sort()
+        keys = sorted(pending)

        # Build up return value
-        gs = gapstr.GapString()
+        gs = TriloBytes()
        if keys:
-            f = pending[keys[0]]
-            ret = (xdi, f, gs)
+            first = pending[keys[0]]
        else:
-            ret = (xdi, None, gs)
+            first = None

        # Fill in gs with our frames
        for key in keys:
@ -328,13 +328,14 @@ class TCP_Resequence:
            frame = pending[key]
            if key > seq:
                # Dropped frame(s)
-                if key - seq > 6000:
-                    print "Gosh, bob, %d dropped octets sure is a lot!" % (key - seq)
-                gs.append(key - seq)
+                dropped = key - seq
+                if dropped > 6000:
+                    print("Gosh, %d dropped octets sure is a lot!" % (dropped))
+                gs += [None] * dropped
                seq = key
            if key == seq:
                # Default
-                gs.append(frame.payload)
+                gs += frame.payload
                seq += len(frame.payload)
                del pending[key]
            elif key < seq:
@ -349,13 +350,14 @@ class TCP_Resequence:
                    self.handle = self.handle_drop
        if seq != pkt.ack:
            # Drop at the end
-            if pkt.ack - seq > 6000:
-                print 'Large drop at end of session!'
-                print '    %s' % ((pkt, pkt.time),)
-                print '    %x  %x' % (pkt.ack, seq)
-            gs.append(pkt.ack - seq)
+            dropped = pkt.ack - seq
+            if dropped > 6000:
+                print('Large drop at end of session!')
+                print('    %s' % ((pkt, pkt.time),))
+                print('    %x  %x' % (pkt.ack, seq))
+            gs += [None] * dropped

-        return ret
+        return (xdi, first, gs)


    def handle(self, pkt):
@ -447,14 +449,14 @@ class Dispatch:
        if not literal:
            parts = filename.split(':::')
            fn = parts[0]
-            fd = file(fn)
+            fd = open(fn, "rb")
            pc = pcap.open(fd)
            if len(parts) > 1:
                pos = int(parts[1])
                fd.seek(pos)
            self._read(pc, fn, fd)
        else:
-            fd = file(filename)
+            fd = open(filename, "rb")
            pc = pcap.open(fd)
            self._read(pc, filename, fd)

@ -490,7 +492,7 @@ class Dispatch:
 class NeedMoreData(Exception):
    pass

-class Packet(UserDict.DictMixin):
+class Packet:
    """Base class for a packet from a binary protocol.

    This is a base class for making protocol reverse-engineering easier.
@ -549,43 +551,46 @@ class Packet(UserDict.DictMixin):
            assert a in b, ('%r not in %r' % (a, b))

    def show(self):
-        print '%s %3s: %s' % (self.__class__.__name__,
+        print('%s %3s: %s' % (self.__class__.__name__,
                              self.opcode,
-                              self.opcode_desc)
+                              self.opcode_desc))
        if self.firstframe:
-            print '    %s:%d -> %s:%d (%s.%06dZ)' % (self.firstframe.src_addr,
+            print('    %s:%d -> %s:%d (%s.%06dZ)' % (self.firstframe.src_addr,
                                                     self.firstframe.sport,
                                                     self.firstframe.dst_addr,
                                                     self.firstframe.dport,
                                                     time.strftime('%Y-%m-%dT%T', time.gmtime(self.firstframe.time)),
-                                                     self.firstframe.time_usec)
+                                                     self.firstframe.time_usec))

        if self.parts:
            dl = len(self.parts[-1])
            p = []
+            xp = []
            for x in self.parts[:-1]:
                if x == dl:
                    p.append('%3d!' % x)
+                    xp.append('%3x!' % x)
                else:
                    p.append('%3d' % x)
-            print '           parts: (%s) +%d bytes' % (','.join(p), dl)
+                    xp.append('%3x' % x)
+            print('           parts: (%s) +%d octets' % (','.join(p), dl))
+            print('         0xparts: (%s) +%x octets' % (','.join(xp), dl))

-        keys = self.params.keys()
-        keys.sort()
+        keys = sorted(self.params)
        for k in keys:
-            print '    %12s: %s' % (k, self.params[k])
+            print('    %12s: %s' % (k, self.params[k]))

        if self.subpackets:
            for p in self.subpackets:
                p.show()
        elif self.payload:
            try:
-                self.payload.hexdump()
+                hexdump(self.payload)
            except AttributeError:
-                print '         payload: %r' % self.payload
+                print('         payload: %r' % self.payload)

    def parse(self, data):
-        """Parse a chunk of data (possibly a GapString).
+        """Parse a chunk of data (possibly a TriloBytes).

        Anything returned is not part of this packet and will be passed
        in to a subsequent packet.
@ -600,7 +605,7 @@ class Packet(UserDict.DictMixin):
        """Handle data from a Session class."""

        data = self.parse(data)
-        if self.opcode <> None:
+        if self.opcode != None:
            try:
                f = getattr(self, 'opcode_%s' % self.opcode)
            except AttributeError:
@ -642,12 +647,12 @@ class Session:

        pass

-    def handle(self, is_srv, frame, gs, lastpos):
+    def handle(self, is_srv, frame, data, lastpos):
        """Handle a data burst.

        @param is_srv   Is this from the server?
        @param frame    A frame associated with this packet, or None if it's all drops
-        @param gs       A gapstring of the data
+        @param data     A TriloBytes of the data
        @param lastpos  Last position in the source file, for debugging

        """
@ -659,21 +664,21 @@ class Session:
        try:
            saddr = frame.saddr
            try:
-                (f, data) = self.pending.pop(saddr)
+                (f, buf) = self.pending.pop(saddr)
            except KeyError:
                f = frame
-                data = gapstr.GapString()
-            data.extend(gs)
+                buf = TriloBytes()
+            buf += data
            try:
-                while data:
+                while buf:
                    p = self.Packet(self, f)
-                    data = p.handle(data)
+                    buf = p.handle(buf)
                    self.process(p)
            except NeedMoreData:
-                self.pending[saddr] = (f, data)
+                self.pending[saddr] = (f, buf)
            self.count += 1
        except:
-            print ('Lastpos: %r' % (lastpos,))
+            print('Lastpos: %r' % (lastpos,))
            raise

    def process(self, packet):
@ -698,11 +703,11 @@ class Session:
        fn = '%d-%s~%d-%s~%d---%s' % (frame.time,
                                      frame.src_addr, frame.sport,
                                      frame.dst_addr, frame.dport,
-                                      urllib.quote(fn, ''))
+                                      urllib.parse.quote(fn, ''))
        fullfn = os.path.join(self.basename, fn)
        fullfn2 = os.path.join(self.basename2, fn)
-        print '  writing %s' % (fn,)
-        fd = file(fullfn, 'w')
+        print('  writing %s' % (fn,))
+        fd = open(fullfn, 'wb')
        try:
            os.unlink(fullfn2)
        except OSError:
@ -721,7 +726,9 @@ class Session:
 class HtmlSession(Session):
    def __init__(self, frame):
        Session.__init__(self, frame)
-        self.sessfd = self.open_out('session.html')
+        fd = self.open_out('session.html')
+        fbuf = io.BufferedWriter(fd, 1024)
+        self.sessfd = io.TextIOWrapper(fbuf, encoding="utf-8")
        self.sessfd.write('''<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
--- a/netarch/py_pcap.py
+++ b/netarch/py_pcap.py
@ -1,13 +1,16 @@
-#! /usr/bin/python
+#! /usr/bin/python3

 import struct
+import builtins

 _MAGIC = 0xA1B2C3D4

-class pcap:
-    def __init__(self, stream, mode='rb', snaplen=65535, linktype=1):
+class PcapFile:
+    def __init__(self, stream, mode='r', snaplen=65535, linktype=1):
+        if 'b' not in mode:
+            mode += 'b'
        try:
-            self.stream = file(stream, mode)
+            self.stream = builtins.open(stream, mode)
        except TypeError:
            self.stream = stream
        try:
@ -16,7 +19,7 @@ class pcap:
        except IOError:
            hdr = None

-        if hdr:
+        if 'r' in mode:
            # We're in read mode
            self._endian = None
            for endian in '<>':
@ -71,17 +74,22 @@ class pcap:
                break
            yield r

-
-open = pcap
-open_offline = pcap
+open = PcapFile
+pcap = PcapFile
+open_offline = PcapFile


 if __name__ == '__main__':
-    p = open('test.pcap', 'w')  # Create a new file
-    p.write(((0, 0, 3), 'foo')) # Add a packet
-    p.write(((0, 0, 3), 'bar'))
+    import io
+    
+    f = io.BytesIO()
+    p = PcapFile(f, 'w')
+    p.write(((0, 0, 3), b'foo')) # Add a packet
+    p.write(((0, 0, 3), b'bar'))
    del p
-    p = open(file('test.pcap')) # Also takes file objects
+    
+    f.seek(0)
+    p = PcapFile(f)
    assert ((p.version, p.thiszone, p.sigfigs, p.snaplen, p.linktype) ==
            ((2, 4), 0, 0, 65535, 1))
-    assert ([i for i in p] == [((0, 0, 3), 'foo'), ((0, 0, 3), 'bar')])
+    assert ([i for i in p] == [((0, 0, 3), b'foo'), ((0, 0, 3), b'bar')])
--- a/netarch/trilobytes.py
+++ b/netarch/trilobytes.py
@ -0,0 +1,146 @@
+#! /usr/bin/python3
+
+## 2008, 2018 Neale Pickett
+
+import itertools
+
+class TriloBytes:
+    r"""Three-level byte array (0, 1, Missing).
+
+    This allows you to represent on-wire transactions with holes in the middle,
+    due to eg. dropped packets.
+
+    Every element is either an int octet value or None; None marks a missing
+    octet.  bytes() fills each missing octet from the `drop` pattern, which is
+    cycled by position within the array.
+
+    >>> tb = TriloBytes(b'hi')
+    >>> bytes(tb)
+    b'hi'
+    >>> bytes(tb[:40])
+    b'hi'
+
+    >>> tb = TriloBytes(b'hi') + [None] * 3
+    >>> bytes(tb)
+    b'hi???'
+    >>> bytes(tb[:40])
+    b'hi???'
+    >>> bytes(tb[:3])
+    b'hi?'
+    >>> bytes(tb[-4:])
+    b'i???'
+    >>> bytes(tb + tb)
+    b'hi???hi???'
+    >>> bytes(tb ^ 1)
+    b'ih???'
+    >>> bytes(tb ^ [32, 1])
+    b'Hh???'
+
+    >>> tb = TriloBytes(b'hi', drop=b'DROP')
+    >>> bytes(tb)
+    b'hi'
+    >>> tb += [None] * 7
+    >>> bytes(tb)
+    b'hiOPDROPD'
+
+    >>> tb = TriloBytes.fromhex('00') ^ 1
+    >>> tb[0]
+    1
+
+    >>> bytes(TriloBytes.fromhex('00'))
+    b'\x00'
+    """
+
+    def __init__(self, initializer=(), drop=b'?'):
+        """Build a TriloBytes from an iterable.
+
+        @param initializer  Iterable of octet values; None marks a missing octet
+        @param drop         bytes pattern that bytes() substitutes for missing octets
+        """
+
+        self._drop = drop
+        self._contents = tuple(initializer)
+
+    @classmethod
+    def fromhex(cls, string):
+        """Build a TriloBytes from a string of hex digits.
+
+        >>> bytes(TriloBytes.fromhex("616263"))
+        b'abc'
+        """
+
+        return cls(bytes.fromhex(string))
+
+    def __len__(self):
+        """Number of elements, missing ones included.
+
+        >>> len(TriloBytes(b'abc'))
+        3
+        """
+
+        return len(self._contents)
+
+    def __bool__(self):
+        """True when there is at least one element (present or missing).
+
+        >>> 10 if TriloBytes() else -10
+        -10
+        >>> 10 if TriloBytes(b'a') else -10
+        10
+        """
+
+        return len(self) > 0
+
+    # Python 2 spelling of __bool__; kept so explicit callers keep working.
+    __nonzero__ = __bool__
+
+    def __getitem__(self, key):
+        """Index or slice.
+
+        An integer index returns the stored element: an int, or None for a
+        missing octet.  A slice returns a new TriloBytes with the same drop
+        pattern.
+
+        >>> TriloBytes(b'abc')[1]
+        98
+        >>> (TriloBytes(b'a') + [None])[1] is None
+        True
+        >>> TriloBytes(b'abc')[1:]
+        <TriloBytes missing 0 of 2>
+        """
+
+        ret = self._contents[key]
+        if isinstance(key, slice):
+            return TriloBytes(ret, self._drop)
+        return ret
+
+    def __iter__(self):
+        """Iterate over the elements; missing octets come out as None."""
+
+        return iter(self._contents)
+
+    def __bytes__(self):
+        """Render as bytes, filling missing octets from the drop pattern.
+
+        The drop pattern advances with every element, present or not, so the
+        octet used for a hole depends on the hole's position.
+        """
+
+        return bytes((d if v is None else v for v,d in zip(self,itertools.cycle(self._drop))))
+
+    def __add__(self, other):
+        """Concatenate with another TriloBytes or any iterable of ints/Nones.
+
+        The result keeps this object's drop pattern.
+        """
+
+        try:
+            contents = self._contents + other._contents
+        except AttributeError:
+            # Not a TriloBytes: treat it as a plain iterable of elements
+            contents = self._contents + tuple(other)
+        return TriloBytes(contents, self._drop)
+
+    def __eq__(self, other):
+        """Equal when the other object holds the same elements.
+
+        The drop pattern is not compared.  Objects without `_contents`
+        (bytes, lists, ...) never compare equal.
+        """
+
+        try:
+            return self._contents == other._contents
+        except AttributeError:
+            return False
+
+    def __hash__(self):
+        """Hash of the elements, consistent with __eq__."""
+
+        return hash(self._contents)
+
+    def __xor__(self, mask):
+        """XOR every present octet with a repeating mask.
+
+        @param mask  A single int, or an indexable sequence of ints that is
+                     cycled over the contents.  A None in either the contents
+                     or the mask produces a missing octet in the result.
+        """
+
+        try:
+            mask[0]
+        except TypeError:
+            # Scalar mask: promote to a one-element sequence
+            mask = [mask]
+        return TriloBytes(((None if x is None or y is None else x^y) for x,y in zip(self._contents, itertools.cycle(mask))), drop=self._drop)
+
+    def __repr__(self):
+        """
+        >>> TriloBytes(b'abc')
+        <TriloBytes missing 0 of 3>
+        >>> TriloBytes(b'abc') + [None]
+        <TriloBytes missing 1 of 4>
+        """
+
+        return '<TriloBytes missing %d of %d>' % (self.missing(), len(self))
+
+    def decode(self, codec):
+        """Render as bytes (see __bytes__) and decode with the given codec."""
+
+        return bytes(self).decode(codec)
+
+    def missing(self):
+        """Count the missing octets.
+
+        >>> TriloBytes(b'abc').missing()
+        0
+        >>> (TriloBytes(b'abc') + [None, None]).missing()
+        2
+        """
+        return self._contents.count(None)
+
+    def map(self, func, *args):
+        """Return a generator that replaces each missing element with func(None, *args).
+
+        Present octets pass through unchanged.
+
+        NOTE(review): func is only ever called for missing elements; if the
+        intent was to transform the present octets, the condition is inverted.
+        Confirm against callers before changing it.
+        """
+
+        return (v if v is not None else func(v, *args) for v in self)
+
+
+if __name__ == '__main__':
+    # Executed as a script: run the doctests embedded in this module
+    from doctest import testmod
+    testmod()
--- a/netarch/unpack.py
+++ b/netarch/unpack.py
@ -0,0 +1,73 @@
+#! /usr/bin/python3
+
+ENDIAN_LITTLE = 1
+ENDIAN_BIG = 2
+ENDIAN_MIDDLE = 3
+ENDIAN_NETWORK = ENDIAN_BIG
+
+# Octet read order for ENDIAN_MIDDLE: the two octets of each 16-bit pair are swapped
+_MIDDLE_ORDER = (1, 0, 3, 2,   5, 4, 7, 6)
+
+class Unpacker:
+    """Class that lets you peel unsigned integers off the front of a buffer
+
+    Each call consumes the octets it decodes; whatever is left stays in `buf`.
+
+    >>> u = Unpacker(bytes((1, 0,2, 0,0,0,3, 0,0,0,0,0,0,0,4)))
+    >>> u.uint8()
+    1
+    >>> u.uint16()
+    2
+    >>> u.uint32()
+    3
+    >>> u.uint64()
+    4
+
+    >>> u = Unpacker(bytes((1,0, 104,105)), ENDIAN_LITTLE)
+    >>> u.uint16()
+    1
+    >>> u.buf
+    b'hi'
+
+    >>> u = Unpacker(bytes((1,0, 0,2)))
+    >>> u.uint16(ENDIAN_LITTLE)
+    1
+    >>> u.uint(16, ENDIAN_BIG)
+    2
+
+    >>> u = Unpacker(bytes((0,1,2,3)), ENDIAN_MIDDLE)
+    >>> '%08x' % u.uint32()
+    '01000302'
+
+    >>> u = Unpacker(bytes((7,)), ENDIAN_MIDDLE)
+    >>> u.uint8()
+    7
+    """
+
+    def __init__(self, buf, endian=ENDIAN_NETWORK):
+        """
+        @param buf     Sliceable buffer whose elements are ints (eg. bytes)
+        @param endian  Default byte order, one of the ENDIAN_* constants
+        """
+
+        self.endian = endian
+        self.buf = buf
+
+    def uint(self, size, endian=None):
+        """Remove an unsigned integer from the front of the buffer and return it.
+
+        @param size    Width in bits: 8, 16, 32 or 64
+        @param endian  Byte order for this call; defaults to the one given
+                       to the constructor
+
+        Raises ValueError for an unsupported size or byte order, and
+        IndexError when fewer than size/8 octets remain.  The buffer is
+        left untouched whenever an exception is raised.
+        """
+
+        endian = endian or self.endian
+        if size not in (8, 16, 32, 64):
+            # XXX: I'm pretty sure this can be done, but I don't want to code it up right now.
+            raise ValueError("Can't do weird sizes")
+        noctets = size // 8
+        if endian == ENDIAN_BIG:
+            order = range(noctets)
+        elif endian == ENDIAN_LITTLE:
+            order = range(noctets - 1, -1, -1)
+        elif endian == ENDIAN_MIDDLE:
+            # A lone octet has no partner to swap with; slicing the pair
+            # table would ask for index 1 of a one-octet pull.
+            order = (0,) if noctets == 1 else _MIDDLE_ORDER[:noctets]
+        else:
+            raise ValueError("Unsupported byte order")
+
+        pull = self.buf[:noctets]
+        if len(pull) < noctets:
+            # Check before consuming, so a short read does not eat the tail
+            raise IndexError("Need %d octets, only %d left" % (noctets, len(pull)))
+        self.buf = self.buf[noctets:]
+
+        acc = 0
+        for i in order:
+            acc = (acc << 8) | pull[i]
+        return acc
+
+    def uint8(self, endian=None):
+        """Peel off one octet.  `endian` is accepted for symmetry with the wider readers."""
+        return self.uint(8, endian)
+
+    def uint16(self, endian=None):
+        """Peel off a 16-bit unsigned integer."""
+        return self.uint(16, endian)
+
+    def uint32(self, endian=None):
+        """Peel off a 32-bit unsigned integer."""
+        return self.uint(32, endian)
+
+    def uint64(self, endian=None):
+        """Peel off a 64-bit unsigned integer."""
+        return self.uint(64, endian)
+
+if __name__ == "__main__":
+    # Executed as a script: run the doctests embedded in this module
+    from doctest import testmod
+    testmod()