Start porting my (better) Go library over

2020-09-22 17:49:04 -06:00 · 2020-09-22 17:49:04 -06:00 · 40be4ee431
parent c3ced6f1c4
commit 40be4ee431
6 changed files with 493 additions and 244 deletions
--- a/init.py
+++ b/init.py
@ -1,194 +1,197 @@
-#! /usr/bin/python3

-import binascii
-import sys
-import struct
-from . import ip
+import typing
+import io
+from . import binary
+
+class Error(Exception):
+    """Base class for netshovel exceptions"""
+    pass


+class ShortError(Error):
+    """Exception raised when not enough data is available.

-def cstring(buf):
-    "Return buf if buf were a C-style (NULL-terminate) string"
+    Attributes:
+        wanted -- how much data we wanted
+        available -- how much data we had
+    """
    
-    i = buf.index('\0')
-    return buf[:i]
-
-
-def assert_equal(a, b):
-    assert a == b, ('%r != %r' % (a, b))
-
-
-def assert_in(a, *b):
-    assert a in b, ('%r not in %r' % (a, b))
-
-
-##
-## Binary and other base conversions
-##
-
-class BitVector:
-    def __init__(self, i=0, length=None):
-        try:
-            self._val = 0
-            for c in i:
-                self._val <<= 8
-                self._val += ord(c)
-            if length is not None:
-                self._len = length
-            else:
-                self._len = len(i) * 8
-        except TypeError:
-            self._val = i
-            if length is not None:
-                self._len = length
-            else:
-                self._len = 0
-                while i > 0:
-                    i >>= 1
-                    self._len += 1
-
-    def __len__(self):
-        return self._len
-
-    def __getitem__(self, idx):
-        if idx > self._len:
-            raise IndexError()
-        idx = self._len - idx
-        return int((self._val >> idx) & 1)
-
-    def __getslice__(self, a, b):
-        if b > self._len:
-            b = self._len
-        i = self._val >> (self._len - b)
-        l = b - a
-        mask = (1 << l) - 1
-        return BitVector(i & mask, length=l)
-
-    def __iter__(self):
-        """Iterate from LSB to MSB"""
-
-        v = self._val
-        for _ in range(self._len):
-            yield int(v & 1)
-            v >>= 1
+    def __init__(self, wanted:int, available:int):
+        self.wanted = wanted
+        self.available = available

    def __str__(self):
-        r = ''
-        v = self._val
-        i = self._len
-        while i > 8:
-            o = ((v >> (i - 8)) & 0xFF)
-            r += chr(o)
-            i -= 8
-        if i > 0:
-            o = v & ((1 << i) - 1)
-            r += chr(o)
-        return r
-
-    def __int__(self):
-        return self._val
-
-    def __repr__(self):
-        l = list(self)
-        l.reverse()
-        return '<BitVector ' + ''.join(str(x) for x in l) + '>'
-
-    def __add__(self, i):
-        if isinstance(i, BitVector):
-            l = len(self) + len(i)
-            v = (int(self) << len(i)) + int(i)
-            return BitVector(v, l)
-        else:
-            raise ValueError("Can't extend with this type yet")
-
-    def bitstr(self):
-        bits = [str(x) for x in self]
-        bits.reverse()
-        return ''.join(bits)
+        """Return a message stating how much data was wanted and how much was available."""
+        # __init__ stores the second count as self.available; no self.got
+        # attribute is ever set, so reading it would raise AttributeError.
+        return "Not enough data available: wanted %d, got %d" % (self.wanted, self.available)


-def bin(i, bits=None):
-    """Return the binary representation of i"""
+class MissingError(Error):
+    """Exception raised when gaps were present for code that can't handle gaps.
+    """

-    return BitVector(i, bits).bitstr()
-
-
-def unhex(s):
-    """Decode a string as hex, stripping whitespace first"""
-
-    return binascii.unhexlify(s.replace(' ', ''))
-
-
-def pp(value, bits=16):
-    hexfmt = '%%0%dx' % (bits / 4)
-    return '%6d  0x%s  %s' % (value, (hexfmt % value), bin(value, bits))
-
-##
-## Codecs
-##
-import codecs
-import string
-
-b64alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-
-def from_b64(s, alphabet, codec='base64'):
-    tr = alphabet.maketrans(b64alpha)
-    t = s.translate(tr)
-    return t.decode(codec)
-
-class Esab64Codec(codecs.Codec):
-    """Little-endian version of base64."""
-
-    ## This could be made nicer by better conforming to the codecs.Codec
-    ## spec.  For instance, raising the appropriate exceptions.
-    ##
-    ## Using BitVector makes the code very readable, but it is probably
-    ## slow.
-
-    b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-    def decode(self, input, errors='strict'):
-        r = []
-        for i in range(0, len(input), 4):
-            v = BitVector()
-            for c in input[i:i+4]:
-                if c in ('=', ' ', '\n'):
-                    break
-                v += BitVector(self.b64_chars.index(c), 6)
-
-            # Normal base64 would start at the beginning
-            b = (v[10:12] + v[ 0: 6] +
-                 v[14:18] + v[ 6:10] +
-                 v[18:24] + v[12:14])
-
-            r.append(str(b))
-        return ''.join(r), len(input)
-
-    def encode(self, input, errors='strict'):
-        raise NotImplementedError()
-
-
-class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter):
+    def __init__(self):
        pass

-class Esab64StreamReader(Esab64Codec, codecs.StreamReader):
-    pass
+    def __str__(self):
+        return "Operation on missing bytes"

-def _registry(encoding):
-    if encoding == 'esab64':
-        c = Esab64Codec()
-        return (c.encode, c.decode,
-                Esab64StreamReader, Esab64StreamWriter)

-codecs.register(_registry)
+class namedField(typing.NamedTuple):
+    key: str
+    value: str

-def main(session):
-    s = None
-    reseq = ip.Dispatch(*sys.argv[1:])
-    for _, d in reseq:
-        srv, first, chunk = d
-        if not s:
-            s = session(first)
-        s.handle(srv, first, chunk, reseq.last)
+class headerField(typing.NamedTuple):
+    name: str
+    bits: int
+    value: typing.Any
+    order: binary.ByteOrder

-Session = ip.Session
-Packet = ip.Packet
+class Packet:
+    def __init__(self, when, payload):
+        self.opcode = -1
+        self.description = "Undefined"
+        self.when = when
+        self.payload = payload
+        self.header = []
+        self.fields = []
+
+    def describeType(self) -> str:
+        """Returns a string with timestamp, opcode, and description of this packet"""
+        return "%s Opcode %d: %s"  % (self.when, self.opcode, self.description)
+    
+    def describeFields(self) -> str:
+        """Return a multi-line string describing the fields set on this packet.
+
+        Each entry of self.fields is rendered on its own line as
+        "    key: value", in the order the entries appear in the list.
+        Returns an empty string when no fields have been set.
+        """
+        # The text must be %-formatted before it is collected:
+        # list.append() accepts exactly one argument.
+        return "".join("    %s: %s\n" % (k, v) for k, v in self.fields)
+
+    def describeHeader(self) -> str:
+        """Return a multi-line string diagramming this packet's header structure.
+
+        The diagram is 32 bits wide, two characters per bit.  Each entry of
+        self.header is drawn as a cell labelled "name (0xvalue)"; a field that
+        does not fit in the remainder of a row continues on the next row.
+        """
+        rule = "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"
+        out = io.StringIO()
+        out.write(" 0                               1                            \n")
+        out.write(" 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f\n")
+
+        bitOffset = 0
+        for f in self.header:
+            bits = f.bits
+            while bits > 0:
+                if bitOffset == 0:
+                    out.write(rule)
+                # Draw only as much of the field as fits in the rest of this row.
+                linebits = min(bits, 0x20 - bitOffset)
+
+                nameval = "%s (0x%x)" % (f.name, f.value)
+                out.write("|" + nameval.center(linebits*2-1))
+
+                bitOffset += linebits
+                bits -= linebits
+                # Close the row once it is full.  The test is on bitOffset, not
+                # linebits: a row filled by several narrow fields must wrap too,
+                # otherwise the next field is given zero bits on every pass and
+                # the while loop never terminates.
+                if bitOffset == 0x20:
+                    out.write("|\n")
+                    bitOffset = 0
+        # Terminate a partially filled last row before the closing rule.
+        if bitOffset:
+            out.write("|\n")
+        out.write(rule)
+        return out.getvalue()
+
+    def describe(self) -> str:
+        """Return a multi-line string describing this packet
+
+        This shows the timestamp, opcode, and description, followed by
+        the output of describeFields(), describeHeader(),
+        and the payload's hexdump() method, in that order.
+
+        This will quickly get unwieldy, especially for large conversations.
+        You are encouraged to implement your own describe() method.
+        """
+        out = io.StringIO()
+        out.write(self.describeType())
+        out.write("\n")
+        out.write(self.describeFields())
+        out.write(self.describeHeader())
+        out.write(self.payload.hexdump())
+        return out.getvalue()
+
+    def setValue(self, key:str, value:str):
+        """Set a value
+
+        This is intended to be used to note debugging information
+        that you'd like to see on each packet.
+        """
+        self.fields.append(namedField(key, value))
+
+    def setString(self, key:str, value:str):
+        """Set a string value, displaying its Python string representation"""
+        self.setValue(key, repr(value))
+
+    def setInt(self, key:str, value:int):
+        """Set an int value, displaying its decimal and hexadecimal representations"""
+        self.setValue(key, "%d == 0x%x" % (value, value))
+    setUInt = setInt
+
+    def setUInt32(self, key:str, value:int):
+        """Set a Uint32 value, displaying its decimal and 0-padded hexadecimal representations"""
+        # NOTE(review): %04x pads to only 4 hex digits, while a 32-bit value
+        # spans 8 (%08x) -- confirm which width is intended.
+        self.setValue(key, "%d == %04x" % (value, value))
+
+    def setBytes(self, key:str, value:bytes):
+        """Set a bytes value, displaying the hex encoding of the bytes
+
+        The value is recorded as a str of lowercase hexadecimal digits,
+        two per byte.
+        """
+        # bytes.hex() already yields a str, so no codec round-trip is needed
+        # (bytes objects have no encode() method), and it avoids binascii,
+        # which this module no longer imports.
+        self.setValue(key, bytes(value).hex())
+
+    def peel(self, octets:int) -> bytes:
+        """Peel octets bytes off the Payload, returning those bytes"""
+        pllen = len(self.payload)
+        if octets > pllen:
+            raise ShortError(octets, pllen)
+        buf = self.payload[:octets]
+        if buf.missing() > 0:
+            raise MissingError()
+        self.payload = self.payload[octets:]
+        return buf.bytes()
+
+    def addHeaderField(self, order:binary.ByteOrder, name:str, bits:int, value:typing.Any):
+        """Add a field to the header field description."""
+        h = headerField(name, bits, value, order)
+        self.header.append(h)
+
+    def readUint(self, order:binary.ByteOrder, bits:int, name:str):
+        """Peel an unsigned integer of size bits, adding it to the header field
+
+        bits must be 8, 16, 32, or 64; any other width raises RuntimeError.
+        The decoded value is recorded with addHeaderField() and returned.
+        """
+        if bits not in (8, 16, 32, 64):
+            raise RuntimeError("Weird number of bits: %d" % bits)
+        raw = self.peel(bits >> 3)
+        if bits == 8:
+            # A single octet has no byte order to apply
+            value = raw[0]
+        else:
+            # Uint16 / Uint32 / Uint64 on the byte order, chosen by width
+            decode = getattr(order, "Uint%d" % bits)
+            value = decode(raw)
+        self.addHeaderField(order, name, bits, value)
+        return value
+
+    def uint8(self, name:str) -> int:
+        "Peel off a uint8 (aka byte)"
+        return self.readUint(binary.LittleEndian, 8, name)
+    
+    def uint16le(self, name:str) -> int:
+        "Peel off a uint16, little-endian"
+        return self.readUint(binary.LittleEndian, 16, name)
+
+    def uint32le(self, name:str) -> int:
+        "Peel off a uint32, little-endian"
+        return self.readUint(binary.LittleEndian, 32, name)
+
+    def uint64le(self, name:str) -> int:
+        "Peel off a uint64, little-endian"
+        return self.readUint(binary.LittleEndian, 64, name)
+
+    def uint16be(self, name:str) -> int:
+        "Peel off a uint16, big-endian"
+        return self.readUint(binary.BigEndian, 16, name)
+
+    def uint32be(self, name:str) -> int:
+        "Peel off a uint32, big-endian"
+        return self.readUint(binary.BigEndian, 32, name)
+
+    def uint64be(self, name:str) -> int:
+        "Peel off a uint64, big-endian"
+        return self.readUint(binary.BigEndian, 64, name)
--- a/binary.py
+++ b/binary.py
@ -0,0 +1,91 @@
+"""Endianness conversions.
+
+This is a blatant rip-off of the golang binary library.
+I'm not too proud to steal a nicely-thought-out API.
+
+"""
+
+def byte(v):
+    return v & 0xff
+
+class ByteOrder:
+    "A ByteOrder specifies how to convert byte sequences into 16-, 32-, or 64-bit unsigned integers."
+    pass
+
+class LittleEndian(ByteOrder):
+    """Little-Endian byte order
+
+    UintNN decodes the first NN/8 octets of b, least significant octet first.
+    PutUintNN returns the low NN bits of v as bytes in that same order.
+
+    The converters are classmethods so they work both on the class itself
+    (LittleEndian.Uint16(b)) and on an instance.
+    """
+
+    @classmethod
+    def Uint16(cls, b:bytes) -> int:
+        return b[0] | (b[1]<<8)
+
+    @classmethod
+    def PutUint16(cls, v:int) -> bytes:
+        return bytes([
+            byte(v),
+            byte(v>>8),
+        ])
+
+    @classmethod
+    def Uint32(cls, b:bytes) -> int:
+        return b[0] | (b[1]<<8) | (b[2]<<16) | (b[3]<<24)
+
+    # This four-octet writer was previously also named PutUint16, which
+    # replaced the two-octet version above and left no PutUint32 at all.
+    @classmethod
+    def PutUint32(cls, v:int) -> bytes:
+        return bytes([
+            byte(v),
+            byte(v>>8),
+            byte(v>>16),
+            byte(v>>24),
+        ])
+
+    @classmethod
+    def Uint64(cls, b:bytes) -> int:
+        return b[0] | (b[1]<<8) | (b[2]<<16) | (b[3]<<24) | \
+            (b[4]<<32) | (b[5]<<40) | (b[6]<<48) | (b[7]<<56)
+
+    @classmethod
+    def PutUint64(cls, v:int) -> bytes:
+        return bytes([
+            byte(v),
+            byte(v>>8),
+            byte(v>>16),
+            byte(v>>24),
+            byte(v>>32),
+            byte(v>>40),
+            byte(v>>48),
+            byte(v>>56),
+        ])
+
+class BigEndian(ByteOrder):
+    """Big-Endian byte order
+
+    UintNN decodes the first NN/8 octets of b, most significant octet first.
+    PutUintNN returns the low NN bits of v as bytes in that same order.
+
+    The converters are classmethods so they work both on the class itself
+    (BigEndian.Uint16(b)) and on an instance.
+    """
+
+    @classmethod
+    def Uint16(cls, b:bytes) -> int:
+        return b[1] | (b[0]<<8)
+
+    @classmethod
+    def PutUint16(cls, v:int) -> bytes:
+        return bytes([
+            byte(v>>8),
+            byte(v),
+        ])
+
+    @classmethod
+    def Uint32(cls, b:bytes) -> int:
+        return b[3] | (b[2]<<8) | (b[1]<<16) | (b[0]<<24)
+
+    # This four-octet writer was previously also named PutUint16, which
+    # replaced the two-octet version above and left no PutUint32 at all.
+    @classmethod
+    def PutUint32(cls, v:int) -> bytes:
+        return bytes([
+            byte(v>>24),
+            byte(v>>16),
+            byte(v>>8),
+            byte(v),
+        ])
+
+    @classmethod
+    def Uint64(cls, b:bytes) -> int:
+        return b[7] | (b[6]<<8) | (b[5]<<16) | (b[4]<<24) | \
+            (b[3]<<32) | (b[2]<<40) | (b[1]<<48) | (b[0]<<56)
+
+    @classmethod
+    def PutUint64(cls, v:int) -> bytes:
+        return bytes([
+            byte(v>>56),
+            byte(v>>48),
+            byte(v>>40),
+            byte(v>>32),
+            byte(v>>24),
+            byte(v>>16),
+            byte(v>>8),
+            byte(v),
+        ])
--- a/ip.py
+++ b/ip.py
@ -533,9 +533,6 @@ class Packet:
    def __iter__(self):
        return self.params.__iter__()

-    def has_key(self, k):
-        return self.params.has_key(k)
-
    def keys(self):
        return self.params.keys()

@ -586,8 +583,8 @@ class Packet:
            except AttributeError:
                print('         payload: %r' % self.payload)

-    def parse(self, data):
-        """Parse a chunk of data (possibly a TriloBytes).
+    def decode(self, data):
+        """Decode a chunk of data (possibly a TriloBytes).

        Anything returned is not part of this packet and will be passed
        in to a subsequent packet.
@ -596,12 +593,12 @@ class Packet:

        self.parts = [data]
        self.payload = data
-        return None
+        return False

    def handle(self, data):
        """Handle data from a Session class."""

-        data = self.parse(data)
+        data = self.decode(data)
        if self.opcode != None:
            try:
                f = getattr(self, 'opcode_%s' % self.opcode)
--- a/orig.py
+++ b/orig.py
@ -0,0 +1,194 @@
+#! /usr/bin/python3
+
+import binascii
+import sys
+import struct
+from . import ip
+
+
+
+def cstring(buf):
+    "Return buf if buf were a C-style (NULL-terminate) string"
+
+    i = buf.index('\0')
+    return buf[:i]
+
+
+def assert_equal(a, b):
+    assert a == b, ('%r != %r' % (a, b))
+
+
+def assert_in(a, *b):
+    assert a in b, ('%r not in %r' % (a, b))
+
+
+##
+## Binary and other base conversions
+##
+
+class BitVector:
+    def __init__(self, i=0, length=None):
+        try:
+            self._val = 0
+            for c in i:
+                self._val <<= 8
+                self._val += ord(c)
+            if length is not None:
+                self._len = length
+            else:
+                self._len = len(i) * 8
+        except TypeError:
+            self._val = i
+            if length is not None:
+                self._len = length
+            else:
+                self._len = 0
+                while i > 0:
+                    i >>= 1
+                    self._len += 1
+
+    def __len__(self):
+        return self._len
+
+    def __getitem__(self, idx):
+        if idx > self._len:
+            raise IndexError()
+        idx = self._len - idx
+        return int((self._val >> idx) & 1)
+
+    def __getslice__(self, a, b):
+        if b > self._len:
+            b = self._len
+        i = self._val >> (self._len - b)
+        l = b - a
+        mask = (1 << l) - 1
+        return BitVector(i & mask, length=l)
+
+    def __iter__(self):
+        """Iterate from LSB to MSB"""
+
+        v = self._val
+        for _ in range(self._len):
+            yield int(v & 1)
+            v >>= 1
+
+    def __str__(self):
+        r = ''
+        v = self._val
+        i = self._len
+        while i > 8:
+            o = ((v >> (i - 8)) & 0xFF)
+            r += chr(o)
+            i -= 8
+        if i > 0:
+            o = v & ((1 << i) - 1)
+            r += chr(o)
+        return r
+
+    def __int__(self):
+        return self._val
+
+    def __repr__(self):
+        l = list(self)
+        l.reverse()
+        return '<BitVector ' + ''.join(str(x) for x in l) + '>'
+
+    def __add__(self, i):
+        if isinstance(i, BitVector):
+            l = len(self) + len(i)
+            v = (int(self) << len(i)) + int(i)
+            return BitVector(v, l)
+        else:
+            raise ValueError("Can't extend with this type yet")
+
+    def bitstr(self):
+        bits = [str(x) for x in self]
+        bits.reverse()
+        return ''.join(bits)
+
+
+def bin(i, bits=None):
+    """Return the binary representation of i"""
+
+    return BitVector(i, bits).bitstr()
+
+
+def unhex(s):
+    """Decode a string as hex, stripping whitespace first"""
+
+    return binascii.unhexlify(s.replace(' ', ''))
+
+
+def pp(value, bits=16):
+    hexfmt = '%%0%dx' % (bits / 4)
+    return '%6d  0x%s  %s' % (value, (hexfmt % value), bin(value, bits))
+
+##
+## Codecs
+##
+import codecs
+import string
+
+b64alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+
+def from_b64(s, alphabet, codec='base64'):
+    tr = alphabet.maketrans(b64alpha)
+    t = s.translate(tr)
+    return t.decode(codec)
+
+class Esab64Codec(codecs.Codec):
+    """Little-endian version of base64."""
+
+    ## This could be made nicer by better conforming to the codecs.Codec
+    ## spec.  For instance, raising the appropriate exceptions.
+    ##
+    ## Using BitVector makes the code very readable, but it is probably
+    ## slow.
+
+    b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+    def decode(self, input, errors='strict'):
+        r = []
+        for i in range(0, len(input), 4):
+            v = BitVector()
+            for c in input[i:i+4]:
+                if c in ('=', ' ', '\n'):
+                    break
+                v += BitVector(self.b64_chars.index(c), 6)
+
+            # Normal base64 would start at the beginning
+            b = (v[10:12] + v[ 0: 6] +
+                 v[14:18] + v[ 6:10] +
+                 v[18:24] + v[12:14])
+
+            r.append(str(b))
+        return ''.join(r), len(input)
+
+    def encode(self, input, errors='strict'):
+        raise NotImplementedError()
+
+
+class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter):
+    pass
+
+class Esab64StreamReader(Esab64Codec, codecs.StreamReader):
+    pass
+
+def _registry(encoding):
+    if encoding == 'esab64':
+        c = Esab64Codec()
+        return (c.encode, c.decode,
+                Esab64StreamReader, Esab64StreamWriter)
+
+codecs.register(_registry)
+
+def main(session):
+    s = None
+    reseq = ip.Dispatch(*sys.argv[1:])
+    for _, d in reseq:
+        srv, first, chunk = d
+        if not s:
+            s = session(first)
+        s.handle(srv, first, chunk, reseq.last)
+
+Session = ip.Session
+Packet = ip.Packet
--- a/stream.py
+++ b/stream.py
@ -0,0 +1,37 @@
+import typing
+from . import trilobytes
+
+class NamedFile(typing.NamedTuple):
+    """A file object and the path where it lives"""
+    # The open file object
+    File: typing.BinaryIO
+    # The path of that file.  Annotated as str: no name "string" is defined
+    # or imported in this module, so the old annotation raised NameError.
+    Name: str
+
+class Utterance(typing.NamedTuple):
+    """An atomic communication within a Stream.
+
+    Streams consist of a string of Utterances.
+    Each utterance has associated data, and a time stamp.
+        
+    Typically these line up with what crosses the network,
+    but bear in mind that TCP is a streaming protocol,
+    so don't rely on Utterances alone to separate Application-layer packets.
+    """
+
+    When: float
+    Data: trilobytes.TriloBytes
+
+class Stream:
+    """A Stream is one half of a two-way conversation"""
+
+    def __init__(self, net, transport):
+        self.net = net
+        self.transport = transport
+
+    def reassembled(rs):
+        """Called by the TCP assembler when an Utterance can be built"""
+        data = trilobytes.TriloBytes()
+        for r in rs:
+            if r.Skip > 0:
+                data += [None] * r.Skip
+            data + r.Bytes
+        if len(data) > 0
--- a/unpack.py
+++ b/unpack.py
@ -1,73 +0,0 @@
-#! /usr/bin/python3
-
-ENDIAN_LITTLE = 1
-ENDIAN_BIG = 2
-ENDIAN_MIDDLE = 3
-ENDIAN_NETWORK = ENDIAN_BIG
-
-class Unpacker:
-    """Class that lets you peel values off
-    
-    >>> u = Unpacker(bytes((1, 0,2, 0,0,0,3, 0,0,0,0,0,0,0,4)))
-    >>> u.uint8()
-    1
-    >>> u.uint16()
-    2
-    >>> u.uint32()
-    3
-    >>> u.uint64()
-    4
-    
-    >>> u = Unpacker(bytes((1,0, 104,105)), ENDIAN_LITTLE)
-    >>> u.uint16()
-    1
-    >>> u.buf
-    b'hi'
-
-    >>> u = Unpacker(bytes((1,0, 0,2)))
-    >>> u.uint16(ENDIAN_LITTLE)
-    1
-    >>> u.uint(16, ENDIAN_BIG)
-    2
-    
-    >>> u = Unpacker(bytes((0,1,2,3)), ENDIAN_MIDDLE)
-    >>> '%08x' % u.uint32()
-    '01000302'
-    """
-    
-    def __init__(self, buf, endian=ENDIAN_NETWORK):
-        self.endian = endian
-        self.buf = buf
-
-    def uint(self, size, endian=None):
-        endian = endian or self.endian
-        if size not in (8, 16, 32, 64):
-            # XXX: I'm pretty sure this can be done, but I don't want to code it up right now.
-            raise ValueError("Can't do weird sizes")
-        noctets = size // 8
-        if endian == ENDIAN_BIG:
-            r = range(0, noctets)
-        elif endian == ENDIAN_LITTLE:
-            r = range(noctets-1, -1, -1)
-        elif endian == ENDIAN_MIDDLE:
-            r = (1, 0, 3, 2,   5, 4, 7, 6)[:noctets]
-        else:
-            raise ValueError("Unsupported byte order")
-        pull, self.buf = self.buf[:noctets], self.buf[noctets:]
-        acc = 0
-        for i in r:
-            acc = (acc << 8) | pull[i]
-        return acc
-        
-    def uint8(self):
-        return self.uint(8)
-    def uint16(self, endian=None):
-        return self.uint(16, endian)
-    def uint32(self, endian=None):
-        return self.uint(32, endian)
-    def uint64(self, endian=None):
-        return self.uint(64, endian)
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()