From 40be4ee431a2d292bd2159f95af2dce2788d1bcb Mon Sep 17 00:00:00 2001 From: Neale Pickett Date: Tue, 22 Sep 2020 17:49:04 -0600 Subject: [PATCH] Start porting my (better) Go library over --- __init__.py | 331 ++++++++++++++++++++++++++-------------------------- binary.py | 91 +++++++++++++++ ip.py | 11 +- orig.py | 194 ++++++++++++++++++++++++++++++ stream.py | 37 ++++++ unpack.py | 73 ------------ 6 files changed, 493 insertions(+), 244 deletions(-) create mode 100644 binary.py create mode 100644 orig.py create mode 100644 stream.py delete mode 100644 unpack.py diff --git a/__init__.py b/__init__.py index 11c76bd..387c907 100644 --- a/__init__.py +++ b/__init__.py @@ -1,194 +1,197 @@ -#! /usr/bin/python3 -import binascii -import sys -import struct -from . import ip +import typing +import io +from . import binary + +class Error(Exception): + """Base class for netshovel exceptions""" + pass +class ShortError(Error): + """Exception raised when not enough data is available. -def cstring(buf): - "Return buf if buf were a C-style (NULL-terminate) string" - - i = buf.index('\0') - return buf[:i] - - -def assert_equal(a, b): - assert a == b, ('%r != %r' % (a, b)) - - -def assert_in(a, *b): - assert a in b, ('%r not in %r' % (a, b)) - - -## -## Binary and other base conversions -## - -class BitVector: - def __init__(self, i=0, length=None): - try: - self._val = 0 - for c in i: - self._val <<= 8 - self._val += ord(c) - if length is not None: - self._len = length - else: - self._len = len(i) * 8 - except TypeError: - self._val = i - if length is not None: - self._len = length - else: - self._len = 0 - while i > 0: - i >>= 1 - self._len += 1 - - def __len__(self): - return self._len - - def __getitem__(self, idx): - if idx > self._len: - raise IndexError() - idx = self._len - idx - return int((self._val >> idx) & 1) - - def __getslice__(self, a, b): - if b > self._len: - b = self._len - i = self._val >> (self._len - b) - l = b - a - mask = (1 << l) - 1 - return BitVector(i & mask, length=l) - - def __iter__(self): - """Iterate from LSB to MSB""" - - v = self._val - for _ in range(self._len): - yield int(v & 1) - v >>= 1 + Attributes: + wanted -- how much data we wanted + available -- how much data we had + """ + + def __init__(self, wanted:int, available:int): + self.wanted = wanted + self.available = available def __str__(self): - r = '' - v = self._val - i = self._len - while i > 8: - o = ((v >> (i - 8)) & 0xFF) - r += chr(o) - i -= 8 - if i > 0: - o = v & ((1 << i) - 1) - r += chr(o) - return r - - def __int__(self): - return self._val - - def __repr__(self): - l = list(self) - l.reverse() - return '' - - def __add__(self, i): - if isinstance(i, BitVector): - l = len(self) + len(i) - v = (int(self) << len(i)) + int(i) - return BitVector(v, l) - else: - raise ValueError("Can't extend with this type yet") - - def bitstr(self): - bits = [str(x) for x in self] - bits.reverse() - return ''.join(bits) + return "Not enough data available: wanted %d, got %d" % (self.wanted, self.got) -def bin(i, bits=None): - """Return the binary representation of i""" +class MissingError(Error): + """Exception raised when gaps were present for code that can't handle gaps. + """ - return BitVector(i, bits).bitstr() + def __init__(self): + pass + + def __str__(self): + return "Operation on missing bytes" -def unhex(s): - """Decode a string as hex, stripping whitespace first""" +class namedField(typing.NamedTuple): + key: str + value: str - return binascii.unhexlify(s.replace(' ', '')) +class headerField(typing.NamedTuple): + name: str + bits: int + value: typing.Any + order: binary.ByteOrder +class Packet: + def __init__(self, when, payload): + self.opcode = -1 + self.description = "Undefined" + self.when = when + self.payload = payload + self.header = [] + self.fields = [] -def pp(value, bits=16): - hexfmt = '%%0%dx' % (bits / 4) - return '%6d 0x%s %s' % (value, (hexfmt % value), bin(value, bits)) + def describeType(self) -> str: + """Returns a string with timestamp, opcode, and description of this packet""" + return "%s Opcode %d: %s" % (self.when, self.opcode, self.description) + + def describeFields(self) -> str: + """Returns a multi-line string describing fields in this packet""" + lines = [] + for k, v in self.fields: + lines.append(" %s: %s\n", k, v) + return "".join(lines) -## -## Codecs -## -import codecs -import string + def describeHeader(self) -> str: + """Returns a multi-line string describing this packet's header structure""" + out = io.StringIO() + out.write(" 0 1 \n") + out.write(" 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f\n") + + bitOffset = 0 + for f in self.header: + bits = f.bits + while bits > 0: + if bitOffset == 0: + out.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n") + linebits = bits + if linebits+bitOffset > 0x20: + linebits = 0x20 - bitOffset -b64alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + nameval = "%s (0x%x)" % (f.name, f.value) + out.write("|" + nameval.center(linebits*2-1)) -def from_b64(s, alphabet, codec='base64'): - tr = alphabet.maketrans(b64alpha) - t = s.translate(tr) - return t.decode(codec) + bitOffset += linebits + bits -= linebits + if linebits == 0x20: + out.write("|\n") + bitOffset = 0 + out.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n") + return out.getvalue() -class Esab64Codec(codecs.Codec): - """Little-endian version of base64.""" + def describe(self) -> str: + """Return a multi-line string describing this packet - ## This could be made nicer by better conforming to the codecs.Codec - ## spec. For instance, raising the appropriate exceptions. - ## - ## Using BitVector makes the code very readable, but it is probably - ## slow. + This shows the timestamp, opcode, description, and hex dump. + If you set any values, those are displayed in the order they were set. - b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' - def decode(self, input, errors='strict'): - r = [] - for i in range(0, len(input), 4): - v = BitVector() - for c in input[i:i+4]: - if c in ('=', ' ', '\n'): - break - v += BitVector(self.b64_chars.index(c), 6) + This will quickly get unweildy, especially for large conversations. + You are encouraged to implement your own describe() method. + """ + out = io.StringIO() + out.write(self.describeType()) + out.write("\n") + out.write(self.describeFields()) + out.write(self.describeHeader()) + out.write(self.payload.hexdump()) + return out.getvalue() - # Normal base64 would start at the beginning - b = (v[10:12] + v[ 0: 6] + - v[14:18] + v[ 6:10] + - v[18:24] + v[12:14]) + def setValue(self, key:str, value:str): + """Set a value - r.append(str(b)) - return ''.join(r), len(input) + This is intended to be used to note debugging information + that you'd like to see on each packet. + """ + self.fields.append(namedField(key, value)) - def encode(self, input, errors='strict'): - raise NotImplementedError() + def setString(self, key:str, value:str): + """Set a string value, displaying its Python string representation""" + self.setValue(key, repr(value)) + def setInt(self, key:str, value:int): + """Set an int value, displaying its decimal and hexadecimal representations""" + self.setValue(key, "%d == 0x%x" % (value, value)) + setUInt = setInt -class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter): - pass + def setUInt32(self, key:str, value:int): + """Set a Uint32 value, dispalying its decimal and 0-padded hexadecimal representations""" + self.setValue(key, "%d == %04x" % (value, value)) -class Esab64StreamReader(Esab64Codec, codecs.StreamReader): - pass + def setBytes(self, key:str, value:str): + """Set a bytes value, displaying the hex encoding of the bytes""" + self.setValue(key, binascii.hexlify(value).encode("ascii")) -def _registry(encoding): - if encoding == 'esab64': - c = Esab64Codec() - return (c.encode, c.decode, - Esab64StreamReader, Esab64StreamWriter) + def peel(self, octets:int) -> bytes: + """Peel octets bytes off the Payload, returning those bytes""" + pllen = len(self.payload) + if octets > pllen: + raise ShortError(octets, pllen) + buf = self.payload[:octets] + if buf.missing() > 0: + raise MissingError() + self.payload = self.payload[octets:] + return buf.bytes() -codecs.register(_registry) + def addHeaderField(self, order:binary.ByteOrder, name:str, bits:int, value:typing.Any): + """Add a field to the header field description.""" + h = headerField(name, bits, value, order) + self.header.append(h) -def main(session): - s = None - reseq = ip.Dispatch(*sys.argv[1:]) - for _, d in reseq: - srv, first, chunk = d - if not s: - s = session(first) - s.handle(srv, first, chunk, reseq.last) + def readUint(self, order:binary.ByteOrder, bits:int, name:str): + """Peel an unsigned integer of size bits, adding it to the header field""" + if bits not in (8, 16, 32, 64): + raise RuntimeError("Weird number of bits: %d" % bits) + octets = bits >> 3 + b = self.peel(octets) + if bits == 8: + value = b[0] + elif bits == 16: + value = order.Uint16(b) + elif bits == 32: + value = order.Uint32(b) + elif bits == 64: + value = order.Uint64(b) + self.addHeaderField(order, name, bits, value) -Session = ip.Session -Packet = ip.Packet + return value + + def uint8(self, name:str) -> int: + "Peel off a uint8 (aka byte)" + return self.readUint(binary.LittleEndian, 8, name) + + def uint16le(self, name:str) -> int: + "Peel off a uint16, little-endian" + return self.readUint(binary.LittleEndian, 16, name) + + def uint32le(self, name:str) -> int: + "Peel off a uint32, little-endian" + return self.readUint(binary.LittleEndian, 32, name) + + def uint64le(self, name:str) -> int: + "Peel off a uint64, little-endian" + return self.readUint(binary.LittleEndian, 64, name) + + def uint16be(self, name:str) -> int: + "Peel off a uint64, big-endian" + return self.readUint(binary.BigEndian, 16, name) + + def uint32be(self, name:str) -> int: + "Peel off a uint32, big-endian" + return self.readUint(binary.BigEndian, 32, name) + + def uint64be(self, name:str) -> int: + "Peel off a uint44, big-endian" + return self.readUint(binary.BigEndian, 64, name) diff --git a/binary.py b/binary.py new file mode 100644 index 0000000..0b61df2 --- /dev/null +++ b/binary.py @@ -0,0 +1,91 @@ +"""Endianness conversions. + +This is a blatant rip-off of the golang binary library. +I'm not too proud to steal a nicely-thought-out API. + +""" + +def byte(v): + return v & 0xff + +class ByteOrder: + "A ByteOrder specifies how to convert byte sequences into 16-, 32-, or 64-bit unsigned integers." + pass + +class LittleEndian(ByteOrder): + "Little-Endian byte order" + + def Uint16(self, b:bytes) -> int: + return b[0] | (b[1]<<8) + + def PutUint16(self, v:int) -> bytes: + return bytes([ + byte(v), + byte(v>>8), + ]) + + def Uint32(self, b:bytes) -> int: + return b[0] | (b[1]<<8) | (b[2]<<16) | (b[3]<<24) + + def PutUint16(self, v:int) -> bytes: + return bytes([ + byte(v), + byte(v>>8), + byte(v>>16), + byte(v>>24), + ]) + + def Uint64(self, b:bytes) -> int: + return b[0] | (b[1]<<8) | (b[2]<<16) | (b[3]<<24) | \ + (b[4]<<32) | (b[5]<<40) | (b[6]<<48) | (b[7]<<56) + + def PutUint64(self, v:int) -> bytes: + return bytes([ + byte(v), + byte(v>>8), + byte(v>>16), + byte(v>>24), + byte(v>>32), + byte(v>>40), + byte(v>>48), + byte(v>>56), + ]) + +class BigEndian(ByteOrder): + "Big-Endian byte order" + + def Uint16(self, b:bytes) -> int: + return b[1] | (b[0]<<8) + + def PutUint16(self, v:int) -> bytes: + return bytes([ + byte(v>>8), + byte(v), + ]) + + def Uint32(self, b:bytes) -> int: + return b[3] | (b[2]<<8) | (b[1]<<16) | (b[0]<<24) + + def PutUint16(self, v:int) -> bytes: + return bytes([ + byte(v>>24), + byte(v>>16), + byte(v>>8), + byte(v), + ]) + + def Uint64(self, b:bytes) -> int: + return b[7] | (b[6]<<8) | (b[5]<<16) | (b[4]<<24) | \ + (b[3]<<32) | (b[2]<<40) | (b[1]<<48) | (b[0]<<56) + + def PutUint64(self, v:int) -> bytes: + return bytes([ + byte(v>>56), + byte(v>>48), + byte(v>>40), + byte(v>>32), + byte(v>>24), + byte(v>>16), + byte(v>>8), + byte(v), + ]) diff --git a/ip.py b/ip.py index 0043aaf..306a5c0 100644 --- a/ip.py +++ b/ip.py @@ -533,9 +533,6 @@ class Packet: def __iter__(self): return self.params.__iter__() - def has_key(self, k): - return self.params.has_key(k) - def keys(self): return self.params.keys() @@ -586,8 +583,8 @@ class Packet: except AttributeError: print(' payload: %r' % self.payload) - def parse(self, data): - """Parse a chunk of data (possibly a TriloBytes). + def decode(self, data): + """Decode a chunk of data (possibly a TriloBytes). Anything returned is not part of this packet and will be passed in to a subsequent packet. @@ -596,12 +593,12 @@ class Packet: self.parts = [data] self.payload = data - return None + return False def handle(self, data): """Handle data from a Session class.""" - data = self.parse(data) + data = self.decode(data) if self.opcode != None: try: f = getattr(self, 'opcode_%s' % self.opcode) diff --git a/orig.py b/orig.py new file mode 100644 index 0000000..11c76bd --- /dev/null +++ b/orig.py @@ -0,0 +1,194 @@ +#! /usr/bin/python3 + +import binascii +import sys +import struct +from . import ip + + + +def cstring(buf): + "Return buf if buf were a C-style (NULL-terminate) string" + + i = buf.index('\0') + return buf[:i] + + +def assert_equal(a, b): + assert a == b, ('%r != %r' % (a, b)) + + +def assert_in(a, *b): + assert a in b, ('%r not in %r' % (a, b)) + + +## +## Binary and other base conversions +## + +class BitVector: + def __init__(self, i=0, length=None): + try: + self._val = 0 + for c in i: + self._val <<= 8 + self._val += ord(c) + if length is not None: + self._len = length + else: + self._len = len(i) * 8 + except TypeError: + self._val = i + if length is not None: + self._len = length + else: + self._len = 0 + while i > 0: + i >>= 1 + self._len += 1 + + def __len__(self): + return self._len + + def __getitem__(self, idx): + if idx > self._len: + raise IndexError() + idx = self._len - idx + return int((self._val >> idx) & 1) + + def __getslice__(self, a, b): + if b > self._len: + b = self._len + i = self._val >> (self._len - b) + l = b - a + mask = (1 << l) - 1 + return BitVector(i & mask, length=l) + + def __iter__(self): + """Iterate from LSB to MSB""" + + v = self._val + for _ in range(self._len): + yield int(v & 1) + v >>= 1 + + def __str__(self): + r = '' + v = self._val + i = self._len + while i > 8: + o = ((v >> (i - 8)) & 0xFF) + r += chr(o) + i -= 8 + if i > 0: + o = v & ((1 << i) - 1) + r += chr(o) + return r + + def __int__(self): + return self._val + + def __repr__(self): + l = list(self) + l.reverse() + return '' + + def __add__(self, i): + if isinstance(i, BitVector): + l = len(self) + len(i) + v = (int(self) << len(i)) + int(i) + return BitVector(v, l) + else: + raise ValueError("Can't extend with this type yet") + + def bitstr(self): + bits = [str(x) for x in self] + bits.reverse() + return ''.join(bits) + + +def bin(i, bits=None): + """Return the binary representation of i""" + + return BitVector(i, bits).bitstr() + + +def unhex(s): + """Decode a string as hex, stripping whitespace first""" + + return binascii.unhexlify(s.replace(' ', '')) + + +def pp(value, bits=16): + hexfmt = '%%0%dx' % (bits / 4) + return '%6d 0x%s %s' % (value, (hexfmt % value), bin(value, bits)) + +## +## Codecs +## +import codecs +import string + +b64alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + +def from_b64(s, alphabet, codec='base64'): + tr = alphabet.maketrans(b64alpha) + t = s.translate(tr) + return t.decode(codec) + +class Esab64Codec(codecs.Codec): + """Little-endian version of base64.""" + + ## This could be made nicer by better conforming to the codecs.Codec + ## spec. For instance, raising the appropriate exceptions. + ## + ## Using BitVector makes the code very readable, but it is probably + ## slow. + + b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + def decode(self, input, errors='strict'): + r = [] + for i in range(0, len(input), 4): + v = BitVector() + for c in input[i:i+4]: + if c in ('=', ' ', '\n'): + break + v += BitVector(self.b64_chars.index(c), 6) + + # Normal base64 would start at the beginning + b = (v[10:12] + v[ 0: 6] + + v[14:18] + v[ 6:10] + + v[18:24] + v[12:14]) + + r.append(str(b)) + return ''.join(r), len(input) + + def encode(self, input, errors='strict'): + raise NotImplementedError() + + +class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter): + pass + +class Esab64StreamReader(Esab64Codec, codecs.StreamReader): + pass + +def _registry(encoding): + if encoding == 'esab64': + c = Esab64Codec() + return (c.encode, c.decode, + Esab64StreamReader, Esab64StreamWriter) + +codecs.register(_registry) + +def main(session): + s = None + reseq = ip.Dispatch(*sys.argv[1:]) + for _, d in reseq: + srv, first, chunk = d + if not s: + s = session(first) + s.handle(srv, first, chunk, reseq.last) + +Session = ip.Session +Packet = ip.Packet diff --git a/stream.py b/stream.py new file mode 100644 index 0000000..2435142 --- /dev/null +++ b/stream.py @@ -0,0 +1,37 @@ +import typing +from . import trilobytes + +class NamedFile(typing.NamedTuple): + """A file object and the path where it lives""" + File: typing.BinaryIO + Name: string + +class Utterance(typing.NamedTuple): + """An atomic communication within a Stream. + + Streams consist of a string of Utterances. + Each utterance has associated data, and a time stamp. + + Typically these line up with what crosses the network, + but bear in mind that TCP is a streaming protocol, + so don't rely on Utterances alone to separate Application-layer packets. + """ + + When: float + Data: trilobytes.TriloBytes + +class Stream: + """A Stream is one half of a two-way conversation""" + + def __init__(self, net, transport): + self.net = net + self.transport = transport + + def reassembled(rs): + """Called by the TCP assembler when an Utterance can be built""" + data = trilobytes.TriloBytes() + for r in rs: + if r.Skip > 0: + data += [None] * r.Skip + data + r.Bytes + if len(data) > 0 \ No newline at end of file diff --git a/unpack.py b/unpack.py deleted file mode 100644 index 7a5b80b..0000000 --- a/unpack.py +++ /dev/null @@ -1,73 +0,0 @@ -#! /usr/bin/python3 - -ENDIAN_LITTLE = 1 -ENDIAN_BIG = 2 -ENDIAN_MIDDLE = 3 -ENDIAN_NETWORK = ENDIAN_BIG - -class Unpacker: - """Class that lets you peel values off - - >>> u = Unpacker(bytes((1, 0,2, 0,0,0,3, 0,0,0,0,0,0,0,4))) - >>> u.uint8() - 1 - >>> u.uint16() - 2 - >>> u.uint32() - 3 - >>> u.uint64() - 4 - - >>> u = Unpacker(bytes((1,0, 104,105)), ENDIAN_LITTLE) - >>> u.uint16() - 1 - >>> u.buf - b'hi' - - >>> u = Unpacker(bytes((1,0, 0,2))) - >>> u.uint16(ENDIAN_LITTLE) - 1 - >>> u.uint(16, ENDIAN_BIG) - 2 - - >>> u = Unpacker(bytes((0,1,2,3)), ENDIAN_MIDDLE) - >>> '%08x' % u.uint32() - '01000302' - """ - - def __init__(self, buf, endian=ENDIAN_NETWORK): - self.endian = endian - self.buf = buf - - def uint(self, size, endian=None): - endian = endian or self.endian - if size not in (8, 16, 32, 64): - # XXX: I'm pretty sure this can be done, but I don't want to code it up right now. - raise ValueError("Can't do weird sizes") - noctets = size // 8 - if endian == ENDIAN_BIG: - r = range(0, noctets) - elif endian == ENDIAN_LITTLE: - r = range(noctets-1, -1, -1) - elif endian == ENDIAN_MIDDLE: - r = (1, 0, 3, 2, 5, 4, 7, 6)[:noctets] - else: - raise ValueError("Unsupported byte order") - pull, self.buf = self.buf[:noctets], self.buf[noctets:] - acc = 0 - for i in r: - acc = (acc << 8) | pull[i] - return acc - - def uint8(self): - return self.uint(8) - def uint16(self, endian=None): - return self.uint(16, endian) - def uint32(self, endian=None): - return self.uint(32, endian) - def uint64(self, endian=None): - return self.uint(64, endian) - -if __name__ == "__main__": - import doctest - doctest.testmod()