diff --git a/README.md b/README.md new file mode 100644 index 0000000..c8d2d51 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +Dirtbags Netarch Library +======================== + +This is a library for advanced +[network archaeology](https://sites.google.com/view/cyberfire/foundry/classes/network-archaeology). + +It provides a heavily field-tested framework for +exploring unknown TCP-based protocols, +and room to grow these explorations into full-blown decoders. + +Get going +========= + +Documentation sucks, sorry. +The way we go about things is to copy `dumbdecode.py` to a new file, +and start hacking onto it. diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..4d73b55 --- /dev/null +++ b/TODO.md @@ -0,0 +1,8 @@ +Things We Need To Do +==================== + +* `unpack.py` like golang's `encoding/binary` +* documentation +* remove lingering py2-isms +* more logical way to chain together dispatcher, tcp resequencer +* some way to parallelize work diff --git a/netarch/__init__.py b/netarch/__init__.py index edbb2ed..0666039 100644 --- a/netarch/__init__.py +++ b/netarch/__init__.py @@ -41,7 +41,7 @@ decch = (u'␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏' cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼' u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼' - u'␣!"#$%&\'()*+,-./' + u' !"#$%&\'()*+,-./' u'0123456789:;<=>?' u'@ABCDEFGHIJKLMNO' u'PQRSTUVWXYZ[\]^_' @@ -59,80 +59,75 @@ cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼' def unpack(fmt, buf): - """Unpack buf based on fmt, return the rest as a string.""" + """Unpack buf based on fmt, return the remainder.""" size = struct.calcsize(fmt) - vals = struct.unpack(fmt, str(buf[:size])) + vals = struct.unpack(fmt, bytes(buf[:size])) return vals + (buf[size:],) class HexDumper: - def __init__(self, fd=sys.stdout): - self.fd = fd + def __init__(self, output, charset=stdch): self.offset = 0 - self.buf = [] + self.last = None + self.elided = False + self.hexes = [] + self.chars = [] + self.charset = charset + self.output = output - def _to_printable(self, c): - if not c: - return u'◌' - else: - return cgach[ord(c)] - - - def write(self, what): - self.fd.write(what.encode('utf-8')) - - def _flush(self): - if not self.buf: + def _spit(self): + if self.chars == self.last: + if not self.elided: + self.output.write('*\n') + self.elided = True + self.hexes = [] + self.chars = [] return + self.last = self.chars[:] + self.elided = False - o = [] - for c in self.buf: - if c: - o.append(u'%02x' % ord(c)) - else: - o.append(u'--') - o += ([u' '] * (16 - len(self.buf))) - p = [self._to_printable(c) for c in self.buf] + pad = 16 - len(self.chars) + self.hexes += [' '] * pad - self.write(u'%08x ' % self.offset) + self.output.write('{:08x} '.format(self.offset - len(self.chars))) + self.output.write(' '.join(self.hexes[:8])) + self.output.write(' ') + self.output.write(' '.join(self.hexes[8:])) + self.output.write(' |') + self.output.write(''.join(self.chars)) + self.output.write('|\n') - self.write(u' '.join(o[:8])) - self.write(u' ') - self.write(u' '.join(o[8:])) + self.hexes = [] + self.chars = [] - self.write(u' ┆') + def add(self, b): + if self.offset and self.offset % 16 == 0: + self._spit() - self.write(u''.join(p)) + if b is None: + h = '⬜' + c = '�' + else: + h = '{:02x}'.format(b) + c = self.charset[b] + self.chars.append(c) + self.hexes.append(h) - self.write(u'┆\n') + self.offset += 1 - self.offset += len(self.buf) - self.buf = [] - - def dump_chr(self, c): - self.buf.append(c) - if len(self.buf) == 16: - self._flush() - - def dump_drop(self): - self.buf.append(None) - if len(self.buf) == 16: - self._flush() - - def finish(self): - self._flush() - self.write('%08x\n' % self.offset) + def done(self): + self._spit() + self.output.write('{:08x}\n'.format(self.offset)) -def hexdump(buf, f=sys.stdout): +def hexdump(buf, f=sys.stdout, charset=cgach): "Print a hex dump of buf" - d = HexDumper() - - for c in buf: - d.dump_chr(c) - d.finish() + h = HexDumper(output=f, charset=charset) + for b in buf: + h.add(b) + h.done() def cstring(buf): diff --git a/netarch/ip.py b/netarch/ip.py index 82d6401..5478350 100644 --- a/netarch/ip.py +++ b/netarch/ip.py @@ -11,17 +11,17 @@ import time try: import pcap except ImportError: + warnings.warn("Using slow pure-python pcap library") import netarch.py_pcap as pcap import os import cgi import urllib from netarch import * -from netarch.gapstr import * +from netarch.trilobytes import TriloBytes def unpack_nybbles(byte): return (byte >> 4, byte & 0x0F) - transfers = os.environ.get('TRANSFERS', 'transfers') IP = 0x0800 @@ -130,15 +130,17 @@ class Frame: def get_src_addr(self): - saddr = struct.pack('!i', self.saddr) - self.src_addr = socket.inet_ntoa(saddr) - return self.src_addr + if not hasattr(self, "_src_addr"): + saddr = struct.pack('!i', self.saddr) + self._src_addr = socket.inet_ntoa(saddr) + return self._src_addr src_addr = property(get_src_addr) def get_dst_addr(self): - daddr = struct.pack('!i', self.daddr) - self.dst_addr = socket.inet_ntoa(daddr) - return self.dst_addr + if not hasattr(self, "_dst_addr"): + daddr = struct.pack('!i', self.daddr) + self._dst_addr = socket.inet_ntoa(daddr) + return self._dst_addr dst_addr = property(get_dst_addr) def __repr__(self): @@ -229,7 +231,7 @@ class TCP_Recreate: - return ethhdr + iphdr + tcphdr + str(payload) + return ethhdr + iphdr + tcphdr + bytes(payload) def write_pkt(self, timestamp, cli, payload, flags=0): p = self.packet(cli, payload, flags) @@ -307,16 +309,14 @@ class TCP_Resequence: pending = self.pending[xdi] # Get a sorted list of sequence numbers - keys = pending.keys() - keys.sort() + keys = sorted(pending) # Build up return value - gs = gapstr.GapString() + gs = TriloBytes() if keys: - f = pending[keys[0]] - ret = (xdi, f, gs) + first = pending[keys[0]] else: - ret = (xdi, None, gs) + first = None # Fill in gs with our frames for key in keys: @@ -326,13 +326,14 @@ class TCP_Resequence: frame = pending[key] if key > seq: # Dropped frame(s) - if key - seq > 6000: - print("Gosh, %d dropped octets sure is a lot!" % (key - seq)) - gs.append(key - seq) + dropped = key - seq + if dropped > 6000: + print("Gosh, %d dropped octets sure is a lot!" % (dropped)) + gs += [None] * dropped seq = key if key == seq: # Default - gs.append(frame.payload) + gs += frame.payload seq += len(frame.payload) del pending[key] elif key < seq: @@ -347,13 +348,14 @@ class TCP_Resequence: self.handle = self.handle_drop if seq != pkt.ack: # Drop at the end - if pkt.ack - seq > 6000: + dropped = pkt.ack - seq + if dropped > 6000: print('Large drop at end of session!') print(' %s' % ((pkt, pkt.time),)) print(' %x %x' % (pkt.ack, seq)) - gs.append(pkt.ack - seq) + gs += [None] * dropped - return ret + return (xdi, first, gs) def handle(self, pkt): @@ -445,14 +447,14 @@ class Dispatch: if not literal: parts = filename.split(':::') fn = parts[0] - fd = file(fn) + fd = open(fn, "rb") pc = pcap.open(fd) if len(parts) > 1: pos = int(parts[1]) fd.seek(pos) self._read(pc, fn, fd) else: - fd = file(filename) + fd = open(filename, "rb") pc = pcap.open(fd) self._read(pc, filename, fd) @@ -566,10 +568,9 @@ class Packet: p.append('%3d!' % x) else: p.append('%3d' % x) - print(' parts: (%s) +%d bytes' % (','.join(p), dl)) + print(' parts: (%s) +%d(0x%x) octets' % (','.join(p), dl, dl)) - keys = self.params.keys() - keys.sort() + keys = sorted(self.params) for k in keys: print(' %12s: %s' % (k, self.params[k])) @@ -578,12 +579,12 @@ class Packet: p.show() elif self.payload: try: - self.payload.hexdump() + hexdump(self.payload) except AttributeError: print(' payload: %r' % self.payload) def parse(self, data): - """Parse a chunk of data (possibly a GapString). + """Parse a chunk of data (possibly a TriloBytes). Anything returned is not part of this packet and will be passed in to a subsequent packet. @@ -640,12 +641,12 @@ class Session: pass - def handle(self, is_srv, frame, gs, lastpos): + def handle(self, is_srv, frame, data, lastpos): """Handle a data burst. @param is_srv Is this from the server? @param frame A frame associated with this packet, or None if it's all drops - @param gs A gapstring of the data + @param data A TriloBytes of the data @param lastpos Last position in the source file, for debugging """ @@ -657,18 +658,18 @@ class Session: try: saddr = frame.saddr try: - (f, data) = self.pending.pop(saddr) + (f, buf) = self.pending.pop(saddr) except KeyError: f = frame - data = gapstr.GapString() - data.extend(gs) + buf = TriloBytes() + buf += data try: - while data: + while buf: p = self.Packet(self, f) - data = p.handle(data) + buf = p.handle(buf) self.process(p) except NeedMoreData: - self.pending[saddr] = (f, data) + self.pending[saddr] = (f, buf) self.count += 1 except: print('Lastpos: %r' % (lastpos,)) @@ -700,7 +701,7 @@ class Session: fullfn = os.path.join(self.basename, fn) fullfn2 = os.path.join(self.basename2, fn) print(' writing %s' % (fn,)) - fd = file(fullfn, 'w') + fd = open(fullfn, 'w') try: os.unlink(fullfn2) except OSError: diff --git a/netarch/py_pcap.py b/netarch/py_pcap.py index 44e53de..66edf74 100755 --- a/netarch/py_pcap.py +++ b/netarch/py_pcap.py @@ -1,13 +1,16 @@ -#! /usr/bin/python +#! /usr/bin/python3 import struct +import builtins _MAGIC = 0xA1B2C3D4 -class pcap: - def __init__(self, stream, mode='rb', snaplen=65535, linktype=1): +class PcapFile: + def __init__(self, stream, mode='r', snaplen=65535, linktype=1): + if 'b' not in mode: + mode += 'b' try: - self.stream = file(stream, mode) + self.stream = builtins.open(stream, mode) except TypeError: self.stream = stream try: @@ -16,7 +19,7 @@ class pcap: except IOError: hdr = None - if hdr: + if 'r' in mode: # We're in read mode self._endian = None for endian in '<>': @@ -71,17 +74,22 @@ class pcap: break yield r - -open = pcap -open_offline = pcap +open = PcapFile +pcap = PcapFile +open_offline = PcapFile if __name__ == '__main__': - p = open('test.pcap', 'w') # Create a new file - p.write(((0, 0, 3), 'foo')) # Add a packet - p.write(((0, 0, 3), 'bar')) + import io + + f = io.BytesIO() + p = PcapFile(f, 'w') + p.write(((0, 0, 3), b'foo')) # Add a packet + p.write(((0, 0, 3), b'bar')) del p - p = open(file('test.pcap')) # Also takes file objects + + f.seek(0) + p = PcapFile(f) assert ((p.version, p.thiszone, p.sigfigs, p.snaplen, p.linktype) == ((2, 4), 0, 0, 65535, 1)) - assert ([i for i in p] == [((0, 0, 3), 'foo'), ((0, 0, 3), 'bar')]) + assert ([i for i in p] == [((0, 0, 3), b'foo'), ((0, 0, 3), b'bar')]) diff --git a/netarch/trilobytes.py b/netarch/trilobytes.py index 59b7f49..3882466 100644 --- a/netarch/trilobytes.py +++ b/netarch/trilobytes.py @@ -35,7 +35,7 @@ b'Hh???' >>> tb = TriloBytes(b'hi', drop=b'DROP') >>> bytes(tb) b'hi' ->>> tb = tb + [None] * 7 +>>> tb += [None] * 7 >>> bytes(tb) b'hiOPDROPD'