Works now

This commit is contained in:
Neale Pickett 2018-07-10 18:13:07 +00:00
parent 737d482f3a
commit ce44a1d745
6 changed files with 135 additions and 107 deletions

16
README.md Normal file
View File

@ -0,0 +1,16 @@
Dirtbags Netarch Library
========================
This is a library for advanced
[network archaeology](https://sites.google.com/view/cyberfire/foundry/classes/network-archaeology).
It provides a heavily field-tested framework for
exploring unknown TCP-based protocols,
and room to grow these explorations into full-blown decoders.

Get going
=========
Documentation sucks, sorry.
The way we go about things is to copy `dumbdecode.py` to a new file
and start hacking on it.

8
TODO.md Normal file
View File

@ -0,0 +1,8 @@
Things We Need To Do
====================
* an `unpack.py` module modeled on Go's `encoding/binary`
* documentation
* remove lingering py2-isms
* a more logical way to chain together the dispatcher and the TCP resequencer
* some way to parallelize work

View File

@ -41,7 +41,7 @@ decch = (u'␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏'
cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼' cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼' u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼'
u'!"#$%&\'()*+,-./' u' !"#$%&\'()*+,-./'
u'0123456789:;<=>?' u'0123456789:;<=>?'
u'@ABCDEFGHIJKLMNO' u'@ABCDEFGHIJKLMNO'
u'PQRSTUVWXYZ[\]^_' u'PQRSTUVWXYZ[\]^_'
@ -59,80 +59,75 @@ cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼'
def unpack(fmt, buf): def unpack(fmt, buf):
"""Unpack buf based on fmt, return the rest as a string.""" """Unpack buf based on fmt, return the remainder."""
size = struct.calcsize(fmt) size = struct.calcsize(fmt)
vals = struct.unpack(fmt, str(buf[:size])) vals = struct.unpack(fmt, bytes(buf[:size]))
return vals + (buf[size:],) return vals + (buf[size:],)
class HexDumper: class HexDumper:
def __init__(self, fd=sys.stdout): def __init__(self, output, charset=stdch):
self.fd = fd
self.offset = 0 self.offset = 0
self.buf = [] self.last = None
self.elided = False
self.hexes = []
self.chars = []
self.charset = charset
self.output = output
def _to_printable(self, c): def _spit(self):
if not c: if self.chars == self.last:
return u'' if not self.elided:
else: self.output.write('*\n')
return cgach[ord(c)] self.elided = True
self.hexes = []
self.chars = []
def write(self, what):
self.fd.write(what.encode('utf-8'))
def _flush(self):
if not self.buf:
return return
self.last = self.chars[:]
self.elided = False
o = [] pad = 16 - len(self.chars)
for c in self.buf: self.hexes += [' '] * pad
if c:
o.append(u'%02x' % ord(c)) self.output.write('{:08x} '.format(self.offset - len(self.chars)))
self.output.write(' '.join(self.hexes[:8]))
self.output.write(' ')
self.output.write(' '.join(self.hexes[8:]))
self.output.write(' |')
self.output.write(''.join(self.chars))
self.output.write('|\n')
self.hexes = []
self.chars = []
def add(self, b):
if self.offset and self.offset % 16 == 0:
self._spit()
if b is None:
h = ''
c = '<EFBFBD>'
else: else:
o.append(u'--') h = '{:02x}'.format(b)
o += ([u' '] * (16 - len(self.buf))) c = self.charset[b]
p = [self._to_printable(c) for c in self.buf] self.chars.append(c)
self.hexes.append(h)
self.write(u'%08x ' % self.offset) self.offset += 1
self.write(u' '.join(o[:8])) def done(self):
self.write(u' ') self._spit()
self.write(u' '.join(o[8:])) self.output.write('{:08x}\n'.format(self.offset))
self.write(u'')
self.write(u''.join(p))
self.write(u'\n')
self.offset += len(self.buf)
self.buf = []
def dump_chr(self, c):
self.buf.append(c)
if len(self.buf) == 16:
self._flush()
def dump_drop(self):
self.buf.append(None)
if len(self.buf) == 16:
self._flush()
def finish(self):
self._flush()
self.write('%08x\n' % self.offset)
def hexdump(buf, f=sys.stdout): def hexdump(buf, f=sys.stdout, charset=cgach):
"Print a hex dump of buf" "Print a hex dump of buf"
d = HexDumper() h = HexDumper(output=f, charset=charset)
for b in buf:
for c in buf: h.add(b)
d.dump_chr(c) h.done()
d.finish()
def cstring(buf): def cstring(buf):

View File

@ -11,17 +11,17 @@ import time
try: try:
import pcap import pcap
except ImportError: except ImportError:
warnings.warn("Using slow pure-python pcap library")
import netarch.py_pcap as pcap import netarch.py_pcap as pcap
import os import os
import cgi import cgi
import urllib import urllib
from netarch import * from netarch import *
from netarch.gapstr import * from netarch.trilobytes import TriloBytes
def unpack_nybbles(byte): def unpack_nybbles(byte):
return (byte >> 4, byte & 0x0F) return (byte >> 4, byte & 0x0F)
transfers = os.environ.get('TRANSFERS', 'transfers') transfers = os.environ.get('TRANSFERS', 'transfers')
IP = 0x0800 IP = 0x0800
@ -130,15 +130,17 @@ class Frame:
def get_src_addr(self): def get_src_addr(self):
if not hasattr(self, "_src_addr"):
saddr = struct.pack('!i', self.saddr) saddr = struct.pack('!i', self.saddr)
self.src_addr = socket.inet_ntoa(saddr) self._src_addr = socket.inet_ntoa(saddr)
return self.src_addr return self._src_addr
src_addr = property(get_src_addr) src_addr = property(get_src_addr)
def get_dst_addr(self): def get_dst_addr(self):
if not hasattr(self, "_dst_addr"):
daddr = struct.pack('!i', self.daddr) daddr = struct.pack('!i', self.daddr)
self.dst_addr = socket.inet_ntoa(daddr) self._dst_addr = socket.inet_ntoa(daddr)
return self.dst_addr return self._dst_addr
dst_addr = property(get_dst_addr) dst_addr = property(get_dst_addr)
def __repr__(self): def __repr__(self):
@ -229,7 +231,7 @@ class TCP_Recreate:
return ethhdr + iphdr + tcphdr + str(payload) return ethhdr + iphdr + tcphdr + bytes(payload)
def write_pkt(self, timestamp, cli, payload, flags=0): def write_pkt(self, timestamp, cli, payload, flags=0):
p = self.packet(cli, payload, flags) p = self.packet(cli, payload, flags)
@ -307,16 +309,14 @@ class TCP_Resequence:
pending = self.pending[xdi] pending = self.pending[xdi]
# Get a sorted list of sequence numbers # Get a sorted list of sequence numbers
keys = pending.keys() keys = sorted(pending)
keys.sort()
# Build up return value # Build up return value
gs = gapstr.GapString() gs = TriloBytes()
if keys: if keys:
f = pending[keys[0]] first = pending[keys[0]]
ret = (xdi, f, gs)
else: else:
ret = (xdi, None, gs) first = None
# Fill in gs with our frames # Fill in gs with our frames
for key in keys: for key in keys:
@ -326,13 +326,14 @@ class TCP_Resequence:
frame = pending[key] frame = pending[key]
if key > seq: if key > seq:
# Dropped frame(s) # Dropped frame(s)
if key - seq > 6000: dropped = key - seq
print("Gosh, %d dropped octets sure is a lot!" % (key - seq)) if dropped > 6000:
gs.append(key - seq) print("Gosh, %d dropped octets sure is a lot!" % (dropped))
gs += [None] * dropped
seq = key seq = key
if key == seq: if key == seq:
# Default # Default
gs.append(frame.payload) gs += frame.payload
seq += len(frame.payload) seq += len(frame.payload)
del pending[key] del pending[key]
elif key < seq: elif key < seq:
@ -347,13 +348,14 @@ class TCP_Resequence:
self.handle = self.handle_drop self.handle = self.handle_drop
if seq != pkt.ack: if seq != pkt.ack:
# Drop at the end # Drop at the end
if pkt.ack - seq > 6000: dropped = pkt.ack - seq
if dropped > 6000:
print('Large drop at end of session!') print('Large drop at end of session!')
print(' %s' % ((pkt, pkt.time),)) print(' %s' % ((pkt, pkt.time),))
print(' %x %x' % (pkt.ack, seq)) print(' %x %x' % (pkt.ack, seq))
gs.append(pkt.ack - seq) gs += [None] * dropped
return ret return (xdi, first, gs)
def handle(self, pkt): def handle(self, pkt):
@ -445,14 +447,14 @@ class Dispatch:
if not literal: if not literal:
parts = filename.split(':::') parts = filename.split(':::')
fn = parts[0] fn = parts[0]
fd = file(fn) fd = open(fn, "rb")
pc = pcap.open(fd) pc = pcap.open(fd)
if len(parts) > 1: if len(parts) > 1:
pos = int(parts[1]) pos = int(parts[1])
fd.seek(pos) fd.seek(pos)
self._read(pc, fn, fd) self._read(pc, fn, fd)
else: else:
fd = file(filename) fd = open(filename, "rb")
pc = pcap.open(fd) pc = pcap.open(fd)
self._read(pc, filename, fd) self._read(pc, filename, fd)
@ -566,10 +568,9 @@ class Packet:
p.append('%3d!' % x) p.append('%3d!' % x)
else: else:
p.append('%3d' % x) p.append('%3d' % x)
print(' parts: (%s) +%d bytes' % (','.join(p), dl)) print(' parts: (%s) +%d(0x%x) octets' % (','.join(p), dl, dl))
keys = self.params.keys() keys = sorted(self.params)
keys.sort()
for k in keys: for k in keys:
print(' %12s: %s' % (k, self.params[k])) print(' %12s: %s' % (k, self.params[k]))
@ -578,12 +579,12 @@ class Packet:
p.show() p.show()
elif self.payload: elif self.payload:
try: try:
self.payload.hexdump() hexdump(self.payload)
except AttributeError: except AttributeError:
print(' payload: %r' % self.payload) print(' payload: %r' % self.payload)
def parse(self, data): def parse(self, data):
"""Parse a chunk of data (possibly a GapString). """Parse a chunk of data (possibly a TriloBytes).
Anything returned is not part of this packet and will be passed Anything returned is not part of this packet and will be passed
in to a subsequent packet. in to a subsequent packet.
@ -640,12 +641,12 @@ class Session:
pass pass
def handle(self, is_srv, frame, gs, lastpos): def handle(self, is_srv, frame, data, lastpos):
"""Handle a data burst. """Handle a data burst.
@param is_srv Is this from the server? @param is_srv Is this from the server?
@param frame A frame associated with this packet, or None if it's all drops @param frame A frame associated with this packet, or None if it's all drops
@param gs A gapstring of the data @param data A TriloBytes of the data
@param lastpos Last position in the source file, for debugging @param lastpos Last position in the source file, for debugging
""" """
@ -657,18 +658,18 @@ class Session:
try: try:
saddr = frame.saddr saddr = frame.saddr
try: try:
(f, data) = self.pending.pop(saddr) (f, buf) = self.pending.pop(saddr)
except KeyError: except KeyError:
f = frame f = frame
data = gapstr.GapString() buf = TriloBytes()
data.extend(gs) buf += data
try: try:
while data: while buf:
p = self.Packet(self, f) p = self.Packet(self, f)
data = p.handle(data) buf = p.handle(buf)
self.process(p) self.process(p)
except NeedMoreData: except NeedMoreData:
self.pending[saddr] = (f, data) self.pending[saddr] = (f, buf)
self.count += 1 self.count += 1
except: except:
print('Lastpos: %r' % (lastpos,)) print('Lastpos: %r' % (lastpos,))
@ -700,7 +701,7 @@ class Session:
fullfn = os.path.join(self.basename, fn) fullfn = os.path.join(self.basename, fn)
fullfn2 = os.path.join(self.basename2, fn) fullfn2 = os.path.join(self.basename2, fn)
print(' writing %s' % (fn,)) print(' writing %s' % (fn,))
fd = file(fullfn, 'w') fd = open(fullfn, 'w')
try: try:
os.unlink(fullfn2) os.unlink(fullfn2)
except OSError: except OSError:

View File

@ -1,13 +1,16 @@
#! /usr/bin/python #! /usr/bin/python3
import struct import struct
import builtins
_MAGIC = 0xA1B2C3D4 _MAGIC = 0xA1B2C3D4
class pcap: class PcapFile:
def __init__(self, stream, mode='rb', snaplen=65535, linktype=1): def __init__(self, stream, mode='r', snaplen=65535, linktype=1):
if 'b' not in mode:
mode += 'b'
try: try:
self.stream = file(stream, mode) self.stream = builtins.open(stream, mode)
except TypeError: except TypeError:
self.stream = stream self.stream = stream
try: try:
@ -16,7 +19,7 @@ class pcap:
except IOError: except IOError:
hdr = None hdr = None
if hdr: if 'r' in mode:
# We're in read mode # We're in read mode
self._endian = None self._endian = None
for endian in '<>': for endian in '<>':
@ -71,17 +74,22 @@ class pcap:
break break
yield r yield r
open = PcapFile
open = pcap pcap = PcapFile
open_offline = pcap open_offline = PcapFile
if __name__ == '__main__': if __name__ == '__main__':
p = open('test.pcap', 'w') # Create a new file import io
p.write(((0, 0, 3), 'foo')) # Add a packet
p.write(((0, 0, 3), 'bar')) f = io.BytesIO()
p = PcapFile(f, 'w')
p.write(((0, 0, 3), b'foo')) # Add a packet
p.write(((0, 0, 3), b'bar'))
del p del p
p = open(file('test.pcap')) # Also takes file objects
f.seek(0)
p = PcapFile(f)
assert ((p.version, p.thiszone, p.sigfigs, p.snaplen, p.linktype) == assert ((p.version, p.thiszone, p.sigfigs, p.snaplen, p.linktype) ==
((2, 4), 0, 0, 65535, 1)) ((2, 4), 0, 0, 65535, 1))
assert ([i for i in p] == [((0, 0, 3), 'foo'), ((0, 0, 3), 'bar')]) assert ([i for i in p] == [((0, 0, 3), b'foo'), ((0, 0, 3), b'bar')])

View File

@ -35,7 +35,7 @@ b'Hh???'
>>> tb = TriloBytes(b'hi', drop=b'DROP') >>> tb = TriloBytes(b'hi', drop=b'DROP')
>>> bytes(tb) >>> bytes(tb)
b'hi' b'hi'
>>> tb = tb + [None] * 7 >>> tb += [None] * 7
>>> bytes(tb) >>> bytes(tb)
b'hiOPDROPD' b'hiOPDROPD'