From aad4c119bb46c5f3cfba9fed9a5621d01badaab6 Mon Sep 17 00:00:00 2001 From: Neale Pickett Date: Wed, 23 Jan 2008 17:04:41 -0700 Subject: [PATCH] esab64 as a codec, stringier gapstr, and RE classes in IP --- __init__.py | 73 +++++++++++++----- gapstr.py | 10 +++ ip.py | 212 ++++++++++++++++++++++++++++++++++------------------ 3 files changed, 204 insertions(+), 91 deletions(-) diff --git a/__init__.py b/__init__.py index 85a67b5..32df9aa 100755 --- a/__init__.py +++ b/__init__.py @@ -158,7 +158,7 @@ class BitVector: i = self._val >> (self._len - b) l = b - a mask = (1 << l) - 1 - return bitvector(i & mask, length=l) + return BitVector(i & mask, length=l) def __iter__(self): v = self._val @@ -185,33 +185,66 @@ class BitVector: def __repr__(self): l = list(self) l.reverse() - return '' + return '' def __add__(self, i): - if isinstance(i, bitvector): + if isinstance(i, BitVector): l = len(self) + len(i) v = (int(self) << len(i)) + int(i) - return bitvector(v, l) + return BitVector(v, l) else: raise ValueError("Can't extend with this type yet") -b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -def esab64_decode(s): - """Little-endian version of base64""" - r = [] - for i in range(0, len(s), 4): - v = bitvector() - for c in s[i:i+4]: - if c == '=': - break - v += bitvector(b64_chars.index(c), 6) +## +## Codecs +## - # Normal base64 would start at the beginning - b = (v[10:12] + v[ 0: 6] + - v[14:18] + v[ 6:10] + - v[18:24] + v[12:14]) +import codecs +from __init__ import BitVector - r.append(str(b)) - return ''.join(r) +class Esab64Codec(codecs.Codec): + """Little-endian version of base64.""" + + ## This could be made nicer by better conforming to the codecs.Codec + ## spec. For instance, raising the appropriate exceptions. + ## + ## Using BitVector makes the code very readable, but it is probably + ## slow. + + b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + def decode(self, input, errors='strict'): + r = [] + for i in range(0, len(input), 4): + v = BitVector() + for c in input[i:i+4]: + if c in ('=', ' ', '\n'): + break + v += BitVector(self.b64_chars.index(c), 6) + + # Normal base64 would start at the beginning + b = (v[10:12] + v[ 0: 6] + + v[14:18] + v[ 6:10] + + v[18:24] + v[12:14]) + + r.append(str(b)) + return ''.join(r), len(input) + + def encode(self, input, errors='strict'): + raise NotImplementedError() + + +class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter): + pass + +class Esab64StreamReader(Esab64Codec, codecs.StreamReader): + pass + +def _registry(encoding): + if encoding == 'esab64': + c = Esab64Codec() + return (c.encode, c.decode, + Esab64StreamReader, Esab64StreamWriter) + +codecs.register(_registry) diff --git a/gapstr.py b/gapstr.py index f8f4619..380cf08 100755 --- a/gapstr.py +++ b/gapstr.py @@ -40,6 +40,16 @@ class GapString: ret.append(i) return ''.join(ret) + def __iter__(self): + for i in self.contents: + if isinstance(i, int): + for j in range(i): + yield self.drop + else: + for c in i: + yield c + + def hexdump(self, fd=sys.stdout): offset = 0 diff --git a/ip.py b/ip.py index 2659284..e07331f 100755 --- a/ip.py +++ b/ip.py @@ -6,7 +6,8 @@ import socket import warnings import heapq import gapstr - +import time +import UserDict def unpack(fmt, buf): """Unpack buf based on fmt, assuming the rest is a string.""" @@ -18,6 +19,7 @@ def unpack(fmt, buf): def unpack_nybbles(byte): return (byte >> 4, byte & 0x0F) + ICMP = 1 TCP = 6 UDP = 17 @@ -298,60 +300,6 @@ class TCP_Resequence: warnings.warn('Spurious frame after shutdown: %r %d' % (pkt, pkt.flags)) -class HTTP_side: - """One side of an HTTP transaction.""" - - def __init__(self): - self.buf = '' - self.first = '' - self.in_headers = True - self.headers = {} - self.pending_data = 0 - self.data = '' - self.complete = False - - def __repr__(self): - return '' % self.first - - def process(self, chunk): - """Returns any unprocessed part of the chunk, parts which go to - the next utterance.""" - - chunk = chunk + self.buf - while self.in_headers and chunk: - try: - line, chunk = chunk.split('\n', 1) - except ValueError: - self.buf = chunk - return '' - self.process_header_line(line) - self.buf = '' - if self.pending_data: - d = chunk[:self.pending_data] - chunk = chunk[self.pending_data:] - self.data += d - self.pending_data -= len(d) # May set to 0 - if not self.pending_data: - self.complete = True - return chunk - - def process_header_line(self, line): - if not line.strip(): - self.in_headers = False - return - try: - k,v = line.split(':', 1) - except ValueError: - if self.first: - raise ValueError(('Not a header', line)) - else: - self.first += line - return - self.headers[k] = v - if k.lower() == 'content-length': - self.pending_data = int(v) - - def resequence(pc): """Re-sequence from a pcap stream. @@ -397,28 +345,150 @@ def demux(*pcs): heapq.heappush(tops, (frame, pc)) -def process_http(filename): - # XXX: probably broken - import pcap - pc = pcap.open(filename) - sess = TCP_Session(pc) +## +## Binary protocol stuff +## - packets = [] - current = [HTTP_side(), HTTP_side()] - for idx, chunk in sess: - c = current[idx] - while chunk: - chunk = c.process(chunk) - if c.complete: - packets.append((idx, c)) +class Packet(UserDict.DictMixin): + """Base class for a packet from a binary protocol. - c = HTTP_side() - current[idx] = c + This is a base class for making protocol reverse-engineering easier. - return packets + """ + + opcodes = {} + + def __init__(self, frame=None): + self.frame = frame + self.opcode = None + self.opcode_desc = None + self.parts = [] + self.params = {} + self.payload = None + + def __repr__(self): + r = '<%s packet opcode=%s' % (self.__class__.__name__, self.opcode) + if self.opcode_desc: + r += '(%s)' % self.opcode_desc + keys = self.params.keys() + keys.sort() + for k in keys: + r += ' %s=%s' % (k, self.params[k]) + r += '>' + return r + ## Dict methods + def __setitem__(self, k, v): + self.params[k] = v + + def __getitem__(self, k): + return self.params[k] + + def __contains__(self, k): + return k in self.params + + def __iter__(self): + return self.params.__iter__() + + def has_key(self, k): + return self.params.has_key(k) + + def keys(self): + return self.params.keys() + + ## + + def assert_in(self, a, *b): + if len(b) == 1: + assert a == b[0], ('%r != %r' % (a, b[0])) + else: + assert a in b, ('%r not in %r' % (a, b)) + + def show(self): + print '%s %3s: %s' % (self.__class__.__name__, + self.opcode, + self.opcode_desc) + if self.frame: + print ' %s:%d -> %s:%d (%s)' % (self.frame.src_addr, + self.frame.sport, + self.frame.dst_addr, + self.frame.dport, + time.ctime(self.frame.time)) + + if self.parts: + dl = len(self.parts[-1]) + p = [] + for x in self.parts[:-1]: + if x == dl: + p.append('%3d!' % x) + else: + p.append('%3d' % x) + print ' parts: (%s) +%d bytes' % (','.join(p), dl) + + keys = self.params.keys() + keys.sort() + for k in keys: + print ' %12s: %s' % (k, self.params[k]) + + if self.payload: + try: + self.payload.hexdump() + except AttributeError: + print ' payload: %r' % self.payload + + def parse(self, data): + """Parse a chunk of data (possibly a GapString). + + Anything returned is not part of this packet and will be passed + in to a subsequent packet. + + """ + + self.parts = [data] + return None + + def handle(self, data): + """Handle data from a Session class.""" + + data = self.parse(data) + if self.opcode <> None: + f = getattr(self, 'opcode_%s' % self.opcode) + if not self.opcode_desc and f.__doc__: + self.opcode_desc = f.__doc__.split('\n')[0] + f() + return data +class Session: + """Base class for a binary protocol session.""" + + # Override this, duh + Packet = Packet + + def handle(self, frame, data): + """Handle a data burst. + + Pass in a representative frame--earlier is better--and a hunk of + data--possibly a GapString. + + """ + + while data: + p = self.Packet(frame) + data = p.handle(data) + self.process(p) + + def process(self, packet): + """Process a packet. + + When you first start out, this probably does exactly what you + want: print out packets as they come in. As you progress you'll + probably want to override it with something more sophisticated. + That will of course vary wildly between protocols. + + """ + + packet.show()