esab64 as a codec, stringier gapstr, and RE classes in IP

2008-01-23 17:04:41 -07:00 · 2008-01-23 17:04:41 -07:00 · aad4c119bb
parent 36290068ab
commit aad4c119bb
3 changed files with 204 additions and 91 deletions
--- a/init.py
+++ b/init.py
@ -158,7 +158,7 @@ class BitVector:
        i = self._val >> (self._len - b)
        l = b - a
        mask = (1 << l) - 1
-        return bitvector(i & mask, length=l)
+        return BitVector(i & mask, length=l)
    def __iter__(self):
        v = self._val
@ -185,28 +185,43 @@ class BitVector:
    def __repr__(self):
        l = list(self)
        l.reverse()
-        return '<bitvector ' + ''.join(str(x) for x in l) + '>'
+        return '<BitVector ' + ''.join(str(x) for x in l) + '>'
    def __add__(self, i):
-        if isinstance(i, bitvector):
+        if isinstance(i, BitVector):
            l = len(self) + len(i)
            v = (int(self) << len(i)) + int(i)
-            return bitvector(v, l)
+            return BitVector(v, l)
        else:
            raise ValueError("Can't extend with this type yet")
 b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
 def esab64_decode(s):
    """Little-endian version of base64"""
 ##
 ## Codecs
 ##
 import codecs
 from __init__ import BitVector
 class Esab64Codec(codecs.Codec):
    """Little-endian version of base64."""
    ## This could be made nicer by better conforming to the codecs.Codec
    ## spec.  For instance, raising the appropriate exceptions.
    ##
    ## Using BitVector makes the code very readable, but it is probably
    ## slow.
    b64_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
    def decode(self, input, errors='strict'):
        r = []
-    for i in range(0, len(s), 4):
+        for i in range(0, len(input), 4):
-        v = bitvector()
+            v = BitVector()
-        for c in s[i:i+4]:
+            for c in input[i:i+4]:
-            if c == '=':
+                if c in ('=', ' ', '\n'):
                    break
-            v += bitvector(b64_chars.index(c), 6)
+                v += BitVector(self.b64_chars.index(c), 6)
            # Normal base64 would start at the beginning
            b = (v[10:12] + v[ 0: 6] +
@ -214,4 +229,22 @@ def esab64_decode(s):
                 v[18:24] + v[12:14])
            r.append(str(b))
-    return ''.join(r)
+        return ''.join(r), len(input)
    def encode(self, input, errors='strict'):
        raise NotImplementedError()
 class Esab64StreamWriter(Esab64Codec, codecs.StreamWriter):
    """Stream writer for esab64; adds nothing beyond its two bases."""
 class Esab64StreamReader(Esab64Codec, codecs.StreamReader):
    """Stream reader for esab64; adds nothing beyond its two bases."""
 def _registry(encoding):
    if encoding == 'esab64':
        c = Esab64Codec()
        return (c.encode, c.decode,
                Esab64StreamReader, Esab64StreamWriter)
 # Install the search function so the name 'esab64' resolves to this codec.
 codecs.register(_registry)
--- a/gapstr.py
+++ b/gapstr.py
@ -40,6 +40,16 @@ class GapString:
                ret.append(i)
        return ''.join(ret)
    def __iter__(self):
        """Yield the contents one item at a time.

        Each string in self.contents is yielded character by character;
        each integer (a gap) is expanded into that many copies of
        self.drop.
        """
        for piece in self.contents:
            if not isinstance(piece, int):
                for ch in piece:
                    yield ch
                continue
            # An int is a gap of that many missing characters.
            for _ in range(piece):
                yield self.drop
    def hexdump(self, fd=sys.stdout):
        offset = 0
--- a/ip.py
+++ b/ip.py
@ -6,7 +6,8 @@ import socket
 import warnings
 import heapq
 import gapstr
-
+import time
 import UserDict
 def unpack(fmt, buf):
    """Unpack buf based on fmt, assuming the rest is a string."""
@ -18,6 +19,7 @@ def unpack(fmt, buf):
 def unpack_nybbles(byte):
    """Split byte into a (high, low) pair of 4-bit values."""
    high = byte >> 4
    low = byte & 0x0F
    return (high, low)
 # IP protocol numbers (these match the IANA-assigned values).
 ICMP = 1
 TCP  = 6
 UDP  = 17
@ -298,60 +300,6 @@ class TCP_Resequence:
            warnings.warn('Spurious frame after shutdown: %r %d' % (pkt, pkt.flags))
 class HTTP_side:
    """One side of an HTTP transaction.

    Raw stream data is fed in through process().  The first non-blank
    line is kept verbatim in self.first, "Key: value" lines are
    collected in self.headers, and a Content-Length header determines
    how many body bytes are accumulated in self.data.  self.complete
    becomes True once the blank line ending the headers has been seen
    and the announced body (if any) has been read in full.
    """
    def __init__(self):
        self.buf = ''            # partial header line awaiting its newline
        self.first = ''          # first line of the message, verbatim
        self.in_headers = True   # False once the blank separator line is seen
        self.headers = {}        # header name -> raw text after the first ':'
        self.pending_data = 0    # body bytes still expected
        self.data = ''           # body bytes received so far
        self.complete = False    # True when headers and body are all in
    def __repr__(self):
        return '<HTTP_side %r>' % self.first
    def process(self, chunk):
        """Consume a chunk of stream data.

        Returns any unprocessed part of the chunk, parts which go to
        the next utterance.  Returns '' when everything was consumed
        or when more data is needed.
        """
        # Buffered data arrived earlier, so it goes in front of the new chunk.
        chunk = self.buf + chunk
        self.buf = ''
        while self.in_headers and chunk:
            try:
                line, chunk = chunk.split('\n', 1)
            except ValueError:
                # No newline yet: keep the partial line for the next call.
                self.buf = chunk
                return ''
            self.process_header_line(line)
        if self.in_headers:
            # Data ran out on a line boundary before the blank line that
            # ends the headers, so the message is not complete yet.
            return ''
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d) # May set to 0
        if not self.pending_data:
            self.complete = True
        return chunk
    def process_header_line(self, line):
        """Handle one line of the header section (without its newline).

        A blank line ends the headers.  The first non-blank line is
        stored as self.first; later lines must look like "Key: value".

        Raises ValueError(('Not a header', line)) for a later line
        with no ':', and ValueError from int() when Content-Length is
        not a number.
        """
        if not line.strip():
            self.in_headers = False
            return
        if not self.first:
            # The start line may itself contain ':' (e.g. an absolute
            # URL), so it is claimed before any attempt to split it.
            self.first = line
            return
        try:
            k, v = line.split(':', 1)
        except ValueError:
            raise ValueError(('Not a header', line))
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
 def resequence(pc):
    """Re-sequence from a pcap stream.
@ -397,28 +345,150 @@ def demux(*pcs):
            heapq.heappush(tops, (frame, pc))
 def process_http(filename):
    # XXX: probably broken
    import pcap
-    pc = pcap.open(filename)
+##
-    sess = TCP_Session(pc)
+## Binary protocol stuff
 ##
-    packets = []
+class Packet(UserDict.DictMixin):
-    current = [HTTP_side(), HTTP_side()]
+    """Base class for a packet from a binary protocol.
    for idx, chunk in sess:
        c = current[idx]
        while chunk:
            chunk = c.process(chunk)
            if c.complete:
                packets.append((idx, c))
-                c = HTTP_side()
+    This is a base class for making protocol reverse-engineering easier.
                current[idx] = c
-    return packets
+    """
    opcodes = {}
    def __init__(self, frame=None):
        """Create an empty, not-yet-parsed packet.

        frame is the optional network frame the packet came from; it
        is only stored here.  Everything else starts out empty.
        """
        self.frame = frame
        self.payload = None
        # Opcode and its description are unset until something assigns them.
        self.opcode = None
        self.opcode_desc = None
        # params backs the dict interface; parts holds the parsed pieces.
        self.params = {}
        self.parts = []
    def __repr__(self):
        """Return '<Class packet opcode=N(desc) k=v ...>' with the
        params listed in sorted key order."""
        pieces = ['<%s packet opcode=%s' % (self.__class__.__name__,
                                            self.opcode)]
        if self.opcode_desc:
            pieces.append('(%s)' % self.opcode_desc)
        for key in sorted(self.params.keys()):
            pieces.append(' %s=%s' % (key, self.params[key]))
        pieces.append('>')
        return ''.join(pieces)
    ## Dict methods
    def __setitem__(self, k, v):
        """Store v under key k in self.params."""
        self.params[k] = v
    def __getitem__(self, k):
        """Return the value stored under k in self.params."""
        return self.params[k]
    def __contains__(self, k):
        """Return whether k is a key of self.params."""
        return k in self.params
    def __iter__(self):
        """Iterate over the keys of self.params."""
        return iter(self.params)
    def has_key(self, k):
        """Return True if k is a key of self.params, else False.

        Uses the `in` operator rather than the deprecated
        dict.has_key(); the result is the same.
        """
        return k in self.params
    def keys(self):
        """Return the keys of self.params."""
        params = self.params
        return params.keys()
    ##
    def assert_in(self, a, *b):
        """Check that a is one of the expected values b.

        With exactly one expected value, a must equal it; otherwise a
        must be a member of b.

        Raises AssertionError with a message naming both sides when
        the check fails.  The exception is raised explicitly instead
        of through an assert statement, so the check still runs under
        `python -O`.
        """
        if len(b) == 1:
            if not a == b[0]:
                raise AssertionError('%r != %r' % (a, b[0]))
        elif a not in b:
            raise AssertionError('%r not in %r' % (a, b))
    def show(self):
        print '%s %3s: %s' % (self.__class__.__name__,
                              self.opcode,
                              self.opcode_desc)
        if self.frame:
            print '    %s:%d -> %s:%d (%s)' % (self.frame.src_addr,
                                               self.frame.sport,
                                               self.frame.dst_addr,
                                               self.frame.dport,
                                               time.ctime(self.frame.time))
        if self.parts:
            dl = len(self.parts[-1])
            p = []
            for x in self.parts[:-1]:
                if x == dl:
                    p.append('%3d!' % x)
                else:
                    p.append('%3d' % x)
            print '           parts: (%s) +%d bytes' % (','.join(p), dl)
        keys = self.params.keys()
        keys.sort()
        for k in keys:
            print '    %12s: %s' % (k, self.params[k])
        if self.payload:
            try:
                self.payload.hexdump()
            except AttributeError:
                print '         payload: %r' % self.payload
    def parse(self, data):
        """Parse a chunk of data (possibly a GapString).

        This base version records the whole chunk as the only entry
        of self.parts.  The return value is whatever does not belong
        to this packet, to be handed to a subsequent packet; here
        that is always None.
        """
        leftover = None
        self.parts = [data]
        return leftover
    def handle(self, data):
        """Handle data from a Session class.

        Runs parse() on data.  If self.opcode is then anything other
        than None, the method named 'opcode_<opcode>' is looked up
        and called with no arguments.  When opcode_desc is still
        unset, it is filled in from the first line of that method's
        docstring.

        Returns whatever parse() returned.
        Raises AttributeError if no matching opcode_<opcode> method
        exists.
        """
        data = self.parse(data)
        # Identity test: an opcode of 0 is valid and must still dispatch.
        if self.opcode is not None:
            f = getattr(self, 'opcode_%s' % self.opcode)
            if not self.opcode_desc and f.__doc__:
                self.opcode_desc = f.__doc__.split('\n')[0]
            f()
        return data
 class Session:
    """Base class for one session of a binary protocol."""
    # Packet class instantiated for each piece of data; meant to be
    # overridden by subclasses.
    Packet = Packet
    def handle(self, frame, data):
        """Handle a burst of data.

        frame is a representative frame for the burst (earlier is
        better); data is the hunk of data, possibly a GapString.  A
        new Packet is created for the data, asked to handle it, and
        passed to process(); this repeats for as long as the packet
        hands back non-empty leftover data.
        """
        remaining = data
        while remaining:
            pkt = self.Packet(frame)
            remaining = pkt.handle(remaining)
            self.process(pkt)
    def process(self, packet):
        """Process one handled packet.

        The default just shows the packet, which is probably what you
        want when starting out on a protocol.  Override it with
        something more sophisticated as you learn more; what that is
        will vary wildly between protocols.
        """
        packet.show()