Multi-stream resequencer

This commit is contained in:
Neale Pickett 2007-12-30 23:30:23 -07:00
parent d1fb343980
commit 60d47e5350
3 changed files with 108 additions and 42 deletions

View File

@ -2,8 +2,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import sys
import struct
printable = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()[]{}`~/=\\?+|\',."<> ' printable = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()[]{}`~/=-\\?+|\',."<> '
def unpack(fmt, buf): def unpack(fmt, buf):
"""Unpack buf based on fmt, return the rest as a string.""" """Unpack buf based on fmt, return the rest as a string."""

View File

@ -213,6 +213,25 @@ def adds(txt):
return ret return ret
class XorMask:
    """Repeating-key XOR mask for character strings.

    The key is held as a tuple of integers.  ``offset`` is the index of
    the key element that will be applied to the next character; it is a
    plain attribute, so callers may inspect or reset it.
    """

    def __init__(self, mask):
        """Build a mask from a string or from an iterable of integers.

        A string key is converted to the ordinals of its characters; any
        other iterable is taken to contain the integer key values.

        Raises ValueError if the key is empty, since there would be
        nothing to XOR against.
        """
        self.offset = 0
        # isinstance rather than an exact type comparison, so subclasses
        # of str are treated as character keys too.
        if isinstance(mask, str):
            self._mask = tuple(ord(m) for m in mask)
        else:
            self._mask = tuple(mask)
        if not self._mask:
            # Fail here instead of with an IndexError deep inside mask().
            raise ValueError('XorMask requires a non-empty mask')

    def mask(self, s, stick=False):
        """Return s with each character XORed against the repeating key.

        Masking starts at the current ``offset`` and wraps around the
        key.  If ``stick`` is true the offset reached at the end of s is
        kept, so the next call continues where this one stopped;
        otherwise the offset is reset to 0 after masking.
        """
        key = self._mask
        keylen = len(key)
        offset = self.offset
        r = []
        for c in s:
            r.append(chr(ord(c) ^ key[offset]))
            offset = (offset + 1) % keylen
        self.offset = offset if stick else 0
        return ''.join(r)
## ##
## Grep-like things within dictionary ## Grep-like things within dictionary
## ##

View File

@ -16,8 +16,8 @@ def unpack_nybbles(byte):
return (byte >> 4, byte & 0x0F) return (byte >> 4, byte & 0x0F)
ICMP = 1 ICMP = 1
TCP = 6 TCP = 6
UDP = 17 UDP = 17
class Frame: class Frame:
"""Turn an ethernet frame into relevant TCP parts""" """Turn an ethernet frame into relevant TCP parts"""
@ -86,6 +86,8 @@ class Frame:
# Nice formatting # Nice formatting
self.src = (self.saddr, self.sport) self.src = (self.saddr, self.sport)
self.dst = (self.daddr, self.dport) self.dst = (self.daddr, self.dport)
# This hash is the same for both sides of the transaction
self.hash = (self.saddr ^ self.sport ^ self.daddr ^ self.dport) self.hash = (self.saddr ^ self.sport ^ self.daddr ^ self.dport)
def get_src_addr(self): def get_src_addr(self):
@ -101,40 +103,84 @@ class Frame:
dst_addr = property(get_dst_addr) dst_addr = property(get_dst_addr)
def __repr__(self): def __repr__(self):
return '<Frame %s %s:%d -> %s:%d len %d>' % (self.name, return '<Frame %s %s:%d -> %s:%d length %d>' % (self.name,
self.src_addr, self.sport, self.src_addr, self.sport,
self.dst_addr, self.dport, self.dst_addr, self.dport,
len(self.payload)) len(self.payload))
class Chunk:
    """Chunk of frames, possibly with gaps.

    Frames are kept in ``collection``, keyed by their sequence number.
    ``seq`` is the sequence number of the first byte of the chunk and
    ``length`` is the distance from ``seq`` to the end of the furthest
    frame added so far.

    Currently, gaps show up as a string of 0x33, ascii '3'.
    """

    def __init__(self, seq, drop='3'):
        """Create an empty chunk starting at sequence number seq.

        drop is the single character used to fill gaps in str() output.
        """
        # chr(0x33) == '3'.  If you see a bunch of 3s, in the ascii or
        # the hex view, suspect a drop.
        assert len(drop) == 1, "Don't yet support len(drop) > 1"
        self.drop = drop
        self.collection = {}
        self.length = 0
        self.seq = seq
        self.first = None

    def add(self, frame):
        """Add a frame; its seq must not precede the chunk's seq.

        The first frame ever added is remembered in ``first`` and is
        used by repr() to describe the chunk's endpoints.  A frame with
        the same seq as an earlier one replaces it.
        """
        assert frame.seq >= self.seq, (frame.seq, self.seq)
        # Compare against None explicitly so the check does not depend
        # on how the frame object evaluates in a boolean context.
        if self.first is None:
            self.first = frame
        self.collection[frame.seq] = frame
        end = frame.seq - self.seq + len(frame.payload)
        self.length = max(self.length, end)

    def __len__(self):
        """Return the number of bytes spanned, gaps included."""
        return int(self.length)

    def __repr__(self):
        if self.first is not None:
            return '<Chunk %s:%d -> %s:%d length %d>' % (self.first.src_addr,
                                                         self.first.sport,
                                                         self.first.dst_addr,
                                                         self.first.dport,
                                                         len(self))
        else:
            return '<Chunk (no frames)>'

    def __str__(self):
        """Return the payloads in sequence order, gaps filled with drop.

        Frames are walked in sequence order rather than looked up at the
        exact current offset, so a frame that overlaps data already
        emitted contributes only its new bytes instead of being skipped
        and replaced by drop characters, and a frame with an empty
        payload cannot stall the loop.
        """
        parts = []
        pos = 0  # bytes emitted so far, relative to self.seq
        for seq in sorted(self.collection):
            payload = self.collection[seq].payload
            start = seq - self.seq
            if start > pos:
                # Gap before this frame.  This is where to fix it for
                # len(drop) > 1.
                parts.append(self.drop * (start - pos))
                pos = start
            # Skip whatever an earlier frame already covered.
            fresh = payload[pos - start:]
            if fresh:
                parts.append(fresh)
                pos += len(fresh)
        return ''.join(parts)

    def __add__(self, next):
        """Return a new Chunk holding the frames of self and of next.

        The new chunk starts at self.seq and uses self.drop; every frame
        in next must therefore have seq >= self.seq.  Where both chunks
        hold a frame with the same seq, the one from next wins.
        """
        new = Chunk(self.seq, self.drop)
        for frame in self.collection.values():
            new.add(frame)
        for frame in next.collection.values():
            new.add(frame)
        # Dictionary order is arbitrary, so restore the real first frame
        # instead of whichever one happened to be iterated first.
        if self.first is not None:
            new.first = self.first
        elif next.first is not None:
            new.first = next.first
        return new
class TCP_Session: class TCP_Session:
"""Iterable TCP session resequencer. """TCP session resequencer.
You initialize it with something with a read() method that returns a >>> p = pcap.open('whatever.pcap')
new ethernet frame. For instance, an object from my py-pcap module. >>> s = TCP_Session()
>>> while True:
The read() method returns (srv, chunk), where srv is 1 if this came ... pkt = p.read()
from the server, and chunk is a chunk of data. ... if not pkt:
... break
... f = Frame(pkt)
... r = s.handle(f)
... if r:
... print ('chunk', r)
This returns things in sequence. So you get both sides of the This returns things in sequence. So you get both sides of the
conversation in the order that they happened. conversation in the order that they happened.
@ -176,11 +222,11 @@ class TCP_Session:
elif pkt.flags == 18: # SYNACK elif pkt.flags == 18: # SYNACK
assert (pkt.src == (self.srv or pkt.src)) assert (pkt.src == (self.srv or pkt.src))
self.cli, self.srv = pkt.dst, pkt.src self.cli, self.srv = pkt.dst, pkt.src
self.seq = [pkt.ack + 1, pkt.seq + 1] self.seq = [pkt.ack, pkt.seq + 1]
elif pkt.flags == 16: # ACK elif pkt.flags == 16: # ACK
assert (pkt.src == (self.cli or pkt.src)) assert (pkt.src == (self.cli or pkt.src))
self.cli, self.srv = pkt.src, pkt.dst self.cli, self.srv = pkt.src, pkt.dst
self.seq = [pkt.seq, pkt.ack + 1] self.seq = [pkt.seq, pkt.ack]
self.handle = self.handle_packet self.handle = self.handle_packet
else: else:
raise ValueError('Weird flags in handshake: %d' % pkt.flags) raise ValueError('Weird flags in handshake: %d' % pkt.flags)
@ -195,23 +241,15 @@ class TCP_Session:
# Does this ACK after the last output sequence number? # Does this ACK after the last output sequence number?
if pkt.ack > self.seq[xdi]: if pkt.ack > self.seq[xdi]:
ret = Chunk(self.seq[xdi])
pending = self.pending[xdi] pending = self.pending[xdi]
seq = self.seq[xdi] for key in pending.keys():
ret = DropStringIO()
keys = pending.keys()
for key in keys:
if key >= pkt.ack: if key >= pkt.ack:
continue continue
ret.add(pending[key])
pkt2 = pending[key]
del pending[key] del pending[key]
ret.seek(pkt2.seq - seq)
ret.write(pkt2.payload)
self.seq[xdi] = pkt.ack self.seq[xdi] = pkt.ack
ret = (xdi, ret.getvalue())
# If it has a payload, stick it into pending # If it has a payload, stick it into pending
if pkt.payload: if pkt.payload:
self.pending[idx][pkt.seq] = pkt self.pending[idx][pkt.seq] = pkt
@ -289,6 +327,14 @@ class HTTP_side:
def resequence(pc): def resequence(pc):
"""Re-sequence from a pcap stream.
>>> p = pcap.open('whatever.pcap')
>>> for chunk in resequence(p):
... print `chunk`
"""
sessions = {} sessions = {}
for pkt in pc: for pkt in pc:
f = Frame(pkt) f = Frame(pkt)
@ -298,9 +344,9 @@ def resequence(pc):
if not s: if not s:
s = TCP_Session() s = TCP_Session()
sessions[f.hash] = s sessions[f.hash] = s
r = s.handle(f) chunk = s.handle(f)
if r: if chunk:
yield (f, r) yield chunk
def process_http(filename): def process_http(filename):