netarch/resequence.py

288 lines
7.6 KiB
Python
Executable File

#! /usr/bin/python
import StringIO
import struct
import socket
def unpack(fmt, buf):
    """Decode the front of buf according to fmt and keep the remainder.

    Returns the values struct.unpack() produces for the first
    struct.calcsize(fmt) bytes of buf, as a tuple, with one extra final
    element: everything in buf after those bytes, left undecoded.
    """
    header_len = struct.calcsize(fmt)
    head, tail = buf[:header_len], buf[header_len:]
    return struct.unpack(fmt, head) + (tail,)
def unpack_nybbles(byte):
    """Split an integer byte value into its (high, low) 4-bit halves."""
    high = byte >> 4
    low = byte & 0x0F
    return (high, low)
class Frame:
    """One raw Ethernet frame decoded as IPv4 + TCP.

    The constructor parses the Ethernet, IP and TCP headers out of the raw
    frame.  It raises ValueError if the frame is not IPv4 (ethertype 0x0800)
    or not TCP (IP protocol 6); a frame too short to hold the fixed-size
    headers raises struct.error.

    Attributes set by the constructor:
      eth_dhost, eth_shost, eth_type      -- Ethernet header fields
      ihlvers, tos, tot_len, id, frag_off,
      ttl, protocol, check, saddr, daddr  -- IP header fields
      th_sport, th_dport, th_seq, th_ack,
      th_off, th_flags, th_win, th_sum,
      th_urp, th_options                  -- TCP header fields
      src, dst   -- (packed 4-byte address, port) tuples
      seq, ack   -- aliases for th_seq and th_ack
      payload    -- TCP payload, cut to the length the IP header declares

    src_addr and dst_addr are dotted-quad strings computed on access.
    """

    # Fixed-size header layouts, network byte order.
    _ETH_HDR = struct.Struct('!6s6sH')
    _IP_HDR = struct.Struct('!BBHHHBBH4s4s')
    _TCP_HDR = struct.Struct('!HHLLBBHHH')

    def __init__(self, frame):
        # Ethernet
        (self.eth_dhost,
         self.eth_shost,
         self.eth_type) = self._ETH_HDR.unpack_from(frame, 0)
        if self.eth_type != 0x0800:
            raise ValueError('Not IP %04x' % self.eth_type)
        ip_start = self._ETH_HDR.size

        # IP
        (self.ihlvers,
         self.tos,
         self.tot_len,
         self.id,
         self.frag_off,
         self.ttl,
         self.protocol,
         self.check,
         self.saddr,
         self.daddr) = self._IP_HDR.unpack_from(frame, ip_start)
        if self.protocol != 6:
            raise ValueError('Not TCP')
        # The low nybble of the first IP byte is the header length in 32-bit
        # words.  Honour it so IP options are skipped instead of being parsed
        # as the TCP header; a value below the fixed header size is bogus, so
        # fall back to the fixed size.
        ip_hdr_len = max((self.ihlvers & 0x0F) * 4, self._IP_HDR.size)
        tcp_start = ip_start + ip_hdr_len

        # TCP
        (self.th_sport,
         self.th_dport,
         self.th_seq,
         self.th_ack,
         x2off,
         self.th_flags,
         self.th_win,
         self.th_sum,
         self.th_urp) = self._TCP_HDR.unpack_from(frame, tcp_start)
        # High nybble is the TCP header length in 32-bit words.
        self.th_off = x2off >> 4
        tcp_hdr_len = max(self.th_off * 4, self._TCP_HDR.size)
        options_start = tcp_start + self._TCP_HDR.size
        data_start = tcp_start + tcp_hdr_len
        self.th_options = frame[options_start:data_start]

        # Trust the IP total length rather than the frame length, so that
        # any trailing bytes after the IP datagram are not taken as payload.
        # Clamp at zero so a bad length cannot produce a negative slice bound.
        payload_len = max(self.tot_len - ip_hdr_len - tcp_hdr_len, 0)
        payload = frame[data_start:data_start + payload_len]

        # Nice formatting
        self.src = (self.saddr, self.th_sport)
        self.dst = (self.daddr, self.th_dport)
        self.seq = self.th_seq
        self.ack = self.th_ack
        self.payload = payload

    def get_src_addr(self):
        """Return the source IP address as a dotted-quad string."""
        # Do not assign the result to self.src_addr: that name is a
        # read-only property, so the assignment fails on new-style classes.
        return socket.inet_ntoa(self.saddr)
    src_addr = property(get_src_addr)

    def get_dst_addr(self):
        """Return the destination IP address as a dotted-quad string."""
        return socket.inet_ntoa(self.daddr)
    dst_addr = property(get_dst_addr)

    def __repr__(self):
        return '<Frame %s:%d -> %s:%d len %d>' % (self.src_addr, self.th_sport,
                                                  self.dst_addr, self.th_dport,
                                                  len(self.payload))
class DropStringIO(StringIO.StringIO):
"""StringIO with different padding.
If you write beyond the length of the current string, this pads with
the string 'Drop', and not NULs. This should make it more obvious
that you've had a drop. I hope.
"""
padstr = 'Drop'
def write(self, s):
if self.pos > self.len:
bytes = self.pos - self.len
pad = self.padstr * ((bytes / len(self.padstr)) + 1)
self.buflist.append(pad[:bytes])
self.len = self.pos
return StringIO.StringIO.write(self, s)
class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns a
    new ethernet frame each call (element [1] of the returned value is the
    raw frame) and something false at end of input.  For instance, an
    object from my py-pcap module.

    The constructor consumes the three-way handshake.  Iterating then
    yields (srv, chunk) pairs, where srv is 1 if the data was sent by the
    server and 0 if it was sent by the client, and chunk is the data the
    other side has just acknowledged.

    This returns things in sequence.  So you get both sides of the
    conversation in the order that they happened.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets and retransmissions of data already delivered.
    """

    def __init__(self, pc):
        self.pc = pc

        self.cli = None
        self.srv = None
        # Index 0 is the client side, index 1 the server side.
        self.seq = [None, None]     # next sequence number to output
        self.pending = [{}, {}]     # seq -> Frame, received but not yet ACKed
        self.frames = 0

        self.read_handshake()

    def read_packet(self):
        """Return the next frame that parses as TCP; raise EOFError at end."""
        while True:
            p = self.pc.read()
            if not p:
                raise EOFError()
            try:
                return Frame(p[1])
            except ValueError:
                # Frame rejects anything that is not IPv4/TCP; skip those.
                continue

    def read_handshake(self):
        """Consume SYN, SYN-ACK and ACK, recording endpoints and initial seqs."""
        # Read SYN
        pkt = self.read_packet()
        assert (pkt.th_flags == 2)      # XXX: There's got to be a better way
        self.cli = pkt.src
        self.srv = pkt.dst
        self.seq[0] = pkt.seq + 1

        # Read SYN-ACK
        while True:
            pkt = self.read_packet()
            if ((pkt.src == self.srv) and
                    (pkt.th_flags == 18)):
                self.seq[1] = pkt.th_seq + 1
                break

        # Read ACK
        while True:
            pkt = self.read_packet()
            if ((pkt.src == self.cli) and
                    (pkt.th_flags == 16)):
                assert (self.seq[0] == pkt.th_seq)
                break

        self.frames = 3

    def __iter__(self):
        while True:
            try:
                pkt = self.read_packet()
            except EOFError:
                # Leave the loop (rather than return) so done() below runs.
                break
            self.frames += 1

            # Which way is this going?
            idx = int(pkt.src == self.srv)
            xdi = 1 - idx

            # Does this ACK after the last output sequence number?
            if pkt.th_ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                # Iterate a sorted snapshot: entries are removed as we go,
                # and a fixed order makes overlapping segments deterministic.
                for key in sorted(pending):
                    if key >= pkt.th_ack:
                        continue

                    pkt2 = pending.pop(key)
                    offset = pkt2.th_seq - seq
                    data = pkt2.payload
                    if offset < 0:
                        # Segment starts before what was already output
                        # (a retransmission): keep only the new tail.
                        data = data[-offset:]
                        offset = 0
                    if data:
                        ret.seek(offset)
                        ret.write(data)
                self.seq[xdi] = pkt.th_ack

                yield (xdi, ret.getvalue())

            # If it has a payload, stick it into pending
            if pkt.payload:
                self.pending[idx][pkt.seq] = pkt
        self.done()

    def done(self):
        """Warn about any unhandled packets"""
        for p in self.pending:
            k = sorted(p)
            if k:
                # Single pre-formatted argument prints the same text under
                # both the print statement and the print function.
                print('unused packets: %s' % (k,))
        return
class HTTP_side:
    """One side of an HTTP transaction.

    Feed it stream data with process().  It collects the start line in
    `first`, header lines in `headers` (name -> raw value, exactly as
    received after the colon), and up to Content-Length bytes of body in
    `data`.  `complete` becomes true once the headers have ended and the
    announced body length has been read.
    """

    def __init__(self):
        self.buf = ''               # partial header line awaiting its newline
        self.first = ''             # start line (request or status line)
        self.in_headers = True
        self.headers = {}
        self.pending_data = 0       # body bytes still expected
        self.data = ''
        self.complete = False

    def __repr__(self):
        return '<HTTP_side %r>' % self.first

    def process(self, chunk):
        """Consume chunk; return whatever belongs to the next utterance.

        Returns '' when the whole chunk was used (including the case where
        a trailing partial header line was buffered for the next call).
        """
        # The buffered text arrived earlier, so it goes in front.
        chunk = self.buf + chunk
        self.buf = ''

        while self.in_headers and chunk:
            line, newline, rest = chunk.partition('\n')
            if not newline:
                # Incomplete line: hold it until more data arrives.
                self.buf = chunk
                return ''
            chunk = rest
            self.process_header_line(line)

        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d)     # May set to 0

        # Only complete once the blank line ending the headers was seen;
        # running out of input in mid-headers is not the end of the message.
        if not self.in_headers and not self.pending_data:
            self.complete = True

        return chunk

    def process_header_line(self, line):
        """Handle one line of the header section (without its newline).

        A blank line ends the headers.  The first non-blank line is the
        start line.  Every later line must be 'name: value'.

        Raises ValueError if a line after the start line has no colon.
        """
        if not line.strip():
            self.in_headers = False
            return

        if not self.first:
            # The start line may itself contain ':' (e.g. an absolute
            # URI), so it must not be tested as a header.
            self.first = line
            return

        try:
            k, v = line.split(':', 1)
        except ValueError:
            raise ValueError(('Not a header', line))

        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
def process_http(filename):
    """Split the TCP session captured in filename into HTTP messages.

    Opens the capture with pcap.open(), resequences it with TCP_Session
    and feeds each direction's data to an HTTP_side parser.  Returns a list
    of (idx, HTTP_side) pairs, one per completed message in the order each
    was completed; idx is the direction index TCP_Session yielded for it.
    Messages still incomplete when the capture ends are not returned.
    """
    import pcap

    session = TCP_Session(pcap.open(filename))

    finished = []
    sides = [HTTP_side(), HTTP_side()]
    for direction, data in session:
        # One chunk can finish a message and start the next, so keep
        # feeding the leftover to a fresh parser until nothing remains.
        while data:
            side = sides[direction]
            data = side.process(data)
            if side.complete:
                finished.append((direction, side))
                sides[direction] = HTTP_side()

    return finished