2007-11-12 21:11:25 -07:00
|
|
|
#! /usr/bin/python
|
|
|
|
|
|
|
|
import StringIO
|
2007-12-12 11:07:49 -07:00
|
|
|
import struct
|
|
|
|
import socket
|
2007-12-21 17:08:00 -07:00
|
|
|
import warnings
|
2008-01-10 17:53:11 -07:00
|
|
|
import heapq
|
2008-01-18 19:09:09 -07:00
|
|
|
import gapstr
|
2008-01-10 17:53:11 -07:00
|
|
|
|
2007-12-12 11:07:49 -07:00
|
|
|
|
|
|
|
def unpack(fmt, buf):
    """Decode the front of buf according to fmt and keep the remainder.

    fmt is a struct format string.  The first struct.calcsize(fmt)
    bytes of buf are decoded; whatever follows is returned untouched
    as the last element of the result tuple.

    Raises struct.error if buf is shorter than fmt requires.
    """

    layout = struct.Struct(fmt)
    head, tail = buf[:layout.size], buf[layout.size:]
    return layout.unpack(head) + (tail,)
|
|
|
|
|
|
|
|
def unpack_nybbles(byte):
    """Split an integer byte value into (high nybble, low nybble)."""

    high = byte >> 4
    low = byte & 0x0F
    return (high, low)
|
|
|
|
|
2007-12-21 17:08:00 -07:00
|
|
|
# Values of the IP header's protocol field that Frame knows how to decode.
ICMP = 1
TCP = 6
UDP = 17
|
2007-12-21 17:08:00 -07:00
|
|
|
|
2007-12-12 11:07:49 -07:00
|
|
|
class Frame:
    """Turn an ethernet frame into relevant TCP parts.

    The constructor takes one captured packet of the form
    ((time, _, _), frame_bytes), decodes the Ethernet and IPv4 headers
    and then the TCP, UDP or ICMP header, storing every header field as
    an attribute.  The transport payload ends up in self.payload.

    Raises ValueError if the frame is not IPv4 (ethertype 0x0800) or
    carries a protocol other than TCP, UDP or ICMP.  struct.error
    propagates if the frame is too short for a header.
    """

    def __init__(self, pkt):
        ((self.time, _, _), frame) = pkt

        # Ethernet
        (self.eth_dhost,
         self.eth_shost,
         self.eth_type,
         p) = unpack('!6s6sH', frame)
        if self.eth_type != 0x0800:
            raise ValueError('Not IP %04x' % self.eth_type)

        # IP
        (self.ihlvers,
         self.tos,
         self.tot_len,
         self.id,
         self.frag_off,
         self.ttl,
         self.protocol,
         self.check,
         self.saddr,
         self.daddr,
         p) = unpack("!BBHHHBBHii", p)

        # The header length is carried in the low nybble, in 32-bit
        # words.  Anything beyond the fixed 20 bytes is IP options and
        # must be skipped, or the transport header is read from the
        # wrong offset.
        (self.version, self.ihl) = unpack_nybbles(self.ihlvers)
        ip_hlen = max(self.ihl * 4, 20)
        self.ip_options, p = p[:ip_hlen - 20], p[ip_hlen - 20:]

        if self.protocol == TCP:
            self.name = 'TCP'
            (self.sport,
             self.dport,
             self.seq,
             self.ack,
             x2off,
             self.flags,
             self.win,
             self.sum,
             self.urp,
             p) = unpack("!HHLLBBHHH", p)
            (self.off, th_x2) = unpack_nybbles(x2off)
            # Data offset is in 32-bit words; never less than the fixed
            # 20-byte header even if the field is corrupt.
            tcp_hlen = max(self.off * 4, 20)
            self.options, p = p[:tcp_hlen - 20], p[tcp_hlen - 20:]
            # Bound by the IP total length so link-layer padding is not
            # mistaken for payload.
            self.payload = p[:max(self.tot_len - ip_hlen - tcp_hlen, 0)]
        elif self.protocol == UDP:
            self.name = 'UDP'
            (self.sport,
             self.dport,
             self.ulen,
             self.sum,
             p) = unpack("!HHHH", p)
            self.payload = p[:max(self.ulen - 8, 0)]
        elif self.protocol == ICMP:
            self.name = 'ICMP'
            self.sport = self.dport = -1
            # NOTE: self.id is overwritten here with the ICMP
            # identifier, replacing the IP id stored above.
            (self.type,
             self.code,
             self.cheksum,
             self.id,
             self.seq,
             p) = unpack('!BBHHH', p)
            # Correctly spelled alias; the misspelled name is kept for
            # existing callers.
            self.checksum = self.cheksum
            # tot_len covers the IP header as well as the 8-byte ICMP
            # header, so both have to be subtracted.
            self.payload = p[:max(self.tot_len - ip_hlen - 8, 0)]
        else:
            raise ValueError('Unknown protocol')

        # Nice formatting
        self.src = (self.saddr, self.sport)
        self.dst = (self.daddr, self.dport)

        # This hash is the same for both sides of the transaction
        self.hash = (self.saddr ^ self.sport ^ self.daddr ^ self.dport)

    def get_src_addr(self):
        """Return the source IP address as a dotted-quad string."""
        # Computed on every access: assigning to self.src_addr here
        # would fail on new-style classes because the property has no
        # setter.
        return socket.inet_ntoa(struct.pack('!i', self.saddr))
    src_addr = property(get_src_addr)

    def get_dst_addr(self):
        """Return the destination IP address as a dotted-quad string."""
        return socket.inet_ntoa(struct.pack('!i', self.daddr))
    dst_addr = property(get_dst_addr)

    def __repr__(self):
        return '<Frame %s %s:%d -> %s:%d length %d>' % (self.name,
                                                        self.src_addr, self.sport,
                                                        self.dst_addr, self.dport,
                                                        len(self.payload))
|
2007-11-12 21:11:25 -07:00
|
|
|
|
2008-01-18 19:09:09 -07:00
|
|
|
|
2007-12-30 23:30:23 -07:00
|
|
|
class Chunk:
    """Chunk of frames, possibly with gaps.

    Frames are stored in self.collection keyed by their sequence
    number.  self.seq is the sequence number at which the chunk starts
    and self.length the number of bytes from self.seq to the end of the
    furthest frame, so positions not covered by any frame are gaps.
    self.first is the first frame that was added.
    """

    def __init__(self, seq=None):
        self.collection = {}
        self.length = 0
        self.seq = seq
        self.first = None

    def add(self, frame):
        """Store frame, growing the chunk if it reaches past the end.

        If the chunk has no starting sequence number yet, the frame's
        is adopted.  A frame with the same sequence number as an
        earlier one replaces it.
        """
        if not self.first:
            self.first = frame
        if self.seq is None:
            self.seq = frame.seq
        assert frame.seq >= self.seq, (frame.seq, self.seq)
        self.collection[frame.seq] = frame
        end = frame.seq - self.seq + len(frame.payload)
        self.length = max(self.length, end)

    def __len__(self):
        return int(self.length)

    def __repr__(self):
        if self.first:
            return '<Chunk %s:%d -> %s:%d length %d>' % (self.first.src_addr,
                                                         self.first.sport,
                                                         self.first.dst_addr,
                                                         self.first.dport,
                                                         len(self))
        else:
            return '<Chunk (no frames)>'

    def gapstr(self, drop='?'):
        """Return contents as a GapString"""

        ret = gapstr.GapString(drop=drop)
        # Walk the frames once in sequence order, jumping straight
        # across each hole instead of probing it one byte at a time.
        for start in sorted(self.collection):
            if len(ret) >= self.length:
                break
            pos = self.seq + len(ret)
            if start < pos:
                # Starts inside data that was already emitted.
                continue
            if start > pos:
                ret.append(min(start - pos, self.length - len(ret)))
                if len(ret) >= self.length:
                    break
            ret.append(self.collection[start].payload)
        # Whatever is left after the last frame is one trailing gap.
        if len(ret) < self.length:
            ret.append(self.length - len(ret))
        return ret

    def __str__(self):
        return str(self.gapstr())

    def extend(self, other):
        """Merge other's frames into this chunk in place.

        Afterwards the chunk spans from the lower of the two starting
        sequence numbers to the higher of the two ends.  A chunk whose
        length is zero does not move the boundaries unless this chunk
        has no starting sequence number of its own.
        """
        if other.seq is not None and (other.length or self.seq is None):
            if self.seq is None:
                self.seq, self.length = other.seq, other.length
            else:
                start = min(self.seq, other.seq)
                stop = max(self.seq + self.length,
                           other.seq + other.length)
                self.seq, self.length = start, stop - start
        if not self.first:
            self.first = other.first
        self.collection.update(other.collection)

    def __add__(self, next):
        """Return a new chunk holding the frames of self and next."""
        new = self.__class__(self.seq)
        new.extend(self)
        new.extend(next)
        return new
|
2007-11-12 21:11:25 -07:00
|
|
|
|
|
|
|
|
2008-01-01 18:55:24 -07:00
|
|
|
# Bit masks tested against the flags field of a decoded TCP header.
FIN = 0x01
SYN = 0x02
RST = 0x04
PSH = 0x08
ACK = 0x10
|
|
|
|
|
2008-01-10 17:53:11 -07:00
|
|
|
class TCP_Resequence:
    """TCP session resequencer.

    >>> p = pcap.open('whatever.pcap')
    >>> s = TCP_Resequence()
    >>> while True:
    ...     pkt = p.read()
    ...     if not pkt:
    ...         break
    ...     f = Frame(pkt)
    ...     r = s.handle(f)
    ...     if r:
    ...         print ('chunk', r)

    This returns things in sequence.  So you get both sides of the
    conversation in the order that they happened.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.

    The object is a small state machine: self.handle is rebound to
    handle_handshake, handle_packet or handle_drop as the session
    progresses.  Index 0 of self.seq / self.pending belongs to the
    client side, index 1 to the server side.
    """

    def __init__(self):
        self.cli = None
        self.srv = None
        self.seq = [None, None]
        self.first = None
        self.pending = [{}, {}]
        self.frames = 0
        self.closed = 0

        self.handle = self.handle_handshake

    def handle(self, pkt):
        """Stub.

        This function will never be called, it is immediately overridden
        by __init__.  The current value of this function is the state.
        """

        pass

    def handle_handshake(self, pkt):
        """Track the three-way handshake and learn who is the client.

        Returns None for handshake packets.  A packet whose flags are
        not exactly SYN, SYN|ACK or ACK means the capture started
        mid-session; it is handed to handle_packet and that result is
        returned.
        """
        self.frames += 1

        if not self.first:
            self.first = pkt

        if pkt.flags == SYN:
            self.cli, self.srv = pkt.src, pkt.dst
        elif pkt.flags == (SYN | ACK):
            assert (pkt.src == (self.srv or pkt.src))
            self.cli, self.srv = pkt.dst, pkt.src
            self.seq = [pkt.ack, pkt.seq + 1]
        elif pkt.flags == ACK:
            assert (pkt.src == (self.cli or pkt.src))
            self.cli, self.srv = pkt.src, pkt.dst
            self.seq = [pkt.seq, pkt.ack]
            self.handle = self.handle_packet
        else:
            # In the middle of a session, do the best we can
            self.cli, self.srv = pkt.src, pkt.dst
            self.seq = [pkt.seq, pkt.ack]
            self.handle = self.handle_packet
            # handle_packet counts the frame itself; do not count twice.
            self.frames -= 1
            return self.handle(pkt)
        return None

    def handle_packet(self, pkt):
        """Process one packet of an established session.

        Returns a Chunk with the peer's data that this packet
        acknowledges, or None if the packet acknowledges nothing new.
        """
        ret = None
        self.frames += 1

        # Which way is this going?  0 == from client
        idx = int(pkt.src == self.srv)
        xdi = 1 - idx

        # Does this ACK after the last output sequence number?
        seq = self.seq[xdi]
        if pkt.ack > seq:
            ret = Chunk(seq)
            pending = self.pending[xdi]
            # Iterate over a sorted snapshot: entries are deleted as we
            # go, which is an error while iterating the dict itself.
            for key in sorted(pending):
                if key >= pkt.ack:
                    continue
                if key >= seq:
                    ret.add(pending[key])
                else:
                    warnings.warn('Dropping %r from mid-stream session' % pending[key])
                del pending[key]
            self.seq[xdi] = pkt.ack

        # If it has a payload, stick it into pending
        if pkt.payload:
            self.pending[idx][pkt.seq] = pkt

        # Is it a FIN or RST?
        if pkt.flags & (FIN | RST):
            self.closed += 1
            if self.closed == 2:
                # Warn about any unhandled packets
                if self.pending[0] or self.pending[1]:
                    warnings.warn('Dropping unhandled frames after shutdown: %r' % (pkt,))
                self.handle = self.handle_drop

        return ret

    def handle_drop(self, pkt):
        """Warn about any unhandled packets"""

        if pkt.payload:
            warnings.warn('Spurious frame after shutdown: %r %d' % (pkt, pkt.flags))
|
2007-11-12 21:11:25 -07:00
|
|
|
|
|
|
|
|
|
|
|
class HTTP_side:
    """One side of an HTTP transaction.

    Text is fed in through process().  The first line is kept in
    self.first, header lines go into self.headers (values are stored
    exactly as received, without stripping), and up to Content-Length
    bytes of body are collected in self.data.  self.complete becomes
    True once the headers have ended and no body bytes are outstanding.
    """

    def __init__(self):
        self.buf = ''
        self.first = ''
        self.in_headers = True
        self.headers = {}
        self.pending_data = 0
        self.data = ''
        self.complete = False

    def __repr__(self):
        return '<HTTP_side %r>' % self.first

    def process(self, chunk):
        """Returns any unprocessed part of the chunk, parts which go to
        the next utterance."""

        # Text held back by an earlier call precedes the new data.
        chunk = self.buf + chunk
        self.buf = ''
        while self.in_headers and chunk:
            line, newline, rest = chunk.partition('\n')
            if not newline:
                # Incomplete line: keep it until more data arrives.
                self.buf = chunk
                return ''
            chunk = rest
            self.process_header_line(line)
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d)  # May set to 0
        # Only finished once the blank line ending the headers was seen;
        # pending_data is also 0 before Content-Length has been read.
        if not self.in_headers and not self.pending_data:
            self.complete = True
        return chunk

    def process_header_line(self, line):
        """Consume one line of the header section.

        A blank line ends the headers.  The first non-blank line is
        stored as the start line; every later line must be a
        'name: value' header.

        Raises ValueError if a later line contains no ':'.
        """
        if not line.strip():
            self.in_headers = False
            return
        if not self.first:
            # The start line may itself contain ':' (an absolute URL,
            # for instance), so it must never be parsed as a header.
            self.first = line
            return
        k, colon, v = line.partition(':')
        if not colon:
            raise ValueError(('Not a header', line))
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
|
2007-12-11 17:20:54 -07:00
|
|
|
|
|
|
|
|
2007-12-21 17:08:00 -07:00
|
|
|
def resequence(pc):
    """Re-sequence from a pcap stream.

    >>> p = pcap.open('whatever.pcap')
    >>> for chunk in resequence(p):
    ...     print(repr(chunk))

    pc is iterated for packets.  Every packet that decodes as TCP is
    routed to a TCP_Resequence object selected by the frame's hash, and
    each non-empty chunk that object hands back is yielded.  Packets
    that Frame rejects with ValueError, and non-TCP packets, are
    skipped.
    """

    sessions = {}
    for pkt in pc:
        try:
            f = Frame(pkt)
        except ValueError:
            # Not IP, or a protocol Frame does not decode.  One such
            # packet must not end the whole stream.
            continue
        if f.protocol != TCP:
            continue
        # Frame.hash is identical for both directions of a session.
        s = sessions.get(f.hash)
        if s is None:
            s = sessions[f.hash] = TCP_Resequence()
        chunk = s.handle(f)
        if chunk:
            yield chunk
|
2007-12-21 17:08:00 -07:00
|
|
|
|
2008-01-10 17:53:11 -07:00
|
|
|
def demux(*pcs):
    """Demultiplex pcap objects based on time.

    This is iterable just like a pcap object, so you could for instance do:

    >>> resequence(demux(pcap1, pcap2, pcap3))

    Each argument must provide read(), returning the next frame or a
    false value when exhausted.  Frames are yielded in ascending order
    of the frame value itself; equal frames come out in the order
    their sources were passed in.
    """

    tops = []
    # The position of each source is stored between the frame and the
    # source so that two equal frames are ordered by it and the pcap
    # objects themselves never have to be compared.
    for order, pc in enumerate(pcs):
        frame = pc.read()
        if frame:
            heapq.heappush(tops, (frame, order, pc))

    while tops:
        frame, order, pc = heapq.heappop(tops)
        yield frame
        frame = pc.read()
        if frame:
            heapq.heappush(tops, (frame, order, pc))
|
|
|
|
|
2007-12-21 17:08:00 -07:00
|
|
|
|
2007-12-11 17:20:54 -07:00
|
|
|
def process_http(filename):
    """Parse the HTTP messages found in the capture file filename.

    Returns a list of (idx, HTTP_side) pairs, one per completed
    message, in the order they completed; idx is whatever index the
    session iterator reported for the chunk.

    NOTE(review): TCP_Session is not defined in the visible source, so
    this function cannot run as written -- confirm where it is meant
    to come from.
    """
    # XXX: probably broken
    import pcap

    capture = pcap.open(filename)
    sess = TCP_Session(capture)

    packets = []
    sides = [HTTP_side(), HTTP_side()]
    for idx, chunk in sess:
        side = sides[idx]
        while chunk:
            chunk = side.process(chunk)
            if side.complete:
                packets.append((idx, side))

                # Start a fresh message for this direction.
                side = HTTP_side()
                sides[idx] = side

    return packets
|
|
|
|
|
2007-12-21 17:08:00 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|