diff --git a/__init__.py b/__init__.py
index f70c2ee..85a773a 100755
--- a/__init__.py
+++ b/__init__.py
@@ -2,8 +2,9 @@
# -*- coding: utf-8 -*-
import sys
+import struct
-printable = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()[]{}`~/=\\?+|\',."<> '
+printable = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()[]{}`~/=-\\?+|\',."<> '
def unpack(fmt, buf):
"""Unpack buf based on fmt, return the rest as a string."""
diff --git a/crypto.py b/crypto.py
index 87684df..32e826b 100644
--- a/crypto.py
+++ b/crypto.py
@@ -213,6 +213,25 @@ def adds(txt):
return ret
+class XorMask:
+    """Repeating-key XOR mask.
+
+    The key is kept as a tuple of integers and applied cyclically: each
+    input character is XORed with the key element at self.offset, and
+    the offset then advances by one, wrapping at the end of the key.
+    """
+
+    def __init__(self, mask):
+        """Store the key.
+
+        mask is either a string (each character contributes its ord())
+        or any iterable of integers.  Raises ValueError when it is
+        empty, because an empty key cannot mask anything.
+        """
+        self.offset = 0
+        # isinstance() rather than an exact type comparison, so that
+        # str subclasses are treated as strings too.
+        if isinstance(mask, str):
+            self._mask = tuple(ord(m) for m in mask)
+        else:
+            self._mask = tuple(mask)
+        if not self._mask:
+            raise ValueError('XorMask needs a non-empty mask')
+
+    def mask(self, s, stick=False):
+        """Return s with every character XORed against the key.
+
+        Masking starts at self.offset.  With stick=True the advanced
+        offset is kept, so the next call continues where this one
+        stopped; otherwise the offset is reset to 0 before returning.
+        """
+        key = self._mask
+        keylen = len(key)
+        r = []
+        for c in s:
+            r.append(chr(ord(c) ^ key[self.offset]))
+            self.offset = (self.offset + 1) % keylen
+        if not stick:
+            self.offset = 0
+        return ''.join(r)
+
+
##
## Grep-like things within dictionary
##
diff --git a/resequence.py b/ip.py
similarity index 71%
rename from resequence.py
rename to ip.py
index c7ebc20..28e125f 100755
--- a/resequence.py
+++ b/ip.py
@@ -16,8 +16,8 @@ def unpack_nybbles(byte):
return (byte >> 4, byte & 0x0F)
ICMP = 1
-TCP = 6
-UDP = 17
+TCP = 6
+UDP = 17
class Frame:
"""Turn an ethernet frame into relevant TCP parts"""
@@ -86,6 +86,8 @@ class Frame:
# Nice formatting
self.src = (self.saddr, self.sport)
self.dst = (self.daddr, self.dport)
+
+ # This hash is the same for both sides of the transaction
self.hash = (self.saddr ^ self.sport ^ self.daddr ^ self.dport)
def get_src_addr(self):
@@ -101,40 +103,84 @@ class Frame:
dst_addr = property(get_dst_addr)
def __repr__(self):
- return ' %s:%d len %d>' % (self.name,
- self.src_addr, self.sport,
- self.dst_addr, self.dport,
- len(self.payload))
+        # Six arguments need six conversions; with fewer, the % operator
+        # raises TypeError ("not all arguments converted").
+        return '<Frame %s %s:%d -> %s:%d length %d>' % (self.name,
+                                                        self.src_addr, self.sport,
+                                                        self.dst_addr, self.dport,
+                                                        len(self.payload))
-class DropStringIO(StringIO.StringIO):
- """StringIO with different padding.
+class Chunk:
+ """Chunk of frames, possibly with gaps.
- If you write beyond the length of the current string, this pads with
- the string 'Drop', and not NULs. This should make it more obvious
- that you've had a drop. I hope.
+ Currently, gaps show up as a string of 0x33, ascii '3'.
"""
- padstr = 'Drop'
+    def __init__(self, seq, drop='3'):
+        """Start an empty chunk whose first byte has sequence number seq.
+
+        drop is the single filler character used wherever data is
+        missing.
+        """
+        # The default filler is chr(0x33), i.e. '3': a long run of 3s in
+        # either the ascii or the hex view is the hint that data was
+        # dropped.
+        assert len(drop) == 1, "Don't yet support len(drop) > 1"
+        self.seq = seq
+        self.drop = drop
+        self.first = None
+        self.length = 0
+        self.collection = {}
- def write(self, s):
- if self.pos > self.len:
- bytes = self.pos - self.len
- pad = self.padstr * ((bytes / len(self.padstr)) + 1)
- self.buflist.append(pad[:bytes])
- self.len = self.pos
- return StringIO.StringIO.write(self, s)
+    def add(self, frame):
+        """Record frame in the chunk and grow the chunk to cover it.
+
+        frame must not start before the chunk does.
+        """
+        assert frame.seq >= self.seq, (frame.seq, self.seq)
+        if not self.first:
+            # Remember the first frame handed to us; __repr__ reads its
+            # addresses and ports.
+            self.first = frame
+        self.collection[frame.seq] = frame
+        # Offset, relative to the chunk start, just past this payload.
+        tail = (frame.seq - self.seq) + len(frame.payload)
+        if long(tail) > self.length:
+            self.length = long(tail)
+
+    def __len__(self):
+        """Return the chunk's span in bytes, gaps included, as a plain int."""
+        span = self.length
+        return int(span)
+
+    def __repr__(self):
+        """Describe the chunk by its first frame's endpoints and its length."""
+        if not self.first:
+            # Nothing has been added yet, so there are no endpoints to
+            # show; still return something visible in logs.
+            return '<Chunk (no frames)>'
+        # Five arguments need five conversions; with fewer, the %
+        # operator raises TypeError ("not all arguments converted").
+        return '<Chunk %s:%d -> %s:%d length %d>' % (self.first.src_addr,
+                                                     self.first.sport,
+                                                     self.first.dst_addr,
+                                                     self.first.dport,
+                                                     len(self))
+
+    def __str__(self):
+        """Return the reassembled payload, with gaps filled by self.drop.
+
+        Frames are laid out by their offset from self.seq.  A frame that
+        starts inside data already emitted is skipped, and so is a frame
+        with an empty payload.
+        """
+        # NOTE: gap filling assumes len(self.drop) == 1, which __init__
+        # asserts.
+        parts = []
+        pos = 0     # number of bytes emitted so far
+        for seq in sorted(self.collection):
+            payload = self.collection[seq].payload
+            gap = seq - self.seq - pos
+            if gap < 0 or not payload:
+                # Overlaps what was already emitted, or carries no data
+                # (an empty payload would never advance the position).
+                continue
+            if gap:
+                # Fill the whole hole at once instead of byte by byte.
+                parts.append(self.drop * gap)
+            parts.append(payload)
+            pos += gap + len(payload)
+        if pos < self.length:
+            # Trailing hole up to the recorded length.
+            parts.append(self.drop * (self.length - pos))
+        return ''.join(parts)
+
+    def __add__(self, next):
+        """Return a new Chunk holding this chunk's frames plus next's.
+
+        The result starts at self.seq and uses self.drop as filler;
+        neither operand is modified.
+        """
+        merged = Chunk(self.seq, self.drop)
+        for source in (self.collection, next.collection):
+            for frame in source.itervalues():
+                merged.add(frame)
+        return merged
class TCP_Session:
- """Iterable TCP session resequencer.
+ """TCP session resequencer.
- You initialize it with something with a read() method that returns a
- new ethernet frame. For instance, an object from my py-pcap module.
-
- The read() method returns (srv, chunk), where srv is 1 if this came
- from the server, and chunk is a chunk of data.
+ >>> p = pcap.open('whatever.pcap')
+ >>> s = TCP_Session()
+ >>> while True:
+ ... pkt = p.read()
+ ... if not pkt:
+ ... break
+ ... f = Frame(pkt)
+ ... r = s.handle(f)
+ ... if r:
+ ... print ('chunk', r)
This returns things in sequence. So you get both sides of the
conversation in the order that they happened.
@@ -176,11 +222,11 @@ class TCP_Session:
elif pkt.flags == 18: # SYNACK
assert (pkt.src == (self.srv or pkt.src))
self.cli, self.srv = pkt.dst, pkt.src
- self.seq = [pkt.ack + 1, pkt.seq + 1]
+ self.seq = [pkt.ack, pkt.seq + 1]
elif pkt.flags == 16: # ACK
assert (pkt.src == (self.cli or pkt.src))
self.cli, self.srv = pkt.src, pkt.dst
- self.seq = [pkt.seq, pkt.ack + 1]
+ self.seq = [pkt.seq, pkt.ack]
self.handle = self.handle_packet
else:
raise ValueError('Weird flags in handshake: %d' % pkt.flags)
@@ -195,23 +241,15 @@ class TCP_Session:
# Does this ACK after the last output sequence number?
if pkt.ack > self.seq[xdi]:
+ ret = Chunk(self.seq[xdi])
pending = self.pending[xdi]
- seq = self.seq[xdi]
- ret = DropStringIO()
- keys = pending.keys()
- for key in keys:
+ for key in pending.keys():
if key >= pkt.ack:
continue
-
- pkt2 = pending[key]
+ ret.add(pending[key])
del pending[key]
-
- ret.seek(pkt2.seq - seq)
- ret.write(pkt2.payload)
self.seq[xdi] = pkt.ack
- ret = (xdi, ret.getvalue())
-
# If it has a payload, stick it into pending
if pkt.payload:
self.pending[idx][pkt.seq] = pkt
@@ -289,6 +327,14 @@ class HTTP_side:
def resequence(pc):
+ """Re-sequence from a pcap stream.
+
+ >>> p = pcap.open('whatever.pcap')
+ >>> for chunk in resequence(p):
+ ... print `chunk`
+
+ """
+
sessions = {}
for pkt in pc:
f = Frame(pkt)
@@ -298,9 +344,9 @@ def resequence(pc):
if not s:
s = TCP_Session()
sessions[f.hash] = s
- r = s.handle(f)
- if r:
- yield (f, r)
+ chunk = s.handle(f)
+ if chunk:
+ yield chunk
def process_http(filename):