Some Python crap that I use

This commit is contained in:
Neale Pickett 2007-11-12 21:11:25 -07:00
commit bc0165d5ab
4 changed files with 576 additions and 0 deletions

10
bitarray.py Normal file
View File

@ -0,0 +1,10 @@
from StringIO import StringIO
class bitvector:
    """Read-only view of a string as a sequence of bits.

    Bit ``idx`` comes from character ``idx // 8`` of the wrapped string;
    inside a character, bit 0 is the least significant bit.
    """

    def __init__(self, txt):
        # Keep a reference to the text; bits are extracted on demand.
        self.txt = txt

    def __getitem__(self, idx):
        """Return bit ``idx`` (0 or 1).

        Raises IndexError (from the string lookup) when the bit lies
        beyond the end of the text.
        """
        char_index = idx // 8
        bit_index = idx % 8
        return (ord(self.txt[char_index]) >> bit_index) & 1

368
codebreak.py Normal file
View File

@ -0,0 +1,368 @@
#! /usr/bin/python
## Codebreaking tools
## 2007 Neale Pickett
## I should get an LAUR for this so we can share it.
from sets import Set
from pprint import pprint
# Relative frequency of each letter in English text, keyed by the
# uppercase letter.  Values are fractions (not percentages).
# From Wikipedia article "Letter Frequencies"
english_frequency = {'A': .08167,
                     'B': .01492,
                     'C': .02782,
                     'D': .04253,
                     'E': .12702,
                     'F': .02228,
                     'G': .02015,
                     'H': .06094,
                     'I': .06966,
                     'J': .00153,
                     'K': .00772,
                     'L': .04025,
                     'M': .02406,
                     'N': .06749,
                     'O': .07507,
                     'P': .01929,
                     'Q': .00095,
                     'R': .05987,
                     'S': .06327,
                     'T': .09056,
                     'U': .02758,
                     'V': .00978,
                     'W': .02360,
                     'X': .00150,
                     'Y': .01974,
                     'Z': .00074}
##
## Binary stuff
##
def bin(i):
    """Return the binary representation of integer i as a string.

    The result has no prefix and no leading zeros: ``bin(5) == '101'``.
    Zero yields ``'0'`` and negative numbers yield the magnitude preceded
    by ``'-'`` (previously both produced an empty string).

    Note: this intentionally keeps the historical name even though newer
    Pythons ship a builtin ``bin`` with a different ('0b'-prefixed) format.
    """
    if i == 0:
        return '0'
    sign = ''
    if i < 0:
        sign = '-'
        i = -i
    digits = []
    while i > 0:
        # Collect least-significant bit first; reversed below.
        digits.append(str(i & 1))
        i >>= 1
    digits.reverse()
    return sign + ''.join(digits)
class bitvector:
    """Fixed-length sequence of bits stored in a single integer.

    Construct from a string (each character contributes 8 bits, first
    character most significant) or from a non-negative integer.  ``length``
    overrides the number of bits; without it a string gives ``8 * len``
    bits and an integer gives as many bits as it needs (0 for 0).

    Indexing and slicing are 0-based and count from the most significant
    bit, so ``bv[0]`` is the first bit shown by ``repr``.  Iteration, for
    historical reasons, yields bits starting from the least significant.
    """

    def __init__(self, i, length=None):
        if isinstance(i, str):
            value = 0
            for c in i:
                value = (value << 8) + ord(c)
            self._val = value
            if length is None:
                length = len(i) * 8
        else:
            self._val = i
            if length is None:
                # Count the bits needed to represent i.
                length = 0
                while i > 0:
                    i >>= 1
                    length += 1
        self._len = length

    def __len__(self):
        return self._len

    def __getitem__(self, idx):
        """Return bit ``idx`` (0 = most significant) as an int.

        Negative indexes count from the end.  Slice objects are accepted
        (step 1 only) so that ``bv[a:b]`` also works on interpreters that
        no longer call ``__getslice__``.  Raises IndexError when ``idx`` is
        outside ``0 <= idx < len(self)``.
        """
        if isinstance(idx, slice):
            start, stop, step = idx.indices(self._len)
            if step != 1:
                raise ValueError('bitvector slices do not support a step')
            return self.__getslice__(start, stop)
        if idx < 0:
            idx += self._len
        if not 0 <= idx < self._len:
            raise IndexError('bitvector index out of range')
        # Bit 0 is the most significant, so it needs the largest shift.
        return int((self._val >> (self._len - 1 - idx)) & 1)

    def __getslice__(self, a, b):
        """Return bits ``a`` (inclusive) to ``b`` (exclusive) as a bitvector.

        Bounds are clamped to the vector; an empty or reversed range gives
        a zero-length bitvector instead of raising.
        """
        b = max(0, min(b, self._len))
        a = max(0, min(a, b))
        width = b - a
        shifted = self._val >> (self._len - b)
        return bitvector(shifted & ((1 << width) - 1), length=width)

    def __iter__(self):
        """Yield every bit, least significant first."""
        v = self._val
        for _ in range(self._len):
            yield int(v & 1)
            v >>= 1

    def __str__(self):
        """Return the bits packed into characters, 8 bits at a time.

        Full groups of 8 are taken from the most significant end; the
        final character holds whatever bits remain (1 to 8 of them).
        """
        chars = []
        remaining = self._len
        while remaining > 8:
            chars.append(chr((self._val >> (remaining - 8)) & 0xFF))
            remaining -= 8
        if remaining > 0:
            chars.append(chr(self._val & ((1 << remaining) - 1)))
        return ''.join(chars)

    def __int__(self):
        return self._val

    def __repr__(self):
        # Iteration is LSB-first; reverse it to print MSB-first.
        bits = list(self)
        bits.reverse()
        return '<bitvector ' + ''.join(str(x) for x in bits) + '>'
##
## Statistical stuff
##
def basedist(l):
    """Build a sample string whose letters follow ``english_frequency``.

    Every letter is repeated ``int(frequency * l)`` times, so the result
    is approximately (because of truncation, never more than) ``l``
    characters long.  Letters come out grouped, in the dictionary's
    iteration order.
    """
    runs = [letter * int(share * l)
            for letter, share in english_frequency.items()]
    return "".join(runs)
##
## Factoring stuff
##
def isPrime(number):
    """Return True when number is prime, False otherwise.

    Numbers below 2 are not prime.  (The earlier implementation had the
    result inverted -- it answered True for composite numbers -- and
    returned None for small inputs.)
    """
    return number >= 2 and smallestFactor(number) is None


def smallestFactor(number):
    """Return the smallest divisor d of number with 1 < d < number.

    Returns None when there is no such divisor, i.e. when number is prime
    or smaller than 4.
    """
    candidate = 2
    # A composite number always has a divisor no larger than its square
    # root, so there is no need to search beyond that.
    while candidate * candidate <= number:
        if number % candidate == 0:
            return candidate
        candidate += 1
    return None


def factor(number):
    """Return prime factors for number, smallest first.

    The factors multiply back to number, e.g. ``factor(12) == [2, 2, 3]``.
    Inputs that cannot be split (primes, and anything below 4) are
    returned as a single-element list containing the input itself.
    """
    factors = []
    divisor = smallestFactor(number)
    while divisor is not None:
        factors.append(divisor)
        # Floor division keeps the quotient an integer on every Python.
        number //= divisor
        divisor = smallestFactor(number)
    factors.append(number)
    return factors
##
## Statistical analysis
##
def where(haystack, needle):
    """Return the offsets in haystack at which needle occurs.

    Offsets are absolute positions in ``haystack`` in ascending order, and
    overlapping occurrences are all reported
    (``where('aaa', 'aa') == [0, 1]``).  An empty needle matches at every
    position including the end.
    """
    positions = []
    start = 0
    while True:
        # Search from just past the previous hit instead of slicing the
        # string, so reported offsets stay relative to the whole haystack.
        pos = haystack.find(needle, start)
        if pos == -1:
            break
        positions.append(pos)
        start = pos + 1
    return positions
def ngrams(n, haystack, min=2, repeats=False):
    """Find the n-character substrings of haystack and where they occur.

    Returns a dict mapping each n-gram to the ascending list of offsets in
    ``haystack`` at which it starts (overlapping occurrences included).

    n        -- length of the substrings to collect; values below 1 give
                an empty result.
    min      -- only n-grams occurring at least this many times are kept.
    repeats  -- when true, keep only n-grams made of a single repeated
                character (such as 'ss' or 'ee').

    Only complete n-grams are considered: the final, shorter-than-n
    fragments at the end of haystack are not counted.
    """
    if n < 1:
        return {}
    positions = {}
    for start in range(len(haystack) - n + 1):
        gram = haystack[start:start + n]
        if repeats and gram != gram[:1] * n:
            continue
        positions.setdefault(gram, []).append(start)
    return dict((gram, found) for gram, found in positions.items()
                if len(found) >= min)
def freq(txt):
    """Return the occurrences of every single character in txt.

    This is ``ngrams`` for 1-grams with the minimum count set to 0, so no
    character is filtered out.
    """
    return ngrams(n=1, haystack=txt, min=0)
def bigrams(txt):
    """Return the 2-character sequences of txt via ``ngrams``.

    The minimum occurrence count is left at the ``ngrams`` default.
    """
    return ngrams(n=2, haystack=txt)
def trigrams(txt):
    """Return the 3-character sequences of txt via ``ngrams``.

    The minimum occurrence count is left at the ``ngrams`` default.
    """
    return ngrams(n=3, haystack=txt)
def freqgraph(f):
    """Print a text bar chart of the counts in f, largest count first.

    ``f`` maps a label to either an integer count or a sized collection
    (whose length is used as the count).  One line is printed per label in
    the form ``label: ####``; labels with equal counts keep the order in
    which the dictionary yields them.
    """
    rows = []
    for label, count in f.items():
        if type(count) is not int:
            # Not a plain count: treat it as a list of occurrences.
            count = len(count)
        rows.append((label, count))
    # Stable descending sort on the count.
    rows.sort(key=lambda row: row[1], reverse=True)
    for row in rows:
        print('%s: %s' % (row[0], '#' * row[1]))
def neighbors(txt):
    """Map each character to the Set of characters found adjacent to it.

    Adjacency is taken from the keys returned by ``bigrams(txt)``: for a
    bigram 'xy', 'y' is recorded as a neighbor of 'x' and 'x' as a
    neighbor of 'y'.  Bigrams that ``bigrams`` filters out therefore do
    not contribute.
    """
    adjacent = {}
    for pair in bigrams(txt):
        left = pair[0]
        right = pair[1]
        adjacent.setdefault(left, Set()).add(right)
        adjacent.setdefault(right, Set()).add(left)
    return adjacent
##
## Brute force tools
##
def rot(n, txt):
    """Caesar cipher: rotate every ASCII letter in txt by n places.

    Upper- and lowercase letters wrap around within their own alphabet;
    every other character is copied unchanged.  ``n`` may be any integer:
    negative values rotate backwards and values outside 0..25 wrap, so
    ``rot(-n, rot(n, txt)) == txt``.
    """
    shifted = []
    for c in txt:
        if 'a' <= c <= 'z':
            base = ord('a')
        elif 'A' <= c <= 'Z':
            base = ord('A')
        else:
            # Not an ASCII letter: pass through untouched.
            shifted.append(c)
            continue
        shifted.append(chr(base + (ord(c) - base + n) % 26))
    return ''.join(shifted)
def caesars(txt):
    """Return the 26 Caesar rotations of txt.

    Element ``i`` of the returned list is ``rot(i, txt)``, so element 0 is
    produced by a rotation of zero.
    """
    shifts = []
    for shift in range(26):
        shifts.append(rot(shift, txt))
    return shifts
# Tabula recta: 26 rows, where row i is the uppercase alphabet as
# returned by caesars() for shift i.
tabula_recta = caesars('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
def xor(n, txt):
    """Return txt with the code of every character XORed with n."""
    return ''.join([chr(ord(c) ^ n) for c in txt])
def xors(txt):
    """Return txt XORed with every key 0..255.

    Element ``k`` of the returned list is ``xor(k, txt)``.
    """
    return [xor(key, txt) for key in range(256)]
def add(n, txt):
    """Return txt with n added to every character code, modulo 256.

    ``n`` may be negative; Python's ``%`` already yields a result in
    0..255 for a positive modulus, so no extra bias is needed.
    """
    return ''.join([chr((ord(c) + n) % 256) for c in txt])
def adds(txt):
    """Return txt shifted by every offset 0..255.

    Element ``k`` of the returned list is ``add(k, txt)``.
    """
    return [add(offset, txt) for offset in range(256)]
##
## Grep-like things within dictionary
##
def matches(str, tgt):
    """Return True when str and tgt have the same letter pattern.

    The two sequences match when they have equal length and there is a
    one-to-one substitution turning one into the other: equal elements of
    ``str`` always line up with equal elements of ``tgt`` and vice versa.
    For example 'noon' matches 'abba' but not 'abcd'.
    """
    if len(str) != len(tgt):
        return False
    forward = {}    # element of str -> element of tgt it must map to
    backward = {}   # element of tgt -> element of str it must come from
    for s, t in zip(str, tgt):
        # setdefault records a first-time mapping and otherwise returns the
        # existing one, so the comparison works even for falsy values.
        if forward.setdefault(s, t) != t:
            return False
        if backward.setdefault(t, s) != s:
            return False
    return True
def guess(pattern, wordlist='/usr/share/dict/words'):
    """Print and return the dictionary words sharing pattern's letter pattern.

    pattern   -- text whose repeated-letter structure is wanted, e.g.
                 'xyyz'; compared case-insensitively using ``matches``.
    wordlist  -- path of a file with one word per line.

    Each matching word (lowercased, surrounding whitespace removed) is
    printed as it is found; the same words are returned as a list in file
    order.  Previously the returned list was always empty.
    """
    found = []
    pattern = pattern.lower()
    # The context manager closes the word list even if matching fails.
    with open(wordlist) as words:
        for word in words:
            word = word.strip().lower()
            if matches(word, pattern):
                print(word)
                found.append(word)
    return found
##
## Overview tools
##
def summary(txt):
    """Print an overview of txt for a first look at a ciphertext.

    In order, this prints the length and its factorization, then bar
    charts of single-character, bigram and trigram counts, a chart of
    doubled characters, and finally the neighbor table.  Each heading
    lists the usual English suspects for comparison.  Nothing is returned.
    """
    size = len(txt)
    print("Length %s" % size)
    print("Factors %s" % (factor(size),))
    print("")

    print("Frequency (etaoin shrdlcu)")
    freqgraph(freq(txt))
    print("")

    print("Bigrams (th er on an re he in ed nd ha at en es of or")
    print(" nt ea ti to it st io le is ou ar as de rt ve)")
    freqgraph(bigrams(txt))
    print("")

    print("Trigrams (the and tha ent ion tio for nde has nce edt")
    print(" tis oft sth men)")
    freqgraph(trigrams(txt))
    print("")

    # 4-letter words: that with have this will your from they know
    # want been good much some time
    print("Repeats (ss ee tt ff ll mm oo)")
    doubled = ngrams(2, txt, min=1, repeats=True)
    freqgraph(doubled)
    print("")

    print("Unique neighbors")
    pprint(neighbors(txt))
    print("")
def replace(txt, orig, repl):
    """Return txt with each item of orig replaced by its partner in repl.

    ``orig`` and ``repl`` are paired element by element (for two strings:
    character by character); extra elements of the longer one are
    ignored.  All substitutions are applied in a single pass, so text
    produced by one replacement is never rewritten by another --
    ``replace('abc', 'ab', 'ba')`` swaps the letters and gives ``'bac'``.

    If an item appears more than once in ``orig`` its first pairing wins.
    Empty search strings are ignored.
    """
    import re

    table = {}
    for old, new in zip(orig, repl):
        if old and old not in table:
            table[old] = new
    if not table:
        return txt
    # Longest alternatives first so a multi-character item is not shadowed
    # by a shorter item that is its prefix.
    keys = sorted(table, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(key) for key in keys))
    return pattern.sub(lambda match: table[match.group(0)], txt)

190
resequence.py Executable file
View File

@ -0,0 +1,190 @@
#! /usr/bin/python
import scapy
import StringIO
# Short aliases for the scapy layer classes used to index packets below.
IP = scapy.IP
TCP = scapy.TCP
Raw = scapy.Raw
# NOTE(review): drop_pad is not referenced anywhere in the visible code;
# DropStringIO pads with its own padstr ('Drop') -- confirm it is needed.
drop_pad = 'DROP'
class DropStringIO(StringIO.StringIO):
"""StringIO with different padding.
If you write beyond the length of the current string, this pads with
the string 'Drop', and not NULs. This should make it more obvious
that you've had a drop. I hope.
"""
padstr = 'Drop'
def write(self, s):
if self.pos > self.len:
bytes = self.pos - self.len
pad = self.padstr * ((bytes / len(self.padstr)) + 1)
self.buflist.append(pad[:bytes])
self.len = self.pos
return StringIO.StringIO.write(self, s)
class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns a
    new ethernet frame.  For instance, an object from my py-pcap module.
    Element 2 of whatever read() returns is handed to scapy.Ether(); a
    false value from read() means the capture is exhausted.

    Iterating yields (srv, chunk), where srv is 1 if this came from the
    server, and chunk is a chunk of data.

    This returns things in sequence.  So you get both sides of the
    conversation in the order that they happened.  Data from one side is
    released once the other side acknowledges it.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.
    """

    def __init__(self, pc):
        self.pc = pc
        self.cli = None             # (address, port) of the client
        self.srv = None             # (address, port) of the server
        self.seq = [None, None]     # next sequence number to emit: [cli, srv]
        self.pending = [{}, {}]     # un-emitted packets by seq: [cli, srv]
        self.frames = 0             # frames consumed so far
        self.read_handshake()

    def read_packet(self):
        """Return the next frame decoded by scapy, or None at the end."""
        p = self.pc.read()
        if not p:
            return
        return scapy.Ether(p[2])

    def read_handshake(self):
        """Consume the three-way handshake and record both endpoints.

        The first frame must be the SYN.  Frames that are not the expected
        SYN-ACK / ACK are skipped.  Peers are told apart by IP address
        only.
        NOTE(review): running out of frames here is not handled -- the
        None returned by read_packet() would fail on indexing; confirm
        whether callers need a cleaner error.
        """
        # Read SYN
        pkt = self.read_packet()
        assert (pkt[TCP].flags == 2)    # XXX: There's got to be a better way
        self.cli = (pkt[IP].src, pkt.sport)
        self.srv = (pkt[IP].dst, pkt.dport)
        self.seq[0] = pkt.seq + 1

        # Read SYN-ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.srv[0]) and
                    (pkt[TCP].flags == 18)):
                self.seq[1] = pkt.seq + 1
                break

        # Read ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.cli[0]) and
                    (pkt[TCP].flags == 16)):
                assert (self.seq[0] == pkt.seq)
                break

        self.frames = 3

    def __iter__(self):
        """Yield (srv, chunk) pairs until the capture runs out.

        When the capture is exhausted, done() is called to report any
        payload that was never acknowledged.
        """
        while True:
            pkt = self.read_packet()
            if not pkt:
                # Leave the loop (rather than returning) so that the
                # leftover report below actually runs.
                break
            self.frames += 1

            # Which way is this going?
            idx = int(pkt[IP].src == self.srv[0])
            xdi = 1 - idx

            # Does this ACK after the last output sequence number?
            if pkt.ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                # Iterate a sorted snapshot: entries are deleted as we go.
                for key in sorted(pending):
                    if key >= pkt.ack:
                        continue
                    pkt2 = pending[key]
                    del pending[key]
                    # Place the payload at its offset from the last output
                    # point; DropStringIO marks any hole in between.
                    ret.seek(pkt2.seq - seq)
                    ret.write(pkt2[TCP][Raw].load)
                self.seq[xdi] = pkt.ack
                yield (xdi, ret.getvalue())

            # If it has a payload, stick it into pending
            if hasattr(pkt[TCP][Raw], 'load'):
                self.pending[idx][pkt.seq] = pkt
        self.done()

    def done(self):
        """Warn about any unhandled packets"""
        for p in self.pending:
            k = sorted(p)
            if k:
                print('unused packets: %s' % (k,))
        return
class HTTP_side:
    """One side of an HTTP transaction.

    Feed the raw text of a request or response to process() in as many
    pieces as it arrives.  Afterwards:

    first     -- the start line (request or status line), as received
    headers   -- dict of header name -> value, both exactly as received
                 (values keep their leading space and trailing '\\r')
    data      -- the body collected so far
    complete  -- True once the headers have ended and Content-Length
                 characters of body (if any) have been collected
    """

    def __init__(self):
        self.buf = ''               # incomplete header line awaiting more data
        self.first = ''
        self.in_headers = True
        self.headers = {}
        self.pending_data = 0       # body characters still expected
        self.data = ''
        self.complete = False

    def __repr__(self):
        return '<HTTP_side %r>' % self.first

    def process(self, chunk):
        """Returns any unprocessed part of the chunk, parts which go to
        the next utterance."""
        # Leftover from the previous call comes *before* the new data.
        chunk = self.buf + chunk
        self.buf = ''
        while self.in_headers and chunk:
            try:
                line, chunk = chunk.split('\n', 1)
            except ValueError:
                # No newline yet: keep the partial line for next time.
                self.buf = chunk
                return ''
            self.process_header_line(line)
        if self.in_headers:
            # Ran out of input before the blank line ending the headers;
            # the message cannot be complete yet.
            return chunk
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d)     # May set to 0
        if not self.pending_data:
            self.complete = True
        return chunk

    def process_header_line(self, line):
        """Handle one line from the header section.

        A blank line ends the headers.  The first non-blank line is stored
        as the start line, whether or not it contains a colon.  Every
        later line must be ``name: value``; otherwise ValueError is
        raised.  A Content-Length header sets the expected body size.
        """
        if not line.strip():
            self.in_headers = False
            return
        if not self.first:
            self.first = line
            return
        try:
            k, v = line.split(':', 1)
        except ValueError:
            raise ValueError(('Not a header', line))
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)

8
startup.py Normal file
View File

@ -0,0 +1,8 @@
# Interactive startup hook: switch on tab completion when the readline
# module can be imported, and say so when it cannot.
try:
    import readline
except ImportError:
    print("Module readline not available.")
else:
    # rlcompleter supplies the completion function readline will call.
    import rlcompleter
    readline.parse_and_bind("tab: complete")