mirror of https://github.com/dirtbags/netarch.git
Some Python crap that I use
This commit is contained in:
commit
bc0165d5ab
|
@ -0,0 +1,10 @@
|
|||
from StringIO import StringIO
|
||||
|
||||
class bitvector:
    """View a byte string as an indexable vector of bits.

    Bit idx lives in byte idx // 8, at bit position idx % 8
    (least-significant bit first within each byte).
    """

    def __init__(self, txt):
        self.txt = txt

    def __getitem__(self, idx):
        byte, bit = divmod(idx, 8)
        return (ord(self.txt[byte]) >> bit) & 1
|
|
@ -0,0 +1,368 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
## Codebreaking tools
|
||||
## 2007 Neale Pickett
|
||||
## I should get an LAUR for this so we can share it.
|
||||
|
||||
from sets import Set
|
||||
from pprint import pprint
|
||||
|
||||
# From Wikipedia article "Letter Frequencies"
english_frequency = {
    'A': .08167, 'B': .01492, 'C': .02782, 'D': .04253,
    'E': .12702, 'F': .02228, 'G': .02015, 'H': .06094,
    'I': .06966, 'J': .00153, 'K': .00772, 'L': .04025,
    'M': .02406, 'N': .06749, 'O': .07507, 'P': .01929,
    'Q': .00095, 'R': .05987, 'S': .06327, 'T': .09056,
    'U': .02758, 'V': .00978, 'W': .02360, 'X': .00150,
    'Y': .01974, 'Z': .00074,
}
|
||||
|
||||
##
|
||||
## Binary stuff
|
||||
##
|
||||
|
||||
def bin(i):
    """Return the binary representation of i as a string of '0'/'1'.

    No '0b' prefix.  Shadows the builtin bin() deliberately (this
    predates it); kept under the same name for compatibility.

    Fix: bin(0) now returns '0' instead of the empty string.
    Negative inputs still produce '' (undefined here), as before.
    """
    if i == 0:
        return '0'
    bits = []
    while i > 0:
        bits.append(str(i % 2))
        i = i >> 1
    bits.reverse()
    return ''.join(bits)
||||
|
||||
class bitvector:
    """Arbitrary-length bit vector backed by a single Python integer.

    Construct from a byte string (big-endian: first byte is the most
    significant) or from an integer, optionally with an explicit bit
    length.  Python 2 only: uses xrange and the __getslice__ protocol.
    """

    def __init__(self, i, length=None):
        # txt argument: fold bytes into one big int, MSB first.
        if type(i) == type(''):
            self._val = 0
            for c in i:
                self._val <<= 8
                self._val += ord(c)
            if length is not None:
                self._len = length
            else:
                self._len = len(i) * 8
        else:
            # int argument: infer bit length from the value unless given.
            self._val = i
            if length is not None:
                self._len = length
            else:
                self._len = 0
                while i > 0:
                    i >>= 1
                    self._len += 1

    def __len__(self):
        # Length in bits, not bytes.
        return self._len

    def __getitem__(self, idx):
        # NOTE(review): indexing looks effectively 1-based from the MSB:
        # idx == 1 is the most significant bit, idx == _len the least,
        # and idx == 0 always yields 0 (idx > _len, not >=, is rejected).
        # Possibly an off-by-one; left as-is since callers may rely on it.
        if idx > self._len:
            raise IndexError()
        idx = self._len - idx
        return int((self._val >> idx) & 1)

    def __getslice__(self, a, b):
        # Python 2-only slice protocol; returns a new bitvector of the
        # bits between positions a and b, clamped to the vector's length.
        if b > self._len:
            b = self._len
        i = self._val >> (self._len - b)
        l = b - a
        mask = (1 << l) - 1
        return bitvector(i & mask, length=l)

    def __iter__(self):
        # Yields bits least-significant first (reverse of __repr__).
        v = self._val
        for i in xrange(self._len):
            yield int(v & 1)
            v >>= 1

    def __str__(self):
        # Pack back into a byte string, most significant byte first;
        # a final partial byte (1..8 bits) is emitted unshifted.
        r = ''
        v = self._val
        i = self._len
        while i > 8:
            o = ((v >> (i - 8)) & 0xFF)
            r += chr(o)
            i -= 8
        if i > 0:
            o = v & ((1 << i) - 1)
            r += chr(o)
        return r

    def __int__(self):
        return self._val

    def __repr__(self):
        # Display most-significant bit first.
        l = list(self)
        l.reverse()
        return '<bitvector ' + ''.join(str(x) for x in l) + '>'
|
||||
|
||||
##
|
||||
## Statistical stuff
|
||||
##
|
||||
|
||||
|
||||
def basedist(l):
    """Return a string of roughly length l with the standard English
    letter distribution (each count is truncated to an int, so the
    result can come out slightly short).
    """
    chunks = []
    for letter, share in english_frequency.items():
        chunks.append(letter * int(share * l))
    return ''.join(chunks)
|
||||
|
||||
|
||||
##
|
||||
## Factoring stuff
|
||||
##
|
||||
|
||||
|
||||
def isPrime(number):
    """Return True if number is COMPOSITE, False if it is prime.

    NOTE(review): the name is inverted relative to what it does.
    factor() relies on this inverted sense ("loop while the number is
    still composite"), so the behavior is documented and kept rather
    than renamed.  Trial division, O(number).

    Fix: numbers < 3 now return False explicitly instead of falling
    off the end and returning None (both are falsy, so boolean callers
    are unaffected).
    """
    for x in range(2, number):
        if number % x == 0:
            return True
    return False
|
||||
def smallestFactor(number):
    """Return the smallest nontrivial divisor of number, or None if
    number is prime (trial division over [2, number))."""
    for candidate in range(2, number):
        if number % candidate == 0:
            return candidate
    return None
|
||||
|
||||
def factor(number):
    """Return prime factors for number, in nondecreasing order.

    Repeatedly strips off the smallest factor while the remainder is
    composite (note: isPrime() in this file returns True for COMPOSITE
    numbers), then appends the final prime remainder.

    Fix: uses floor division (//) so the running value stays integral;
    identical on Python 2, and avoids float results under Python 3's
    true division.
    """
    factors = []
    while isPrime(number):
        newFactor = smallestFactor(number)
        factors.append(newFactor)
        number = number // newFactor
    factors.append(number)
    return factors
|
||||
|
||||
|
||||
##
|
||||
## Statistical analysis
|
||||
##
|
||||
|
||||
def where(haystack, needle):
    """Locate every (possibly overlapping) occurrence of needle.

    NOTE: the first entry is an absolute position, but each later
    entry is relative — the search restarts one character past the
    previous hit.  ngrams() only uses the number of hits, for which
    this form is sufficient.
    """
    hits = []
    remaining = haystack
    while True:
        at = remaining.find(needle)
        if at < 0:
            return hits
        hits.append(at)
        remaining = remaining[at + 1:]
|
||||
|
||||
|
||||
def ngrams(n, haystack, min=2, repeats=False):
    """Tabulate the n-grams of haystack.

    Returns a dict mapping each length-n substring that occurs at
    least `min` times to its where() hit list.  With repeats=True,
    only n-grams consisting of one repeated character are kept.
    (`min` shadows the builtin; name kept for interface compatibility.)
    """
    acc = {}
    for start in range(len(haystack)):
        tail = haystack[start:]
        needle = tail[:n]
        if repeats:
            # Keep only needles made of a single repeated character.
            first = needle[0]
            uniform = True
            for ch in needle:
                if ch != first:
                    uniform = False
                    break
            if not uniform:
                continue
        if needle not in acc:
            found = where(tail, needle)
            if len(found) >= min:
                acc[needle] = found
    return acc
|
||||
|
||||
|
||||
def freq(txt):
    """Single-character frequency table for txt (min=0: every
    character that appears is included)."""
    return ngrams(1, txt, min=0)
|
||||
|
||||
def bigrams(txt):
    """Character pairs occurring at least twice in txt."""
    return ngrams(2, txt)
|
||||
|
||||
def trigrams(txt):
    """Character triples occurring at least twice in txt."""
    return ngrams(3, txt)
|
||||
|
||||
|
||||
def freqgraph(f):
    """Print a '#' bar graph of frequency dict f, most common first.

    Values may be plain counts or the hit lists produced by ngrams()
    (in which case the list length is graphed).  Python 2 only: uses
    a cmp function with list.sort() and print statements.
    """
    def cmp2(x, y):
        # Sort by count, descending.
        a = x[1]
        b = y[1]
        if a > b:
            return -1
        elif a < b:
            return 1
        else:
            return 0
    items = []
    for c,n in f.iteritems():
        if type(n) != type(0):
            # A where() hit list rather than a count: graph its length.
            n = len(n)
        items.append((c,n))
    items.sort(cmp2)

    for c,n in items:
        print '%s: %s' % (c, '#' * n)
|
||||
|
||||
def neighbors(txt):
    """Map each character to the Set of characters seen adjacent to it.

    Built from the bigram table: for every repeated 2-gram "ab",
    records b as a neighbor of a and a as a neighbor of b.

    Fix: dropped the unused hit-list/count locals and iterates the
    bigram keys directly.
    """
    out = {}
    for dg in bigrams(txt):
        n = out.get(dg[0], Set())
        n.add(dg[1])
        out[dg[0]] = n

        n = out.get(dg[1], Set())
        n.add(dg[0])
        out[dg[1]] = n
    return out
|
||||
|
||||
|
||||
##
|
||||
## Brute force tools
|
||||
##
|
||||
|
||||
def rot(n, txt):
    """Caesar cipher: shift each letter of txt forward n places,
    wrapping within its own case; non-letters pass through untouched.
    (Assumes 0 <= n <= 26 — larger shifts only wrap once.)
    """
    shifted = []
    for ch in txt:
        if not ch.isalpha():
            shifted.append(ch)
            continue
        code = ord(ch) + n
        if ch.islower() and code > ord('z'):
            code -= 26
        elif ch.isupper() and code > ord('Z'):
            code -= 26
        shifted.append(chr(code))
    return ''.join(shifted)
|
||||
|
||||
|
||||
def caesars(txt):
    """Return all 26 Caesar shifts of txt (rot 0 through rot 25)."""
    return [rot(i, txt) for i in range(26)]
|
||||
|
||||
# Tabula recta: the 26 Caesar-shifted alphabets (row i = rot(i, 'A'..'Z')),
# as used by Vigenere-family ciphers.
tabula_recta = caesars('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
||||
|
||||
|
||||
def xor(n, txt):
    """XOR every byte of txt with the single-byte key n; returns the
    resulting string."""
    return ''.join(chr(ord(ch) ^ n) for ch in txt)
|
||||
|
||||
def xors(txt):
    """Return txt XORed with every possible single-byte key, 0..255."""
    return [xor(key, txt) for key in range(256)]
|
||||
|
||||
|
||||
def add(n, txt):
    """Add n to every byte of txt, modulo 256 (n may be negative)."""
    shifted = []
    for ch in txt:
        # The +256 keeps the operand positive when n < 0.
        shifted.append(chr((ord(ch) + 256 + n) % 256))
    return ''.join(shifted)
|
||||
|
||||
def adds(txt):
    """Return txt byte-shifted by every possible amount, 0..255."""
    return [add(key, txt) for key in range(256)]
|
||||
|
||||
|
||||
##
|
||||
## Grep-like things within dictionary
|
||||
##
|
||||
def matches(str, tgt):
    """Return True if str and tgt share the same letter pattern, i.e.
    they are related by a consistent one-to-one character substitution
    (checked in both directions).  `str` shadows the builtin but is
    kept as the parameter name for interface compatibility.
    """
    if len(str) != len(tgt):
        return False
    forward = {}
    backward = {}
    for s, t in zip(str, tgt):
        seen = forward.get(s)
        if seen and seen != t:
            return False
        forward[s] = t

        back = backward.get(t)
        if back and back != s:
            return False
        backward[t] = s

    return True
|
||||
|
||||
def guess(pattern):
    """Print and return dictionary words whose letter pattern matches
    pattern (case-insensitive, via matches()).

    Fixes: `ret` was built but never populated, so the function always
    returned []; the py2-only file() builtin is replaced by open();
    the word list is now closed when done.
    """
    ret = []

    pattern = pattern.lower()
    words = open('/usr/share/dict/words')
    try:
        for word in words:
            word = word.strip()
            word = word.lower()
            if matches(word, pattern):
                print(word)
                ret.append(word)
    finally:
        words.close()
    return ret
|
||||
|
||||
##
|
||||
## Overview tools
|
||||
##
|
||||
|
||||
def summary(txt):
    """Print a cryptanalysis overview of txt: its length and factors
    (for guessing key periods), letter/bigram/trigram frequency graphs,
    doubled letters, and the unique-neighbor table.  The parenthesized
    lists are the most common English n-grams, for eyeball comparison.
    Python 2 only: print statements.
    """
    print "Length", len(txt)
    print "Factors", factor(len(txt))
    print
    print "Frequency (etaoin shrdlcu)"
    freqgraph(freq(txt))
    print

    print "Bigrams (th er on an re he in ed nd ha at en es of or"
    print " nt ea ti to it st io le is ou ar as de rt ve)"
    freqgraph(bigrams(txt))
    print

    print "Trigrams (the and tha ent ion tio for nde has nce edt"
    print " tis oft sth men)"
    freqgraph(trigrams(txt))
    print

    # 4-letter words: that with have this will your from they know
    # want been good much some time

    print "Repeats (ss ee tt ff ll mm oo)"
    freqgraph(ngrams(2, txt, min=1, repeats=True))
    print

    print "Unique neighbors"
    pprint(neighbors(txt))
    print
|
||||
|
||||
|
||||
def replace(txt, orig, repl):
|
||||
for o, r in zip(orig, repl):
|
||||
txt = txt.replace(o, r)
|
||||
return txt
|
|
@ -0,0 +1,190 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
import scapy
|
||||
import StringIO
|
||||
|
||||
# Short aliases for the scapy layer classes used throughout this module.
IP = scapy.IP
TCP = scapy.TCP
Raw = scapy.Raw

# NOTE(review): drop_pad appears unused here; DropStringIO pads with its
# own padstr ('Drop') instead — confirm before removing.
drop_pad = 'DROP'
|
||||
|
||||
class DropStringIO(StringIO.StringIO):
    """StringIO with different padding.

    If you write beyond the length of the current string, this pads with
    the string 'Drop', and not NULs.  This should make it more obvious
    that you've had a drop.  I hope.

    Python 2 only: relies on StringIO.StringIO internals (pos, len,
    buflist) and integer '/' division.
    """

    # Pad text repeated to fill any gap left by a seek past EOF.
    padstr = 'Drop'

    def write(self, s):
        # Writing past the current end: fill the gap with repeated
        # padstr (trimmed to the exact gap size) instead of NULs.
        # `bytes` shadows the builtin; harmless here.
        if self.pos > self.len:
            bytes = self.pos - self.len
            pad = self.padstr * ((bytes / len(self.padstr)) + 1)
            self.buflist.append(pad[:bytes])
            self.len = self.pos
        return StringIO.StringIO.write(self, s)
|
||||
|
||||
|
||||
class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns a
    new ethernet frame.  For instance, an object from my py-pcap module.

    The read() method returns (srv, chunk), where srv is 1 if this came
    from the server, and chunk is a chunk of data.

    This returns things in sequence.  So you get both sides of the
    conversation in the order that they happened.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.

    """

    def __init__(self, pc):
        # pc: packet source with a read() method (e.g. a py-pcap object).
        self.pc = pc

        self.cli = None          # (ip, sport) of the client, set by SYN
        self.srv = None          # (ip, dport) of the server
        self.seq = [None, None]  # next expected seq per direction [cli, srv]
        self.pending = [{}, {}]  # un-ACKed packets keyed by seq, per direction
        self.frames = 0          # total frames consumed

        self.read_handshake()

    def read_packet(self):
        # Pull the next frame and dissect it; returns None at end of capture.
        # p[2] is assumed to be the raw frame bytes — TODO confirm against
        # the py-pcap read() tuple layout.
        p = self.pc.read()
        if not p:
            return
        return scapy.Ether(p[2])

    def read_handshake(self):
        """Consume the three-way handshake, learning who is client and
        who is server and initializing both sequence numbers."""
        # Read SYN (TCP flags == 2)
        pkt = self.read_packet()
        assert (pkt[TCP].flags == 2) # XXX: There's got to be a better way
        self.cli = (pkt[IP].src, pkt.sport)
        self.srv = (pkt[IP].dst, pkt.dport)
        self.seq[0] = pkt.seq + 1

        # Read SYN-ACK (flags == 18, i.e. SYN|ACK) from the server
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.srv[0]) and
                (pkt[TCP].flags == 18)):
                self.seq[1] = pkt.seq + 1
                break

        # Read ACK (flags == 16) from the client
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.cli[0]) and
                (pkt[TCP].flags == 16)):
                assert (self.seq[0] == pkt.seq)
                break

        self.frames = 3

    def __iter__(self):
        """Yield (direction, data) chunks in stream order; direction is
        0 for client->server data, 1 for server->client."""
        while True:
            pkt = self.read_packet()
            if not pkt:
                return
            self.frames += 1

            # Which way is this going?  idx = sender, xdi = receiver side.
            idx = int(pkt[IP].src == self.srv[0])
            xdi = 1 - idx

            # Does this ACK after the last output sequence number?
            # If so, everything pending below the ACK point is flushed
            # in seq order into a DropStringIO (gaps show up as 'Drop').
            if pkt.ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                keys = pending.keys()
                for key in keys:
                    if key >= pkt.ack:
                        continue

                    pkt2 = pending[key]
                    del pending[key]

                    ret.seek(pkt2.seq - seq)
                    ret.write(pkt2[TCP][Raw].load)
                self.seq[xdi] = pkt.ack

                yield (xdi, ret.getvalue())

            # If it has a payload, stick it into pending
            if hasattr(pkt[TCP][Raw], 'load'):
                self.pending[idx][pkt.seq] = pkt
        # NOTE(review): the loop above only exits via `return`, so this
        # call is unreachable as written — callers should invoke done()
        # themselves if they want the leftover-packet warning.
        self.done()

    def done(self):
        """Warn about any unhandled packets"""

        for p in self.pending:
            k = p.keys()
            if k:
                k.sort()
                print 'unused packets:', k
                # NOTE(review): stops after the first direction that has
                # leftovers; the other direction is never reported.
                return
|
||||
|
||||
|
||||
|
||||
class HTTP_side:
    """One side of an HTTP transaction.

    Feed raw stream chunks to process(); it accumulates the first
    (request/status) line, the headers, and then Content-Length bytes
    of body, setting .complete when the body has fully arrived.
    """

    def __init__(self):
        self.buf = ''            # partial header line awaiting more data
        self.first = ''          # request/status line (everything before ':'-style headers)
        self.in_headers = True   # still parsing the header section?
        self.headers = {}        # header name -> value (leading space kept; dupes overwrite)
        self.pending_data = 0    # body bytes still expected (from Content-Length)
        self.data = ''           # accumulated body bytes
        self.complete = False    # True once Content-Length bytes have arrived

    def __repr__(self):
        return '<HTTP_side %r>' % self.first

    def process(self, chunk):
        """Returns any unprocessed part of the chunk, parts which go to
        the next utterance."""

        # Prepend the incomplete line buffered from the previous call.
        # Fix: this used to be `chunk + self.buf`, which glued the
        # leftover onto the END of the new data and scrambled any header
        # line that was split across two chunks.
        chunk = self.buf + chunk
        while self.in_headers and chunk:
            try:
                # Split on '\n'; a trailing '\r' (if any) stays attached
                # to the line — assumed harmless downstream, TODO confirm.
                line, chunk = chunk.split('\n', 1)
            except ValueError:
                # No complete line yet: stash it and wait for more data.
                self.buf = chunk
                return ''
            self.process_header_line(line)
        self.buf = ''
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d) # May set to 0
            if not self.pending_data:
                self.complete = True
        # Whatever is left belongs to the next utterance on this side.
        return chunk

    def process_header_line(self, line):
        """Consume one header-section line: blank ends the headers, a
        'Name: value' pair is recorded, anything else becomes .first."""
        if not line.strip():
            self.in_headers = False
            return
        try:
            k,v = line.split(':', 1)
        except ValueError:
            # No colon: only legal as the very first line.
            if self.first:
                raise ValueError(('Not a header', line))
            else:
                self.first += line
                return
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
|
|
@ -0,0 +1,8 @@
|
|||
# Interactive-session nicety: enable tab completion when the readline
# module is available (it isn't on all platforms, e.g. stock Windows).
try:
    import readline
except ImportError:
    print "Module readline not available."
else:
    # rlcompleter installs the completer function that the key binding
    # below hooks up to the Tab key.
    import rlcompleter
    readline.parse_and_bind("tab: complete")
|
Loading…
Reference in New Issue