#! /usr/bin/python
# Flattened "git log -p" dump of commit bc0165d5abcede536161bed7742861c755b05dc5
# (Neale Pickett, Mon Nov 12 21:11:25 2007 -0700, "Some Python crap that I use"),
# reconstructed as one runnable module and ported from Python 2 to Python 3.
#
# Original files in the commit, in order:
#   bitarray.py   -- byte-string bit accessor
#   codebreak.py  -- classical-cipher codebreaking helpers
#   resequence.py -- TCP stream resequencer (requires scapy)
#   startup.py    -- interactive-session readline setup

import io
from pprint import pprint

# scapy is a third-party dependency used only by TCP_Session; guarded so the
# rest of the module is importable without it.
try:
    import scapy
except ImportError:
    scapy = None

if scapy is not None:
    IP = scapy.IP
    TCP = scapy.TCP
    Raw = scapy.Raw
else:
    IP = TCP = Raw = None

# ---------------------------------------------------------------------------
# bitarray.py
# ---------------------------------------------------------------------------
# NOTE(review): codebreak.py below defines a richer class of the same name,
# which shadows this one in the merged module (they were separate files).

class bitvector:
    """Bit accessor over a character string.

    Bit 0 is the least-significant bit of the first character; bits are
    numbered LSB-first within each 8-bit character.
    """

    def __init__(self, txt):
        # txt: a str whose characters carry 8 bits each (ordinals 0-255).
        self.txt = txt

    def __getitem__(self, idx):
        base, offset = divmod(idx, 8)
        o = ord(self.txt[base])
        return (o >> offset) & 1

# ---------------------------------------------------------------------------
# codebreak.py -- Codebreaking tools, 2007 Neale Pickett
# ---------------------------------------------------------------------------

# From Wikipedia article "Letter Frequencies"
english_frequency = {'A': .08167,
                     'B': .01492,
                     'C': .02782,
                     'D': .04253,
                     'E': .12702,
                     'F': .02228,
                     'G': .02015,
                     'H': .06094,
                     'I': .06966,
                     'J': .00153,
                     'K': .00772,
                     'L': .04025,
                     'M': .02406,
                     'N': .06749,
                     'O': .07507,
                     'P': .01929,
                     'Q': .00095,
                     'R': .05987,
                     'S': .06327,
                     'T': .09056,
                     'U': .02758,
                     'V': .00978,
                     'W': .02360,
                     'X': .00150,
                     'Y': .01974,
                     'Z': .00074}

##
## Binary stuff
##

def bin(i):
    """Return the binary representation of i (no prefix; '' for i <= 0).

    Shadows the builtin of the same name, as the original did.
    """
    r = []
    while i > 0:
        r.append(i % 2)
        i = i >> 1
    r.reverse()
    return ''.join(str(x) for x in r)


class bitvector:
    """Arbitrary-length bit vector backed by a single integer.

    Indexing is MSB-first and zero-based: v[0] is the most significant
    bit (this is what __str__ and slicing already assumed).  Construct
    from a str (8 bits per character, first character most significant)
    or from an int.
    """

    def __init__(self, i, length=None):
        if isinstance(i, str):
            self._val = 0
            for c in i:
                self._val = (self._val << 8) | ord(c)
            self._len = length if length is not None else len(i) * 8
        else:
            self._val = i
            if length is not None:
                self._len = length
            else:
                self._len = i.bit_length()

    def __len__(self):
        return self._len

    def __getitem__(self, idx):
        # Py3: slices arrive here (there is no __getslice__ any more).
        if isinstance(idx, slice):
            a, b, _step = idx.indices(self._len)
            # Contiguous slices only, matching the old __getslice__.
            l = b - a
            i = self._val >> (self._len - b)
            return bitvector(i & ((1 << l) - 1), length=l)
        # Bug fix: the original shifted by (_len - idx), so v[0] was
        # always 0 and idx == _len was silently accepted.
        if not 0 <= idx < self._len:
            raise IndexError(idx)
        return (self._val >> (self._len - 1 - idx)) & 1

    def __iter__(self):
        # Yields bits LSB-first (the reverse of indexing order).
        v = self._val
        for _ in range(self._len):
            yield v & 1
            v >>= 1

    def __str__(self):
        # Pack back into characters, 8 bits at a time, MSB first.
        r = ''
        v = self._val
        i = self._len
        while i > 8:
            r += chr((v >> (i - 8)) & 0xFF)
            i -= 8
        if i > 0:
            r += chr(v & ((1 << i) - 1))
        return r

    def __int__(self):
        return self._val

    def __repr__(self):
        # NOTE(review): the dump shows this computing the reversed bit list
        # and then returning '' -- presumably '<bitvector ...>' markup was
        # eaten by the flattening.  Reconstructed accordingly.
        bits = ''.join(str(b) for b in reversed(list(self)))
        return '<bitvector %s>' % bits

##
## Statistical stuff
##

def basedist(l):
    """Return a string of length ~l with standard distribution of letters."""
    out = ""
    for c, n in english_frequency.items():
        out += c * int(n * l)
    return out

##
## Factoring stuff
##

def isPrime(number):
    """Return True if number is prime.

    Bug fix: the original returned True on finding a divisor (it answered
    "is composite") and returned None both for primes and for n < 2.
    """
    if number < 2:
        return False
    for x in range(2, number):
        if number % x == 0:
            return False
    return True

def smallestFactor(number):
    """Return the smallest factor >= 2 of number, or None if number is prime."""
    for x in range(2, number):
        if number % x == 0:
            return x
    return None

def factor(number):
    """Return prime factors for number (e.g. factor(12) -> [2, 2, 3])."""
    if number < 2:
        # Degenerate input: mirror the original's behavior of returning [n].
        return [number]
    factors = []
    while not isPrime(number):
        newFactor = smallestFactor(number)
        factors.append(newFactor)
        number //= newFactor
    factors.append(number)
    return factors

##
## Statistical analysis
##

def where(haystack, needle):
    """Return every index at which needle occurs in haystack (overlapping).

    Bug fix: the original re-sliced the haystack after each hit, so every
    position after the first was relative to the previous hit rather than
    to the start of haystack.
    """
    ret = []
    pos = haystack.find(needle)
    while pos != -1:
        ret.append(pos)
        pos = haystack.find(needle, pos + 1)
    return ret


def ngrams(n, haystack, min=2, repeats=False):
    """Map each n-gram of haystack occurring at least `min` times to its
    occurrence positions (positions are within the suffix where the n-gram
    first appears, as in the original).

    If repeats is true, only n-grams made of one repeated character count.
    """
    acc = {}
    for i in range(len(haystack)):
        rtxt = haystack[i:]
        needle = rtxt[:n]
        if repeats:
            c = needle[0]
            if any(d != c for d in needle):
                continue
        if needle not in acc:
            found = where(rtxt, needle)
            if len(found) >= min:
                acc[needle] = found
    return acc


def freq(txt):
    """Single-character frequency table."""
    return ngrams(1, txt, min=0)

def bigrams(txt):
    """Repeated 2-grams of txt."""
    return ngrams(2, txt)

def trigrams(txt):
    """Repeated 3-grams of txt."""
    return ngrams(3, txt)


def freqgraph(f):
    """Print a '#' histogram of a frequency dict, most frequent first.

    Values may be counts or position lists (their length is used).
    """
    items = []
    for c, n in f.items():
        if not isinstance(n, int):
            n = len(n)
        items.append((c, n))
    # Descending by count; stable for ties, like the old cmp-based sort.
    items.sort(key=lambda cn: -cn[1])
    for c, n in items:
        print('%s: %s' % (c, '#' * n))

def neighbors(txt):
    """Map each character to the set of characters adjacent to it in any
    repeated bigram of txt."""
    out = {}
    for dg, w in bigrams(txt).items():
        out.setdefault(dg[0], set()).add(dg[1])
        out.setdefault(dg[1], set()).add(dg[0])
    return out

##
## Brute force tools
##

def rot(n, txt):
    """Caesar cipher: shift ASCII letters by n (any integer), preserving
    case; other characters pass through unchanged."""
    out = ""
    for c in txt:
        # Explicit ASCII test: py3 str.isalpha() also matches non-ASCII
        # letters, which the arithmetic below can't handle.
        if 'a' <= c <= 'z':
            out += chr(ord('a') + (ord(c) - ord('a') + n) % 26)
        elif 'A' <= c <= 'Z':
            out += chr(ord('A') + (ord(c) - ord('A') + n) % 26)
        else:
            out += c
    return out


def caesars(txt):
    """All 26 Caesar shifts of txt."""
    return [rot(i, txt) for i in range(26)]

# Tabula recta
tabula_recta = caesars('ABCDEFGHIJKLMNOPQRSTUVWXYZ')


def xor(n, txt):
    """XOR each character's ordinal with n."""
    out = ''
    for c in txt:
        out += chr(ord(c) ^ n)
    return out

def xors(txt):
    """txt XORed with every byte value 0..255."""
    return [xor(n, txt) for n in range(256)]


def add(n, txt):
    """Add n (mod 256) to each character's ordinal; n may be negative."""
    out = ''
    for c in txt:
        out += chr((ord(c) + 256 + n) % 256)  # Add 256 in case n < 0
    return out

def adds(txt):
    """txt shifted by every byte value 0..255."""
    return [add(n, txt) for n in range(256)]

##
## Grep-like things within dictionary
##
def matches(str, tgt):
    """True if str and tgt have the same length and are related by a
    one-to-one letter substitution (same pattern of repeats).

    The parameter name `str` shadows the builtin; kept for interface
    compatibility with the original.
    """
    if len(str) != len(tgt):
        return False
    fwd = {}
    rev = {}
    for s, t in zip(str, tgt):
        if fwd.get(s, t) != t:
            return False
        fwd[s] = t
        if rev.get(t, s) != s:
            return False
        rev[t] = s
    return True

def guess(pattern):
    """Print and return dictionary words whose letter pattern matches
    pattern (case-insensitive).

    Bug fix: the original built `ret` but never appended to it, so it
    always returned []; it also leaked the words file handle.
    """
    ret = []
    pattern = pattern.lower()
    with open('/usr/share/dict/words') as words:
        for word in words:
            word = word.strip().lower()
            if matches(word, pattern):
                print(word)
                ret.append(word)
    return ret

##
## Overview tools
##

def summary(txt):
    """Print a statistical overview of txt to help break a classical cipher."""
    print("Length", len(txt))
    print("Factors", factor(len(txt)))
    print()
    print("Frequency (etaoin shrdlcu)")
    freqgraph(freq(txt))
    print()

    print("Bigrams (th er on an re he in ed nd ha at en es of or")
    print("         nt ea ti to it st io le is ou ar as de rt ve)")
    freqgraph(bigrams(txt))
    print()

    print("Trigrams (the and tha ent ion tio for nde has nce edt")
    print("          tis oft sth men)")
    freqgraph(trigrams(txt))
    print()

    # 4-letter words: that with have this will your from they know
    # want been good much some time

    print("Repeats (ss ee tt ff ll mm oo)")
    freqgraph(ngrams(2, txt, min=1, repeats=True))
    print()

    print("Unique neighbors")
    pprint(neighbors(txt))
    print()


def replace(txt, orig, repl):
    """Apply a substitution: replace each character of orig with the
    corresponding character of repl, in order."""
    for o, r in zip(orig, repl):
        txt = txt.replace(o, r)
    return txt

# ---------------------------------------------------------------------------
# resequence.py
# ---------------------------------------------------------------------------

drop_pad = 'DROP'  # NOTE(review): unused even in the original; kept for compatibility.

class DropStringIO(io.StringIO):
    """StringIO with different padding.

    If you write beyond the length of the current string, this pads with
    the string 'Drop', and not NULs.  This should make it more obvious
    that you've had a drop.  I hope.
    """

    padstr = 'Drop'

    def write(self, s):
        pos = self.tell()
        end = len(self.getvalue())
        if pos > end:
            # Fill the gap with repeated padstr, then restore position.
            gap = pos - end
            pad = self.padstr * ((gap // len(self.padstr)) + 1)
            super().seek(end)
            super().write(pad[:gap])
        return super().write(s)


class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns a
    new ethernet frame.  For instance, an object from my py-pcap module.

    Iteration yields (srv, chunk), where srv is 1 if this came from the
    server, and chunk is a chunk of data.  Chunks come out in sequence,
    so you get both sides of the conversation in the order that they
    happened.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.  Requires scapy.
    """

    def __init__(self, pc):
        self.pc = pc

        self.cli = None            # (ip, port) of the client
        self.srv = None            # (ip, port) of the server
        self.seq = [None, None]    # next expected seq per direction
        self.pending = [{}, {}]    # seq -> packet awaiting ACK, per direction
        self.frames = 0

        self.read_handshake()

    def read_packet(self):
        """Read one frame from the capture; None at end of capture."""
        p = self.pc.read()
        if not p:
            return None
        return scapy.Ether(p[2])

    def read_handshake(self):
        """Consume the three-way handshake, learning endpoints and seqs."""
        # Read SYN
        pkt = self.read_packet()
        assert (pkt[TCP].flags == 2)  # XXX: There's got to be a better way
        self.cli = (pkt[IP].src, pkt.sport)
        self.srv = (pkt[IP].dst, pkt.dport)
        self.seq[0] = pkt.seq + 1

        # Read SYN-ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.srv[0]) and
                (pkt[TCP].flags == 18)):
                self.seq[1] = pkt.seq + 1
                break

        # Read ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.cli[0]) and
                (pkt[TCP].flags == 16)):
                assert (self.seq[0] == pkt.seq)
                break

        self.frames = 3

    def __iter__(self):
        while True:
            pkt = self.read_packet()
            if not pkt:
                # Bug fix: was `return`, which made the trailing done()
                # call unreachable.
                break
            self.frames += 1

            # Which way is this going?  idx: direction of this packet,
            # xdi: the direction it ACKs.
            idx = int(pkt[IP].src == self.srv[0])
            xdi = 1 - idx

            # Does this ACK after the last output sequence number?
            if pkt.ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                for key in list(pending):
                    if key >= pkt.ack:
                        continue
                    pkt2 = pending.pop(key)
                    # Position by relative seq; gaps pad with 'Drop'.
                    ret.seek(pkt2.seq - seq)
                    ret.write(pkt2[TCP][Raw].load)
                self.seq[xdi] = pkt.ack
                yield (xdi, ret.getvalue())

            # If it has a payload, stick it into pending
            if hasattr(pkt[TCP][Raw], 'load'):
                self.pending[idx][pkt.seq] = pkt
        self.done()

    def done(self):
        """Warn about any unhandled packets"""
        for p in self.pending:
            k = sorted(p.keys())
            if k:
                print('unused packets:', k)
        return


class HTTP_side:
    """One side of an HTTP transaction."""

    def __init__(self):
        self.buf = ''            # partial header line carried between chunks
        self.first = ''          # request/status line
        self.in_headers = True
        self.headers = {}
        self.pending_data = 0    # body bytes still expected
        self.data = ''           # body collected so far
        self.complete = False

    def __repr__(self):
        # NOTE(review): the dump shows `'' % self.first` (a TypeError);
        # the '<...>' markup was evidently eaten by the flattening.
        return '<HTTP_side %s>' % self.first

    def process(self, chunk):
        """Consume a chunk of stream data.  Returns any unprocessed part
        of the chunk, parts which go to the next utterance."""
        # Bug fix: buffered leftover bytes precede the new chunk (the
        # original appended them after it).
        chunk = self.buf + chunk
        while self.in_headers and chunk:
            try:
                line, chunk = chunk.split('\n', 1)
            except ValueError:
                # No complete line yet; stash and wait for more data.
                self.buf = chunk
                return ''
            self.process_header_line(line)
        self.buf = ''
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d)  # May set to 0
            if not self.pending_data:
                self.complete = True
        return chunk

    def process_header_line(self, line):
        """Parse one header line; blank line ends the header section."""
        if not line.strip():
            self.in_headers = False
            return
        try:
            k, v = line.split(':', 1)
        except ValueError:
            if self.first:
                raise ValueError(('Not a header', line))
            else:
                self.first += line
            return
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)

# ---------------------------------------------------------------------------
# startup.py -- interactive interpreter niceties
# ---------------------------------------------------------------------------

try:
    import readline
except ImportError:
    print("Module readline not available.")
else:
    import rlcompleter
    readline.parse_and_bind("tab: complete")