mirror of https://github.com/dirtbags/netarch.git
Some Python crap that I use
This commit is contained in:
commit
bc0165d5ab
|
@ -0,0 +1,10 @@
|
|||
from StringIO import StringIO
|
||||
|
||||
class bitvector:
    """View a byte string as an indexable vector of bits.

    Bit idx lives in byte idx // 8, at bit position idx % 8
    (least-significant bit first within each byte).
    """

    def __init__(self, txt):
        self.txt = txt

    def __getitem__(self, idx):
        byte, bit = divmod(idx, 8)
        return (ord(self.txt[byte]) >> bit) & 1
|
|
@ -0,0 +1,368 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
## Codebreaking tools
|
||||
## 2007 Neale Pickett
|
||||
## I should get an LAUR for this so we can share it.
|
||||
|
||||
from sets import Set
|
||||
from pprint import pprint
|
||||
|
||||
# From Wikipedia article "Letter Frequencies"
english_frequency = {
    'A': .08167, 'B': .01492, 'C': .02782, 'D': .04253,
    'E': .12702, 'F': .02228, 'G': .02015, 'H': .06094,
    'I': .06966, 'J': .00153, 'K': .00772, 'L': .04025,
    'M': .02406, 'N': .06749, 'O': .07507, 'P': .01929,
    'Q': .00095, 'R': .05987, 'S': .06327, 'T': .09056,
    'U': .02758, 'V': .00978, 'W': .02360, 'X': .00150,
    'Y': .01974, 'Z': .00074,
}
|
||||
|
||||
##
|
||||
## Binary stuff
|
||||
##
|
||||
|
||||
def bin(i):
    """Return the binary representation of i as a string of '0'/'1'.

    No '0b' prefix.  Shadows the builtin bin() deliberately (this
    predates it); kept under the same name for compatibility.

    Fix: bin(0) now returns '0' instead of the empty string.
    Negative inputs still produce '' (undefined here), as before.
    """
    if i == 0:
        return '0'
    bits = []
    while i > 0:
        bits.append(str(i % 2))
        i = i >> 1
    bits.reverse()
    return ''.join(bits)
||||
|
||||
class bitvector:
    """Arbitrary-length bit vector backed by a single Python integer.

    Construct from a byte string (big-endian: first byte is the most
    significant) or from an integer, optionally with an explicit bit
    length.  Python 2 only: uses xrange and the __getslice__ protocol.
    """

    def __init__(self, i, length=None):
        # txt argument: fold bytes into one big int, MSB first.
        if type(i) == type(''):
            self._val = 0
            for c in i:
                self._val <<= 8
                self._val += ord(c)
            if length is not None:
                self._len = length
            else:
                self._len = len(i) * 8
        else:
            # int argument: infer bit length from the value unless given.
            self._val = i
            if length is not None:
                self._len = length
            else:
                self._len = 0
                while i > 0:
                    i >>= 1
                    self._len += 1

    def __len__(self):
        # Length in bits, not bytes.
        return self._len

    def __getitem__(self, idx):
        # NOTE(review): indexing looks effectively 1-based from the MSB:
        # idx == 1 is the most significant bit, idx == _len the least,
        # and idx == 0 always yields 0 (idx > _len, not >=, is rejected).
        # Possibly an off-by-one; left as-is since callers may rely on it.
        if idx > self._len:
            raise IndexError()
        idx = self._len - idx
        return int((self._val >> idx) & 1)

    def __getslice__(self, a, b):
        # Python 2-only slice protocol; returns a new bitvector of the
        # bits between positions a and b, clamped to the vector's length.
        if b > self._len:
            b = self._len
        i = self._val >> (self._len - b)
        l = b - a
        mask = (1 << l) - 1
        return bitvector(i & mask, length=l)

    def __iter__(self):
        # Yields bits least-significant first (reverse of __repr__).
        v = self._val
        for i in xrange(self._len):
            yield int(v & 1)
            v >>= 1

    def __str__(self):
        # Pack back into a byte string, most significant byte first;
        # a final partial byte (1..8 bits) is emitted unshifted.
        r = ''
        v = self._val
        i = self._len
        while i > 8:
            o = ((v >> (i - 8)) & 0xFF)
            r += chr(o)
            i -= 8
        if i > 0:
            o = v & ((1 << i) - 1)
            r += chr(o)
        return r

    def __int__(self):
        return self._val

    def __repr__(self):
        # Display most-significant bit first.
        l = list(self)
        l.reverse()
        return '<bitvector ' + ''.join(str(x) for x in l) + '>'
|
||||
|
||||
##
|
||||
## Statistical stuff
|
||||
##
|
||||
|
||||
|
||||
def basedist(l):
    """Return a string of roughly length l with the standard English
    letter distribution (each count is truncated to an int, so the
    result can come out slightly short).
    """
    chunks = []
    for letter, share in english_frequency.items():
        chunks.append(letter * int(share * l))
    return ''.join(chunks)
|
||||
|
||||
|
||||
##
|
||||
## Factoring stuff
|
||||
##
|
||||
|
||||
|
||||
def isPrime(number):
    """Return True if number is COMPOSITE, False if it is prime.

    NOTE(review): the name is inverted relative to what it does.
    factor() relies on this inverted sense ("loop while the number is
    still composite"), so the behavior is documented and kept rather
    than renamed.  Trial division, O(number).

    Fix: numbers < 3 now return False explicitly instead of falling
    off the end and returning None (both are falsy, so boolean callers
    are unaffected).
    """
    for x in range(2, number):
        if number % x == 0:
            return True
    return False
|
||||
def smallestFactor(number):
    """Return the smallest nontrivial divisor of number, or None if
    number is prime (trial division over [2, number))."""
    for candidate in range(2, number):
        if number % candidate == 0:
            return candidate
    return None
|
||||
|
||||
def factor(number):
    """Return prime factors for number, in nondecreasing order.

    Repeatedly strips off the smallest factor while the remainder is
    composite (note: isPrime() in this file returns True for COMPOSITE
    numbers), then appends the final prime remainder.

    Fix: uses floor division (//) so the running value stays integral;
    identical on Python 2, and avoids float results under Python 3's
    true division.
    """
    factors = []
    while isPrime(number):
        newFactor = smallestFactor(number)
        factors.append(newFactor)
        number = number // newFactor
    factors.append(number)
    return factors
|
||||
|
||||
|
||||
##
|
||||
## Statistical analysis
|
||||
##
|
||||
|
||||
def where(haystack, needle):
    """Locate every (possibly overlapping) occurrence of needle.

    NOTE: the first entry is an absolute position, but each later
    entry is relative — the search restarts one character past the
    previous hit.  ngrams() only uses the number of hits, for which
    this form is sufficient.
    """
    hits = []
    remaining = haystack
    while True:
        at = remaining.find(needle)
        if at < 0:
            return hits
        hits.append(at)
        remaining = remaining[at + 1:]
|
||||
|
||||
|
||||
def ngrams(n, haystack, min=2, repeats=False):
    """Tabulate the n-grams of haystack.

    Returns a dict mapping each length-n substring that occurs at
    least `min` times to its where() hit list.  With repeats=True,
    only n-grams consisting of one repeated character are kept.
    (`min` shadows the builtin; name kept for interface compatibility.)
    """
    acc = {}
    for start in range(len(haystack)):
        tail = haystack[start:]
        needle = tail[:n]
        if repeats:
            # Keep only needles made of a single repeated character.
            first = needle[0]
            uniform = True
            for ch in needle:
                if ch != first:
                    uniform = False
                    break
            if not uniform:
                continue
        if needle not in acc:
            found = where(tail, needle)
            if len(found) >= min:
                acc[needle] = found
    return acc
|
||||
|
||||
|
||||
def freq(txt):
    """Single-character frequency table for txt (min=0: every
    character that appears is included)."""
    return ngrams(1, txt, min=0)
|
||||
|
||||
def bigrams(txt):
    """Character pairs occurring at least twice in txt."""
    return ngrams(2, txt)
|
||||
|
||||
def trigrams(txt):
    """Character triples occurring at least twice in txt."""
    return ngrams(3, txt)
|
||||
|
||||
|
||||
def freqgraph(f):
    """Print a '#' bar graph of frequency dict f, most common first.

    Values may be plain counts or the hit lists produced by ngrams()
    (in which case the list length is graphed).  Python 2 only: uses
    a cmp function with list.sort() and print statements.
    """
    def cmp2(x, y):
        # Sort by count, descending.
        a = x[1]
        b = y[1]
        if a > b:
            return -1
        elif a < b:
            return 1
        else:
            return 0
    items = []
    for c,n in f.iteritems():
        if type(n) != type(0):
            # A where() hit list rather than a count: graph its length.
            n = len(n)
        items.append((c,n))
    items.sort(cmp2)

    for c,n in items:
        print '%s: %s' % (c, '#' * n)
|
||||
|
||||
def neighbors(txt):
    """Map each character to the Set of characters seen adjacent to it.

    Built from the bigram table: for every repeated 2-gram "ab",
    records b as a neighbor of a and a as a neighbor of b.

    Fix: dropped the unused hit-list/count locals and iterates the
    bigram keys directly.
    """
    out = {}
    for dg in bigrams(txt):
        n = out.get(dg[0], Set())
        n.add(dg[1])
        out[dg[0]] = n

        n = out.get(dg[1], Set())
        n.add(dg[0])
        out[dg[1]] = n
    return out
|
||||
|
||||
|
||||
##
|
||||
## Brute force tools
|
||||
##
|
||||
|
||||
def rot(n, txt):
    """Caesar cipher: shift each letter of txt forward n places,
    wrapping within its own case; non-letters pass through untouched.
    (Assumes 0 <= n <= 26 — larger shifts only wrap once.)
    """
    shifted = []
    for ch in txt:
        if not ch.isalpha():
            shifted.append(ch)
            continue
        code = ord(ch) + n
        if ch.islower() and code > ord('z'):
            code -= 26
        elif ch.isupper() and code > ord('Z'):
            code -= 26
        shifted.append(chr(code))
    return ''.join(shifted)
|
||||
|
||||
|
||||
def caesars(txt):
    """Return all 26 Caesar shifts of txt (rot 0 through rot 25)."""
    return [rot(i, txt) for i in range(26)]
|
||||
|
||||
# Tabula recta: the 26 Caesar-shifted alphabets (row i = rot(i, 'A'..'Z')),
# as used by Vigenere-family ciphers.
tabula_recta = caesars('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
|
||||
|
||||
|
||||
def xor(n, txt):
    """XOR every byte of txt with the single-byte key n; returns the
    resulting string."""
    return ''.join(chr(ord(ch) ^ n) for ch in txt)
|
||||
|
||||
def xors(txt):
    """Return txt XORed with every possible single-byte key, 0..255."""
    return [xor(key, txt) for key in range(256)]
|
||||
|
||||
|
||||
def add(n, txt):
    """Add n to every byte of txt, modulo 256 (n may be negative)."""
    shifted = []
    for ch in txt:
        # The +256 keeps the operand positive when n < 0.
        shifted.append(chr((ord(ch) + 256 + n) % 256))
    return ''.join(shifted)
|
||||
|
||||
def adds(txt):
    """Return txt byte-shifted by every possible amount, 0..255."""
    return [add(key, txt) for key in range(256)]
|
||||
|
||||
|
||||
##
|
||||
## Grep-like things within dictionary
|
||||
##
|
||||
def matches(str, tgt):
    """Return True if str and tgt share the same letter pattern, i.e.
    they are related by a consistent one-to-one character substitution
    (checked in both directions).  `str` shadows the builtin but is
    kept as the parameter name for interface compatibility.
    """
    if len(str) != len(tgt):
        return False
    forward = {}
    backward = {}
    for s, t in zip(str, tgt):
        seen = forward.get(s)
        if seen and seen != t:
            return False
        forward[s] = t

        back = backward.get(t)
        if back and back != s:
            return False
        backward[t] = s

    return True
|
||||
|
||||
def guess(pattern):
    """Print and return dictionary words whose letter pattern matches
    pattern (case-insensitive, via matches()).

    Fixes: `ret` was built but never populated, so the function always
    returned []; the py2-only file() builtin is replaced by open();
    the word list is now closed when done.
    """
    ret = []

    pattern = pattern.lower()
    words = open('/usr/share/dict/words')
    try:
        for word in words:
            word = word.strip()
            word = word.lower()
            if matches(word, pattern):
                print(word)
                ret.append(word)
    finally:
        words.close()
    return ret
|
||||
|
||||
##
|
||||
## Overview tools
|
||||
##
|
||||
|
||||
def summary(txt):
    """Print a cryptanalysis overview of txt: its length and factors
    (for guessing key periods), letter/bigram/trigram frequency graphs,
    doubled letters, and the unique-neighbor table.  The parenthesized
    lists are the most common English n-grams, for eyeball comparison.
    Python 2 only: print statements.
    """
    print "Length", len(txt)
    print "Factors", factor(len(txt))
    print
    print "Frequency (etaoin shrdlcu)"
    freqgraph(freq(txt))
    print

    print "Bigrams (th er on an re he in ed nd ha at en es of or"
    print " nt ea ti to it st io le is ou ar as de rt ve)"
    freqgraph(bigrams(txt))
    print

    print "Trigrams (the and tha ent ion tio for nde has nce edt"
    print " tis oft sth men)"
    freqgraph(trigrams(txt))
    print

    # 4-letter words: that with have this will your from they know
    # want been good much some time

    print "Repeats (ss ee tt ff ll mm oo)"
    freqgraph(ngrams(2, txt, min=1, repeats=True))
    print

    print "Unique neighbors"
    pprint(neighbors(txt))
    print
|
||||
|
||||
|
||||
def replace(txt, orig, repl):
|
||||
for o, r in zip(orig, repl):
|
||||
txt = txt.replace(o, r)
|
||||
return txt
|
|
@ -0,0 +1,190 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
import scapy
|
||||
import StringIO
|
||||
|
||||
# Short aliases for the scapy layer classes used throughout this module.
IP = scapy.IP
TCP = scapy.TCP
Raw = scapy.Raw

# NOTE(review): drop_pad appears unused here; DropStringIO pads with its
# own padstr ('Drop') instead — confirm before removing.
drop_pad = 'DROP'
|
||||
|
||||
class DropStringIO(StringIO.StringIO):
    """StringIO with different padding.

    If you write beyond the length of the current string, this pads with
    the string 'Drop', and not NULs.  This should make it more obvious
    that you've had a drop.  I hope.

    Python 2 only: relies on StringIO.StringIO internals (pos, len,
    buflist) and integer '/' division.
    """

    # Pad text repeated to fill any gap left by a seek past EOF.
    padstr = 'Drop'

    def write(self, s):
        # Writing past the current end: fill the gap with repeated
        # padstr (trimmed to the exact gap size) instead of NULs.
        # `bytes` shadows the builtin; harmless here.
        if self.pos > self.len:
            bytes = self.pos - self.len
            pad = self.padstr * ((bytes / len(self.padstr)) + 1)
            self.buflist.append(pad[:bytes])
            self.len = self.pos
        return StringIO.StringIO.write(self, s)
|
||||
|
||||
|
||||
class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns a
    new ethernet frame.  For instance, an object from my py-pcap module.

    The read() method returns (srv, chunk), where srv is 1 if this came
    from the server, and chunk is a chunk of data.

    This returns things in sequence.  So you get both sides of the
    conversation in the order that they happened.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.

    """

    def __init__(self, pc):
        # pc: packet source with a read() method (e.g. a py-pcap object).
        self.pc = pc

        self.cli = None          # (ip, sport) of the client, set by SYN
        self.srv = None          # (ip, dport) of the server
        self.seq = [None, None]  # next expected seq per direction [cli, srv]
        self.pending = [{}, {}]  # un-ACKed packets keyed by seq, per direction
        self.frames = 0          # total frames consumed

        self.read_handshake()

    def read_packet(self):
        # Pull the next frame and dissect it; returns None at end of capture.
        # p[2] is assumed to be the raw frame bytes — TODO confirm against
        # the py-pcap read() tuple layout.
        p = self.pc.read()
        if not p:
            return
        return scapy.Ether(p[2])

    def read_handshake(self):
        """Consume the three-way handshake, learning who is client and
        who is server and initializing both sequence numbers."""
        # Read SYN (TCP flags == 2)
        pkt = self.read_packet()
        assert (pkt[TCP].flags == 2) # XXX: There's got to be a better way
        self.cli = (pkt[IP].src, pkt.sport)
        self.srv = (pkt[IP].dst, pkt.dport)
        self.seq[0] = pkt.seq + 1

        # Read SYN-ACK (flags == 18, i.e. SYN|ACK) from the server
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.srv[0]) and
                (pkt[TCP].flags == 18)):
                self.seq[1] = pkt.seq + 1
                break

        # Read ACK (flags == 16) from the client
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.cli[0]) and
                (pkt[TCP].flags == 16)):
                assert (self.seq[0] == pkt.seq)
                break

        self.frames = 3

    def __iter__(self):
        """Yield (direction, data) chunks in stream order; direction is
        0 for client->server data, 1 for server->client."""
        while True:
            pkt = self.read_packet()
            if not pkt:
                return
            self.frames += 1

            # Which way is this going?  idx = sender, xdi = receiver side.
            idx = int(pkt[IP].src == self.srv[0])
            xdi = 1 - idx

            # Does this ACK after the last output sequence number?
            # If so, everything pending below the ACK point is flushed
            # in seq order into a DropStringIO (gaps show up as 'Drop').
            if pkt.ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                keys = pending.keys()
                for key in keys:
                    if key >= pkt.ack:
                        continue

                    pkt2 = pending[key]
                    del pending[key]

                    ret.seek(pkt2.seq - seq)
                    ret.write(pkt2[TCP][Raw].load)
                self.seq[xdi] = pkt.ack

                yield (xdi, ret.getvalue())

            # If it has a payload, stick it into pending
            if hasattr(pkt[TCP][Raw], 'load'):
                self.pending[idx][pkt.seq] = pkt
        # NOTE(review): the loop above only exits via `return`, so this
        # call is unreachable as written — callers should invoke done()
        # themselves if they want the leftover-packet warning.
        self.done()

    def done(self):
        """Warn about any unhandled packets"""

        for p in self.pending:
            k = p.keys()
            if k:
                k.sort()
                print 'unused packets:', k
                # NOTE(review): stops after the first direction that has
                # leftovers; the other direction is never reported.
                return
|
||||
|
||||
|
||||
|
||||
class HTTP_side:
    """One side of an HTTP transaction.

    Feed raw stream chunks to process(); it accumulates the first
    (request/status) line, the headers, and then Content-Length bytes
    of body, setting .complete when the body has fully arrived.
    """

    def __init__(self):
        self.buf = ''            # partial header line awaiting more data
        self.first = ''          # request/status line (everything before ':'-style headers)
        self.in_headers = True   # still parsing the header section?
        self.headers = {}        # header name -> value (leading space kept; dupes overwrite)
        self.pending_data = 0    # body bytes still expected (from Content-Length)
        self.data = ''           # accumulated body bytes
        self.complete = False    # True once Content-Length bytes have arrived

    def __repr__(self):
        return '<HTTP_side %r>' % self.first

    def process(self, chunk):
        """Returns any unprocessed part of the chunk, parts which go to
        the next utterance."""

        # Prepend the incomplete line buffered from the previous call.
        # Fix: this used to be `chunk + self.buf`, which glued the
        # leftover onto the END of the new data and scrambled any header
        # line that was split across two chunks.
        chunk = self.buf + chunk
        while self.in_headers and chunk:
            try:
                # Split on '\n'; a trailing '\r' (if any) stays attached
                # to the line — assumed harmless downstream, TODO confirm.
                line, chunk = chunk.split('\n', 1)
            except ValueError:
                # No complete line yet: stash it and wait for more data.
                self.buf = chunk
                return ''
            self.process_header_line(line)
        self.buf = ''
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d) # May set to 0
            if not self.pending_data:
                self.complete = True
        # Whatever is left belongs to the next utterance on this side.
        return chunk

    def process_header_line(self, line):
        """Consume one header-section line: blank ends the headers, a
        'Name: value' pair is recorded, anything else becomes .first."""
        if not line.strip():
            self.in_headers = False
            return
        try:
            k,v = line.split(':', 1)
        except ValueError:
            # No colon: only legal as the very first line.
            if self.first:
                raise ValueError(('Not a header', line))
            else:
                self.first += line
                return
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
|
|
@ -0,0 +1,8 @@
|
|||
# Interactive-session nicety: enable tab completion when the readline
# module is available (it isn't on all platforms, e.g. stock Windows).
try:
    import readline
except ImportError:
    print "Module readline not available."
else:
    # rlcompleter installs the completer function that the key binding
    # below hooks up to the Tab key.
    import rlcompleter
    readline.parse_and_bind("tab: complete")
|
Loading…
Reference in New Issue