Some Python crap that I use

2007-11-12 21:11:25 -07:00 · 2007-11-12 21:11:25 -07:00 · bc0165d5ab
commit bc0165d5ab
4 changed files with 576 additions and 0 deletions
--- a/bitarray.py
+++ b/bitarray.py
@ -0,0 +1,10 @@
 from StringIO import StringIO
 class bitvector:
    """Read-only bit view over a string, one bit per index.

    Bit idx is bit (idx % 8) of the character at position (idx // 8),
    counting from the least significant bit of that character.
    """
    def __init__(self, txt):
        # Only a reference to the backing string is kept.
        self.txt = txt
    def __getitem__(self, idx):
        """Return bit number idx as 0 or 1."""
        char_index = idx // 8
        bit_index = idx % 8
        code = ord(self.txt[char_index])
        return (code >> bit_index) & 1
--- a/codebreak.py
+++ b/codebreak.py
@ -0,0 +1,368 @@
 #! /usr/bin/python
 ## Codebreaking tools
 ## 2007  Neale Pickett
 ## I should get an LAUR for this so we can share it.
 from sets import Set
 from pprint import pprint
 # Letter frequencies for English text, from the Wikipedia article
 # "Letter Frequencies".  Keys are upper-case letters; the values sum
 # to roughly 1.
 english_frequency = {
    'A': 0.08167, 'B': 0.01492, 'C': 0.02782, 'D': 0.04253,
    'E': 0.12702, 'F': 0.02228, 'G': 0.02015, 'H': 0.06094,
    'I': 0.06966, 'J': 0.00153, 'K': 0.00772, 'L': 0.04025,
    'M': 0.02406, 'N': 0.06749, 'O': 0.07507, 'P': 0.01929,
    'Q': 0.00095, 'R': 0.05987, 'S': 0.06327, 'T': 0.09056,
    'U': 0.02758, 'V': 0.00978, 'W': 0.02360, 'X': 0.00150,
    'Y': 0.01974, 'Z': 0.00074,
 }
 ##
 ## Binary stuff
 ##
 def bin(i):
    """Return the binary representation of the integer i as a string.

    The result has no prefix and no leading zeros: bin(5) == '101'.
    Zero gives '0'; a negative number gives '-' followed by the digits
    of its absolute value.

    Note that this name shadows the bin() builtin of Python 2.6 and
    later, whose result carries a '0b' prefix.
    """
    if i < 0:
        return '-' + bin(-i)
    if i == 0:
        # The digit loop below never runs for zero and would return ''.
        return '0'
    digits = []
    while i > 0:
        digits.append(str(i & 1))
        i >>= 1
    # Digits were collected least significant first.
    digits.reverse()
    return ''.join(digits)
 class bitvector:
    """Fixed-width sequence of bits stored in a single integer.

    Build one from a string (each character contributes 8 bits, the
    first character being the most significant) or from an integer.
    length sets the width in bits; when omitted it is 8 bits per
    character for a string, or the smallest width that holds the value
    for an integer (so bitvector(0) has length 0).

    Indexing and slicing count from the most significant bit: bv[0] is
    the leftmost bit shown by repr(bv).  Iteration, on the other hand,
    yields bits least significant first.
    """
    def __init__(self, i, length=None):
        if isinstance(i, str):
            value = 0
            for c in i:
                value <<= 8
                value += ord(c)
            self._val = value
            if length is None:
                length = len(i) * 8
        else:
            self._val = i
            if length is None:
                # Count the bits needed to represent the value.
                length = 0
                while i > 0:
                    i >>= 1
                    length += 1
        self._len = length
    def __len__(self):
        return self._len
    def __getitem__(self, idx):
        """Return bit idx (0 or 1), counting from the most significant bit.

        Negative indexes count back from the end.  A slice with step 1
        returns a new bitvector (see __getslice__).  Raises IndexError
        for an index outside the vector and ValueError for a slice
        with any other step.
        """
        if isinstance(idx, slice):
            # Python 3 (and extended slicing on Python 2) delivers
            # bv[a:b] here rather than to __getslice__.
            start, stop, step = idx.indices(self._len)
            if step != 1:
                raise ValueError('bitvector slices must have step 1')
            return self.__getslice__(start, stop)
        if idx < 0:
            idx += self._len
        if idx < 0 or idx >= self._len:
            raise IndexError(idx)
        # Index 0 is the top bit, which sits _len - 1 places up.
        return int((self._val >> (self._len - 1 - idx)) & 1)
    def __getslice__(self, a, b):
        """Return bits a (inclusive) to b (exclusive) as a new bitvector.

        Bounds are clamped to the vector; an empty or reversed range
        gives a bitvector of length 0.
        """
        if a < 0:
            a = 0
        if b > self._len:
            b = self._len
        width = b - a
        if width < 0:
            width = 0
        # Drop the bits to the right of b, then keep the low `width` bits.
        shifted = self._val >> (self._len - b)
        mask = (1 << width) - 1
        return bitvector(shifted & mask, length=width)
    def __iter__(self):
        """Yield every bit as 0 or 1, least significant bit first."""
        v = self._val
        remaining = self._len
        while remaining > 0:
            yield int(v & 1)
            v >>= 1
            remaining -= 1
    def __str__(self):
        """Return the bits packed into characters, most significant first.

        Full groups of 8 bits are taken from the top; whatever is left
        over (1 to 8 bits) forms the final character.
        """
        chars = []
        v = self._val
        remaining = self._len
        while remaining > 8:
            remaining -= 8
            chars.append(chr((v >> remaining) & 0xFF))
        if remaining > 0:
            chars.append(chr(v & ((1 << remaining) - 1)))
        return ''.join(chars)
    def __int__(self):
        return self._val
    def __repr__(self):
        # Iteration is least significant first; flip it for display.
        bits = [str(bit) for bit in self]
        bits.reverse()
        return '<bitvector ' + ''.join(bits) + '>'
 ##
 ## Statistical stuff
 ##
 def basedist(l):
    """Build a sample string whose letter mix follows english_frequency.

    Each letter is repeated int(frequency * l) times, so the result is
    about l characters long; rounding down can leave it shorter.
    Letters appear in the dictionary's iteration order.
    """
    runs = [letter * int(share * l)
            for letter, share in english_frequency.items()]
    return "".join(runs)
 ##
 ## Factoring stuff
 ##
 def isPrime(number):
    """Return True if number is prime, False otherwise.

    Numbers below 2 are not prime.
    """
    return number > 1 and smallestFactor(number) is None
 def smallestFactor(number):
    """Return the smallest divisor d of number with 1 < d < number.

    Returns None when no such divisor exists, i.e. when number is
    prime or is less than 4.
    """
    candidate = 2
    # A composite number always has a divisor no larger than its
    # square root, so there is no need to search any further.
    while candidate * candidate <= number:
        if number % candidate == 0:
            return candidate
        candidate += 1
    return None
 def factor(number):
    """Return prime factors for number, smallest first.

    factor(12) == [2, 2, 3].  A number with no proper divisor (a prime,
    or anything below 4) comes back as a one-element list holding the
    number itself.
    """
    factors = []
    while True:
        divisor = smallestFactor(number)
        if divisor is None:
            break
        factors.append(divisor)
        # Floor division keeps number an integer on every Python version.
        number //= divisor
    factors.append(number)
    return factors
 ##
 ## Statistical analysis
 ##
 def where(haystack, needle):
    """Return the start index of every occurrence of needle in haystack.

    Indexes are offsets into haystack itself, in ascending order.
    Overlapping occurrences are all reported:
    where('aaa', 'aa') == [0, 1].
    """
    positions = []
    pos = haystack.find(needle)
    while pos != -1:
        positions.append(pos)
        # Resume one character further on, so overlapping matches are
        # found and the reported offsets stay relative to haystack.
        pos = haystack.find(needle, pos + 1)
    return positions
 def ngrams(n, haystack, min=2, repeats=False):
    """Index the n-character substrings of haystack.

    Returns a dict mapping each substring to the list of offsets in
    haystack at which it starts (overlapping occurrences included).

    n       -- substring length; must be at least 1
    min     -- leave out substrings that occur fewer than this many times
    repeats -- if true, keep only substrings made of a single repeated
               character ('ss', 'ee', ...)

    Raises ValueError if n is less than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    found = {}
    # Stop at the last full-length window: a shorter tail is not an
    # n-gram and must not be counted as one.
    for start in range(len(haystack) - n + 1):
        gram = haystack[start:start + n]
        if repeats and gram != gram[0] * n:
            continue
        found.setdefault(gram, []).append(start)
    acc = {}
    for gram, positions in found.items():
        if len(positions) >= min:
            acc[gram] = positions
    return acc
 def freq(txt):
    """Single-character table: ngrams() with n=1 and no minimum count."""
    table = ngrams(1, txt, min=0)
    return table
 def bigrams(txt):
    """Two-character table: ngrams() with n=2 and its default minimum."""
    table = ngrams(2, txt)
    return table
 def trigrams(txt):
    """Three-character table: ngrams() with n=3 and its default minimum."""
    table = ngrams(3, txt)
    return table
 def freqgraph(f):
    """Print a text bar chart of the counts in f, largest first.

    f maps a label to either an int count or a sized collection (such
    as a list of positions), whose length is then used as the count.
    One line is printed per label: the label, a colon and a space, then
    one '#' per count.  Labels with equal counts keep the order in
    which f yields them.
    """
    items = []
    for label, count in f.items():
        if type(count) is not int:
            count = len(count)
        items.append((label, count))
    # Stable sort, descending by count.
    items.sort(key=lambda item: item[1], reverse=True)
    for label, count in items:
        # A single parenthesised argument prints identically whether
        # print is a statement (Python 2) or a function (Python 3).
        print('%s: %s' % (label, '#' * count))
 def neighbors(txt):
    """Map each character to the Set of characters seen next to it.

    Built from bigrams(txt): for every two-character key 'xy' it
    returns, y is added to x's neighbors and x to y's.
    """
    adjacent = {}
    for pair in bigrams(txt):
        left, right = pair[0], pair[1]
        adjacent.setdefault(left, Set()).add(right)
        adjacent.setdefault(right, Set()).add(left)
    return adjacent
 ##
 ## Brute force tools
 ##
 def rot(n, txt):
    """Caesar cipher: shift each ASCII letter in txt forward by n places.

    Letters wrap around within their own case; every other character
    is copied unchanged.  n may be any integer: negative values shift
    backwards and values beyond 25 wrap, so rot(-n, rot(n, s)) == s.
    """
    out = []
    for c in txt:
        if 'a' <= c <= 'z':
            base = ord('a')
        elif 'A' <= c <= 'Z':
            base = ord('A')
        else:
            out.append(c)
            continue
        # Work on the 0..25 offset so the wrap is a plain modulo.
        out.append(chr(base + (ord(c) - base + n) % 26))
    return ''.join(out)
 def caesars(txt):
    """Return all 26 Caesar shifts of txt; element i is rot(i, txt)."""
    shifted = []
    for shift in range(26):
        shifted.append(rot(shift, txt))
    return shifted
 # Tabula recta: the 26 rows returned by caesars() for the upper-case
 # alphabet, so row i is the alphabet passed through rot(i, ...).
 tabula_recta = caesars('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
 def xor(n, txt):
    """XOR the code of every character in txt with n; return the result."""
    return ''.join(chr(ord(c) ^ n) for c in txt)
 def xors(txt):
    """Return txt XORed with every key 0..255; element k is xor(k, txt)."""
    return [xor(key, txt) for key in range(256)]
 def add(n, txt):
    """Add n to every character code in txt, wrapping modulo 256."""
    shifted = []
    for c in txt:
        # % with a positive modulus already lands in 0..255, even when
        # n is negative.
        shifted.append(chr((ord(c) + n) % 256))
    return ''.join(shifted)
 def adds(txt):
    """Return txt shifted by every offset 0..255; element k is add(k, txt)."""
    return [add(offset, txt) for offset in range(256)]
 ##
 ## Grep-like things within dictionary
 ##
 def matches(str, tgt):
    """Return True if str and tgt share the same letter pattern.

    That is: they have equal length, and a one-to-one substitution of
    characters turns str into tgt ('abba' matches 'noon').
    """
    if len(str) != len(tgt):
        return False
    forward = {}    # character of str -> character of tgt
    backward = {}   # character of tgt -> character of str
    for s, t in zip(str, tgt):
        seen = forward.get(s)
        if seen and seen != t:
            return False
        forward[s] = t
        seen = backward.get(t)
        if seen and seen != s:
            return False
        backward[t] = s
    return True
 def guess(pattern, wordlist='/usr/share/dict/words'):
    """Print and return the dictionary words that fit a letter pattern.

    pattern  -- the pattern word; it and every dictionary word are
                lower-cased and compared with matches()
    wordlist -- path of a text file holding one word per line

    Each matching word is printed as it is found.  Returns the list of
    matching words in file order.
    """
    ret = []
    pattern = pattern.lower()
    words = open(wordlist)
    try:
        for word in words:
            word = word.strip().lower()
            if matches(word, pattern):
                print(word)
                ret.append(word)
    finally:
        # Release the file even if matching fails part-way through.
        words.close()
    return ret
 ##
 ## Overview tools
 ##
 def summary(txt):
    """Print an overview of txt.

    Prints, in order: the length of txt and the factor() result for
    that length; then freqgraph() charts of single characters,
    bigrams, trigrams and doubled characters; and finally the
    neighbors() table.  The parenthesised text in each heading appears
    to list values common in English, for comparison by eye.
    """
    print "Length", len(txt)
    print "Factors", factor(len(txt))
    print
    print "Frequency (etaoin shrdlcu)"
    freqgraph(freq(txt))
    print
    print "Bigrams (th er on an re he in ed nd ha at en es of or"
    print "         nt ea ti to it st io le is ou ar as de rt ve)"
    freqgraph(bigrams(txt))
    print
    print "Trigrams (the and tha ent ion tio for nde has nce edt"
    print "          tis oft sth men)"
    freqgraph(trigrams(txt))
    print
    # 4-letter words: that with have this will your from they know
    #                 want been good much some time
    print "Repeats (ss ee tt ff ll mm oo)"
    # min=1 so that a doubled character occurring only once is listed.
    freqgraph(ngrams(2, txt, min=1, repeats=True))
    print
    print "Unique neighbors"
    pprint(neighbors(txt))
    print
 def replace(txt, orig, repl):
    """Substitute in txt: every orig[i] becomes repl[i].

    When each entry of orig is a single character, all substitutions
    are made in one pass, so a replacement is never replaced again by
    a later pair: replace('ab', 'ab', 'ba') == 'ba'.  If orig lists a
    character more than once, its first pairing wins.

    If orig contains longer (or empty) strings, the pairs are applied
    one after another with str.replace, in order.
    """
    pairs = list(zip(orig, repl))
    if all(len(o) == 1 for o, r in pairs):
        table = {}
        for o, r in pairs:
            table.setdefault(o, r)
        return ''.join([table.get(c, c) for c in txt])
    for o, r in pairs:
        txt = txt.replace(o, r)
    return txt
--- a/resequence.py
+++ b/resequence.py
@ -0,0 +1,190 @@
 #! /usr/bin/python
 import scapy
 import StringIO
 # Short module-level aliases for the scapy names used below.
 IP = scapy.IP
 TCP = scapy.TCP
 Raw = scapy.Raw
 # NOTE(review): drop_pad is not referenced by anything visible in this
 # file; DropStringIO pads with its own padstr ('Drop') instead.
 drop_pad = 'DROP'
 class DropStringIO(StringIO.StringIO):
    """StringIO that fills gaps with a visible marker.

    When a write starts beyond the current end of the buffer, the gap
    is filled with repetitions of padstr ('Drop'), cut to the size of
    the gap, instead of NULs.  That should make dropped data easy to
    spot in the output.
    """
    padstr = 'Drop'
    def write(self, s):
        gap = self.pos - self.len
        if gap > 0:
            # Build one repetition more than needed, then trim.
            filler = self.padstr * (gap // len(self.padstr) + 1)
            self.buflist.append(filler[:gap])
            self.len = self.pos
        return StringIO.StringIO.write(self, s)
 class TCP_Session:
    """Iterable TCP session resequencer.

    You initialize it with something with a read() method that returns
    the next captured frame, with the raw ethernet bytes at index 2, or
    a false value when the capture is exhausted.  For instance, an
    object from my py-pcap module.  The constructor consumes the
    three-way handshake, which must open the capture.

    Iterating yields (srv, chunk) pairs, where srv is 1 if the data
    came from the server and 0 if it came from the client, and chunk
    is a chunk of data.  Data is released once the other side has
    acknowledged it, so you get both sides of the conversation in the
    order that they happened.  When the capture ends, done() reports
    any payloads that were never acknowledged.

    Doesn't (yet) handle fragments or dropped packets.  Does handle out
    of order packets.  The two sides are told apart by IP address only.
    """
    def __init__(self, pc):
        self.pc = pc
        self.cli = None                 # (address, port) of the client
        self.srv = None                 # (address, port) of the server
        self.seq = [None, None]         # next sequence number: [client, server]
        self.pending = [{}, {}]         # unacknowledged packets by seq, per side
        self.frames = 0                 # number of frames read so far
        self.read_handshake()
    def read_packet(self):
        """Return the next frame as a scapy Ether packet, or None at the end."""
        p = self.pc.read()
        if not p:
            return None
        return scapy.Ether(p[2])
    def read_handshake(self):
        """Consume SYN, SYN-ACK and ACK; record endpoints and sequence numbers."""
        # Read SYN
        pkt = self.read_packet()
        assert (pkt[TCP].flags == 2) # XXX: There's got to be a better way
        self.cli = (pkt[IP].src, pkt.sport)
        self.srv = (pkt[IP].dst, pkt.dport)
        self.seq[0] = pkt.seq + 1
        # Read SYN-ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.srv[0]) and
                (pkt[TCP].flags == 18)):
                self.seq[1] = pkt.seq + 1
                break
        # Read ACK
        while True:
            pkt = self.read_packet()
            if ((pkt[IP].src == self.cli[0]) and
                (pkt[TCP].flags == 16)):
                assert (self.seq[0] == pkt.seq)
                break
        self.frames = 3
    def __iter__(self):
        while True:
            pkt = self.read_packet()
            if not pkt:
                # End of capture.  Leave the loop rather than returning,
                # so that done() below actually runs.
                break
            self.frames += 1
            # Which way is this going?
            idx = int(pkt[IP].src == self.srv[0])
            xdi = 1 - idx
            # Does this ACK after the last output sequence number?
            if pkt.ack > self.seq[xdi]:
                pending = self.pending[xdi]
                seq = self.seq[xdi]
                ret = DropStringIO()
                # sorted() gives a copy, so entries can be removed while
                # looping, and makes the write order deterministic.
                for key in sorted(pending):
                    if key >= pkt.ack:
                        continue
                    pkt2 = pending.pop(key)
                    # Place the payload at its offset from the last
                    # sequence number that was output.
                    ret.seek(pkt2.seq - seq)
                    ret.write(pkt2[TCP][Raw].load)
                self.seq[xdi] = pkt.ack
                yield (xdi, ret.getvalue())
            # If it has a payload, stick it into pending
            if hasattr(pkt[TCP][Raw], 'load'):
                self.pending[idx][pkt.seq] = pkt
        self.done()
    def done(self):
        """Warn about any unhandled packets"""
        for p in self.pending:
            k = sorted(p)
            if k:
                print('unused packets: %s' % (k,))
 class HTTP_side:
    """One side of an HTTP transaction.

    Feed text to process() as it arrives.  The start line is stored in
    first, header lines in headers, and up to Content-Length characters
    of body in data.  complete becomes true once the headers have ended
    and the announced amount of body has been collected.
    """
    def __init__(self):
        self.buf = ''               # partial header line awaiting more input
        self.first = ''             # the start line
        self.in_headers = True      # still reading the header section?
        self.headers = {}           # header name -> value, as received
        self.pending_data = 0       # body characters still expected
        self.data = ''              # body collected so far
        self.complete = False
    def __repr__(self):
        return '<HTTP_side %r>' % self.first
    def process(self, chunk):
        """Returns any unprocessed part of the chunk, parts which go to
        the next utterance.

        Returns '' while more input is still needed.
        """
        # Text held back by the previous call comes before the new data.
        chunk = self.buf + chunk
        self.buf = ''
        while self.in_headers and chunk:
            if '\n' not in chunk:
                # Incomplete line: keep it until the rest arrives.
                self.buf = chunk
                return ''
            line, chunk = chunk.split('\n', 1)
            self.process_header_line(line)
        if self.in_headers:
            # Input ran out before the blank line that ends the
            # headers, so this side cannot be complete yet.
            return ''
        if self.pending_data:
            d = chunk[:self.pending_data]
            chunk = chunk[self.pending_data:]
            self.data += d
            self.pending_data -= len(d) # May set to 0
        if not self.pending_data:
            self.complete = True
        return chunk
    def process_header_line(self, line):
        """Record one line of the header section (newline already removed).

        A blank line ends the headers.  The first non-blank line is the
        start line and goes into first.  Every later line must look
        like 'Name: value'; it is stored in headers exactly as split on
        its first colon, so the value keeps its surrounding whitespace.
        A Content-Length header sets the amount of body to expect.

        Raises ValueError for a later line that has no colon, or for a
        Content-Length value that is not an integer.
        """
        if not line.strip():
            self.in_headers = False
            return
        if not self.first:
            # The start line may itself contain a colon (an absolute
            # URL, or host:port), so it is never parsed as a header.
            self.first = line
            return
        if ':' not in line:
            raise ValueError(('Not a header', line))
        k, v = line.split(':', 1)
        self.headers[k] = v
        if k.lower() == 'content-length':
            self.pending_data = int(v)
--- a/startup.py
+++ b/startup.py
@ -0,0 +1,8 @@
 # Interactive start-up script: turn on tab completion when the
 # readline module is available.
 try:
    import readline
 except ImportError:
    # A single parenthesised argument prints identically whether print
    # is a statement (Python 2) or a function (Python 3).
    print("Module readline not available.")
 else:
    import rlcompleter
    readline.parse_and_bind("tab: complete")