From 833edb41358fdca8e59bf03e2e5917ab0756a34d Mon Sep 17 00:00:00 2001 From: Neale Pickett Date: Tue, 18 Aug 2009 12:23:13 -0600 Subject: [PATCH] UTF-8 symbols in hexdump --- __init__.py | 109 +++++++++++++++++++++++++++++++++++++++++----------- gapstr.py | 39 +++++++++++++++---- 2 files changed, 118 insertions(+), 30 deletions(-) diff --git a/__init__.py b/__init__.py index 1a7ebdf..02bb96f 100755 --- a/__init__.py +++ b/__init__.py @@ -6,7 +6,75 @@ import sys import struct -printable = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()[]{}`~/=-\\?+|\',."<>: _' +stdch = (u'␀·········␊··␍··' + u'················' + u' !"#$%&\'()*+,-./' + u'0123456789:;<=>?' + u'@ABCDEFGHIJKLMNO' + u'PQRSTUVWXYZ[\]^_' + u'`abcdefghijklmno' + u'pqrstuvwxyz{|}~·' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················') + +decch = (u'␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏' + u'␐␑␒␓␔␕␖␗␘␙␚·····' + u'␠!"#$%&\'()*+,-./' + u'0123456789:;<=>?' + u'@ABCDEFGHIJKLMNO' + u'PQRSTUVWXYZ[\]^_' + u'`abcdefghijklmno' + u'pqrstuvwxyz{|}~␡' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················' + u'················') + +cgach = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼' + u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼' + u'␣!"#$%&\'()*+,-./' + u'0123456789:;<=>?' + u'@ABCDEFGHIJKLMNO' + u'PQRSTUVWXYZ[\]^_' + u'`abcdefghijklmno' + u'pqrstuvwxyz{|}~⌂' + u'ÇüéâäàåçêëèïîìÄÅ' + u'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ' + u'áíóúñѪº¿⌐¬½¼¡«»' + u'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐' + u'└┴┬├─┼╞╟╚╔╩╦╠═╬╧' + u'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀' + u'αßΓπΣσµτΦΘΩδ∞φε∩' + u'≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤') + +shpch = (u'␀☺☻♥♦♣♠•◘○◙♂♀♪♫☼' + u'►◄↕‼¶§▬↨↑↓→←∟↔▲▼' + u'␣!"#$%&\'()*+,-./' + u'0123456789:;<=>?' + u'@ABCDEFGHIJKLMNO' + u'PQRSTUVWXYZ[\]^_' + u'`abcdefghijklmno' + u'pqrstuvwxyz{|}~⌂' + u'ÇüéâäàåçêëèïîìÄÅ' + u'ÉæÆôöòûùÿÖÜ¢£¥₧ƒ' + u'áíóúñѪº¿⌐¬½¼¡«»' + u'░▒▓│┤╡╢╖╕╣║╗╝╜╛┐' + u'└┴┬├─┼╞╟╚╔╩╦╠═╬╧' + u'╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀' + u'αßΓπΣσµτΦΘΩδ∞φε∩' + u'≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤') + + def unpack(fmt, buf): """Unpack buf based on fmt, return the rest as a string.""" @@ -24,19 +92,14 @@ class HexDumper: def _to_printable(self, c): if not c: - return '◆' - elif c in printable: - return c - elif c == '\0': - return '␀' - elif c == '\r': - return '␍' - elif c == '\n': - return '␤' + return u'◌' else: - return '·' + return cgach[ord(c)] + def write(self, what): + self.fd.write(what.encode('utf-8')) + def _flush(self): if not self.buf: return @@ -44,26 +107,26 @@ class HexDumper: o = [] for c in self.buf: if c: - o.append('%02x' % ord(c)) + o.append(u'%02x' % ord(c)) else: - o.append('--') - o += ([' '] * (16 - len(self.buf))) + o.append(u'--') + o += ([u' '] * (16 - len(self.buf))) p = [self._to_printable(c) for c in self.buf] - self.fd.write('%08x ' % self.offset) + self.write(u'%08x ' % self.offset) - self.fd.write(' '.join(o[:8])) - self.fd.write(' ') - self.fd.write(' '.join(o[8:])) + self.write(u' '.join(o[:8])) + self.write(u' ') + self.write(u' '.join(o[8:])) - self.fd.write(' ║') + self.write(u' ┆') - self.fd.write(''.join(p)) + self.write(u''.join(p)) - self.fd.write('║\n') + self.write(u'┆\n') + self.offset += len(self.buf) self.buf = [] - self.offset += 16 def dump_chr(self, c): self.buf.append(c) @@ -77,7 +140,7 @@ class HexDumper: def finish(self): self._flush() - self.fd.write('%08x\n' % self.offset) + self.write('%08x\n' % self.offset) def hexdump(buf, f=sys.stdout): diff --git a/gapstr.py b/gapstr.py index 87563d0..354bebd 100755 --- a/gapstr.py +++ b/gapstr.py @@ -2,9 +2,9 @@ ## 2008 Massive Blowout -"""Functions to treat a list as a string with gaps. +"""Functions to treat a list as a byte array with gaps. -Lists should have only string and integer items. +Lists should have only byte and numeric items. """ @@ -56,7 +56,9 @@ class GapString: def hasgaps(self): for i in self.contents: - if isinstance(i, int): + try: + len(i) + except TypeError: return True return False @@ -66,7 +68,7 @@ class GapString: d = __init__.HexDumper(fd) for i in self.contents: try: - for j in range(i): + for j in xrange(i): d.dump_drop() except TypeError: for c in i: @@ -117,8 +119,26 @@ class GapString: return new def __getitem__(self, idx): - # XXX: speed up - return str(self)[idx] + if False: + c = self[idx:idx+1] + if c.hasgaps(): + return self.drop[0] + else: + return c.contents[0][0] + else: + l = 0 + for i in self.contents: + try: + l += len(i) + except TypeError: + l += i + if l > idx: + offs = idx - l + try: + return i[offs] + except: + return self.drop[0] + raise IndexError('Out of bounds') def __add__(self, other): if isinstance(other, str): @@ -133,8 +153,12 @@ class GapString: try: mask = [ord(c) for c in mask] except TypeError: + pass + try: + masklen = len(mask) + except TypeError: + masklen = 1 mask = [mask] - masklen = len(mask) new = self.__class__(drop=self.drop) for i in self.contents: @@ -147,6 +171,7 @@ class GapString: offset = (offset + 1) % masklen new.append(''.join(r)) except TypeError: + print("type error!") new.append(i) return new