Much cleaner Esab32 decoding

Tries all permutations of chunks in Esab32,
until it gets only printable characters.
This decodes all but one line of the data
I currently have, and the GUID on that line
is successfully decoded later,
once another chunk for it has been added.
So in the end every GUID decodes: 100% successful.
This commit is contained in:
Neale Pickett 2020-12-22 21:45:30 -07:00
parent 7390cb6f7c
commit 3e0413f9f7
1 changed files with 117 additions and 71 deletions

View File

@ -4,7 +4,7 @@
# Unclassified/FOUO # Unclassified/FOUO
# #
# Created: 2020-12-14 16:49:51 # Created: 2020-12-14 16:49:51
# Last-modified: 2020-12-22 17:57:54 # Last-modified: 2020-12-22 21:42:40
# #
# Based on work by @RedDrip7 (twitter), # Based on work by @RedDrip7 (twitter),
# who should be getting more credit in the English-speaking world. # who should be getting more credit in the English-speaking world.
@ -26,10 +26,14 @@ knownDomains = [
"avsvmcloud.com", "avsvmcloud.com",
] ]
def xor(key, buf): def xor(key, buf):
return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key))) return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))
Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
SubstitutionAlphabet0 = '0_-.'
def DecodeBase32(s: str): def DecodeBase32(s: str):
@ -39,63 +43,109 @@ def DecodeBase32(s: str):
It doesn't work. It doesn't work.
""" """
t = s.translate(Bsae32Alphabet, trans = str.maketrans(Esab32Alphabet,
"ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678") "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
t = s.translate(trans)
while len(t) % 8 > 0: while len(t) % 8 > 0:
t += '=' t += '='
return base64.b32decode(t) return base64.b32decode(t)
def DecodeBsae32(s: str): def DecodeEsab32(s: str) -> (int, int):
"""Decode using zany base32-like algorithm. """Decode using big-endian base32 algorithm.
The following opinion has been formed hastily and could be misinformed: Returns a bigint, and the number of bits contained therein
This is not proper Base32. It's more like somebody read about Base32,
implemented an encoder and decoder incorrectly, and went on to the next task.
""" """
bits = 0 acc = bits = 0
acc = 0
for c in s: for c in s:
acc |= Bsae32Alphabet.find(c) << bits try:
p = Esab32Alphabet.index(c)
except ValueError:
raise RuntimeError(
"Not an Esab32 encoded character: %c in %s" % (c, s))
acc |= p << bits
bits += 5 bits += 5
return acc, bits
out8 = []
while bits > 0:
out8.append(acc & 255)
acc >>= 8
bits -= 8
if bits:
del out8[-1]
return bytes(out8)
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj' def DecodeSubst(s: str) -> str:
SubstitutionXlat = str.maketrans( alphabet = SubstitutionAlphabet
SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4], SubstitutionAlphabet)
SubstitutionXlat0 = str.maketrans(
SubstitutionAlphabet, ('0_-.' * 9)[:len(SubstitutionAlphabet)])
def DecodeSubst(s: str):
zeroBaby = False
out = [] out = []
for c in s: for c in s:
if c == '0': if c == '0':
zeroBaby = True alphabet = SubstitutionAlphabet0
continue
if zeroBaby:
out.append(c.translate(SubstitutionXlat0))
else: else:
out.append(c.translate(SubstitutionXlat)) try:
zeroBaby = False pos = (SubstitutionAlphabet.index(c) - 4) % len(alphabet)
return ''.join(out) except ValueError:
raise RuntimeError(
"Not a subst encoded character: %c in %s" % (c, s))
out.append(alphabet[pos])
alphabet = SubstitutionAlphabet
return "".join(out)
PayloadsByGuid = {}
def DecodeDomain(domain: str): Guid = int
def isprintable(c: int) -> bool:
    """Return True when byte value *c* is a printable ASCII character.

    Printable means space (0x20) through tilde (0x7e). DEL (0x7f) is a
    control character and is rejected, as is everything below 0x20 and
    everything above 0x7f.
    """
    return 0x20 <= c <= 0x7e
def quopri(buf: bytes) -> str:
    """Render *buf* as quoted-printable text.

    The bytes are run through the "quopri" codec and the encoded result
    is decoded as UTF-8 to produce a str.
    """
    encoded = codecs.encode(buf, "quopri")
    return encoded.decode("utf-8")
class DGADecoder:
    """Accumulate the payload chunks seen for one GUID and decode them together.

    Every call to decode() records one more chunk in ``history`` and then
    re-decodes everything recorded so far. The instance starts out using
    the substitution decoder; the first chunk that begins with "00"
    switches it to the Esab32 decoder, and it stays switched afterwards.
    """

    def __init__(self, guid: "Guid"):
        self.guid = guid
        # Chunks recorded so far. "00"-prefixed chunks are inserted at the
        # front (prefix removed); all others are appended in arrival order.
        self.history = []
        self._decoder = self.DecodeSubst

    def decode(self, s: str):
        """Record payload chunk *s* and return the decode of all chunks so far."""
        if s.startswith("00"):
            self._decoder = self.DecodeEsab32
            # We'll throw away the information about which is first,
            # since we do a computationally intensive trick later
            # to determine ordering.
            s = s[2:]
            self.history.insert(0, s)
        else:
            self.history.append(s)
        return self._decoder()

    def DecodeSubst(self) -> str:
        """Decode each chunk with the substitution cipher and join the results.

        Duplicate decodes are dropped and the remainder is concatenated
        longest first. Deduplication keeps first-seen order (rather than
        going through a set) so that equal-length pieces come out in the
        same order on every run instead of depending on string hashing.
        """
        decodes = dict.fromkeys(DecodeSubst(x) for x in self.history)
        return ''.join(sorted(decodes, key=len, reverse=True))

    def DecodeEsab32(self) -> str:
        """Try every ordering of the chunks and keep the all-printable decodes.

        Trailing "0" characters are stripped from each chunk and duplicate
        chunks are dropped. Each permutation of the remaining chunks is
        concatenated and decoded; only the whole bytes of the result are
        kept. Orderings whose bytes are all printable are returned as
        quoted-printable text joined by " | ". If no ordering qualifies,
        the last ordering tried is returned in quoted-printable form.

        The number of orderings grows factorially with the number of
        distinct chunks.
        """
        # Ordered dedupe: iterating a set of strings varies between runs,
        # which would reorder the candidates and change the fallback result.
        history = dict.fromkeys(x.rstrip("0") for x in self.history)

        # "Why don't we just mix up absolutely everything and see what happens?"
        #   -- Ridcully, in Terry Pratchett's "The Hogfather"
        possibilities = []
        for attempt in itertools.permutations(history):
            acc, abits = DecodeEsab32(''.join(attempt))
            length = abits // 8
            # Serialise enough bytes to hold every bit, then drop the
            # trailing partial byte (if any) so only whole bytes remain.
            buf = acc.to_bytes((abits + 7) // 8, 'little')[:length]
            if all(isprintable(b) for b in buf):
                # Yay it's probably okay
                possibilities.append(buf)

        # Well, we tried.
        if not possibilities:
            return quopri(buf)
        return " | ".join(quopri(candidate) for candidate in possibilities)
DecodersByGuid = {}
def DecodeDomain(domain: str) -> (Guid, int, str):
s = domain.strip() s = domain.strip()
foundDomain = None foundDomain = None
for d in knownDomains: for d in knownDomains:
@ -110,35 +160,28 @@ def DecodeDomain(domain: str):
assert(s[-1] == '.') assert(s[-1] == '.')
s = s[:-1] s = s[:-1]
if foundDomain == "avsvmcloud.com":
return (None, None, "[Probably not a Sunburst domain]")
if len(s) < 16: if len(s) < 16:
return (None, None, "[unable to decode: too short for any known decoding rules]") return (None, None, "[too short]")
dec, _ = DecodeEsab32(s[:15])
eguid = dec.to_bytes(10, 'little')[:9]
guid = int.from_bytes(xor(eguid[0:1], eguid[1:]), 'big')
eguid = DecodeBsae32(s[:15])
unknown_a = s[15] unknown_a = s[15]
guid = xor(eguid[0:1], eguid[1:]) payload = s[16:]
s = s[16:]
payloads = PayloadsByGuid.setdefault(guid, []) decoder = DecodersByGuid.get(guid)
if s not in payloads: if not decoder:
if s.startswith("00"): decoder = DGADecoder(guid)
payloads.insert(0, s) DecodersByGuid[guid] = decoder
else:
payloads.append(s)
# People: friggin' preserve metadata, ugh.
# If I gave you every line of The Empire Strikes Back,
# sorted alphabetically, without timestamps,
# could you reconstruct the movie?
payloads.sort(key=len, reverse=True)
payload = ''.join(payloads) decoded = decoder.decode(payload)
if payload.startswith("00"):
buf = DecodeBsae32(payload[2:])
decoded = codecs.encode(buf, "quopri").decode("utf-8")
else:
decoded = DecodeSubst(payload)
return (guid, unknown_a, decoded) return (guid, unknown_a, decoded)
class TextReader: class TextReader:
def __init__(self, infile): def __init__(self, infile):
self.infile = infile self.infile = infile
@ -152,12 +195,14 @@ class TextReader:
class CsvReader: class CsvReader:
def __init__(self, infile): def __init__(self, infile):
self.reader = csv.DictReader(infile) self.reader = csv.DictReader(infile)
self.fieldnames = self.reader.fieldnames + ["guid", "unknown a", "decode"] self.fieldnames = self.reader.fieldnames + \
["guid", "unknown a", "decode"]
def __iter__(self): def __iter__(self):
for record in self.reader: for record in self.reader:
yield record yield record
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Decode sunburst Domain Generation Algorithm (DGA) names") description="Decode sunburst Domain Generation Algorithm (DGA) names")
@ -191,10 +236,11 @@ def main():
for record in reader: for record in reader:
name = record.get("name") or record.get("fqdn") name = record.get("name") or record.get("fqdn")
guid, unknown_a, ptext = DecodeDomain(name) guid, unknown_a, ptext = DecodeDomain(name)
record["guid"] = int.from_bytes(guid or b"", "big") record["guid"] = guid
record["unknown a"] = unknown_a record["unknown a"] = unknown_a
record["decode"] = ptext record["decode"] = ptext
writer.writerow(record) writer.writerow(record)
if __name__ == '__main__': if __name__ == '__main__':
main() main()