Much cleaner Esab32 decoding
Tries all permutations of chunks in Esab32, until it gets only printable characters. This decodes all but one line of the data I currently have: that GUID is successfully decoded later after another chunk is added. So this is 100% successful.
This commit is contained in:
parent
7390cb6f7c
commit
3e0413f9f7
188
sunburst.py
188
sunburst.py
|
@ -4,7 +4,7 @@
|
||||||
# Unclassified/FOUO
|
# Unclassified/FOUO
|
||||||
#
|
#
|
||||||
# Created: 2020-12-14 16:49:51
|
# Created: 2020-12-14 16:49:51
|
||||||
# Last-modified: 2020-12-22 17:57:54
|
# Last-modified: 2020-12-22 21:42:40
|
||||||
#
|
#
|
||||||
# Based on work by @RedDrip7 (twitter),
|
# Based on work by @RedDrip7 (twitter),
|
||||||
# who should be getting more credit in the English-speaking world.
|
# who should be getting more credit in the English-speaking world.
|
||||||
|
@ -26,10 +26,14 @@ knownDomains = [
|
||||||
"avsvmcloud.com",
|
"avsvmcloud.com",
|
||||||
]
|
]
|
||||||
|
|
||||||
def xor(key, buf):
|
|
||||||
return bytes(b^k for b,k in zip(buf, itertools.cycle(key)))
|
|
||||||
|
|
||||||
Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
|
def xor(key, buf):
|
||||||
|
return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))
|
||||||
|
|
||||||
|
|
||||||
|
Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
|
||||||
|
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
|
||||||
|
SubstitutionAlphabet0 = '0_-.'
|
||||||
|
|
||||||
|
|
||||||
def DecodeBase32(s: str):
|
def DecodeBase32(s: str):
|
||||||
|
@ -39,63 +43,109 @@ def DecodeBase32(s: str):
|
||||||
It doesn't work.
|
It doesn't work.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
t = s.translate(Bsae32Alphabet,
|
trans = str.maketrans(Esab32Alphabet,
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
|
||||||
|
t = s.translate(trans)
|
||||||
while len(t) % 8 > 0:
|
while len(t) % 8 > 0:
|
||||||
t += '='
|
t += '='
|
||||||
return base64.b32decode(t)
|
return base64.b32decode(t)
|
||||||
|
|
||||||
|
|
||||||
def DecodeBsae32(s: str):
|
def DecodeEsab32(s: str) -> (int, int):
|
||||||
"""Decode using zany base32-like algorithm.
|
"""Decode using big-endian base32 algorithm.
|
||||||
|
|
||||||
The following opinion has been formed hastily and could be misinformed:
|
Returns a bigint, and the number of bits contained therein
|
||||||
|
|
||||||
This is not proper Base32. It's more like somebody read about Base32,
|
|
||||||
implemented an encoder and decoder incorrectly, and went on to the next task.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
bits = 0
|
acc = bits = 0
|
||||||
acc = 0
|
|
||||||
for c in s:
|
for c in s:
|
||||||
acc |= Bsae32Alphabet.find(c) << bits
|
try:
|
||||||
|
p = Esab32Alphabet.index(c)
|
||||||
|
except ValueError:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Not an Esab32 encoded character: %c in %s" % (c, s))
|
||||||
|
acc |= p << bits
|
||||||
bits += 5
|
bits += 5
|
||||||
|
return acc, bits
|
||||||
out8 = []
|
|
||||||
while bits > 0:
|
|
||||||
out8.append(acc & 255)
|
|
||||||
acc >>= 8
|
|
||||||
bits -= 8
|
|
||||||
|
|
||||||
if bits:
|
|
||||||
del out8[-1]
|
|
||||||
return bytes(out8)
|
|
||||||
|
|
||||||
|
|
||||||
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
|
def DecodeSubst(s: str) -> str:
|
||||||
SubstitutionXlat = str.maketrans(
|
alphabet = SubstitutionAlphabet
|
||||||
SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4], SubstitutionAlphabet)
|
|
||||||
SubstitutionXlat0 = str.maketrans(
|
|
||||||
SubstitutionAlphabet, ('0_-.' * 9)[:len(SubstitutionAlphabet)])
|
|
||||||
|
|
||||||
|
|
||||||
def DecodeSubst(s: str):
|
|
||||||
zeroBaby = False
|
|
||||||
out = []
|
out = []
|
||||||
for c in s:
|
for c in s:
|
||||||
if c == '0':
|
if c == '0':
|
||||||
zeroBaby = True
|
alphabet = SubstitutionAlphabet0
|
||||||
continue
|
|
||||||
if zeroBaby:
|
|
||||||
out.append(c.translate(SubstitutionXlat0))
|
|
||||||
else:
|
else:
|
||||||
out.append(c.translate(SubstitutionXlat))
|
try:
|
||||||
zeroBaby = False
|
pos = (SubstitutionAlphabet.index(c) - 4) % len(alphabet)
|
||||||
return ''.join(out)
|
except ValueError:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Not a subst encoded character: %c in %s" % (c, s))
|
||||||
|
out.append(alphabet[pos])
|
||||||
|
alphabet = SubstitutionAlphabet
|
||||||
|
return "".join(out)
|
||||||
|
|
||||||
PayloadsByGuid = {}
|
|
||||||
|
|
||||||
def DecodeDomain(domain: str):
|
Guid = int
|
||||||
|
|
||||||
|
|
||||||
|
def isprintable(c: int) -> bool:
    """Return True when byte value *c* is a printable ASCII character.

    Printable ASCII runs from 0x20 (space) through 0x7e (tilde).  0x7f is
    DEL, a control character, so it is rejected along with everything
    below 0x20 and above 0x7e.
    """
    return 0x20 <= c <= 0x7e
|
||||||
|
|
||||||
|
|
||||||
|
def quopri(buf: bytes) -> str:
    """Return *buf* encoded with the "quopri" codec, as a str."""
    return codecs.encode(buf, "quopri").decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
class DGADecoder:
    """Collect the payload chunks seen for one GUID and decode them together.

    Chunks are decoded with the substitution cipher until a chunk arrives
    with a "00" prefix; from then on this instance decodes with Esab32.
    """

    def __init__(self, guid: Guid):
        self.guid = guid
        # Every chunk handed to decode(), repeats included.
        self.history = []
        self._decoder = self.DecodeSubst

    def decode(self, s: str):
        """Record chunk *s* and return the decode of all chunks recorded so far."""
        if s.startswith("00"):
            self._decoder = self.DecodeEsab32
            # We'll throw away the information about which is first,
            # since we do a computationally intensive trick later to determine ordering
            s = s[2:]
            self.history.insert(0, s)
        else:
            self.history.append(s)
        return self._decoder()

    def DecodeSubst(self) -> str:
        """Decode each distinct chunk with the substitution cipher, longest first.

        Returns the decoded fragments concatenated in descending length order.
        """
        # dict.fromkeys de-duplicates like a set but keeps arrival order, so
        # equal-length fragments are joined in the same order on every run
        # (set iteration order depends on per-process string hashing).
        decodes = dict.fromkeys(DecodeSubst(x) for x in self.history)
        return ''.join(sorted(decodes, key=len, reverse=True))

    def DecodeEsab32(self) -> str:
        """Try every chunk ordering and return the printable Esab32 decode(s).

        Each ordering of the distinct chunks (with trailing "0" characters
        stripped) is concatenated and decoded.  Orderings whose bytes are all
        printable are returned quoted-printable, joined with " | ".  When no
        ordering qualifies, the last ordering tried is returned instead.

        The number of orderings grows factorially with the number of
        distinct chunks.
        """
        # Ordered de-duplication keeps the permutation order, and therefore
        # the candidate order and the fallback buffer, stable between runs.
        history = list(dict.fromkeys(x.rstrip("0") for x in self.history))

        # "Why don't we just mix up absolutely everything and see what happens?"
        # -- Ridcully, in Terry Pratchett's "The Hogfather"
        possibilities = []
        buf = b""
        for attempt in itertools.permutations(history):
            acc, abits = DecodeEsab32(''.join(attempt))
            length = abits // 8
            # Round up so the integer always fits, then drop the trailing
            # partial byte (if any) so only whole bytes are kept.
            buf = acc.to_bytes((abits + 7) // 8, 'little')[:length]
            if all(isprintable(b) for b in buf):
                # Yay it's probably okay
                possibilities.append(buf)
        # Well, we tried.
        if not possibilities:
            return quopri(buf)
        return " | ".join(quopri(candidate) for candidate in possibilities)
|
||||||
|
|
||||||
|
|
||||||
|
# One DGADecoder per GUID; DecodeDomain adds an entry the first time it
# sees a GUID and reuses it afterwards.
DecodersByGuid = {}
|
||||||
|
|
||||||
|
|
||||||
|
def DecodeDomain(domain: str) -> (Guid, int, str):
|
||||||
s = domain.strip()
|
s = domain.strip()
|
||||||
foundDomain = None
|
foundDomain = None
|
||||||
for d in knownDomains:
|
for d in knownDomains:
|
||||||
|
@ -110,54 +160,49 @@ def DecodeDomain(domain: str):
|
||||||
assert(s[-1] == '.')
|
assert(s[-1] == '.')
|
||||||
s = s[:-1]
|
s = s[:-1]
|
||||||
|
|
||||||
|
if foundDomain == "avsvmcloud.com":
|
||||||
|
return (None, None, "[Probably not a Sunburst domain]")
|
||||||
if len(s) < 16:
|
if len(s) < 16:
|
||||||
return (None, None, "[unable to decode: too short for any known decoding rules]")
|
return (None, None, "[too short]")
|
||||||
|
|
||||||
eguid = DecodeBsae32(s[:15])
|
dec, _ = DecodeEsab32(s[:15])
|
||||||
|
eguid = dec.to_bytes(10, 'little')[:9]
|
||||||
|
guid = int.from_bytes(xor(eguid[0:1], eguid[1:]), 'big')
|
||||||
|
|
||||||
unknown_a = s[15]
|
unknown_a = s[15]
|
||||||
guid = xor(eguid[0:1], eguid[1:])
|
payload = s[16:]
|
||||||
s = s[16:]
|
|
||||||
|
|
||||||
payloads = PayloadsByGuid.setdefault(guid, [])
|
decoder = DecodersByGuid.get(guid)
|
||||||
if s not in payloads:
|
if not decoder:
|
||||||
if s.startswith("00"):
|
decoder = DGADecoder(guid)
|
||||||
payloads.insert(0, s)
|
DecodersByGuid[guid] = decoder
|
||||||
else:
|
|
||||||
payloads.append(s)
|
|
||||||
# People: friggin' preserve metadata, ugh.
|
|
||||||
# If I gave you every line of The Empire Strikes Back,
|
|
||||||
# sorted alphabetically, without timestamps,
|
|
||||||
# could you reconstruct the movie?
|
|
||||||
payloads.sort(key=len, reverse=True)
|
|
||||||
|
|
||||||
payload = ''.join(payloads)
|
decoded = decoder.decode(payload)
|
||||||
|
|
||||||
if payload.startswith("00"):
|
|
||||||
buf = DecodeBsae32(payload[2:])
|
|
||||||
decoded = codecs.encode(buf, "quopri").decode("utf-8")
|
|
||||||
else:
|
|
||||||
decoded = DecodeSubst(payload)
|
|
||||||
return (guid, unknown_a, decoded)
|
return (guid, unknown_a, decoded)
|
||||||
|
|
||||||
|
|
||||||
class TextReader:
|
class TextReader:
|
||||||
def __init__(self, infile):
|
def __init__(self, infile):
|
||||||
self.infile = infile
|
self.infile = infile
|
||||||
self.fieldnames = ["name"]
|
self.fieldnames = ["name"]
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for s in self.infile:
|
for s in self.infile:
|
||||||
yield {"name": s.strip()}
|
yield {"name": s.strip()}
|
||||||
|
|
||||||
|
|
||||||
class CsvReader:
|
class CsvReader:
|
||||||
def __init__(self, infile):
|
def __init__(self, infile):
|
||||||
self.reader = csv.DictReader(infile)
|
self.reader = csv.DictReader(infile)
|
||||||
self.fieldnames = self.reader.fieldnames + ["guid", "unknown a", "decode"]
|
self.fieldnames = self.reader.fieldnames + \
|
||||||
|
["guid", "unknown a", "decode"]
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for record in self.reader:
|
for record in self.reader:
|
||||||
yield record
|
yield record
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Decode sunburst Domain Generation Algorithm (DGA) names")
|
description="Decode sunburst Domain Generation Algorithm (DGA) names")
|
||||||
|
@ -188,13 +233,14 @@ def main():
|
||||||
fieldnames = reader.fieldnames + ["guid", "unknown a", "decode"]
|
fieldnames = reader.fieldnames + ["guid", "unknown a", "decode"]
|
||||||
writer = csv.DictWriter(args.outfile, fieldnames)
|
writer = csv.DictWriter(args.outfile, fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
for record in reader:
|
for record in reader:
|
||||||
name = record.get("name") or record.get("fqdn")
|
name = record.get("name") or record.get("fqdn")
|
||||||
guid, unknown_a, ptext = DecodeDomain(name)
|
guid, unknown_a, ptext = DecodeDomain(name)
|
||||||
record["guid"] = int.from_bytes(guid or b"", "big")
|
record["guid"] = guid
|
||||||
record["unknown a"] = unknown_a
|
record["unknown a"] = unknown_a
|
||||||
record["decode"] = ptext
|
record["decode"] = ptext
|
||||||
writer.writerow(record)
|
writer.writerow(record)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue