#! /usr/bin/python3

# Neale Pickett
# Unclassified/FOUO
#
# Created: 2020-12-14 16:49:51
# Last-modified: 2020-12-22 21:42:40
#
# Based on work by @RedDrip7 (twitter),
# who should be getting more credit in the English-speaking world.

"""Decode Sunburst Domain Generation Algorithm (DGA) names.

Reads fully-qualified domain names (one per line, or from a CSV column named
``name`` or ``fqdn``) and writes a CSV with three extra columns: ``guid``,
``unknown a`` and ``decode``.
"""

import argparse
import base64
import codecs
import csv
import itertools
import re
import sys
from typing import Dict, Iterable, Iterator, List, Optional, Tuple

# Order matters: the bare parent domain must come last so that the more
# specific suffixes are matched first.
knownDomains = [
    "appsync-api.us-east-1.avsvmcloud.com",
    "appsync-api.us-east-2.avsvmcloud.com",
    "appsync-api.us-west-2.avsvmcloud.com",
    "appsync-api.eu-west-1.avsvmcloud.com",
    "avsvmcloud.com",
]


def xor(key: bytes, buf: bytes) -> bytes:
    """Return *buf* XORed with *key*, repeating *key* as often as needed."""
    return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))


Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
SubstitutionAlphabet0 = '0_-.'

# Character -> position tables, so decoding does not rescan the alphabet
# string for every input character.
_Esab32Index = {c: i for i, c in enumerate(Esab32Alphabet)}
_SubstIndex = {c: i for i, c in enumerate(SubstitutionAlphabet)}

# Columns this tool appends to every output record.
OutputFields = ["guid", "unknown a", "decode"]


def DecodeBase32(s: str) -> bytes:
    """Decode *s* as standard Base32 after remapping the Esab32 alphabet.

    Not used by Sunburst: the encoding Sunburst uses is not standard Base32,
    so this does not produce meaningful output for real data (see
    DecodeEsab32 for the decoder that is actually used).  It is kept for
    reference only.

    Raises binascii.Error if the padded text is not valid Base32.
    """
    # The standard Base32 alphabet is A-Z followed by 2-7: exactly 32
    # symbols, matching the 32 symbols of Esab32Alphabet.
    trans = str.maketrans(Esab32Alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567")
    t = s.translate(trans)
    # b32decode wants input padded to a multiple of 8 characters.
    t += '=' * (-len(t) % 8)
    return base64.b32decode(t)


def DecodeEsab32(s: str) -> Tuple[int, int]:
    """Decode *s* using the Esab32 alphabet, 5 bits per character.

    The first character supplies the least-significant 5 bits of the result
    and each following character the next-higher 5 bits.

    Returns a bigint, and the number of bits contained therein.

    Raises RuntimeError if *s* contains a character outside Esab32Alphabet.
    """
    acc = bits = 0
    for c in s:
        try:
            p = _Esab32Index[c]
        except KeyError:
            raise RuntimeError(
                "Not an Esab32 encoded character: %c in %s" % (c, s)) from None
        acc |= p << bits
        bits += 5
    return acc, bits


def DecodeSubst(s: str) -> str:
    """Decode *s* using the substitution cipher.

    Ordinary characters are shifted back 4 places within
    SubstitutionAlphabet.  A '0' is an escape: it emits nothing itself and
    makes the next character select one of SubstitutionAlphabet0 instead.

    Raises RuntimeError if *s* contains a character that is neither '0' nor
    in SubstitutionAlphabet.
    """
    alphabet = SubstitutionAlphabet
    out = []
    for c in s:
        if c == '0':
            alphabet = SubstitutionAlphabet0
            continue
        try:
            pos = (_SubstIndex[c] - 4) % len(alphabet)
        except KeyError:
            raise RuntimeError(
                "Not a subst encoded character: %c in %s" % (c, s)) from None
        out.append(alphabet[pos])
        # The escape applies to a single character only.
        alphabet = SubstitutionAlphabet
    return "".join(out)


Guid = int


def isprintable(c: int) -> bool:
    """Return True if byte value *c* is printable ASCII (space through '~')."""
    # 0x7f is DEL, a control character, so the upper bound is 0x7e.
    return 0x20 <= c <= 0x7e


def quopri(buf: bytes) -> str:
    """Return *buf* rendered as quoted-printable text."""
    return codecs.encode(buf, "quopri").decode("utf-8")


class DGADecoder:
    """Accumulates the payload fragments seen for one GUID and decodes them.

    Every call to decode() adds a fragment to the history and returns a
    decode of the whole history so far.  The substitution decoder is used
    until a fragment starting with "00" is seen; from then on the Esab32
    decoder is used for this GUID.
    """

    def __init__(self, guid: Guid):
        self.guid = guid
        self.history = []  # payload fragments, "00" prefix already removed
        self._decoder = self.DecodeSubst

    def decode(self, s: str) -> str:
        """Record fragment *s* and return the decode of everything so far."""
        if s.startswith("00"):
            self._decoder = self.DecodeEsab32
            # We don't rely on the information about which is first,
            # since we do a computationally intensive trick later to
            # determine ordering
            s = s[2:]
            self.history.insert(0, s)
        else:
            self.history.append(s)
        return self._decoder()

    def DecodeSubst(self) -> str:
        """Substitution-decode each distinct fragment, longest first.

        Fragments that decode to the same length keep the order in which
        they were first seen, so the result is reproducible between runs.
        """
        # dict.fromkeys de-duplicates while preserving order; a set would
        # make tie ordering depend on string hash randomisation.
        decodes = dict.fromkeys(DecodeSubst(x) for x in self.history)
        # sorted() is stable, also with reverse=True.
        return ''.join(sorted(decodes, key=len, reverse=True))

    def DecodeEsab32(self) -> str:
        """Try every ordering of the fragments; keep the printable decodes.

        Returns the printable candidates joined with " | ".  If no ordering
        decodes to all-printable bytes, returns the last attempt instead.

        NOTE: the number of orderings grows factorially with the number of
        distinct fragments recorded for this GUID.
        """
        history = list(dict.fromkeys(x.rstrip("0") for x in self.history))

        # "Why don't we just mix up absolutely everything and see what happens?"
        # -- Ridcully, in Terry Pratchett's "The Hogfather"
        possibilities = []
        buf = b""
        for attempt in itertools.permutations(history):
            acc, abits = DecodeEsab32(''.join(attempt))
            length = abits // 8
            # to_bytes() needs room for a trailing partial byte as well;
            # only the whole bytes are kept.
            buf = acc.to_bytes((abits + 7) // 8, 'little')[:length]
            if all(isprintable(b) for b in buf):
                # Yay it's probably okay
                possibilities.append(buf)

        # Well, we tried.
        if not possibilities:
            return quopri(buf)
        return " | ".join(quopri(p) for p in possibilities)


DecodersByGuid: Dict[Guid, DGADecoder] = {}


def DecodeDomain(domain: str) -> Tuple[Optional[Guid], Optional[str], str]:
    """Decode one DGA domain name.

    The part of the name in front of a known domain is split into: 15
    Esab32 characters (decoded to 9 bytes, of which the first is an XOR key
    for the other 8, giving the GUID), one character returned undecoded as
    "unknown a", and a payload handed to the per-GUID DGADecoder.

    Returns (guid, unknown_a, decoded).  guid and unknown_a are None when
    the name carries no decodable data; decoded then holds a bracketed
    explanation.

    Raises RuntimeError if the name is not under one of knownDomains, or if
    it contains characters the decoders do not accept.
    """
    s = domain.strip()

    foundDomain = None
    for d in knownDomains:
        # Match on a label boundary, so e.g. "fooavsvmcloud.com" is not
        # mistaken for a name under "avsvmcloud.com".
        if s == d or s.endswith("." + d):
            foundDomain = d
            break
    if not foundDomain:
        raise RuntimeError("Can't find domain for %s" % s)

    s = s[:-len(foundDomain)]
    if not s:
        return (None, None, "[no data transmitted]")
    # Drop the '.' separator, which the match above guarantees is present.
    s = s[:-1]

    if foundDomain == "avsvmcloud.com":
        return (None, None, "[Probably not a Sunburst domain]")
    if len(s) < 16:
        return (None, None, "[too short]")

    # 15 characters * 5 bits = 75 bits, which needs 10 bytes to hold.
    dec, _ = DecodeEsab32(s[:15])
    eguid = dec.to_bytes(10, 'little')[:9]
    guid = int.from_bytes(xor(eguid[0:1], eguid[1:]), 'big')
    unknown_a = s[15]
    payload = s[16:]

    decoder = DecodersByGuid.get(guid)
    if decoder is None:
        decoder = DGADecoder(guid)
        DecodersByGuid[guid] = decoder
    decoded = decoder.decode(payload)
    return (guid, unknown_a, decoded)


class TextReader:
    """Yield one ``{"name": fqdn}`` record per non-blank input line."""

    def __init__(self, infile: Iterable[str]):
        self.infile = infile
        self.fieldnames = ["name"]

    def __iter__(self) -> Iterator[dict]:
        for line in self.infile:
            name = line.strip()
            # A blank line carries no name to decode.
            if name:
                yield {"name": name}


class CsvReader:
    """Yield the records of a CSV file that has a header row."""

    def __init__(self, infile: Iterable[str]):
        self.reader = csv.DictReader(infile)
        # DictReader.fieldnames is None when the input is empty.
        self.fieldnames = list(self.reader.fieldnames or []) + \
            ["guid", "unknown a", "decode"]

    def __iter__(self) -> Iterator[dict]:
        for record in self.reader:
            yield record


def OutputFieldnames(fieldnames: Iterable[str]) -> List[str]:
    """Return *fieldnames* plus any of OutputFields not already present."""
    out = list(fieldnames)
    for field in OutputFields:
        if field not in out:
            out.append(field)
    return out


def main():
    """Command-line entry point: decode every name in the input file."""
    parser = argparse.ArgumentParser(
        description="Decode sunburst Domain Generation Algorithm (DGA) names")
    parser.add_argument("--text", dest="input", action="store_const",
                        const="text",
                        help="Parse bambenek-style: list of fqdns, one per line")
    parser.add_argument("--csv", dest="input", action="store_const",
                        const="csv",
                        help="Parse CSV: records must be in a 'name' or 'fqdn' field")
    parser.add_argument("infile", nargs="?",
                        type=argparse.FileType("r"), default=sys.stdin)
    # const covers a bare "--outfile" with no value, which would otherwise
    # leave args.outfile set to None.
    parser.add_argument("--outfile", nargs="?", const=sys.stdout,
                        type=argparse.FileType("w"), default=sys.stdout,
                        help="CSV file to write (default stdout)")
    args = parser.parse_args()

    # An explicit --text/--csv wins; otherwise go by the file extension.
    if args.input == "text":
        reader = TextReader(args.infile)
    elif args.input == "csv":
        reader = CsvReader(args.infile)
    elif args.infile.name.endswith(".txt"):
        reader = TextReader(args.infile)
    elif args.infile.name.endswith(".csv"):
        reader = CsvReader(args.infile)
    else:
        parser.print_help()
        return

    # CsvReader already lists the output columns, so add only missing ones;
    # otherwise the header would contain each of them twice.
    writer = csv.DictWriter(args.outfile, OutputFieldnames(reader.fieldnames))
    writer.writeheader()
    for record in reader:
        name = record.get("name") or record.get("fqdn")
        if name:
            guid, unknown_a, ptext = DecodeDomain(name)
        else:
            guid, unknown_a, ptext = None, None, "[no name or fqdn field]"
        record["guid"] = guid
        record["unknown a"] = unknown_a
        record["decode"] = ptext
        writer.writerow(record)


if __name__ == '__main__':
    main()