Much cleaner Esab32 decoding - sunburst-decoder@3e0413f

commit: 3e0413f
parent: 7390cb6
author: Neale Pickett
date: 2020-12-22 21:45:30 -0700 MST

Much cleaner Esab32 decoding

Tries every permutation of the Esab32 chunks seen for a GUID,
keeping the orderings that decode to only printable characters.
This decodes all but one line of the data I currently have,
and that line's GUID is successfully decoded later,
once another chunk has been added.
So in the end this is 100% successful.

1 files changed, +117, -71

M sunburst.py

+117, -71

  1@@ -4,7 +4,7 @@
  2 # Unclassified/FOUO
  3 #
  4 # Created: 2020-12-14 16:49:51
  5-# Last-modified: 2020-12-22 17:57:54
  6+# Last-modified: 2020-12-22 21:42:40
  7 #
  8 # Based on work by @RedDrip7 (twitter),
  9 # who should be getting more credit in the English-speaking world.
 10@@ -26,10 +26,14 @@ knownDomains = [
 11     "avsvmcloud.com",
 12 ]
 13 
 14+
 15 def xor(key, buf):
 16-    return bytes(b^k for b,k in zip(buf, itertools.cycle(key)))
 17+    return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))
 18+
 19 
 20-Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
 21+Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
 22+SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
 23+SubstitutionAlphabet0 = '0_-.'
 24 
 25 
 26 def DecodeBase32(s: str):
 27@@ -39,63 +43,109 @@ def DecodeBase32(s: str):
 28     It doesn't work.
 29     """
 30 
 31-    t = s.translate(Bsae32Alphabet,
 32-                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
 33+    trans = str.maketrans(Esab32Alphabet,
 34+                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
 35+    t = s.translate(trans)
 36     while len(t) % 8 > 0:
 37         t += '='
 38     return base64.b32decode(t)
 39 
 40 
 41-def DecodeBsae32(s: str):
 42-    """Decode using zany base32-like algorithm.
 43+def DecodeEsab32(s: str) -> (int, int):
 44+    """Decode using big-endian base32 algorithm.
 45 
 46-    The following opinion has been formed hastily and could be misinformed:
 47-
 48-    This is not proper Base32. It's more like somebody read about Base32,
 49-    implemented an encoder and decoder incorrectly, and went on to the next task.
 50+    Returns a bigint, and the number of bits contained therein
 51     """
 52 
 53-    bits = 0
 54-    acc = 0
 55+    acc = bits = 0
 56     for c in s:
 57-        acc |= Bsae32Alphabet.find(c) << bits
 58+        try:
 59+            p = Esab32Alphabet.index(c)
 60+        except ValueError:
 61+            raise RuntimeError(
 62+                "Not an Esab32 encoded character: %c in %s" % (c, s))
 63+        acc |= p << bits
 64         bits += 5
 65+    return acc, bits
 66 
 67-    out8 = []
 68-    while bits > 0:
 69-        out8.append(acc & 255)
 70-        acc >>= 8
 71-        bits -= 8
 72 
 73-    if bits:
 74-        del out8[-1]
 75-    return bytes(out8)
 76+def DecodeSubst(s: str) -> str:
 77+    alphabet = SubstitutionAlphabet
 78+    out = []
 79+    for c in s:
 80+        if c == '0':
 81+            alphabet = SubstitutionAlphabet0
 82+        else:
 83+            try:
 84+                pos = (SubstitutionAlphabet.index(c) - 4) % len(alphabet)
 85+            except ValueError:
 86+                raise RuntimeError(
 87+                    "Not a subst encoded character: %c in %s" % (c, s))
 88+            out.append(alphabet[pos])
 89+            alphabet = SubstitutionAlphabet
 90+    return "".join(out)
 91 
 92 
 93-SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
 94-SubstitutionXlat = str.maketrans(
 95-    SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4], SubstitutionAlphabet)
 96-SubstitutionXlat0 = str.maketrans(
 97-    SubstitutionAlphabet, ('0_-.' * 9)[:len(SubstitutionAlphabet)])
 98+Guid = int
 99 
100 
101-def DecodeSubst(s: str):
102-    zeroBaby = False
103-    out = []
104-    for c in s:
105-        if c == '0':
106-            zeroBaby = True
107-            continue
108-        if zeroBaby:
109-            out.append(c.translate(SubstitutionXlat0))
110+def isprintable(c: int) -> bool:
111+    return c >= 0x20 and c <= 0x7f
112+
113+
114+def quopri(buf: bytes) -> str:
115+    return codecs.encode(buf, "quopri").decode("utf-8")
116+
117+
118+class DGADecoder:
119+    def __init__(self, guid: Guid):
120+        self.guid = guid
121+        self.history = []
122+        self._decoder = self.DecodeSubst
123+
124+    def decode(self, s: str):
125+        if s.startswith("00"):
126+            self._decoder = self.DecodeEsab32
127+            # We'll throw away the information about which is first,
128+            # since we do a computationally intensive trick later to determine ordering
129+            s = s[2:]
130+            self.history.insert(0, s)
131+        else:
132+            self.history.append(s)
133+        return self._decoder()
134+
135+    def DecodeSubst(self) -> str:
136+        decodes = {DecodeSubst(x) for x in self.history}
137+        return ''.join(sorted(decodes, key=len, reverse=True))
138+
139+    def DecodeEsab32(self) -> str:
140+        history = {x.rstrip("0") for x in self.history}
141+
142+        # "Why don't we just mix up absolutely everything and see what happens?"
143+        # -- Ridcully, in Terry Pratchett's "Hogfather"
144+        possibilities = []
145+        for attempt in itertools.permutations(history):
146+            acc, abits = DecodeEsab32(''.join(attempt))
147+            length = abits // 8
148+            if abits % 8:
149+                buf = acc.to_bytes(length+1, 'little')
150+            else:
151+                buf = acc.to_bytes(length, 'little')
152+            buf = buf[:length]
153+            if sum(isprintable(b) for b in buf) == length:
154+                # Yay it's probably okay
155+                possibilities.append(buf)
156+        # Well, we tried.
157+        if not possibilities:
158+            return quopri(buf)
159         else:
160-            out.append(c.translate(SubstitutionXlat))
161-        zeroBaby = False
162-    return ''.join(out)
163+            return " | ".join(quopri(buf) for buf in possibilities)
164+
165 
166-PayloadsByGuid = {}
167+DecodersByGuid = {}
168 
169-def DecodeDomain(domain: str):
170+
171+def DecodeDomain(domain: str) -> (Guid, int, str):
172     s = domain.strip()
173     foundDomain = None
174     for d in knownDomains:
175@@ -110,54 +160,49 @@ def DecodeDomain(domain: str):
176     assert(s[-1] == '.')
177     s = s[:-1]
178 
179+    if foundDomain == "avsvmcloud.com":
180+        return (None, None, "[Probably not a Sunburst domain]")
181     if len(s) < 16:
182-        return (None, None, "[unable to decode: too short for any known decoding rules]")
183-    
184-    eguid = DecodeBsae32(s[:15])
185+        return (None, None, "[too short]")
186+
187+    dec, _ = DecodeEsab32(s[:15])
188+    eguid = dec.to_bytes(10, 'little')[:9]
189+    guid = int.from_bytes(xor(eguid[0:1], eguid[1:]), 'big')
190+
191     unknown_a = s[15]
192-    guid = xor(eguid[0:1], eguid[1:])
193-    s = s[16:]
194+    payload = s[16:]
195+
196+    decoder = DecodersByGuid.get(guid)
197+    if not decoder:
198+        decoder = DGADecoder(guid)
199+        DecodersByGuid[guid] = decoder
200+
201+    decoded = decoder.decode(payload)
202 
203-    payloads = PayloadsByGuid.setdefault(guid, [])
204-    if s not in payloads:
205-        if s.startswith("00"):
206-            payloads.insert(0, s)
207-        else:
208-            payloads.append(s)
209-            # People: friggin' preserve metadata, ugh. 
210-            # If I gave you every line of The Empire Strikes Back, 
211-            # sorted alphabetically, without timestamps,
212-            # could you reconstruct the movie?
213-            payloads.sort(key=len, reverse=True)
214-
215-    payload = ''.join(payloads)
216-
217-    if payload.startswith("00"):
218-        buf = DecodeBsae32(payload[2:])
219-        decoded = codecs.encode(buf, "quopri").decode("utf-8")
220-    else:
221-        decoded = DecodeSubst(payload)
222     return (guid, unknown_a, decoded)
223 
224+
225 class TextReader:
226     def __init__(self, infile):
227         self.infile = infile
228         self.fieldnames = ["name"]
229-        
230+
231     def __iter__(self):
232         for s in self.infile:
233             yield {"name": s.strip()}
234-    
235+
236 
237 class CsvReader:
238     def __init__(self, infile):
239         self.reader = csv.DictReader(infile)
240-        self.fieldnames = self.reader.fieldnames + ["guid", "unknown a", "decode"]
241+        self.fieldnames = self.reader.fieldnames + \
242+            ["guid", "unknown a", "decode"]
243 
244     def __iter__(self):
245         for record in self.reader:
246             yield record
247 
248+
249 def main():
250     parser = argparse.ArgumentParser(
251         description="Decode sunburst Domain Generation Algorithm (DGA) names")
252@@ -188,13 +233,14 @@ def main():
253     fieldnames = reader.fieldnames + ["guid", "unknown a", "decode"]
254     writer = csv.DictWriter(args.outfile, fieldnames)
255     writer.writeheader()
256-    for record in reader:        
257+    for record in reader:
258         name = record.get("name") or record.get("fqdn")
259         guid, unknown_a, ptext = DecodeDomain(name)
260-        record["guid"] = int.from_bytes(guid or b"", "big")
261+        record["guid"] = guid
262         record["unknown a"] = unknown_a
263         record["decode"] = ptext
264         writer.writerow(record)
265-    
266+
267+
268 if __name__ == '__main__':
269     main()