- commit
- 3e0413f
- parent
- 7390cb6
- author
- Neale Pickett
- date
- 2020-12-22 21:45:30 -0700 MST
Much cleaner Esab32 decoding. Tries all permutations of chunks in Esab32 until it gets only printable characters. This decodes all but one line of the data I currently have; that GUID is successfully decoded later, after another chunk is added. So this is 100% successful.
1 files changed,
+117,
-71
+117,
-71
1@@ -4,7 +4,7 @@
2 # Unclassified/FOUO
3 #
4 # Created: 2020-12-14 16:49:51
5-# Last-modified: 2020-12-22 17:57:54
6+# Last-modified: 2020-12-22 21:42:40
7 #
8 # Based on work by @RedDrip7 (twitter),
9 # who should be getting more credit in the English-speaking world.
10@@ -26,10 +26,14 @@ knownDomains = [
11 "avsvmcloud.com",
12 ]
13
14+
15 def xor(key, buf):
16- return bytes(b^k for b,k in zip(buf, itertools.cycle(key)))
17+ return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))
18+
19
20-Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
21+Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
22+SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
23+SubstitutionAlphabet0 = '0_-.'
24
25
26 def DecodeBase32(s: str):
27@@ -39,63 +43,109 @@ def DecodeBase32(s: str):
28 It doesn't work.
29 """
30
31- t = s.translate(Bsae32Alphabet,
32- "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
33+ trans = str.maketrans(Esab32Alphabet,
34+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
35+ t = s.translate(trans)
36 while len(t) % 8 > 0:
37 t += '='
38 return base64.b32decode(t)
39
40
41-def DecodeBsae32(s: str):
42- """Decode using zany base32-like algorithm.
43+def DecodeEsab32(s: str) -> (int, int):
44+ """Decode using big-endian base32 algorithm.
45
46- The following opinion has been formed hastily and could be misinformed:
47-
48- This is not proper Base32. It's more like somebody read about Base32,
49- implemented an encoder and decoder incorrectly, and went on to the next task.
50+ Returns a bigint, and the number of bits contained therein
51 """
52
53- bits = 0
54- acc = 0
55+ acc = bits = 0
56 for c in s:
57- acc |= Bsae32Alphabet.find(c) << bits
58+ try:
59+ p = Esab32Alphabet.index(c)
60+ except ValueError:
61+ raise RuntimeError(
62+ "Not an Esab32 encoded character: %c in %s" % (c, s))
63+ acc |= p << bits
64 bits += 5
65+ return acc, bits
66
67- out8 = []
68- while bits > 0:
69- out8.append(acc & 255)
70- acc >>= 8
71- bits -= 8
72
73- if bits:
74- del out8[-1]
75- return bytes(out8)
76+def DecodeSubst(s: str) -> str:
77+ alphabet = SubstitutionAlphabet
78+ out = []
79+ for c in s:
80+ if c == '0':
81+ alphabet = SubstitutionAlphabet0
82+ else:
83+ try:
84+ pos = (SubstitutionAlphabet.index(c) - 4) % len(alphabet)
85+ except ValueError:
86+ raise RuntimeError(
87+ "Not a subst encoded character: %c in %s" % (c, s))
88+ out.append(alphabet[pos])
89+ alphabet = SubstitutionAlphabet
90+ return "".join(out)
91
92
93-SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
94-SubstitutionXlat = str.maketrans(
95- SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4], SubstitutionAlphabet)
96-SubstitutionXlat0 = str.maketrans(
97- SubstitutionAlphabet, ('0_-.' * 9)[:len(SubstitutionAlphabet)])
98+Guid = int
99
100
101-def DecodeSubst(s: str):
102- zeroBaby = False
103- out = []
104- for c in s:
105- if c == '0':
106- zeroBaby = True
107- continue
108- if zeroBaby:
109- out.append(c.translate(SubstitutionXlat0))
110+def isprintable(c: int) -> bool:
111+ return c >= 0x20 and c <= 0x7f
112+
113+
114+def quopri(buf: bytes) -> str:
115+ return codecs.encode(buf, "quopri").decode("utf-8")
116+
117+
118+class DGADecoder:
119+ def __init__(self, guid: Guid):
120+ self.guid = guid
121+ self.history = []
122+ self._decoder = self.DecodeSubst
123+
124+ def decode(self, s: str):
125+ if s.startswith("00"):
126+ self._decoder = self.DecodeEsab32
127+ # We'll throw away the information about which is first,
128+ # since we do a computationally intensive trick later to determine ordering
129+ s = s[2:]
130+ self.history.insert(0, s)
131+ else:
132+ self.history.append(s)
133+ return self._decoder()
134+
135+ def DecodeSubst(self) -> str:
136+ decodes = {DecodeSubst(x) for x in self.history}
137+ return ''.join(sorted(decodes, key=len, reverse=True))
138+
139+ def DecodeEsab32(self) -> str:
140+ history = {x.rstrip("0") for x in self.history}
141+
142+ # "Why don't we just mix up absolutely everything and see what happens?"
143+ # -- Ridcully, in Terry Pratchett's "The Hogfather"
144+ possibilities = []
145+ for attempt in itertools.permutations(history):
146+ acc, abits = DecodeEsab32(''.join(attempt))
147+ length = abits // 8
148+ if abits % 8:
149+ buf = acc.to_bytes(length+1, 'little')
150+ else:
151+ buf = acc.to_bytes(length, 'little')
152+ buf = buf[:length]
153+ if sum(isprintable(b) for b in buf) == length:
154+ # Yay it's probably okay
155+ possibilities.append(buf)
156+ # Well, we tried.
157+ if not possibilities:
158+ return quopri(buf)
159 else:
160- out.append(c.translate(SubstitutionXlat))
161- zeroBaby = False
162- return ''.join(out)
163+ return " | ".join(quopri(buf) for buf in possibilities)
164+
165
166-PayloadsByGuid = {}
167+DecodersByGuid = {}
168
169-def DecodeDomain(domain: str):
170+
171+def DecodeDomain(domain: str) -> (Guid, int, str):
172 s = domain.strip()
173 foundDomain = None
174 for d in knownDomains:
175@@ -110,54 +160,49 @@ def DecodeDomain(domain: str):
176 assert(s[-1] == '.')
177 s = s[:-1]
178
179+ if foundDomain == "avsvmcloud.com":
180+ return (None, None, "[Probably not a Sunburst domain]")
181 if len(s) < 16:
182- return (None, None, "[unable to decode: too short for any known decoding rules]")
183-
184- eguid = DecodeBsae32(s[:15])
185+ return (None, None, "[too short]")
186+
187+ dec, _ = DecodeEsab32(s[:15])
188+ eguid = dec.to_bytes(10, 'little')[:9]
189+ guid = int.from_bytes(xor(eguid[0:1], eguid[1:]), 'big')
190+
191 unknown_a = s[15]
192- guid = xor(eguid[0:1], eguid[1:])
193- s = s[16:]
194+ payload = s[16:]
195+
196+ decoder = DecodersByGuid.get(guid)
197+ if not decoder:
198+ decoder = DGADecoder(guid)
199+ DecodersByGuid[guid] = decoder
200+
201+ decoded = decoder.decode(payload)
202
203- payloads = PayloadsByGuid.setdefault(guid, [])
204- if s not in payloads:
205- if s.startswith("00"):
206- payloads.insert(0, s)
207- else:
208- payloads.append(s)
209- # People: friggin' preserve metadata, ugh.
210- # If I gave you every line of The Empire Strikes Back,
211- # sorted alphabetically, without timestamps,
212- # could you reconstruct the movie?
213- payloads.sort(key=len, reverse=True)
214-
215- payload = ''.join(payloads)
216-
217- if payload.startswith("00"):
218- buf = DecodeBsae32(payload[2:])
219- decoded = codecs.encode(buf, "quopri").decode("utf-8")
220- else:
221- decoded = DecodeSubst(payload)
222 return (guid, unknown_a, decoded)
223
224+
225 class TextReader:
226 def __init__(self, infile):
227 self.infile = infile
228 self.fieldnames = ["name"]
229-
230+
231 def __iter__(self):
232 for s in self.infile:
233 yield {"name": s.strip()}
234-
235+
236
237 class CsvReader:
238 def __init__(self, infile):
239 self.reader = csv.DictReader(infile)
240- self.fieldnames = self.reader.fieldnames + ["guid", "unknown a", "decode"]
241+ self.fieldnames = self.reader.fieldnames + \
242+ ["guid", "unknown a", "decode"]
243
244 def __iter__(self):
245 for record in self.reader:
246 yield record
247
248+
249 def main():
250 parser = argparse.ArgumentParser(
251 description="Decode sunburst Domain Generation Algorithm (DGA) names")
252@@ -188,13 +233,14 @@ def main():
253 fieldnames = reader.fieldnames + ["guid", "unknown a", "decode"]
254 writer = csv.DictWriter(args.outfile, fieldnames)
255 writer.writeheader()
256- for record in reader:
257+ for record in reader:
258 name = record.get("name") or record.get("fqdn")
259 guid, unknown_a, ptext = DecodeDomain(name)
260- record["guid"] = int.from_bytes(guid or b"", "big")
261+ record["guid"] = guid
262 record["unknown a"] = unknown_a
263 record["decode"] = ptext
264 writer.writerow(record)
265-
266+
267+
268 if __name__ == '__main__':
269 main()