- commit
- 7390cb6
- parent
- 7390cb6
- author
- Neale Pickett
- date
- 2020-12-22 17:59:54 -0700 MST
subst reconstitution is broken
4 files changed,
+236,
-0
+2,
-0
1@@ -0,0 +1,2 @@
2+decodes
3+data
A
Makefile
+10,
-0
1@@ -0,0 +1,10 @@
# One decoded CSV per input file.  Inputs may be plain-text name lists
# (data/*.txt) or CSV files (data/*.csv); both land in decodes/*.csv.
TARGETS += $(patsubst data/%.txt, decodes/%.csv, $(wildcard data/*.txt))
TARGETS += $(patsubst data/%.csv, decodes/%.csv, $(wildcard data/*.csv))

# "all" names no real file; keep a stray file called "all" from masking it.
.PHONY: all
all: $(TARGETS)

# decodes/ is git-ignored, so it may be missing in a fresh checkout;
# create it before the decoder tries to open the output file.
decodes/%.csv: data/%.txt sunburst.py
	mkdir -p $(@D)
	./sunburst.py --outfile $@ $<

decodes/%.csv: data/%.csv sunburst.py
	mkdir -p $(@D)
	./sunburst.py --outfile $@ $<
A
NOTES.md
+24,
-0
1@@ -0,0 +1,24 @@
2+Stuff I still can't decode
3+====================
4+
5+A lot of these things end in `.local`.
6+
7+* fidelitycomm.lo|cal
8+* milledgeville.l|ocal
9+* cs.haystax.loc|al
10+* signaturebank.l|ocal
11+* vantagedatacente|rs.local
12+
13+I bet these end in `.local` too:
14+
15+* ABLE.loc|7l
16+* ETC1.loc|7l
17+* FVF.loca|m
18+* MOC.loca|m
19+* FSAR.LOC|7f
20+
21+`7l = al`
22+`7f = AL`
23+`m = l`
24+
25+How does `SCMRI.lo|ujjc` become `SCMRI.local`?
+200,
-0
1@@ -0,0 +1,200 @@
2+#! /usr/bin/python3
3+
4+# Neale Pickett <neale@lanl.gov>
5+# Unclassified/FOUO
6+#
7+# Created: 2020-12-14 16:49:51
8+# Last-modified: 2020-12-22 17:57:54
9+#
10+# Based on work by @RedDrip7 (twitter),
11+# who should be getting more credit in the English-speaking world.
12+
13+import argparse
14+import base64
15+import codecs
16+import csv
17+import itertools
18+import re
19+import sys
20+
21+
# DNS suffixes that names handled by this tool are expected to end with.
# The bare parent domain stays last: every regional name also ends with it,
# and DecodeDomain uses the first suffix that matches.
knownDomains = """
    appsync-api.us-east-1.avsvmcloud.com
    appsync-api.us-east-2.avsvmcloud.com
    appsync-api.us-west-2.avsvmcloud.com
    appsync-api.eu-west-1.avsvmcloud.com
    avsvmcloud.com
""".split()
29+
def xor(key, buf):
    """Return *buf* XORed with *key*, repeating the key as often as needed.

    Both arguments are iterables of ints (e.g. ``bytes``).  The result has
    the length of *buf*; an empty *key* yields an empty result.
    """
    keystream = itertools.cycle(key)
    return bytes(
        data_byte ^ key_byte for data_byte, key_byte in zip(buf, keystream)
    )
32+
# Custom 32-symbol alphabet; a character's index is its 5-bit value
# (DecodeBsae32 looks symbols up by position in this string).
Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"


def DecodeBase32(s: str):
    """Not used by sunburst.

    Decode *s* as standard (RFC 4648) Base32 written with Bsae32Alphabet
    in place of the usual ``A-Z2-7`` symbols.

    If Sunburst actually used Base32, this would work to decode things.
    It doesn't: Sunburst packs its bits differently, see DecodeBsae32.

    Returns the decoded ``bytes``.  ``base64.b32decode`` raises
    ``binascii.Error`` when the (padded) text is not valid Base32.
    """

    # str.translate takes a single mapping in Python 3, so build the table
    # with str.maketrans.  The target must be the 32-symbol RFC 4648 alphabet.
    t = s.translate(str.maketrans(Bsae32Alphabet,
                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"))
    # b32decode insists on input padded to a multiple of eight characters.
    t += '=' * (-len(t) % 8)
    return base64.b32decode(t)
48+
49+
def DecodeBsae32(s: str):
    """Decode the base32-lookalike packing used for these names.

    Each character contributes five bits (its index in Bsae32Alphabet),
    placed least-significant-first: character 0 fills bits 0-4, character 1
    fills bits 5-9, and so on.  The accumulated integer is then emitted one
    byte at a time, low byte first; leftover bits that do not fill a whole
    byte are dropped.

    Hastily formed opinion, possibly misinformed: this is not proper Base32,
    more like an encoder/decoder pair written from a half-remembered
    description of it.

    Characters missing from the alphabet are not rejected: ``str.find``
    returns -1 for them and the output is garbage rather than an error.
    """

    packed = 0
    for position, symbol in enumerate(s):
        packed |= Bsae32Alphabet.find(symbol) << (5 * position)

    # Only complete bytes survive; a trailing partial byte is discarded.
    whole_bytes = (5 * len(s)) // 8
    return bytes((packed >> (8 * i)) & 0xFF for i in range(whole_bytes))
74+
75+
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'

# Ordinary characters: map each symbol to the one four places earlier in
# the alphabet (wrapping around at the start).
SubstitutionXlat = {
    ord(encoded): ord(plain)
    for encoded, plain in zip(
        SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4],
        SubstitutionAlphabet)
}

# Characters following a '0' escape: the symbol's index modulo four picks
# one of the four special characters.
SubstitutionXlat0 = {
    ord(encoded): ord('0_-.'[index % 4])
    for index, encoded in enumerate(SubstitutionAlphabet)
}


def DecodeSubst(s: str):
    """Decode text written with the substitution cipher.

    A '0' is an escape marker: it emits nothing and makes the next non-'0'
    character decode through SubstitutionXlat0.  Every other character
    decodes through SubstitutionXlat.  Characters outside the alphabet pass
    through unchanged.
    """
    decoded = []
    escaped = False
    for ch in s:
        if ch == '0':
            escaped = True
        else:
            table = SubstitutionXlat0 if escaped else SubstitutionXlat
            decoded.append(ch.translate(table))
            escaped = False
    return ''.join(decoded)
96+
# Payload fragments seen so far, keyed by decoded GUID bytes.  DecodeDomain
# accumulates into this dict, so its results depend on which names were
# decoded earlier in the same process.
PayloadsByGuid = {}

def DecodeDomain(domain: str):
    """Decode one generated hostname.

    The name must end with one of knownDomains.  The label in front of that
    suffix is split into a 15-character encoded GUID, one character of
    unknown meaning, and a payload fragment.  Fragments are collected per
    GUID and the concatenation of everything seen so far is decoded.

    Returns a ``(guid, unknown_a, decoded)`` tuple.  ``guid`` is ``bytes``
    and ``unknown_a`` a one-character ``str``; both are ``None`` when the
    name carries no data or is too short, in which case ``decoded`` is a
    bracketed explanation instead of decoded text.

    Raises ``RuntimeError`` when the name does not end in a known domain,
    or when the known domain is not preceded by a dot.
    """
    fqdn = domain.strip()
    foundDomain = next((d for d in knownDomains if fqdn.endswith(d)), None)
    if not foundDomain:
        raise RuntimeError("Can't find domain for %s" % fqdn)
    s = fqdn[:-len(foundDomain)]
    if not s:
        return (None, None, "[no data transmitted]")
    # This used to be an assert, which vanishes under "python -O" and would
    # then silently eat one character of data.  Raise like the check above.
    if s[-1] != '.':
        raise RuntimeError("Can't find domain for %s" % fqdn)
    s = s[:-1]

    if len(s) < 16:
        return (None, None, "[unable to decode: too short for any known decoding rules]")

    # The first decoded byte is the XOR key for the remaining GUID bytes.
    eguid = DecodeBsae32(s[:15])
    unknown_a = s[15]
    guid = xor(eguid[0:1], eguid[1:])
    s = s[16:]

    payloads = PayloadsByGuid.setdefault(guid, [])
    if s not in payloads:
        if s.startswith("00"):
            payloads.insert(0, s)
        else:
            payloads.append(s)
        # People: friggin' preserve metadata, ugh.
        # If I gave you every line of The Empire Strikes Back,
        # sorted alphabetically, without timestamps,
        # could you reconstruct the movie?
        #
        # NOTE(review): this stable sort runs after the insert(0, ...) above,
        # so a "00" fragment only stays in front when nothing longer has been
        # seen for this GUID -- confirm that is the intended ordering.
        payloads.sort(key=len, reverse=True)

    payload = ''.join(payloads)

    if payload.startswith("00"):
        # "00" prefix: the remainder is Bsae32 data, shown quoted-printable.
        buf = DecodeBsae32(payload[2:])
        decoded = codecs.encode(buf, "quopri").decode("utf-8")
    else:
        decoded = DecodeSubst(payload)
    return (guid, unknown_a, decoded)
142+
class TextReader:
    """Present a line-oriented list of names as dict records.

    Iterating yields one ``{"name": <line with surrounding whitespace
    stripped>}`` dict per line of the wrapped iterable.  ``fieldnames``
    lists the single column those records carry.
    """

    def __init__(self, infile):
        self.fieldnames = ["name"]
        self.infile = infile

    def __iter__(self):
        for line in self.infile:
            record = {"name": line.strip()}
            yield record
151+
152+
class CsvReader:
    """Read records from a CSV file that has a header row.

    ``fieldnames`` holds the input file's own columns, in order (an empty
    list when the file has no header at all).  Iterating yields one dict
    per data row, as produced by ``csv.DictReader``.
    """

    def __init__(self, infile):
        self.reader = csv.DictReader(infile)
        # Report only the columns actually present in the input.  The output
        # columns ("guid", "unknown a", "decode") are appended by the caller;
        # adding them here as well made every one of them appear twice.
        # DictReader.fieldnames is None for an empty file, hence the "or".
        self.fieldnames = list(self.reader.fieldnames or [])

    def __iter__(self):
        yield from self.reader
161+
def main():
    """Command-line entry point: decode every name in the input to CSV.

    The input format comes from --text / --csv, or failing that from the
    input file's extension (.txt or .csv); if neither settles it, the help
    text is printed and nothing is decoded.  Each input record is written
    back out with three extra columns: "guid", "unknown a" and "decode".

    Raises RuntimeError when a record has no usable "name"/"fqdn" value;
    errors raised by DecodeDomain propagate unchanged.
    """
    parser = argparse.ArgumentParser(
        description="Decode sunburst Domain Generation Algorithm (DGA) names")
    parser.add_argument("--text", dest="input", action="store_const", const="text",
                        help="Parse bambenek-style: list of fqdns, one per line")
    parser.add_argument("--csv", dest="input", action="store_const", const="csv",
                        help="Parse CSV: records must be in a 'name' or 'fqdn' field")
    parser.add_argument("infile", nargs="?",
                        type=argparse.FileType("r"), default=sys.stdin)
    # nargs="?" without const turns a bare "--outfile" into None, which then
    # blows up inside csv.DictWriter; make the bare flag mean stdout.
    parser.add_argument("--outfile", nargs="?",
                        type=argparse.FileType("w"),
                        default=sys.stdout, const=sys.stdout,
                        help="CSV file to write (default stdout)")
    args = parser.parse_args()

    reader = None
    if args.input == "text":
        reader = TextReader(args.infile)
    elif args.input == "csv":
        reader = CsvReader(args.infile)
    elif args.infile.name.endswith(".txt"):
        reader = TextReader(args.infile)
    elif args.infile.name.endswith(".csv"):
        reader = CsvReader(args.infile)
    else:
        parser.print_help()
        return

    # Append the output columns, skipping any the reader already reports so
    # that no column is emitted twice.
    extra = ["guid", "unknown a", "decode"]
    fieldnames = reader.fieldnames + [f for f in extra if f not in reader.fieldnames]
    writer = csv.DictWriter(args.outfile, fieldnames)
    writer.writeheader()
    for record in reader:
        name = record.get("name") or record.get("fqdn")
        if not name:
            # Fail with a message that names the real problem instead of an
            # AttributeError from calling .strip() on None.
            raise RuntimeError(
                "Record has no 'name' or 'fqdn' value: %r" % (record,))
        guid, unknown_a, ptext = DecodeDomain(name)
        # guid is bytes, or None when nothing could be decoded (written as 0).
        record["guid"] = int.from_bytes(guid or b"", "big")
        record["unknown a"] = unknown_a
        record["decode"] = ptext
        writer.writerow(record)

if __name__ == '__main__':
    main()