subst reconstitution is broken - sunburst-decoder@7390cb6

commit: 7390cb6
parent: 7390cb6
author: Neale Pickett
date: 2020-12-22 17:59:54 -0700 MST

subst reconstitution is broken

4 files changed, +236, -0

A .gitignore

A Makefile

A NOTES.md

A sunburst.py

A .gitignore

+2, -0

1@@ -0,0 +1,2 @@
2+decodes
3+data

A Makefile

+10, -0

 1@@ -0,0 +1,10 @@
 2+TARGETS += $(patsubst data/%.txt, decodes/%.csv, $(wildcard data/*.txt))
 3+TARGETS += $(patsubst data/%.csv, decodes/%.csv, $(wildcard data/*.csv))
 4+
 5+all: $(TARGETS)
 6+
 7+decodes/%.csv: data/%.txt sunburst.py
 8+	./sunburst.py --outfile $@ $<
 9+
10+decodes/%.csv: data/%.csv sunburst.py
11+	./sunburst.py --outfile $@ $<

A NOTES.md

+24, -0

 1@@ -0,0 +1,24 @@
 2+Stuff I still can't decode
 3+====================
 4+
 5+A lot of these things end in `.local`.
 6+
 7+* fidelitycomm.lo|cal
 8+* milledgeville.l|ocal
 9+* cs.haystax.loc|al
10+* signaturebank.l|ocal
11+* vantagedatacente|rs.local
12+
13+I bet these end in `.local` too:
14+
15+* ABLE.loc|7l
16+* ETC1.loc|7l
17+* FVF.loca|m
18+* MOC.loca|m
19+* FSAR.LOC|7f
20+
21+`7l = al`
22+`7f = AL`
23+`m = l`
24+
25+How does `SCMRI.lo|ujjc` become `SCMRI.local`?

A sunburst.py

+200, -0

  1@@ -0,0 +1,200 @@
  2+#! /usr/bin/python3
  3+
  4+# Neale Pickett <neale@lanl.gov>
  5+# Unclassified/FOUO
  6+#
  7+# Created: 2020-12-14 16:49:51
  8+# Last-modified: 2020-12-22 17:57:54
  9+#
 10+# Based on work by @RedDrip7 (twitter),
 11+# who should be getting more credit in the English-speaking world.
 12+
 13+import argparse
 14+import base64
 15+import codecs
 16+import csv
 17+import itertools
 18+import re
 19+import sys
 20+
 21+
# Parent domains under which encoded names are expected.
# Order matters: DecodeDomain stops at the first entry the name ends with,
# and the bare "avsvmcloud.com" is a suffix of every other entry, so it has
# to stay last or the regional prefix would be treated as encoded data.
knownDomains = [
    "appsync-api.us-east-1.avsvmcloud.com",
    "appsync-api.us-east-2.avsvmcloud.com",
    "appsync-api.us-west-2.avsvmcloud.com",
    "appsync-api.eu-west-1.avsvmcloud.com",
    "avsvmcloud.com",
]
 29+
 30+def xor(key, buf):
 31+    return bytes(b^k for b,k in zip(buf, itertools.cycle(key)))
 32+
 33+Bsae32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
 34+
 35+
 36+def DecodeBase32(s: str):
 37+    """Not used by sunburst.
 38+
 39+    If Sunburst actually used Base32, this would work to decode things.
 40+    It doesn't work.
 41+    """
 42+
 43+    t = s.translate(Bsae32Alphabet,
 44+                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
 45+    while len(t) % 8 > 0:
 46+        t += '='
 47+    return base64.b32decode(t)
 48+
 49+
 50+def DecodeBsae32(s: str):
 51+    """Decode using zany base32-like algorithm.
 52+
 53+    The following opinion has been formed hastily and could be misinformed:
 54+
 55+    This is not proper Base32. It's more like somebody read about Base32,
 56+    implemented an encoder and decoder incorrectly, and went on to the next task.
 57+    """
 58+
 59+    bits = 0
 60+    acc = 0
 61+    for c in s:
 62+        acc |= Bsae32Alphabet.find(c) << bits
 63+        bits += 5
 64+
 65+    out8 = []
 66+    while bits > 0:
 67+        out8.append(acc & 255)
 68+        acc >>= 8
 69+        bits -= 8
 70+
 71+    if bits:
 72+        del out8[-1]
 73+    return bytes(out8)
 74+
 75+
 76+SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
 77+SubstitutionXlat = str.maketrans(
 78+    SubstitutionAlphabet[4:] + SubstitutionAlphabet[:4], SubstitutionAlphabet)
 79+SubstitutionXlat0 = str.maketrans(
 80+    SubstitutionAlphabet, ('0_-.' * 9)[:len(SubstitutionAlphabet)])
 81+
 82+
 83+def DecodeSubst(s: str):
 84+    zeroBaby = False
 85+    out = []
 86+    for c in s:
 87+        if c == '0':
 88+            zeroBaby = True
 89+            continue
 90+        if zeroBaby:
 91+            out.append(c.translate(SubstitutionXlat0))
 92+        else:
 93+            out.append(c.translate(SubstitutionXlat))
 94+        zeroBaby = False
 95+    return ''.join(out)
 96+
 97+PayloadsByGuid = {}
 98+
 99+def DecodeDomain(domain: str):
100+    s = domain.strip()
101+    foundDomain = None
102+    for d in knownDomains:
103+        if s.endswith(d):
104+            foundDomain = d
105+            break
106+    if not foundDomain:
107+        raise RuntimeError("Can't find domain for %s" % s)
108+    s = s[:-len(foundDomain)]
109+    if not s:
110+        return (None, None, "[no data transmitted]")
111+    assert(s[-1] == '.')
112+    s = s[:-1]
113+
114+    if len(s) < 16:
115+        return (None, None, "[unable to decode: too short for any known decoding rules]")
116+    
117+    eguid = DecodeBsae32(s[:15])
118+    unknown_a = s[15]
119+    guid = xor(eguid[0:1], eguid[1:])
120+    s = s[16:]
121+
122+    payloads = PayloadsByGuid.setdefault(guid, [])
123+    if s not in payloads:
124+        if s.startswith("00"):
125+            payloads.insert(0, s)
126+        else:
127+            payloads.append(s)
128+            # People: friggin' preserve metadata, ugh. 
129+            # If I gave you every line of The Empire Strikes Back, 
130+            # sorted alphabetically, without timestamps,
131+            # could you reconstruct the movie?
132+            payloads.sort(key=len, reverse=True)
133+
134+    payload = ''.join(payloads)
135+
136+    if payload.startswith("00"):
137+        buf = DecodeBsae32(payload[2:])
138+        decoded = codecs.encode(buf, "quopri").decode("utf-8")
139+    else:
140+        decoded = DecodeSubst(payload)
141+    return (guid, unknown_a, decoded)
142+
class TextReader:
    """Present a line-oriented source as a stream of single-field records.

    infile is any iterable of strings, such as an open text file.  Iterating
    yields one {"name": ...} dict per line, with surrounding whitespace
    removed; blank lines are yielded too, with an empty name.
    """

    def __init__(self, infile):
        self.fieldnames = ["name"]
        self.infile = infile

    def __iter__(self):
        return ({"name": line.strip()} for line in self.infile)
151+    
152+
class CsvReader:
    """Read records from a CSV source whose first row is the header.

    fieldnames lists the input columns only, exactly as TextReader does.
    The caller adds its own output columns; adding them here as well made
    every output column appear twice in the written CSV.
    """

    def __init__(self, infile):
        self.reader = csv.DictReader(infile)
        # DictReader.fieldnames is None when the input has no header row.
        self.fieldnames = list(self.reader.fieldnames or [])

    def __iter__(self):
        yield from self.reader
161+
def main():
    """Command-line entry point: decode every name in the input into a CSV.

    The input format is chosen by --text / --csv, or failing that by the
    input file's extension; when neither decides it, usage help is printed
    and nothing is written.  Each input record is written back out with
    three extra columns: "guid", "unknown a" and "decode".

    Raises:
        RuntimeError: a record has neither a "name" nor an "fqdn" value, or
            DecodeDomain does not recognise the name's parent domain.
    """
    parser = argparse.ArgumentParser(
        description="Decode sunburst Domain Generation Algorithm (DGA) names")
    parser.add_argument("--text", dest="input", action="store_const", const="text",
                        help="Parse bambenek-style: list of fqdns, one per line")
    parser.add_argument("--csv", dest="input", action="store_const", const="csv",
                        help="Parse CSV: records must be in a 'name' or 'fqdn' field")
    parser.add_argument("infile", nargs="?",
                        type=argparse.FileType("r"), default=sys.stdin)
    parser.add_argument("--outfile", nargs="?",
                        type=argparse.FileType("w"), default=sys.stdout,
                        help="CSV file to write (default stdout)")
    args = parser.parse_args()

    # An explicit flag wins; the file extension is only a fallback.
    if args.input == "text":
        reader = TextReader(args.infile)
    elif args.input == "csv":
        reader = CsvReader(args.infile)
    elif args.infile.name.endswith(".txt"):
        reader = TextReader(args.infile)
    elif args.infile.name.endswith(".csv"):
        reader = CsvReader(args.infile)
    else:
        parser.print_help()
        return

    # Add each output column once, even if the input already has a column
    # of that name (for example when a previous decode is fed back in).
    fieldnames = list(reader.fieldnames)
    for column in ("guid", "unknown a", "decode"):
        if column not in fieldnames:
            fieldnames.append(column)

    writer = csv.DictWriter(args.outfile, fieldnames)
    writer.writeheader()
    for record in reader:
        name = record.get("name") or record.get("fqdn")
        if not name:
            raise RuntimeError(
                "Record has no 'name' or 'fqdn' value: %r" % (record,))
        guid, unknown_a, ptext = DecodeDomain(name)
        # An undecodable name has no GUID and is reported as 0.
        record["guid"] = int.from_bytes(guid or b"", "big")
        record["unknown a"] = unknown_a
        record["decode"] = ptext
        writer.writerow(record)


if __name__ == '__main__':
    main()