sunburst-decoder

SUNBURST DGA decoder
git clone https://git.woozle.org/neale/sunburst-decoder.git

Neale Pickett  ·  2021-05-06

sunburst.py

  1#! /usr/bin/python3
  2
  3# Neale Pickett <neale@lanl.gov>
  4#
  5# Created: 2020-12-14 16:49:51
  6# Last-modified: 2021-05-06 11:09:55
  7#
  8# Based on work by @RedDrip7 (twitter),
  9# and Prevasio (https://blog.prevasio.com/2020/12/sunburst-backdoor-deeper-look-into.html)
 10
 11# This is public domain software. The public may copy, distribute, prepare derivative works and 
 12# publicly display this software without charge, provided that this Notice, the statement 
 13# of reserved government right, and any statement of authorship are reproduced on all copies. 
 14# If software is modified to produce derivative works, such modified software should 
 15# be clearly marked, so as not to confuse it with the version available from LANL.
 16
 17# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE. 
 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 19# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 20# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 21# NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY/NATIONAL 
 22# NUCLEAR SECURITY ADMINSTRATION, NOR Triad National Security, LLC.  NOR ANY OF THEIR 
 23# EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY 
 24# OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, 
 25# APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT 
 26# INFRINGE PRIVATELY OWNED RIGHTS.
 27
 28# IN NO EVENT SHALL THE U.S. GOVERNMENT OR ITS CONTRACTORS BE LIABLE FOR ANY DIRECT, 
 29# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
 30# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 31# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 32# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
 33# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
 34# OF THE POSSIBILITY OF SUCH DAMAGE.
 35
 36import argparse
 37import base64
 38import codecs
 39import csv
 40import itertools
 41import re
 42import sys
 43import time
 44
 45
# Domain suffixes recognized by DecodeDomain: a name must end with one of
# these for the text in front of it to be treated as encoded data.
knownDomains = [
    "appsync-api.us-east-1.avsvmcloud.com",
    "appsync-api.us-east-2.avsvmcloud.com",
    "appsync-api.us-west-2.avsvmcloud.com",
    "appsync-api.eu-west-1.avsvmcloud.com",
]
 52
 53
 54def xor(key, buf):
 55    return bytes(b ^ k for b, k in zip(buf, itertools.cycle(key)))
 56
 57
# Digit alphabet for DecodeEsab32: a character's position in this string is
# its 5-bit value.
Esab32Alphabet = "ph2eifo3n5utg1j8d94qrvbmk0sal76c"
# Lookup alphabet for DecodeSubst: every encoded character is located here.
SubstitutionAlphabet = 'rq3gsalt6u1iyfzop572d49bnx8cvmkewhj'
# Output alphabet DecodeSubst uses for the single character following a '0'.
SubstitutionAlphabet0 = '0_-.'
# Used by DecodeDomain to turn the 16th character of the encoded label into
# a number when computing the sequence value.
SequenceAlphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
 62
# Product names reported by DecodeDomain. Entry i is described by bits 2*i and
# 2*i+1 of the decoded state word: the low bit of the pair is reported as
# "running", the high bit as "stopped".
Apps = [
    "Windows Live OneCare / Windows Defender",
    "Windows Defender Advanced Threat Protection",
    "Microsoft Defender for Identity",
    "Carbon Black",
    "CrowdStrike",
    "FireEye",
    "ESET",
    "F-Secure",
]
 73
 74
 75def DecodeBase32(s: str):
 76    """Not used by sunburst.
 77
 78    If Sunburst actually used Base32, this would work to decode things.
 79    It doesn't work.
 80    """
 81
 82    trans = str.maketrans(Esab32Alphabet,
 83                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678")
 84    t = s.translate(trans)
 85    while len(t) % 8 > 0:
 86        t += '='
 87    return base64.b32decode(t)
 88
 89
 90def DecodeEsab32(s: str) -> (int, int):
 91    """Decode using big-endian base32 algorithm.
 92
 93    Returns a bigint, and the number of bits contained therein
 94    """
 95
 96    acc = bits = 0
 97    for c in s:
 98        try:
 99            p = Esab32Alphabet.index(c)
100        except ValueError:
101            raise RuntimeError(
102                "Not an Esab32 encoded character: %c in %s" % (c, s))
103        acc |= p << bits
104        bits += 5
105    return acc, bits
106
107
def DecodeSubst(s: str) -> str:
    """Undo the substitution cipher applied to *s*.

    Each character is located in SubstitutionAlphabet and shifted back by
    four positions.  A '0' emits nothing itself; it marks the next character
    as an escape whose shifted position selects from SubstitutionAlphabet0
    instead.

    Raises RuntimeError if a character (other than '0') is not in
    SubstitutionAlphabet.
    """
    decoded = []
    escaped = False
    for ch in s:
        if ch == '0':
            # Escape marker: the following character uses the alternate alphabet
            escaped = True
            continue
        table = SubstitutionAlphabet0 if escaped else SubstitutionAlphabet
        escaped = False
        try:
            where = SubstitutionAlphabet.index(ch)
        except ValueError:
            raise RuntimeError(
                "Not a subst encoded character: %c in %s" % (ch, s))
        decoded.append(table[(where - 4) % len(table)])
    return "".join(decoded)
124
125
# Alias used in type annotations for a victim identifier.
# NOTE(review): DecodeDomain actually produces the GUID as a bytes object
# (main converts it with int.from_bytes), so this alias does not match the
# runtime type -- confirm which one is intended.
Guid = int
127
128
def isprintable(c: int) -> bool:
    """Return True if byte value *c* is a printable ASCII character.

    Printable ASCII runs from 0x20 (space) through 0x7e ('~').  0x7f is DEL,
    a control character, so it is excluded.
    """
    return 0x20 <= c < 0x7f
131
132
def quopri(buf: bytes) -> str:
    """Render *buf* as quoted-printable text using the "quopri" codec."""
    encode = codecs.getencoder("quopri")
    encoded, _consumed = encode(buf)
    return encoded.decode("utf-8")
135
136
class DGADecoder:
    """Collects the payload fragments seen for one GUID and decodes them together.

    Fragments are decoded with the substitution cipher until a fragment
    starting with "00" is seen; from then on every call re-decodes the whole
    history with the Esab32 scheme.
    """

    def __init__(self, guid: Guid):
        self.guid = guid
        # Every payload fragment received so far (with any "00" marker removed).
        self.history = []
        # Bound method used to decode the history; switched by decode().
        self._decoder = self.DecodeSubst

    def decode(self, s: str):
        """Record fragment *s* and return the decode of everything seen so far."""
        if s.startswith("00"):
            self._decoder = self.DecodeEsab32
            # We'll throw away the information about which is first,
            # since we do a computationally intensive trick later to determine ordering
            s = s[2:]
            self.history.insert(0, s)
        else:
            self.history.append(s)
        return self._decoder()

    def DecodeSubst(self) -> str:
        """Decode each distinct fragment with the substitution cipher and join
        the results, longest first.
        """
        # Sort the de-duplicated fragments before the (stable) length sort so
        # that equal-length fragments come out in the same order on every run;
        # raw set iteration order changes with string hash randomization.
        decodes = sorted({DecodeSubst(x) for x in self.history})
        return ''.join(sorted(decodes, key=len, reverse=True))

    def DecodeEsab32(self) -> str:
        """Try every ordering of the distinct fragments and return the
        orderings that decode to all-printable bytes, joined with " | ".

        If no ordering is fully printable, the last ordering tried is
        returned instead.  The work grows factorially with the number of
        distinct fragments.
        """
        # Sorted so permutations are generated in a reproducible order; this
        # fixes both the order of the reported possibilities and which
        # ordering is used as the fallback.
        history = sorted({x.rstrip("0") for x in self.history})

        # "Why don't we just mix up absolutely everything and see what happens?"
        # -- Ridcully, in Terry Pratchett's "The Hogfather"
        possibilities = []
        for attempt in itertools.permutations(history):
            acc, abits = DecodeEsab32(''.join(attempt))
            length = abits // 8
            # Leftover bits need one extra byte to serialize; it is then
            # dropped so only whole bytes are kept.
            nbytes = length + 1 if abits % 8 else length
            buf = acc.to_bytes(nbytes, 'little')[:length]
            if all(isprintable(b) for b in buf):
                # Yay it's probably okay
                possibilities.append(buf)
        # Well, we tried.
        if not possibilities:
            return quopri(buf)
        return " | ".join(quopri(candidate) for candidate in possibilities)
180
181
# One DGADecoder per GUID, created on demand by DecodeDomain so that fragments
# sharing a GUID accumulate in the same decoder history.
DecodersByGuid = {}
183
184
def DecodeDomain(domain: str) -> (Guid, int, str):
    """Decode one DNS name.

    Returns ``(guid, sequence, decoded)``.  When the name cannot be handled
    (unknown suffix, no data label, data label shorter than 16 characters)
    ``guid`` and ``sequence`` are ``None`` and ``decoded`` is a bracketed
    explanation.  Otherwise ``guid`` is a ``bytes`` object, ``sequence`` is
    the computed sequence value and ``decoded`` is the decoded text.

    Raises ValueError if the 16th character is not in SequenceAlphabet, and
    RuntimeError (from DecodeEsab32 / DecodeSubst) if the label contains
    characters outside the relevant alphabet.
    """
    s = domain.strip()
    foundDomain = next((d for d in knownDomains if s.endswith(d)), None)
    if not foundDomain:
        return (None, None, "[Probably not a Sunburst domain]")
    s = s[:-len(foundDomain)]
    if not s:
        return (None, None, "[no data transmitted]")
    if s[-1] != '.':
        # The suffix matched in the middle of a label (e.g. "xappsync-api..."),
        # so this is a different domain that merely ends with the same text.
        return (None, None, "[Probably not a Sunburst domain]")
    s = s[:-1]

    if len(s) < 16:
        return (None, None, "[too short]")

    c0 = s.encode("ASCII")[0]
    # https://blog.prevasio.com/2020/12/sunburst-backdoor-part-iii-dga-security.html
    sequence = (c0 % 36) - SequenceAlphabet.index(s[15])

    if 0 <= sequence < 3:
        # First 15 characters carry the encoded GUID: byte 0 is the XOR key
        # for the 8 bytes that follow it.
        dec, _ = DecodeEsab32(s[:15])
        eguid = dec.to_bytes(10, 'little')[:9]
        guid = xor(eguid[0:1], eguid[1:])
        payload = s[16:]
        decoder = DecodersByGuid.get(guid)
        if not decoder:
            decoder = DGADecoder(guid)
            DecodersByGuid[guid] = decoder
        decoded = decoder.decode(payload)
    else:
        # https://blog.prevasio.com/2020/12/sunburst-backdoor-part-iii-dga-security.html
        # Diagnostics go to stderr: stdout is the default destination of the
        # CSV output and must not be polluted.
        print(s, c0, sequence, file=sys.stderr)
        dec1num, bits = DecodeEsab32(s)
        dec1len = bits // 8
        dec1 = dec1num.to_bytes(dec1len+1, "little")[:dec1len]

        # Byte 0 is the XOR key for the remainder of the message.
        dec2 = xor(dec1[:1], dec1[1:])

        guid = xor(dec2[9:11], dec2[0:8])
        tslen = int.from_bytes(dec2[8:11], "big")
        length = tslen >> (8+8+4)
        epoch = 1262329200 + ((tslen & 0xfffff) << 2) # 4s intervals since 2010-01-01
        timestamp = time.gmtime(epoch)

        print(dec2[8:], tslen, length, timestamp, file=sys.stderr)
        state = int.from_bytes(dec2[15:17], "big")
        decodedStrings = []
        for i, app in enumerate(Apps):
            # Two status bits per application, lowest pair first.
            appState = state >> (i * 2)
            appStateString = []
            if appState & 0b01:
                appStateString.append("running")
            if appState & 0b10:
                appStateString.append("stopped")
            if appStateString:
                decodedStrings.append("%s [%s]" % (app, ",".join(appStateString)))
        decoded = "\n".join(decodedStrings)

    return (guid, sequence, decoded)
248
249
class TextReader:
    """Adapts a line-oriented source to the record interface used by main.

    Iterating yields one ``{"name": <stripped line>}`` dict per line of
    *infile*; ``fieldnames`` is always ``["name"]``.
    """

    def __init__(self, infile):
        self.fieldnames = ["name"]
        self.infile = infile

    def __iter__(self):
        yield from ({"name": line.strip()} for line in self.infile)
258
259
class CsvReader:
    """Wraps ``csv.DictReader`` so it exposes the same interface as TextReader.

    ``fieldnames`` is taken from the underlying reader when the object is
    constructed; iterating yields the reader's records unchanged.
    """

    def __init__(self, infile):
        dict_reader = csv.DictReader(infile)
        self.reader = dict_reader
        self.fieldnames = dict_reader.fieldnames

    def __iter__(self):
        yield from self.reader
268
269
def main():
    """Command-line entry point: read names, decode each, write CSV.

    The input format is taken from --text/--csv, or else guessed from the
    input file's extension; if neither applies the help text is printed.
    Each input record is written back out with "guid", "sequence" and
    "decode" columns added.
    """
    parser = argparse.ArgumentParser(
        description="Decode sunburst Domain Generation Algorithm (DGA) names")
    parser.add_argument("--text", dest="input", action="store_const", const="text",
                        help="Parse bambenek-style: list of fqdns, one per line")
    parser.add_argument("--csv", dest="input", action="store_const", const="csv",
                        help="Parse CSV: records must be in a 'name' or 'fqdn' field")
    parser.add_argument("infile", nargs="?",
                        type=argparse.FileType("r"), default=sys.stdin)
    parser.add_argument("--outfile", nargs="?",
                        type=argparse.FileType("w"), default=sys.stdout,
                        help="CSV file to write (default stdout)")
    args = parser.parse_args()

    reader = None
    if args.input == "text":
        reader = TextReader(args.infile)
    elif args.input == "csv":
        reader = CsvReader(args.infile)
    elif args.infile.name.endswith(".txt"):
        reader = TextReader(args.infile)
    elif args.infile.name.endswith(".csv"):
        reader = CsvReader(args.infile)
    else:
        parser.print_help()
        return

    fieldnames = reader.fieldnames + ["guid", "sequence", "decode"]
    writer = csv.DictWriter(args.outfile, fieldnames)
    writer.writeheader()
    for record in reader:
        # A blank line or a record with neither field yields no name; pass an
        # empty string so DecodeDomain reports it instead of failing on None.
        name = record.get("name") or record.get("fqdn") or ""
        guid, sequence, ptext = DecodeDomain(name)
        # DecodeDomain returns guid=None for names it cannot decode; leave the
        # column empty in that case (csv writes None as an empty field).
        record["guid"] = None if guid is None else int.from_bytes(guid, "big")
        record["sequence"] = sequence
        record["decode"] = ptext
        writer.writerow(record)
307
308
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()