netshovel

Network Archaeology library for Go
git clone https://git.woozle.org/neale/netshovel.git

netshovel / gapstring
Neale Pickett  ·  2018-07-24

gapstring.go

  1/*
  2package netshovel/gapstring provides a GapString type,
  3which represents a byte array with gaps: holes with no data.
  4This is used by netshovel to represent
  5captured data streams with drops.
  6
  7Gaps are represented efficiently,
  8both in memory and in computation.
  9Several convenience functions exist
 10which operate on GapString data,
 11while preserving the gaps.
 12*/
 13package gapstring
 14
 15import (
 16	"bytes"
 17	"encoding/binary"
 18	"fmt"
 19	"strings"
 20	"unicode/utf16"
 21)
 22
 23// XXX: I think there's a clever way to do this with interfaces
 24// XXX: But I'm too exhausted to figure it out.
 25// XXX: I'll have to fix it later; it doesn't matter much for performance
 26
 27type chunk struct {
 28	gap  int // This takes precedence over data
 29	data []byte
 30}
 31
 32func (c chunk) length() int {
 33	if c.gap > 0 {
 34		return c.gap
 35	} else {
 36		return len(c.data)
 37	}
 38}
 39
 40func (c chunk) missing() int {
 41	return c.gap
 42}
 43
 44func (c chunk) slice(a, b int) chunk {
 45	if b > c.length() {
 46		panic("runtime error: index out of range")
 47	}
 48	if c.gap > 0 {
 49		return chunk{gap: b - a}
 50	} else {
 51		return chunk{data: c.data[a:b]}
 52	}
 53}
 54
 55// A GapString is a string with gaps of no data in the middle
 56type GapString struct {
 57	chunks []chunk
 58}
 59
 60// Return a new zero-length GapString
 61func New() GapString {
 62	return GapString{
 63		chunks: []chunk{},
 64	}
 65}
 66
 67// Return a new GapString containing a gap
 68func OfGap(gap int) GapString {
 69	return GapString{
 70		chunks: []chunk{{gap: gap}},
 71	}
 72}
 73
 74// Return a new GapString containing some bytes
 75func OfBytes(b []byte) GapString {
 76	return GapString{
 77		chunks: []chunk{{data: b}},
 78	}
 79}
 80
 81// Return a new GapString containing a string
 82func OfString(s string) GapString {
 83	return OfBytes([]byte(s))
 84}
 85
 86// Return the length of a GapString
 87//
 88// This is the number of bytes you would have if the gaps were filled with some value.
 89func (g GapString) Length() int {
 90	n := 0
 91	for _, c := range g.chunks {
 92		n += c.length()
 93	}
 94	return n
 95}
 96
 97// Return the total size of all gaps
 98func (g GapString) Missing() int {
 99	n := 0
100	for _, c := range g.chunks {
101		n += c.missing()
102	}
103	return n
104}
105
106// Return the current GapString with another GapString appended
107func (g GapString) Append(h GapString) GapString {
108	if h.Length() > 0 {
109		return GapString{
110			chunks: append(g.chunks, h.chunks...),
111		}
112	} else {
113		return g
114	}
115}
116
117// Return the current GapString with a gap appended
118func (g GapString) AppendGap(gap int) GapString {
119	return g.Append(OfGap(gap))
120}
121
122// Return the current GapString with some bytes appended
123func (g GapString) AppendBytes(b []byte) GapString {
124	return g.Append(OfBytes(b))
125}
126
127// Return the current GapString with a string appended
128func (g GapString) AppendString(s string) GapString {
129	return g.Append(OfString(s))
130}
131
132// Return a slice of this GapString
133//
134// This is what you would expect from g[start:end],
135// if g were a string or byte slice.
136func (g GapString) Slice(start, end int) GapString {
137	outchunks := make([]chunk, 0, len(g.chunks))
138
139	if end > g.Length() {
140		panic("runtime error: slice bounds out of range")
141	}
142
143	for _, c := range g.chunks {
144		chunklen := c.length()
145
146		// Discard chunks that appear before the first
147		if start > chunklen {
148			start -= chunklen
149			end -= chunklen
150			continue
151		}
152
153		// Append chunks until we're done
154		cend := chunklen
155		if cend > end {
156			cend = end
157		}
158		if start != cend {
159			outchunks = append(outchunks, c.slice(start, cend))
160		}
161		start = 0
162		end -= cend
163
164		if end == 0 {
165			break
166		}
167	}
168
169	return GapString{chunks: outchunks}
170}
171
172// Return this GapString with the provided xor mask applied
173//
174// The mask is cycled for the length of the GapString.
175func (g GapString) Xor(mask ...byte) GapString {
176	ret := GapString{}
177	pos := 0
178	for _, c := range g.chunks {
179		ret = ret.AppendGap(c.gap)
180
181		out := make([]byte, len(c.data))
182		for i, b := range c.data {
183			m := mask[(pos+i)%len(mask)]
184			out[i] = b ^ m
185		}
186		ret = ret.AppendBytes(out)
187
188		pos += c.length()
189	}
190	return ret
191}
192
193// Return this GapString with gaps filled in
194func (g GapString) Bytes(fill ...byte) []byte {
195	ret := make([]byte, g.Length())
196	pos := 0
197	for _, c := range g.chunks {
198		// Fill in gap
199		if len(fill) > 0 {
200			for i := 0; i < c.gap; i += 1 {
201				ret[pos] = fill[pos%len(fill)]
202				pos += 1
203			}
204		}
205		// Fill in bytes
206		for _, b := range c.data {
207			ret[pos] = b
208			pos += 1
209		}
210	}
211	ret = ret[0:pos]
212	return ret
213}
214
215// Returns the value at a specific position
216//
217// This returns the byte if one is present, or -1 if it's a gap
218func (g GapString) ValueAt(pos int) int {
219	v := g.Slice(pos, pos+1)
220	if v.chunks[0].gap > 0 {
221		return -1
222	} else {
223		return int(v.chunks[0].data[0])
224	}
225}
226
227// Return a string version of the GapString, with gaps filled in
228func (g GapString) String(fill string) string {
229	return string(g.Bytes([]byte(fill)...))
230}
231
232// Return a hex representation of this GapString
233//
234// Each octet is space-separated, and gaps are represented with "--"
235func (g GapString) HexString() string {
236	out := new(strings.Builder)
237	glen := g.Length()
238	for i := 0; i < glen; i += 1 {
239		c := g.ValueAt(i)
240		if c == -1 {
241			out.WriteString("--")
242		} else {
243			// There's probably a faster way to do this. Do we care?
244			fmt.Fprintf(out, "%02x", c)
245		}
246		if i+1 < glen {
247			out.WriteRune(' ')
248			if i%8 == 7 {
249				out.WriteRune(' ')
250			}
251		}
252	}
253	return out.String()
254}
255
256var fluffych = []rune{
257	'·', '☺', '☻', '♥', '♦', '♣', '♠', '•', '◘', '○', '◙', '♂', '♀', '♪', '♫', '☼',
258	'►', '◄', '↕', '‼', '¶', '§', '▬', '↨', '↑', '↓', '→', '←', '∟', '↔', '▲', '▼',
259	' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
260	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
261	'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
262	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
263	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
264	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '⌂',
265	'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å',
266	'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', '¢', '£', '¥', '₧', 'ƒ',
267	'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '⌐', '¬', '½', '¼', '¡', '«', '»',
268	'░', '▒', '▓', '│', '┤', '╡', '╢', '╖', '╕', '╣', '║', '╗', '╝', '╜', '╛', '┐',
269	'└', '┴', '┬', '├', '─', '┼', '╞', '╟', '╚', '╔', '╩', '╦', '╠', '═', '╬', '╧',
270	'╨', '╤', '╥', '╙', '╘', '╒', '╓', '╫', '╪', '┘', '┌', '█', '▄', '▌', '▐', '▀',
271	'α', 'ß', 'Γ', 'π', 'Σ', 'σ', 'µ', 'τ', 'Φ', 'Θ', 'Ω', 'δ', '∞', 'φ', 'ε', '∩',
272	'≡', '±', '≥', '≤', '⌠', '⌡', '÷', '≈', '°', '∀', '∃', '√', 'ⁿ', '²', '■', '¤',
273}
274
275// Return a rune representation of this GapString
276//
277// This uses the glyph set from the Fluffy toolkit
278// (https://dirtbags.github.io/fluffy/).
279// Gaps are represented with the rune '�'
280func (g GapString) Runes() string {
281	out := new(strings.Builder)
282	glen := g.Length()
283	for i := 0; i < glen; i += 1 {
284		c := g.ValueAt(i)
285		if c == -1 {
286			out.WriteRune('�')
287		} else {
288			out.WriteRune(fluffych[c])
289		}
290	}
291	return out.String()
292}
293
294// Return a hex dump of this GapString
295func (g GapString) Hexdump() string {
296	out := new(strings.Builder)
297	skipping := false
298	glen := g.Length()
299	pos := 0
300	prev := []byte{}
301	for pos < glen {
302		// Check for repeats
303		end := pos + 16
304		if end > glen {
305			end = glen
306		}
307		cur := g.Slice(pos, end)
308		curBytes := cur.Bytes()
309		if 0 == bytes.Compare(prev, curBytes) {
310			if !skipping {
311				fmt.Fprintln(out, "*")
312				skipping = true
313			}
314			continue
315		}
316
317		fmt.Fprintf(out, "%08x  ", pos)
318		fmt.Fprintf(out, "%-50s", cur.HexString())
319		fmt.Fprintln(out, cur.Runes())
320
321		pos += cur.Length()
322	}
323	fmt.Fprintf(out, "%08x\n", pos)
324
325	return out.String()
326}
327
328// Return a uint32, little-endian, taken from the front of this GapString
329//
330// The rest of the GapString is returned as the second argument.
331func (g GapString) Uint32LE() (uint32, GapString) {
332	return binary.LittleEndian.Uint32(g.Slice(0, 4).Bytes(0)), g.Slice(4, g.Length())
333}
334
335// Return a uint16, little-endian, taken from the front of this GapString
336//
337// The rest of the GapString is returned as the second argument.
338func (g GapString) Uint16LE() (uint16, GapString) {
339	return binary.LittleEndian.Uint16(g.Slice(0, 2).Bytes(0)), g.Slice(2, g.Length())
340}
341
342// Return this GapString decoded as UTF-16
343func (g GapString) Utf16(order binary.ByteOrder, fill string) string {
344	in := g.Bytes([]byte(fill)...)
345	ints := make([]uint16, len(in)/2)
346
347	for i := 0; i < len(in); i += 2 {
348		ints[i/2] = order.Uint16(in[i:])
349	}
350	return string(utf16.Decode(ints))
351}
352
353// Return this GapString decoded as UTF-16 Little Endian
354//
355// This format is used extensively in Microsoft Windows.
356func (g GapString) Utf16LE(gap string) string {
357	return g.Utf16(binary.LittleEndian, gap)
358}
359
360// Return this GapString decoded as UTF-16 Big Endian
361func (g GapString) Utf16BE(gap string) string {
362	return g.Utf16(binary.BigEndian, gap)
363}