netshovel/gapstring/gapstring.go

364 lines
8.9 KiB
Go
Raw Permalink Normal View History

2018-07-24 17:47:57 -06:00
/*
Package gapstring provides a GapString type,
which represents a byte array with gaps: holes with no data.
This is used by netshovel to represent
captured data streams with drops.
Gaps are represented efficiently,
both in memory and in computation.
Several convenience functions exist
which operate on GapString data,
while preserving the gaps.
*/
package gapstring
import (
2018-07-23 17:48:14 -06:00
"bytes"
2018-07-19 21:29:26 -06:00
"encoding/binary"
2018-07-24 17:53:06 -06:00
"fmt"
2018-07-19 21:29:26 -06:00
"strings"
"unicode/utf16"
)
// A chunk is one run inside a GapString: either a gap or a span of bytes.
//
// XXX: There is probably a clever way to do this with interfaces,
// XXX: but it doesn't matter much for performance, so it can wait.
type chunk struct {
	gap  int    // Size of the gap; when positive this takes precedence over data
	data []byte // Bytes held by the chunk; only used when gap is not positive
}
// length reports how many bytes this chunk spans.
// A positive gap wins; otherwise it is the number of data bytes.
func (c chunk) length() int {
	if c.gap <= 0 {
		return len(c.data)
	}
	return c.gap
}
// missing reports the size of the gap held by this chunk.
// It is the gap field verbatim, so a pure data chunk reports 0.
func (c chunk) missing() int {
	gapSize := c.gap
	return gapSize
}
// slice returns the part of this chunk covering positions [a, b).
//
// A gap chunk yields a gap of b-a; a data chunk yields a sub-slice that
// shares storage with the original data.
//
// It panics unless 0 <= a <= b <= c.length(). Checking a as well as b
// matters for gap chunks, where a > b would otherwise quietly produce
// a chunk with a negative gap.
func (c chunk) slice(a, b int) chunk {
	if a < 0 || a > b || b > c.length() {
		panic("runtime error: index out of range")
	}
	if c.gap > 0 {
		return chunk{gap: b - a}
	}
	return chunk{data: c.data[a:b]}
}
2018-07-24 17:32:08 -06:00
// A GapString is a string with gaps of no data in the middle
2018-07-19 21:29:26 -06:00
type GapString struct {
chunks []chunk
}
2018-07-24 17:32:08 -06:00
// Return a new zero-length GapString
2018-07-19 21:29:26 -06:00
func New() GapString {
return GapString{
chunks: []chunk{},
}
}
2018-07-24 17:32:08 -06:00
// Return a new GapString containing a gap
2018-07-19 21:29:26 -06:00
func OfGap(gap int) GapString {
return GapString{
chunks: []chunk{{gap: gap}},
}
}
2018-07-24 17:32:08 -06:00
// Return a new GapString containing some bytes
2018-07-19 21:29:26 -06:00
func OfBytes(b []byte) GapString {
return GapString{
chunks: []chunk{{data: b}},
}
}
2018-07-24 17:32:08 -06:00
// Return a new GapString containing a string
2018-07-19 21:29:26 -06:00
func OfString(s string) GapString {
return OfBytes([]byte(s))
}
2018-07-24 17:32:08 -06:00
// Return the length of a GapString
//
// This is the number of bytes you would have if the gaps were filled with some value.
2018-07-19 21:29:26 -06:00
func (g GapString) Length() int {
n := 0
for _, c := range g.chunks {
n += c.length()
}
return n
}
2018-07-24 17:32:08 -06:00
// Return the total size of all gaps
2018-07-19 21:29:26 -06:00
func (g GapString) Missing() int {
n := 0
for _, c := range g.chunks {
n += c.missing()
}
return n
}
2018-07-24 17:32:08 -06:00
// Return the current GapString with another GapString appended
2018-07-19 21:29:26 -06:00
func (g GapString) Append(h GapString) GapString {
if h.Length() > 0 {
return GapString{
chunks: append(g.chunks, h.chunks...),
}
} else {
return g
}
}
2018-07-24 17:32:08 -06:00
// Return the current GapString with a gap appended
2018-07-19 21:29:26 -06:00
func (g GapString) AppendGap(gap int) GapString {
return g.Append(OfGap(gap))
}
2018-07-24 17:32:08 -06:00
// Return the current GapString with some bytes appended
2018-07-19 21:29:26 -06:00
func (g GapString) AppendBytes(b []byte) GapString {
return g.Append(OfBytes(b))
}
2018-07-24 17:32:08 -06:00
// Return the current GapString with a string appended
2018-07-19 21:29:26 -06:00
func (g GapString) AppendString(s string) GapString {
return g.Append(OfString(s))
}
2018-07-24 17:32:08 -06:00
// Return a slice of this GapString
//
// This is what you would expect from g[start:end],
// if g were a string or byte slice.
2018-07-19 21:29:26 -06:00
func (g GapString) Slice(start, end int) GapString {
outchunks := make([]chunk, 0, len(g.chunks))
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
if end > g.Length() {
panic("runtime error: slice bounds out of range")
}
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
for _, c := range g.chunks {
chunklen := c.length()
// Discard chunks that appear before the first
if start > chunklen {
start -= chunklen
end -= chunklen
continue
}
// Append chunks until we're done
cend := chunklen
if cend > end {
cend = end
}
if start != cend {
outchunks = append(outchunks, c.slice(start, cend))
}
start = 0
end -= cend
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
if end == 0 {
break
}
}
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
return GapString{chunks: outchunks}
}
2018-07-24 17:32:08 -06:00
// Return this GapString with the provided xor mask applied
//
// The mask is cycled for the length of the GapString.
2018-07-19 21:29:26 -06:00
func (g GapString) Xor(mask ...byte) GapString {
ret := GapString{}
pos := 0
for _, c := range g.chunks {
ret = ret.AppendGap(c.gap)
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
out := make([]byte, len(c.data))
for i, b := range c.data {
m := mask[(pos+i)%len(mask)]
out[i] = b ^ m
}
ret = ret.AppendBytes(out)
pos += c.length()
}
return ret
}
2018-07-24 17:32:08 -06:00
// Return this GapString with gaps filled in
2018-07-19 21:29:26 -06:00
func (g GapString) Bytes(fill ...byte) []byte {
ret := make([]byte, g.Length())
pos := 0
for _, c := range g.chunks {
// Fill in gap
if len(fill) > 0 {
for i := 0; i < c.gap; i += 1 {
2018-07-24 17:53:06 -06:00
ret[pos] = fill[pos%len(fill)]
2018-07-19 21:29:26 -06:00
pos += 1
}
}
// Fill in bytes
for _, b := range c.data {
ret[pos] = b
pos += 1
}
}
ret = ret[0:pos]
return ret
}
2018-07-24 17:32:08 -06:00
// Returns the value at a specific position
//
// This returns the byte if one is present, or -1 if it's a gap
2018-07-19 21:29:26 -06:00
func (g GapString) ValueAt(pos int) int {
v := g.Slice(pos, pos+1)
if v.chunks[0].gap > 0 {
return -1
} else {
return int(v.chunks[0].data[0])
}
}
2018-07-24 17:32:08 -06:00
// Return a string version of the GapString, with gaps filled in
2018-07-19 21:29:26 -06:00
func (g GapString) String(fill string) string {
return string(g.Bytes([]byte(fill)...))
}
2018-07-24 17:32:08 -06:00
// Return a hex representation of this GapString
//
// Each octet is space-separated, and gaps are represented with "--"
2018-07-23 17:48:14 -06:00
func (g GapString) HexString() string {
out := new(strings.Builder)
glen := g.Length()
for i := 0; i < glen; i += 1 {
c := g.ValueAt(i)
if c == -1 {
out.WriteString("--")
} else {
// There's probably a faster way to do this. Do we care?
fmt.Fprintf(out, "%02x", c)
}
2018-07-24 17:53:06 -06:00
if i+1 < glen {
2018-07-23 17:48:14 -06:00
out.WriteRune(' ')
2018-07-24 17:53:06 -06:00
if i%8 == 7 {
2018-07-23 17:48:14 -06:00
out.WriteRune(' ')
}
}
}
return out.String()
2018-07-23 15:34:22 -06:00
}
// fluffych holds one printable glyph for each of the 256 possible byte values,
// indexed by the byte value.
var fluffych = []rune{
	'·', '☺', '☻', '♥', '♦', '♣', '♠', '•', '◘', '○', '◙', '♂', '♀', '♪', '♫', '☼',
	'►', '◄', '↕', '‼', '¶', '§', '▬', '↨', '↑', '↓', '→', '←', '∟', '↔', '▲', '▼',
	' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
	'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '⌂',
	'Ç', 'ü', 'é', 'â', 'ä', 'à', 'å', 'ç', 'ê', 'ë', 'è', 'ï', 'î', 'ì', 'Ä', 'Å',
	'É', 'æ', 'Æ', 'ô', 'ö', 'ò', 'û', 'ù', 'ÿ', 'Ö', 'Ü', '¢', '£', '¥', '₧', 'ƒ',
	'á', 'í', 'ó', 'ú', 'ñ', 'Ñ', 'ª', 'º', '¿', '⌐', '¬', '½', '¼', '¡', '«', '»',
	'░', '▒', '▓', '│', '┤', '╡', '╢', '╖', '╕', '╣', '║', '╗', '╝', '╜', '╛', '┐',
	'└', '┴', '┬', '├', '─', '┼', '╞', '╟', '╚', '╔', '╩', '╦', '╠', '═', '╬', '╧',
	'╨', '╤', '╥', '╙', '╘', '╒', '╓', '╫', '╪', '┘', '┌', '█', '▄', '▌', '▐', '▀',
	'α', 'ß', 'Γ', 'π', 'Σ', 'σ', 'µ', 'τ', 'Φ', 'Θ', 'Ω', 'δ', '∞', 'φ', 'ε', '∩',
	'≡', '±', '≥', '≤', '⌠', '⌡', '÷', '≈', '°', '∀', '∃', '√', 'ⁿ', '²', '■', '¤',
}
2018-07-24 17:32:08 -06:00
// Return a rune representation of this GapString
//
// This uses the glyph set from the Fluffy toolkit
// (https://dirtbags.github.io/fluffy/).
// Gaps are represented with the rune '<27>'
2018-07-23 17:48:14 -06:00
func (g GapString) Runes() string {
out := new(strings.Builder)
glen := g.Length()
for i := 0; i < glen; i += 1 {
c := g.ValueAt(i)
if c == -1 {
out.WriteRune('<27>')
} else {
out.WriteRune(fluffych[c])
}
}
return out.String()
}
2018-07-24 17:32:08 -06:00
// Return a hex dump of this GapString
2018-07-19 21:29:26 -06:00
func (g GapString) Hexdump() string {
2018-07-23 15:34:22 -06:00
out := new(strings.Builder)
2018-07-19 21:29:26 -06:00
skipping := false
glen := g.Length()
pos := 0
2018-07-23 17:48:14 -06:00
prev := []byte{}
2018-07-24 17:53:06 -06:00
for pos < glen {
2018-07-19 21:29:26 -06:00
// Check for repeats
2018-07-23 17:48:14 -06:00
end := pos + 16
if end > glen {
end = glen
}
cur := g.Slice(pos, end)
curBytes := cur.Bytes()
if 0 == bytes.Compare(prev, curBytes) {
2018-07-24 17:53:06 -06:00
if !skipping {
2018-07-23 17:48:14 -06:00
fmt.Fprintln(out, "*")
skipping = true
2018-07-19 21:29:26 -06:00
}
2018-07-23 17:48:14 -06:00
continue
2018-07-19 21:29:26 -06:00
}
2018-07-23 15:34:22 -06:00
fmt.Fprintf(out, "%08x ", pos)
2018-07-23 17:48:14 -06:00
fmt.Fprintf(out, "%-50s", cur.HexString())
fmt.Fprintln(out, cur.Runes())
pos += cur.Length()
2018-07-19 21:29:26 -06:00
}
2018-07-23 15:34:22 -06:00
fmt.Fprintf(out, "%08x\n", pos)
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
return out.String()
}
2018-07-24 17:32:08 -06:00
// Return a uint32, little-endian, taken from the front of this GapString
//
// The rest of the GapString is returned as the second argument.
2018-07-23 21:17:41 -06:00
func (g GapString) Uint32LE() (uint32, GapString) {
2018-07-24 17:32:08 -06:00
return binary.LittleEndian.Uint32(g.Slice(0, 4).Bytes(0)), g.Slice(4, g.Length())
2018-07-23 21:17:41 -06:00
}
2018-07-24 17:32:08 -06:00
// Return a uint16, little-endian, taken from the front of this GapString
//
// The rest of the GapString is returned as the second argument.
2018-07-23 21:17:41 -06:00
func (g GapString) Uint16LE() (uint16, GapString) {
2018-07-24 17:32:08 -06:00
return binary.LittleEndian.Uint16(g.Slice(0, 2).Bytes(0)), g.Slice(2, g.Length())
2018-07-23 21:17:41 -06:00
}
2018-07-24 17:32:08 -06:00
// Return this GapString decoded as UTF-16
func (g GapString) Utf16(order binary.ByteOrder, fill string) string {
2018-07-19 21:29:26 -06:00
in := g.Bytes([]byte(fill)...)
ints := make([]uint16, len(in)/2)
2018-07-24 17:53:06 -06:00
2018-07-19 21:29:26 -06:00
for i := 0; i < len(in); i += 2 {
ints[i/2] = order.Uint16(in[i:])
2018-07-19 21:29:26 -06:00
}
return string(utf16.Decode(ints))
}
2018-07-24 17:32:08 -06:00
// Return this GapString decoded as UTF-16 Little Endian
//
// This format is used extensively in Microsoft Windows.
2018-07-19 21:29:26 -06:00
func (g GapString) Utf16LE(gap string) string {
return g.Utf16(binary.LittleEndian, gap)
}
2018-07-24 17:32:08 -06:00
// Return this GapString decoded as UTF-16 Big Endian
2018-07-19 21:29:26 -06:00
func (g GapString) Utf16BE(gap string) string {
return g.Utf16(binary.BigEndian, gap)
}