+freq, +histogram, small improvements

This commit is contained in:
Neale Pickett 2020-12-22 09:15:56 -07:00
parent 5b679b45ec
commit cced62afd7
7 changed files with 287 additions and 127 deletions

View File

@ -1,3 +1,5 @@
DESTDIR ?= $(HOME)
CFLAGS = -Wall -Werror CFLAGS = -Wall -Werror
TARGETS += pmerge TARGETS += pmerge
@ -10,6 +12,8 @@ TARGETS += pcat
TARGETS += slice TARGETS += slice
TARGETS += hex TARGETS += hex
TARGETS += entropy TARGETS += entropy
TARGETS += freq
TARGETS += histogram
SCRIPTS += octets SCRIPTS += octets

View File

@ -85,15 +85,23 @@ instead of using "." for unprintable characters.
00000007 00000007
Also like the normal hd, Also like the normal hd,
this one will print an asterisk if the preceding 16 octets are repeated. this one will print an ellipsis if the preceding 16 octets are repeated.
Use the offset printed next to determine how many repeats you have. Use the offset printed next to determine how many repeats you have.
printf '%64s' hello | hd $ printf '%64s' hello | hd
00000000 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 00000000 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
*
00000030 20 20 20 20 20 20 20 20 20 20 20 68 65 6c 6c 6f hello 00000030 20 20 20 20 20 20 20 20 20 20 20 68 65 6c 6c 6f hello
00000040 00000040
You can disable this with `-v`:
$ printf '%64s' hello | hd -v
00000000 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000010 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000020 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000030 20 20 20 20 20 20 20 20 20 20 20 68 65 6c 6c 6f hello
00000040
## unhex: unescape hex ## unhex: unescape hex
@ -128,13 +136,13 @@ The "-x" option treats values as hex.
Slices up input octet stream, Slices up input octet stream,
similar to Python's slice operation. similar to Python's slice operation.
~/src/fluffy $ printf '0123456789abcdef' | slice 2; echo $ printf '0123456789abcdef' | slice 2; echo
23456789abcdef 23456789abcdef
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6; echo $ printf '0123456789abcdef' | slice 2 6; echo
2345 2345
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8; echo $ printf '0123456789abcdef' | slice 2 6 8; echo
234589abcdef 234589abcdef
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8 0xa $ printf '0123456789abcdef' | slice 2 6 8 0xa
234589 234589
@ -154,7 +162,7 @@ Output is tab-separated, of the format:
Frequently you are only interested in the payload, Frequently you are only interested in the payload,
so you can run pcat like: so you can run pcat like:
cat myfile.pcap | pcat | cut -f 6 $ cat myfile.pcap | pcat | cut -f 6
Remember the `unhex` program, Remember the `unhex` program,
which will convert payloads to an octet stream, which will convert payloads to an octet stream,
@ -197,13 +205,13 @@ In other words: you can feed `hex` output into `unhex` with no manipulations.
Displays the Shannon entropy of the input. Displays the Shannon entropy of the input.
~/src/fluffy $ echo -n a | ./entropy $ echo -n a | ./entropy
0.000000 0.000000
~/src/fluffy $ echo -n aaaaaaaaa | ./entropy $ echo -n aaaaaaaaa | ./entropy
0.000000 0.000000
~/src/fluffy $ echo -n aaaaaaaaab | ./entropy $ echo -n aaaaaaaaab | ./entropy
0.468996 0.468996
~/src/fluffy $ echo -n aaaaaaaaabc | ./entropy $ echo -n aaaaaaaaabc | ./entropy
0.865857 0.865857
@ -242,6 +250,44 @@ This is occasionally more helpful than `man ascii`.
000000f0 f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff ≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤ 000000f0 f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff ≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤
00000100 00000100
## freq: count octet frequencies
For all 256 octets,
show frequency of each in input.
$ printf 'hello' | freq
1 65 e
1 68 h
2 6c l
1 6f o
$ printf 'hello' | freq -a # Show all octets, even if count==0
0 00 ·
0 01 ☺
0 02 ☻
0 03 ♥
0 04 ♦
0 05 ♣
0 06 ♠
0 07 •
0 08 ◘
...
## histogram: display histogram for input
Reads the first number of each line, and prints a histogram.
`-d DIVISOR` will divide each bar's width.
$ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram
0a ◙ # 1
41 A ######## 8
61 a ################ 16
$ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram -d 4
0a ◙ 1
41 A ## 8
61 a #### 16
Example Recipes Example Recipes
=============== ===============

41
freq.c Normal file
View File

@ -0,0 +1,41 @@
#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>
#include "glyphs.h"
/*
 * Number of times each octet value has been read from stdin, indexed by
 * octet. Unsigned and 64 bits wide so that multi-gigabyte inputs cannot
 * overflow the counter (signed overflow would be undefined behavior).
 */
unsigned long long counts[256] = {0};

/*
 * freq: count how often each octet occurs on standard input.
 *
 * Options:
 *   -a   print a line for every octet value, even those never seen
 *
 * Output is one line per octet: "COUNT HEX GLYPH", in ascending octet order.
 * Returns 0 on success, 1 on a usage error or a read error on stdin.
 */
int main(int argc, char *argv[]) {
    bool all = false;
    int c;

    /* getopt() returning -1 ends the loop, so no case is needed for it. */
    while ((c = getopt(argc, argv, "a")) != -1) {
        switch (c) {
            case 'a':
                all = true;
                break;
            default:
                fprintf(stderr, "Usage: %s [-a]\n", argv[0]);
                fprintf(stderr, "\n");
                fprintf(stderr, "-a      Output all octets, even if count == 0\n");
                return 1;
        }
    }

    /* getchar() yields an unsigned char value (0..255) or EOF, so c is a
     * valid index into counts[] whenever it is not EOF. */
    while ((c = getchar()) != EOF) {
        counts[c] += 1;
    }

    /* EOF is also returned on a read error; don't report partial counts as
     * if they described the whole input. */
    if (ferror(stdin)) {
        perror("read");
        return 1;
    }

    for (c = 0; c < 256; ++c) {
        if (all || counts[c]) {
            printf("%llu %02x %s\n", counts[c], (unsigned int)c, fluffyglyphs[c]);
        }
    }

    return 0;
}

22
glyphs.h Normal file
View File

@ -0,0 +1,22 @@
#pragma once
/*
 * Printable stand-in for every octet value, indexed by octet (0x00-0xff).
 *
 * The table follows IBM code page 437, with two substitutions: 0x00 is shown
 * as "·" and 0xff as "¤". Every entry is a non-empty UTF-8 string that is
 * intended to occupy one column in a monospace font, so dumps stay aligned.
 * These glyphs are in most monospace fonts I tried in 2018.
 *
 * Declared static const because this header defines the table: each
 * translation unit that includes it gets its own read-only copy instead of
 * a second definition of one external symbol.
 */
static const char *const fluffyglyphs[256] = {
    "·", "☺", "☻", "♥", "♦", "♣", "♠", "•", "◘", "○", "◙", "♂", "♀", "♪", "♫", "☼",
    "►", "◄", "↕", "‼", "¶", "§", "▬", "↨", "↑", "↓", "→", "←", "∟", "↔", "▲", "▼",
    " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "⌂",
    "Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
    "É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ",
    "á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»",
    "░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐",
    "└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "╧",
    "╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀",
    "α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩",
    "≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∙", "·", "√", "ⁿ", "²", "■", "¤",
};

78
hd.c
View File

@ -1,50 +1,30 @@
#include <stdio.h> #include <getopt.h>
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include "glyphs.h"
/* These glyphs are in most monospace fonts I tried in 2018 */ int dump(FILE *inf, bool verbose) {
const char *charset[] = {
"·", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "§", "", "", "", "", "", "", "", "", "", "",
" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "",
"Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
"É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "", "ƒ",
"á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "", "¬", "½", "¼", "¡", "«", "»",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "", "φ", "ε", "",
"", "±", "", "", "", "", "÷", "", "°", "", "", "", "", "²", "", "¤",
};
int
dump(FILE *f)
{
uint64_t p = 0; uint64_t p = 0;
uint8_t buf[32]; uint8_t buf[32];
int offset = 0; int offset = 0;
int skipping = 0; int skipping = 0;
while (!feof(f)) { while (!feof(inf)) {
uint8_t *bytes = buf + offset; uint8_t *bytes = buf + offset;
size_t len; size_t len;
int i; int i;
offset = 16 - offset; offset = 16 - offset;
len = fread(bytes, 1, 16, f); len = fread(bytes, 1, 16, inf);
if (0 == len) if (0 == len)
break; break;
if (p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) { if (!verbose && p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) {
if (!skipping) { if (!skipping) {
printf("*\n"); printf("\n");
skipping = 1; skipping = 1;
} }
p += 16; p += 16;
@ -53,7 +33,7 @@ dump(FILE *f)
skipping = 0; skipping = 0;
} }
printf("%08lx ", (long unsigned int) p); printf("%08lx ", (long unsigned int)p);
for (i = 0; i < 16; i += 1) { for (i = 0; i < 16; i += 1) {
if (i < len) { if (i < len) {
printf("%02x ", bytes[i]); printf("%02x ", bytes[i]);
@ -66,7 +46,7 @@ dump(FILE *f)
} }
printf(" "); printf(" ");
for (i = 0; i < len; i += 1) { for (i = 0; i < len; i += 1) {
printf("%s", charset[bytes[i]]); printf("%s", fluffyglyphs[bytes[i]]);
} }
if (-1 == printf("\n")) { if (-1 == printf("\n")) {
perror("printf"); perror("printf");
@ -74,26 +54,42 @@ dump(FILE *f)
} }
p += len; p += len;
} }
printf("%08lx\n", (long unsigned int) p); printf("%08lx\n", (long unsigned int)p);
return 0; return 0;
} }
int int main(int argc, char *argv[]) {
main(int argc, char *argv[]) FILE *f;
{ bool verbose = false;
if (1 == argc) { int c;
dump(stdin);
} else {
FILE *f = fopen(argv[1], "rb");
while ((c = getopt(argc, argv, "v")) != -1) {
switch (c) {
case -1:
break;
case 'v':
verbose = true;
break;
default:
fprintf(stderr, "Usage: %s [-v] [FILENAME]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "-v Verbose: don't elide output if output lines are identical\n");
return 1;
}
}
if (!argv[optind] || (0 == strcmp("-", argv[optind]))) {
f = stdin;
} else {
f = fopen(argv[optind], "rb");
if (!f) { if (!f) {
perror("open"); perror("open");
return 1; return 1;
} }
dump(f);
} }
dump(f, verbose);
return 0; return 0;
} }

48
histogram.c Normal file
View File

@ -0,0 +1,48 @@
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * histogram: read "COUNT LABEL" lines from stdin and print one bar per line.
 *
 * Options:
 *   -d DIVISOR   divide each bar's width by DIVISOR (a positive integer)
 *
 * Each input line must start with an integer, followed by whitespace and a
 * label; at most 127 bytes of the label are kept. Output for each line is
 * "LABEL BAR COUNT", where BAR is COUNT / DIVISOR '#' characters (none when
 * the quotient is zero or negative). Blank lines are skipped silently; lines
 * that do not match are reported on stderr with their line number and
 * skipped.
 *
 * Returns 0 when input is exhausted, 1 on a usage error.
 */
int main(int argc, char* argv[]) {
    long divisor = 1;
    int lineno = 0;
    int c;

    while ((c = getopt(argc, argv, "d:")) != -1) {
        char *end = NULL;

        switch (c) {
            case 'd':
                /* strtol + end pointer so that "4abc" or "" is rejected
                 * instead of being silently read as a number. */
                divisor = strtol(optarg, &end, 10);
                if (end != optarg && *end == '\0' && divisor > 0) {
                    break;
                }
                /* fallthrough */
            default:
                fprintf(stderr, "Usage: %s [-d DIVISOR]\n", argv[0]);
                fprintf(stderr, "\n");
                fprintf(stderr, "-d DIVISOR  Divide bar width by DIVISOR\n");
                return 1;
        }
    }

    for (;;) {
        char raw[256];   /* one physical input line, without its newline */
        char label[128];
        int count;
        int got;

        /* Read up to the newline only. Whitespace directives in a scanf
         * format would skip across newlines and glue two lines together,
         * so the line is isolated first and parsed afterwards. */
        got = scanf("%255[^\n]", raw);
        if (EOF == got) {
            break;
        }
        if (0 == got) {
            raw[0] = '\0';  /* nothing before the newline: empty line */
        }

        /* Drop whatever is left of this line (overflow beyond the buffer
         * and the newline itself) so the next read starts on a new line. */
        for (c = getchar(); c != EOF && c != '\n'; c = getchar()) {
        }
        ++lineno;

        got = sscanf(raw, "%d %127[^\n]", &count, label);
        if (EOF == got) {
            continue;  /* blank or whitespace-only line */
        }
        if (got < 2) {
            fprintf(stderr, "Unparseable input on line %d\n", lineno);
            continue;
        }

        printf("%s ", label);
        for (long i = 0; i < count / divisor; ++i) {
            putchar('#');
        }
        printf(" %d\n", count);
    }

    return 0;
}

35
xor.c
View File

@ -1,5 +1,5 @@
/* /*
* xor filter -- 2017 Neale Pickett <zephyr@dirtbags.net> * xor filter -- 2020 Neale Pickett <neale@woozle.org>
* *
* This file is in the public domain. I make no promises about the functionality * This file is in the public domain. I make no promises about the functionality
* of this program. * of this program.
@ -7,35 +7,38 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <unistd.h>
int int main(int argc, char* argv[]) {
main(int argc, char *argv[]) int radix = 10;
{
int start = 1;
int base = 0;
int arg; int arg;
int c;
if (argv[start] && (0 == strcmp(argv[start], "-x"))) { while ((c = getopt(argc, argv, "a")) != -1) {
base = 16; switch (c) {
start += 1; case 'x':
} radix = 16;
break;
if (start + 1 > argc) { default:
fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]); fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]);
return 1; return 1;
} }
}
arg = start; if (!argv[optind]) {
return 1;
}
arg = optind;
while (1) { while (1) {
int c = getchar(); int c = getchar();
unsigned char mask; unsigned char mask;
if (!argv[arg]) { if (!argv[arg]) {
arg = start; arg = optind;
} }
mask = strtol(argv[arg++], NULL, base); mask = strtol(argv[arg++], NULL, radix);
if (EOF == c) { if (EOF == c) {
break; break;