+freq, +histogram, small improvements

This commit is contained in:
Neale Pickett 2020-12-22 09:15:56 -07:00
parent 5b679b45ec
commit cced62afd7
7 changed files with 287 additions and 127 deletions

View File

@ -1,3 +1,5 @@
DESTDIR ?= $(HOME)
CFLAGS = -Wall -Werror
TARGETS += pmerge
@ -10,6 +12,8 @@ TARGETS += pcat
TARGETS += slice
TARGETS += hex
TARGETS += entropy
TARGETS += freq
TARGETS += histogram
SCRIPTS += octets

View File

@ -85,15 +85,23 @@ instead of using "." for unprintable characters.
00000007
Also like the normal hd,
this one will print an asterisk if the preceding 16 octets are repeated.
this one will print an ellipsis if the preceding 16 octets are repeated.
Use the offset printed next to determine how many repeats you have.
printf '%64s' hello | hd
$ printf '%64s' hello | hd
00000000 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
*
00000030 20 20 20 20 20 20 20 20 20 20 20 68 65 6c 6c 6f hello
00000040
You can disable this with `-v`:
$ printf '%64s' hello | hd -v
00000000 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000010 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000020 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
00000030 20 20 20 20 20 20 20 20 20 20 20 68 65 6c 6c 6f hello
00000040
## unhex: unescape hex
@ -128,13 +136,13 @@ The "-x" option treats values as hex.
Slices up input octet stream,
similar to Python's slice operation.
~/src/fluffy $ printf '0123456789abcdef' | slice 2; echo
$ printf '0123456789abcdef' | slice 2; echo
23456789abcdef
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6; echo
$ printf '0123456789abcdef' | slice 2 6; echo
2345
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8; echo
$ printf '0123456789abcdef' | slice 2 6 8; echo
234589abcdef
~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8 0xa
$ printf '0123456789abcdef' | slice 2 6 8 0xa
234589
@ -154,7 +162,7 @@ Output is tab-separated, of the format:
Frequently you are only interested in the payload,
so you can run pcat like:
cat myfile.pcap | pcat | cut -f 6
$ cat myfile.pcap | pcat | cut -f 6
Remember the `unhex` program,
which will convert payloads to an octet stream,
@ -197,13 +205,13 @@ In other words: you can feed `hex` output into `unhex` with no manipulations.
Displays the Shannon entropy of the input.
~/src/fluffy $ echo -n a | ./entropy
$ echo -n a | ./entropy
0.000000
~/src/fluffy $ echo -n aaaaaaaaa | ./entropy
$ echo -n aaaaaaaaa | ./entropy
0.000000
~/src/fluffy $ echo -n aaaaaaaaab | ./entropy
$ echo -n aaaaaaaaab | ./entropy
0.468996
~/src/fluffy $ echo -n aaaaaaaaabc | ./entropy
$ echo -n aaaaaaaaabc | ./entropy
0.865857
@ -242,6 +250,44 @@ This is occasionally more helpful than `man ascii`.
000000f0 f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff ≡±≥≤⌠⌡÷≈°∙·√ⁿ²■¤
00000100
## freq: count octet frequencies
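Counts how many times each of the 256 possible octet values occurs in the input.
By default only octets that actually occur are listed; `-a` lists all 256.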
$ printf 'hello' | freq
1 65 e
1 68 h
2 6c l
1 6f o
$ printf 'hello' | freq -a # Show all octets, even if count==0
0 00 ·
0 01 ☺
0 02 ☻
0 03 ♥
0 04 ♦
0 05 ♣
0 06 ♠
0 07 •
0 08 ◘
...
## histogram: display histogram for input
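Reads a leading number from each input line and prints the rest of the line,
a bar of `#` characters that many wide, and then the number itself.
`-d DIVISOR` divides each bar's width by DIVISOR (integer division).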
$ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram
0a ◙ # 1
41 A ######## 8
61 a ################ 16
$ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram -d 4
0a ◙ 1
41 A ## 8
61 a #### 16
Example Recipes
===============

41
freq.c Normal file
View File

@ -0,0 +1,41 @@
#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>
#include "glyphs.h"
/*
 * Occurrence count for each octet value, indexed by the octet itself.
 * Unsigned and 64 bits wide so that very large inputs cannot trigger
 * signed-integer overflow.
 */
static unsigned long long counts[256] = {0};

/*
 * freq: read stdin until EOF and report how often each octet occurred.
 *
 * Output is one line per octet, in ascending octet order:
 *     COUNT HEX GLYPH
 * Octets that never occurred are skipped unless -a was given.
 *
 * Returns 0 on success, 1 on a usage error or if reading stdin failed.
 */
int main(int argc, char *argv[]) {
  bool all = false;
  int c;

  /* getopt() returns -1 when options are exhausted, which ends the loop,
   * so the switch only ever sees option characters or '?'. */
  while ((c = getopt(argc, argv, "a")) != -1) {
    switch (c) {
      case 'a':
        all = true;
        break;
      default:
        fprintf(stderr, "Usage: %s [-a]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "-a Output all octets, even if count == 0\n");
        return 1;
    }
  }

  /* getchar() yields the octet as an unsigned char value (0..255) or EOF,
   * so any non-EOF result is a valid index into counts[]. */
  while ((c = getchar()) != EOF) {
    counts[c] += 1;
  }

  /* EOF is also what getchar() returns on a read error; tell them apart. */
  if (ferror(stdin)) {
    perror("read");
    return 1;
  }

  for (c = 0; c < 256; ++c) {
    if (all || counts[c]) {
      printf("%llu %02x %s\n", counts[c], (unsigned int)c, fluffyglyphs[c]);
    }
  }

  return 0;
}

22
glyphs.h Normal file
View File

@ -0,0 +1,22 @@
#pragma once
/*
 * Printable stand-in for every octet value, indexed by the octet (0x00-0xff).
 *
 * Entries are strings rather than single chars because some glyphs are not
 * ASCII; print them with "%s". Rows 0x20-0x7e hold the ASCII characters
 * themselves; the rows from 0x80 up appear to follow the IBM code page 437
 * layout.
 *
 * These glyphs are in most monospace fonts I tried in 2018.
 *
 * The table is `static const` because this header defines (not merely
 * declares) it: internal linkage keeps it safe to include from more than one
 * translation unit, and const stops callers from modifying it by accident.
 *
 * NOTE(review): many entries are empty strings, which print nothing and would
 * break column alignment in a dump. The README's sample output shows visible
 * glyphs for e.g. 0x01-0x08 and 0xf0-0xff, so confirm these literals were not
 * damaged somewhere between the repository and this copy.
 */
static const char *const fluffyglyphs[] = {
"·", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "§", "", "", "", "", "", "", "", "", "", "",
" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "",
"Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
"É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "", "ƒ",
"á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "", "¬", "½", "¼", "¡", "«", "»",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "", "φ", "ε", "",
"", "±", "", "", "", "", "÷", "", "°", "", "", "", "", "²", "", "¤",
};

164
hd.c
View File

@ -1,99 +1,95 @@
#include <stdio.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "glyphs.h"
/* These glyphs are in most monospace fonts I tried in 2018 */
const char *charset[] = {
"·", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "§", "", "", "", "", "", "", "", "", "", "",
" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "",
int dump(FILE *inf, bool verbose) {
uint64_t p = 0;
uint8_t buf[32];
int offset = 0;
int skipping = 0;
"Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
"É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "", "ƒ",
"á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "", "¬", "½", "¼", "¡", "«", "»",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "", "φ", "ε", "",
"", "±", "", "", "", "", "÷", "", "°", "", "", "", "", "²", "", "¤",
};
while (!feof(inf)) {
uint8_t *bytes = buf + offset;
size_t len;
int i;
int
dump(FILE *f)
{
uint64_t p = 0;
uint8_t buf[32];
int offset = 0;
int skipping = 0;
offset = 16 - offset;
while (!feof(f)) {
uint8_t *bytes = buf + offset;
size_t len;
int i;
len = fread(bytes, 1, 16, inf);
if (0 == len)
break;
offset = 16 - offset;
if (!verbose && p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) {
if (!skipping) {
printf("\n");
skipping = 1;
}
p += 16;
continue;
} else {
skipping = 0;
}
len = fread(bytes, 1, 16, f);
if (0 == len)
break;
printf("%08lx ", (long unsigned int)p);
for (i = 0; i < 16; i += 1) {
if (i < len) {
printf("%02x ", bytes[i]);
} else {
printf(" ");
}
if (7 == i) {
printf(" ");
}
}
printf(" ");
for (i = 0; i < len; i += 1) {
printf("%s", fluffyglyphs[bytes[i]]);
}
if (-1 == printf("\n")) {
perror("printf");
return 1;
}
p += len;
}
printf("%08lx\n", (long unsigned int)p);
if (p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) {
if (!skipping) {
printf("*\n");
skipping = 1;
}
p += 16;
continue;
} else {
skipping = 0;
}
printf("%08lx ", (long unsigned int) p);
for (i = 0; i < 16; i += 1) {
if (i < len) {
printf("%02x ", bytes[i]);
} else {
printf(" ");
}
if (7 == i) {
printf(" ");
}
}
printf(" ");
for (i = 0; i < len; i += 1) {
printf("%s", charset[bytes[i]]);
}
if (-1 == printf("\n")) {
perror("printf");
return 1;
}
p += len;
}
printf("%08lx\n", (long unsigned int) p);
return 0;
return 0;
}
int
main(int argc, char *argv[])
{
if (1 == argc) {
dump(stdin);
} else {
FILE *f = fopen(argv[1], "rb");
int main(int argc, char *argv[]) {
FILE *f;
bool verbose = false;
int c;
if (!f) {
perror("open");
return 1;
}
while ((c = getopt(argc, argv, "v")) != -1) {
switch (c) {
case -1:
break;
case 'v':
verbose = true;
break;
default:
fprintf(stderr, "Usage: %s [-v] [FILENAME]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "-v Verbose: don't elide output if output lines are identical\n");
return 1;
}
}
dump(f);
}
if (!argv[optind] || (0 == strcmp("-", argv[optind]))) {
f = stdin;
} else {
f = fopen(argv[optind], "rb");
if (!f) {
perror("open");
return 1;
}
}
return 0;
/* Use dump()'s status (0 on success, 1 if writing output failed) as the exit
 * code instead of discarding it and always reporting success. */
return dump(f, verbose);
}

48
histogram.c Normal file
View File

@ -0,0 +1,48 @@
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * histogram: draw a text bar chart from lines of the form "COUNT LABEL".
 *
 * For each input line on stdin this prints the label, a bar of '#'
 * characters COUNT / DIVISOR wide, and then COUNT itself. Lines that do not
 * parse are reported on stderr and skipped.
 *
 * Options:
 *   -d DIVISOR   positive integer to divide each bar's width by (default 1)
 *
 * Returns 0 on success, 1 on a usage error.
 */
int main(int argc, char *argv[]) {
  int lineno = 0;
  long divisor = 1;
  int c;

  while ((c = getopt(argc, argv, "d:")) != -1) {
    bool ok = false;

    if ('d' == c) {
      char *end = NULL;

      /* strtol + end pointer rejects "4x" and "" which atoi would accept. */
      divisor = strtol(optarg, &end, 10);
      ok = (end != optarg) && ('\0' == *end) && (divisor > 0);
    }

    if (!ok) {
      fprintf(stderr, "Usage: %s [-d DIVISOR]\n", argv[0]);
      fprintf(stderr, "\n");
      fprintf(stderr, "-d DIVISOR Divide bar width by DIVISOR\n");
      return 1;
    }
  }

  for (;;) {
    char line[128];
    int count;
    int ret;
    long width;

    ++lineno;
    /* A number, then up to 127 characters of label, then the line ending. */
    ret = scanf("%d %127[^\n]\n", &count, line);
    if (EOF == ret) {
      break;
    }
    if (ret < 2) {
      fprintf(stderr, "Unparseable input on line %d\n", lineno);
      /* Read in and discard the rest of the line; stop if input ran out. */
      if (EOF == scanf("%*[^\n]\n")) {
        break;
      }
      continue;
    }

    printf("%s ", line);
    width = count / divisor;  /* negative counts simply draw no bar */
    for (long i = 0; i < width; ++i) {
      putchar('#');
    }
    printf(" %d\n", count);
  }

  return 0;
}

65
xor.c
View File

@ -1,5 +1,5 @@
/*
* xor filter -- 2017 Neale Pickett <zephyr@dirtbags.net>
* xor filter -- 2020 Neale Pickett <neale@woozle.org>
*
* This file is in the public domain. I make no promises about the functionality
* of this program.
@ -7,43 +7,46 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
int
main(int argc, char *argv[])
{
int start = 1;
int base = 0;
int arg;
int main(int argc, char* argv[]) {
int radix = 10;
int arg;
int c;
if (argv[start] && (0 == strcmp(argv[start], "-x"))) {
base = 16;
start += 1;
}
/* Parse options. The optstring has to list 'x': otherwise getopt() reports
 * -x as an unknown option ('?') and the hexadecimal case can never run. */
while ((c = getopt(argc, argv, "x")) != -1) {
  switch (c) {
    case 'x':
      radix = 16;  /* interpret the mask arguments as hexadecimal */
      break;
    default:
      fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]);
      return 1;
  }
}
if (start + 1 > argc) {
fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]);
return 1;
}
/* At least one mask argument is required; explain that rather than exiting
 * with a bare failure status. */
if (!argv[optind]) {
  fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]);
  return 1;
}
arg = start;
arg = optind;
while (1) {
int c = getchar();
unsigned char mask;
while (1) {
int c = getchar();
unsigned char mask;
if (!argv[arg]) {
arg = start;
}
mask = strtol(argv[arg++], NULL, base);
if (!argv[arg]) {
arg = optind;
}
mask = strtol(argv[arg++], NULL, radix);
if (EOF == c) {
break;
}
if (EOF == c) {
break;
}
c ^= mask;
putchar(c);
}
c ^= mask;
putchar(c);
}
return 0;
return 0;
}