+freq, +histogram, small improvements

2020-12-22 09:15:56 -07:00 · 2020-12-22 09:15:56 -07:00 · cced62afd7
parent 5b679b45ec
commit cced62afd7
7 changed files with 287 additions and 127 deletions
--- a/4
+++ b/4
@ -1,3 +1,5 @@
 DESTDIR ?= $(HOME)
 CFLAGS = -Wall -Werror
 TARGETS += pmerge 
@ -10,6 +12,8 @@ TARGETS += pcat
 TARGETS += slice
 TARGETS += hex
 TARGETS += entropy
 TARGETS += freq
 TARGETS += histogram
 SCRIPTS += octets
--- a/README.md
+++ b/README.md
@ -85,15 +85,23 @@ instead of using "." for unprintable characters.
    00000007
 Also like the normal hd,
-this one will print an asterisk if the preceding 16 octets are repeated.
+this one will print an ellipsis if the preceding 16 octets are repeated.
 Use the offset printed next to determine how many repeats you have.
-    printf '%64s' hello | hd
+    $ printf '%64s' hello | hd
    00000000  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20                  
-    *
+    ⋮
    00000030  20 20 20 20 20 20 20 20  20 20 20 68 65 6c 6c 6f             hello
    00000040
 You can disable this with `-v`
    $ printf '%64s' hello | hd -v
    00000000  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20                  
    00000010  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20                  
    00000020  20 20 20 20 20 20 20 20  20 20 20 20 20 20 20 20                  
    00000030  20 20 20 20 20 20 20 20  20 20 20 68 65 6c 6c 6f             hello
    00000040
 ## unhex: unescape hex
@ -128,13 +136,13 @@ The "-x" option treats values as hex.
 Slices up input octet stream,
 similar to Python's slice operation.
-    ~/src/fluffy $ printf '0123456789abcdef' | slice 2; echo
+    $ printf '0123456789abcdef' | slice 2; echo
    23456789abcdef
-    ~/src/fluffy $ printf '0123456789abcdef' | slice 2 6; echo
+    $ printf '0123456789abcdef' | slice 2 6; echo
    2345
-    ~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8; echo
+    $ printf '0123456789abcdef' | slice 2 6 8; echo
    234589abcdef
-    ~/src/fluffy $ printf '0123456789abcdef' | slice 2 6 8 0xa
+    $ printf '0123456789abcdef' | slice 2 6 8 0xa
    234589
@ -154,7 +162,7 @@ Output is tab-separated, of the format:
 Frequently you are only interested in the payload,
 so you can run pcat like:
-    cat myfile.pcap | pcat | cut -f 6
+    $ cat myfile.pcap | pcat | cut -f 6
 Remember the `unhex` program,
 which will convert payloads to an octet stream,
@ -197,13 +205,13 @@ In other words: you can feed `hex` output into `unhex` with no manipulations.
 Displays the Shannon entropy of the input.
-    ~/src/fluffy $ echo -n a | ./entropy
+    $ echo -n a | ./entropy
    0.000000
-    ~/src/fluffy $ echo -n aaaaaaaaa | ./entropy
+    $ echo -n aaaaaaaaa | ./entropy
    0.000000
-    ~/src/fluffy $ echo -n aaaaaaaaab | ./entropy
+    $ echo -n aaaaaaaaab | ./entropy
    0.468996
-    ~/src/fluffy $ echo -n aaaaaaaaabc | ./entropy
+    $ echo -n aaaaaaaaabc | ./entropy
    0.865857
@ -242,6 +250,44 @@ This is occasionally more helpful than `man ascii`.
    000000f0  f0 f1 f2 f3 f4 f5 f6 f7  f8 f9 fa fb fc fd fe ff  ≡±≥≤⌠⌡÷≈°∀∃√ⁿ²■¤
    00000100
 ## freq: count octet frequencies
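 Counts how many times each of the 256 possible octet values occurs in the input.
 By default only octets that actually occur are listed;
 `-a` lists all 256, including those with a count of 0.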
    $ printf 'hello' | freq
    1 65 e
    1 68 h
    2 6c l
    1 6f o
    $ printf 'hello' | freq -a # Show all octets, even if count==0
    0 00 ·
    0 01 ☺
    0 02 ☻
    0 03 ♥
    0 04 ♦
    0 05 ♣
    0 06 ♠
    0 07 •
    0 08 ◘
    ...
 ## histogram: display histogram for input
 Reads the first number of each line, and prints a histogram.
 `-d DIVISOR` will divide each bar's width.
    $ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram
    0a ◙ # 1
    41 A ######## 8
    61 a ################ 16
    $ echo 'aaaaaaaaAAAAAAAAaaaaaaaa' | freq | histogram -d 4
    0a ◙  1
    41 A ## 8
    61 a #### 16
 Example Recipes
 ===============
--- a/freq.c
+++ b/freq.c
@ -0,0 +1,41 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <unistd.h>
 #include "glyphs.h"
 /*
  * Occurrence count for each octet value, indexed by the octet itself.
  * Static storage starts out zeroed.  The counters are unsigned long long so
  * that a large input cannot push them into signed-overflow territory.
  */
 static unsigned long long counts[256];
 /*
  * freq: tally how often each octet value occurs on stdin.
  *
  * Options:
  *   -a    print a line for every octet value, even those never seen
  *
  * Output is one line per octet: "COUNT HEX GLYPH", in ascending octet order.
  * Returns 0 on success, 1 after printing usage for an unknown option.
  */
 int main(int argc, char *argv[]) {
    int c;
    bool all = false;
    /* getopt() returning -1 ends the loop, so no case for it is needed. */
    while ((c = getopt(argc, argv, "a")) != -1) {
        switch (c) {
            case 'a':
                all = true;
                break;
            default:
                fprintf(stderr, "Usage: %s [-a]\n", argv[0]);
                fprintf(stderr, "\n");
                fprintf(stderr, "-a    Output all octets, even if count == 0\n");
                return 1;
        }
    }
    /* getchar() yields each octet as an unsigned char value (0..255) or EOF. */
    while ((c = getchar()) != EOF) {
        counts[c] += 1;
    }
    for (c = 0; c < 256; ++c) {
        if (all || counts[c]) {
            printf("%llu %02x %s\n", counts[c], (unsigned int)c, fluffyglyphs[c]);
        }
    }
    return 0;
 }
--- a/glyphs.h
+++ b/glyphs.h
@ -0,0 +1,22 @@
 #pragma once
 /*
  * Display glyph (a UTF-8 string) for every octet value, indexed by the octet.
  * Printable ASCII (0x20-0x7e) maps to itself; every other octet gets a
  * symbolic stand-in so that each value has a distinct, visible representation.
  * These glyphs are in most monospace fonts I tried in 2018.
  *
  * The table is `static` and fully `const` so that each file including this
  * header gets its own private read-only copy; the explicit size makes surplus
  * initializers a compile error.
  */
 static const char *const fluffyglyphs[256] = {
 	/* 0x00 */ "·", "☺", "☻", "♥", "♦", "♣", "♠", "•", "◘", "○", "◙", "♂", "♀", "♪", "♫", "☼",
 	/* 0x10 */ "►", "◄", "↕", "‼", "¶", "§", "▬", "↨", "↑", "↓", "→", "←", "∟", "↔", "▲", "▼",
 	/* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
 	/* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
 	/* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
 	/* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
 	/* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
 	/* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "⌂",
 	/* 0x80 */ "Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
 	/* 0x90 */ "É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ",
 	/* 0xa0 */ "á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»",
 	/* 0xb0 */ "░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐",
 	/* 0xc0 */ "└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "╧",
 	/* 0xd0 */ "╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀",
 	/* 0xe0 */ "α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩",
 	/* 0xf0 */ "≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∀", "∃", "√", "ⁿ", "²", "■", "¤",
 };
--- a/hd.c
+++ b/hd.c
@ -1,50 +1,30 @@
-#include <stdio.h>
+#include <getopt.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include "glyphs.h"
-/* These glyphs are in most monospace fonts I tried in 2018 */
+int dump(FILE *inf, bool verbose) {
 const char *charset[] = {
 	"·", "☺", "☻", "♥", "♦", "♣", "♠", "•", "◘", "○", "◙", "♂", "♀", "♪", "♫", "☼",
 	"►", "◄", "↕", "‼", "¶", "§", "▬", "↨", "↑", "↓", "→", "←", "∟", "↔", "▲", "▼",
 	" ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
 	"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
 	"@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
 	"P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
 	"`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
 	"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "⌂",
 	"Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å",
 	"É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ",
 	"á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»",
 	"░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐",
 	"└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "╧",
 	"╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀",
 	"α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩",
 	"≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∀", "∃", "√", "ⁿ", "²", "■", "¤",
 };
 int
 dump(FILE *f)
 {
  uint64_t p = 0;
  uint8_t buf[32];
  int offset = 0;
  int skipping = 0;
-	while (!feof(f)) {
+  while (!feof(inf)) {
    uint8_t *bytes = buf + offset;
    size_t len;
    int i;
    offset = 16 - offset;
-		len = fread(bytes, 1, 16, f);
+    len = fread(bytes, 1, 16, inf);
    if (0 == len)
      break;
-		if (p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) {
+    if (!verbose && p && (len == 16) && (0 == memcmp(buf, buf + 16, 16))) {
      if (!skipping) {
-				printf("*\n");
+        printf("⋮\n");
        skipping = 1;
      }
      p += 16;
@ -53,7 +33,7 @@ dump(FILE *f)
      skipping = 0;
    }
-		printf("%08lx  ", (long unsigned int) p);
+    printf("%08lx  ", (long unsigned int)p);
    for (i = 0; i < 16; i += 1) {
      if (i < len) {
        printf("%02x ", bytes[i]);
@ -66,7 +46,7 @@ dump(FILE *f)
    }
    printf(" ");
    for (i = 0; i < len; i += 1) {
-			printf("%s", charset[bytes[i]]);
+      printf("%s", fluffyglyphs[bytes[i]]);
    }
    if (-1 == printf("\n")) {
      perror("printf");
@ -74,26 +54,42 @@ dump(FILE *f)
    }
    p += len;
  }
-	printf("%08lx\n", (long unsigned int) p);
+  printf("%08lx\n", (long unsigned int)p);
  return 0;
 }
-int
+int main(int argc, char *argv[]) {
-main(int argc, char *argv[])
+  FILE *f;
-{
+  bool verbose = false;
-	if (1 == argc) {
+  int c;
 		dump(stdin);
 	} else {
 		FILE *f = fopen(argv[1], "rb");
  while ((c = getopt(argc, argv, "v")) != -1) {
    switch (c) {
      case -1:
        break;
      case 'v':
        verbose = true;
        break;
      default:
        fprintf(stderr, "Usage: %s [-v] [FILENAME]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "-v    Verbose: don't elide output if output lines are identical\n");
        return 1;
    }
  }
  if (!argv[optind] || (0 == strcmp("-", argv[optind]))) {
    f = stdin;
  } else {
    f = fopen(argv[optind], "rb");
    if (!f) {
      perror("open");
      return 1;
    }
 		dump(f);
  }
  dump(f, verbose);
  return 0;
 }
--- a/histogram.c
+++ b/histogram.c
@ -0,0 +1,48 @@
 #include <getopt.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 /*
  * Read one line from stdin into buf, storing at most size-1 characters and
  * always NUL-terminating.  The newline is consumed but not stored, and any
  * characters that do not fit are dropped.  Returns false only when stdin is
  * already at end-of-file.
  */
 static bool read_line(char *buf, size_t size) {
   size_t len = 0;
   int ch = getchar();
   if (EOF == ch) {
     return false;
   }
   while (ch != EOF && ch != '\n') {
     if (len + 1 < size) {
       buf[len++] = (char)ch;
     }
     ch = getchar();
   }
   buf[len] = '\0';
   return true;
 }
 /*
  * histogram: for each input line of the form "COUNT LABEL", print
  * "LABEL <bar> COUNT" where the bar is COUNT / DIVISOR '#' characters.
  *
  * Options:
  *   -d DIVISOR   positive integer to divide each bar's width by (default 1)
  *
  * Each line is parsed on its own, so a malformed line can never absorb part
  * of the following one, and reported line numbers match the input.  Blank
  * lines are skipped silently; other lines that lack a leading integer or a
  * label are reported on stderr and skipped.
  *
  * Returns 0 on success, 1 after printing usage for bad options.
  */
 int main(int argc, char* argv[]) {
   int lineno = 0;
   long divisor = 1;
   char line[256];
   int c;
   while ((c = getopt(argc, argv, "d:")) != -1) {
     char *end;
     switch (c) {
       case 'd':
         /* Accept only a complete, strictly positive decimal number. */
         divisor = strtol(optarg, &end, 10);
         if (end != optarg && '\0' == *end && divisor > 0) {
           break;
         }
         // fallthrough
       default:
         fprintf(stderr, "Usage: %s [-d DIVISOR]\n", argv[0]);
         fprintf(stderr, "\n");
         fprintf(stderr, "-d DIVISOR   Divide bar width by DIVISOR\n");
         return 1;
     }
   }
   while (read_line(line, sizeof line)) {
     char label[128];
     int count;
     int ret;
     ++lineno;
     ret = sscanf(line, "%d %127[^\n]", &count, label);
     if (EOF == ret) {
       continue;  // Blank line: nothing to plot
     }
     if (ret < 2) {
       fprintf(stderr, "Unparseable input on line %d\n", lineno);
       continue;
     }
     printf("%s ", label);
     for (long i = 0; i < count / divisor; ++i) {
       putchar('#');
     }
     printf(" %d\n", count);
   }
   return 0;
 }
--- a/xor.c
+++ b/xor.c
@ -1,5 +1,5 @@
 /*
- * xor filter -- 2017 Neale Pickett <zephyr@dirtbags.net>
+ * xor filter -- 2020 Neale Pickett <neale@woozle.org>
 *
 * This file is in the public domain.  I make no promises about the functionality
 * of this program. 
@ -7,35 +7,38 @@
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
+#include <unistd.h>
-int
+int main(int argc, char* argv[]) {
-main(int argc, char *argv[])
+  int radix = 10;
 {
 	int start = 1;
 	int base = 0;
  int arg;
  int c;
-	if (argv[start] && (0 == strcmp(argv[start], "-x"))) {
+  while ((c = getopt(argc, argv, "a")) != -1) {
-		base = 16;
+    switch (c) {
-		start += 1;
+      case 'x':
-	}
+        radix = 16;
-
+        break;
-	if (start + 1 > argc) {
+      default:
        fprintf(stderr, "Usage: %s [-x] m1 [m2 ...]\n", argv[0]);
        return 1;
    }
  }
-	arg = start;
+  if (!argv[optind]) {
    return 1;
  }
  arg = optind;
  while (1) {
    int c = getchar();
    unsigned char mask;
    if (!argv[arg]) {
-			arg = start;
+      arg = optind;
    }
-		mask = strtol(argv[arg++], NULL, base);
+    mask = strtol(argv[arg++], NULL, radix);
    if (EOF == c) {
      break;