flesh out infobot helper

This commit is contained in:
Neale Pickett 2012-11-20 18:13:16 -07:00
parent e089dd01c3
commit 6fbd1feb67
7 changed files with 352 additions and 158 deletions

View File

@ -4,7 +4,7 @@ TARGETS += infobot
all: $(TARGETS) all: $(TARGETS)
infobot: infobot.o cdb.o infobot: infobot.o cdb.o cdbmake.o
.PHONY: clean .PHONY: clean
clean: clean:

71
cdb.c
View File

@ -2,6 +2,7 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include "cdb.h" #include "cdb.h"
#include "dump.h"
/* /*
* *
@ -46,22 +47,80 @@ read_u32le(FILE *f)
(d[3] << 24)); (d[3] << 24));
} }
/*
 * Read one length-prefixed field from f into buf.
 *
 * fieldlen is the size of the field as stored in the file; at most
 * buflen bytes of it are stored in buf.  Any part of the field that
 * does not fit is read and discarded, so the stream is left positioned
 * just past the field.
 *
 * Returns the number of bytes actually stored in buf.  This can be less
 * than min(buflen, fieldlen) if the file ends early, so callers never
 * treat unread buffer space as data.
 */
static uint32_t
read_buf(FILE *f, uint32_t fieldlen, char *buf, size_t buflen)
{
	size_t want = (buflen < fieldlen) ? buflen : (size_t)fieldlen;
	size_t got = fread(buf, 1, want, f);
	uint32_t remaining = fieldlen - (uint32_t)got;

	// Slurp up the rest, giving up as soon as the stream runs dry
	while (remaining > 0) {
		if (EOF == getc(f)) {
			break;
		}
		remaining -= 1;
	}

	return (uint32_t)got;
}
void void
cdb_init(struct cdb_ctx *ctx, FILE *f) cdb_init(struct cdb_ctx *ctx, FILE *f)
{ {
ctx->f = f; ctx->f = f;
ctx->key = NULL; ctx->key = NULL;
ctx->hash_len = 1;
}
/*
 * Sequentially read the next key/value record from the database.
 *
 * On entry *keylen and *vallen hold the capacities of key and val; on
 * return they hold the byte counts reported by read_buf().
 *
 * Returns 0 after reading a record, or EOF once the file position has
 * reached the start of the lowest-placed hash table.
 *
 * cdb_init() sets hash_len to 1, so the first call after it takes the
 * set-up branch below.
 *
 * NOTE(review): the end-of-records check lives only in the else branch,
 * so the first call always reads a record, even if the lowest table
 * pointer equals 256 * 8 (no records at all) -- confirm whether empty
 * databases need handling here.
 */
int
cdb_dump(struct cdb_ctx *ctx,
char *key, size_t *keylen,
char *val, size_t *vallen)
{
// hash_len == 0 means a previous call already positioned the stream
if (ctx->hash_len != 0) {
// Find out where to stop reading: the smallest of the 256 table
// pointers marks the end of the record area
int i;
ctx->hash_len = 0;
ctx->hash_pos = 0xffffffff;
for (i = 0; i < 256; i += 1) {
uint32_t p;
fseek(ctx->f, i * 8, SEEK_SET);
p = read_u32le(ctx->f);
if (p < ctx->hash_pos) {
ctx->hash_pos = p;
}
}
// Records begin right after the 256 eight-byte pointer entries
fseek(ctx->f, 256 * 8, SEEK_SET);
} else {
long where = ftell(ctx->f);
if (where >= ctx->hash_pos) {
return EOF;
}
}
// Read the two buffers: both lengths come first, then key, then value
{
uint32_t klen = read_u32le(ctx->f);
uint32_t vlen = read_u32le(ctx->f);
*keylen = read_buf(ctx->f, klen, key, *keylen);
*vallen = read_buf(ctx->f, vlen, val, *vallen);
}
return 0;
} }
void void
cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen) cdb_find(struct cdb_ctx *ctx, char *key, size_t keylen)
{ {
ctx->hash_val = hash(key, keylen);
ctx->key = key; ctx->key = key;
ctx->keylen = keylen; ctx->keylen = keylen;
/* Read pointer */ ctx->hash_val = hash(key, keylen);
// Read pointer
fseek(ctx->f, (ctx->hash_val % 256) * 8, SEEK_SET); fseek(ctx->f, (ctx->hash_val % 256) * 8, SEEK_SET);
ctx->hash_pos = read_u32le(ctx->f); ctx->hash_pos = read_u32le(ctx->f);
ctx->hash_len = read_u32le(ctx->f); ctx->hash_len = read_u32le(ctx->f);
@ -69,7 +128,7 @@ cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen)
} }
uint32_t uint32_t
cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen) cdb_next(struct cdb_ctx *ctx, char *buf, size_t buflen)
{ {
uint32_t hashval; uint32_t hashval;
uint32_t entry_pos; uint32_t entry_pos;
@ -109,7 +168,7 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen)
} }
if (buf) { if (buf) {
return fread(buf, 1, min(dlen, buflen), ctx->f); return read_buf(ctx->f, dlen, buf, buflen);
} else { } else {
return dlen; return dlen;
} }

7
cdb.h
View File

@ -18,7 +18,10 @@ struct cdb_ctx {
}; };
void cdb_init(struct cdb_ctx *ctx, FILE *f); void cdb_init(struct cdb_ctx *ctx, FILE *f);
void cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen); int cdb_dump(struct cdb_ctx *ctx,
uint32_t cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen); char *key, size_t *keylen,
char *val, size_t *vallen);
void cdb_find(struct cdb_ctx *ctx, char *key, size_t keylen);
uint32_t cdb_next(struct cdb_ctx *ctx, char *buf, size_t buflen);
#endif #endif

131
cdbm.c
View File

@ -1,131 +0,0 @@
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h> // XXX: remove if malloc() is gone
/*
 * Hash len bytes starting at s: begin with 5381 and, for every byte,
 * multiply the running value by 33 (shift-and-add) and XOR the byte in.
 *
 * NOTE(review): the bytes are read as plain char, so for values >= 0x80
 * the result depends on whether char is signed on the platform.
 */
static uint32_t
hash(char *s, size_t len)
{
	uint32_t acc = 5381;
	char *end = s + len;

	while (s != end) {
		acc = ((acc << 5) + acc) ^ *s;
		s += 1;
	}

	return acc;
}
/*
 * Write val to f as four bytes, least significant byte first.
 */
void
write_u32le(FILE *f, uint32_t val)
{
	int shift;

	for (shift = 0; shift < 32; shift += 8) {
		fputc((val >> shift) & 0xff, f);
	}
}
/* One hash-table entry remembered while the data section is written. */
struct record {
uint32_t hashval; // full 32-bit hash of the record's key
uint32_t offset; // file offset at which the record starts
};
/*
 * Build "a.cdb" from records read on stdin.
 *
 * Each input record looks like "+klen,vlen:" followed by klen key
 * bytes, two separator bytes, vlen value bytes and one terminator
 * byte.  Reading stops at the first line that does not start with
 * such a header (including end of input).
 *
 * Returns 0 on success, 1 on any error.
 */
int
main(int argc, char *argv[])
{
	struct record *records[256] = {0};
	uint32_t nrecords[256] = {0};
	FILE *f = fopen("a.cdb", "wb");
	int idx;

	(void)argc;
	(void)argv;

	if (! f) {
		perror("Opening a.cdb");
		return 1;
	}

	// Start writing records after the 256 (offset, length) table pointers
	fseek(f, 256*4*2, SEEK_SET);
	for (;;) {
		char key[4098];
		char val[4098];
		unsigned int keylen;
		unsigned int vallen;

		// Read a record header.  Anything short of two converted
		// lengths (end of input, blank trailer line) ends the data;
		// testing for exactly 2 also catches scanf() returning EOF.
		if (scanf("+%u,%u:", &keylen, &vallen) != 2) {
			break;
		}
		if ((keylen > sizeof(key)) || (vallen > sizeof(val))) {
			fprintf(stderr, "error: my buffers are too puny (%u,%u)\n", keylen, vallen);
			return 1;
		}
		if (fread(key, 1, keylen, stdin) != keylen) {
			fprintf(stderr, "error: truncated key\n");
			return 1;
		}
		// Skip the two separator bytes between key and value
		getchar();
		getchar();
		if (fread(val, 1, vallen, stdin) != vallen) {
			fprintf(stderr, "error: truncated value\n");
			return 1;
		}
		// Skip the record terminator
		getchar();

		// Compute hash of key and remember where the record lands
		{
			long where = ftell(f);
			uint32_t hashval = hash(key, keylen);
			uint32_t n;
			struct record *grown;

			idx = hashval % 256;
			n = nrecords[idx];

			// Grow through a temporary so the bucket stays valid
			// if realloc() fails
			grown = realloc(records[idx], ((size_t)n + 1) * sizeof(*grown));
			if (NULL == grown) {
				perror("realloc records");
				return 1;
			}
			records[idx] = grown;
			nrecords[idx] = n + 1;

			grown[n].hashval = hashval;
			grown[n].offset = (uint32_t)where;
		}

		// Write it out
		write_u32le(f, keylen);
		write_u32le(f, vallen);
		fwrite(key, 1, keylen, f);
		fwrite(val, 1, vallen, f);
	}

	// Write out tables
	for (idx = 0; idx < 256; idx += 1) {
		uint32_t r;
		long offset;
		uint32_t tlen = nrecords[idx] * 2;
		uint32_t *buf;

		// Pointer
		offset = ftell(f);
		fseek(f, idx * 8, SEEK_SET);
		write_u32le(f, (uint32_t)offset);
		write_u32le(f, tlen);
		fseek(f, offset, SEEK_SET);

		// An empty bucket has no table; skipping it also avoids
		// calloc(0, ...), which may legitimately return NULL
		if (0 == tlen) {
			continue;
		}

		// Build table in memory
		buf = calloc((size_t)tlen * 2, sizeof(*buf));
		if (! buf) {
			perror("Allocating hash table");
			return 1;
		}

		for (r = 0; r < nrecords[idx]; r += 1) {
			uint32_t slot = (records[idx][r].hashval / 256) % tlen;

			// A slot is free while its offset is 0: records start
			// at offset 2048 or later, whereas a hash value of 0
			// is perfectly legal for a stored key
			while (buf[slot*2 + 1] != 0) {
				slot = (slot + 1) % tlen;
			}

			buf[slot*2 + 0] = records[idx][r].hashval;
			buf[slot*2 + 1] = records[idx][r].offset;
		}

		// Write it out
		for (r = 0; r < tlen; r += 1) {
			write_u32le(f, buf[r*2 + 0]);
			write_u32le(f, buf[r*2 + 1]);
		}

		free(buf);
	}

	for (idx = 0; idx < 256; idx += 1) {
		free(records[idx]);
	}

	// fclose() flushes; a failure here means the database is incomplete
	if (fclose(f) != 0) {
		perror("Closing a.cdb");
		return 1;
	}

	return 0;
}

121
cdbmake.c Normal file
View File

@ -0,0 +1,121 @@
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h> // XXX: remove if malloc() is gone
#include "cdbmake.h"
/*
 * Hash the first len bytes of s.  The running value starts at 5381;
 * each byte multiplies it by 33 (shift-and-add) and is then XORed in.
 *
 * NOTE(review): bytes are read as plain char, so values >= 0x80 hash
 * differently depending on the platform's char signedness.
 */
static uint32_t
hash(char *s, size_t len)
{
	uint32_t acc = 5381;
	size_t left;

	for (left = len; left > 0; left -= 1) {
		acc = ((acc << 5) + acc) ^ *s;
		s += 1;
	}

	return acc;
}
/*
 * Emit val on f as four bytes in little-endian order.
 */
static void
write_u32le(FILE *f, uint32_t val)
{
	int shift = 0;

	while (shift < 32) {
		fputc((val >> shift) & 0xff, f);
		shift += 8;
	}
}
/*
 * Prepare ctx for building a database on f: every bucket starts out
 * empty, and the stream is moved past the first 256 * 8 bytes, which
 * cdbmake_finalize() later fills with the table pointers.
 */
void
cdbmake_init(struct cdbmake_ctx *ctx, FILE *f)
{
	int bucket = 0;

	while (bucket < 256) {
		ctx->nrecords[bucket] = 0;
		ctx->records[bucket] = NULL;
		bucket += 1;
	}
	ctx->f = f;

	fseek(f, 256 * 8, SEEK_SET);
}
void
cdbmake_add(struct cdbmake_ctx *ctx,
char *key, size_t keylen,
char *val, size_t vallen)
{
long where = ftell(ctx->f);
uint32_t hashval = hash(key, keylen);
int idx = hashval % 256;
uint32_t n = ctx->nrecords[idx];
ctx->nrecords[idx] += 1;
ctx->records[idx] = (struct cdbmake_record *)realloc(ctx->records[idx],
ctx->nrecords[idx] * sizeof(struct cdbmake_record));
if (NULL == ctx->records[idx]) {
perror("realloc records");
return;
}
ctx->records[idx][n].hashval = hashval;
ctx->records[idx][n].offset = (uint32_t)where;
// Write it out
write_u32le(ctx->f, keylen);
write_u32le(ctx->f, vallen);
fwrite(key, 1, keylen, ctx->f);
fwrite(val, 1, vallen, ctx->f);
}
/*
 * Finish the database: for each of the 256 buckets, append its hash
 * table to the file and store the table's (offset, slot count) pair in
 * the pointer area at the start of the file.  Afterwards ctx->f is
 * closed and every bucket is released, whether or not all tables could
 * be written.
 *
 * Each table has twice as many slots as the bucket has records; a slot
 * is a (hash, record offset) pair, and collisions are resolved by
 * probing the next slot.
 */
void
cdbmake_finalize(struct cdbmake_ctx *ctx)
{
	int idx;

	// Write out tables
	for (idx = 0; idx < 256; idx += 1) {
		uint32_t r;
		long offset;
		uint32_t tlen = ctx->nrecords[idx] * 2;
		uint32_t *buf;

		// Pointer
		offset = ftell(ctx->f);
		fseek(ctx->f, idx * 8, SEEK_SET);
		write_u32le(ctx->f, (uint32_t)offset);
		write_u32le(ctx->f, tlen);
		fseek(ctx->f, offset, SEEK_SET);

		// An empty bucket has no table; skipping it also avoids
		// calloc(0, ...), which may legitimately return NULL
		if (0 == tlen) {
			continue;
		}

		// Build table in memory
		buf = calloc((size_t)tlen * 2, sizeof(*buf));
		if (! buf) {
			perror("Allocating hash table");
			// Fall through to the clean-up below instead of
			// leaking the stream and the buckets
			break;
		}

		for (r = 0; r < ctx->nrecords[idx]; r += 1) {
			uint32_t slot = (ctx->records[idx][r].hashval / 256) % tlen;

			// A slot is free while its offset is 0.  Offsets of
			// records written after cdbmake_init() are at least
			// 256 * 8, whereas a hash value of 0 is legal.
			while (buf[slot*2 + 1] != 0) {
				slot = (slot + 1) % tlen;
			}

			buf[slot*2 + 0] = ctx->records[idx][r].hashval;
			buf[slot*2 + 1] = ctx->records[idx][r].offset;
		}

		// Write it out
		for (r = 0; r < tlen; r += 1) {
			write_u32le(ctx->f, buf[r*2 + 0]);
			write_u32le(ctx->f, buf[r*2 + 1]);
		}

		free(buf);
	}

	fclose(ctx->f);
	ctx->f = NULL;

	for (idx = 0; idx < 256; idx += 1) {
		free(ctx->records[idx]);
		ctx->records[idx] = NULL;
		ctx->nrecords[idx] = 0;
	}
}

25
cdbmake.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef __CDBMAKE_H__
#define __CDBMAKE_H__
#include <stdio.h>
#include <stdint.h>
#include <string.h>
/* One pending hash-table entry, recorded by cdbmake_add(). */
struct cdbmake_record {
uint32_t hashval; // full 32-bit hash of the record's key
uint32_t offset; // file offset at which the record starts
};
/* State carried between cdbmake_init(), cdbmake_add() and cdbmake_finalize(). */
struct cdbmake_ctx {
FILE *f; // output stream; closed and set to NULL by cdbmake_finalize()
struct cdbmake_record *records[256]; // per-bucket entries, bucket = hash % 256
uint32_t nrecords[256]; // number of entries in each bucket
};
/* Reset ctx to empty buckets and position f just past the 256 * 8 byte pointer area. */
void cdbmake_init(struct cdbmake_ctx *ctx, FILE *f);
/* Append one record (lengths, key bytes, value bytes) and remember its hash and offset. */
void cdbmake_add(struct cdbmake_ctx *ctx,
char *key, size_t keylen,
char *val, size_t vallen);
/* Write the 256 hash tables and their pointers, close the stream, free the buckets. */
void cdbmake_finalize(struct cdbmake_ctx *ctx);
#endif

153
infobot.c
View File

@ -2,10 +2,12 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <ctype.h> #include <ctype.h>
#include <sys/time.h> #include <sys/time.h>
#include <sysexits.h> #include <sysexits.h>
#include "cdb.h" #include "cdb.h"
#include "cdbmake.h"
/* Some things I use for debugging */ /* Some things I use for debugging */
#ifdef NODUMP #ifdef NODUMP
@ -26,7 +28,7 @@ usage()
{ {
fprintf(stderr, "Usage: infobot factoids.cdb \"text\"\n"); fprintf(stderr, "Usage: infobot factoids.cdb \"text\"\n");
return 0; return EX_USAGE;
} }
size_t size_t
@ -41,18 +43,12 @@ lowercase(char *text)
return ret; return ret;
} }
/*
 * Placeholder command handler: ignores both arguments and reports
 * success.
 */
int
infocmd(char *filename, char *text)
{
	(void)filename;
	(void)text;

	return 0;
}
int int
lookup(char *filename, char *text) choose(char *filename, char *key)
{ {
struct cdb_ctx c; struct cdb_ctx c;
FILE *f = fopen(filename, "r"); FILE *f = fopen(filename, "r");
size_t textlen = lowercase(text); size_t keylen = lowercase(key);
uint32_t nresults; uint32_t nresults;
if (! f) { if (! f) {
@ -63,7 +59,7 @@ lookup(char *filename, char *text)
cdb_init(&c, f); cdb_init(&c, f);
/* Count how many results there are */ /* Count how many results there are */
cdb_find(&c, text, textlen); cdb_find(&c, key, keylen);
for (nresults = 0; cdb_next(&c, NULL, 0); nresults += 1); for (nresults = 0; cdb_next(&c, NULL, 0); nresults += 1);
if (nresults > 0) { if (nresults > 0) {
@ -75,11 +71,11 @@ lookup(char *filename, char *text)
char val[8192]; char val[8192];
uint32_t i; uint32_t i;
cdb_find(&c, text, textlen); cdb_find(&c, key, keylen);
for (i = 0; i < which; i += 1) { for (i = 0; i < which; i += 1) {
cdb_next(&c, NULL, 0); cdb_next(&c, NULL, 0);
} }
vallen = cdb_next(&c, val, sizeof val); vallen = cdb_next(&c, val, sizeof(val));
printf("%.*s\n", vallen, val); printf("%.*s\n", vallen, val);
} }
@ -88,16 +84,125 @@ lookup(char *filename, char *text)
return 0; return 0;
} }
/*
 * Print every value stored under key in the database at filename, one
 * per line.  The key is lowercased in place before the lookup.
 *
 * Returns 0 on success, or EX_NOINPUT if the database cannot be opened.
 */
int
list(char *filename, char *key)
{
	struct cdb_ctx c;
	size_t keylen = lowercase(key);
	FILE *f = fopen(filename, "rb");

	if (! f) {
		perror("Opening database");
		return EX_NOINPUT;
	}

	cdb_init(&c, f);
	cdb_find(&c, key, keylen);
	for (;;) {
		char val[8192];
		uint32_t vallen = cdb_next(&c, val, sizeof(val));

		// A zero length ends the listing
		if (vallen == 0) {
			break;
		}
		// The precision argument of %.*s must be an int
		printf("%.*s\n", (int)vallen, val);
	}

	fclose(f);

	return 0;
}
/*
 * Write a copy of the database at filename, plus one new key/value
 * record, to a temporary file named "<filename>.<pid>".
 *
 * The key is lowercased in place first, as choose() and list() do
 * before looking a key up, so the stored key can be found again.
 *
 * Returns 0 on success, EX_NOINPUT if the database cannot be opened,
 * or EX_CANTCREAT if the temporary file cannot be created.
 *
 * NOTE(review): the temporary file is never renamed over filename, so
 * the original database is left unchanged -- confirm whether a
 * rename() is meant to follow.
 */
int
add(char *filename, char *key, char *val)
{
	struct cdb_ctx inc;
	struct cdbmake_ctx outc;
	FILE *inf;
	FILE *outf;
	size_t keylen = lowercase(key);
	char tmpname[4096];
	int n;

	inf = fopen(filename, "rb");
	if (! inf) {
		perror("Opening database");
		return EX_NOINPUT;
	}

	n = snprintf(tmpname, sizeof(tmpname), "%s.%d", filename, (int)getpid());
	if ((n < 0) || ((size_t)n >= sizeof(tmpname))) {
		// A truncated name could collide with some other file
		fprintf(stderr, "Temporary database name too long\n");
		fclose(inf);
		return EX_CANTCREAT;
	}

	outf = fopen(tmpname, "wb");
	if (! outf) {
		perror("Creating temporary database");
		fclose(inf);
		return EX_CANTCREAT;
	}

	cdb_init(&inc, inf);
	cdbmake_init(&outc, outf);

	// Copy every existing record across
	for (;;) {
		char oldkey[8192];
		char oldval[8192];
		size_t oldkeylen = sizeof(oldkey);
		size_t oldvallen = sizeof(oldval);

		if (EOF == cdb_dump(&inc, oldkey, &oldkeylen, oldval, &oldvallen)) {
			break;
		}
		cdbmake_add(&outc, oldkey, oldkeylen, oldval, oldvallen);
	}

	// Then the new one
	cdbmake_add(&outc, key, keylen, val, strlen(val));

	// cdbmake_finalize() closes outf; the input stream is ours to close
	cdbmake_finalize(&outc);
	fclose(inf);

	return 0;
}
/* What main() should do, as selected by the command-line options. */
enum action {
ACT_ONE, // default: choose() prints one value for the key
ACT_ALL, // -l: list() prints every value for the key
ACT_ADD, // -a VAL: add() stores VAL under the key
ACT_DEL // -r VAL: selected by main() but not implemented yet
};
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
char *filename; char *filename;
char *text; char *key;
char *val;
enum action act = ACT_ONE;
if (3 != argc) { for (;;) {
return usage(); int opt = getopt(argc, argv, "la:r:");
if (-1 == opt) {
break;
}
switch (opt) {
case 'l':
act = ACT_ALL;
break;
case 'a':
act = ACT_ADD;
val = optarg;
break;
case 'r':
act = ACT_DEL;
val = optarg;
break;
default:
return usage(argv[0]);
}
}
if (argc - optind != 2) {
return usage(argv[0]);
} }
// Seed PRNG with some crap
{ {
struct timeval tv; struct timeval tv;
@ -105,8 +210,20 @@ main(int argc, char *argv[])
srand((unsigned int)(tv.tv_sec * tv.tv_usec)); srand((unsigned int)(tv.tv_sec * tv.tv_usec));
} }
filename = argv[1]; filename = argv[optind];
text = argv[2]; key = argv[optind + 1];
return lookup(filename, text); switch (act) {
case ACT_ONE:
return choose(filename, key);
case ACT_ALL:
return list(filename, key);
case ACT_ADD:
return add(filename, key, val);
default:
fprintf(stderr, "Not yet implemented, chump %s.\n", val);
break;
}
return 0;
} }