get infobot.c working with cdb.c

This commit is contained in:
Neale Pickett 2012-11-19 14:53:00 -07:00
parent 47bf8f7fbd
commit 11bb1e9774
4 changed files with 79 additions and 203 deletions

View File

@ -1,8 +1,11 @@
CFLAGS = -Wall -Werror CFLAGS = -Wall -Werror
TARGETS = dispatch irc-filter irc-esc TARGETS = dispatch irc-filter irc-esc
TARGETS += infobot
all: $(TARGETS) all: $(TARGETS)
infobot: infobot.o cdb.o
.PHONY: clean .PHONY: clean
clean: clean:
rm -f $(TARGETS) *.o rm -f $(TARGETS) *.o

94
cdb.c
View File

@ -3,25 +3,26 @@
#include <stdint.h> #include <stdint.h>
#include "cdb.h" #include "cdb.h"
/* Some things I use for debugging */ /*
#ifdef NODUMP *
# define DUMPf(fmt, args...) * CDB Interface
#else *
# define DUMPf(fmt, args...) fprintf(stderr, "%s:%s:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##args) */
#endif
#define DUMP() DUMPf("") /* Why I am using stdio.h
#define DUMP_u(v) DUMPf("%s = %u", #v, v) * By Neale Pickett
#define DUMP_d(v) DUMPf("%s = %d", #v, v) * November, 2012
#define DUMP_x(v) DUMPf("%s = 0x%x", #v, v) *
#define DUMP_s(v) DUMPf("%s = %s", #v, v) * I am not as clever as the people who maintain libc.
#define DUMP_c(v) DUMPf("%s = %c", #v, v) *
#define DUMP_p(v) DUMPf("%s = %p", #v, v) * THE END
*/
#ifndef min #ifndef min
#define min(a,b) ((a)<(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b))
#endif #endif
uint32_t static uint32_t
hash(char *s, size_t len) hash(char *s, size_t len)
{ {
uint32_t h = 5381; uint32_t h = 5381;
@ -33,16 +34,8 @@ hash(char *s, size_t len)
return h; return h;
} }
int static uint32_t
usage() read_u32le(FILE *f)
{
fprintf(stderr, "Usage: infobot\n");
return 0;
}
uint32_t
read_u32(FILE *f)
{ {
uint8_t d[4]; uint8_t d[4];
@ -53,17 +46,6 @@ read_u32(FILE *f)
(d[3] << 24)); (d[3] << 24));
} }
/*
 * Compare two length-delimited buffers.
 *
 * Buffers of equal length are ordered by memcmp().  Buffers of different
 * length are ordered by length alone: the shorter one sorts first.
 *
 * Returns 0 when the buffers are identical, a negative value when a sorts
 * before b, and a positive value when a sorts after b.
 */
int
bufcmp(char *a, size_t alen, char *b, size_t blen)
{
    if (alen == blen) {
        return memcmp(a, b, blen);
    }

    /* Do not return (alen - blen): the size_t difference wraps around and
     * is then narrowed to int, which can yield the wrong sign (or even 0)
     * for buffers whose lengths differ. */
    return (alen < blen) ? -1 : 1;
}
void void
cdb_init(struct cdb_ctx *ctx, FILE *f) cdb_init(struct cdb_ctx *ctx, FILE *f)
{ {
@ -81,8 +63,8 @@ cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen)
/* Read pointer */ /* Read pointer */
fseek(ctx->f, (ctx->hash_val % 256) * 8, SEEK_SET); fseek(ctx->f, (ctx->hash_val % 256) * 8, SEEK_SET);
ctx->hash_pos = read_u32(ctx->f); ctx->hash_pos = read_u32le(ctx->f);
ctx->hash_len = read_u32(ctx->f); ctx->hash_len = read_u32le(ctx->f);
ctx->entry = (ctx->hash_val / 256) % ctx->hash_len; ctx->entry = (ctx->hash_val / 256) % ctx->hash_len;
} }
@ -95,9 +77,11 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen)
uint32_t dlen; uint32_t dlen;
for (;;) { for (;;) {
fseek(ctx->f, ctx->hash_pos + (ctx->entry++ * 8), SEEK_SET); fseek(ctx->f, ctx->hash_pos + (ctx->entry * 8), SEEK_SET);
hashval = read_u32(ctx->f); ctx->entry = (ctx->entry + 1) % ctx->hash_len;
entry_pos = read_u32(ctx->f);
hashval = read_u32le(ctx->f);
entry_pos = read_u32le(ctx->f);
if (entry_pos == 0) { if (entry_pos == 0) {
break; break;
} }
@ -106,8 +90,8 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen)
} }
fseek(ctx->f, entry_pos, SEEK_SET); fseek(ctx->f, entry_pos, SEEK_SET);
klen = read_u32(ctx->f); klen = read_u32le(ctx->f);
dlen = read_u32(ctx->f); dlen = read_u32le(ctx->f);
if (klen == ctx->keylen) { if (klen == ctx->keylen) {
uint32_t i; uint32_t i;
@ -124,32 +108,14 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen)
continue; continue;
} }
if (buf) {
return fread(buf, 1, min(dlen, buflen), ctx->f); return fread(buf, 1, min(dlen, buflen), ctx->f);
} else {
return dlen;
}
} }
} }
return 0; return 0;
} }
/*
 * Command-line driver: treat stdin as a CDB file and print, one per line,
 * every value cdb_next() yields for the key given as argv[1].
 *
 * When invoked with no arguments, the result of usage() is returned instead.
 */
int
main(int argc, char *argv[])
{
    struct cdb_ctx ctx;
    char buf[8192];
    int32_t nread;

    if (argc == 1) {
        return usage();
    }

    cdb_init(&ctx, stdin);
    cdb_find(&ctx, argv[1], strlen(argv[1]));

    /* cdb_next() returning 0 ends the iteration. */
    for (nread = cdb_next(&ctx, buf, sizeof buf);
         nread != 0;
         nread = cdb_next(&ctx, buf, sizeof buf)) {
        printf("%.*s\n", nread, buf);
    }

    return 0;
}

24
cdb.h Normal file
View File

@ -0,0 +1,24 @@
/* Guard name avoids a leading double underscore: identifiers of that form
 * are reserved for the C implementation. */
#ifndef CDB_H
#define CDB_H

#include <stdio.h>
#include <stdint.h>

/*
 * State of one key lookup in a CDB file.
 *
 * The fields are maintained by cdb_init(), cdb_find() and cdb_next();
 * callers are presumably expected to treat them as private (confirm
 * against cdb.c).
 */
struct cdb_ctx {
    FILE *f;            /* stream the database is read from */
    char *key;          /* key being searched for (presumably borrowed from
                         * the cdb_find() caller, not copied -- confirm) */
    uint32_t keylen;    /* length of key in bytes */
    uint32_t hash_val;  /* hash of key; selects the pointer slot and the
                         * starting hash table entry */
    uint32_t hash_pos;  /* file offset of the hash table chosen for key */
    uint32_t hash_len;  /* number of entries in that hash table */
    uint32_t entry;     /* index of the next hash table entry to examine */
};

/* Prepare ctx for lookups on the stream f. */
void cdb_init(struct cdb_ctx *ctx, FILE *f);

/* Begin a search for the keylen-byte key; follow up with cdb_next(). */
void cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen);

/*
 * Fetch the next value stored under the key given to cdb_find(), copying at
 * most buflen bytes of it into buf.  Returns the number of bytes copied;
 * 0 indicates that no further matching record was found.
 */
uint32_t cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen);

#endif /* CDB_H */

159
infobot.c
View File

@ -3,12 +3,9 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <ctype.h> #include <ctype.h>
#include <time.h> #include <sys/time.h>
#include <sysexit.h> #include <sysexits.h>
#include "cdb.h"
const char *x_is_y = "It's been said that %s is %s";
const char *added = "Okay, %s, I added a factoid to %s.";
const char *removed = "Okay, %s, I removed %d factoids from %s.";
/* Some things I use for debugging */ /* Some things I use for debugging */
#ifdef NODUMP #ifdef NODUMP
@ -24,119 +21,6 @@ const char *removed = "Okay, %s, I removed %d factoids from %s.";
#define DUMP_c(v) DUMPf("%s = %c", #v, v) #define DUMP_c(v) DUMPf("%s = %c", #v, v)
#define DUMP_p(v) DUMPf("%s = %p", #v, v) #define DUMP_p(v) DUMPf("%s = %p", #v, v)
/*
*
* CDB Interface
*
*/
/* Why I am using stdio.h
* By Neale Pickett
* November, 2012
*
* I am not as clever as the people who maintain libc.
*
* THE END
*/
/*
 * Compute the CDB hash of the len bytes starting at s.
 *
 * The hash starts at 5381 and, for every byte c, becomes
 * ((h << 5) + h) ^ c, with all arithmetic modulo 2^32.
 */
static uint32_t
hash(char *s, size_t len)
{
    uint32_t h = 5381;
    size_t i;

    for (i = 0; i < len; i += 1) {
        /* Each byte must be taken as unsigned.  Where plain char is signed,
         * a byte >= 0x80 would otherwise be sign-extended and flip the
         * upper 24 bits of the hash. */
        h = ((h << 5) + h) ^ (uint32_t)(unsigned char)s[i];
    }
    return h;
}
/*
 * Read a 32-bit little-endian unsigned integer from the current position
 * of f.
 *
 * Returns the decoded value, or 0 if four bytes could not be read (end of
 * file or read error).
 */
uint32_t
read_u32(FILE *f)
{
    uint8_t d[4];

    /* A short read would leave d partly uninitialized; report 0 rather
     * than decoding indeterminate bytes. */
    if (fread(d, sizeof d, 1, f) != 1) {
        return 0;
    }

    /* Widen before shifting: a uint8_t promotes to int, and shifting a
     * byte >= 0x80 left by 24 would overflow a 32-bit int. */
    return (((uint32_t)d[0] << 0) |
            ((uint32_t)d[1] << 8) |
            ((uint32_t)d[2] << 16) |
            ((uint32_t)d[3] << 24));
}
/*
 * Compare two length-delimited buffers.
 *
 * Buffers of equal length are ordered by memcmp().  Buffers of different
 * length are ordered by length alone: the shorter one sorts first.
 *
 * Returns 0 when the buffers are identical, a negative value when a sorts
 * before b, and a positive value when a sorts after b.
 */
int
bufcmp(char *a, size_t alen, char *b, size_t blen)
{
    if (alen == blen) {
        return memcmp(a, b, blen);
    }

    /* Do not return (alen - blen): the size_t difference wraps around and
     * is then narrowed to int, which can yield the wrong sign (or even 0)
     * for buffers whose lengths differ. */
    return (alen < blen) ? -1 : 1;
}
/*
 * Write to stdout the value of every record in the CDB file f whose key
 * equals the NUL-terminated string key.  Values are written back to back,
 * with no separator.
 *
 * Always returns 0.
 */
int
lookup(FILE *f, char *key)
{
    size_t keylen = strlen(key);
    uint32_t h = hash(key, keylen);
    uint32_t p, plen;
    uint32_t slot;
    uint32_t probes;

    /* Read pointer */
    fseek(f, (h % 256) * 8, SEEK_SET);
    p = read_u32(f);
    plen = read_u32(f);

    /* An empty hash table holds no records; bail out before plen is used
     * as a divisor. */
    if (plen == 0) {
        return 0;
    }

    /* Read hash table entries.  Probing starts at the slot derived from the
     * hash and wraps around the end of the table; it visits at most plen
     * slots so a table with no empty slot cannot loop forever. */
    slot = (h / 256) % plen;
    for (probes = 0; probes < plen; probes += 1) {
        uint32_t hashval;
        uint32_t entry_pos;
        uint32_t klen;
        uint32_t dlen;
        uint32_t i;

        fseek(f, p + (slot * 8), SEEK_SET);
        slot = (slot + 1) % plen;

        hashval = read_u32(f);
        entry_pos = read_u32(f);

        /* An empty slot terminates the probe sequence. */
        if (entry_pos == 0) {
            break;
        }
        if (hashval != h) {
            continue;
        }

        fseek(f, entry_pos, SEEK_SET);
        klen = read_u32(f);
        dlen = read_u32(f);
        if (klen != keylen) {
            continue;
        }

        /* fgetc() yields the byte as an unsigned char value, so the key
         * byte must be compared the same way. */
        for (i = 0; i < klen; i += 1) {
            int c = fgetc(f);

            if (c != (unsigned char)key[i]) {
                break;
            }
        }
        if (i < klen) {
            continue;
        }

        for (i = 0; i < dlen; i += 1) {
            int c = fgetc(f);

            /* Truncated file: stop instead of emitting EOF as data. */
            if (c == EOF) {
                break;
            }
            putchar(c);
        }
    }

    return 0;
}
int int
usage() usage()
{ {
@ -166,7 +50,7 @@ infocmd(char *filename, char *text)
int int
lookup(char *filename, char *text) lookup(char *filename, char *text)
{ {
struct cdb c; struct cdb_ctx c;
FILE *f = fopen(filename, "r"); FILE *f = fopen(filename, "r");
size_t textlen = lowercase(text); size_t textlen = lowercase(text);
uint32_t nresults; uint32_t nresults;
@ -179,30 +63,27 @@ lookup(char *filename, char *text)
cdb_init(&c, f); cdb_init(&c, f);
/* Count how many results there are */ /* Count how many results there are */
cdb_lookup(&c, text, textlen); cdb_find(&c, text, textlen);
for (results = 0; cdb_next(&c, NULL, 0); results += 1); for (nresults = 0; cdb_next(&c, NULL, 0); nresults += 1);
if (nresults > 0) { if (nresults > 0) {
/* This is horrible: say rand() returned between 0 and 2, and results /* This is horrible: say rand() returned between 0 and 2, and results
* was 2. Possible values would be (0, 1, 0): not a uniform * was 2. Possible values would be (0, 1, 0): not a uniform
* distribution. But this is random enough for our purposes. */ * distribution. But this is random enough for our purposes. */
uint32_t which = rand() % results; uint32_t which = rand() % nresults;
uint32_t vallen;
char val[8192]; char val[8192];
uint32_t i;
cdb_lookup(&c, text, textlen); cdb_find(&c, text, textlen);
for (results = 0; results < which; results += 1) { for (i = 0; i < which; i += 1) {
cdb_next(&c, NULL, 0); cdb_next(&c, NULL, 0);
} }
cdb_next(&c, val, sizeof val); vallen = cdb_next(&c, val, sizeof val);
printf("%.*s\n", vallen, val);
}
if (val[0] == '"') { fclose(f);
printf("%s\n", val + 1);
} else if (val[0] == ':') {
printf("\001ACTION %s\001\n", val + 1);
} else {
printf(x_is_y, text, val);
}
}
return 0; return 0;
} }
@ -217,13 +98,15 @@ main(int argc, char *argv[])
return usage(); return usage();
} }
srand((unsigned int)time(NULL)); {
struct timeval tv;
gettimeofday(&tv, NULL);
srand((unsigned int)(tv.tv_sec * tv.tv_usec));
}
filename = argv[1]; filename = argv[1];
text = argv[2]; text = argv[2];
if ('!' == text[0]) {
return infocmd(filename, text + 1);
}
return lookup(filename, text); return lookup(filename, text);
} }