diff --git a/Makefile b/Makefile index f59eb11..f51fac8 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,11 @@ CFLAGS = -Wall -Werror TARGETS = dispatch irc-filter irc-esc +TARGETS += infobot all: $(TARGETS) +infobot: infobot.o cdb.o + .PHONY: clean clean: rm -f $(TARGETS) *.o diff --git a/cdb.c b/cdb.c index ae5dcdd..e2cc7e9 100644 --- a/cdb.c +++ b/cdb.c @@ -3,25 +3,26 @@ #include #include "cdb.h" -/* Some things I use for debugging */ -#ifdef NODUMP -# define DUMPf(fmt, args...) -#else -# define DUMPf(fmt, args...) fprintf(stderr, "%s:%s:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##args) -#endif -#define DUMP() DUMPf("") -#define DUMP_u(v) DUMPf("%s = %u", #v, v) -#define DUMP_d(v) DUMPf("%s = %d", #v, v) -#define DUMP_x(v) DUMPf("%s = 0x%x", #v, v) -#define DUMP_s(v) DUMPf("%s = %s", #v, v) -#define DUMP_c(v) DUMPf("%s = %c", #v, v) -#define DUMP_p(v) DUMPf("%s = %p", #v, v) +/* + * + * CDB Interface + * + */ + +/* Why I am using stdio.h + * By Neale Pickett + * November, 2012 + * + * I am not as clever as the people who maintain libc. + * + * THE END + */ #ifndef min #define min(a,b) ((a)<(b)?(a):(b)) #endif -uint32_t +static uint32_t hash(char *s, size_t len) { uint32_t h = 5381; @@ -33,16 +34,8 @@ hash(char *s, size_t len) return h; } -int -usage() -{ - fprintf(stderr, "Usage: infobot\n"); - - return 0; -} - -uint32_t -read_u32(FILE *f) +static uint32_t +read_u32le(FILE *f) { uint8_t d[4]; @@ -53,17 +46,6 @@ read_u32(FILE *f) (d[3] << 24)); } - -int -bufcmp(char *a, size_t alen, char *b, size_t blen) -{ - if (alen == blen) { - return memcmp(a, b, blen); - } else { - return alen - blen; - } -} - void cdb_init(struct cdb_ctx *ctx, FILE *f) { @@ -81,8 +63,8 @@ cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen) /* Read pointer */ fseek(ctx->f, (ctx->hash_val % 256) * 8, SEEK_SET); - ctx->hash_pos = read_u32(ctx->f); - ctx->hash_len = read_u32(ctx->f); + ctx->hash_pos = read_u32le(ctx->f); + ctx->hash_len = read_u32le(ctx->f); ctx->entry = (ctx->hash_val / 256) % ctx->hash_len; } @@ -95,9 +77,11 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen) uint32_t dlen; for (;;) { - fseek(ctx->f, ctx->hash_pos + (ctx->entry++ * 8), SEEK_SET); - hashval = read_u32(ctx->f); - entry_pos = read_u32(ctx->f); + fseek(ctx->f, ctx->hash_pos + (ctx->entry * 8), SEEK_SET); + ctx->entry = (ctx->entry + 1) % ctx->hash_len; + + hashval = read_u32le(ctx->f); + entry_pos = read_u32le(ctx->f); if (entry_pos == 0) { break; } @@ -106,8 +90,8 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen) } fseek(ctx->f, entry_pos, SEEK_SET); - klen = read_u32(ctx->f); - dlen = read_u32(ctx->f); + klen = read_u32le(ctx->f); + dlen = read_u32le(ctx->f); if (klen == ctx->keylen) { uint32_t i; @@ -124,32 +108,14 @@ cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen) continue; } - return fread(buf, 1, min(dlen, buflen), ctx->f); + if (buf) { + return fread(buf, 1, min(dlen, buflen), ctx->f); + } else { + return dlen; + } } } return 0; } - -int -main(int argc, char *argv[]) -{ - if (1 == argc) { - return usage(); - } - - { - struct cdb_ctx ctx; - char buf[8192]; - int32_t r; - - cdb_init(&ctx, stdin); - cdb_find(&ctx, argv[1], strlen(argv[1])); - while ((r = cdb_next(&ctx, buf, sizeof buf))) { - printf("%.*s\n", r, buf); - } - } - - return 0; -} diff --git a/cdb.h b/cdb.h new file mode 100644 index 0000000..558e4b0 --- /dev/null +++ b/cdb.h @@ -0,0 +1,24 @@ +#ifndef __CDB_H__ +#define __CDB_H__ + +#include +#include + +struct cdb_ctx { + FILE *f; + + char *key; + uint32_t keylen; + + uint32_t hash_val; + uint32_t hash_pos; + uint32_t hash_len; + + uint32_t entry; +}; + +void cdb_init(struct cdb_ctx *ctx, FILE *f); +void cdb_find(struct cdb_ctx *ctx, char *key, uint32_t keylen); +uint32_t cdb_next(struct cdb_ctx *ctx, char *buf, uint32_t buflen); + +#endif diff --git a/infobot.c b/infobot.c index 5e41c00..8288688 100644 --- a/infobot.c +++ b/infobot.c @@ -3,12 +3,9 @@ #include #include #include -#include -#include - -const char *x_is_y = "It's been said that %s is %s"; -const char *added = "Okay, %s, I added a factoid to %s."; -const char *removed = "Okay, %s, I removed %d factoids from %s."; +#include +#include +#include "cdb.h" /* Some things I use for debugging */ #ifdef NODUMP @@ -24,119 +21,6 @@ const char *removed = "Okay, %s, I removed %d factoids from %s."; #define DUMP_c(v) DUMPf("%s = %c", #v, v) #define DUMP_p(v) DUMPf("%s = %p", #v, v) - -/* - * - * CDB Interface - * - */ - -/* Why I am using stdio.h - * By Neale Pickett - * November, 2012 - * - * I am not as clever as the people who maintain libc. - * - * THE END - */ - -static uint32_t -hash(char *s, size_t len) -{ - uint32_t h = 5381; - size_t i; - - for (i = 0; i < len; i += 1) { - h = ((h << 5) + h) ^ s[i]; - } - return h; -} - - -uint32_t -read_u32(FILE *f) -{ - uint8_t d[4]; - - fread(d, 4, 1, f); - return ((d[0]<< 0) | - (d[1] << 8) | - (d[2] << 16) | - (d[3] << 24)); -} - - -int -bufcmp(char *a, size_t alen, char *b, size_t blen) -{ - if (alen == blen) { - return memcmp(a, b, blen); - } else { - return alen - blen; - } -} - -int -lookup(FILE *f, char *key) -{ - size_t keylen = strlen(key); - uint32_t h = hash(key, keylen); - uint32_t p, plen; - uint32_t i; - - /* Read pointer */ - fseek(f, (h % 256) * 8, SEEK_SET); - p = read_u32(f); - plen = read_u32(f); - - /* Read hash table entries */ - for (i = (h / 256) % plen; i < plen; i += 1) { - uint32_t hashval; - uint32_t entry_pos; - uint32_t klen; - uint32_t dlen; - - fseek(f, p + (i * 8), SEEK_SET); - hashval = read_u32(f); - entry_pos = read_u32(f); - if (entry_pos == 0) { - break; - } - if (hashval != h) { - continue; - } - - fseek(f, entry_pos, SEEK_SET); - klen = read_u32(f); - dlen = read_u32(f); - - if (klen == keylen) { - uint32_t i; - - for (i = 0; i < klen; i += 1) { - int c = fgetc(f); - - if (c != key[i]) { - break; - } - } - - if (i < klen) { - continue; - } - - for (i = 0; i < dlen; i += 1) { - int c = fgetc(f); - - putchar(c); - } - } - } - - return 0; -} - - int usage() { @@ -166,7 +50,7 @@ infocmd(char *filename, char *text) int lookup(char *filename, char *text) { - struct cdb c; + struct cdb_ctx c; FILE *f = fopen(filename, "r"); size_t textlen = lowercase(text); uint32_t nresults; @@ -179,31 +63,28 @@ lookup(char *filename, char *text) cdb_init(&c, f); /* Count how many results there are */ - cdb_lookup(&c, text, textlen); - for (results = 0; cdb_next(&c, NULL, 0); results += 1); + cdb_find(&c, text, textlen); + for (nresults = 0; cdb_next(&c, NULL, 0); nresults += 1); if (nresults > 0) { /* This is horrible: say rand() returned between 0 and 2, and results * was 2. Possible values would be (0, 1, 0): not a uniform * distribution. But this is random enough for our purposes. */ - uint32_t which = rand() % results; + uint32_t which = rand() % nresults; + uint32_t vallen; char val[8192]; + uint32_t i; - cdb_lookup(&c, text, textlen); - for (results = 0; results < which; results += 1) { + cdb_find(&c, text, textlen); + for (i = 0; i < which; i += 1) { cdb_next(&c, NULL, 0); } - cdb_next(&c, val, sizeof val); - - if (val[0] == '"') { - printf("%s\n", val + 1); - } else if (val[0] == ':') { - printf("\001ACTION %s\001\n", val + 1); - } else { - printf(x_is_y, text, val); - } + vallen = cdb_next(&c, val, sizeof val); + printf("%.*s\n", vallen, val); } + fclose(f); + return 0; } @@ -217,13 +98,15 @@ main(int argc, char *argv[]) return usage(); } - srand((unsigned int)time(NULL)); + { + struct timeval tv; + + gettimeofday(&tv, NULL); + srand((unsigned int)(tv.tv_sec * tv.tv_usec)); + } filename = argv[1]; text = argv[2]; - if ('!' == text[0]) { - return infocmd(filename, text + 1); - } return lookup(filename, text); }