Looking up factoids now

This commit is contained in:
Neale Pickett 2009-02-09 22:06:14 -07:00
parent da4d22de5b
commit 9e247dd246
4 changed files with 381 additions and 1 deletions

View File

@ -5,7 +5,7 @@ OCAMLCFLAGS += -g
.DEFAULT: bot
OCamlProgram(bot, bot irc command iobuf dispatch)
OCamlProgram(bot, bot irc command iobuf dispatch cdb)
section
OCAMLPACKS[] +=

31
bot.ml
View File

@ -1,3 +1,16 @@
let info_db = Cdb.open_cdb_in "/home/neale/src/firebot/info.cdb"
let _ = Random.self_init ()
let choice l =
let n = Random.int (List.length l) in
List.nth l n
let get_one key =
let matches = Cdb.get_matches info_db key in
match Stream.npeek 120 matches with
| [] -> raise Not_found
| keys -> choice keys
let write iobuf command args text =
let cmd = Command.create None command args text in
print_endline ("--> " ^ (Command.as_string cmd));
@ -12,6 +25,24 @@ let handle_command iobuf cmd =
write iobuf "JOIN" ["#bot"] None
| (Some who, "JOIN", [], Some chan) ->
write iobuf "PRIVMSG" [chan] (Some "hi asl")
| (Some who, "PRIVMSG", [target], Some text) ->
if target.[0] = '#' then
try
let factoid = get_one text in
let response =
match factoid.[0] with
| ':' ->
"\001ACTION " ^ (Str.string_after factoid 1) ^ "\001"
| '\\' ->
Str.string_after factoid 1
| _ ->
Printf.sprintf "Gosh, %s, I think %s is %s" who text factoid
in
write iobuf "PRIVMSG" [target] (Some response)
with Not_found ->
()
else
()
| _ ->
()

309
cdb.ml Normal file
View File

@ -0,0 +1,309 @@
(*
* Copyright (c) 2003 Dustin Sallings <dustin@spy.net>
*
* arch-tag: 1E3B7401-2AE1-11D8-A379-000393CB0F1E
*)
(** CDB Implementation.
{{:http://cr.yp.to/cdb/cdb.txt} http://cr.yp.to/cdb/cdb.txt}
*)
(* The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting
hash of 5381.
*)
(** CDB creation handle. *)
type cdb_creator = {
table_count: int array;
(* Hash index pointers *)
mutable pointers: (Int32.t * Int32.t) list;
out: out_channel;
}
(** Initial hash value *)
let hash_init = Int64.of_int 5381
let ff64 = Int64.of_int 0xff
(* I need to do this of_string because it's larger than an ocaml int *)
let ffffffff64 = Int64.of_string "0xffffffff"
let ff32 = Int32.of_int 0xff
(** Hash the given string. *)
let hash s =
let h = ref hash_init in
String.iter (fun c -> h := Int64.logand ffffffff64 (Int64.logxor
(Int64.add (Int64.shift_left !h 5) !h)
(Int64.of_int (int_of_char c)))
) s;
Int64.to_int32 !h
let write_le cdc i =
output_byte cdc.out (i land 0xff);
output_byte cdc.out ((i lsr 8) land 0xff);
output_byte cdc.out ((i lsr 16) land 0xff);
output_byte cdc.out ((i lsr 24) land 0xff)
(** Write a little endian integer to the file *)
let write_le32 cdc i =
output_byte cdc.out (Int32.to_int (Int32.logand ff32 i));
output_byte cdc.out (Int32.to_int
(Int32.logand ff32 (Int32.shift_right_logical i 8)));
output_byte cdc.out (Int32.to_int
(Int32.logand ff32 (Int32.shift_right_logical i 16)));
output_byte cdc.out (Int32.to_int
(Int32.logand ff32 (Int32.shift_right_logical i 24)))
(**
Open a cdb creator for writing.
@param fn the file to write
*)
let open_out fn =
let s = { table_count=Array.make 256 0;
pointers=[];
out=open_out_bin fn
} in
(* Skip over the header *)
seek_out s.out 2048;
s
(**
Convert out_channel to cdb_creator.
@param out_channel the out_channel to convert
*)
let cdb_creator_of_out_channel out_channel =
let s = { table_count=Array.make 256 0;
pointers=[];
out=out_channel
} in
(* Skip over the header *)
seek_out s.out 2048;
s
let hash_to_table h =
Int32.to_int (Int32.logand h ff32)
let hash_to_bucket h len =
Int32.to_int (Int32.rem (Int32.shift_right_logical h 8) (Int32.of_int len))
let pos_out_32 x =
Int64.to_int32 (LargeFile.pos_out x)
(** Add a value to the cdb *)
let add cdc k v =
(* Add the hash to the list *)
let h = hash k in
cdc.pointers <- (h, pos_out_32 cdc.out) :: cdc.pointers;
let table = hash_to_table h in
cdc.table_count.(table) <- cdc.table_count.(table) + 1;
(* Add the data to the file *)
write_le cdc (String.length k);
write_le cdc (String.length v);
output_string cdc.out k;
output_string cdc.out v
(** Process a hash table *)
let process_table cdc table_start slot_table slot_pointers i tc =
(* Length of the table *)
let len = tc * 2 in
(* Store the table position *)
slot_table := (pos_out_32 cdc.out, Int32.of_int len) :: !slot_table;
(* Build the hash table *)
let ht = Array.make len None in
let cur_p = ref table_start.(i) in
(* Lookup entries by slot number *)
let lookupSlot x =
try Hashtbl.find slot_pointers x
with Not_found -> (Int32.zero,Int32.zero)
in
(* from 0 to tc-1 because the loop will run an extra time otherwise *)
for u = 0 to (tc - 1) do
let hp = lookupSlot !cur_p in
cur_p := !cur_p + 1;
(* Find an available hash bucket *)
let rec find_where where =
match ht.(where) with
| None ->
where
| _ ->
if ((where + 1) = len) then (find_where 0)
else (find_where (where + 1))
in
let where = find_where (hash_to_bucket (fst hp) len) in
ht.(where) <- Some hp;
done;
(* Write this hash table *)
Array.iter (fun hpp ->
let h,t = match hpp with
None -> Int32.zero,Int32.zero
| Some(h,t) -> h,t;
in
write_le32 cdc h; write_le32 cdc t
) ht
(** Close and finish the cdb creator. *)
let close_cdb_out cdc =
let cur_entry = ref 0 in
let table_start = Array.make 256 0 in
(* Find all the hash starts *)
Array.iteri (fun i x ->
cur_entry := !cur_entry + x;
table_start.(i) <- !cur_entry) cdc.table_count;
(* Build out the slot pointers hash *)
let slot_pointers = Hashtbl.create (List.length cdc.pointers) in
(* Fill in the slot pointers *)
List.iter (fun hp ->
let h = fst hp in
let table = hash_to_table h in
table_start.(table) <- table_start.(table) - 1;
Hashtbl.replace slot_pointers table_start.(table) hp;
) cdc.pointers;
(* Write the shit out *)
let slot_table = ref [] in
(* Write out the hash tables *)
Array.iteri (process_table cdc table_start slot_table slot_pointers)
cdc.table_count;
(* write out the pointer sets *)
seek_out cdc.out 0;
List.iter (fun x -> write_le32 cdc (fst x); write_le32 cdc (snd x))
(List.rev !slot_table);
close_out cdc.out
(** {1 Iterating a cdb file} *)
(* read a little-endian integer *)
let read_le f =
let a = (input_byte f) in
let b = (input_byte f) in
let c = (input_byte f) in
let d = (input_byte f) in
a lor (b lsl 8) lor (c lsl 16) lor (d lsl 24)
(* Int32 version of read_le *)
let read_le32 f =
let a = (input_byte f) in
let b = (input_byte f) in
let c = (input_byte f) in
let d = (input_byte f) in
Int32.logor (Int32.of_int (a lor (b lsl 8) lor (c lsl 16)))
(Int32.shift_left (Int32.of_int d) 24)
(**
Iterate a CDB.
@param f the function to call for every key/value pair
@param fn the name of the cdb to iterate
*)
let iter f fn =
let fin = open_in_bin fn in
try
(* Figure out where the end of all data is *)
let eod = read_le32 fin in
(* Seek to the record section *)
seek_in fin 2048;
let rec loop() =
(* (pos_in fin) < eod *)
if (Int32.compare (Int64.to_int32 (LargeFile.pos_in fin)) eod < 0)
then (
let klen = read_le fin in
let dlen = read_le fin in
let key = String.create klen in
let data = String.create dlen in
really_input fin key 0 klen;
really_input fin data 0 dlen;
f key data;
loop()
) in
loop();
close_in fin;
with x -> close_in fin; raise x;
(** {1 Searching } *)
(** Type type of a cdb_file. *)
type cdb_file = {
f: in_channel;
(* Position * length *)
tables: (Int32.t * int) array;
}
(** Open a CDB file for searching.
@param fn the file to open
*)
let open_cdb_in fn =
let fin = open_in_bin fn in
let tables = Array.make 256 (Int32.zero,0) in
(* Set the positions and lengths *)
Array.iteri (fun i it ->
let pos = read_le32 fin in
let len = read_le fin in
tables.(i) <- (pos,len)
) tables;
{f=fin; tables=tables}
(**
Close a cdb file.
@param cdf the cdb file to close
*)
let close_cdb_in cdf =
close_in cdf.f
(** Get a stream of matches.
@param cdf the cdb file
@param key the key to search
*)
let get_matches cdf key =
let kh = hash key in
(* Find out where the hash table is *)
let hpos, hlen = cdf.tables.(hash_to_table kh) in
let rec loop x =
if(x >= hlen) then (
None
) else (
(* Calculate the slot containing these entries *)
let lslot = ((hash_to_bucket kh hlen) + x) mod hlen in
let spos = Int32.add (Int32.of_int (lslot * 8)) hpos in
LargeFile.seek_in cdf.f (Int64.of_int32 spos);
let h = read_le32 cdf.f in
let pos = read_le32 cdf.f in
(* validate that we a real bucket *)
if (h = kh) && ((Int32.compare pos Int32.zero) > 0) then (
LargeFile.seek_in cdf.f (Int64.of_int32 pos);
let klen = read_le cdf.f in
if (klen = String.length key) then (
let dlen = read_le cdf.f in
let rkey = String.create klen in
really_input cdf.f rkey 0 klen;
if(rkey = key) then (
let rdata = String.create dlen in
really_input cdf.f rdata 0 dlen;
Some(rdata)
) else (
loop (x + 1)
)
) else (
loop (x + 1)
)
) else (
loop (x + 1)
)
) in
Stream.from loop
(**
Find the first record with the given key.
@param cdf the cdb_file
@param key the key to find
*)
let find cdf key =
try
Stream.next (get_matches cdf key)
with Stream.Failure ->
raise Not_found

40
cdb.mli Normal file
View File

@ -0,0 +1,40 @@
(** CDB Implementation.
{{:http://cr.yp.to/cdb/cdb.txt} http://cr.yp.to/cdb/cdb.txt}
*)
(** {1 Utilities} *)
val hash : string -> int32
(** {1 Building a CDB } *)
type cdb_creator = {
table_count : int array;
mutable pointers : (Int32.t * Int32.t) list;
out : out_channel;
}
val open_out : string -> cdb_creator
val cdb_creator_of_out_channel : Pervasives.out_channel -> cdb_creator
val add : cdb_creator -> string -> string -> unit
val close_cdb_out : cdb_creator -> unit
(** {1 Iterating a CDB } *)
val iter : (string -> string -> unit) -> string -> unit
(** {1 Searching } *)
type cdb_file = {
f: in_channel;
tables: (Int32.t * int) array;
}
val open_cdb_in : string -> cdb_file
val close_cdb_in : cdb_file -> unit
val get_matches : cdb_file -> string -> string Stream.t
val find : cdb_file -> string -> string
(*
* arch-tag: 55F4CBF0-2B50-11D8-BEDC-000393CFE6B8
*)