diff --git a/OMakefile b/OMakefile index 50d5312..a6364f3 100644 --- a/OMakefile +++ b/OMakefile @@ -5,7 +5,7 @@ OCAMLCFLAGS += -g .DEFAULT: bot -OCamlProgram(bot, bot irc command iobuf dispatch) +OCamlProgram(bot, bot irc command iobuf dispatch cdb) section OCAMLPACKS[] += diff --git a/bot.ml b/bot.ml index d26528f..b48ed40 100644 --- a/bot.ml +++ b/bot.ml @@ -1,3 +1,16 @@ +let info_db = Cdb.open_cdb_in "/home/neale/src/firebot/info.cdb" +let _ = Random.self_init () + +let choice l = + let n = Random.int (List.length l) in + List.nth l n + +let get_one key = + let matches = Cdb.get_matches info_db key in + match Stream.npeek 120 matches with + | [] -> raise Not_found + | keys -> choice keys + let write iobuf command args text = let cmd = Command.create None command args text in print_endline ("--> " ^ (Command.as_string cmd)); @@ -12,6 +25,24 @@ let handle_command iobuf cmd = write iobuf "JOIN" ["#bot"] None | (Some who, "JOIN", [], Some chan) -> write iobuf "PRIVMSG" [chan] (Some "hi asl") + | (Some who, "PRIVMSG", [target], Some text) -> + if target.[0] = '#' then + try + let factoid = get_one text in + let response = + match factoid.[0] with + | ':' -> + "\001ACTION " ^ (Str.string_after factoid 1) ^ "\001" + | '\\' -> + Str.string_after factoid 1 + | _ -> + Printf.sprintf "Gosh, %s, I think %s is %s" who text factoid + in + write iobuf "PRIVMSG" [target] (Some response) + with Not_found -> + () + else + () | _ -> () diff --git a/cdb.ml b/cdb.ml new file mode 100644 index 0000000..85e6f05 --- /dev/null +++ b/cdb.ml @@ -0,0 +1,309 @@ +(* + * Copyright (c) 2003 Dustin Sallings + * + * arch-tag: 1E3B7401-2AE1-11D8-A379-000393CB0F1E + *) + +(** CDB Implementation. + {{:http://cr.yp.to/cdb/cdb.txt} http://cr.yp.to/cdb/cdb.txt} + *) + +(* The cdb hash function is ``h = ((h << 5) + h) ^ c'', with a starting + hash of 5381. + *) + +(** CDB creation handle. *) +type cdb_creator = { + table_count: int array; + (* Hash index pointers *) + mutable pointers: (Int32.t * Int32.t) list; + out: out_channel; +} + +(** Initial hash value *) +let hash_init = Int64.of_int 5381 + +let ff64 = Int64.of_int 0xff +(* I need to do this of_string because it's larger than an ocaml int *) +let ffffffff64 = Int64.of_string "0xffffffff" +let ff32 = Int32.of_int 0xff + +(** Hash the given string. *) +let hash s = + let h = ref hash_init in + String.iter (fun c -> h := Int64.logand ffffffff64 (Int64.logxor + (Int64.add (Int64.shift_left !h 5) !h) + (Int64.of_int (int_of_char c))) + ) s; + Int64.to_int32 !h + +let write_le cdc i = + output_byte cdc.out (i land 0xff); + output_byte cdc.out ((i lsr 8) land 0xff); + output_byte cdc.out ((i lsr 16) land 0xff); + output_byte cdc.out ((i lsr 24) land 0xff) + +(** Write a little endian integer to the file *) +let write_le32 cdc i = + output_byte cdc.out (Int32.to_int (Int32.logand ff32 i)); + output_byte cdc.out (Int32.to_int + (Int32.logand ff32 (Int32.shift_right_logical i 8))); + output_byte cdc.out (Int32.to_int + (Int32.logand ff32 (Int32.shift_right_logical i 16))); + output_byte cdc.out (Int32.to_int + (Int32.logand ff32 (Int32.shift_right_logical i 24))) + +(** + Open a cdb creator for writing. + + @param fn the file to write + *) +let open_out fn = + let s = { table_count=Array.make 256 0; + pointers=[]; + out=open_out_bin fn + } in + (* Skip over the header *) + seek_out s.out 2048; + s + +(** + Convert out_channel to cdb_creator. + + @param out_channel the out_channel to convert + *) +let cdb_creator_of_out_channel out_channel = + let s = { table_count=Array.make 256 0; + pointers=[]; + out=out_channel + } in + (* Skip over the header *) + seek_out s.out 2048; + s + +let hash_to_table h = + Int32.to_int (Int32.logand h ff32) + +let hash_to_bucket h len = + Int32.to_int (Int32.rem (Int32.shift_right_logical h 8) (Int32.of_int len)) + +let pos_out_32 x = + Int64.to_int32 (LargeFile.pos_out x) + +(** Add a value to the cdb *) +let add cdc k v = + (* Add the hash to the list *) + let h = hash k in + cdc.pointers <- (h, pos_out_32 cdc.out) :: cdc.pointers; + let table = hash_to_table h in + cdc.table_count.(table) <- cdc.table_count.(table) + 1; + + (* Add the data to the file *) + write_le cdc (String.length k); + write_le cdc (String.length v); + output_string cdc.out k; + output_string cdc.out v + +(** Process a hash table *) +let process_table cdc table_start slot_table slot_pointers i tc = + (* Length of the table *) + let len = tc * 2 in + (* Store the table position *) + slot_table := (pos_out_32 cdc.out, Int32.of_int len) :: !slot_table; + (* Build the hash table *) + let ht = Array.make len None in + let cur_p = ref table_start.(i) in + (* Lookup entries by slot number *) + let lookupSlot x = + try Hashtbl.find slot_pointers x + with Not_found -> (Int32.zero,Int32.zero) + in + (* from 0 to tc-1 because the loop will run an extra time otherwise *) + for u = 0 to (tc - 1) do + let hp = lookupSlot !cur_p in + cur_p := !cur_p + 1; + + (* Find an available hash bucket *) + let rec find_where where = + match ht.(where) with + | None -> + where + | _ -> + if ((where + 1) = len) then (find_where 0) + else (find_where (where + 1)) + in + let where = find_where (hash_to_bucket (fst hp) len) in + ht.(where) <- Some hp; + done; + (* Write this hash table *) + Array.iter (fun hpp -> + let h,t = match hpp with + None -> Int32.zero,Int32.zero + | Some(h,t) -> h,t; + in + write_le32 cdc h; write_le32 cdc t + ) ht + +(** Close and finish the cdb creator. *) +let close_cdb_out cdc = + let cur_entry = ref 0 in + let table_start = Array.make 256 0 in + (* Find all the hash starts *) + Array.iteri (fun i x -> + cur_entry := !cur_entry + x; + table_start.(i) <- !cur_entry) cdc.table_count; + (* Build out the slot pointers hash *) + let slot_pointers = Hashtbl.create (List.length cdc.pointers) in + (* Fill in the slot pointers *) + List.iter (fun hp -> + let h = fst hp in + let table = hash_to_table h in + table_start.(table) <- table_start.(table) - 1; + Hashtbl.replace slot_pointers table_start.(table) hp; + ) cdc.pointers; + (* Write the shit out *) + let slot_table = ref [] in + (* Write out the hash tables *) + Array.iteri (process_table cdc table_start slot_table slot_pointers) + cdc.table_count; + (* write out the pointer sets *) + seek_out cdc.out 0; + List.iter (fun x -> write_le32 cdc (fst x); write_le32 cdc (snd x)) + (List.rev !slot_table); + close_out cdc.out + +(** {1 Iterating a cdb file} *) + +(* read a little-endian integer *) +let read_le f = + let a = (input_byte f) in + let b = (input_byte f) in + let c = (input_byte f) in + let d = (input_byte f) in + a lor (b lsl 8) lor (c lsl 16) lor (d lsl 24) + +(* Int32 version of read_le *) +let read_le32 f = + let a = (input_byte f) in + let b = (input_byte f) in + let c = (input_byte f) in + let d = (input_byte f) in + Int32.logor (Int32.of_int (a lor (b lsl 8) lor (c lsl 16))) + (Int32.shift_left (Int32.of_int d) 24) + +(** + Iterate a CDB. + + @param f the function to call for every key/value pair + @param fn the name of the cdb to iterate + *) +let iter f fn = + let fin = open_in_bin fn in + try + (* Figure out where the end of all data is *) + let eod = read_le32 fin in + (* Seek to the record section *) + seek_in fin 2048; + let rec loop() = + (* (pos_in fin) < eod *) + if (Int32.compare (Int64.to_int32 (LargeFile.pos_in fin)) eod < 0) + then ( + let klen = read_le fin in + let dlen = read_le fin in + let key = String.create klen in + let data = String.create dlen in + really_input fin key 0 klen; + really_input fin data 0 dlen; + f key data; + loop() + ) in + loop(); + close_in fin; + with x -> close_in fin; raise x; + +(** {1 Searching } *) + +(** Type type of a cdb_file. *) +type cdb_file = { + f: in_channel; + (* Position * length *) + tables: (Int32.t * int) array; +} + +(** Open a CDB file for searching. + + @param fn the file to open + *) +let open_cdb_in fn = + let fin = open_in_bin fn in + let tables = Array.make 256 (Int32.zero,0) in + (* Set the positions and lengths *) + Array.iteri (fun i it -> + let pos = read_le32 fin in + let len = read_le fin in + tables.(i) <- (pos,len) + ) tables; + {f=fin; tables=tables} + +(** + Close a cdb file. + + @param cdf the cdb file to close + *) +let close_cdb_in cdf = + close_in cdf.f + +(** Get a stream of matches. + + @param cdf the cdb file + @param key the key to search + *) +let get_matches cdf key = + let kh = hash key in + (* Find out where the hash table is *) + let hpos, hlen = cdf.tables.(hash_to_table kh) in + let rec loop x = + if(x >= hlen) then ( + None + ) else ( + (* Calculate the slot containing these entries *) + let lslot = ((hash_to_bucket kh hlen) + x) mod hlen in + let spos = Int32.add (Int32.of_int (lslot * 8)) hpos in + LargeFile.seek_in cdf.f (Int64.of_int32 spos); + let h = read_le32 cdf.f in + let pos = read_le32 cdf.f in + (* validate that we a real bucket *) + if (h = kh) && ((Int32.compare pos Int32.zero) > 0) then ( + LargeFile.seek_in cdf.f (Int64.of_int32 pos); + let klen = read_le cdf.f in + if (klen = String.length key) then ( + let dlen = read_le cdf.f in + let rkey = String.create klen in + really_input cdf.f rkey 0 klen; + if(rkey = key) then ( + let rdata = String.create dlen in + really_input cdf.f rdata 0 dlen; + Some(rdata) + ) else ( + loop (x + 1) + ) + ) else ( + loop (x + 1) + ) + ) else ( + loop (x + 1) + ) + ) in + Stream.from loop + +(** + Find the first record with the given key. + + @param cdf the cdb_file + @param key the key to find + *) +let find cdf key = + try + Stream.next (get_matches cdf key) + with Stream.Failure -> + raise Not_found diff --git a/cdb.mli b/cdb.mli new file mode 100644 index 0000000..e78aa22 --- /dev/null +++ b/cdb.mli @@ -0,0 +1,40 @@ + +(** CDB Implementation. + {{:http://cr.yp.to/cdb/cdb.txt} http://cr.yp.to/cdb/cdb.txt} + *) + +(** {1 Utilities} *) +val hash : string -> int32 + +(** {1 Building a CDB } *) + +type cdb_creator = { + table_count : int array; + mutable pointers : (Int32.t * Int32.t) list; + out : out_channel; +} +val open_out : string -> cdb_creator +val cdb_creator_of_out_channel : Pervasives.out_channel -> cdb_creator +val add : cdb_creator -> string -> string -> unit +val close_cdb_out : cdb_creator -> unit + +(** {1 Iterating a CDB } *) + +val iter : (string -> string -> unit) -> string -> unit + +(** {1 Searching } *) + +type cdb_file = { + f: in_channel; + tables: (Int32.t * int) array; +} + +val open_cdb_in : string -> cdb_file +val close_cdb_in : cdb_file -> unit + +val get_matches : cdb_file -> string -> string Stream.t +val find : cdb_file -> string -> string + +(* + * arch-tag: 55F4CBF0-2B50-11D8-BEDC-000393CFE6B8 + *)