elements-ocaml/makedb.ml

125 lines
3 KiB
OCaml

(** SQL script that (re)creates the schema of the SQLite database.

    It drops the [elements] and [searchtable] tables if they exist and then
    creates both again as [WITHOUT ROWID] tables. The fragments below are
    joined with single spaces (plus one trailing space) into one string. *)
let inits =
  let drop_tables = [
    "DROP TABLE IF EXISTS elements;";
    "DROP TABLE IF EXISTS searchtable;";
  ] in
  let create_elements = [
    "CREATE TABLE elements (";
    "num INTEGER PRIMARY KEY NOT NULL,";
    "symb TEXT UNIQUE NOT NULL,";
    "name TEXT UNIQUE NOT NULL,";
    "mass FLOAT,";
    "cpk_color TEXT,";
    "e_config TEXT,";
    "e_neg FLOAT,";
    "rad FLOAT,";
    "ionization_e FLOAT,";
    "e_affinity FLOAT,";
    "oxidation TEXT,";
    "state TEXT,";
    "melting FLOAT,";
    "boiling FLOAT,";
    "density FLOAT,";
    "block TEXT,";
    "discovered INTEGER";
    ") WITHOUT ROWID;";
  ] in
  let create_searchtable = [
    "CREATE TABLE searchtable (";
    "name TEXT PRIMARY KEY NOT NULL,";
    "dat BLOB NOT NULL) WITHOUT ROWID;";
  ] in
  String.concat " " (drop_tables @ create_elements @ create_searchtable) ^ " "
(** [toarr json] returns the column names of the PubChem JSON document as a
    [string array].

    The names are read from the list at [Table.Columns.Column]; only the
    entries kept by [Yojson.Basic.Util.filter_string] end up in the array. *)
let toarr json =
  let module U = Yojson.Basic.Util in
  let column_node =
    U.member "Column" (U.member "Columns" (U.member "Table" json))
  in
  let column_names = U.filter_string (U.to_list column_node) in
  Array.of_list column_names
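(** The PubChem JSON file saves space by listing the column names only once,
in a single array at the top called [Column], rather than repeating them as
keys in every row; each row then holds its values by position. *)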
(** [allstep stmt] steps [stmt] until it stops producing rows, discarding
    every row, then resets [stmt] so it can be executed again.

    The first non-[ROW] return code of [step], and afterwards the return
    code of [reset], are handed to [Sqlite3.Rc.check], which raises on
    failure. *)
let allstep stmt =
  let rec drain () =
    match Sqlite3.step stmt with
    | Sqlite3.Rc.ROW -> drain ()
    | rc -> Sqlite3.Rc.check rc
  in
  drain ();
  Sqlite3.Rc.check (Sqlite3.reset stmt)
(** [makedb db json] initializes [db] from the PubChem periodic-table
    document [json].

    Steps, in order:
    - run [inits] to drop and recreate the [elements] and [searchtable]
      tables;
    - insert one [elements] row per entry of [Table.Row], matching each entry
      of the row's [Cell] list to the column name at the same index in
      [toarr json]; an empty string or a JSON [null] cell is stored as SQL
      [NULL];
    - build one fuzzy-matching model from all symbols and one from all names
      and store them in [searchtable] under the keys ["symb"] and ["name"].

    Both prepared statements are finalized even when an insert fails.

    Raises whatever [Sqlite3.Rc.check] raises on a failing return code.
    @raise Yojson.Basic.Util.Type_error if [json] lacks the expected shape or
      a cell is neither a string nor [null].
    @raise Invalid_argument if a row has more cells than there are columns. *)
let makedb db json =
  let open Sqlite3 in
  let module J = Yojson.Basic.Util in
  exec db inits |> Rc.check;
  let colarr = toarr json in
  (* Candidates for the fuzzy-matching models, collected while inserting. *)
  let symbs = ref [] in
  let names = ref [] in
  (* Bind one named parameter. In SQLite3 the leading ":" is part of the
     named parameter. *)
  let binds stmt s v = bind_name stmt (":" ^ s) v |> Rc.check in
  (* Runs [f] on a statement prepared from [sql] and always finalizes it, so
     a failure part-way through does not leave the statement open on [db]. *)
  let with_stmt sql f =
    let stmt = prepare db sql in
    match f stmt with
    | () -> finalize stmt |> Rc.check
    | exception e ->
      (* Report the original failure; the finalize status is not checked so
         that it cannot replace [e]. *)
      ignore (finalize stmt : Rc.t);
      raise e
  in
  let insert_sql = "INSERT INTO elements \
                    (num, \
                    symb, \
                    name, \
                    mass, \
                    cpk_color, \
                    e_config, \
                    e_neg, \
                    rad, \
                    ionization_e, \
                    e_affinity, \
                    oxidation, \
                    state, \
                    melting, \
                    boiling, \
                    density, \
                    block, \
                    discovered) \
                    VALUES \
                    (:AtomicNumber, \
                    :Symbol, \
                    :Name, \
                    :AtomicMass, \
                    :CPKHexColor, \
                    :ElectronConfiguration, \
                    :Electronegativity, \
                    :AtomicRadius, \
                    :IonizationEnergy, \
                    :ElectronAffinity, \
                    :OxidationStates, \
                    :StandardState, \
                    :MeltingPoint, \
                    :BoilingPoint, \
                    :Density, \
                    :GroupBlock, \
                    :YearDiscovered);"
  in
  (* Binds the [i]-th cell of a row to the parameter named after the [i]-th
     column. Cells are kept in place (not filtered) so that an unexpected
     value can never shift the following cells into the wrong columns. *)
  let bind_cell stmt i cell =
    let text = J.to_string_option cell in
    (match colarr.(i), text with
     | "Symbol", Some s -> symbs := (s, None) :: !symbs
     | "Name", Some s -> names := (s, None) :: !names
     | _ -> ());
    binds stmt colarr.(i)
      (match text with
       | None | Some "" -> Data.NULL
       | Some s -> Data.TEXT s)
  in
  let insert_row stmt row =
    (* [reset] keeps the previous bindings; clear them so a row with fewer
       cells than columns does not inherit values from the element before. *)
    clear_bindings stmt |> Rc.check;
    row |> J.member "Cell" |> J.to_list |> List.iteri (bind_cell stmt);
    allstep stmt
  in
  with_stmt insert_sql (fun stmt ->
      json |> J.member "Table" |> J.member "Row" |> J.to_list
      |> List.iter (insert_row stmt));
  (* Cache fuzzy matching automaton. *)
  let store_model stmt name vals =
    let model =
      Batch_jaro_winkler.build_exportable_model
        ~encoding:Batch_jaro_winkler.Encoding.UTF8 vals
    in
    binds stmt "name" (Data.TEXT name);
    binds stmt "cache" (Data.BLOB model);
    allstep stmt
  in
  with_stmt "INSERT INTO searchtable VALUES (:name, :cache);" (fun stmt ->
      store_model stmt "symb" !symbs;
      store_model stmt "name" !names)