125 lines
3 KiB
OCaml
125 lines
3 KiB
OCaml
(** Init script for the SQLite database. **)
|
|
let inits = "\
|
|
DROP TABLE IF EXISTS elements; \
|
|
DROP TABLE IF EXISTS searchtable; \
|
|
CREATE TABLE elements ( \
|
|
num INTEGER PRIMARY KEY NOT NULL, \
|
|
symb TEXT UNIQUE NOT NULL, \
|
|
name TEXT UNIQUE NOT NULL, \
|
|
mass FLOAT, \
|
|
cpk_color TEXT, \
|
|
e_config TEXT, \
|
|
e_neg FLOAT, \
|
|
rad FLOAT, \
|
|
ionization_e FLOAT, \
|
|
e_affinity FLOAT, \
|
|
oxidation TEXT, \
|
|
state TEXT, \
|
|
melting FLOAT, \
|
|
boiling FLOAT, \
|
|
density FLOAT, \
|
|
block TEXT, \
|
|
discovered INTEGER \
|
|
) WITHOUT ROWID; \
|
|
CREATE TABLE searchtable ( \
|
|
name TEXT PRIMARY KEY NOT NULL, \
|
|
dat BLOB NOT NULL) WITHOUT ROWID; \
|
|
"
|
|
|
|
(** Converts the column data in the PubChem JSON file to
|
|
a string option array. *)
|
|
let toarr json = let open Yojson.Basic.Util in
|
|
json |> member "Table" |> member "Columns" |> member "Column"
|
|
|> to_list |> filter_string |> Array.of_list
|
|
(** The PubChem JSON file is compressed by keeping all key values in
|
|
a single array at the top called [Column]. *)
|
|
|
|
(** Ignores all rows or throws an exception, then resets [stmt]. *)
|
|
let allstep stmt = let open Sqlite3 in
|
|
let rec f = function
|
|
| Rc.ROW -> f (step stmt)
|
|
| x -> Rc.check x
|
|
in
|
|
f (step stmt);
|
|
reset stmt |> Rc.check
|
|
|
|
(** Initializes [db] from [json]. *)
|
|
let makedb db json = let open Sqlite3
|
|
in let () = exec db inits |> Rc.check
|
|
in let symbs = ref []
|
|
in let names = ref []
|
|
in let colarr = toarr json
|
|
in let stmt = "INSERT INTO elements \
|
|
(num, \
|
|
symb, \
|
|
name, \
|
|
mass, \
|
|
cpk_color, \
|
|
e_config, \
|
|
e_neg, \
|
|
rad, \
|
|
ionization_e, \
|
|
e_affinity, \
|
|
oxidation, \
|
|
state, \
|
|
melting, \
|
|
boiling, \
|
|
density, \
|
|
block, \
|
|
discovered) \
|
|
VALUES \
|
|
(:AtomicNumber, \
|
|
:Symbol, \
|
|
:Name, \
|
|
:AtomicMass, \
|
|
:CPKHexColor, \
|
|
:ElectronConfiguration, \
|
|
:Electronegativity, \
|
|
:AtomicRadius, \
|
|
:IonizationEnergy, \
|
|
:ElectronAffinity, \
|
|
:OxidationStates, \
|
|
:StandardState, \
|
|
:MeltingPoint, \
|
|
:BoilingPoint, \
|
|
:Density, \
|
|
:GroupBlock, \
|
|
:YearDiscovered);"
|
|
|> prepare db
|
|
|
|
in let binds stmt s v = bind_name stmt (":" ^ s) v |> Rc.check
|
|
|
|
(* Bind each column name. In SQLite3 the leading ":" is part
|
|
of the named parameter. *)
|
|
in let prep i x =
|
|
(match colarr.(i) with
|
|
| "Symbol" -> symbs := (x,None)::!symbs
|
|
| "Name" -> names := (x,None)::!names
|
|
| _ -> ()
|
|
);
|
|
binds stmt colarr.(i)
|
|
(if x = "" then Data.NULL else Data.TEXT x)
|
|
|
|
(* Cache fuzzy matching automaton. *)
|
|
in let do_model stmt name vals =
|
|
let open Batch_jaro_winkler
|
|
in let model = build_exportable_model
|
|
~encoding:Encoding.UTF8 vals
|
|
in
|
|
binds stmt "name" (Data.TEXT name);
|
|
binds stmt "cache" (Data.BLOB model);
|
|
allstep stmt
|
|
|
|
in let open Yojson.Basic.Util
|
|
in let access_data x = x |> member "Cell" |> to_list
|
|
|> filter_string |> List.iteri prep;
|
|
allstep stmt
|
|
in json |> member "Table" |> member "Row" |> to_list
|
|
|> List.iter access_data;
|
|
finalize stmt |> Rc.check;
|
|
let stmt = "INSERT INTO searchtable VALUES (:name, :cache);"
|
|
|> prepare db
|
|
in
|
|
do_model stmt "symb" !symbs;
|
|
do_model stmt "name" !names;
|
|
finalize stmt |> Rc.check
|