(** Init script for the SQLite database. **) let inits = "\ DROP TABLE IF EXISTS elements; \ DROP TABLE IF EXISTS cache; \ CREATE TABLE elements ( \ num INTEGER PRIMARY KEY NOT NULL, \ symb TEXT UNIQUE NOT NULL, \ name TEXT UNIQUE NOT NULL, mass FLOAT, \ cpk_color TEXT, e_config TEXT, e_neg FLOAT, rad FLOAT, \ ionization_e FLOAT, e_affinity FLOAT, \ oxidation TEXT, state TEXT, \ melting FLOAT, boiling FLOAT, density FLOAT, \ block TEXT, discovered INTEGER) WITHOUT ROWID; \ CREATE TABLE cache ( \ name TEXT PRIMARY KEY NOT NULL, \ dat BLOB NOT NULL) WITHOUT ROWID; \ " (** Converts the column data in the PubChem JSON file to a string option array. *) let toarr json = let open Yojson.Basic.Util in json |> member "Table" |> member "Columns" |> member "Column" |> to_list |> filter_string |> Array.of_list (** The PubChem JSON file is compressed by keeping all key values in a single array at the top called [Column]. *) (** Ignores all rows or throws an exception, then resets [stmt]. *) let allstep stmt = let open Sqlite3 in let rec f = function | Rc.ROW -> f (step stmt) | x -> Rc.check x in f (step stmt); reset stmt |> Rc.check (** Initializes [db] from [json]. *) let makedb db json = let open Sqlite3 in let () = exec db inits |> Rc.check in let symbs = ref [] in let names = ref [] in let colarr = toarr json in let stmt = "INSERT INTO elements \ (num, symb, name, mass, cpk_color, e_config, e_neg, \ rad, ionization_e, e_affinity, oxidation, state, \ melting, boiling, density, block, discovered) VALUES \ (:AtomicNumber, :Symbol, :Name, :AtomicMass, \ :CPKHexColor, :ElectronConfiguration, \ :Electronegativity, :AtomicRadius, :IonizationEnergy, \ :ElectronAffinity, :OxidationStates, :StandardState, \ :MeltingPoint, :BoilingPoint, :Density, :GroupBlock, \ :YearDiscovered);" |> prepare db in let binds stmt s v = bind_name stmt (":" ^ s) v |> Rc.check (* Bind each column name. In SQLite3 the leading ":" is part of the named parameter. *) in let prep i x = (match colarr.(i) with | "Symbol" -> symbs := (x,None)::!symbs | "Name" -> names := (x,None)::!names | _ -> () ); binds stmt colarr.(i) (if x = "" then Data.NULL else Data.TEXT x) (* Cache fuzzy matching automaton. *) in let do_model name vals = let stmt = "INSERT INTO cache VALUES (:name, :cache);" |> prepare db in let open Batch_jaro_winkler in let model = build_exportable_model ~encoding:Encoding.UTF8 vals in binds stmt "name" (Data.TEXT name); binds stmt "cache" (Data.TEXT model); allstep stmt; finalize stmt |> Rc.check in let open Yojson.Basic.Util in let access_data x = x |> member "Cell" |> to_list |> filter_string |> List.iteri prep; allstep stmt in json |> member "Table" |> member "Row" |> to_list |> List.iter access_data; finalize stmt |> Rc.check; do_model "symbs" !symbs; do_model "names" !names