diff --git a/dune b/dune
index ddf0014..7a62cc2 100644
--- a/dune
+++ b/dune
@@ -1,5 +1,5 @@
 (executable
  (name elements)
- (modules Elements Preprocess)
- (libraries yojson re)
+ (modules Elements Makedb)
+ (libraries sqlite3 yojson)
 )
diff --git a/elements.ml b/elements.ml
index acacd01..518a705 100644
--- a/elements.ml
+++ b/elements.ml
@@ -1 +1,2 @@
-let () = Preprocess.preprocess stdin
+let () = Makedb.makedb (Sqlite3.db_open "elements.sqlite3")
+  (Yojson.Basic.from_channel stdin)
diff --git a/makedb.ml b/makedb.ml
new file mode 100644
index 0000000..11d6a9d
--- /dev/null
+++ b/makedb.ml
@@ -0,0 +1,103 @@
+(** Init script for the SQLite database: drops any existing [elements]
+    table and recreates it empty. *)
+let inits = "\
+  DROP TABLE IF EXISTS elements; \
+  CREATE TABLE elements (\
+  num INTEGER PRIMARY KEY NOT NULL, \
+  symb TEXT UNIQUE NOT NULL, \
+  name TEXT UNIQUE NOT NULL, mass FLOAT, \
+  cpk_color TEXT, e_config TEXT, e_neg FLOAT, rad FLOAT, \
+  ionization_e FLOAT, e_affinity FLOAT, \
+  oxidation TEXT, state TEXT, \
+  melting FLOAT, boiling FLOAT, density FLOAT, \
+  block TEXT, discovered INTEGER) WITHOUT ROWID; \
+"
+
+(** Parameterised insert for one element. Each placeholder is a PubChem
+    column name prefixed with [:], so a cell can be bound by the name of
+    its column. *)
+let insert_sql = "\
+  INSERT INTO elements \
+  (num, symb, name, mass, cpk_color, e_config, e_neg, \
+  rad, ionization_e, e_affinity, oxidation, state, \
+  melting, boiling, density, block, discovered) VALUES \
+  (:AtomicNumber, :Symbol, :Name, :AtomicMass, \
+  :CPKHexColor, :ElectronConfiguration, \
+  :Electronegativity, :AtomicRadius, :IonizationEnergy, \
+  :ElectronAffinity, :OxidationStates, :StandardState, \
+  :MeltingPoint, :BoilingPoint, :Density, :GroupBlock, \
+  :YearDiscovered);"
+
+(** [toarr json] returns the column names listed in the PubChem JSON
+    document [json] as a string array, in file order.
+
+    The PubChem file is compressed by keeping all key values in a single
+    array at the top called [Column] (under [Table.Columns]). Entries of
+    that array that are not strings are dropped.
+    @raise Yojson.Basic.Util.Type_error if [json] lacks that layout. *)
+let toarr json =
+  let open Yojson.Basic.Util in
+  json
+  |> member "Table" |> member "Columns" |> member "Column"
+  |> to_list |> filter_string |> Array.of_list
+
+(** [allstep stmt] steps [stmt] until it stops returning rows, ignoring
+    them, then resets [stmt] so it can be bound and run again. Raises
+    through [Sqlite3.Rc.check] when a step or the reset fails. *)
+let allstep stmt =
+  let open Sqlite3 in
+  let rec drain = function
+    | Rc.ROW -> drain (step stmt)
+    | rc -> Rc.check rc
+  in
+  drain (step stmt);
+  reset stmt |> Rc.check
+
+(** [populate db json] runs {!inits} on [db], then inserts one record per
+    entry of [Table.Row] in [json]. The prepared statement is finalized
+    whether or not the inserts succeed. *)
+let populate db json =
+  let open Sqlite3 in
+  exec db inits |> Rc.check;
+  let colarr = toarr json in
+  let stmt = prepare db insert_sql in
+  (* Binds the [i]-th cell of a row to the placeholder named after the
+     [i]-th column; an empty string is stored as NULL. *)
+  let bind_cell i x =
+    bind_name stmt (":" ^ colarr.(i))
+      (if String.equal x "" then Data.NULL else Data.TEXT x)
+    |> Rc.check
+  in
+  let insert_row row =
+    (* Bindings survive [reset]; clear them so that a row with fewer
+       cells cannot inherit values from the previous row. *)
+    clear_bindings stmt |> Rc.check;
+    (* NOTE(review): [filter_string] drops non-string cells, which would
+       shift later cells onto the wrong column; the input is presumed to
+       hold only strings. *)
+    Yojson.Basic.Util.(row |> member "Cell" |> to_list |> filter_string)
+    |> List.iteri bind_cell;
+    allstep stmt
+  in
+  Fun.protect
+    ~finally:(fun () -> ignore (finalize stmt : Rc.t))
+    (fun () ->
+      Yojson.Basic.Util.(json |> member "Table" |> member "Row" |> to_list)
+      |> List.iter insert_row)
+
+(** [makedb db json] initializes [db] from the PubChem document [json].
+
+    All work happens inside a savepoint: on success it is released, on
+    failure it is rolled back (best effort) so [db] keeps its previous
+    contents, and the exception is re-raised. A savepoint is used rather
+    than [BEGIN] so the call also works inside a caller's transaction. *)
+let makedb db json =
+  let open Sqlite3 in
+  exec db "SAVEPOINT makedb;" |> Rc.check;
+  match populate db json with
+  | () -> exec db "RELEASE makedb;" |> Rc.check
+  | exception e ->
+    let bt = Printexc.get_raw_backtrace () in
+    (* Best-effort undo; the original failure is the one worth reporting. *)
+    ignore (exec db "ROLLBACK TO makedb; RELEASE makedb;" : Rc.t);
+    Printexc.raise_with_backtrace e bt
diff --git a/preprocess.ml b/preprocess.ml
deleted file mode 100644
index 797dc02..0000000
--- a/preprocess.ml
+++ /dev/null
@@ -1,24 +0,0 @@
-let exprcomp expr = Re.Posix.re expr |> Re.compile
-let justws s =
-  let e = Re.seq [Re.bos; Re.rep (Re.set " \\t\\r\\v\\n"); Re.eos]
-    |> Re.compile
-  in Re.exec_opt e s |> Option.is_some
-let repl expr by = Re.replace_string (exprcomp expr) ~by
-
-let preprocess s =
-  let open Yojson.Basic
-  in let open Yojson.Basic.Util
-  in let json = from_channel s
-  in let escp s = if justws s then print_endline "(none)" else
-    print_endline (s |> repl "\\\\" "\\\\\\\\" |> repl "\n" "\\n")
-  in let pcell x = member "Cell" x |> to_list |> filter_string
-    |> List.iter escp; print_newline()
-  in
-  (* Print columns *)
-  json |> member "Table" |> member "Columns"
-    |> member "Column" |> to_list |> filter_string |> List.iter escp;
-  print_newline();
-
-  (* Print rows *)
-  json |> member "Table" |> member "Row" |> to_list
-    |> List.iter pcell