From 87f446b8c9b652283bf26411355ca90fa7e4a864 Mon Sep 17 00:00:00 2001
From: Peter McGoron
Date: Fri, 13 Aug 2021 19:18:26 -0400
Subject: [PATCH] add searching by element name

---
Changes in v2 (review):
 - finalize the SQLite statements in get_model and get_elem on every path
 - load the search model once per print_by_name call instead of per name
 - name the SQLite return code in the Failure raised by chk
 - fix the "Van der Waals" label
 - add doc comments to every definition
The blob id on the elements_search.ml "index" line still refers to v1.

 dune               |   2 +-
 elements.ml        |   5 +-
 elements_search.ml | 185 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 190 insertions(+), 2 deletions(-)
 create mode 100644 elements_search.ml

diff --git a/dune b/dune
index df210fb..2b80f74 100644
--- a/dune
+++ b/dune
@@ -1,5 +1,5 @@
 (executable
 	(name elements)
-	(modules Elements Makedb)
+	(modules Elements Makedb Elements_search)
 	(libraries sqlite3 yojson batch_jaro_winkler)
 )
diff --git a/elements.ml b/elements.ml
index b5a778b..fb3ee4d 100644
--- a/elements.ml
+++ b/elements.ml
@@ -10,7 +10,7 @@ module Opts = struct
 	let jsonf = ref ""
 
 	(** Limit the amount of matches. *)
-	let maxmatch = ref 3
+	let maxmatch = ref 1
 
 	(** Lists to search. *)
 
@@ -68,3 +68,6 @@ let _ = if !Opts.jsonf <> "" then
 	else
 		Yojson.Basic.from_file !Opts.jsonf
 	)
+
+let () = if !Opts.match_name <> [] then
+	Elements_search.print_by_name db !Opts.maxmatch (List.rev !Opts.match_name)
diff --git a/elements_search.ml b/elements_search.ml
new file mode 100644
index 0000000..ca5e9ab
--- /dev/null
+++ b/elements_search.ml
@@ -0,0 +1,185 @@
+(** Fuzzy lookup of chemical elements by name.
+
+    A query is scored against a serialized Jaro-Winkler model read from the
+    [searchtable] table; the matching row of the [elements] table is then
+    printed to standard output. *)
+
+(** One row of the [elements] table. Columns read through the [*_or_null]
+    helpers may be [NULL] and are therefore [option]s. *)
+type elem = {
+	num : int;
+	symb : string;
+	name : string;
+	mass : float option;
+	cpk_color : string option;
+	e_config : string option;
+	e_neg : float option;
+	rad : float option;
+	ionization_e : float option;
+	e_affinity : float option;
+	oxidation : string option;
+	state : string option;
+	melting : float option;
+	boiling : float option;
+	density : float option;
+	block : string option;
+	discovered : int option
+}
+
+(** [float_or_null stmt n] reads column [n] of the current row of [stmt]:
+    [None] for [NULL], [Some x] for [FLOAT x].
+    @raise Failure if the column holds any other type. *)
+let float_or_null stmt n = match (Sqlite3.column stmt n) with
+| Sqlite3.Data.NULL -> None
+| Sqlite3.Data.FLOAT x -> Some x
+| _ -> raise (Failure (Printf.sprintf "Column %d has invalid type" n))
+
+(** Like {!float_or_null}, for [TEXT] columns. *)
+let string_or_null stmt n = match (Sqlite3.column stmt n) with
+| Sqlite3.Data.NULL -> None
+| Sqlite3.Data.TEXT x -> Some x
+| _ -> raise (Failure (Printf.sprintf "Column %d has invalid type" n))
+
+(** Like {!float_or_null}, for [INT] columns; the 64-bit value is narrowed
+    with [Int64.to_int]. *)
+let int_or_null stmt n = match (Sqlite3.column stmt n) with
+| Sqlite3.Data.NULL -> None
+| Sqlite3.Data.INT x -> Some (Int64.to_int x)
+| _ -> raise (Failure (Printf.sprintf "Column %d has invalid type" n))
+
+(** [chk rc] returns [()] for the return codes [ROW] and [OK].
+    @raise Failure for any other code; the message names the code. *)
+let chk = function
+| Sqlite3.Rc.ROW | Sqlite3.Rc.OK -> ()
+| rc -> failwith ("statement failure: " ^ Sqlite3.Rc.to_string rc)
+
+(** [get_model db typ] reads the [dat] blob stored in [searchtable] under
+    the name [typ] and builds a runtime search model from it. The statement
+    is finalized even when binding or stepping fails.
+    @raise Failure if stepping does not yield a row. *)
+let get_model db typ =
+	let open Sqlite3
+	in let stmt = "SELECT dat FROM searchtable WHERE name = ?;"
+		|> prepare db
+	in let read () =
+		bind_text stmt 1 typ |> Rc.check;
+		step stmt |> chk;
+		column_blob stmt 0
+	(* Errors from [read] are already raised as exceptions, so the return
+	   code of [finalize] is deliberately dropped. *)
+	in let finally () = ignore (finalize stmt : Rc.t)
+	in Fun.protect ~finally read
+		|> Batch_jaro_winkler.build_runtime_model
+
+(** [search_model most model s] scores [s] against [model], forwarding
+    [most] as [~n_best_results], and returns [(candidate, score)] pairs.
+    Strings are treated as UTF-8. *)
+let search_model most model s =
+	let open Batch_jaro_winkler
+	in let encoding = Encoding.UTF8
+	in jaro_winkler_distance ~encoding ~n_best_results:most model s
+
+(** [by_name n db s] searches the ["name"] model for [s]. The model is
+    reloaded from [db] on every call; for repeated lookups call
+    {!get_model} once and use {!search_model} directly. *)
+let by_name n db s = search_model n (get_model db "name") s
+
+(** [name_stmt db s] prepares the query selecting the element named [s] and
+    binds [s] to it. The caller must finalize the returned statement. *)
+let name_stmt db s =
+	let open Sqlite3
+	in let stmt = "SELECT \
+		num, symb, name, mass, cpk_color, e_config, e_neg, rad, \
+		ionization_e, e_affinity, oxidation, state, melting, \
+		boiling, density, block, discovered \
+		FROM elements WHERE name = ?;"
+		|> prepare db
+	in bind_text stmt 1 s |> Rc.check;
+	stmt
+
+(** [get_elem_from_stmt stmt] steps [stmt] once and decodes the resulting
+    row, whose columns must be in the order selected by {!name_stmt}.
+    [stmt] is not finalized.
+    @raise Failure if stepping does not yield a row or a nullable column
+    has an unexpected type. *)
+let get_elem_from_stmt stmt =
+	let open Sqlite3
+	in
+	step stmt |> chk; {
+		num = column_int stmt 0;
+		symb = column_text stmt 1;
+		name = column_text stmt 2;
+		mass = float_or_null stmt 3;
+		cpk_color = string_or_null stmt 4;
+		e_config = string_or_null stmt 5;
+		e_neg = float_or_null stmt 6;
+		rad = float_or_null stmt 7;
+		ionization_e = float_or_null stmt 8;
+		e_affinity = float_or_null stmt 9;
+		oxidation = string_or_null stmt 10;
+		state = string_or_null stmt 11;
+		melting = float_or_null stmt 12;
+		boiling = float_or_null stmt 13;
+		density = float_or_null stmt 14;
+		block = string_or_null stmt 15;
+		discovered = int_or_null stmt 16
+	}
+
+(** [get_elem db s] fetches the element whose [name] column equals [s].
+    The statement is finalized whether or not decoding succeeds.
+    @raise Failure under the same conditions as {!get_elem_from_stmt}. *)
+let get_elem db s =
+	let stmt = name_stmt db s
+	(* A failed [step] has already been raised as an exception, so the
+	   return code of [finalize] is deliberately dropped. *)
+	in let finally () = ignore (Sqlite3.finalize stmt : Sqlite3.Rc.t)
+	in Fun.protect ~finally (fun () -> get_elem_from_stmt stmt)
+
+(** [print_elem e] prints a header line with the name, symbol and number of
+    [e], then one ["label: value"] line per field that is not [None], then
+    an empty line. *)
+let print_elem e =
+	let p_str name = Option.iter (fun x ->
+		Printf.printf "%s: %s\n" name x)
+	in let p_flt name = Option.iter (fun x ->
+		Printf.printf "%s: %f\n" name x)
+	in let p_int name = Option.iter (fun x ->
+		Printf.printf "%s: %d\n" name x)
+	in Printf.printf "'%s' -- '%s' -- '%d'\n" e.name e.symb e.num;
+	p_flt "Standard Atomic Mass" e.mass;
+	p_str "CPK Color" e.cpk_color;
+	p_str "Electron Configuration" e.e_config;
+	p_flt "Electronegativity (Pauling)" e.e_neg;
+	p_flt "Van der Waals (pm)" e.rad;
+	p_flt "Ionization Energy (eV)" e.ionization_e;
+	p_flt "Electron Affinity (eV)" e.e_affinity;
+	p_str "Oxidation States" e.oxidation;
+	p_str "Standard State" e.state;
+	p_flt "Melting Point (K)" e.melting;
+	p_flt "Boiling Point (K)" e.boiling;
+	p_flt "Density (g/cm³)" e.density;
+	p_str "Block" e.block;
+	p_int "Year Discovered" e.discovered;
+	print_newline()
+
+(** [print_by_name db most l] searches for each name in [l] (lowercased),
+    asking for at most [most] candidates. A single candidate is printed in
+    full with {!print_elem}; several candidates are listed with their
+    scores; none prints a notice. *)
+let print_by_name db most l =
+	(* Load the model once, and only if there is something to search. *)
+	let model = lazy (get_model db "name")
+	in let serc s = search_model (Some most) (Lazy.force model) s
+	in let on_match v = (match serc (String.lowercase_ascii v) with
+	| [] -> Printf.printf "No matches found for '%s'\n" v
+	| (m,p)::[] -> if m <> v then
+		Printf.printf
+			"Best match '%s' for '%s' (%.0f%%)\n"
+			m v (p *. 100.0);
+		get_elem db m |> print_elem
+	| l ->
+		Printf.printf "Matches for '%s'\n" v;
+		List.iter (fun (a,b) -> Printf.printf "%f -- %s\n" b a)
+			(List.rev l)
+	)
+	in List.iter on_match l