process directory into dictionary

For each data table in the OPUS file (e.g. "ScSm"), there exists a corresponding "Data Parameter" table with three important values: FXV (First X Value?), LXV (Last X Value?), and NPT (Number of PoinTs?). From these, the spaced frequencies corresponding to the values in "ScSm" can be generated. NPT is usually less than the length of "ScSm". This program groups data together only if FXV, LXV, and NPT are EXACTLY EQUAL across files. FXV and LXV are floating-point numbers, so they may vary slightly across files; files whose values differ at all end up in separate groups. Within each group, data with the same field value (taken from the filename) is lumped together for further processing.
2021-08-09 22:41:37 -04:00 · 2021-08-09 22:41:37 -04:00 · 6caab18f76
parent 157b0ce047
commit 6caab18f76
1 changed files with 33 additions and 8 deletions
--- a/osak/procdir.py
+++ b/osak/procdir.py
@ -1,19 +1,44 @@
 import os
-import brukeropusreader as bor
+import sys
+from brukeropusreader import read_file
 import re

 match_opus = re.compile(r"_[0-9]+(\.[0-9]+)?T.0$")

-def ifnot(d,v):
-	if v not in d:
-		d[v] = {}
+def mkkey(d):
+	return (d["FXV"], d["LXV"], d["NPT"])
+def addkey(dict, key, field, val):
+	if key not in dict:
+		dict[key] = {}
+	if field not in dict[key]:
+		dict[key][field] = []
+	dict[key][field].append(val)
+def progress(x, y):
+	sys.stderr.write(f"Processed {x}/{y}\r")
+class BlankSpace:
+	def __getitem__(self, _):
+		return " "

 def process(s):
-	for fn in os.listdir(s):
+	groupings = {}
+	dir = os.listdir(s)
+	i = 0
+
+	for fn in dir:
+		progress(i,len(dir))
+		i = i + 1
 		v = re.search(match_opus, fn)
 		if v is None:
 			continue
 		field = v.group()[1:len(v.group())-2]
-		print(fn)
-		print(field)
-		print()
+
+		dat = read_file(f"{s}/{fn}")
+		if "ScSm" not in dat:
+			sys.stderr.write(f'"{fn}" does have a "ScSm" field: skipping')
+			continue
+
+		key = mkkey(dat["ScSm Data Parameter"])
+		addkey(groupings, key, field, dat["ScSm"][:key[2]])
+
+	blanks = f"Processed {i}/{len(dir)}".translate(BlankSpace())
+	sys.stderr.write(f"{blanks}\rProcessed {i} files\n")