process directory into dictionary

For each data table in the OPUS file (e.g. "ScSm"), there exists
a "Data Parameter" table, with three important values:

* FXV (First X Value?)
* LXV (Last X Value?)
* NPT (Number of PoinTs?)

These can generate spaced frequencies corresponding to values in
"ScSm". NPT is usually less than the length of "ScSm".

This program groups data together only if FXV, LXV, and NPT are
EXACTLY EQUAL across files. FXV and LXV are floating point numbers
so it is possible that there may be some variance across files.

Each group lumps together data with the same field value (taken from
the filename) for further processing.
This commit is contained in:
Peter McGoron 2021-08-09 22:41:37 -04:00
parent 157b0ce047
commit 6caab18f76
1 changed files with 33 additions and 8 deletions

View File

@ -1,19 +1,44 @@
import os
import brukeropusreader as bor
import sys
from brukeropusreader import read_file
import re
# Matches the trailing "_<number>T.0" part of an OPUS file name, e.g.
# "sample_12.5T.0".  The dot before the final "0" is escaped so that only a
# literal ".0" extension matches (an unescaped "." would accept any character).
match_opus = re.compile(r"_[0-9]+(\.[0-9]+)?T\.0$")
def ifnot(d, v):
    """Ensure that *d* has an entry for *v*, creating an empty dict if absent.

    An existing entry is left untouched.  Nothing is returned; *d* is
    modified in place.
    """
    d.setdefault(v, {})
def mkkey(d):
    """Build the grouping key ``(FXV, LXV, NPT)`` from a parameter mapping.

    *d* must provide the keys "FXV", "LXV" and "NPT"; a missing key raises
    ``KeyError``.
    """
    fxv, lxv, npt = d["FXV"], d["LXV"], d["NPT"]
    return (fxv, lxv, npt)
def addkey(dict, key, field, val):
    """Append *val* to the list stored at ``dict[key][field]``.

    Both nesting levels are created on demand: ``dict[key]`` as an empty
    mapping and ``dict[key][field]`` as an empty list.  The mapping is
    modified in place and nothing is returned.
    """
    per_field = dict.setdefault(key, {})
    per_field.setdefault(field, []).append(val)
def progress(x, y):
    """Write an in-place "Processed x/y" status line to standard error.

    The line ends with a carriage return instead of a newline so that the
    next write starts over at the beginning of the same terminal line.
    """
    status = f"Processed {x}/{y}\r"
    sys.stderr.write(status)
class BlankSpace:
    """Lookup table that answers every key with a single space.

    Passed to ``str.translate`` it turns a string into a run of spaces of
    the same length.
    """

    def __getitem__(self, _):
        # The key is ignored on purpose: the answer is always one blank.
        blank = " "
        return blank
def process(s):
    """Group the "ScSm" data of every OPUS file found in directory *s*.

    Only directory entries whose names match ``match_opus`` are read; the
    field value is taken from the matched part of the file name.  Files
    without a "ScSm" table are reported on standard error and skipped.
    Progress is reported on standard error as well.

    Data is grouped first by the ``(FXV, LXV, NPT)`` key built from the
    file's "ScSm Data Parameter" table and then by field value.

    Returns:
        The grouping dictionary
        ``{(FXV, LXV, NPT): {field: [data, ...]}}``.
    """
    groupings = {}
    entries = os.listdir(s)
    total = len(entries)

    for i, fn in enumerate(entries):
        progress(i, total)

        v = re.search(match_opus, fn)
        if v is None:
            continue
        # Drop the leading "_" and the last two characters of the match
        # to obtain the field value.
        field = v.group()[1:-2]

        dat = read_file(os.path.join(s, fn))
        if "ScSm" not in dat:
            # End with a newline so the next "\r" progress line does not
            # overwrite this message.
            sys.stderr.write(
                f'"{fn}" does not have a "ScSm" field: skipping\n'
            )
            continue

        key = mkkey(dat["ScSm Data Parameter"])
        # key[2] is NPT; keep only the first NPT values of the table.
        addkey(groupings, key, field, dat["ScSm"][:key[2]])

    # Blank out the last progress line before printing the final summary.
    blanks = f"Processed {total}/{total}".translate(BlankSpace())
    sys.stderr.write(f"{blanks}\rProcessed {total} files\n")
    return groupings