From 6caab18f764cd5ee8c63de3b591b930ec7ef50f6 Mon Sep 17 00:00:00 2001
From: Peter McGoron
Date: Mon, 9 Aug 2021 22:41:37 -0400
Subject: [PATCH] process directory into dictionary

For each data table in the OPUS file (e.g. "ScSm"), there exists a
"Data Parameter" table, with three important values:

* FXV (First X Value?)
* LXV (Last X Value?)
* NPT (Number of PoinTs?)

These can generate spaced frequencies corresponding to values in "ScSm".
NPT is usually less than the length of "ScSm".

This program groups data together only if FXV, LXV, and NPT are EXACTLY
EQUAL across files. FXV and LXV are floating point numbers so it is
possible that there may be some variance across files.

Each group lumps together data with the same field value (taken from
the filename) for further processing.
---
Note (ignored by git-am): this patch was re-flowed from a
whitespace-collapsed copy. Hunk indentation is reconstructed (4 spaces
assumed); use `git apply --ignore-whitespace` if the target file uses
tabs. The skip message for files lacking "ScSm" was corrected ("does
not have", newline-terminated), so the post-image blob id on the index
line is no longer exact.

 osak/procdir.py | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/osak/procdir.py b/osak/procdir.py
index d22f2d2..c56d51d 100644
--- a/osak/procdir.py
+++ b/osak/procdir.py
@@ -1,19 +1,44 @@
 import os
-import brukeropusreader as bor
+import sys
+from brukeropusreader import read_file
 import re
 
 match_opus = re.compile(r"_[0-9]+(\.[0-9]+)?T.0$")
 
-def ifnot(d,v):
-    if v not in d:
-        d[v] = {}
+def mkkey(d):
+    return (d["FXV"], d["LXV"], d["NPT"])
+def addkey(dict, key, field, val):
+    if key not in dict:
+        dict[key] = {}
+    if field not in dict[key]:
+        dict[key][field] = []
+    dict[key][field].append(val)
+def progress(x, y):
+    sys.stderr.write(f"Processed {x}/{y}\r")
+class BlankSpace:
+    def __getitem__(self, _):
+        return " "
 
 def process(s):
-    for fn in os.listdir(s):
+    groupings = {}
+    dir = os.listdir(s)
+    i = 0
+
+    for fn in dir:
+        progress(i,len(dir))
+        i = i + 1
         v = re.search(match_opus, fn)
         if v is None:
             continue
         field = v.group()[1:len(v.group())-2]
-        print(fn)
-        print(field)
-        print()
+
+        dat = read_file(f"{s}/{fn}")
+        if "ScSm" not in dat:
+            sys.stderr.write(f'"{fn}" does not have a "ScSm" field: skipping\n')
+            continue
+
+        key = mkkey(dat["ScSm Data Parameter"])
+        addkey(groupings, key, field, dat["ScSm"][:key[2]])
+
+    blanks = f"Processed {i}/{len(dir)}".translate(BlankSpace())
+    sys.stderr.write(f"{blanks}\rProcessed {i} files\n")