From 29e2cbee212359991a9786efc05f8faa13d94ed6 Mon Sep 17 00:00:00 2001 From: William Wu Date: Tue, 10 Oct 2017 18:11:21 +0200 Subject: [PATCH] cosmetics Change-Id: I661ae0200fa1ccdee849390b76e7f05c0f50545b --- README.md | 43 ++++++++++++++++++++++++--------- brukeropusreader/opus_data.py | 2 +- brukeropusreader/opus_reader.py | 9 ++++--- setup.py | 6 ++--- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 487b9ca..930acb4 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,40 @@ -## Bruker OPUS binary files reader -Python scripts in this project allow to read Bruker OPUS propriertary files. You can provide scripts with config file(example in conf/) to perform interpolation of spectra. +# Bruker OPUS Reader + +## Introduction +The Python scripts in this project enable the reading of Bruker OPUS files. ## Usage -You can run function brukeropusreader.opus_reader.opus reader to read opus file. +Run the function brukeropusreader.opus_reader.opus_reader to read OPUS files. ## Structure of OPUS files -OPUS file always consist of spectrum series. Each series is described by few parameters: NPT (number of points), FXV (value of first wavelength), LXV (value of last wavelength), END (address of spectra series). +OPUS files consist of several spectrum series. +Each series is described by a few parameters: -This parameters can be found by searching for particular string in binary file(in ASCII). After founding occurence one have to move pointer few bytes further to read value. -It is not established how far forward pointer should be moved. We empirically checked that is is 12 for end and 8 for npt, fxv, lxv. -Beyond that, each file contains some metadata about hardware used for measurement. +- NPT (number of points) +- FXV (value of first wavelength) +- LXV (value of last wavelength) +- END (address of spectra series) + +These parameters are found by searching for ASCII strings in the binary files. 
+After finding a match, we must move the pointer a few bytes further to read values. +There is no standard describing how much further the pointer should be moved. +We empirically checked that it is 12 bytes for END and 8 for NPT, FXV, and LXV. +In addition, each file contains some metadata about the hardware used for measurement. ## Controversies -Bruker OPUS is proprieratry file, therefore we don't know exactly what is its structure. We can just guess it. The main problem is - having few series how to decide which one is absorption spectra? -Our solution (empirically developd) is: -* Removed broken series (ones with fxv greater than lxv, without npt information, etc...) -* Filter out interferrograms - taken from https://github.com/philipp-baumann/simplerspec. Interferrograms have starting value 0. We don't need them. -* If after these two steps we still have more than one spectrum left we can choose one with highest average. We empirically checked that others are usually random noise with values near 0. +Bruker OPUS is a proprietary file format, so we do not know its structure exactly. +One problem is, given only a few series, how to decide which are absorption spectra? +Our solution (empirically developed) is: + +1. Remove broken series (ones with FXV > LXV, missing NPT information, etc.) +2. Remove interferograms. (See https://github.com/philipp-baumann/simplerspec) Interferograms have a starting value of 0. +3. If after these two steps we still have more than one series left, we can choose the one with the highest average value. We empirically checked that other series are usually random noise with values near 0. +## Contact +For developer issues, please start a ticket in GitHub. +You can also write to the dev team directly at bruker-opus-reader-dev@qed.ai. 
+For other issues, please write to: bruker-opus-reader@qed.ai + +-- +QED | https://qed.ai diff --git a/brukeropusreader/opus_data.py b/brukeropusreader/opus_data.py index 7c4ab27..6a4e13a 100644 --- a/brukeropusreader/opus_data.py +++ b/brukeropusreader/opus_data.py @@ -78,7 +78,7 @@ class OpusData(object): if k == n_interp - 1: yi.append(yav[0]) else: - print("Wrong wavelenghts for interpolating data") + print("Wrong wavelengths for interpolating data") elif iwavenumber[k] > xa_max: if k == 0: yi.append(yav[-1]) diff --git a/brukeropusreader/opus_reader.py b/brukeropusreader/opus_reader.py index bf09b10..b6235d4 100644 --- a/brukeropusreader/opus_reader.py +++ b/brukeropusreader/opus_reader.py @@ -20,17 +20,18 @@ def opus_reader(filepath): wave_num_abs_pair = reversed(zip(ab_wavenumbers, ab_spectra)) - meta = get_meta_data(buff) + meta = get_metadata(buff) return OpusData(zip(*wave_num_abs_pair), meta=meta) def choose_ab(fxv_spc, spc, wavenumbers): - # Filtering interferograms - we don't need them + # Removing interferograms which_ig = np.where(fxv_spc == 0)[0] not_ig = np.setdiff1d(range(len(fxv_spc)), which_ig) - # Filtering single channel spectras - that's just guessing, but it works! 
+ # Removing single channel spectra + # (heuristics are empirically derived) ab = [] for x in not_ig: if np.average(spc[x]) > 0.25: @@ -110,7 +111,7 @@ def generate_wavelengths(lxv_spc, fxv_spc, npt_spc): return wavenumbers -def get_meta_data(buff): +def get_metadata(buff): # Getting source of instruments all_ins = tuple(find_all('INS', buff)) inst = unpack_from("<3s", buff, all_ins[-1] + 8)[0] diff --git a/setup.py b/setup.py index 896d6bd..aec90c4 100644 --- a/setup.py +++ b/setup.py @@ -3,8 +3,8 @@ from distutils.core import setup setup(name='bruker-opus-reader', version='1.0', - description='Bruker OPUS files reader', - author='t2', - author_email='t2@qed.ai', + description='Bruker OPUS File Reader', + author='QED', + author_email='bruker-opus-reader-dev@qed.ai', packages=['brukeropusreader'], )