From c00b3d0773e2c5b7e02d975f2c80cd2382d1ef2c Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Fri, 19 May 2023 14:09:09 +0200 Subject: [PATCH 01/54] Initial commit --- .github/workflows/generate_api.yaml | 38 + .vscode/settings.json | 6 + links/enzymeml.toml | 22 + links/nmrml.toml | 0 nmrpy/data_objects.py | 1337 ++++++++++++------- nmrpy/datamodel/__init__.py | 3 + nmrpy/datamodel/core/__init__.py | 33 + nmrpy/datamodel/core/citation.py | 225 ++++ nmrpy/datamodel/core/cv.py | 33 + nmrpy/datamodel/core/experiment.py | 65 + nmrpy/datamodel/core/fid.py | 32 + nmrpy/datamodel/core/fidarray.py | 24 + nmrpy/datamodel/core/fileformats.py | 7 + nmrpy/datamodel/core/identifiertypes.py | 5 + nmrpy/datamodel/core/nmrpy.py | 45 + nmrpy/datamodel/core/parameters.py | 70 + nmrpy/datamodel/core/person.py | 57 + nmrpy/datamodel/core/publication.py | 90 ++ nmrpy/datamodel/core/publicationtypes.py | 5 + nmrpy/datamodel/core/subjects.py | 8 + nmrpy/datamodel/core/term.py | 44 + nmrpy/datamodel/schemes/datamodel_schema.md | 121 ++ specifications/nmrpy.md | 279 ++++ 23 files changed, 2031 insertions(+), 518 deletions(-) create mode 100644 .github/workflows/generate_api.yaml create mode 100644 .vscode/settings.json create mode 100644 links/enzymeml.toml create mode 100644 links/nmrml.toml create mode 100644 nmrpy/datamodel/__init__.py create mode 100644 nmrpy/datamodel/core/__init__.py create mode 100644 nmrpy/datamodel/core/citation.py create mode 100644 nmrpy/datamodel/core/cv.py create mode 100644 nmrpy/datamodel/core/experiment.py create mode 100644 nmrpy/datamodel/core/fid.py create mode 100644 nmrpy/datamodel/core/fidarray.py create mode 100644 nmrpy/datamodel/core/fileformats.py create mode 100644 nmrpy/datamodel/core/identifiertypes.py create mode 100644 nmrpy/datamodel/core/nmrpy.py create mode 100644 nmrpy/datamodel/core/parameters.py create mode 100644 nmrpy/datamodel/core/person.py create mode 100644 nmrpy/datamodel/core/publication.py create mode 100644 
nmrpy/datamodel/core/publicationtypes.py create mode 100644 nmrpy/datamodel/core/subjects.py create mode 100644 nmrpy/datamodel/core/term.py create mode 100644 nmrpy/datamodel/schemes/datamodel_schema.md create mode 100644 specifications/nmrpy.md diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml new file mode 100644 index 0000000..b321a1d --- /dev/null +++ b/.github/workflows/generate_api.yaml @@ -0,0 +1,38 @@ +name: Generate API + +on: push + +jobs: + generate-api: + runs-on: ubuntu-latest + env: + LIB_NAME: ${{ secrets.LIB_NAME }} + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install sdRDM + run: python3 -m pip install git+https://github.com/JR-1991/software-driven-rdm.git@20-lowest-level-elements-in-xml-cannot-have-attributes-and-content + + - name: Generate API + env: + URL: ${{github.repositoryUrl}} + COMMIT: ${{github.sha}} + run: sdrdm generate --path ./specifications/ --out . --name "$LIB_NAME" --url "$URL" --commit "$COMMIT" + + - name: Push source code + run: | + if [[ `git status --porcelain` ]]; then + git add "$LIB_NAME" + git config --global user.name 'sdRDM Bot' + git config --global user.email 'sdRDM@bot.com' + git commit -am "API update" + git push + else + echo "Nothing changed!" 
+ fi diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d99f2f3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/links/enzymeml.toml b/links/enzymeml.toml new file mode 100644 index 0000000..a312726 --- /dev/null +++ b/links/enzymeml.toml @@ -0,0 +1,22 @@ +__model__ = "NMRpy" + +[__sources__] +EnzymeMLDocument = "https://github.com/EnzymeML/enzymeml-specifications.git@markdown-parser-refactor" + +[NMRpy] +datetime_created = "EnzymeMLDocument.created" +datetime_modified = "EnzymeMLDocument.modified" + +[experiment] +name = "EnzymeMLDocument.name" + +[citation] +doi = "EnzymeMLDocument.doi" + +["citation.authors"] +last_name = "EnzymeMLDocument.creators.given_name" +first_name = "EnzymeMLDocument.creators.family_name" +email = "EnzymeMLDocument.creators.mail" + +["citation.related_publications"] +doi = "EnzymeMLDocument.url" diff --git a/links/nmrml.toml b/links/nmrml.toml new file mode 100644 index 0000000..e69de29 diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index e9e1cd8..d22822b 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1,3 +1,4 @@ +from pathlib import Path import numpy import scipy from matplotlib import pyplot @@ -9,22 +10,45 @@ from nmrpy.plotting import * import os import pickle +from ipywidgets import SelectMultiple +from sdRDM import DataModel -class Base(): + +class Base: _complex_dtypes = [ - numpy.dtype('csingle'), - numpy.dtype('cdouble'), - numpy.dtype('clongdouble'), - ] + numpy.dtype("csingle"), + numpy.dtype("cdouble"), + numpy.dtype("clongdouble"), + ] - _file_formats = ['varian', 'bruker', None] + _file_formats = ["varian", "bruker", None] def __init__(self, *args, **kwargs): - self.id = kwargs.get('id', None) - self._procpar = kwargs.get('procpar', None) + self.id = kwargs.get("id", None) + 
self._procpar = kwargs.get("procpar", None) self._params = None - self.fid_path = kwargs.get('fid_path', '.') + self.fid_path = kwargs.get("fid_path", ".") self._file_format = None + # self.parameters_object = self.lib.Parameters() + + @property + def lib(self): + try: + self.__lib + except: + self.__lib = DataModel.from_markdown( + path=Path(__file__).parent.parent / "specifications" + ) + return self.__lib + + # @property + # def parameters_object(self): + # return self.__parameter_object + + # @parameters_object.setter + # def parameters_object(self, parameters_object): + # if isinstance(parameters_object, DataModel): + # self.__parameters_object = parameters_object @property def id(self): @@ -35,8 +59,8 @@ def id(self, id): if isinstance(id, str) or id is None: self.__id = id else: - raise AttributeError('ID must be a string or None.') - + raise AttributeError("ID must be a string or None.") + @property def fid_path(self): return self.__fid_path @@ -46,7 +70,7 @@ def fid_path(self, fid_path): if isinstance(fid_path, str): self.__fid_path = fid_path else: - raise AttributeError('fid_path must be a string.') + raise AttributeError("fid_path must be a string.") @property def _file_format(self): @@ -57,7 +81,9 @@ def _file_format(self, file_format): if file_format in self._file_formats: self.__file_format = file_format else: - raise AttributeError('_file_format must be "varian", "bruker", or None.') + raise AttributeError( + '_file_format must be "varian", "bruker", or None.' 
+ ) @classmethod def _is_iter(cls, i): @@ -90,12 +116,30 @@ def _procpar(self): @_procpar.setter def _procpar(self, procpar): if procpar is None: - self.__procpar = procpar + self.__procpar = procpar elif isinstance(procpar, dict): - self.__procpar = procpar + self.__procpar = procpar self._params = self._extract_procpar(procpar) + # self.parameters_object( + # acquisition_time=self._params.get("at"), + # relaxation_time=self._params.get("d1"), + # repetition_time=self._params.get("rt"), + # spectral_width_ppm=self._params.get("sw"), + # spectral_width_hz=self._params.get("sw_hz"), + # spectrometer_frequency=self._params.get("sfrq"), + # reference_frequency=self._params.get("reffrq"), + # spectral_width_left=self._params.get("sw_left"), + # ) + # for _ in self._params.get("nt"): + # if type(_) is not None: + # self.fid_object.parameters.number_of_transients.append(_) + # for _ in self._params.get("acqtime"): + # if type(_) is not None: + # self.fid_object.parameters.acquisition_times_array.append( + # _ + # ) else: - raise AttributeError('procpar must be a dictionary or None.') + raise AttributeError("procpar must be a dictionary or None.") @property def _params(self): @@ -106,16 +150,16 @@ def _params(self, params): if isinstance(params, dict) or params is None: self.__params = params else: - raise AttributeError('params must be a dictionary or None.') + raise AttributeError("params must be a dictionary or None.") - #processing + # processing def _extract_procpar(self, procpar): - if self._file_format == 'bruker': + if self._file_format == "bruker": return self._extract_procpar_bruker(procpar) - elif self._file_format == 'varian': + elif self._file_format == "varian": return self._extract_procpar_varian(procpar) - #else: - # raise AttributeError('Could not parse procpar.') + # else: + # raise AttributeError('Could not parse procpar.') @staticmethod def _extract_procpar_varian(procpar): @@ -161,20 +205,20 @@ def _extract_procpar_varian(procpar): return params 
@staticmethod - def _extract_procpar_bruker(procpar): + def _extract_procpar_bruker(procpar): """ Extract some commonly-used NMR parameters (using Bruker denotations) and return a parameter dictionary 'params'. """ - d1 = procpar['acqus']['D'][1] - reffrq = procpar['acqus']['SFO1'] - nt = procpar['acqus']['NS'] - sw_hz = procpar['acqus']['SW_h'] - sw = procpar['acqus']['SW'] + d1 = procpar["acqus"]["D"][1] + reffrq = procpar["acqus"]["SFO1"] + nt = procpar["acqus"]["NS"] + sw_hz = procpar["acqus"]["SW_h"] + sw = procpar["acqus"]["SW"] # lefthand offset of the processed data in ppm - if 'procs' in procpar: - sfrq = procpar['procs']['SF'] - sw_left = procpar['procs']['OFFSET'] + if "procs" in procpar: + sfrq = procpar["procs"]["SF"] + sw_left = procpar["procs"]["OFFSET"] else: sfrq = procpar['acqus']['BF1'] sw_left = (0.5+1e6*(sfrq-reffrq)/sw_hz)*sw_hz/sfrq @@ -203,18 +247,20 @@ def _extract_procpar_bruker(procpar): sfrq=sfrq, reffrq=reffrq, sw_left=sw_left, - ) + ) return params + class Fid(Base): - ''' + """ The basic FID (Free Induction Decay) class contains all the data for a single spectrum (:attr:`~nmrpy.data_objects.Fid.data`), and the necessary methods to process these data. - ''' + """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.data = kwargs.get('data', []) + self.fid_object = self.lib.FID() + self.data = kwargs.get("data", []) self.peaks = None self.ranges = None self._deconvoluted_peaks = None @@ -223,7 +269,16 @@ def __init__(self, *args, **kwargs): } def __str__(self): - return 'FID: %s (%i data)'%(self.id, len(self.data)) + return "FID: %s (%i data)" % (self.id, len(self.data)) + + @property + def fid_object(self): + return self.__fid_object + + @fid_object.setter + def fid_object(self, fid_object): + if isinstance(fid_object, DataModel): + self.__fid_object = fid_object @property def data(self): @@ -231,11 +286,14 @@ def data(self): The spectral data. 
This is the primary object upon which the processing and analysis functions work. """ return self.__data - - @data.setter + + @data.setter def data(self, data): if Fid._is_valid_dataset(data): self.__data = numpy.array(data) + # for _ in self.__data: + # if type(_) is not None: + # self.fid_object.data.append(float(_)) @property def _ppm(self): @@ -243,7 +301,11 @@ def _ppm(self): Index of :attr:`~nmrpy.data_objects.Fid.data` in ppm (parts per million). """ if self._params is not None and self.data is not None: - return numpy.linspace(self._params['sw_left']-self._params['sw'], self._params['sw_left'], len(self.data))[::-1] + return numpy.linspace( + self._params["sw_left"] - self._params["sw"], + self._params["sw_left"], + len(self.data), + )[::-1] else: return None @@ -253,14 +315,14 @@ def peaks(self): Picked peaks for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. """ return self._peaks - - @peaks.setter + + @peaks.setter def peaks(self, peaks): if peaks is not None: if not Fid._is_flat_iter(peaks): - raise AttributeError('peaks must be a flat iterable') + raise AttributeError("peaks must be a flat iterable") if not all(isinstance(i, numbers.Number) for i in peaks): - raise AttributeError('peaks must be numbers') + raise AttributeError("peaks must be numbers") self._peaks = numpy.array(peaks) else: self._peaks = peaks @@ -271,34 +333,40 @@ def ranges(self): Picked ranges for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. """ return self._ranges - - @ranges.setter + + @ranges.setter def ranges(self, ranges): if ranges is None: self._ranges = None return if not Fid._is_iter_of_iters(ranges) or ranges is None: - raise AttributeError('ranges must be an iterable of iterables or None') + raise AttributeError( + "ranges must be an iterable of iterables or None" + ) ranges = numpy.array(ranges) if ranges.shape[1] != 2: - raise AttributeError('ranges must be an iterable of 2-length iterables or an empty iterables e.g. 
[[]]') + raise AttributeError( + "ranges must be an iterable of 2-length iterables or an empty iterables e.g. [[]]" + ) for r in ranges: if not all(isinstance(i, numbers.Number) for i in r): - raise AttributeError('ranges must be numbers') + raise AttributeError("ranges must be numbers") self._ranges = ranges @property def _bl_ppm(self): return self.__bl_ppm - - @_bl_ppm.setter + + @_bl_ppm.setter def _bl_ppm(self, bl_ppm): if bl_ppm is not None: if not Fid._is_flat_iter(bl_ppm): - raise AttributeError('baseline indices must be a flat iterable') + raise AttributeError( + "baseline indices must be a flat iterable" + ) if len(bl_ppm) > 0: if not all(isinstance(i, numbers.Number) for i in bl_ppm): - raise AttributeError('baseline indices must be numbers') + raise AttributeError("baseline indices must be numbers") self.__bl_ppm = numpy.sort(list(set(bl_ppm)))[::-1] else: self.__bl_ppm = None @@ -308,21 +376,28 @@ def _bl_ppm(self, bl_ppm): @property def _bl_indices(self): if self._bl_ppm is not None: - return self._conv_to_index(self.data, self._bl_ppm, self._params['sw_left'], self._params['sw']) + return self._conv_to_index( + self.data, + self._bl_ppm, + self._params["sw_left"], + self._params["sw"], + ) else: return None @property def _bl_poly(self): return self.__bl_poly - - @_bl_poly.setter + + @_bl_poly.setter def _bl_poly(self, bl_poly): if bl_poly is not None: if not Fid._is_flat_iter(bl_poly): - raise AttributeError('baseline polynomial must be a flat iterable') + raise AttributeError( + "baseline polynomial must be a flat iterable" + ) if not all(isinstance(i, numbers.Number) for i in bl_poly): - raise AttributeError('baseline polynomial must be numbers') + raise AttributeError("baseline polynomial must be numbers") self.__bl_poly = numpy.array(bl_poly) else: self.__bl_ppm = bl_poly @@ -333,9 +408,14 @@ def _index_peaks(self): :attr:`~nmrpy.data_objects.Fid.peaks` converted to indices rather than ppm """ if self.peaks is not None: - return 
self._conv_to_index(self.data, self.peaks, self._params['sw_left'], self._params['sw']) + return self._conv_to_index( + self.data, + self.peaks, + self._params["sw_left"], + self._params["sw"], + ) else: - return [] + return [] @property def _index_ranges(self): @@ -344,10 +424,15 @@ def _index_ranges(self): """ if self.ranges is not None: shp = self.ranges.shape - index_ranges = self._conv_to_index(self.data, self.ranges.flatten(), self._params['sw_left'], self._params['sw']) + index_ranges = self._conv_to_index( + self.data, + self.ranges.flatten(), + self._params["sw_left"], + self._params["sw"], + ) return index_ranges.reshape(shp) else: - return [] + return [] @property def _grouped_peaklist(self): @@ -355,18 +440,37 @@ def _grouped_peaklist(self): :attr:`~nmrpy.data_objects.Fid.peaks` grouped according to :attr:`~nmrpy.data_objects.Fid.ranges` """ if self.ranges is not None: - return numpy.array([[peak for peak in self.peaks if peak > min(peak_range) and peak < max(peak_range)] - for peak_range in self.ranges], dtype=object) + return numpy.array( + [ + [ + peak + for peak in self.peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self.ranges + ], + dtype=object, + ) else: return [] + @property def _grouped_index_peaklist(self): """ :attr:`~nmrpy.data_objects.Fid._index_peaks` grouped according to :attr:`~nmrpy.data_objects.Fid._index_ranges` """ if self._index_ranges is not None: - return numpy.array([[peak for peak in self._index_peaks if peak > min(peak_range) and peak < max(peak_range)] - for peak_range in self._index_ranges], dtype=object) + return numpy.array( + [ + [ + peak + for peak in self._index_peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self._index_ranges + ], + dtype=object, + ) else: return [] @@ -378,17 +482,17 @@ def _deconvoluted_peaks(self): def _deconvoluted_peaks(self, deconvoluted_peaks): """This is a list of lists of peak parameters with the order [offset, 
gauss_sigma, lorentz_hwhm, amplitude, frac_gauss]: - offset: spectral offset + offset: spectral offset - gauss_sigma: Gaussian sigma + gauss_sigma: Gaussian sigma - lorentz_hwhm: Lorentzian half-width-at-half-maximum + lorentz_hwhm: Lorentzian half-width-at-half-maximum - amplitude: height of peak + amplitude: height of peak - frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) - """ - self.__deconvoluted_peaks = deconvoluted_peaks + frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) + """ + self.__deconvoluted_peaks = deconvoluted_peaks @property def deconvoluted_integrals(self): @@ -398,16 +502,22 @@ def deconvoluted_integrals(self): if self._deconvoluted_peaks is not None: integrals = [] for peak in self._deconvoluted_peaks: - int_gauss = peak[-1]*Fid._f_gauss_int(peak[3], peak[1]) - int_lorentz = (1-peak[-1])*Fid._f_lorentz_int(peak[3], peak[2]) - integrals.append(int_gauss+int_lorentz) + int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) + int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int( + peak[3], peak[2] + ) + integrals.append(int_gauss + int_lorentz) return integrals - + def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. """ - plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] return plots def _del_plots(self): @@ -423,7 +533,8 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. 
""" widgets = [ - id for id in sorted(self.__dict__) + id + for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], Calibrator) or isinstance(self.__dict__[id], DataPeakSelector) @@ -442,15 +553,14 @@ def _del_widgets(self): @classmethod def _is_valid_dataset(cls, data): if isinstance(data, str): - raise TypeError('Data must be iterable not a string.') + raise TypeError("Data must be iterable not a string.") if not cls._is_iter(data): - raise TypeError('Data must be an iterable.') + raise TypeError("Data must be an iterable.") if not cls._is_flat_iter(data): - raise TypeError('Data must not be nested.') + raise TypeError("Data must not be nested.") if not all(isinstance(i, numbers.Number) for i in data): - raise TypeError('Data must consist of numbers only.') - return True - + raise TypeError("Data must consist of numbers only.") + return True @classmethod def from_data(cls, data): @@ -459,11 +569,11 @@ def from_data(cls, data): Instantiate a new :class:`~nmrpy.data_objects.Fid` object by providing a spectral data object as argument. Eg. :: - fid = Fid.from_data(data) + fid = Fid.from_data(data) """ new_instance = cls() new_instance.data = data - return new_instance + return new_instance def zf(self): """ @@ -475,7 +585,9 @@ def zf(self): in an artificially increased resolution once Fourier-transformed. """ - self.data = numpy.append(self.data, 0*self.data) + self.data = numpy.append(self.data, 0 * self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) def emhz(self, lb=5.0): """ @@ -486,13 +598,24 @@ def emhz(self, lb=5.0): :keyword lb: degree of line-broadening in Hz. 
""" - self.data = numpy.exp(-numpy.pi*numpy.arange(len(self.data)) * (lb/self._params['sw_hz'])) * self.data + self.data = ( + numpy.exp( + -numpy.pi + * numpy.arange(len(self.data)) + * (lb / self._params["sw_hz"]) + ) + * self.data + ) + for _ in self.data: + self.fid_object.data.append(float(_)) def real(self): """ Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. """ self.data = numpy.real(self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) # GENERAL FUNCTIONS def ft(self): @@ -505,12 +628,14 @@ def ft(self): series,' Math. Comput. 19: 297-301.*) """ - if self._flags['ft']: - raise ValueError('Data have already been Fourier Transformed.') + if self._flags["ft"]: + raise ValueError("Data have already been Fourier Transformed.") if Fid._is_valid_dataset(self.data): list_params = (self.data, self._file_format) self.data = Fid._ft(list_params) - self._flags['ft'] = True + for _ in self.data: + self.fid_object.data.append(float(_)) + self._flags["ft"] = True @classmethod def _ft(cls, list_params): @@ -519,31 +644,36 @@ def _ft(cls, list_params): list_params is a tuple of (, ). """ if len(list_params) != 2: - raise ValueError('Wrong number of parameters. list_params must contain [, ]') + raise ValueError( + "Wrong number of parameters. 
list_params must contain [, ]" + ) data, file_format = list_params if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) s = len(data) - if file_format == 'varian' or file_format == None: - ft_data = numpy.append(data[int(s / 2.0):], data[: int(s / 2.0)]) - if file_format == 'bruker': - ft_data = numpy.append(data[int(s / 2.0):: -1], data[s: int(s / 2.0): -1]) + if file_format == "varian" or file_format == None: + ft_data = numpy.append( + data[int(s / 2.0) :], data[: int(s / 2.0)] + ) + if file_format == "bruker": + ft_data = numpy.append( + data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] + ) return ft_data - @staticmethod def _conv_to_ppm(data, index, sw_left, sw): - """ - Convert index array to ppm. - """ - if isinstance(index, list): - index = numpy.array(index) - frc_sw = index/float(len(data)) - ppm = sw_left-sw*frc_sw - if Fid._is_iter(ppm): - return numpy.array([round(i, 2) for i in ppm]) - else: - return round(ppm, 2) + """ + Convert index array to ppm. + """ + if isinstance(index, list): + index = numpy.array(index) + frc_sw = index / float(len(data)) + ppm = sw_left - sw * frc_sw + if Fid._is_iter(ppm): + return numpy.array([round(i, 2) for i in ppm]) + else: + return round(ppm, 2) @staticmethod def _conv_to_index(data, ppm, sw_left, sw): @@ -566,18 +696,14 @@ def _conv_to_index(data, ppm, sw_left, sw): def phase_correct(self, method='leastsq', verbose = True): """ - Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising - total absolute area. - - :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: - - Nelder-Mead (nelder) + Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising + total absolute area. - L-BFGS-B (l-bfgs-b) + :keyword method: The fitting method to use. 
Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: - Conjugate Gradient (cg) + Nelder-Mead (nelder) - Powell (powell) + L-BFGS-B (l-bfgs-b) Newton-CG (newton) @@ -615,78 +741,80 @@ def _phase_correct(cls, list_params): @classmethod def _phased_data_sum(cls, pars, data): - err = Fid._ps(data, p0=pars['p0'].value, p1=pars['p1'].value).real - return numpy.array([abs(err).sum()]*2) + err = Fid._ps(data, p0=pars["p0"].value, p1=pars["p1"].value).real + return numpy.array([abs(err).sum()] * 2) @classmethod def _ps(cls, data, p0=0.0, p1=0.0): - """ - Linear phase correction - - :keyword p0: Zero order phase in degrees. - - :keyword p1: First order phase in degrees. + """ + Linear phase correction - """ - if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError('p0 and p1 must be floats or ints.') - if not data.dtype in Fid._complex_dtypes: - raise TypeError('data must be complex.') - # convert to radians - p0 = p0*numpy.pi/180.0 - p1 = p1*numpy.pi/180.0 - size = len(data) - ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) - return ph*data + :keyword p0: Zero order phase in degrees. + + :keyword p1: First order phase in degrees. 
+ + """ + if not all(isinstance(i, (float, int)) for i in [p0, p1]): + raise TypeError("p0 and p1 must be floats or ints.") + if not data.dtype in Fid._complex_dtypes: + raise TypeError("data must be complex.") + # convert to radians + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 + size = len(data) + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + return ph * data def ps(self, p0=0.0, p1=0.0): """ Linear phase correction of :attr:`~nmrpy.data_objects.Fid.data` - + :keyword p0: Zero order phase in degrees :keyword p1: First order phase in degrees - + """ if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError('p0 and p1 must be floats or ints.') + raise TypeError("p0 and p1 must be floats or ints.") if not self.data.dtype in self._complex_dtypes: - raise TypeError('data must be complex.') + raise TypeError("data must be complex.") # convert to radians - p0 = p0*numpy.pi/180.0 - p1 = p1*numpy.pi/180.0 + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 size = len(self.data) - ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) - self.data = ph*self.data + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + self.data = ph * self.data + for _ in self.data: + self.fid_object.data.append(float(_)) def phaser(self): """ Instantiate a phase-correction GUI widget which applies to :attr:`~nmrpy.data_objects.Fid.data`. """ if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype not in self._complex_dtypes: - raise TypeError('data must be complex.') + raise TypeError("data must be complex.") if not Fid._is_flat_iter(self.data): - raise AttributeError('data must be 1 dimensional.') + raise AttributeError("data must be 1 dimensional.") global _phaser_widget self._phaser_widget = Phaser(self) def calibrate(self): """ - Instantiate a GUI widget to select a peak and calibrate spectrum. - Left-clicking selects a peak. 
The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate spectrum. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration. """ - plot_label = \ -''' + plot_label = """ Left - select peak -''' +""" plot_title = "Calibration {}".format(self.id) - self._calibrate_widget = Calibrator(self, - title=plot_title, - label=plot_label, - ) + self._calibrate_widget = Calibrator( + self, + title=plot_title, + label=plot_label, + ) def baseline_correct(self, deg=2): """ @@ -695,20 +823,22 @@ def baseline_correct(self, deg=2): (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) with polynomial of specified degree (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) and subtract this polynomial from :attr:`~nmrpy.data_objects.Fid.data`. - + :keyword deg: degree of fitted polynomial """ if self._bl_indices is None: - raise AttributeError('No points selected for baseline correction. Run fid.baseliner()') + raise AttributeError( + "No points selected for baseline correction. Run fid.baseliner()" + ) if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype in self._complex_dtypes: - raise TypeError('data must not be complex.') + raise TypeError("data must not be complex.") if not Fid._is_flat_iter(self.data): - raise AttributeError('data must be 1 dimensional.') - + raise AttributeError("data must be 1 dimensional.") + data = self.data x = numpy.arange(len(data)) m = numpy.ones_like(x) @@ -719,20 +849,24 @@ def baseline_correct(self, deg=2): p = numpy.ma.polyfit(xm, ym, deg) yp = numpy.polyval(p, x) self._bl_poly = yp - data_bl = data-yp + data_bl = data - yp self.data = numpy.array(data_bl) + for _ in self.data: + self.fid_object.data.append(float(_)) def peakpick(self, thresh=0.1): - """ + """ Attempt to automatically identify peaks. Picked peaks are assigned to :attr:`~nmrpy.data_objects.Fid.peaks`. 
:keyword thresh: fractional threshold for peak-picking """ - peaks_ind = nmrglue.peakpick.pick(self.data, thresh*self.data.max()) + peaks_ind = nmrglue.peakpick.pick(self.data, thresh * self.data.max()) peaks_ind = [i[0] for i in peaks_ind] - peaks_ppm = Fid._conv_to_ppm(self.data, peaks_ind, self._params['sw_left'], self._params['sw']) + peaks_ppm = Fid._conv_to_ppm( + self.data, peaks_ind, self._params["sw_left"], self._params["sw"] + ) self.peaks = peaks_ppm print(self.peaks) @@ -746,19 +880,19 @@ def peakpicker(self): :meth:`~nmrpy.data_objects.Fid.deconv`). """ - plot_label = \ -''' + plot_label = """ Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" plot_title = "Peak-picking {}".format(self.id) - self._peakpicker_widget = DataPeakSelector(self, - title=plot_title, - label=plot_label, - ) + self._peakpicker_widget = DataPeakSelector( + self, + title=plot_title, + label=plot_label, + ) def clear_peaks(self): """ @@ -781,48 +915,62 @@ def baseliner(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
""" - plot_label = \ -''' + plot_label = """ Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" plot_title = "Baseline correction {}".format(self.id) - self._baseliner_widget = FidRangeSelector(self, - title=plot_title, - label=plot_label, - ) - + self._baseliner_widget = FidRangeSelector( + self, + title=plot_title, + label=plot_label, + ) + @classmethod def _f_gauss(cls, offset, amplitude, gauss_sigma, x): - return amplitude*numpy.exp(-((offset-x)**2.0)/(2.0*gauss_sigma**2.0)) - + return amplitude * numpy.exp( + -((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0) + ) + @classmethod def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): - #return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) - return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-offset)**2.0) + # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) + return ( + amplitude + * lorentz_hwhm**2.0 + / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) + ) @classmethod def _f_gauss_int(cls, amplitude, gauss_sigma): - return amplitude*numpy.sqrt(2.0*numpy.pi*gauss_sigma**2.0) + return amplitude * numpy.sqrt(2.0 * numpy.pi * gauss_sigma**2.0) @classmethod def _f_lorentz_int(cls, amplitude, lorentz_hwhm): - #empirical integral commented out - #x = numpy.arange(1000*lorentz_hwhm) - #return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) - #this integral forumula from http://magicplot.com/wiki/fit_equations - return amplitude*lorentz_hwhm*numpy.pi + # empirical integral commented out + # x = numpy.arange(1000*lorentz_hwhm) + # return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) + # this integral forumula from http://magicplot.com/wiki/fit_equations + return amplitude * lorentz_hwhm * numpy.pi @classmethod - def _f_pk(cls, x, offset=0.0, gauss_sigma=1.0, lorentz_hwhm=1.0, amplitude=1.0, frac_gauss=0.0): + def _f_pk( + cls, + x, + offset=0.0, + gauss_sigma=1.0, + 
lorentz_hwhm=1.0, + amplitude=1.0, + frac_gauss=0.0, + ): """ Return the a combined Gaussian/Lorentzian peakshape for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. - + :arg x: array of equal length to :attr:`~nmrpy.data_objects.Fid.data` - + :keyword offset: spectral offset in x @@ -834,39 +982,37 @@ def _f_pk(cls, x, offset=0.0, gauss_sigma=1.0, lorentz_hwhm=1.0, amplitude=1.0, :keyword frac_gauss: fraction of function to be Gaussian (0 -> 1). Note: specifying a Gaussian fraction of 0 will produce a pure Lorentzian and vice - versa. """ - - #validation + versa.""" + + # validation parameters = [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss] if not all(isinstance(i, numbers.Number) for i in parameters): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) if frac_gauss > 1.0: frac_gauss = 1.0 if frac_gauss < 0.0: frac_gauss = 0.0 - + gauss_peak = cls._f_gauss(offset, amplitude, gauss_sigma, x) lorentz_peak = cls._f_lorentz(offset, amplitude, lorentz_hwhm, x) - peak = frac_gauss*gauss_peak + (1-frac_gauss)*lorentz_peak - - return peak - + peak = frac_gauss * gauss_peak + (1 - frac_gauss) * lorentz_peak + return peak @classmethod def _f_makep(cls, data, peaks, frac_gauss=None): """ Make a set of initial peak parameters for deconvolution. 
- + :arg data: data to be fitted :arg peaks: selected peak positions (see peakpicker()) - + :returns: an array of peaks, each consisting of the following parameters: spectral offset (x) @@ -880,15 +1026,15 @@ def _f_makep(cls, data, peaks, frac_gauss=None): frac_gauss: fraction of function to be Gaussian (0 -> 1) """ if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_flat_iter(peaks): - raise TypeError('peaks must be a flat iterable') + raise TypeError("peaks must be a flat iterable") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + p = [] for i in peaks: - pamp = 0.9*abs(data[int(i)]) + pamp = 0.9 * abs(data[int(i)]) single_peak = [i, 10, 0.1, pamp, frac_gauss] p.append(single_peak) return numpy.array(p) @@ -897,95 +1043,99 @@ def _f_makep(cls, data, peaks, frac_gauss=None): def _f_conv(cls, parameterset_list, data): """ Returns the maximum of a convolution of an initial set of lineshapes and the data to be fitted. 
- - parameterset_list -- a list of parameter lists: n*[[spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + + parameterset_list -- a list of parameter lists: n*[[spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)]] where n is the number of peaks data -- 1D spectral array - + """ if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_iter(parameterset_list): - raise TypeError('parameterset_list must be an iterable') + raise TypeError("parameterset_list must be an iterable") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + data[data == 0.0] = 1e-6 - x = numpy.arange(len(data), dtype='f8') + x = numpy.arange(len(data), dtype="f8") peaks_init = cls._f_pks(parameterset_list, x) data_convolution = numpy.convolve(data, peaks_init[::-1]) auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) - max_data_convolution = numpy.where(data_convolution == data_convolution.max())[0][0] - max_auto_convolution = numpy.where(auto_convolution == auto_convolution.max())[0][0] + max_data_convolution = numpy.where( + data_convolution == data_convolution.max() + )[0][0] + max_auto_convolution = numpy.where( + auto_convolution == auto_convolution.max() + )[0][0] return max_data_convolution - max_auto_convolution - @classmethod + @classmethod def _f_pks_list(cls, parameterset_list, x): """ Return a list of peak evaluations for deconvolution. See _f_pk(). 
- + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ if not cls._is_iter_of_iters(parameterset_list): - raise TypeError('Parameter set must be an iterable of iterables') + raise TypeError("Parameter set must be an iterable of iterables") for p in parameterset_list: if not cls._is_iter(p): - raise TypeError('Parameter set must be an iterable') + raise TypeError("Parameter set must be an iterable") if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) return numpy.array([Fid._f_pk(x, *peak) for peak in parameterset_list]) - - @classmethod + @classmethod def _f_pks(cls, parameterset_list, x): """ Return the sum of a series of peak evaluations for deconvolution. See _f_pk(). 
- + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ - + if not cls._is_iter_of_iters(parameterset_list): - raise TypeError('Parameter set must be an iterable of iterables') + raise TypeError("Parameter set must be an iterable of iterables") for p in parameterset_list: if not cls._is_iter(p): - raise TypeError('Parameter set must be an iterable') + raise TypeError("Parameter set must be an iterable") if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError('Keyword parameters must be numbers.') + raise TypeError("Keyword parameters must be numbers.") if not cls._is_iter(x): - raise TypeError('x must be an iterable') + raise TypeError("x must be an iterable") if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - - peaks = x*0.0 + x = numpy.array(x) + + peaks = x * 0.0 for p in parameterset_list: - peak = cls._f_pk(x, - offset=p[0], - gauss_sigma=p[1], - lorentz_hwhm=p[2], - amplitude=p[3], - frac_gauss=p[4], - ) + peak = cls._f_pk( + x, + offset=p[0], + gauss_sigma=p[1], + lorentz_hwhm=p[2], + amplitude=p[3], + frac_gauss=p[4], + ) peaks += peak return peaks @@ -993,9 +1143,9 @@ def _f_pks(cls, parameterset_list, x): def _f_res(cls, p, data): """ Objective function for deconvolution. Returns residuals of the devonvolution fit. 
- + x -- array of equal length to FID - + Keyword arguments: p -- lmfit parameters object: offset_n -- spectral offset in x @@ -1005,75 +1155,81 @@ def _f_res(cls, p, data): frac_gauss_n -- fraction of function to be Gaussian (0 -> 1) where n is the peak number (zero-indexed) data -- spectrum array - + """ if not isinstance(p, lmfit.parameter.Parameters): - raise TypeError('Parameters must be of type lmfit.parameter.Parameters.') + raise TypeError( + "Parameters must be of type lmfit.parameter.Parameters." + ) if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable.') + raise TypeError("data must be a flat iterable.") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + params = Fid._parameters_to_list(p) - x = numpy.arange(len(data), dtype='f8') - res = data-cls._f_pks(params, x) + x = numpy.arange(len(data), dtype="f8") + res = data - cls._f_pks(params, x) return res @classmethod - def _f_fitp(cls, data, peaks, frac_gauss=None, method='leastsq'): + def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): """Fit a section of spectral data with a combination of Gaussian/Lorentzian peaks for deconvolution. - + Keyword arguments: peaks -- selected peak positions (see peakpicker()) frac_gauss -- fraction of fitted function to be Gaussian (1 - Guassian, 0 - Lorentzian) - + returns: fits -- list of fitted peak parameter sets - + Note: peaks are fitted by default using the Levenberg-Marquardt algorithm[1]. Other fitting algorithms are available (http://cars9.uchicago.edu/software/python/lmfit/fitting.html#choosing-different-fitting-methods). - + [1] Marquardt, Donald W. 'An algorithm for least-squares estimation of nonlinear parameters.' Journal of the Society for Industrial & Applied Mathematics 11.2 (1963): 431-441. 
""" data = numpy.real(data) if not cls._is_flat_iter(data): - raise TypeError('data must be a flat iterable') + raise TypeError("data must be a flat iterable") if not cls._is_flat_iter(peaks): - raise TypeError('peaks must be a flat iterable') - if any(peak > (len(data)-1) for peak in peaks): - raise ValueError('peaks must be within the length of data.') + raise TypeError("peaks must be a flat iterable") + if any(peak > (len(data) - 1) for peak in peaks): + raise ValueError("peaks must be within the length of data.") if not isinstance(data, numpy.ndarray): - data = numpy.array(data) + data = numpy.array(data) p = cls._f_makep(data, peaks, frac_gauss=0.5) init_ref = cls._f_conv(p, data) - if any(peaks+init_ref < 0) or any(peaks+init_ref > len(data)-1): - init_ref = 0 - if frac_gauss==None: - p = cls._f_makep(data, peaks+init_ref, frac_gauss=0.5) + if any(peaks + init_ref < 0) or any(peaks + init_ref > len(data) - 1): + init_ref = 0 + if frac_gauss == None: + p = cls._f_makep(data, peaks + init_ref, frac_gauss=0.5) else: - p = cls._f_makep(data, peaks+init_ref, frac_gauss=frac_gauss) - + p = cls._f_makep(data, peaks + init_ref, frac_gauss=frac_gauss) + params = lmfit.Parameters() for parset in range(len(p)): - current_parset = dict(zip(['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss'], p[parset])) - for k,v in current_parset.items(): - par_name = '%s_%i'%(k, parset) - params.add(name=par_name, - value=v, - vary=True, - min=0.0) - if 'offset' in par_name: - params[par_name].max = len(data)-1 - if 'frac_gauss' in par_name: + current_parset = dict( + zip( + ["offset", "sigma", "hwhm", "amplitude", "frac_gauss"], + p[parset], + ) + ) + for k, v in current_parset.items(): + par_name = "%s_%i" % (k, parset) + params.add(name=par_name, value=v, vary=True, min=0.0) + if "offset" in par_name: + params[par_name].max = len(data) - 1 + if "frac_gauss" in par_name: params[par_name].max = 1.0 if frac_gauss is not None: params[par_name].vary = False - #if 'sigma' in par_name 
or 'hwhm' in par_name: - # params[par_name].max = 0.01*current_parset['amplitude'] - if 'amplitude' in par_name: - params[par_name].max = 2.0*data.max() - + # if 'sigma' in par_name or 'hwhm' in par_name: + # params[par_name].max = 0.01*current_parset['amplitude'] + if "amplitude" in par_name: + params[par_name].max = 2.0 * data.max() + try: - mz = lmfit.minimize(cls._f_res, params, args=([data]), method=method) + mz = lmfit.minimize( + cls._f_res, params, args=([data]), method=method + ) fits = Fid._parameters_to_list(mz.params) except: fits = None @@ -1081,47 +1237,60 @@ def _f_fitp(cls, data, peaks, frac_gauss=None, method='leastsq'): @classmethod def _parameters_to_list(cls, p): - n_pks = int(len(p)/5) + n_pks = int(len(p) / 5) params = [] for i in range(n_pks): - current_params = [p['%s_%s'%(par, i)].value for par in ['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss']] + current_params = [ + p["%s_%s" % (par, i)].value + for par in [ + "offset", + "sigma", + "hwhm", + "amplitude", + "frac_gauss", + ] + ] params.append(current_params) return params - @classmethod def _deconv_datum(cls, list_parameters): if len(list_parameters) != 5: - raise ValueError('list_parameters must consist of five objects.') - if (type(list_parameters[1]) == list and len(list_parameters[1]) == 0) or \ - (type(list_parameters[2]) == list and len(list_parameters[2]) == 0): + raise ValueError("list_parameters must consist of five objects.") + if ( + type(list_parameters[1]) == list and len(list_parameters[1]) == 0 + ) or ( + type(list_parameters[2]) == list and len(list_parameters[2]) == 0 + ): return [] datum, peaks, ranges, frac_gauss, method = list_parameters if not cls._is_iter_of_iters(ranges): - raise TypeError('ranges must be an iterable of iterables') + raise TypeError("ranges must be an iterable of iterables") if not all(len(rng) == 2 for rng in ranges): - raise ValueError('ranges must contain two values.') + raise ValueError("ranges must contain two values.") if not all(rng[0] 
!= rng[1] for rng in ranges): - raise ValueError('data_index must contain different values.') + raise ValueError("data_index must contain different values.") if not isinstance(datum, numpy.ndarray): - datum = numpy.array(datum) + datum = numpy.array(datum) if datum.dtype in cls._complex_dtypes: - raise TypeError('data must be not be complex.') + raise TypeError("data must be not be complex.") fit = [] for j in zip(peaks, ranges): - d_slice = datum[j[1][0]:j[1][1]] - p_slice = j[0]-j[1][0] - f = cls._f_fitp(d_slice, p_slice, frac_gauss=frac_gauss, method=method) + d_slice = datum[j[1][0] : j[1][1]] + p_slice = j[0] - j[1][0] + f = cls._f_fitp( + d_slice, p_slice, frac_gauss=frac_gauss, method=method + ) f = numpy.array(f).transpose() f[0] += j[1][0] f = f.transpose() fit.append(f) return fit - def deconv(self, method='leastsq', frac_gauss=0.0): + def deconv(self, method="leastsq", frac_gauss=0.0): """ Deconvolute :attr:`~nmrpy.data_obects.Fid.data` object by fitting a @@ -1133,32 +1302,39 @@ def deconv(self, method='leastsq', frac_gauss=0.0): :keyword frac_gauss: (0-1) determines the Gaussian fraction of the peaks. Setting this argument to None will fit this parameter as well. :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: - + Nelder-Mead (nelder) - + L-BFGS-B (l-bfgs-b) - + Conjugate Gradient (cg) - + Powell (powell) - + Newton-CG (newton) - + """ if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError("data does not exist.") if self.data.dtype in self._complex_dtypes: - raise TypeError('data must be not be complex.') + raise TypeError("data must be not be complex.") if self.peaks is None: - raise AttributeError('peaks must be picked.') + raise AttributeError("peaks must be picked.") if self.ranges is None: - raise AttributeError('ranges must be specified.') - print('deconvoluting {}'.format(self.id)) - list_parameters = [self.data, self._grouped_index_peaklist, self._index_ranges, frac_gauss, method] - self._deconvoluted_peaks = numpy.array([j for i in Fid._deconv_datum(list_parameters) for j in i]) - print('deconvolution completed') - + raise AttributeError("ranges must be specified.") + print("deconvoluting {}".format(self.id)) + list_parameters = [ + self.data, + self._grouped_index_peaklist, + self._index_ranges, + frac_gauss, + method, + ] + self._deconvoluted_peaks = numpy.array( + [j for i in Fid._deconv_datum(list_parameters) for j in i] + ) + print("deconvolution completed") def plot_ppm(self, **kwargs): """ @@ -1168,7 +1344,7 @@ def plot_ppm(self, **kwargs): :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot """ @@ -1185,7 +1361,7 @@ def plot_deconv(self, **kwargs): :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot @@ -1194,14 +1370,15 @@ def plot_deconv(self, **kwargs): :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks """ if not len(self._deconvoluted_peaks): - raise AttributeError('deconvolution not yet performed') + raise AttributeError("deconvolution not yet 
performed") plt = Plot() plt._plot_deconv(self, **kwargs) setattr(self, plt.id, plt) pyplot.show() - + + class FidArray(Base): - ''' + """ This object collects several :class:`~nmrpy.data_objects.Fid` objects into an array, and it contains all the processing methods necessary for bulk @@ -1214,9 +1391,36 @@ class FidArray(Base): :class:`~nmrpy.data_objects.FidArray` with a unique ID of the form 'fidXX', where 'XX' is an increasing integer . - ''' + """ + + def __init__(self): + _now = str(datetime.now()) + self.data_model = self.lib.NMRpy( + datetime_created=_now, + datetime_modified=_now, + ) + del _now + + @property + def data_model(self): + return self.__data_model + + @data_model.setter + def data_model(self, data_model: DataModel): + if not isinstance(data_model, DataModel): + raise AttributeError( + f"Parameter `data_model` has to be of type `sdrdm.DataModel`, got {type(data_model)} instead." + ) + self.__data_model = data_model + self.__data_model.datetime_modified = str(datetime.now()) + + @data_model.deleter + def data_model(self): + del self.__data_model + print("The current data model has been deleted.") + def __str__(self): - return 'FidArray of {} FID(s)'.format(len(self.data)) + return "FidArray of {} FID(s)".format(len(self.data)) def get_fid(self, id): """ @@ -1229,20 +1433,28 @@ def get_fid(self, id): try: return getattr(self, id) except AttributeError: - print('{} does not exist.'.format(id)) + print("{} does not exist.".format(id)) def get_fids(self): """ Return a list of all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. """ - fids = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Fid)] + fids = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Fid) + ] return fids def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. 
""" - plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] return plots def _del_plots(self): @@ -1258,7 +1470,8 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.FidArray`. """ widgets = [ - id for id in sorted(self.__dict__) + id + for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], RangeCalibrator) or isinstance(self.__dict__[id], DataPeakRangeSelector) @@ -1335,7 +1548,7 @@ def add_fid(self, fid): if isinstance(fid, Fid): setattr(self, fid.id, fid) else: - raise AttributeError('FidArray requires Fid object.') + raise AttributeError("FidArray requires Fid object.") def del_fid(self, fid_id): """ @@ -1358,14 +1571,14 @@ def del_fid(self, fid_id): # Old NMRpy _params structure self._params['acqtime'] = at else: - raise AttributeError('{} is not an FID object.'.format(fid_id)) + raise AttributeError("{} is not an FID object.".format(fid_id)) else: - raise AttributeError('FID {} does not exist.'.format(fid_id)) + raise AttributeError("FID {} does not exist.".format(fid_id)) def add_fids(self, fids): """ Add a list of :class:`~nmrpy.data_objects.Fid` objects to this :class:`~nmrpy.data_objects.FidArray`. - + :arg fids: a list of :class:`~nmrpy.data_objects.Fid` instances """ if FidArray._is_iter(fids): @@ -1374,7 +1587,7 @@ def add_fids(self, fids): for fid_index in range(num_fids): try: fid = fids[fid_index] - id_str = 'fid{0:0'+zero_fill+'d}' + id_str = "fid{0:0" + zero_fill + "d}" fid.id = id_str.format(fid_index) self.add_fid(fid) except AttributeError as e: @@ -1421,50 +1634,50 @@ def _setup_params(fid_array): def from_data(cls, data): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a 2D data set of spectral arrays. 
- - :arg data: a 2D data array + + :arg data: a 2D data array """ if not cls._is_iter_of_iters(data): - raise TypeError('data must be an iterable of iterables.') + raise TypeError("data must be an iterable of iterables.") fid_array = cls() fids = [] for fid_index, datum in zip(range(len(data)), data): - fid_id = 'fid%i'%fid_index + fid_id = "fid%i" % fid_index fid = Fid(id=fid_id, data=datum) fids.append(fid) fid_array.add_fids(fids) return fid_array @classmethod - def from_path(cls, fid_path='.', file_format=None, arrayset=None): + def from_path(cls, fid_path=".", file_format=None, arrayset=None): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a .fid directory. :keyword fid_path: filepath to .fid directory :keyword file_format: 'varian' or 'bruker', usually unnecessary - - :keyword arrayset: (int) array set for interleaved spectra, - user is prompted if not specified + + :keyword arrayset: (int) array set for interleaved spectra, + user is prompted if not specified """ if not file_format: try: - with open(fid_path, 'rb') as f: + with open(fid_path, "rb") as f: return pickle.load(f) except: - print('Not NMRPy data file.') + print("Not NMRPy data file.") importer = Importer(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == 'varian': + elif file_format == "varian": importer = VarianImporter(fid_path=fid_path) importer.import_fid() - elif file_format == 'bruker': + elif file_format == "bruker": importer = BrukerImporter(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == 'nmrpy': - with open(fid_path, 'rb') as f: + elif file_format == "nmrpy": + with open(fid_path, "rb") as f: return pickle.load(f) - + if cls._is_iter(importer.data): fid_array = cls.from_data(importer.data) fid_array._file_format = importer._file_format @@ -1477,17 +1690,17 @@ def from_path(cls, fid_path='.', file_format=None, arrayset=None): cls._setup_params(fid_array) return fid_array else: - raise IOError('Data 
could not be imported.') + raise IOError("Data could not be imported.") def zf_fids(self): - """ + """ Zero-fill all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` """ for fid in self.get_fids(): fid.zf() def emhz_fids(self, lb=5.0): - """ + """ Apply line-broadening (apodisation) to all :class:`nmrpy.~data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword lb: degree of line-broadening in Hz. @@ -1496,7 +1709,7 @@ def emhz_fids(self, lb=5.0): fid.emhz(lb=lb) def ft_fids(self, mp=True, cpus=None): - """ + """ Fourier-transform all FIDs. :keyword mp: parallelise over multiple processors, significantly reducing computation time @@ -1509,14 +1722,14 @@ def ft_fids(self, mp=True, cpus=None): ft_data = self._generic_mp(Fid._ft, list_params, cpus) for fid, datum in zip(fids, ft_data): fid.data = datum - fid._flags['ft'] = True - else: + fid._flags["ft"] = True + else: for fid in self.get_fids(): fid.ft() - print('Fourier-transformation completed') + print("Fourier-transformation completed") def real_fids(self): - """ + """ Discard imaginary component of FID data sets. """ @@ -1524,13 +1737,13 @@ def real_fids(self): fid.real() def norm_fids(self): - """ + """ Normalise FIDs by maximum data value in :attr:`~nmrpy.data_objects.FidArray.data`. """ dmax = self.data.max() for fid in self.get_fids(): - fid.data = fid.data/dmax + fid.data = fid.data / dmax def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True): """ @@ -1544,7 +1757,7 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) :keyword verbose: prints out phase angles if True (default) """ - if mp: + if mp: fids = self.get_fids() if not all(fid.data.dtype in self._complex_dtypes for fid in fids): raise TypeError('Only complex data can be phase-corrected.') @@ -1569,17 +1782,18 @@ def baseliner_fids(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
""" - plot_label = \ -''' + plot_label = """ Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' - plot_title = 'Select data for baseline-correction' - self._baseliner_widget = FidArrayRangeSelector(self, title=plot_title, label=plot_label, voff=0.01) - +""" + plot_title = "Select data for baseline-correction" + self._baseliner_widget = FidArrayRangeSelector( + self, title=plot_title, label=plot_label, voff=0.01 + ) + def baseline_correct_fids(self, deg=2): - """ + """ Apply baseline-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) @@ -1588,8 +1802,12 @@ def baseline_correct_fids(self, deg=2): try: fid.baseline_correct(deg=deg) except: - print('failed for {}. Perhaps first run baseliner_fids()'.format(fid.id)) - print('baseline-correction completed') + print( + "failed for {}. Perhaps first run baseliner_fids()".format( + fid.id + ) + ) + print("baseline-correction completed") @property def _data_traces(self): @@ -1597,7 +1815,7 @@ def _data_traces(self): @_data_traces.setter def _data_traces(self, data_traces): - self.__data_traces = data_traces + self.__data_traces = data_traces @property def _index_traces(self): @@ -1605,7 +1823,7 @@ def _index_traces(self): @_index_traces.setter def _index_traces(self, index_traces): - self.__index_traces = index_traces + self.__index_traces = index_traces @property def _trace_mask(self): @@ -1613,7 +1831,7 @@ def _trace_mask(self): @_trace_mask.setter def _trace_mask(self, trace_mask): - self.__trace_mask = trace_mask + self.__trace_mask = trace_mask @property def _trace_mean_ppm(self): @@ -1621,8 +1839,8 @@ def _trace_mean_ppm(self): @_trace_mean_ppm.setter def _trace_mean_ppm(self, trace_mean_ppm): - trace_mean_ppm - self.__trace_mean_ppm = trace_mean_ppm + trace_mean_ppm + self.__trace_mean_ppm = trace_mean_ppm @property def 
integral_traces(self): @@ -1634,10 +1852,12 @@ def integral_traces(self): @integral_traces.setter def integral_traces(self, integral_traces): - self._integral_traces = integral_traces + self._integral_traces = integral_traces - def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): - """ + def deconv_fids( + self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0 + ): + """ Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` @@ -1646,18 +1866,33 @@ def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True, default is n-1 cores """ - if mp: + if mp: fids = self.get_fids() - if not all(fid._flags['ft'] for fid in fids): - raise ValueError('Only Fourier-transformed data can be deconvoluted.') - list_params = [[fid.data, fid._grouped_index_peaklist, fid._index_ranges, frac_gauss, method] for fid in fids] - deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) + if not all(fid._flags["ft"] for fid in fids): + raise ValueError( + "Only Fourier-transformed data can be deconvoluted." 
+ ) + list_params = [ + [ + fid.data, + fid._grouped_index_peaklist, + fid._index_ranges, + frac_gauss, + method, + ] + for fid in fids + ] + deconv_datum = self._generic_mp( + Fid._deconv_datum, list_params, cpus + ) for fid, datum in zip(fids, deconv_datum): - fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) + fid._deconvoluted_peaks = numpy.array( + [j for i in datum for j in i] + ) else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) - print('deconvolution completed') + print("deconvolution completed") def get_masked_integrals(self): """ @@ -1667,16 +1902,15 @@ def get_masked_integrals(self): try: ints = [list(i) for i in self.deconvoluted_integrals] for i in self._trace_mask: - ints_current = numpy.zeros_like(i, dtype='f8') + ints_current = numpy.zeros_like(i, dtype="f8") for j in range(len(i)): if i[j] != -1: ints_current[j] = ints[j].pop(0) result.append(ints_current) except AttributeError: - print('peakpicker_traces() or deconv_fids() probably not yet run.') + print("peakpicker_traces() or deconv_fids() probably not yet run.") return result - def ps_fids(self, p0=0.0, p1=0.0): """ Apply manual phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` @@ -1686,19 +1920,18 @@ def ps_fids(self, p0=0.0, p1=0.0): :keyword p1: First order phase in degrees """ for fid in self.get_fids(): - fid.ps(p0=p0, p1=p1) + fid.ps(p0=p0, p1=p1) @staticmethod def _generic_mp(fcn, iterable, cpus): if cpus is None: - cpus = cpu_count()-1 + cpus = cpu_count() - 1 proc_pool = Pool(cpus) result = proc_pool.map(fcn, iterable) proc_pool.close() proc_pool.join() return result - def plot_array(self, **kwargs): """ Plot :attr:`~nmrpy.data_objects.FidArray.data`. 
@@ -1766,43 +1999,44 @@ def plot_deconv_array(self, **kwargs): """ plt = Plot() - plt._plot_deconv_array(self.get_fids(), - **kwargs) + plt._plot_deconv_array(self.get_fids(), **kwargs) setattr(self, plt.id, plt) - - def calibrate(self, fid_number=None, assign_only_to_index=False, - voff=0.02): + def calibrate( + self, fid_number=None, assign_only_to_index=False, voff=0.02 + ): """ - Instantiate a GUI widget to select a peak and calibrate - spectra in a :class:`~nmrpy.data_objects.FidArray`. - Left-clicking selects a peak. The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate + spectra in a :class:`~nmrpy.data_objects.FidArray`. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration; this will be applied to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. See also :meth:`~nmrpy.data_objects.Fid.calibrate`. - + :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for calibration. If None, the whole data array is plotted. :keyword assign_only_to_index: if True, assigns calibration only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number; if False, assigns to all. 
:keyword voff: vertical offset for spectra """ - plot_label = \ -''' + plot_label = """ Left - select peak -''' - self._calibrate_widget = RangeCalibrator(self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) +""" + self._calibrate_widget = RangeCalibrator( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) - def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): + def peakpicker( + self, fid_number=None, assign_only_to_index=True, voff=0.02 + ): """ - Instantiate peak-picker widget for + Instantiate peak-picker widget for :attr:`~nmrpy.data_objects.Fid.data`, and apply selected :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` to all :class:`~nmrpy.data_objects.Fid` @@ -1816,23 +2050,22 @@ def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): :keyword voff: vertical offset for spectra """ - plot_label = \ -''' + plot_label = """ Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' - self._peakpicker_widget = DataPeakRangeSelector(self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label) +""" + self._peakpicker_widget = DataPeakRangeSelector( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) - def peakpicker_traces(self, - voff=0.02, - lw=1): + def peakpicker_traces(self, voff=0.02, lw=1): """ Instantiates a widget to pick peaks and ranges employing a polygon shape (or 'trace'). 
This is useful for picking peaks that are subject to drift and peaks @@ -1844,22 +2077,21 @@ def peakpicker_traces(self, """ if self.data is None: - raise AttributeError('No FIDs.') - plot_label = \ -''' + raise AttributeError("No FIDs.") + plot_label = """ Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -''' +""" self._peakpicker_widget = DataTraceRangeSelector( self, voff=voff, lw=lw, label=plot_label, - ) + ) def clear_peaks(self): """ @@ -1881,17 +2113,17 @@ def _generate_trace_mask(self, traces): ppm = [numpy.round(numpy.mean(i[0]), 2) for i in traces] self._trace_mean_ppm = ppm tt = [i[1] for i in traces] - ln = len(self.data) + ln = len(self.data) filled_tt = [] for i in tt: rng = numpy.arange(ln) if len(i) < ln: - rng[~(~(rngmax(i)))] = -1 + rng[~(~(rng < min(i)) * ~(rng > max(i)))] = -1 filled_tt.append(rng) filled_tt = numpy.array(filled_tt) return filled_tt - def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): + def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): traces = [dict(zip(i[1], i[0])) for i in traces] fids = self.get_fids() fids_i = range(len(self.data)) @@ -1903,15 +2135,14 @@ def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): for rng in spans: if peak >= min(rng) and peak <= max(rng): peaks.append(peak) - fids[i].peaks = peaks + fids[i].peaks = peaks ranges = [] - for rng in spans: - if any((peaks>min(rng))*(peaks min(rng)) * (peaks < max(rng))): ranges.append(rng) if ranges == []: ranges = None - fids[i].ranges = ranges - + fids[i].ranges = ranges def _get_all_summed_peakshapes(self): """ @@ -1919,7 +2150,7 @@ def _get_all_summed_peakshapes(self): """ peaks = [] for fid in self.get_fids(): - #x = numpy.arange(len(self.get_fids()[0].data)) + # x = numpy.arange(len(self.get_fids()[0].data)) x = numpy.arange(len(self.get_fids()[0].data)) peaks.append(Fid._f_pks(fid._deconvoluted_peaks, 
x)) return peaks @@ -1930,7 +2161,7 @@ def _get_all_list_peakshapes(self): """ peaks = [] for fid in self.get_fids(): - #x = numpy.arange(len(self.get_fids()[0].data)) + # x = numpy.arange(len(self.get_fids()[0].data)) x = numpy.arange(len(self.get_fids()[0].data)) peaks.append(Fid._f_pks_list(fid._deconvoluted_peaks, x)) return peaks @@ -1947,7 +2178,7 @@ def _get_truncated_peak_shapes_for_plotting(self): pk_y = [] pk_x = [] for pk in ps: - pk_ind = pk > 0.1*pk.max() + pk_ind = pk > 0.1 * pk.max() pk_x.append(ppm[pk_ind]) pk_y.append(pk[pk_ind]) peakshapes_short_x.append(pk_x) @@ -1967,24 +2198,25 @@ def select_integral_traces(self, voff=0.02, lw=1): :keyword lw: linewidth of plot (1) """ if self.data is None: - raise AttributeError('No FIDs.') - if (self.deconvoluted_integrals==None).any(): - raise AttributeError('No integrals.') + raise AttributeError("No FIDs.") + if (self.deconvoluted_integrals == None).any(): + raise AttributeError("No integrals.") peakshapes = self._get_all_summed_peakshapes() - #pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() - plot_label = \ -''' + # pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() + plot_label = """ Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Ctrl+Alt+Right - assign -''' - self._select_trace_widget = DataTraceSelector(self, - extra_data=peakshapes, - extra_data_colour='b', - voff=voff, +""" + self._select_trace_widget = DataTraceSelector( + self, + extra_data=peakshapes, + extra_data_colour="b", + voff=voff, label=plot_label, - lw=lw) + lw=lw, + ) def get_integrals_from_traces(self): """ @@ -1992,13 +2224,17 @@ def get_integrals_from_traces(self): :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary :attr:`~nmrpy.data_objects.FidArray.integral_traces`. 
""" - if self.deconvoluted_integrals is None or \ - None in self.deconvoluted_integrals: - raise AttributeError('No integrals.') - if not hasattr(self, '_integral_traces'): - raise AttributeError('No integral traces. First run select_integral_traces().') + if ( + self.deconvoluted_integrals is None + or None in self.deconvoluted_integrals + ): + raise AttributeError("No integrals.") + if not hasattr(self, "_integral_traces"): + raise AttributeError( + "No integral traces. First run select_integral_traces()." + ) integrals_set = {} - decon_set = self.deconvoluted_integrals + decon_set = self.deconvoluted_integrals for i, tr in self.integral_traces.items(): tr_keys = numpy.array([fid for fid in tr.keys()]) tr_vals = numpy.array([val for val in tr.values()]) @@ -2006,9 +2242,16 @@ def get_integrals_from_traces(self): tr_keys = tr_keys[tr_sort] tr_vals = tr_vals[tr_sort] integrals = decon_set[tr_keys, tr_vals] - integrals_set[i] = integrals + integrals_set[i] = integrals return integrals_set + def assign_integrals(self, integrals_set: list) -> dict: + print("~~~ Method under contruction ~~~") + widget_list = [] + for i, j in enumerate(integrals_set): + widget_list.append((i, list(j))) + return SelectMultiple(options=widget_list, description="Integrals:") + def save_to_file(self, filename=None, overwrite=False): """ Save :class:`~nmrpy.data_objects.FidArray` object to file, including all objects owned. 
@@ -2020,13 +2263,13 @@ def save_to_file(self, filename=None, overwrite=False): """ if filename is None: basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename+'.nmrpy' + filename = basename + ".nmrpy" if not isinstance(filename, str): - raise TypeError('filename must be a string.') - if filename[-6:] != '.nmrpy': - filename += '.nmrpy' + raise TypeError("filename must be a string.") + if filename[-6:] != ".nmrpy": + filename += ".nmrpy" if os.path.isfile(filename) and not overwrite: - print('File '+filename+' exists, set overwrite=True to force.') + print("File " + filename + " exists, set overwrite=True to force.") return 1 # delete all matplotlib plots to reduce file size self._del_plots() @@ -2036,11 +2279,52 @@ def save_to_file(self, filename=None, overwrite=False): self._del_widgets() for fid in self.get_fids(): fid._del_widgets() - with open(filename, 'wb') as f: + with open(filename, "wb") as f: pickle.dump(self, f) - -class Importer(Base): + def save_data(self, file_format: str, filename=None, overwrite=False): + print("~~~ Method under contruction ~~~") + if file_format.lower() == ("enzymeml" or "nmrml"): + # model = self.data_model.convert_to( + # template=Path(__file__).parent.parent / "links/enzymeml.toml" + # ) + enzymeml = DataModel.from_git( + url="https://github.com/EnzymeML/enzymeml-specifications.git", + tag="markdown-parser-refactor", + ) + doc = enzymeml.EnzymeMLDocument( + name=( + self.data_model.experiment.name + if hasattr(self.data_model.experiment, "name") + else "NMR experiment" + ), + created=self.data_model.datetime_created, + modified=self.data_model.datetime_modified, + ) + model = doc.xml() + elif file_format.lower() == "xml": + model = self.data_model.xml() + elif file_format.lower() == "json": + model = self.data_model.json() + elif file_format.lower() == "yaml": + model = self.data_model.yaml() + elif file_format.lower() == "hdf5": + model = self.data_model.hdf5() + else: + raise AttributeError( + 
f"Parameter `file_format` expected to be one of `enzymeml`; `nmrml`; `xml`; `json`; `yaml`; `hdf5`, got {file_format} instead." + ) + if not filename: + basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] + filename = basename + "." + file_format.lower() + if os.path.isfile(filename) and not overwrite: + print("File " + filename + " exists, set overwrite=True to force.") + return 1 + with open(filename, "w") as f: + f.write(model) + + +class Importer(Base): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.data = None @@ -2059,17 +2343,16 @@ def data(self, data): elif Importer._is_iter(data): self.__data = numpy.array([data]) else: - raise TypeError('data must be iterable.') + raise TypeError("data must be iterable.") else: - raise TypeError('data must be complex.') - + raise TypeError("data must be complex.") def import_fid(self, arrayset=None): """ This will first attempt to import Bruker data. Failing that, Varian. """ try: - print('Attempting Bruker') + print("Attempting Bruker") brukerimporter = BrukerImporter(fid_path=self.fid_path) brukerimporter.import_fid(arrayset=arrayset) self.data = brukerimporter.data @@ -2077,45 +2360,50 @@ def import_fid(self, arrayset=None): self._file_format = brukerimporter._file_format return except (FileNotFoundError, OSError): - print('fid_path does not specify a valid .fid directory.') - return + print("fid_path does not specify a valid .fid directory.") + return except (TypeError, IndexError): - print('probably not Bruker data') - try: - print('Attempting Varian') + print("probably not Bruker data") + try: + print("Attempting Varian") varianimporter = VarianImporter(fid_path=self.fid_path) varianimporter.import_fid() self._procpar = varianimporter._procpar - self.data = varianimporter.data + self.data = varianimporter.data self._file_format = varianimporter._file_format return except TypeError: - print('probably not Varian data') + print("probably not Varian data") -class 
VarianImporter(Importer): +class VarianImporter(Importer): def import_fid(self): try: procpar, data = nmrglue.varian.read(self.fid_path) - self.data = data + self.data = data self._procpar = procpar - self._file_format = 'varian' + self._file_format = "varian" except FileNotFoundError: - print('fid_path does not specify a valid .fid directory.') + print("fid_path does not specify a valid .fid directory.") except OSError: - print('fid_path does not specify a valid .fid directory.') - -class BrukerImporter(Importer): + print("fid_path does not specify a valid .fid directory.") + +class BrukerImporter(Importer): def import_fid(self, arrayset=None): try: - dirs = [int(i) for i in os.listdir(self.fid_path) if \ - os.path.isdir(self.fid_path+os.path.sep+i)] + dirs = [ + int(i) + for i in os.listdir(self.fid_path) + if os.path.isdir(self.fid_path + os.path.sep + i) + ] dirs.sort() dirs = [str(i) for i in dirs] alldata = [] for d in dirs: - procpar, data = nmrglue.bruker.read(self.fid_path+os.path.sep+d) + procpar, data = nmrglue.bruker.read( + self.fid_path + os.path.sep + d + ) alldata.append((procpar, data)) self.alldata = alldata incr = 1 @@ -2127,47 +2415,60 @@ def import_fid(self, arrayset=None): incr += 1 if incr > 1: if arrayset == None: - print('Total of '+str(incr)+' alternating FidArrays found.') - arrayset = input('Which one to import? ') + print( + "Total of " + + str(incr) + + " alternating FidArrays found." + ) + arrayset = input("Which one to import? ") arrayset = int(arrayset) else: arrayset = arrayset if arrayset < 1 or arrayset > incr: - raise ValueError('Select a value between 1 and ' - + str(incr) + '.') + raise ValueError( + "Select a value between 1 and " + str(incr) + "." 
+ ) else: arrayset = 1 self.incr = incr - procpar = alldata[arrayset-1][0] - data = numpy.vstack([d[1] for d in alldata[(arrayset-1)::incr]]) + procpar = alldata[arrayset - 1][0] + data = numpy.vstack( + [d[1] for d in alldata[(arrayset - 1) :: incr]] + ) self.data = data self._procpar = procpar - self._file_format = 'bruker' - self.data = nmrglue.bruker.remove_digital_filter(procpar, self.data) - self._procpar['tdelta'], self._procpar['tcum'],\ - self._procpar['tsingle'] = self._get_time_delta() - self._procpar['arraylength'] = self.data.shape[0] - self._procpar['arrayset'] = arrayset + self._file_format = "bruker" + self.data = nmrglue.bruker.remove_digital_filter( + procpar, self.data + ) + ( + self._procpar["tdelta"], + self._procpar["tcum"], + self._procpar["tsingle"], + ) = self._get_time_delta() + self._procpar["arraylength"] = self.data.shape[0] + self._procpar["arrayset"] = arrayset except FileNotFoundError: - print('fid_path does not specify a valid .fid directory.') + print("fid_path does not specify a valid .fid directory.") except OSError: - print('fid_path does not specify a valid .fid directory.') - + print("fid_path does not specify a valid .fid directory.") + def _get_time_delta(self): td = 0.0 tcum = [] tsingle = [] for i in range(self.incr): - pp = self.alldata[i][0]['acqus'] - sw_hz = pp['SW_h'] - at = pp['TD']/(2*sw_hz) - d1 = pp['D'][1] - nt = pp['NS'] - tot = (at+d1)*nt/60. 
# convert to mins + pp = self.alldata[i][0]["acqus"] + sw_hz = pp["SW_h"] + at = pp["TD"] / (2 * sw_hz) + d1 = pp["D"][1] + nt = pp["NS"] + tot = (at + d1) * nt / 60.0 # convert to mins td += tot tcum.append(td) tsingle.append(tot) return (td, numpy.array(tcum), numpy.array(tsingle)) -if __name__ == '__main__': + +if __name__ == "__main__": pass diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py new file mode 100644 index 0000000..d866ab2 --- /dev/null +++ b/nmrpy/datamodel/__init__.py @@ -0,0 +1,3 @@ + +__URL__ = "" +__COMMIT__ = "" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py new file mode 100644 index 0000000..bf9ee94 --- /dev/null +++ b/nmrpy/datamodel/core/__init__.py @@ -0,0 +1,33 @@ +from .nmrpy import NMRpy +from .experiment import Experiment +from .fid import FID +from .parameters import Parameters +from .fidarray import FIDArray +from .citation import Citation +from .person import Person +from .publication import Publication +from .cv import CV +from .term import Term +from .fileformats import FileFormats +from .subjects import Subjects +from .publicationtypes import PublicationTypes +from .identifiertypes import IdentifierTypes + +__doc__ = "" + +__all__ = [ + "NMRpy", + "Experiment", + "FID", + "Parameters", + "FIDArray", + "Citation", + "Person", + "Publication", + "CV", + "Term", + "FileFormats", + "Subjects", + "PublicationTypes", + "IdentifierTypes", +] diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py new file mode 100644 index 0000000..dc21126 --- /dev/null +++ b/nmrpy/datamodel/core/citation.py @@ -0,0 +1,225 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + +from typing import Any +from pydantic import AnyUrl + +from .term import Term +from .identifiertypes import IdentifierTypes +from .person import Person +from .publication 
import Publication +from .subjects import Subjects +from .publicationtypes import PublicationTypes + + +@forge_signature +class Citation(sdRDM.DataModel): + + """Container for various types of metadata primarily used in the publication and citation of the dataset.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("citationINDEX"), + xml="@id", + ) + + title: Optional[str] = Field( + default=None, + description="Title the dataset should have when published.", + ) + + doi: Optional[AnyUrl] = Field( + default=None, + description="DOI pointing to the published dataset", + ) + + description: Optional[str] = Field( + default=None, + description="Description the dataset should have when published.", + ) + + authors: List[Person] = Field( + description="List of authors for this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + subjects: List[Subjects] = Field( + description="List of subjects this dataset belongs to.", + default_factory=ListPlus, + multiple=True, + ) + + keywords: List[Term] = Field( + description="List of CV-based keywords describing the dataset.", + default_factory=ListPlus, + multiple=True, + ) + + topics: List[Term] = Field( + description="List of CV-based topics the dataset addresses.", + default_factory=ListPlus, + multiple=True, + ) + + related_publications: List[Publication] = Field( + description="List of publications relating to this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + notes: Optional[str] = Field( + default=None, + description="Additional notes about the dataset.", + ) + + funding: List[str] = Field( + description="Funding information for this dataset.", + default_factory=ListPlus, + multiple=True, + ) + + license: Optional[str] = Field( + default="CC BY 4.0", + description="License information for this dataset. 
Defaults to `CC BY 4.0`.", + ) + + def add_to_authors( + self, + last_name: str, + first_name: str, + middle_names: List[str] = ListPlus(), + affiliation: Optional[str] = None, + email: Optional[str] = None, + identifier_type: Optional[IdentifierTypes] = None, + identifier_value: Optional[str] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Person' to attribute authors + + Args: + id (str): Unique identifier of the 'Person' object. Defaults to 'None'. + last_name (): Family name of the person.. + first_name (): Given name of the person.. + middle_names (): List of middle names of the person.. Defaults to ListPlus() + affiliation (): Institution the Person belongs to.. Defaults to None + email (): Email address of the person.. Defaults to None + identifier_type (): Recognized identifier for the person.. Defaults to None + identifier_value (): Value of the identifier for the person.. Defaults to None + """ + + params = { + "last_name": last_name, + "first_name": first_name, + "middle_names": middle_names, + "affiliation": affiliation, + "email": email, + "identifier_type": identifier_type, + "identifier_value": identifier_value, + } + + if id is not None: + params["id"] = id + + self.authors.append(Person(**params)) + + def add_to_keywords( + self, + name: str, + accession: str, + term_cv_reference: Optional[str] = None, + value: Optional[Any] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Term' to attribute keywords + + Args: + id (str): Unique identifier of the 'Term' object. Defaults to 'None'. + name (): The preferred name of the term associated with the given accession number.. + accession (): Accession number of the term in the controlled vocabulary.. + term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None + value (): Value of the term, if applicable.. 
Defaults to None + """ + + params = { + "name": name, + "accession": accession, + "term_cv_reference": term_cv_reference, + "value": value, + } + + if id is not None: + params["id"] = id + + self.keywords.append(Term(**params)) + + def add_to_topics( + self, + name: str, + accession: str, + term_cv_reference: Optional[str] = None, + value: Optional[Any] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Term' to attribute topics + + Args: + id (str): Unique identifier of the 'Term' object. Defaults to 'None'. + name (): The preferred name of the term associated with the given accession number.. + accession (): Accession number of the term in the controlled vocabulary.. + term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None + value (): Value of the term, if applicable.. Defaults to None + """ + + params = { + "name": name, + "accession": accession, + "term_cv_reference": term_cv_reference, + "value": value, + } + + if id is not None: + params["id"] = id + + self.topics.append(Term(**params)) + + def add_to_related_publications( + self, + type: PublicationTypes, + title: str, + authors: List[Person] = ListPlus(), + year: Optional[int] = None, + doi: Optional[AnyUrl] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Publication' to attribute related_publications + + Args: + id (str): Unique identifier of the 'Publication' object. Defaults to 'None'. + type (): Nature of the publication.. + title (): Title of the publication.. + authors (): Authors of the publication.. Defaults to ListPlus() + year (): Year of publication.. Defaults to None + doi (): The DOI pointing to the publication.. 
Defaults to None + """ + + params = { + "type": type, + "title": title, + "authors": authors, + "year": year, + "doi": doi, + } + + if id is not None: + params["id"] = id + + self.related_publications.append(Publication(**params)) diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py new file mode 100644 index 0000000..86735eb --- /dev/null +++ b/nmrpy/datamodel/core/cv.py @@ -0,0 +1,33 @@ +import sdRDM + +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import AnyUrl + + +@forge_signature +class CV(sdRDM.DataModel): + + """lorem ipsum""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("cvINDEX"), + xml="@id", + ) + + vocabulary: str = Field( + ..., + description="Name of the CV used.", + ) + + version: str = Field( + ..., + description="Version of the CV used.", + ) + + url: AnyUrl = Field( + ..., + description="URL pointing to the CV used.", + ) diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py new file mode 100644 index 0000000..1f78eca --- /dev/null +++ b/nmrpy/datamodel/core/experiment.py @@ -0,0 +1,65 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .fidarray import FIDArray +from .fid import FID +from .parameters import Parameters + + +@forge_signature +class Experiment(sdRDM.DataModel): + + """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed + Also preparation of EnzymeML doc""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("experimentINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description="A descriptive name for the overarching experiment.", + ) + + fid: List[FID] = 
Field( + description="A single NMR spectrum.", + default_factory=ListPlus, + multiple=True, + ) + + fid_array: Optional[FIDArray] = Field( + default=None, + description="Multiple NMR spectra to be processed together.", + ) + + def add_to_fid( + self, + data: List[float] = ListPlus(), + parameters: Optional[Parameters] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'FID' to attribute fid + + Args: + id (str): Unique identifier of the 'FID' object. Defaults to 'None'. + data (): Spectral data from numpy array.. Defaults to ListPlus() + parameters (): Contains commonly-used NMR parameters.. Defaults to None + """ + + params = { + "data": data, + "parameters": parameters, + } + + if id is not None: + params["id"] = id + + self.fid.append(FID(**params)) diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fid.py new file mode 100644 index 0000000..143b833 --- /dev/null +++ b/nmrpy/datamodel/core/fid.py @@ -0,0 +1,32 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .parameters import Parameters + + +@forge_signature +class FID(sdRDM.DataModel): + + """Container for a single NMR spectrum.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("fidINDEX"), + xml="@id", + ) + + data: List[float] = Field( + description="Spectral data from numpy array.", + default_factory=ListPlus, + multiple=True, + ) + + parameters: Optional[Parameters] = Field( + default=None, + description="Contains commonly-used NMR parameters.", + ) diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py new file mode 100644 index 0000000..48b4b49 --- /dev/null +++ b/nmrpy/datamodel/core/fidarray.py @@ -0,0 +1,24 @@ +import sdRDM + +from typing import List +from pydantic import Field +from sdRDM.base.listplus import ListPlus 
+from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class FIDArray(sdRDM.DataModel): + + """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.}""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("fidarrayINDEX"), + xml="@id", + ) + + fids: List[str] = Field( + description="List of `FID.id` belonging to this array.", + multiple=True, + default_factory=ListPlus, + ) diff --git a/nmrpy/datamodel/core/fileformats.py b/nmrpy/datamodel/core/fileformats.py new file mode 100644 index 0000000..bf80f78 --- /dev/null +++ b/nmrpy/datamodel/core/fileformats.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class FileFormats(Enum): + VARIAN = "varian" + BRUKER = "bruker" + NONE = None diff --git a/nmrpy/datamodel/core/identifiertypes.py b/nmrpy/datamodel/core/identifiertypes.py new file mode 100644 index 0000000..f4bf8fe --- /dev/null +++ b/nmrpy/datamodel/core/identifiertypes.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class IdentifierTypes(Enum): + ORCID = "ORCID" diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py new file mode 100644 index 0000000..4b68b75 --- /dev/null +++ b/nmrpy/datamodel/core/nmrpy.py @@ -0,0 +1,45 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from datetime import datetime + +from .citation import Citation +from .experiment import Experiment + + +@forge_signature +class NMRpy(sdRDM.DataModel): + + """Root element of the NMRpy data model.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("nmrpyINDEX"), + xml="@id", + ) + + datetime_created: datetime = Field( + ..., + description="Date and time this dataset has been created.", + ) + + datetime_modified: Optional[datetime] = Field( + default=None, + description="Date and time this 
dataset has last been modified.", + ) + + experiment: Optional[Experiment] = Field( + default=None, + description="List of experiments associated with this dataset.", + ) + + citation: Optional[Citation] = Field( + default=None, + description=( + "Relevant information regarding the publication and citation of this" + " dataset." + ), + ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py new file mode 100644 index 0000000..2062e25 --- /dev/null +++ b/nmrpy/datamodel/core/parameters.py @@ -0,0 +1,70 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class Parameters(sdRDM.DataModel): + + """Container for relevant NMR parameters.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("parametersINDEX"), + xml="@id", + ) + + acquisition_time: Optional[float] = Field( + default=None, + description="at", + ) + + relaxation_time: Optional[float] = Field( + default=None, + description="d1", + ) + + repetition_time: Optional[float] = Field( + default=None, + description="rt = at + d1", + ) + + number_of_transients: List[float] = Field( + description="nt", + default_factory=ListPlus, + multiple=True, + ) + + acquisition_times_array: List[float] = Field( + description="acqtime = [nt, 2nt, ..., rt x nt]", + default_factory=ListPlus, + multiple=True, + ) + + spectral_width_ppm: Optional[float] = Field( + default=None, + description="sw", + ) + + spectral_width_hz: Optional[float] = Field( + default=None, + description="sw_hz", + ) + + spectrometer_frequency: Optional[float] = Field( + default=None, + description="sfrq", + ) + + reference_frequency: Optional[float] = Field( + default=None, + description="reffrq", + ) + + spectral_width_left: Optional[float] = Field( + default=None, + description="sw_left", + ) diff --git 
a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py new file mode 100644 index 0000000..dcfbaef --- /dev/null +++ b/nmrpy/datamodel/core/person.py @@ -0,0 +1,57 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .identifiertypes import IdentifierTypes + + +@forge_signature +class Person(sdRDM.DataModel): + + """Container for information regarding a person that worked on an experiment.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("personINDEX"), + xml="@id", + ) + + last_name: str = Field( + ..., + description="Family name of the person.", + ) + + first_name: str = Field( + ..., + description="Given name of the person.", + ) + + middle_names: List[str] = Field( + description="List of middle names of the person.", + default_factory=ListPlus, + multiple=True, + ) + + affiliation: Optional[str] = Field( + default=None, + description="Institution the Person belongs to.", + ) + + email: Optional[str] = Field( + default=None, + description="Email address of the person.", + ) + + identifier_type: Optional[IdentifierTypes] = Field( + default=None, + description="Recognized identifier for the person.", + ) + + identifier_value: Optional[str] = Field( + default=None, + description="Value of the identifier for the person.", + ) diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py new file mode 100644 index 0000000..42025c6 --- /dev/null +++ b/nmrpy/datamodel/core/publication.py @@ -0,0 +1,90 @@ +import sdRDM + +from typing import List, Optional +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import AnyUrl + +from .identifiertypes import IdentifierTypes +from .person import Person +from .publicationtypes import PublicationTypes + + 
+@forge_signature +class Publication(sdRDM.DataModel): + + """Container for citation information of a relevant publication.""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("publicationINDEX"), + xml="@id", + ) + + type: PublicationTypes = Field( + ..., + description="Nature of the publication.", + ) + + title: str = Field( + ..., + description="Title of the publication.", + ) + + authors: List[Person] = Field( + description="Authors of the publication.", + multiple=True, + default_factory=ListPlus, + ) + + year: Optional[int] = Field( + default=None, + description="Year of publication.", + ) + + doi: Optional[AnyUrl] = Field( + default=None, + description="The DOI pointing to the publication.", + ) + + def add_to_authors( + self, + last_name: str, + first_name: str, + middle_names: List[str] = ListPlus(), + affiliation: Optional[str] = None, + email: Optional[str] = None, + identifier_type: Optional[IdentifierTypes] = None, + identifier_value: Optional[str] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Person' to attribute authors + + Args: + id (str): Unique identifier of the 'Person' object. Defaults to 'None'. + last_name (): Family name of the person.. + first_name (): Given name of the person.. + middle_names (): List of middle names of the person.. Defaults to ListPlus() + affiliation (): Institution the Person belongs to.. Defaults to None + email (): Email address of the person.. Defaults to None + identifier_type (): Recognized identifier for the person.. Defaults to None + identifier_value (): Value of the identifier for the person.. 
Defaults to None + """ + + params = { + "last_name": last_name, + "first_name": first_name, + "middle_names": middle_names, + "affiliation": affiliation, + "email": email, + "identifier_type": identifier_type, + "identifier_value": identifier_value, + } + + if id is not None: + params["id"] = id + + self.authors.append(Person(**params)) diff --git a/nmrpy/datamodel/core/publicationtypes.py b/nmrpy/datamodel/core/publicationtypes.py new file mode 100644 index 0000000..f5974ef --- /dev/null +++ b/nmrpy/datamodel/core/publicationtypes.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class PublicationTypes(Enum): + ARTICLE = "Journal article" diff --git a/nmrpy/datamodel/core/subjects.py b/nmrpy/datamodel/core/subjects.py new file mode 100644 index 0000000..d343f01 --- /dev/null +++ b/nmrpy/datamodel/core/subjects.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class Subjects(Enum): + BIOLOGY = "Biology" + CHEMISTRY = "Chemistry" + IT = "Computer and Information Science" + PHYSICS = "Physics" diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py new file mode 100644 index 0000000..6a48293 --- /dev/null +++ b/nmrpy/datamodel/core/term.py @@ -0,0 +1,44 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from typing import Any + + +@forge_signature +class Term(sdRDM.DataModel): + + """lorem ipsum {Add reference back to term_cv_reference.}""" + + id: str = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("termINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description=( + "The preferred name of the term associated with the given accession number." 
+ ), + ) + + accession: str = Field( + ..., + description="Accession number of the term in the controlled vocabulary.", + ) + + term_cv_reference: Optional[str] = Field( + default=None, + description=( + "Reference to the `CV.id` of a controlled vocabulary that has been defined" + " for this dataset." + ), + ) + + value: Optional[Any] = Field( + default=None, + description="Value of the term, if applicable.", + ) diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md new file mode 100644 index 0000000..527eb3a --- /dev/null +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -0,0 +1,121 @@ +```mermaid +classDiagram + NMRpy *-- Experiment + NMRpy *-- Citation + Experiment *-- FID + Experiment *-- FIDArray + FID *-- Parameters + Citation *-- Subjects + Citation *-- Person + Citation *-- Publication + Citation *-- Term + Person *-- IdentifierTypes + Publication *-- PublicationTypes + Publication *-- Person + + class NMRpy { + +datetime datetime_created* + +datetime datetime_modified + +Experiment experiment + +Citation citation + } + + class Experiment { + +string name* + +FID[0..*] fid + +FIDArray fid_array + } + + class FID { + +float[0..*] data + +Parameters parameters + } + + class Parameters { + +float acquisition_time + +float relaxation_time + +float repetition_time + +float[0..*] number_of_transients + +float[0..*] acquisition_times_array + +float spectral_width_ppm + +float spectral_width_hz + +float spectrometer_frequency + +float reference_frequency + +float spectral_width_left + } + + class FIDArray { + +string[0..*] fids* + } + + class Citation { + +string title + +URL doi + +string description + +Person[0..*] authors + +Subjects[0..*] subjects + +Term[0..*] keywords + +Term[0..*] topics + +Publication[0..*] related_publications + +string notes + +string[0..*] funding + +string license + } + + class Person { + +string last_name* + +string first_name* + +string[0..*] middle_names + +string affiliation + 
+string email + +IdentifierTypes identifier_type + +string identifier_value + } + + class Publication { + +PublicationTypes type* + +string title* + +Person[0..*] authors* + +integer year + +URL doi + } + + class CV { + +string vocabulary* + +string version* + +URL url* + } + + class Term { + +string name* + +string accession* + +string term_cv_reference + +any value + } + + class FileFormats { + << Enumeration >> + +VARIAN + +BRUKER + +NONE + } + + class Subjects { + << Enumeration >> + +BIOLOGY + +CHEMISTRY + +IT + +PHYSICS + } + + class PublicationTypes { + << Enumeration >> + +ARTICLE + } + + class IdentifierTypes { + << Enumeration >> + +ORCID + } + +``` \ No newline at end of file diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md new file mode 100644 index 0000000..dd45c34 --- /dev/null +++ b/specifications/nmrpy.md @@ -0,0 +1,279 @@ +# NMRpy data model + +Python object model specifications based on the [software-driven-rdm](https://github.com/JR-1991/software-driven-rdm) Python library. + + +## Core objects + + +### NMRpy + +Root element of the NMRpy data model. + +- __datetime_created__ + - Type: datetime + - Description: Date and time this dataset has been created. +- datetime_modified + - Type: datetime + - Description: Date and time this dataset has last been modified. +- experiment + - Type: [Experiment](#experiment) + - Description: List of experiments associated with this dataset. +- citation + - Type: [Citation](#citation) + - Description: Relevant information regarding the publication and citation of this dataset. + + +### Experiment + +Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed +Also preparation of EnzymeML doc + +- __name__ + - Type: string + - Description: A descriptive name for the overarching experiment. +- fid + - Type: [FID](#fid) + - Description: A single NMR spectrum. 
+ - Multiple: True +- fid_array + - Type: [FIDArray](#fidarray) + - Description: Multiple NMR spectra to be processed together. + + +### FID + +Container for a single NMR spectrum. + +- data + - Type: float + - Description: Spectral data from numpy array. + - Multiple: True +- parameters + - Type: [Parameters](#parameters) + - Description: Contains commonly-used NMR parameters. + + +### Parameters + +Container for relevant NMR parameters. + +- acquisition_time + - Type: float + - Description: at +- relaxation_time + - Type: float + - Description: d1 +- repetition_time + - Type: float + - Description: rt = at + d1 +- number_of_transients + - Type: float + - Description: nt + - Multiple: True +- acquisition_times_array + - Type: float + - Description: acqtime = [nt, 2nt, ..., rt x nt] + - Multiple: True +- spectral_width_ppm + - Type: float + - Description: sw +- spectral_width_hz + - Type: float + - Description: sw_hz +- spectrometer_frequency + - Type: float + - Description: sfrq +- reference_frequency + - Type: float + - Description: reffrq +- spectral_width_left + - Type: float + - Description: sw_left + + +### FIDArray + +Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.} + +- __fids__ + - Type: string + - Description: List of `FID.id` belonging to this array. + - Multiple: True + + +### Citation + +Container for various types of metadata primarily used in the publication and citation of the dataset. + +- title + - Type: string + - Description: Title the dataset should have when published. +- doi + - Type: URL + - Description: DOI pointing to the published dataset +- description + - Type: string + - Description: Description the dataset should have when published. +- authors + - Type: [Person](#person) + - Description: List of authors for this dataset. + - Multiple: True +- subjects + - Type: [Subjects](#subjects) + - Description: List of subjects this dataset belongs to. 
+ - Multiple: True +- keywords + - Type: [Term](#term) + - Description: List of CV-based keywords describing the dataset. + - Multiple: True +- topics + - Type: [Term](#term) + - Description: List of CV-based topics the dataset addresses. + - Multiple: True +- related_publications + - Type: [Publication](#publication) + - Description: List of publications relating to this dataset. + - Multiple: True +- notes + - Type: string + - Description: Additional notes about the dataset. +- funding + - Type: string + - Description: Funding information for this dataset. + - Multiple: True +- license + - Type: string + - Description: License information for this dataset. Defaults to `CC BY 4.0`. + - Default: CC BY 4.0 + + +### Person + +Container for information regarding a person that worked on an experiment. + +- __last_name__ + - Type: string + - Description: Family name of the person. +- __first_name__ + - Type: string + - Description: Given name of the person. +- middle_names + - Type: string + - Description: List of middle names of the person. + - Multiple: True +- affiliation + - Type: string + - Description: Institution the Person belongs to. +- email + - Type: string + - Description: Email address of the person. +- identifier_type + - Type: [IdentifierTypes](#identifiertypes) + - Description: Recognized identifier for the person. +- identifier_value + - Type: string + - Description: Value of the identifier for the person. + + +### Publication + +Container for citation information of a relevant publication. + +- __type__ + - Type: [PublicationTypes](#publicationtypes) + - Description: Nature of the publication. +- __title__ + - Type: string + - Description: Title of the publication. +- __authors__ + - Type: [Person](#person) + - Description: Authors of the publication. + - Multiple: True +- year + - Type: integer + - Description: Year of publication. +- doi + - Type: URL + - Description: The DOI pointing to the publication. 
+ + +## Utility objects + + +### CV + +lorem ipsum + +- __vocabulary__ + - Type: string + - Description: Name of the CV used. +- __version__ + - Type: string + - Description: Version of the CV used. +- __url__ + - Type: URL + - Description: URL pointing to the CV used. + + +### Term + +lorem ipsum {Add reference back to term_cv_reference.} + +- __name__ + - Type: string + - Description: The preferred name of the term associated with the given accession number. +- __accession__ + - Type: string + - Description: Accession number of the term in the controlled vocabulary. +- term_cv_reference + - Type: string + - Description: Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset. +- value + - Type: any + - Description: Value of the term, if applicable. + + + +## Enumerations + + +### FileFormats + +Enumeration containing the file formats accepted by the NMRpy library. + +```python +VARIAN = "varian" +BRUKER = "bruker" +NONE = None +``` + + +### Subjects + +Enumeration containing common subjects (research fields) that implement NMR. + +```python +BIOLOGY = "Biology" +CHEMISTRY = "Chemistry" +IT = "Computer and Information Science" +PHYSICS = "Physics" +``` + + +### PublicationTypes + +Enumeration containing accepted types of publication. + +```python +ARTICLE = "Journal article" +``` + + +### IdentifierTypes + +Enumeration containing recognized identifiers for persons. 
+ +```python +ORCID = "ORCID" +``` From 53ebb279a26dd284f65f043c8a8c4123fd4cb920 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 22 Aug 2023 15:47:25 +0200 Subject: [PATCH 02/54] Minor changes --- links/enzymeml.toml | 4 +- nmrpy/data_objects.py | 23 + nmrpy/datamodel/core/citation.py | 6 +- nmrpy/datamodel/core/experiment.py | 2 +- nmrpy/datamodel/core/publication.py | 2 +- nmrpy/plotting.py | 1395 +++++++++++++++------------ specifications/nmrpy.md | 2 +- 7 files changed, 828 insertions(+), 606 deletions(-) diff --git a/links/enzymeml.toml b/links/enzymeml.toml index a312726..4f2a90f 100644 --- a/links/enzymeml.toml +++ b/links/enzymeml.toml @@ -14,8 +14,8 @@ name = "EnzymeMLDocument.name" doi = "EnzymeMLDocument.doi" ["citation.authors"] -last_name = "EnzymeMLDocument.creators.given_name" -first_name = "EnzymeMLDocument.creators.family_name" +last_name = "EnzymeMLDocument.creators.family_name" +first_name = "EnzymeMLDocument.creators.given_name" email = "EnzymeMLDocument.creators.mail" ["citation.related_publications"] diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index d22822b..496cb65 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1400,6 +1400,19 @@ def __init__(self): datetime_modified=_now, ) del _now + self._force_pyenzyme = False + + @property + def force_pyenzyme(self): + return self._force_pyenzyme + + @force_pyenzyme.setter + def force_pyenzyme(self): + raise PermissionError("Forbidden!") + + @force_pyenzyme.deleter + def force_pyenzyme(self): + raise PermissionError("Forbidden!") @property def data_model(self): @@ -2284,6 +2297,16 @@ def save_to_file(self, filename=None, overwrite=False): def save_data(self, file_format: str, filename=None, overwrite=False): print("~~~ Method under contruction ~~~") + if self.force_pyenzyme: + import pyenzyme as pe + + enzymeml = pe.EnzymeMLDocument( + name=self.data_mode.experiment.name + if hasattr(self.data_model.experiment, "name") + else "NMR experiment" + ) + ... 
+ return 1 if file_format.lower() == ("enzymeml" or "nmrml"): # model = self.data_model.convert_to( # template=Path(__file__).parent.parent / "links/enzymeml.toml" diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index dc21126..799d198 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -5,15 +5,15 @@ from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from typing import Any from pydantic import AnyUrl +from typing import Any from .term import Term -from .identifiertypes import IdentifierTypes from .person import Person -from .publication import Publication from .subjects import Subjects +from .publication import Publication from .publicationtypes import PublicationTypes +from .identifiertypes import IdentifierTypes @forge_signature diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index 1f78eca..3af5c83 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -7,8 +7,8 @@ from .fidarray import FIDArray -from .fid import FID from .parameters import Parameters +from .fid import FID @forge_signature diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index 42025c6..3d34597 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -7,9 +7,9 @@ from pydantic import AnyUrl -from .identifiertypes import IdentifierTypes from .person import Person from .publicationtypes import PublicationTypes +from .identifiertypes import IdentifierTypes @forge_signature diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index de6c047..113397c 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -15,7 +15,8 @@ from IPython.display import display import asyncio -class Plot(): + +class Plot: """ Basic 'plot' class containing functions for various types of plots. 
""" @@ -24,7 +25,7 @@ class Plot(): def __init__(self): self._time = datetime.now() - self.id = 'plot_{}'.format(Plot._plot_id_num) + self.id = "plot_{}".format(Plot._plot_id_num) Plot._plot_id_num += 1 self.fig = None @@ -45,148 +46,182 @@ def fig(self, fig): if fig is None or isinstance(fig, Figure): self._fig = fig else: - raise TypeError('fig must be of type matplotlib.figure.Figure.') - - def _plot_ppm(self, fid, - upper_ppm=None, - lower_ppm=None, - color='k', - lw=1, - filename=None): + raise TypeError("fig must be of type matplotlib.figure.Figure.") + + def _plot_ppm( + self, + fid, + upper_ppm=None, + lower_ppm=None, + color="k", + lw=1, + filename=None, + ): data = fid.data params = fid._params - ft=fid._flags['ft'] - if not Plot._is_flat_iter(data): - raise AttributeError('data must be flat iterable.') + ft = fid._flags["ft"] + if not Plot._is_flat_iter(data): + raise AttributeError("data must be flat iterable.") if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError('ppm range specified is invalid.') - sw_left = params['sw_left'] - sw = params['sw'] + raise ValueError("ppm range specified is invalid.") + sw_left = params["sw_left"] + sw = params["sw"] if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left-sw + lower_ppm = sw_left - sw - ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] + ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] ppm_bool_index = (ppm < upper_ppm) * (ppm > lower_ppm) ppm = ppm[ppm_bool_index] data = data[ppm_bool_index] - self.fig = plt.figure(figsize=[9,5]) + self.fig = plt.figure(figsize=[9, 5]) ax = self.fig.add_subplot(111) if ft: ax.plot(ppm, data, color=color, lw=lw) ax.invert_xaxis() ax.set_xlim([upper_ppm, lower_ppm]) ax.grid() - ax.set_xlabel('PPM (%.2f MHz)'%(params['reffrq'])) + ax.set_xlabel("PPM (%.2f MHz)" % (params["reffrq"])) elif not ft: - at = params['at']*1000 # ms + at = params["at"] * 1000 # 
ms t = numpy.linspace(0, at, len(data)) ax.plot(t, data, color=color, lw=lw) ax.set_xlim([0, at]) ax.grid() - ax.set_xlabel('Time (ms)') - #self.fig.show() + ax.set_xlabel("Time (ms)") + # self.fig.show() if filename is not None: - self.fig.savefig(filename, format='pdf') - - def _deconv_generator(self, fid, - upper_ppm=None, - lower_ppm=None, - ): - + self.fig.savefig(filename, format="pdf") + + def _deconv_generator( + self, + fid, + upper_ppm=None, + lower_ppm=None, + ): data = fid.data params = fid._params - if not Plot._is_flat_iter(data): - raise AttributeError('data must be flat iterable.') + if not Plot._is_flat_iter(data): + raise AttributeError("data must be flat iterable.") - peakshapes = fid._f_pks_list(fid._deconvoluted_peaks, numpy.arange(len(data))) + peakshapes = fid._f_pks_list( + fid._deconvoluted_peaks, numpy.arange(len(data)) + ) - if not Plot._is_iter_of_iters(peakshapes): - raise AttributeError('data must be flat iterable.') + if not Plot._is_iter_of_iters(peakshapes): + raise AttributeError("data must be flat iterable.") if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError('ppm range specified is invalid.') - sw_left = params['sw_left'] - sw = params['sw'] + raise ValueError("ppm range specified is invalid.") + sw_left = params["sw_left"] + sw = params["sw"] if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left-sw + lower_ppm = sw_left - sw - ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] + ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] ppm_bool_index = (ppm <= upper_ppm) * (ppm >= lower_ppm) ppm = ppm[ppm_bool_index] data = data[ppm_bool_index] peakshapes = peakshapes[:, ppm_bool_index] summed_peaks = peakshapes.sum(0) - residual = data-summed_peaks - return ppm, data, peakshapes, summed_peaks, residual, upper_ppm, lower_ppm - - def _plot_deconv(self, fid, - upper_ppm=None, - lower_ppm=None, - colour='k', - 
peak_colour='b', - summed_peak_colour='r', - residual_colour='g', - lw=1): - - #validation takes place in self._deconv_generator - ppm, data, peakshapes, summed_peaks, residual, upper_ppm, \ - lower_ppm = self._deconv_generator(fid, - upper_ppm=upper_ppm, - lower_ppm=lower_ppm) - - self.fig = plt.figure(figsize=[9,5]) + residual = data - summed_peaks + return ( + ppm, + data, + peakshapes, + summed_peaks, + residual, + upper_ppm, + lower_ppm, + ) + + def _plot_deconv( + self, + fid, + upper_ppm=None, + lower_ppm=None, + colour="k", + peak_colour="b", + summed_peak_colour="r", + residual_colour="g", + lw=1, + ): + # validation takes place in self._deconv_generator + ( + ppm, + data, + peakshapes, + summed_peaks, + residual, + upper_ppm, + lower_ppm, + ) = self._deconv_generator( + fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm + ) + + self.fig = plt.figure(figsize=[9, 5]) ax = self.fig.add_subplot(111) ax.plot(ppm, residual, color=residual_colour, lw=lw) ax.plot(ppm, data, color=colour, lw=lw) - ax.plot(ppm, summed_peaks, '--', color=summed_peak_colour, lw=lw) - label_pad = 0.02*peakshapes.max() + ax.plot(ppm, summed_peaks, "--", color=summed_peak_colour, lw=lw) + label_pad = 0.02 * peakshapes.max() for n in range(len(peakshapes)): peak = peakshapes[n] - ax.plot(ppm, peak, '-', color=peak_colour, lw=lw) - ax.text(ppm[numpy.argmax(peak)], label_pad+peak.max(), str(n), ha='center') + ax.plot(ppm, peak, "-", color=peak_colour, lw=lw) + ax.text( + ppm[numpy.argmax(peak)], + label_pad + peak.max(), + str(n), + ha="center", + ) ax.invert_xaxis() ax.set_xlim([upper_ppm, lower_ppm]) ax.grid() - ax.set_xlabel('PPM (%.2f MHz)'%(fid._params['reffrq'])) - - def _plot_deconv_array(self, fids, - upper_index=None, - lower_index=None, - upper_ppm=None, - lower_ppm=None, - data_colour='k', - summed_peak_colour='r', - residual_colour='g', - data_filled=False, - summed_peak_filled=True, - residual_filled=False, - figsize=[9, 6], - lw=0.3, - azim=-90, - elev=20, - filename=None): - + 
ax.set_xlabel("PPM (%.2f MHz)" % (fid._params["reffrq"])) + + def _plot_deconv_array( + self, + fids, + upper_index=None, + lower_index=None, + upper_ppm=None, + lower_ppm=None, + data_colour="k", + summed_peak_colour="r", + residual_colour="g", + data_filled=False, + summed_peak_filled=True, + residual_filled=False, + figsize=[9, 6], + lw=0.3, + azim=-90, + elev=20, + filename=None, + ): if lower_index is None: lower_index = 0 if upper_index is None: upper_index = len(fids) if lower_index >= upper_index: - raise ValueError('upper_index must exceed lower_index') - fids = fids[lower_index: upper_index] + raise ValueError("upper_index must exceed lower_index") + fids = fids[lower_index:upper_index] generated_deconvs = [] for fid in fids: - generated_deconvs.append(self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm)) - - params = fids[0]._params + generated_deconvs.append( + self._deconv_generator( + fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm + ) + ) + + params = fids[0]._params ppm = generated_deconvs[0][0] data = [i[1] for i in generated_deconvs] peakshapes = [i[2] for i in generated_deconvs] @@ -195,21 +230,23 @@ def _plot_deconv_array(self, fids, upper_ppm = generated_deconvs[0][5] lower_ppm = generated_deconvs[0][6] - plot_data = numpy.array([ - residuals, - data, - summed_peaks, - ]) + plot_data = numpy.array( + [ + residuals, + data, + summed_peaks, + ] + ) colours_list = [ - [residual_colour]*len(residuals), - [data_colour]*len(data), - [summed_peak_colour]*len(summed_peaks), - ] + [residual_colour] * len(residuals), + [data_colour] * len(data), + [summed_peak_colour] * len(summed_peaks), + ] filled_list = [ - residual_filled, - data_filled, - summed_peak_filled, - ] + residual_filled, + data_filled, + summed_peak_filled, + ] xlabel = 'PPM (%.2f MHz)'%(params['reffrq']) ylabel = 'min.' 
@@ -230,49 +267,48 @@ def _plot_deconv_array(self, fids, elev=elev, ) if filename is not None: - self.fig.savefig(filename, format='pdf') + self.fig.savefig(filename, format="pdf") plt.show() - - - - def _plot_array(self, data, params, - upper_index=None, - lower_index=None, - upper_ppm=None, - lower_ppm=None, - figsize=(9, 6), - lw=0.3, - azim=-90, - elev=20, - filled=False, - show_zticks=False, - labels=None, - colour=True, - filename=None, - ): + def _plot_array( + self, + data, + params, + upper_index=None, + lower_index=None, + upper_ppm=None, + lower_ppm=None, + figsize=(9, 6), + lw=0.3, + azim=-90, + elev=20, + filled=False, + show_zticks=False, + labels=None, + colour=True, + filename=None, + ): if not Plot._is_iter_of_iters(data): - raise AttributeError('data must be 2D.') + raise AttributeError("data must be 2D.") if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError('ppm range specified is invalid.') + raise ValueError("ppm range specified is invalid.") if upper_index is not None and lower_index is not None: if upper_index == lower_index or upper_index < lower_index: - raise ValueError('index range specified is invalid.') - + raise ValueError("index range specified is invalid.") - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] if upper_index is None: upper_index = len(data) if lower_index is None: lower_index = 0 - + if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left-sw + lower_ppm = sw_left - sw if "acqtime_array" in params.keys(): # New NMRpy _params structure @@ -295,20 +331,23 @@ def _plot_array(self, data, params, else: colours_list = None - xlabel = 'PPM (%.2f MHz)'%(params['reffrq']) - ylabel = 'min.' 
- self.fig = self._generic_array_plot(ppm, minutes, [data], - colours_list=colours_list, - filled_list=[filled], - figsize=figsize, - xlabel=xlabel, - ylabel=ylabel, - lw=lw, - azim=azim, - elev=elev, - ) + xlabel = "PPM (%.2f MHz)" % (params["reffrq"]) + ylabel = "min." + self.fig = self._generic_array_plot( + ppm, + minutes, + [data], + colours_list=colours_list, + filled_list=[filled], + figsize=figsize, + xlabel=xlabel, + ylabel=ylabel, + lw=lw, + azim=azim, + elev=elev, + ) if filename is not None: - self.fig.savefig(filename, format='pdf') + self.fig.savefig(filename, format="pdf") plt.show() @staticmethod @@ -322,21 +361,25 @@ def _interleave_datasets(data): idata.append(data[x][y]) return idata - def _generic_array_plot(self, x, y, zlist, - colours_list=None, - filled_list=None, - upper_lim=None, - lower_lim=None, - lw=0.3, - azim=-90, - elev=20, - figsize=[5,5], - show_zticks=False, - labels=None, - xlabel=None, - ylabel=None, - filename=None, - ): + def _generic_array_plot( + self, + x, + y, + zlist, + colours_list=None, + filled_list=None, + upper_lim=None, + lower_lim=None, + lw=0.3, + azim=-90, + elev=20, + figsize=[5, 5], + show_zticks=False, + labels=None, + xlabel=None, + ylabel=None, + filename=None, + ): """ Generic function for plotting arrayed data on a set of 3D axes. 
x and y @@ -346,46 +389,44 @@ def _generic_array_plot(self, x, y, zlist, """ - - - if colours_list is None: - colours_list = [['k']*len(y)]*len(zlist) + colours_list = [["k"] * len(y)] * len(zlist) if filled_list is None: - filled_list = [False]*len(zlist) - + filled_list = [False] * len(zlist) fig = plt.figure(figsize=figsize) - ax = fig.add_subplot(111, projection='3d', azim=azim, elev=elev) + ax = fig.add_subplot(111, projection="3d", azim=azim, elev=elev) for data_n in range(len(zlist)): data = zlist[data_n] - bh = abs(data.min()) + bh = abs(data.min()) filled = filled_list[data_n] cl = colours_list[data_n] if not filled: - #spectra are plotted in reverse for zorder + # spectra are plotted in reverse for zorder for n in range(len(data))[::-1]: datum = data[n] clr = cl[n] - ax.plot(x, len(datum)*[y[n]], datum, color=clr, lw=lw) + ax.plot(x, len(datum) * [y[n]], datum, color=clr, lw=lw) if filled: verts = [] - plot_data = data+bh + plot_data = data + bh for datum in plot_data: datum[0], datum[-1] = 0, 0 verts.append(list(zip(x, datum))) - - fclr, eclr = ['w']*len(data), ['k']*len(data) + + fclr, eclr = ["w"] * len(data), ["k"] * len(data) fclr = cl - poly = PolyCollection(verts, + poly = PolyCollection( + verts, facecolors=fclr, edgecolors=eclr, - linewidths=[lw]*len(verts)) - ax.add_collection3d(poly, zs=y, zdir='y') - - ax.set_zlim([0, 1.1*max(numpy.array(zlist).flat)]) + linewidths=[lw] * len(verts), + ) + ax.add_collection3d(poly, zs=y, zdir="y") + + ax.set_zlim([0, 1.1 * max(numpy.array(zlist).flat)]) ax.invert_xaxis() if upper_lim is None: upper_lim = x[0] @@ -398,7 +439,6 @@ def _generic_array_plot(self, x, y, zlist, if not show_zticks: ax.set_zticklabels([]) return fig - @classmethod def _is_iter(cls, i): @@ -424,48 +464,58 @@ def _is_flat_iter(cls, i): return True return False + class Phaser: """Interactive phase-correction widget""" + def __init__(self, fid): - if not Plot._is_flat_iter(fid.data): - raise ValueError('data must be flat iterable.') + if 
not Plot._is_flat_iter(fid.data): + raise ValueError("data must be flat iterable.") if fid.data is [] or fid.data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") self.fid = fid self.fig = plt.figure(figsize=[9, 6]) self.phases = numpy.array([0.0, 0.0]) self.cum_phases = numpy.array([0.0, 0.0]) self.y = 0.0 self.ax = self.fig.add_subplot(111) - self.ax.plot(self.fid.data, color='k', linewidth=1.0) - self.ax.hlines(0, 0, len(self.fid.data)-1) + self.ax.plot(self.fid.data, color="k", linewidth=1.0) + self.ax.hlines(0, 0, len(self.fid.data) - 1) self.ax.set_xlim([0, len(self.fid.data)]) - xtcks = numpy.linspace(0,1,11)*len(self.fid.data) - xtcks[-1] = xtcks[-1]-1 + xtcks = numpy.linspace(0, 1, 11) * len(self.fid.data) + xtcks[-1] = xtcks[-1] - 1 self.ax.set_xticks(xtcks) - self.ax.set_xlabel('PPM (%.2f MHz)'%(self.fid._params['reffrq'])) - self.ax.set_xticklabels([numpy.round(self.fid._ppm[int(i)], 1) for i in xtcks]) - ylims = numpy.array([-1.6, 1.6])*max(abs(numpy.array(self.ax.get_ylim()))) + self.ax.set_xlabel("PPM (%.2f MHz)" % (self.fid._params["reffrq"])) + self.ax.set_xticklabels( + [numpy.round(self.fid._ppm[int(i)], 1) for i in xtcks] + ) + ylims = numpy.array([-1.6, 1.6]) * max( + abs(numpy.array(self.ax.get_ylim())) + ) self.ax.set_ylim(ylims) self.ax.grid() self.visible = True self.canvas = self.ax.figure.canvas - self.canvas.mpl_connect('motion_notify_event', self.onmove) - self.canvas.mpl_connect('button_press_event', self.press) - self.canvas.mpl_connect('button_release_event', self.release) + self.canvas.mpl_connect("motion_notify_event", self.onmove) + self.canvas.mpl_connect("button_press_event", self.press) + self.canvas.mpl_connect("button_release_event", self.release) self.pressv = None self.buttonDown = False self.prev = (0, 0) - self.ax.text(0.05 *self.ax.get_xlim()[1],0.7 *self.ax.get_ylim()[1],'phasing\nleft - zero-order\nright - first order') - cursor = Cursor(self.ax, useblit=True, color='k', 
linewidth=0.5) + self.ax.text( + 0.05 * self.ax.get_xlim()[1], + 0.7 * self.ax.get_ylim()[1], + "phasing\nleft - zero-order\nright - first order", + ) + cursor = Cursor(self.ax, useblit=True, color="k", linewidth=0.5) cursor.horizOn = False self.fig.subplots_adjust(bottom=0.13) - self.text1 = self.fig.text(0.12, 0.02, ' ', fontsize='large') + self.text1 = self.fig.text(0.12, 0.02, " ", fontsize="large") plt.show() def press(self, event): tb = plt.get_current_fig_manager().toolbar - if tb.mode == '': + if tb.mode == "": x, y = event.xdata, event.ydata if event.inaxes is not None: self.buttonDown = True @@ -473,7 +523,9 @@ def press(self, event): self.y = y def release(self, event): - self.text1.set_text('cumulative p0: {0:.1f} p1: {1:.1f}'.format(*self.cum_phases)) + self.text1.set_text( + "cumulative p0: {0:.1f} p1: {1:.1f}".format(*self.cum_phases) + ) self.buttonDown = False return False @@ -482,26 +534,27 @@ def onmove(self, event): return x = event.xdata y = event.ydata - dy = y-self.y + dy = y - self.y self.y = y if self.button == 1: - self.phases[0] = 50*dy/self.ax.get_ylim()[1] + self.phases[0] = 50 * dy / self.ax.get_ylim()[1] self.phases[1] = 0.0 if self.button == 3: - self.phases[1] = 50*dy/self.ax.get_ylim()[1] + self.phases[1] = 50 * dy / self.ax.get_ylim()[1] self.phases[0] = 0.0 self.fid.ps(p0=self.phases[0], p1=self.phases[1]) self.cum_phases += self.phases - self.ax.lines[0].set_data(numpy.array([numpy.arange(len(self.fid.data)), self.fid.data])) + self.ax.lines[0].set_data( + numpy.array([numpy.arange(len(self.fid.data)), self.fid.data]) + ) self.canvas.draw() # _idle() return False class BaseSelectorMixin: - def __init__(self): super().__init__() - + def press(self, event): pass @@ -517,17 +570,19 @@ def redraw(self): def change_visible(self): pass -class PolySelectorMixin(BaseSelectorMixin): +class PolySelectorMixin(BaseSelectorMixin): def __init__(self): super().__init__() + class Psm: pass + self.psm = Psm() self.psm.btn_add = 1 
self.psm.btn_del = 1 self.psm.btn_cls = 3 - self.psm.key_mod = 'control' + self.psm.key_mod = "control" self.psm.xs = [] self.psm.ys = [] self.psm._xs = [] @@ -544,12 +599,12 @@ class Psm: self.psm._yline = None self.psm.lw = 1 self.blocking = False - if not hasattr(self, 'show_tracedata'): + if not hasattr(self, "show_tracedata"): self.show_tracedata = False def redraw(self): super().redraw() - if hasattr(self, 'psm'): + if hasattr(self, "psm"): for i in self.psm._visual_lines: self.ax.draw_artist(i) if self.psm.line is not None: @@ -559,20 +614,21 @@ def redraw(self): def change_visible(self): super().change_visible() - if hasattr(self, 'psm'): + if hasattr(self, "psm"): for i in self.psm._visual_lines: i.set_visible(not i.get_visible()) if self.psm.line is not None: self.psm.line.set_visible(not self.psm.line.get_visible()) - def makepoly(self, + def makepoly( + self, xs=None, ys=None, lw=1, - colour='r', - ms='+', - ls='-', - ): + colour="r", + ms="+", + ls="-", + ): if xs is not None and ys is not None: return self.ax.plot( xs, @@ -581,99 +637,113 @@ def makepoly(self, color=colour, marker=ms, ls=ls, - ) - + ) + def press(self, event): super().press(event) - if self.check_mode() != '': + if self.check_mode() != "": return if event.xdata is None or event.ydata is None: return if event.button == self.psm.btn_add and event.key != self.psm.key_mod: - self.psm.xs.append(event.xdata) - self.psm.ys.append(event.ydata) + self.psm.xs.append(event.xdata) + self.psm.ys.append(event.ydata) + if self.show_tracedata: + self.psm._xs, self.psm._ys = self.get_line_ydata( + self.psm.xs, self.psm.ys + ) + if self.psm.line is None: + (self.psm.line,) = self.makepoly( + self.psm.xs, + self.psm.ys, + lw=self.psm.lw, + ) + self.blocking = True if self.show_tracedata: - self.psm._xs, self.psm._ys = self.get_line_ydata(self.psm.xs, self.psm.ys) - if self.psm.line is None: - self.psm.line, = self.makepoly( - self.psm.xs, - self.psm.ys, + (self.psm._yline,) = self.makepoly( + 
self.psm._xs, + self.psm._ys, lw=self.psm.lw, - ) - self.blocking = True - if self.show_tracedata: - self.psm._yline, = self.makepoly( - self.psm._xs, - self.psm._ys, - lw=self.psm.lw, - ms='+', - ls='-', - colour='r', - ) - else: - self.psm.line.set_data(self.psm.xs, self.psm.ys) - if self.show_tracedata: - self.psm._yline.set_data(self.psm._xs, self.psm._ys) - elif event.button == self.psm.btn_del and event.key == self.psm.key_mod: + ms="+", + ls="-", + colour="r", + ) + else: + self.psm.line.set_data(self.psm.xs, self.psm.ys) + if self.show_tracedata: + self.psm._yline.set_data(self.psm._xs, self.psm._ys) + elif ( + event.button == self.psm.btn_del and event.key == self.psm.key_mod + ): if len(self.psm._visual_lines) > 0: x = event.xdata y = event.ydata - #trace_dist = [[i[0]-x, i[1]-y] for i in self.psm.lines] - trace_dist = [[i[0]-x] for i in self.psm.lines] - #delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2+i[1]**2)) - delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2)) for i in trace_dist]) + # trace_dist = [[i[0]-x, i[1]-y] for i in self.psm.lines] + trace_dist = [[i[0] - x] for i in self.psm.lines] + # delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2+i[1]**2)) + delete_trace = numpy.argmin( + [min(numpy.sqrt(i[0] ** 2)) for i in trace_dist] + ) self.psm.lines.pop(delete_trace) self.psm.data_lines.pop(delete_trace) trace = self.psm._visual_lines.pop(delete_trace) trace.remove() elif event.button == self.psm.btn_cls and self.psm.line is not None: if len(self.psm.xs) > 1: - self.psm._visual_lines.append(self.makepoly( - self.psm.xs, - self.psm.ys, + self.psm._visual_lines.append( + self.makepoly( + self.psm.xs, + self.psm.ys, lw=self.psm.lw, - colour='b', - )[0]) + colour="b", + )[0] + ) self.psm.lines.append(numpy.array([self.psm.xs, self.psm.ys])) self.psm.xs, self.psm.ys = [], [] self.psm.line.remove() self.psm.line = None self.psm._yline.remove() self.psm._yline = None - 
self.psm.data_lines.append(self.get_polygon_neighbours_data(self.psm.lines[-1])) - self.psm.index_lines.append(self.get_polygon_neighbours_indices(self.psm.lines[-1])) + self.psm.data_lines.append( + self.get_polygon_neighbours_data(self.psm.lines[-1]) + ) + self.psm.index_lines.append( + self.get_polygon_neighbours_indices(self.psm.lines[-1]) + ) self.blocking = False else: self.psm.xs, self.psm.ys = [], [] self.psm.line = None - #self.redraw() - + # self.redraw() + def onmove(self, event): super().onmove(event) self.psm._x = event.xdata self.psm._y = event.ydata if self.psm.line is not None: - xs = self.psm.xs+[self.psm._x] - ys = self.psm.ys+[self.psm._y] + xs = self.psm.xs + [self.psm._x] + ys = self.psm.ys + [self.psm._y] self.psm.line.set_data(xs, ys) if self.show_tracedata: current_x_ydata = self.get_line_ydata( - [self.psm.xs[-1]]+[self.psm._x], - [self.psm.ys[-1]]+[self.psm._y], - ) + [self.psm.xs[-1]] + [self.psm._x], + [self.psm.ys[-1]] + [self.psm._y], + ) self.psm._yline.set_data( - self.psm._xs+current_x_ydata[0], - self.psm._ys+current_x_ydata[1], - ) + self.psm._xs + current_x_ydata[0], + self.psm._ys + current_x_ydata[1], + ) def get_line_ydata(self, xs, ys): xdata = [] ydata = [] - for i in range(len(xs)-1): - current_xy_data = self.get_polygon_neighbours_data([ - xs[i:i+2], - ys[i:i+2], - ]) + for i in range(len(xs) - 1): + current_xy_data = self.get_polygon_neighbours_data( + [ + xs[i : i + 2], + ys[i : i + 2], + ] + ) xdata += current_xy_data[0] ydata += current_xy_data[1] return xdata, ydata @@ -685,12 +755,17 @@ def get_polygon_neighbours_data(self, line): """ line_xs = [] line_ys = [] - for i in range(len(line[0])-1): - x1, y1, x2, y2 = line[0][i], line[1][i], line[0][i+1], line[1][i+1] + for i in range(len(line[0]) - 1): + x1, y1, x2, y2 = ( + line[0][i], + line[1][i], + line[0][i + 1], + line[1][i + 1], + ) x, y, x_index, y_index = self.get_neighbours([x1, x2], [y1, y2]) if x is not None and y is not None: - line_xs = line_xs+list(x) - 
line_ys = line_ys+list(y) + line_xs = line_xs + list(x) + line_ys = line_ys + list(y) return [line_xs, line_ys] def get_polygon_neighbours_indices(self, line): @@ -700,14 +775,19 @@ def get_polygon_neighbours_indices(self, line): """ line_xs = [] line_ys = [] - for i in range(len(line[0])-1): - x1, y1, x2, y2 = line[0][i], line[1][i], line[0][i+1], line[1][i+1] + for i in range(len(line[0]) - 1): + x1, y1, x2, y2 = ( + line[0][i], + line[1][i], + line[0][i + 1], + line[1][i + 1], + ) x, y, x_index, y_index = self.get_neighbours([x1, x2], [y1, y2]) if x_index is not None and y_index is not None: - line_xs = line_xs+list(x_index) - line_ys = line_ys+list(y_index) + line_xs = line_xs + list(x_index) + line_ys = line_ys + list(y_index) return [line_xs, line_ys] - + def get_neighbours(self, xs, ys): """ For a pair of coordinates (xs = [x1, x2], ys = [y1, y2]), return the @@ -719,7 +799,7 @@ def get_neighbours(self, xs, ys): if True not in ymask: return None, None, None, None y_lo = ymask.index(True) - y_hi = len(ymask)-ymask[::-1].index(True) + y_hi = len(ymask) - ymask[::-1].index(True) x_neighbours = [] y_neighbours = [] y_indices = [i for i in range(y_lo, y_hi)] @@ -727,13 +807,13 @@ def get_neighbours(self, xs, ys): y_indices = y_indices[::-1] x_indices = [] for i in y_indices: - x = [self.ppm[0], self.ppm[-1], xs[0], xs[1]] - y = [self.y_indices[i], self.y_indices[i], ys[0], ys[1]] + x = [self.ppm[0], self.ppm[-1], xs[0], xs[1]] + y = [self.y_indices[i], self.y_indices[i], ys[0], ys[1]] x, y = self.get_intersection(x, y) - x = numpy.argmin(abs(self.ppm[::-1]-x)) + x = numpy.argmin(abs(self.ppm[::-1] - x)) x_indices.append(x) x_neighbours.append(self.ppm[::-1][x]) - y_neighbours.append(self.data[i][x]+self.y_indices[i]) + y_neighbours.append(self.data[i][x] + self.y_indices[i]) return x_neighbours, y_neighbours, x_indices, y_indices @staticmethod @@ -745,46 +825,54 @@ def get_intersection(x, y): and [x4, y4] represent the other. 
See https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Given_two_points_on_each_line """ - px = (((x[0]*y[1]-y[0]*x[1])*(x[2]-x[3])-(x[0]-x[1])*(x[2]*y[3]-y[2]*x[3]))/((x[0]-x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]-x[3]))) - py = (((x[0]*y[1]-y[0]*x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]*y[3]-y[2]*x[3]))/((x[0]-x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]-x[3]))) + px = ( + (x[0] * y[1] - y[0] * x[1]) * (x[2] - x[3]) + - (x[0] - x[1]) * (x[2] * y[3] - y[2] * x[3]) + ) / ((x[0] - x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] - x[3])) + py = ( + (x[0] * y[1] - y[0] * x[1]) * (y[2] - y[3]) + - (y[0] - y[1]) * (x[2] * y[3] - y[2] * x[3]) + ) / ((x[0] - x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] - x[3])) return px, py -class LineSelectorMixin(BaseSelectorMixin): +class LineSelectorMixin(BaseSelectorMixin): def __init__(self): super().__init__() + class Lsm: pass + self.lsm = Lsm() self.lsm.btn_add = 1 self.lsm.btn_del = 1 - self.lsm.key_mod = 'control' + self.lsm.key_mod = "control" self.lsm.peaklines = {} self.lsm.peaks = [] for x in self.peaks: self.lsm.peaks.append(x) self.lsm.peaklines[x] = self.makeline(x) - #self.ax.draw_artist(self.lsm.peaklines[x]) + # self.ax.draw_artist(self.lsm.peaklines[x]) self.lsm.peaks = sorted(self.lsm.peaks)[::-1] - + def makeline(self, x): return self.ax.plot( - [x, x], + [x, x], self.ylims, - color='#CC0000', + color="#CC0000", lw=1, - #animated=True - )[0] + # animated=True + )[0] def redraw(self): super().redraw() - if hasattr(self, 'lsm'): + if hasattr(self, "lsm"): for i, j in self.lsm.peaklines.items(): self.ax.draw_artist(j) def change_visible(self): super().change_visible() - if hasattr(self, 'lsm'): + if hasattr(self, "lsm"): for i, j in self.lsm.peaklines.items(): j.set_visible(True) j.set_visible(not j.get_visible()) @@ -793,30 +881,37 @@ def press(self, event): super().press(event) x = numpy.round(event.xdata, 2) # left - if event.button == self.lsm.btn_add and \ - event.key != self.lsm.key_mod and \ - (x >= self.xlims[1]) and (x 
<= self.xlims[0]): + if ( + event.button == self.lsm.btn_add + and event.key != self.lsm.key_mod + and (x >= self.xlims[1]) + and (x <= self.xlims[0]) + ): with self.out: - print('peak {}'.format(x)) + print("peak {}".format(x)) if x not in self.lsm.peaks: self.lsm.peaks.append(x) self.lsm.peaklines[x] = self.makeline(x) self.lsm.peaks = sorted(self.lsm.peaks)[::-1] - #self.ax.draw_artist(self.lsm.peaklines[x]) - #Ctrl+left - elif event.button == self.lsm.btn_del and event.key == self.lsm.key_mod: - #find and delete nearest peakline + # self.ax.draw_artist(self.lsm.peaklines[x]) + # Ctrl+left + elif ( + event.button == self.lsm.btn_del and event.key == self.lsm.key_mod + ): + # find and delete nearest peakline if len(self.lsm.peaks) > 0: - delete_peak = numpy.argmin([abs(i-x) for i in self.lsm.peaks]) + delete_peak = numpy.argmin( + [abs(i - x) for i in self.lsm.peaks] + ) old_peak = self.lsm.peaks.pop(delete_peak) - try: + try: peakline = self.lsm.peaklines.pop(old_peak) peakline.remove() except: with self.out: - print('Could not remove peakline') + print("Could not remove peakline") self.canvas.draw() - #self.redraw() + # self.redraw() def release(self, event): super().release(event) @@ -826,59 +921,62 @@ def onmove(self, event): class SpanSelectorMixin(BaseSelectorMixin): - def __init__(self): super().__init__() + class Ssm: pass + self.ssm = Ssm() self.ssm.btn_add = 3 self.ssm.btn_del = 3 - self.ssm.key_mod = 'control' + self.ssm.key_mod = "control" self.ssm.minspan = 0 self.ssm.rect = None self.ssm.rangespans = [] - self.ssm.rectprops = dict(facecolor='0.5', alpha=0.2) + self.ssm.rectprops = dict(facecolor="0.5", alpha=0.2) self.ssm.ranges = self.ranges for rng in self.ssm.ranges: - self.ssm.rangespans.append(self.makespan(rng[1], rng[0]-rng[1])) + self.ssm.rangespans.append(self.makespan(rng[1], rng[0] - rng[1])) self.redraw() - trans = blended_transform_factory( - self.ax.transData, - self.ax.transAxes) + trans = blended_transform_factory(self.ax.transData, 
self.ax.transAxes) w, h = 0, 1 - self.ssm.rect = Rectangle([0, 0], w, h, - transform=trans, - visible=False, - animated=True, - **self.ssm.rectprops - ) + self.ssm.rect = Rectangle( + [0, 0], + w, + h, + transform=trans, + visible=False, + animated=True, + **self.ssm.rectprops + ) self.ax.add_patch(self.ssm.rect) def makespan(self, left, width): - trans = blended_transform_factory( - self.ax.transData, - self.ax.transAxes) + trans = blended_transform_factory(self.ax.transData, self.ax.transAxes) bottom, top = self.ylims - height = top-bottom - rect = Rectangle([left, bottom], width, height, - transform=trans, - visible=True, - #animated=True, - **self.ssm.rectprops - ) + height = top - bottom + rect = Rectangle( + [left, bottom], + width, + height, + transform=trans, + visible=True, + # animated=True, + **self.ssm.rectprops + ) self.ax.add_patch(rect) return rect def redraw(self): super().redraw() - if hasattr(self, 'ssm'): + if hasattr(self, "ssm"): for i in self.ssm.rangespans: self.ax.draw_artist(i) def change_visible(self): super().change_visible() - if hasattr(self, 'ssm'): + if hasattr(self, "ssm"): for i in self.ssm.rangespans: i.set_visible(not i.get_visible()) @@ -889,14 +987,19 @@ def press(self, event): if event.button == self.ssm.btn_add and event.key != self.ssm.key_mod: self.buttonDown = True self.pressv = event.xdata - elif event.button == self.ssm.btn_add and event.key == self.ssm.key_mod: - #find and delete range + elif ( + event.button == self.ssm.btn_add and event.key == self.ssm.key_mod + ): + # find and delete range if len(self.ssm.ranges) > 0: x = event.xdata rng = 0 while rng < len(self.ssm.ranges): - if x >= (self.ssm.ranges[rng])[1] and x <= (self.ssm.ranges[rng])[0]: - self.ssm.ranges.pop(rng) + if ( + x >= (self.ssm.ranges[rng])[1] + and x <= (self.ssm.ranges[rng])[0] + ): + self.ssm.ranges.pop(rng) rangespan = self.ssm.rangespans.pop(rng) rangespan.remove() break @@ -913,20 +1016,21 @@ def release(self, event): span = vmax - vmin 
self.pressv = None spantest = False - #if len(self.ssm.ranges) > 0: + # if len(self.ssm.ranges) > 0: # for i in self.ssm.ranges: # if (vmin >= i[1]) and (vmin <= i[0]): # spantest = True # if (vmax >= i[1]) and (vmax <= i[0]): # spantest = True if span > self.ssm.minspan and spantest is False: - self.ssm.ranges.append([numpy.round(vmin, 2), numpy.round(vmax, 2)]) + self.ssm.ranges.append( + [numpy.round(vmin, 2), numpy.round(vmax, 2)] + ) self.ssm.rangespans.append(self.makespan(vmin, span)) with self.out: - print('range {} -> {}'.format(vmax, vmin)) + print("range {} -> {}".format(vmax, vmin)) self.ssm.ranges = [numpy.sort(i)[::-1] for i in self.ssm.ranges] - def onmove(self, event): super().onmove(event) if self.pressv is None or self.buttonDown is False: @@ -936,40 +1040,46 @@ def onmove(self, event): v = x minv, maxv = v, self.pressv if minv > maxv: - minv, maxv = maxv, minv + minv, maxv = maxv, minv vmin = self.pressv vmax = event.xdata # or self.prev[0] if vmin > vmax: - vmin, vmax = vmax, vmin + vmin, vmax = vmax, vmin self.ssm.rect.set_visible(self.visible) self.ssm.rect.set_xy([minv, self.ssm.rect.xy[1]]) - self.ssm.rect.set_width(maxv-minv) + self.ssm.rect.set_width(maxv - minv) self.ax.draw_artist(self.ssm.rect) -class PeakSelectorMixin(BaseSelectorMixin): +class PeakSelectorMixin(BaseSelectorMixin): def __init__(self): super().__init__() + class Psm: pass + self.psm = Psm() self.psm.btn_add = 1 self.psm.peak = None self.psm.newx = None - + def makeline(self, x): return self.ax.plot( - [x, x], + [x, x], self.ylims, - color='#CC0000', + color="#CC0000", lw=1, - )[0] + )[0] def press(self, event): super().press(event) x = numpy.round(event.xdata, 2) # left - if event.button == self.psm.btn_add and (x >= self.xlims[1]) and (x <= self.xlims[0]): + if ( + event.button == self.psm.btn_add + and (x >= self.xlims[1]) + and (x <= self.xlims[0]) + ): self.psm.peak = x self.makeline(x) self.process() @@ -979,55 +1089,60 @@ def release(self, event): def onmove(self, 
event): super().onmove(event) - + def process(self): pass - -class AssignMixin(BaseSelectorMixin): + +class AssignMixin(BaseSelectorMixin): def __init__(self): super().__init__() + class Am: pass + self.am = Am() self.am.btn_assign = 3 - self.am.key_mod1 = 'ctrl+alt' - self.am.key_mod2 = 'alt+control' + self.am.key_mod1 = "ctrl+alt" + self.am.key_mod2 = "alt+control" def press(self, event): super().press(event) - if event.button == self.am.btn_assign and (event.key == self.am.key_mod1 \ - or event.key == self.am.key_mod2): + if event.button == self.am.btn_assign and ( + event.key == self.am.key_mod1 or event.key == self.am.key_mod2 + ): with self.out: - print('assigned peaks and ranges') - self.assign() + print("assigned peaks and ranges") + self.assign() def assign(self): pass - -class DataSelector(): + + +class DataSelector: """ Interactive selector widget. can inherit from various mixins for functionality: Line selection: :class:`~nmrpy.plotting.LineSelectorMixin` Span selection: :class:`~nmrpy.plotting.SpanSelectorMixin` Poly selection: :class:`~nmrpy.plotting.PolySelectorMixin` - + This class is not intended to be used without inheriting at least one mixin. 
""" - def __init__(self, - data, - params, - extra_data=None, - extra_data_colour='k', - peaks=None, - ranges=None, - title=None, - voff=0.001, - label=None, - ): + def __init__( + self, + data, + params, + extra_data=None, + extra_data_colour="k", + peaks=None, + ranges=None, + title=None, + voff=0.001, + label=None, + ): if not Plot._is_iter(data): - raise AttributeError('data must be iterable.') + raise AttributeError("data must be iterable.") self.data = numpy.array(data) self.extra_data = extra_data self.extra_data_colour = extra_data_colour @@ -1050,19 +1165,25 @@ def __init__(self, self.pressv = None self.buttonDown = False self.prev = (0, 0) - self.blocking = False - #self.canvas.restore_region(self.background) - super().__init__() #calling parent init - #self.canvas.blit(self.ax.bbox) - - self.cidmotion = self.canvas.mpl_connect('motion_notify_event', self.onmove) - self.cidpress = self.canvas.mpl_connect('button_press_event', self.press) - self.cidrelease = self.canvas.mpl_connect('button_release_event', self.release) - self.ciddraw = self.canvas.mpl_connect('draw_event', self.on_draw) - #cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) - #cursor.horizOn = False + self.blocking = False + # self.canvas.restore_region(self.background) + super().__init__() # calling parent init + # self.canvas.blit(self.ax.bbox) + + self.cidmotion = self.canvas.mpl_connect( + "motion_notify_event", self.onmove + ) + self.cidpress = self.canvas.mpl_connect( + "button_press_event", self.press + ) + self.cidrelease = self.canvas.mpl_connect( + "button_release_event", self.release + ) + self.ciddraw = self.canvas.mpl_connect("draw_event", self.on_draw) + # cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) + # cursor.horizOn = False # self.canvas.draw() - #self.redraw() + # self.redraw() # plt.show() def disconnect(self): @@ -1074,9 +1195,9 @@ def disconnect(self): def _isnotebook(self): try: shell = get_ipython().__class__.__name__ - if shell == 
'ZMQInteractiveShell': + if shell == "ZMQInteractiveShell": return True # Jupyter notebook or qtconsole - elif shell == 'TerminalInteractiveShell': + elif shell == "TerminalInteractiveShell": return False # Terminal running IPython else: return False # Other type (?) @@ -1089,16 +1210,21 @@ def _make_basic_fig(self, *args, **kwargs): self.ax = self.fig.add_subplot(111) if len(self.data.shape) == 1: self.ppm = numpy.mgrid[ - self.params['sw_left'] - - self.params['sw'] : self.params['sw_left'] : complex(self.data.shape[0]) + self.params["sw_left"] + - self.params["sw"] : self.params["sw_left"] : complex( + self.data.shape[0] + ) ] # extra_data if self.extra_data is not None: self.ax.plot( - self.ppm[::-1], self.extra_data, color=self.extra_data_colour, lw=1 + self.ppm[::-1], + self.extra_data, + color=self.extra_data_colour, + lw=1, ) # data - self.ax.plot(self.ppm[::-1], self.data, color='k', lw=1) + self.ax.plot(self.ppm[::-1], self.data, color="k", lw=1) elif len(self.data.shape) == 2: cl = dict( zip( @@ -1107,14 +1233,20 @@ def _make_basic_fig(self, *args, **kwargs): ) ) self.ppm = numpy.mgrid[ - self.params['sw_left'] - - self.params['sw'] : self.params['sw_left'] : complex(self.data.shape[1]) + self.params["sw_left"] + - self.params["sw"] : self.params["sw_left"] : complex( + self.data.shape[1] + ) ] - self.y_indices = numpy.arange(len(self.data)) * self.voff * self.data.max() + self.y_indices = ( + numpy.arange(len(self.data)) * self.voff * self.data.max() + ) # this is reversed for zorder # extra_data if self.extra_data is not None: - for i, j in zip(range(len(self.extra_data))[::-1], self.extra_data[::-1]): + for i, j in zip( + range(len(self.extra_data))[::-1], self.extra_data[::-1] + ): self.ax.plot( self.ppm[::-1], j + self.y_indices[i], @@ -1123,8 +1255,10 @@ def _make_basic_fig(self, *args, **kwargs): ) # data for i, j in zip(range(len(self.data))[::-1], self.data[::-1]): - self.ax.plot(self.ppm[::-1], j + self.y_indices[i], color=cl[i], lw=1) - 
self.ax.set_xlabel('ppm') + self.ax.plot( + self.ppm[::-1], j + self.y_indices[i], color=cl[i], lw=1 + ) + self.ax.set_xlabel("ppm") self.ylims = numpy.array(self.ax.get_ylim()) # numpy.array([self.ax.get_ylim()[0], self.data.max() + abs(self.ax.get_ylim()[0])]) # self.ax.set_ylim(self.ylims)#self.ax.get_ylim()[0], self.data.max()*1.1]) @@ -1132,7 +1266,11 @@ def _make_basic_fig(self, *args, **kwargs): self.xlims = [self.ppm[-1], self.ppm[0]] self.ax.set_xlim(self.xlims) self.fig.suptitle(self.title, size=20) - self.ax.text(0.95 * self.ax.get_xlim()[0], 0.7 * self.ax.get_ylim()[1], self.label) + self.ax.text( + 0.95 * self.ax.get_xlim()[0], + 0.7 * self.ax.get_ylim()[1], + self.label, + ) self.ax.set_ylim(self.ylims) self.canvas = self.ax.figure.canvas # self.canvas.draw() @@ -1160,7 +1298,7 @@ def on_zoom(self, event): def press(self, event): tb = plt.get_current_fig_manager().toolbar - if tb.mode == '' and event.xdata is not None: + if tb.mode == "" and event.xdata is not None: x = numpy.round(event.xdata, 2) self.canvas.restore_region(self.background) try: @@ -1168,7 +1306,7 @@ def press(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def release(self, event): if self.pressv is None or not self.buttonDown: @@ -1180,7 +1318,7 @@ def release(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def onmove(self, event): if event.inaxes is None: @@ -1193,87 +1331,105 @@ def onmove(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def make_invisible(self): try: - super().make_invisible() + super().make_invisible() except Exception as e: logging.error(traceback.format_exc()) def make_visible(self): try: - super().make_visible() + super().make_visible() except 
Exception as e: logging.error(traceback.format_exc()) def redraw(self): try: - super().redraw() + super().redraw() except Exception as e: logging.error(traceback.format_exc()) - + def change_visible(self): try: - super().change_visible() + super().change_visible() except Exception as e: logging.error(traceback.format_exc()) + class IntegralDataSelector(DataSelector, PolySelectorMixin, AssignMixin): show_tracedata = True -class PeakTraceDataSelector(DataSelector, PolySelectorMixin, SpanSelectorMixin, AssignMixin): + +class PeakTraceDataSelector( + DataSelector, PolySelectorMixin, SpanSelectorMixin, AssignMixin +): show_tracedata = True -class LineSpanDataSelector(DataSelector, LineSelectorMixin, SpanSelectorMixin, AssignMixin): + +class LineSpanDataSelector( + DataSelector, LineSelectorMixin, SpanSelectorMixin, AssignMixin +): pass + class PeakDataSelector(DataSelector, PeakSelectorMixin): pass - + + class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): pass + class DataTraceSelector: """ Interactive data-selection widget with traces and ranges. Traces are saved - as self.data_traces (WRT data) and self.index_traces (WRT index). + as self.data_traces (WRT data) and self.index_traces (WRT index). 
""" - def __init__(self, fid_array, - extra_data=None, - extra_data_colour='b', - voff=1e-3, - lw=1, - label=None, - ): + + def __init__( + self, + fid_array, + extra_data=None, + extra_data_colour="b", + voff=1e-3, + lw=1, + label=None, + ): self.fid_array = fid_array if fid_array.data is [] or fid_array.data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") data = fid_array.data params = fid_array._params - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] + + ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] - self.integral_selector = IntegralDataSelector( - extra_data, - params, - extra_data=data, - extra_data_colour=extra_data_colour, - peaks=None, - ranges=None, - title='Integral trace selector', - voff=voff, - label=label) + extra_data, + params, + extra_data=data, + extra_data_colour=extra_data_colour, + peaks=None, + ranges=None, + title="Integral trace selector", + voff=voff, + label=label, + ) self.integral_selector.assign = self.assign - + def assign(self): data_traces = self.integral_selector.psm.data_lines index_traces = self.integral_selector.psm.index_lines - - self.fid_array._data_traces = [dict(zip(i[1], i[0])) for i in data_traces] - self.fid_array._index_traces = [dict(zip(i[1], i[0])) for i in index_traces] + + self.fid_array._data_traces = [ + dict(zip(i[1], i[0])) for i in data_traces + ] + self.fid_array._index_traces = [ + dict(zip(i[1], i[0])) for i in index_traces + ] decon_peaks = [] for i in self.fid_array._deconvoluted_peaks: @@ -1288,11 +1444,11 @@ def assign(self): integrals = {} for fid, indx in trace.items(): try: - integrals[fid] = numpy.argmin(abs(decon_peaks[fid]-indx)) + integrals[fid] = numpy.argmin(abs(decon_peaks[fid] - indx)) except: integrals[fid] = None trace_dict[t] = integrals - last_fid = (len(self.fid_array.get_fids())-1) + last_fid = 
len(self.fid_array.get_fids()) - 1 for i in trace_dict: tmin = min(trace_dict[i]) tminval = trace_dict[i][tmin] @@ -1302,97 +1458,113 @@ def assign(self): tmax = max(trace_dict[i]) tmaxval = trace_dict[i][tmax] if tmax < last_fid: - for j in range(tmax, last_fid+1): + for j in range(tmax, last_fid + 1): trace_dict[i][j] = tmaxval self.fid_array.integral_traces = trace_dict plt.close(self.integral_selector.fig) + class DataTraceRangeSelector: """ Interactive data-selection widget with traces and ranges. Traces are saved as self.data_traces (WRT data) and self.index_traces (WRT index). Spans are saves as self.spans. """ - def __init__(self, fid_array, - peaks=None, - ranges=None, - voff=1e-3, - lw=1, - label=None, - ): + + def __init__( + self, + fid_array, + peaks=None, + ranges=None, + voff=1e-3, + lw=1, + label=None, + ): self.fid_array = fid_array if fid_array.data is [] or fid_array.data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") data = fid_array.data params = fid_array._params - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] + + ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] - self.peak_selector = PeakTraceDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title='Peak and range trace selector', - voff=voff, - label=label) + data, + params, + peaks=peaks, + ranges=ranges, + title="Peak and range trace selector", + voff=voff, + label=label, + ) self.peak_selector.assign = self.assign def assign(self): data_traces = self.peak_selector.psm.data_lines index_traces = self.peak_selector.psm.index_lines spans = self.peak_selector.ssm.ranges - - traces = [[i[0], j[1]] for i, j in zip(data_traces, index_traces)] + + traces = [[i[0], j[1]] for i, j in zip(data_traces, index_traces)] self.fid_array.traces = traces - self.fid_array._trace_mask = 
self.fid_array._generate_trace_mask(traces) + self.fid_array._trace_mask = self.fid_array._generate_trace_mask( + traces + ) self.fid_array._set_all_peaks_ranges_from_traces_and_spans( - traces, spans) + traces, spans + ) plt.close(self.peak_selector.fig) + class DataPeakSelector: """ Interactive data-selection widget with lines and ranges for a single Fid. Lines and spans are saved as self.peaks, self.ranges. """ - def __init__(self, fid, - peaks=None, - ranges=None, - voff=1e-3, - lw=1, - label=None, - title=None, - ): + + def __init__( + self, + fid, + peaks=None, + ranges=None, + voff=1e-3, + lw=1, + label=None, + title=None, + ): self.fid = fid if fid.data is [] or fid.data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") data = fid.data params = fid._params - sw_left = params['sw_left'] - sw = params['sw'] - ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] + sw_left = params["sw_left"] + sw = params["sw"] + ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] if fid.peaks is not None: peaks = list(fid.peaks) if fid.ranges is not None: - ranges = list(fid.ranges) - + ranges = list(fid.ranges) + self.peak_selector = LineSpanDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title=title, - voff=voff, - label=label) + data, + params, + peaks=peaks, + ranges=ranges, + title=title, + voff=voff, + label=label, + ) self.peak_selector.assign = self.assign - + def assign(self): - if len(self.peak_selector.ssm.ranges) > 0 and len(self.peak_selector.lsm.peaks) > 0: + if ( + len(self.peak_selector.ssm.ranges) > 0 + and len(self.peak_selector.lsm.peaks) > 0 + ): self.fid.ranges = self.peak_selector.ssm.ranges peaks = [] for peak in self.peak_selector.lsm.peaks: @@ -1405,17 +1577,21 @@ def assign(self): self.fid.ranges = None plt.close(self.peak_selector.fig) + class DataPeakRangeSelector: """Interactive data-selection widget with lines and ranges. 
Lines and spans are saved as self.peaks, self.ranges.""" - def __init__(self, fid_array, - peaks=None, - ranges=None, - y_indices=None, - aoti=True, - voff=1e-3, - lw=1, - label=None, - ): + + def __init__( + self, + fid_array, + peaks=None, + ranges=None, + y_indices=None, + aoti=True, + voff=1e-3, + lw=1, + label=None, + ): self.fid_array = fid_array self.fids = fid_array.get_fids() self.assign_only_to_index = aoti @@ -1426,30 +1602,31 @@ def __init__(self, fid_array, else: self.fid_number = range(len(self.fids)) if fid_array.data is [] or fid_array.data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") data = fid_array.data if y_indices is not None: data = fid_array.data[numpy.array(self.fid_number)] params = fid_array._params - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] + + ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] - self.peak_selector = LineSpanDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title='Peak and range selector', - voff=voff, - label=label) + data, + params, + peaks=peaks, + ranges=ranges, + title="Peak and range selector", + voff=voff, + label=label, + ) self.peak_selector.assign = self.assign - + def assign(self): self.peaks = self.peak_selector.lsm.peaks self.ranges = self.peak_selector.ssm.ranges - + if len(self.ranges) > 0 and len(self.peaks) > 0: ranges = self.ranges peaks = [] @@ -1465,80 +1642,96 @@ def assign(self): for fid in [self.fids[i] for i in self.fid_number]: fid.peaks = peaks fid.ranges = ranges - else: + else: for fid in self.fids: fid.peaks = peaks fid.ranges = ranges plt.close(self.peak_selector.fig) - + + class Calibrator: """ Interactive data-selection widget for calibrating PPM of a spectrum. 
""" - def __init__(self, fid, - lw=1, - label=None, - title=None, - ): + + def __init__( + self, + fid, + lw=1, + label=None, + title=None, + ): self.fid = fid if fid.data is [] or fid.data is None: - raise ValueError('data must exist.') - if not fid._flags['ft']: - raise ValueError('Only Fourier-transformed data can be calibrated.') + raise ValueError("data must exist.") + if not fid._flags["ft"]: + raise ValueError( + "Only Fourier-transformed data can be calibrated." + ) data = fid.data params = fid._params - sw_left = params['sw_left'] + sw_left = params["sw_left"] self.sw_left = sw_left - sw = params['sw'] - ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] + sw = params["sw"] + ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] self.peak_selector = PeakDataSelector( - data, - params, - title=title, - label=label) + data, params, title=title, label=label + ) self.peak_selector.process = self.process - - self.textinput = FloatText(value=0.0, description='New PPM:', - disabled=False, continuous_update=False) - + + self.textinput = FloatText( + value=0.0, + description="New PPM:", + disabled=False, + continuous_update=False, + ) + def _wait_for_change(self, widget, value): future = asyncio.Future() + def getvalue(change): # make the new value available future.set_result(change.new) widget.unobserve(getvalue, value) + widget.observe(getvalue, value) return future - + def process(self): peak = self.peak_selector.psm.peak self.peak_selector.out.clear_output() with self.peak_selector.out: - print('current peak ppm: {}'.format(peak)) + print("current peak ppm: {}".format(peak)) display(self.textinput) + async def f(): - newx = await self._wait_for_change(self.textinput, 'value') + newx = await self._wait_for_change(self.textinput, "value") offset = newx - peak - self.fid._params['sw_left'] = self.sw_left + offset + self.fid._params["sw_left"] = self.sw_left + offset with self.peak_selector.out: - print('calibration done.') + print("calibration 
done.") plt.close(self.peak_selector.fig) + asyncio.ensure_future(f()) + class RangeCalibrator: """ Interactive data-selection widget for calibrating PPM of an array of spectra. """ - def __init__(self, fid_array, - y_indices=None, - aoti=True, - voff=1e-3, - lw=1, - label=None, - ): + + def __init__( + self, + fid_array, + y_indices=None, + aoti=True, + voff=1e-3, + lw=1, + label=None, + ): self.fid_array = fid_array self.fids = fid_array.get_fids() self.assign_only_to_index = aoti @@ -1549,94 +1742,100 @@ def __init__(self, fid_array, else: self.fid_number = range(len(self.fids)) if fid_array.data is [] or fid_array.data is None: - raise ValueError('data must exist.') - if any (not fid._flags['ft'] for fid in self.fids): - raise ValueError('Only Fourier-transformed data can be calibrated.') + raise ValueError("data must exist.") + if any(not fid._flags["ft"] for fid in self.fids): + raise ValueError( + "Only Fourier-transformed data can be calibrated." + ) data = fid_array.data if y_indices is not None: data = fid_array.data[numpy.array(self.fid_number)] params = fid_array._params - sw_left = params['sw_left'] + sw_left = params["sw_left"] self.sw_left = sw_left - sw = params['sw'] - ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] + sw = params["sw"] + ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] self.peak_selector = PeakDataSelector( - data, - params, - title='FidArray calibration', - voff = voff, - label=label) + data, params, title="FidArray calibration", voff=voff, label=label + ) self.peak_selector.process = self.process - - self.textinput = FloatText(value=0.0, description='New PPM:', - disabled=False, continuous_update=False) - + + self.textinput = FloatText( + value=0.0, + description="New PPM:", + disabled=False, + continuous_update=False, + ) + def _wait_for_change(self, widget, value): future = asyncio.Future() + def getvalue(change): # make the new value available future.set_result(change.new) 
widget.unobserve(getvalue, value) + widget.observe(getvalue, value) return future - + def process(self): peak = self.peak_selector.psm.peak self.peak_selector.out.clear_output() with self.peak_selector.out: - print('current peak ppm: {}'.format(peak)) + print("current peak ppm: {}".format(peak)) display(self.textinput) + async def f(): - newx = await self._wait_for_change(self.textinput, 'value') + newx = await self._wait_for_change(self.textinput, "value") offset = newx - peak self._applycalibration(offset) with self.peak_selector.out: - print('calibration done.') + print("calibration done.") plt.close(self.peak_selector.fig) + asyncio.ensure_future(f()) def _applycalibration(self, offset): - self.fid_array._params['sw_left'] = self.sw_left + offset - + self.fid_array._params["sw_left"] = self.sw_left + offset + if self.assign_only_to_index: for fid in [self.fids[i] for i in self.fid_number]: - fid._params['sw_left'] = self.sw_left + offset - else: + fid._params["sw_left"] = self.sw_left + offset + else: for fid in self.fids: - fid._params['sw_left'] = self.sw_left + offset + fid._params["sw_left"] = self.sw_left + offset + class FidArrayRangeSelector: """Interactive data-selection widget with ranges. 
Spans are saved as self.ranges.""" - def __init__(self, - fid_array, - ranges=None, - y_indices=None, - voff=1e-3, - lw=1, - title=None, - label=None, - ): + + def __init__( + self, + fid_array, + ranges=None, + y_indices=None, + voff=1e-3, + lw=1, + title=None, + label=None, + ): self.fid_array = fid_array self.fids = fid_array.get_fids() data = fid_array.data params = fid_array._params if data is [] or data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") if y_indices is not None: data = data[numpy.array(y_indices)] - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] + + ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] - self.span_selector = SpanDataSelector( - data, - params, - ranges=ranges, - title=title, - voff=voff, - label=label) + data, params, ranges=ranges, title=title, voff=voff, label=label + ) self.span_selector.assign = self.assign def assign(self): @@ -1651,36 +1850,35 @@ def assign(self): fid._bl_ppm = bl_ppm plt.close(self.span_selector.fig) + class FidRangeSelector: """Interactive data-selection widget with ranges. 
Spans are saved as self.ranges.""" - def __init__(self, - fid, - title=None, - ranges=None, - y_indices=None, - voff=1e-3, - lw=1, - label=None, - ): - self.fid=fid + + def __init__( + self, + fid, + title=None, + ranges=None, + y_indices=None, + voff=1e-3, + lw=1, + label=None, + ): + self.fid = fid data = fid.data params = fid._params if data is [] or data is None: - raise ValueError('data must exist.') + raise ValueError("data must exist.") if y_indices is not None: data = data[numpy.array(y_indices)] - sw_left = params['sw_left'] - sw = params['sw'] + sw_left = params["sw_left"] + sw = params["sw"] + + self.ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] - self.ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] - self.span_selector = SpanDataSelector( - data, - params, - ranges=ranges, - title=title, - voff=voff, - label=label) + data, params, ranges=ranges, title=title, voff=voff, label=label + ) self.span_selector.assign = self.assign def assign(self): @@ -1694,5 +1892,6 @@ def assign(self): self.fid._bl_ppm = bl_ppm plt.close(self.span_selector.fig) -if __name__ == '__main__': + +if __name__ == "__main__": pass diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index dd45c34..97d15e9 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -94,7 +94,7 @@ Container for relevant NMR parameters. ### FIDArray -Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back.} +Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. 
Setup time for experiment, Default 0.5} - __fids__ - Type: string From 85d3a4e79ee72be5ece1cb25e82360d7e86baed7 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 5 Sep 2023 10:08:53 +0200 Subject: [PATCH 03/54] Implement IdentityAssigner for one FID object --- nmrpy/data_objects.py | 36 +++++++++++++++ nmrpy/plotting.py | 105 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 138 insertions(+), 3 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 496cb65..b64a975 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -263,6 +263,7 @@ def __init__(self, *args, **kwargs): self.data = kwargs.get("data", []) self.peaks = None self.ranges = None + self.identities = None self._deconvoluted_peaks = None self._flags = { "ft": False, @@ -353,6 +354,24 @@ def ranges(self, ranges): raise AttributeError("ranges must be numbers") self._ranges = ranges + @property + def identities(self): + """ + Assigned identities corresponding to the various ranges in :attr:`~nmrpy.data_objects.Fid.ranges`. + """ + return self._identitites + + @identities.setter + def identities(self, identities): + if identities is not None: + if not Fid._is_flat_iter(identities): + raise AttributeError("identitites must be a flat iterable") + if not all(isinstance(i, str) for i in identities): + raise AttributeError("identities must be strings") + self._identitites = numpy.array(identities) + else: + self._identities = identities + @property def _bl_ppm(self): return self.__bl_ppm @@ -1376,6 +1395,23 @@ def plot_deconv(self, **kwargs): setattr(self, plt.id, plt) pyplot.show() + def assign_identities(self): + """ + Instantiate a identity-assignment GUI widget. Select a range from dropdown menu containing + :attr:`~nmrpy.data_objects.Fid.ranges`. Select a species from second dropdown menu + containing species defined in EnzymeML. When satisfied with assignment, press Assign button + to apply. 
+ """ + + widget_title = "Assign identitiy for {}".format(self.id) + self._assigner_widget = IdentityAssigner(fid=self, title=widget_title) + + def clear_identities(self): + """ + Clear assigned identities stored in :attr:`~nmrpy.data_objects.Fid.identities`. + """ + self.identities = None + class FidArray(Base): """ diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 113397c..4beb471 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -11,7 +11,7 @@ from matplotlib.widgets import Cursor from matplotlib.backend_bases import NavigationToolbar2, Event -from ipywidgets import FloatText, Output, VBox +from ipywidgets import FloatText, Output, VBox, Dropdown, Label, Button from IPython.display import display import asyncio @@ -948,7 +948,7 @@ class Ssm: transform=trans, visible=False, animated=True, - **self.ssm.rectprops + **self.ssm.rectprops, ) self.ax.add_patch(self.ssm.rect) @@ -963,7 +963,7 @@ def makespan(self, left, width): transform=trans, visible=True, # animated=True, - **self.ssm.rectprops + **self.ssm.rectprops, ) self.ax.add_patch(rect) return rect @@ -1382,6 +1382,105 @@ class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): pass +class IdentityAssigner: + def __init__(self, fid, title): + self.fid = fid + self.title = title + self.selected_values = {} + if fid.data is [] or fid.data is None: + raise ValueError("data must exist.") + if fid.peaks is [] or fid.peaks is None: + raise RuntimeError( + f"`fid.peaks` are required but still empty. Please either assign them manually or using the `peakpicker` method." 
+ ) + + # Create the label widget for the title + title_label = Label(value=title) + + # Create the dropdown widget for the peaks + peak_dropdown = Dropdown( + options=[str(peak) for peak in fid.peaks], + description="Select a peak:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + # Create the dropdown widget for the species + species_dropdown = Dropdown( + options=[], + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + # Create the button to save selection to dict + save_button = Button( + description="Save selection", icon="file-arrow-down", disabled=True + ) + + # Create an output widget to display the selection + selection_output = Output() + + # Define a method to handle the peak dropdown's change event + def on_peak_dropdown_change(event): + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option != "": + species_dropdown.options = [ + "3PG", + "2PG", + "Phosphate", + "TEP", + "PEP", + ] + species_dropdown.disabled = False + save_button.disabled = False + + # Attach the function to the dropdown's change event + peak_dropdown.observe(on_peak_dropdown_change) + + # Define a method to handle the species dropdown's change event + def on_species_dropdown_change(event): + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option != "": + new_key = peak_dropdown.value + self.selected_values[new_key] = selected_option + + # Attach the function to the second dropdown's change event + species_dropdown.observe(on_species_dropdown_change) + + # Define a function to handle the save button click event + def on_save_button_click(b): + with selection_output: + selection_output.clear_output(wait=True) + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") + + # Attach the function to the save 
button's click event + save_button.on_click(on_save_button_click) + + # Create a container for both the title and the dropdown + container = VBox( + [ + title_label, + peak_dropdown, + species_dropdown, + save_button, + selection_output, + ] + ) + + # Display the container + display(container) + + +class IdentityRangeAssigner: + ... + + class DataTraceSelector: """ Interactive data-selection widget with traces and ranges. Traces are saved From 97df96f20b8926daa9403f6522d40fef47e22525 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:23:44 +0200 Subject: [PATCH 04/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index b321a1d..a4610c2 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -17,13 +17,13 @@ jobs: python-version: 3.9 - name: Install sdRDM - run: python3 -m pip install git+https://github.com/JR-1991/software-driven-rdm.git@20-lowest-level-elements-in-xml-cannot-have-attributes-and-content + run: python3 -m pip install git+https://github.com/JR-1991/software-driven-rdm.git@linking-refactor - name: Generate API env: URL: ${{github.repositoryUrl}} COMMIT: ${{github.sha}} - run: sdrdm generate --path ./specifications/ --out . 
--name "$LIB_NAME" --url "$URL" --commit "$COMMIT" + run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name datamodel --url "$URL" --commit "$COMMIT" - name: Push source code run: | From 29606d329a14a2cfee42b683f3fe3bf6df52a44c Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:26:25 +0200 Subject: [PATCH 05/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index a4610c2..a59907f 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -23,7 +23,7 @@ jobs: env: URL: ${{github.repositoryUrl}} COMMIT: ${{github.sha}} - run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name datamodel --url "$URL" --commit "$COMMIT" + run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name "datamodel" --url "$URL" --commit "$COMMIT" - name: Push source code run: | From 158381310ca34cf579c8d8e995c44df668cf82c9 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:28:39 +0200 Subject: [PATCH 06/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index a59907f..4615156 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -28,7 +28,7 @@ jobs: - name: Push source code run: | if [[ `git status --porcelain` ]]; then - git add "$LIB_NAME" + git add "datamodel" git config --global user.name 'sdRDM Bot' git config --global user.email 'sdRDM@bot.com' git commit -am "API update" From a87f2f610b5d0bf9c26ffbdded4e97a6a1aa6591 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:31:18 +0200 
Subject: [PATCH 07/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index 4615156..e7bccdd 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -6,7 +6,7 @@ jobs: generate-api: runs-on: ubuntu-latest env: - LIB_NAME: ${{ secrets.LIB_NAME }} + LIB_NAME: datamodel steps: - name: Checkout uses: actions/checkout@v2 @@ -23,12 +23,12 @@ jobs: env: URL: ${{github.repositoryUrl}} COMMIT: ${{github.sha}} - run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name "datamodel" --url "$URL" --commit "$COMMIT" + run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name "LIB_NAME" --url "$URL" --commit "$COMMIT" - name: Push source code run: | if [[ `git status --porcelain` ]]; then - git add "datamodel" + git add "$LIB_NAME" git config --global user.name 'sdRDM Bot' git config --global user.email 'sdRDM@bot.com' git commit -am "API update" From f164b24b1ba5fbbfffc24af6113723020aa47be0 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:34:17 +0200 Subject: [PATCH 08/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index e7bccdd..59da890 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -6,7 +6,7 @@ jobs: generate-api: runs-on: ubuntu-latest env: - LIB_NAME: datamodel + LIB_NAME: ${{secrets.LIB_NAME}} steps: - name: Checkout uses: actions/checkout@v2 From 188c3650f8d21f1f7fcfa971688ee804d231c5e8 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 6 Sep 2023 15:43:12 +0200 Subject: [PATCH 09/54] Implement IdentityAssigner for entire FIDArray --- nmrpy/data_objects.py | 44 ++++++++++--- 
nmrpy/plotting.py | 142 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 173 insertions(+), 13 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index b64a975..c2056cd 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -364,8 +364,8 @@ def identities(self): @identities.setter def identities(self, identities): if identities is not None: - if not Fid._is_flat_iter(identities): - raise AttributeError("identitites must be a flat iterable") + # if not Fid._is_flat_iter(identities): + # raise AttributeError("identitites must be a flat iterable") if not all(isinstance(i, str) for i in identities): raise AttributeError("identities must be strings") self._identitites = numpy.array(identities) @@ -925,6 +925,12 @@ def clear_ranges(self): """ self.ranges = None + def clear_identitites(self): + """ + Clear identities stored in :attr:`~nmrpy.data_objects.Fid.identities`. + """ + self.identities = None + def baseliner(self): """ Instantiate a baseline-correction GUI widget. Right-click-dragging @@ -1397,18 +1403,20 @@ def plot_deconv(self, **kwargs): def assign_identities(self): """ - Instantiate a identity-assignment GUI widget. Select a range from dropdown menu containing - :attr:`~nmrpy.data_objects.Fid.ranges`. Select a species from second dropdown menu - containing species defined in EnzymeML. When satisfied with assignment, press Assign button - to apply. + Instantiate a identity-assignment GUI widget. Select peaks from + dropdown menu containing :attr:`~nmrpy.data_objects.Fid.peaks`. + Attach a species to the selected peak from second dropdown menu + containing species defined in EnzymeML. When satisfied with + assignment, press Assign button to apply. 
""" - widget_title = "Assign identitiy for {}".format(self.id) + widget_title = "Assign identities for {}".format(self.id) self._assigner_widget = IdentityAssigner(fid=self, title=widget_title) def clear_identities(self): """ - Clear assigned identities stored in :attr:`~nmrpy.data_objects.Fid.identities`. + Clear assigned identities stored in + :attr:`~nmrpy.data_objects.Fid.identities`. """ self.identities = None @@ -2382,6 +2390,26 @@ def save_data(self, file_format: str, filename=None, overwrite=False): with open(filename, "w") as f: f.write(model) + def assign_identities(self): + """ + Instantiate a identity-assignment GUI widget. Select a FID by + its ID from the combobox. Select peaks from dropdown menu + containing :attr:`~nmrpy.data_objects.Fid.peaks`. Attach a + species to the selected peak from second dropdown menu + containing species defined in EnzymeML. When satisfied with + assignment, press Assign button to apply. + """ + + self._assigner_widget = IdentityRangeAssigner(fid_array=self) + + def clear_identities(self): + """ + Clear assigned identities stored in + :attr:`~nmrpy.data_objects.Fid.identities`. 
+ """ + for fid in self.get_fids(): + fid.identities = None + class Importer(Base): def __init__(self, *args, **kwargs): diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 4beb471..b368b6e 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -11,7 +11,15 @@ from matplotlib.widgets import Cursor from matplotlib.backend_bases import NavigationToolbar2, Event -from ipywidgets import FloatText, Output, VBox, Dropdown, Label, Button +from ipywidgets import ( + FloatText, + Output, + VBox, + Dropdown, + Label, + Button, + Combobox, +) from IPython.display import display import asyncio @@ -1387,11 +1395,9 @@ def __init__(self, fid, title): self.fid = fid self.title = title self.selected_values = {} - if fid.data is [] or fid.data is None: - raise ValueError("data must exist.") if fid.peaks is [] or fid.peaks is None: raise RuntimeError( - f"`fid.peaks` are required but still empty. Please either assign them manually or using the `peakpicker` method." + f"`fid.peaks` is required but still empty. Please either assign them manually or using the `peakpicker` method." ) # Create the label widget for the title @@ -1458,6 +1464,9 @@ def on_save_button_click(b): print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") + self.fid.identities = [ + value for value in self.selected_values.values() + ] # Attach the function to the save button's click event save_button.on_click(on_save_button_click) @@ -1478,7 +1487,130 @@ def on_save_button_click(b): class IdentityRangeAssigner: - ... + """Wow, such documentation. + for fid in [self.fids[i] for i in self.fid_number]: + """ + + def __init__(self, fid_array): + self.fid_array = fid_array + self.fids = fid_array.get_fids() + self.selected_fid = None + self.selected_values = {} + for fid in self.fids: + if fid.peaks is [] or fid.peaks is None: + raise RuntimeError( + f"`fid.peaks` is required but still empty. Please either assign them manually or using the `peakpicker` method." 
+ ) + + # Create the label widget for the title + title_label = Label(value="Assign identities for all FIDs") + + # Create the combobox for the selection of the FID ID + combobox = Combobox( + options=[fid.id for fid in self.fids], + description="Select FID to base entire array on:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + # Create the dropdown widget for the peaks + peak_dropdown = Dropdown( + options=[], + description="Select a peak:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + # Create the dropdown widget for the species + species_dropdown = Dropdown( + options=[], + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + # Create the button to save selection to dict + save_button = Button( + description="Save selection", icon="file-arrow-down", disabled=True + ) + + # Create an output widget to display the selection + selection_output = Output() + + # Define a method to handle selection in combobox + def on_combobox_change(event): + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option in combobox.options: + peak_dropdown.disabled = False + self.selected_fid = self.fid_array.get_fid(selected_option) + peak_dropdown.options = [ + str(peak) for peak in self.selected_fid.peaks + ] + + # Attach the method to the combobox's change event: + combobox.observe(on_combobox_change) + + # Define a method to handle the peak dropdown's change event + def on_peak_dropdown_change(event): + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option != "": + species_dropdown.options = [ + "3PG", + "2PG", + "Phosphate", + "TEP", + "PEP", + ] + species_dropdown.disabled = False + save_button.disabled = False + + # Attach the method to the dropdown's change event + 
peak_dropdown.observe(on_peak_dropdown_change) + + # Define a method to handle the species dropdown's change event + def on_species_dropdown_change(event): + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option != "": + new_key = peak_dropdown.value + self.selected_values[new_key] = selected_option + + # Attach the function to the second dropdown's change event + species_dropdown.observe(on_species_dropdown_change) + + # Define a function to handle the save button click event + def on_save_button_click(b): + with selection_output: + selection_output.clear_output(wait=True) + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") + for fid in self.fids: + fid.identities = [ + value for value in self.selected_values.values() + ] + + # Attach the function to the save button's click event + save_button.on_click(on_save_button_click) + + # Create a container for both the title and the dropdown + container = VBox( + [ + title_label, + combobox, + peak_dropdown, + species_dropdown, + save_button, + selection_output, + ] + ) + + # Display the container + display(container) class DataTraceSelector: From 0bfbfe47bfe5b00a039d1bf9ed9bd0bbf803e7ad Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 26 Sep 2023 01:35:26 +0200 Subject: [PATCH 10/54] Update data model --- nmrpy/data_objects.py | 141 +++++++++++------- nmrpy/datamodel/core/__init__.py | 16 ++ nmrpy/datamodel/core/abstractspecies.py | 71 +++++++++ nmrpy/datamodel/core/citation.py | 19 ++- nmrpy/datamodel/core/complexdatapoint.py | 32 ++++ nmrpy/datamodel/core/cv.py | 4 +- nmrpy/datamodel/core/datatypes.py | 10 ++ nmrpy/datamodel/core/experiment.py | 32 ++-- nmrpy/datamodel/core/fid.py | 81 +++++++++- nmrpy/datamodel/core/fidarray.py | 7 +- nmrpy/datamodel/core/identity.py | 51 +++++++ nmrpy/datamodel/core/nmrpy.py | 11 +- nmrpy/datamodel/core/parameters.py | 3 +- nmrpy/datamodel/core/person.py | 3 +- 
nmrpy/datamodel/core/processingsteps.py | 81 ++++++++++ nmrpy/datamodel/core/protein.py | 57 +++++++ nmrpy/datamodel/core/publication.py | 7 +- nmrpy/datamodel/core/reactant.py | 49 +++++++ nmrpy/datamodel/core/sboterm.py | 35 +++++ nmrpy/datamodel/core/term.py | 3 +- nmrpy/datamodel/core/vessel.py | 52 +++++++ nmrpy/datamodel/schemes/datamodel_schema.md | 155 +++++++++++++++++++- nmrpy/plotting.py | 76 ++++++---- specifications/nmrpy.md | 99 ++++++++++++- 24 files changed, 963 insertions(+), 132 deletions(-) create mode 100644 nmrpy/datamodel/core/abstractspecies.py create mode 100644 nmrpy/datamodel/core/complexdatapoint.py create mode 100644 nmrpy/datamodel/core/datatypes.py create mode 100644 nmrpy/datamodel/core/identity.py create mode 100644 nmrpy/datamodel/core/processingsteps.py create mode 100644 nmrpy/datamodel/core/protein.py create mode 100644 nmrpy/datamodel/core/reactant.py create mode 100644 nmrpy/datamodel/core/sboterm.py create mode 100644 nmrpy/datamodel/core/vessel.py diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index c2056cd..670d810 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -12,6 +12,7 @@ import pickle from ipywidgets import SelectMultiple from sdRDM import DataModel +from nmrpy.datamodel.core import * class Base: @@ -29,17 +30,17 @@ def __init__(self, *args, **kwargs): self._params = None self.fid_path = kwargs.get("fid_path", ".") self._file_format = None - # self.parameters_object = self.lib.Parameters() - @property - def lib(self): - try: - self.__lib - except: - self.__lib = DataModel.from_markdown( - path=Path(__file__).parent.parent / "specifications" - ) - return self.__lib + # Probably not required anymore + # @property + # def lib(self): + # try: + # self.__lib + # except: + # self.__lib = DataModel.from_markdown( + # path=Path(__file__).parent.parent / "specifications" + # ) + # return self.__lib # @property # def parameters_object(self): @@ -120,24 +121,6 @@ def _procpar(self, procpar): elif 
isinstance(procpar, dict): self.__procpar = procpar self._params = self._extract_procpar(procpar) - # self.parameters_object( - # acquisition_time=self._params.get("at"), - # relaxation_time=self._params.get("d1"), - # repetition_time=self._params.get("rt"), - # spectral_width_ppm=self._params.get("sw"), - # spectral_width_hz=self._params.get("sw_hz"), - # spectrometer_frequency=self._params.get("sfrq"), - # reference_frequency=self._params.get("reffrq"), - # spectral_width_left=self._params.get("sw_left"), - # ) - # for _ in self._params.get("nt"): - # if type(_) is not None: - # self.fid_object.parameters.number_of_transients.append(_) - # for _ in self._params.get("acqtime"): - # if type(_) is not None: - # self.fid_object.parameters.acquisition_times_array.append( - # _ - # ) else: raise AttributeError("procpar must be a dictionary or None.") @@ -259,7 +242,6 @@ class Fid(Base): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fid_object = self.lib.FID() self.data = kwargs.get("data", []) self.peaks = None self.ranges = None @@ -268,6 +250,13 @@ def __init__(self, *args, **kwargs): self._flags = { "ft": False, } + self.fid_object = FID( + raw_data=[(str(datum)) for datum in self.data], + processed_data=[], + nmr_parameters=Parameters(), + processing_steps=ProcessingSteps(), + peak_identities=[], + ) def __str__(self): return "FID: %s (%i data)" % (self.id, len(self.data)) @@ -281,6 +270,14 @@ def fid_object(self, fid_object): if isinstance(fid_object, DataModel): self.__fid_object = fid_object + @property + def processing_steps(self): + return self.__processing_steps + + @processing_steps.setter + def processing_steps(self, processing_steps): + raise PermissionError("Forbidden!") + @property def data(self): """ @@ -292,9 +289,6 @@ def data(self): def data(self, data): if Fid._is_valid_dataset(data): self.__data = numpy.array(data) - # for _ in self.__data: - # if type(_) is not None: - # self.fid_object.data.append(float(_)) 
@property def _ppm(self): @@ -357,18 +351,21 @@ def ranges(self, ranges): @property def identities(self): """ - Assigned identities corresponding to the various ranges in :attr:`~nmrpy.data_objects.Fid.ranges`. + Assigned identities corresponding to the various peaks in :attr:`~nmrpy.data_objects.Fid.peaks`. """ - return self._identitites + return self._identities @identities.setter def identities(self, identities): + if identities is None: + self._identities = None + return if identities is not None: # if not Fid._is_flat_iter(identities): # raise AttributeError("identitites must be a flat iterable") if not all(isinstance(i, str) for i in identities): raise AttributeError("identities must be strings") - self._identitites = numpy.array(identities) + self._identities = numpy.array(identities) else: self._identities = identities @@ -605,8 +602,8 @@ def zf(self): """ self.data = numpy.append(self.data, 0 * self.data) - for _ in self.data: - self.fid_object.data.append(float(_)) + self.fid_object.processed_data = [str(datum) for datum in self.data] + self.fid_object.processing_steps.is_zero_filled = True def emhz(self, lb=5.0): """ @@ -625,16 +622,17 @@ def emhz(self, lb=5.0): ) * self.data ) - for _ in self.data: - self.fid_object.data.append(float(_)) + self.fid_object.processed_data = [str(datum) for datum in self.data] + self.fid_object.processing_steps.is_apodised = True + self.fid_object.processing_steps.apodisation_frequency = lb def real(self): """ Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. 
""" self.data = numpy.real(self.data) - for _ in self.data: - self.fid_object.data.append(float(_)) + self.fid_object.processed_data = [float(datum) for datum in self.data] + self.fid_object.processing_steps.is_only_real = True # GENERAL FUNCTIONS def ft(self): @@ -652,9 +650,10 @@ def ft(self): if Fid._is_valid_dataset(self.data): list_params = (self.data, self._file_format) self.data = Fid._ft(list_params) - for _ in self.data: - self.fid_object.data.append(float(_)) self._flags["ft"] = True + self.fid_object.processed_data = [str(datum) for datum in self.data] + self.fid_object.processing_steps.is_fourier_transformed = True + self.fid_object.processing_steps.fourier_transform_type = "FFT" @classmethod def _ft(cls, list_params): @@ -724,7 +723,11 @@ def phase_correct(self, method='leastsq', verbose = True): L-BFGS-B (l-bfgs-b) - Newton-CG (newton) + Conjugate Gradient (cg) + + Powell (powell) + + Newton-CG (newton) :keyword verbose: prints out phase angles if True (default) """ @@ -735,6 +738,8 @@ def phase_correct(self, method='leastsq', verbose = True): if verbose: print('phasing: %s'%self.id) self.data = Fid._phase_correct((self.data, method, verbose)) + self.fid_object.processed_data = [str(datum) for datum in self.data] + self.fid_object.processing_steps.is_phased = True @classmethod def _phase_correct(cls, list_params): @@ -803,8 +808,10 @@ def ps(self, p0=0.0, p1=0.0): size = len(self.data) ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) self.data = ph * self.data - for _ in self.data: - self.fid_object.data.append(float(_)) + self.fid_object.processed_data = [str(datum) for datum in self.data] + self.fid_object.processing_steps.is_phased = True + self.fid_object.processing_steps.zero_order_phase = p0 + self.fid_object.processing_steps.first_order_phase = p1 def phaser(self): """ @@ -870,8 +877,8 @@ def baseline_correct(self, deg=2): self._bl_poly = yp data_bl = data - yp self.data = numpy.array(data_bl) - for _ in self.data: - 
self.fid_object.data.append(float(_)) + self.fid_object.processed_data = [float(datum) for datum in self.data] + self.fid_object.processing_steps.is_baseline_corrected = True def peakpick(self, thresh=0.1): """ @@ -1439,7 +1446,7 @@ class FidArray(Base): def __init__(self): _now = str(datetime.now()) - self.data_model = self.lib.NMRpy( + self.data_model = NMRpy( datetime_created=_now, datetime_modified=_now, ) @@ -1780,6 +1787,10 @@ def ft_fids(self, mp=True, cpus=None): for fid, datum in zip(fids, ft_data): fid.data = datum fid._flags["ft"] = True + fid.fid_object.processed_data = [str(data) for data in datum] + fid.fid_object.processing_steps.is_fourier_transformed = True + fid.fid_object.processing_steps.fourier_transform_type = "FFT" + else: for fid in self.get_fids(): fid.ft() @@ -1801,6 +1812,11 @@ def norm_fids(self): dmax = self.data.max() for fid in self.get_fids(): fid.data = fid.data / dmax + fid.fid_object.processed_data = [ + float(datum) for datum in fid.data + ] + fid.fid_object.processing_steps.is_normalised = True + fid.fid_object.processing_steps.max_value = float(dmax) def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True): """ @@ -1824,6 +1840,8 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) phased_data = self._generic_mp(Fid._phase_correct, list_params, cpus) for fid, datum in zip(fids, phased_data): fid.data = datum + fid.fid_object.processed_data = [str(data) for data in datum] + fid.fid_object.processing_steps.is_phased = True else: for fid in self.get_fids(): fid.phase_correct(method=method, verbose=verbose) @@ -1946,6 +1964,7 @@ def deconv_fids( fid._deconvoluted_peaks = numpy.array( [j for i in datum for j in i] ) + fid.fid_object.processing_steps.is_deconvoluted = True else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) @@ -2339,25 +2358,35 @@ def save_to_file(self, filename=None, overwrite=False): with open(filename, "wb") as f: pickle.dump(self, f) + # TODO: 
Will probably create a measurement object for each FID(?) + # and add them to the EnzymeML document provided + # Issue: How to get species for IdentityAssigner? __init__()? + def add_to_enzymeml(self, enzymeml_document=None) -> None: + ... + + # TODO: Refactor save_data method + # possibly make saving to EnzymeML a get_measurements method def save_data(self, file_format: str, filename=None, overwrite=False): print("~~~ Method under contruction ~~~") if self.force_pyenzyme: - import pyenzyme as pe - + try: + import pyenzyme as pe + except: + self.force_pyenzyme = False + raise ModuleNotFoundError( + "PyEnzyme is not installed in your current environment. Use EnzymeML data model instead or install PyEnzyme." + ) enzymeml = pe.EnzymeMLDocument( - name=self.data_mode.experiment.name + name=self.data_model.experiment.name if hasattr(self.data_model.experiment, "name") else "NMR experiment" ) ... return 1 if file_format.lower() == ("enzymeml" or "nmrml"): - # model = self.data_model.convert_to( - # template=Path(__file__).parent.parent / "links/enzymeml.toml" - # ) enzymeml = DataModel.from_git( url="https://github.com/EnzymeML/enzymeml-specifications.git", - tag="markdown-parser-refactor", + tag="linking-refactor", ) doc = enzymeml.EnzymeMLDocument( name=( diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index bf9ee94..8479b41 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -2,16 +2,24 @@ from .experiment import Experiment from .fid import FID from .parameters import Parameters +from .processingsteps import ProcessingSteps +from .identity import Identity from .fidarray import FIDArray from .citation import Citation from .person import Person from .publication import Publication from .cv import CV from .term import Term +from .vessel import Vessel +from .abstractspecies import AbstractSpecies +from .protein import Protein +from .reactant import Reactant from .fileformats import FileFormats from 
.subjects import Subjects from .publicationtypes import PublicationTypes from .identifiertypes import IdentifierTypes +from .sboterm import SBOTerm +from .datatypes import DataTypes __doc__ = "" @@ -20,14 +28,22 @@ "Experiment", "FID", "Parameters", + "ProcessingSteps", + "Identity", "FIDArray", "Citation", "Person", "Publication", "CV", "Term", + "Vessel", + "AbstractSpecies", + "Protein", + "Reactant", "FileFormats", "Subjects", "PublicationTypes", "IdentifierTypes", + "SBOTerm", + "DataTypes", ] diff --git a/nmrpy/datamodel/core/abstractspecies.py b/nmrpy/datamodel/core/abstractspecies.py new file mode 100644 index 0000000..57e50eb --- /dev/null +++ b/nmrpy/datamodel/core/abstractspecies.py @@ -0,0 +1,71 @@ +import sdRDM + +from typing import Optional, Union +from pydantic import Field, validator +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import StrictBool + +from .vessel import Vessel + + +@forge_signature +class AbstractSpecies(sdRDM.DataModel): + """This object is used to inherit basic attributes common to all species used in the data model.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("abstractspeciesINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description="None", + ) + + vessel_id: Union[Vessel, str] = Field( + ..., + reference="Vessel.id", + description="None", + ) + + init_conc: Optional[float] = Field( + default=None, + description="None", + ) + + constant: StrictBool = Field( + ..., + description="None", + ) + + unit: Optional[str] = Field( + default=None, + description="None", + ) + + uri: Optional[str] = Field( + default=None, + description="None", + ) + + creator_id: Optional[str] = Field( + default=None, + description="None", + ) + + @validator("vessel_id") + def get_vessel_id_reference(cls, value): + """Extracts the ID from a given object to create a reference""" + + from .vessel import Vessel + + if isinstance(value, Vessel): + 
return value.id + elif isinstance(value, str): + return value + else: + raise TypeError( + f"Expected types [Vessel, str] got '{type(value).__name__}' instead." + ) diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index 799d198..c574cf3 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -5,23 +5,22 @@ from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from pydantic import AnyUrl from typing import Any +from pydantic import AnyUrl -from .term import Term -from .person import Person -from .subjects import Subjects from .publication import Publication +from .term import Term from .publicationtypes import PublicationTypes +from .subjects import Subjects +from .person import Person from .identifiertypes import IdentifierTypes @forge_signature class Citation(sdRDM.DataModel): - """Container for various types of metadata primarily used in the publication and citation of the dataset.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("citationINDEX"), xml="@id", @@ -128,6 +127,8 @@ def add_to_authors( self.authors.append(Person(**params)) + return self.authors[-1] + def add_to_keywords( self, name: str, @@ -159,6 +160,8 @@ def add_to_keywords( self.keywords.append(Term(**params)) + return self.keywords[-1] + def add_to_topics( self, name: str, @@ -190,6 +193,8 @@ def add_to_topics( self.topics.append(Term(**params)) + return self.topics[-1] + def add_to_related_publications( self, type: PublicationTypes, @@ -223,3 +228,5 @@ def add_to_related_publications( params["id"] = id self.related_publications.append(Publication(**params)) + + return self.related_publications[-1] diff --git a/nmrpy/datamodel/core/complexdatapoint.py b/nmrpy/datamodel/core/complexdatapoint.py new file mode 100644 index 0000000..44ce1c2 --- /dev/null +++ b/nmrpy/datamodel/core/complexdatapoint.py @@ -0,0 
+1,32 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class ComplexDataPoint(sdRDM.DataModel): + """Container for a complex number from the Free Induction Decay.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("complexdatapointINDEX"), + xml="@id", + ) + + real_part: Optional[float] = Field( + default=None, + description=( + "Real part of the complex number. Equivalent to `z.real` with `z` being a" + " `complex` number in Python." + ), + ) + + imaginary_part: Optional[float] = Field( + default=None, + description=( + "Imaginary part of the complex number. Equivalent to `z.imag` with `z`" + " being a `complex` number in Python." + ), + ) diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py index 86735eb..e7e070a 100644 --- a/nmrpy/datamodel/core/cv.py +++ b/nmrpy/datamodel/core/cv.py @@ -1,5 +1,6 @@ import sdRDM +from typing import Optional from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator @@ -8,10 +9,9 @@ @forge_signature class CV(sdRDM.DataModel): - """lorem ipsum""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("cvINDEX"), xml="@id", diff --git a/nmrpy/datamodel/core/datatypes.py b/nmrpy/datamodel/core/datatypes.py new file mode 100644 index 0000000..92b2754 --- /dev/null +++ b/nmrpy/datamodel/core/datatypes.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class DataTypes(Enum): + CONCENTRATION = "conc" + ABSORPTION = "abs" + FEED = "feed" + BIOMASS = "biomass" + CONVERSION = "conversion" + PEAK_AREA = "peak-area" diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index 3af5c83..793de75 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -1,23 +1,24 @@ import sdRDM -from typing 
import List, Optional +from typing import Optional, Union, List from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .fidarray import FIDArray +from .processingsteps import ProcessingSteps +from .identity import Identity from .parameters import Parameters from .fid import FID +from .fidarray import FIDArray @forge_signature class Experiment(sdRDM.DataModel): - """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed Also preparation of EnzymeML doc""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("experimentINDEX"), xml="@id", @@ -41,8 +42,11 @@ class Experiment(sdRDM.DataModel): def add_to_fid( self, - data: List[float] = ListPlus(), - parameters: Optional[Parameters] = None, + raw_data: List[str] = ListPlus(), + processed_data: List[Union[str, float]] = ListPlus(), + nmr_parameters: Optional[Parameters] = None, + processing_steps: Optional[ProcessingSteps] = None, + peak_identities: List[Identity] = ListPlus(), id: Optional[str] = None, ) -> None: """ @@ -50,16 +54,24 @@ def add_to_fid( Args: id (str): Unique identifier of the 'FID' object. Defaults to 'None'. - data (): Spectral data from numpy array.. Defaults to ListPlus() - parameters (): Contains commonly-used NMR parameters.. Defaults to None + raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. Defaults to ListPlus() + processed_data (): Processed data array.. Defaults to ListPlus() + nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None + processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. 
Defaults to None + peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() """ params = { - "data": data, - "parameters": parameters, + "raw_data": raw_data, + "processed_data": processed_data, + "nmr_parameters": nmr_parameters, + "processing_steps": processing_steps, + "peak_identities": peak_identities, } if id is not None: params["id"] = id self.fid.append(FID(**params)) + + return self.fid[-1] diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fid.py index 143b833..c92eb0b 100644 --- a/nmrpy/datamodel/core/fid.py +++ b/nmrpy/datamodel/core/fid.py @@ -1,32 +1,99 @@ import sdRDM -from typing import List, Optional +from typing import Optional, Union, List from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator +from pydantic.types import FrozenSet +from .processingsteps import ProcessingSteps +from .abstractspecies import AbstractSpecies +from .protein import Protein +from .identity import Identity from .parameters import Parameters +from .reactant import Reactant @forge_signature class FID(sdRDM.DataModel): - """Container for a single NMR spectrum.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("fidINDEX"), xml="@id", ) - data: List[float] = Field( - description="Spectral data from numpy array.", + raw_data: List[str] = Field( + description=( + "Complex spectral data from numpy array as string of format" + " `{array.real}+{array.imag}j`." 
+ ), + default_factory=ListPlus, + multiple=True, + ) + + processed_data: List[Union[str, float]] = Field( + description="Processed data array.", default_factory=ListPlus, multiple=True, ) - parameters: Optional[Parameters] = Field( - default=None, + nmr_parameters: Optional[Parameters] = Field( + default=Parameters(), description="Contains commonly-used NMR parameters.", ) + + processing_steps: Optional[ProcessingSteps] = Field( + default=ProcessingSteps(), + description=( + "Contains the processing steps performed, as well as the parameters used" + " for them." + ), + ) + + peak_identities: List[Identity] = Field( + description=( + "Container holding and mapping integrals resulting from peaks and their" + " ranges to EnzymeML species." + ), + default_factory=ListPlus, + multiple=True, + ) + + def add_to_peak_identities( + self, + name: str, + enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = None, + associated_peaks: List[float] = ListPlus(), + associated_ranges: List[FrozenSet] = ListPlus(), + associated_integrals: List[float] = ListPlus(), + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'Identity' to attribute peak_identities + + Args: + id (str): Unique identifier of the 'Identity' object. Defaults to 'None'. + name (): Descriptive name for the species. + enzymeml_species (): A species object from an EnzymeML document.. Defaults to None + associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus() + associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() + associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() + """ + + params = { + "name": name, + "enzymeml_species": enzymeml_species, + "associated_peaks": associated_peaks, + "associated_ranges": associated_ranges, + "associated_integrals": associated_integrals, + } + + if id is not None: + params["id"] = id + + self.peak_identities.append(Identity(**params)) + + return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py index 48b4b49..5fb1400 100644 --- a/nmrpy/datamodel/core/fidarray.py +++ b/nmrpy/datamodel/core/fidarray.py @@ -1,6 +1,6 @@ import sdRDM -from typing import List +from typing import List, Optional from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -8,10 +8,9 @@ @forge_signature class FIDArray(sdRDM.DataModel): + """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5}""" - """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. 
{Add reference back.}""" - - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("fidarrayINDEX"), xml="@id", diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py new file mode 100644 index 0000000..a3860a9 --- /dev/null +++ b/nmrpy/datamodel/core/identity.py @@ -0,0 +1,51 @@ +import sdRDM + +from typing import Optional, Union, List +from pydantic import Field +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic.types import FrozenSet + +from .abstractspecies import AbstractSpecies +from .protein import Protein +from .reactant import Reactant + + +@forge_signature +class Identity(sdRDM.DataModel): + """Container mapping one or more peaks to the respective species.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("identityINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description="Descriptive name for the species", + ) + + enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = Field( + default=None, + description="A species object from an EnzymeML document.", + ) + + associated_peaks: List[float] = Field( + description="Peaks belonging to the given species", + default_factory=ListPlus, + multiple=True, + ) + + associated_ranges: List[FrozenSet] = Field( + description="Sets of ranges belonging to the given peaks", + default_factory=ListPlus, + multiple=True, + ) + + associated_integrals: List[float] = Field( + description="Integrals resulting from the given peaks and ranges of a species", + default_factory=ListPlus, + multiple=True, + ) diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index 4b68b75..0971fef 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -4,7 +4,7 @@ from pydantic import Field from sdRDM.base.utils import forge_signature, 
IDGenerator -from datetime import datetime +from datetime import datetime as Datetime from .citation import Citation from .experiment import Experiment @@ -12,21 +12,20 @@ @forge_signature class NMRpy(sdRDM.DataModel): - """Root element of the NMRpy data model.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("nmrpyINDEX"), xml="@id", ) - datetime_created: datetime = Field( + datetime_created: Datetime = Field( ..., description="Date and time this dataset has been created.", ) - datetime_modified: Optional[datetime] = Field( + datetime_modified: Optional[Datetime] = Field( default=None, description="Date and time this dataset has last been modified.", ) @@ -37,7 +36,7 @@ class NMRpy(sdRDM.DataModel): ) citation: Optional[Citation] = Field( - default=None, + default=Citation(), description=( "Relevant information regarding the publication and citation of this" " dataset." diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py index 2062e25..66f0c37 100644 --- a/nmrpy/datamodel/core/parameters.py +++ b/nmrpy/datamodel/core/parameters.py @@ -8,10 +8,9 @@ @forge_signature class Parameters(sdRDM.DataModel): - """Container for relevant NMR parameters.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("parametersINDEX"), xml="@id", diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py index dcfbaef..c854aeb 100644 --- a/nmrpy/datamodel/core/person.py +++ b/nmrpy/datamodel/core/person.py @@ -11,10 +11,9 @@ @forge_signature class Person(sdRDM.DataModel): - """Container for information regarding a person that worked on an experiment.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("personINDEX"), xml="@id", diff --git a/nmrpy/datamodel/core/processingsteps.py 
b/nmrpy/datamodel/core/processingsteps.py new file mode 100644 index 0000000..e4c8830 --- /dev/null +++ b/nmrpy/datamodel/core/processingsteps.py @@ -0,0 +1,81 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + + +@forge_signature +class ProcessingSteps(sdRDM.DataModel): + """Container for processing steps performed, as well as parameter for them.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("processingstepsINDEX"), + xml="@id", + ) + + is_apodised: Optional[bool] = Field( + default=None, + description="Whether or not Apodisation (line-broadening) has been performed.", + ) + + apodisation_frequency: Optional[float] = Field( + default=None, + description="Degree of Apodisation (line-broadening) in Hz.", + ) + + is_zero_filled: Optional[bool] = Field( + default=False, + description="Whether or not Zero-filling has been performed.", + ) + + is_fourier_transformed: Optional[bool] = Field( + default=False, + description="Whether or not Fourier transform has been performed.", + ) + + fourier_transform_type: Optional[str] = Field( + default=None, + description="The type of Fourier transform used.", + ) + + is_phased: Optional[bool] = Field( + default=False, + description="Whether or not Phasing was performed.", + ) + + zero_order_phase: Optional[float] = Field( + default=None, + description="Zero-order phase used for Phasing.", + ) + + first_order_phase: Optional[float] = Field( + default=None, + description="First-order phase used for Phasing.", + ) + + is_only_real: Optional[bool] = Field( + default=False, + description="Whether or not the imaginary part has been discarded.", + ) + + is_normalised: Optional[bool] = Field( + default=False, + description="Whether or not Normalisation was performed.", + ) + + max_value: Optional[float] = Field( + default=None, + description="Maximum value of the dataset used for 
Normalisation.", + ) + + is_deconvoluted: Optional[bool] = Field( + default=False, + description="Whether or not Deconvolution was performed.", + ) + + is_baseline_corrected: Optional[bool] = Field( + default=False, + description="Whether or not Baseline correction was performed.", + ) diff --git a/nmrpy/datamodel/core/protein.py b/nmrpy/datamodel/core/protein.py new file mode 100644 index 0000000..efcc389 --- /dev/null +++ b/nmrpy/datamodel/core/protein.py @@ -0,0 +1,57 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .sboterm import SBOTerm + + +@forge_signature +class Protein(sdRDM.DataModel): + """This objects describes the proteins that were used or produced in the course of the experiment.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("proteinINDEX"), + xml="@id", + ) + + sequence: str = Field( + ..., + description="Amino acid sequence of the protein", + template_alias="Sequence", + ) + + ecnumber: Optional[str] = Field( + default=None, + description="EC number of the protein.", + regex="(\d+.)(\d+.)(\d+.)(\d+)", + template_alias="EC Number", + ) + + organism: Optional[str] = Field( + default=None, + description="Organism the protein was expressed in.", + template_alias="Source organism", + ) + + organism_tax_id: Optional[str] = Field( + default=None, + description="Taxonomy identifier of the expression host.", + ) + + uniprotid: Optional[str] = Field( + default=None, + description=( + "Unique identifier referencing a protein entry at UniProt. Use this" + " identifier to initialize the object from the UniProt database." 
+ ), + template_alias="UniProt ID", + ) + + ontology: SBOTerm = Field( + description="None", + default=SBOTerm.CATALYST, + ) diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index 3d34597..4c41d8c 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -7,17 +7,16 @@ from pydantic import AnyUrl -from .person import Person from .publicationtypes import PublicationTypes +from .person import Person from .identifiertypes import IdentifierTypes @forge_signature class Publication(sdRDM.DataModel): - """Container for citation information of a relevant publication.""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("publicationINDEX"), xml="@id", @@ -88,3 +87,5 @@ def add_to_authors( params["id"] = id self.authors.append(Person(**params)) + + return self.authors[-1] diff --git a/nmrpy/datamodel/core/reactant.py b/nmrpy/datamodel/core/reactant.py new file mode 100644 index 0000000..faf65c1 --- /dev/null +++ b/nmrpy/datamodel/core/reactant.py @@ -0,0 +1,49 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + + +from .sboterm import SBOTerm + + +@forge_signature +class Reactant(sdRDM.DataModel): + """This objects describes the reactants that were used or produced in the course of the experiment.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("reactantINDEX"), + xml="@id", + ) + + smiles: Optional[str] = Field( + default=None, + description=( + "Simplified Molecular Input Line Entry System (SMILES) encoding of the" + " reactant." + ), + template_alias="SMILES", + ) + + inchi: Optional[str] = Field( + default=None, + description=( + "International Chemical Identifier (InChI) encoding of the reactant." 
+ ), + template_alias="InCHI", + ) + + chebi_id: Optional[str] = Field( + default=None, + description=( + "Unique identifier of the CHEBI database. Use this identifier to initialize" + " the object from the CHEBI database." + ), + ) + + ontology: SBOTerm = Field( + description="None", + default=SBOTerm.SMALL_MOLECULE, + ) diff --git a/nmrpy/datamodel/core/sboterm.py b/nmrpy/datamodel/core/sboterm.py new file mode 100644 index 0000000..74d2eb6 --- /dev/null +++ b/nmrpy/datamodel/core/sboterm.py @@ -0,0 +1,35 @@ +from enum import Enum + + +class SBOTerm(Enum): + BIOCHEMICAL_REACTION = "SBO:0000176" + ACID_BASE_REACTION = "SBO:0000208" + CONFORMATIONAL_TRANSITION = "SBO:0000181" + CONVERSION = "SBO:0000182" + DEGRADATION = "SBO:0000179" + DISSOCIATION = "SBO:0000180" + IONISATION = "SBO:0000209" + ISOMERISATION = "SBO:0000377" + NON_COVALENT_BINDING = "SBO:0000177" + REDOX_REACTION = "SBO:0000200" + SPONTANEOUS_REACTION = "SBO:0000672" + PROTEIN = "SBO:0000252" + GENE = "SBO:0000251" + SMALL_MOLECULE = "SBO:0000247" + ION = "SBO:0000327" + RADICAL = "SBO:0000328" + INTERACTOR = "SBO:0000336" + SUBSTRATE = "SBO:0000015" + PRODUCT = "SBO:0000011" + CATALYST = "SBO:0000013" + INHIBITOR = "SBO:0000020" + ESSENTIAL_ACTIVATOR = "SBO:0000461" + NON_ESSENTIAL_ACTIVATOR = "SBO:0000462" + POTENTIATOR = "SBO:0000021" + MACROMOLECULAR_COMPLEX = "SBO:0000296" + PROTEIN_COMPLEX = "SBO:0000297" + DIMER = "SBO:0000607" + MICHAELIS_MENTEN = "SBO:0000028" + K_CAT = "SBO:0000025" + K_M = "SBO:0000027" + V_MAX = "SBO:0000186" diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py index 6a48293..82c67e8 100644 --- a/nmrpy/datamodel/core/term.py +++ b/nmrpy/datamodel/core/term.py @@ -9,10 +9,9 @@ @forge_signature class Term(sdRDM.DataModel): - """lorem ipsum {Add reference back to term_cv_reference.}""" - id: str = Field( + id: Optional[str] = Field( description="Unique identifier of the given object.", default_factory=IDGenerator("termINDEX"), xml="@id", diff --git 
a/nmrpy/datamodel/core/vessel.py b/nmrpy/datamodel/core/vessel.py new file mode 100644 index 0000000..5dc6fb5 --- /dev/null +++ b/nmrpy/datamodel/core/vessel.py @@ -0,0 +1,52 @@ +import sdRDM + +from typing import Optional +from pydantic import Field +from sdRDM.base.utils import forge_signature, IDGenerator + +from pydantic import StrictBool +from pydantic import PositiveFloat + + +@forge_signature +class Vessel(sdRDM.DataModel): + """This object describes vessels in which the experiment has been carried out. These can include any type of vessel used in biocatalytic experiments.""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("vesselINDEX"), + xml="@id", + ) + + name: str = Field( + ..., + description="Name of the used vessel.", + template_alias="Name", + ) + + volume: PositiveFloat = Field( + ..., + description="Volumetric value of the vessel.", + template_alias="Volume value", + ) + + unit: str = Field( + ..., + description="Volumetric unit of the vessel.", + template_alias="Volume unit", + ) + + constant: StrictBool = Field( + description="Whether the volume of the vessel is constant or not.", + default=True, + ) + + uri: Optional[str] = Field( + default=None, + description="URI of the vessel.", + ) + + creator_id: Optional[str] = Field( + default=None, + description="Unique identifier of the author.", + ) diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index 527eb3a..abbf56d 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -1,10 +1,46 @@ ```mermaid classDiagram + AbstractSpecies <-- Protein + AbstractSpecies <-- Complex + AbstractSpecies <-- Reactant + AbstractSpecies <-- Protein + AbstractSpecies <-- Reactant + EnzymeMLDocument *-- Creator + EnzymeMLDocument *-- Vessel + EnzymeMLDocument *-- Protein + EnzymeMLDocument *-- Complex + EnzymeMLDocument *-- Reactant + 
EnzymeMLDocument *-- Reaction + EnzymeMLDocument *-- KineticParameter + EnzymeMLDocument *-- Measurement + EnzymeMLDocument *-- File + AbstractSpecies *-- Vessel + Protein *-- SBOTerm + Complex *-- SBOTerm + Reactant *-- SBOTerm + Reaction *-- SBOTerm + Reaction *-- ReactionElement + Reaction *-- KineticModel + ReactionElement *-- SBOTerm + ReactionElement *-- AbstractSpecies + KineticModel *-- SBOTerm + KineticModel *-- KineticParameter + KineticParameter *-- SBOTerm + Measurement *-- MeasurementData + MeasurementData *-- AbstractSpecies + MeasurementData *-- Replicate + Replicate *-- DataTypes + Replicate *-- AbstractSpecies NMRpy *-- Experiment NMRpy *-- Citation Experiment *-- FID Experiment *-- FIDArray FID *-- Parameters + FID *-- ProcessingSteps + FID *-- Identity + Identity *-- AbstractSpecies + Identity *-- Protein + Identity *-- Reactant Citation *-- Subjects Citation *-- Person Citation *-- Publication @@ -12,6 +48,9 @@ classDiagram Person *-- IdentifierTypes Publication *-- PublicationTypes Publication *-- Person + AbstractSpecies *-- Vessel + Protein *-- SBOTerm + Reactant *-- SBOTerm class NMRpy { +datetime datetime_created* @@ -27,8 +66,11 @@ classDiagram } class FID { - +float[0..*] data - +Parameters parameters + +string[0..*] raw_data + +string, float[0..*] processed_data + +Parameters nmr_parameters + +ProcessingSteps processing_steps + +Identity[0..*] peak_identities } class Parameters { @@ -44,6 +86,30 @@ classDiagram +float spectral_width_left } + class ProcessingSteps { + +boolean is_apodised + +float apodisation_frequency + +boolean is_zero_filled + +boolean is_fourier_transformed + +string fourier_transform_type + +boolean is_phased + +float zero_order_phase + +float first_order_phase + +boolean is_only_real + +boolean is_normalised + +float max_value + +boolean is_deconvoluted + +boolean is_baseline_corrected + } + + class Identity { + +string name* + +AbstractSpecies, Protein, Reactant enzymeml_species + +float[0..*] associated_peaks + 
+frozenset[0..*] associated_ranges + +float[0..*] associated_integrals + } + class FIDArray { +string[0..*] fids* } @@ -93,6 +159,41 @@ classDiagram +any value } + class Vessel { + +string name* + +posfloat volume* + +string unit* + +StrictBool constant* + +string uri + +string creator_id + } + + class AbstractSpecies { + +string name* + +Vessel vessel_id* + +float init_conc + +StrictBool constant* + +string unit + +string uri + +string creator_id + } + + class Protein { + +string sequence* + +string ecnumber + +string organism + +string organism_tax_id + +string uniprotid + +SBOTerm ontology* + } + + class Reactant { + +string smiles + +string inchi + +string chebi_id + +SBOTerm ontology* + } + class FileFormats { << Enumeration >> +VARIAN @@ -118,4 +219,54 @@ classDiagram +ORCID } + class SBOTerm { + << Enumeration >> + +BIOCHEMICAL_REACTION + +ACID_BASE_REACTION + +CONFORMATIONAL_TRANSITION + +CONVERSION + +DEGRADATION + +DISSOCIATION + +IONISATION + +ISOMERISATION + +NON_COVALENT_BINDING + +REDOX_REACTION + +SPONTANEOUS_REACTION + +PROTEIN + +GENE + +SMALL_MOLECULE + +ION + +RADICAL + +INTERACTOR + +SUBSTRATE + +PRODUCT + +CATALYST + +INHIBITOR + +ESSENTIAL_ACTIVATOR + +NON_ESSENTIAL_ACTIVATOR + +POTENTIATOR + +MACROMOLECULAR_COMPLEX + +PROTEIN_COMPLEX + +DIMER + +MICHAELIS_MENTEN + +K_CAT + +K_M + +V_MAX + } + + class DataTypes { + << Enumeration >> + +CONCENTRATION + +ABSORPTION + +FEED + +BIOMASS + +CONVERSION + +PEAK_AREA + } + + class https://github.com/EnzymeML/enzymeml-specifications/ { + << External Object >> + +Repository + } + ``` \ No newline at end of file diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index b368b6e..396c7fd 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -24,6 +24,15 @@ import asyncio +SPECIES_DECOY = [ + "TEP", + "PEP", + "3PG", + "2PG", + "Phosphate", +] + + class Plot: """ Basic 'plot' class containing functions for various types of plots. 
@@ -1425,23 +1434,18 @@ def __init__(self, fid, title): description="Save selection", icon="file-arrow-down", disabled=True ) + # Create a reset button + reset_button = Button(description="Reset selection", disabled=True) + # Create an output widget to display the selection selection_output = Output() # Define a method to handle the peak dropdown's change event def on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - selected_option = event["new"] - if selected_option != "": - species_dropdown.options = [ - "3PG", - "2PG", - "Phosphate", - "TEP", - "PEP", - ] - species_dropdown.disabled = False - save_button.disabled = False + species_dropdown.options = SPECIES_DECOY + species_dropdown.disabled = False + save_button.disabled = False # Attach the function to the dropdown's change event peak_dropdown.observe(on_peak_dropdown_change) @@ -1450,9 +1454,8 @@ def on_peak_dropdown_change(event): def on_species_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": selected_option = event["new"] - if selected_option != "": - new_key = peak_dropdown.value - self.selected_values[new_key] = selected_option + new_key = peak_dropdown.value + self.selected_values[new_key] = selected_option # Attach the function to the second dropdown's change event species_dropdown.observe(on_species_dropdown_change) @@ -1467,10 +1470,22 @@ def on_save_button_click(b): self.fid.identities = [ value for value in self.selected_values.values() ] + reset_button.disabled = False # Attach the function to the save button's click event save_button.on_click(on_save_button_click) + # Define a function to handle the reset event + def on_reset_button_click(b): + with selection_output: + selection_output.clear_output(wait=True) + print("\nCleared selections!") + self.fid.identities = [] + self.selected_values = {} + + # Attach the function to the reset click event + reset_button.on_click(on_reset_button_click) + # Create a container for both 
the title and the dropdown container = VBox( [ @@ -1478,6 +1493,7 @@ def on_save_button_click(b): peak_dropdown, species_dropdown, save_button, + reset_button, selection_output, ] ) @@ -1536,6 +1552,9 @@ def __init__(self, fid_array): description="Save selection", icon="file-arrow-down", disabled=True ) + # Create a reset button + reset_button = Button(description="Reset selection", disabled=True) + # Create an output widget to display the selection selection_output = Output() @@ -1556,15 +1575,7 @@ def on_combobox_change(event): # Define a method to handle the peak dropdown's change event def on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - selected_option = event["new"] - if selected_option != "": - species_dropdown.options = [ - "3PG", - "2PG", - "Phosphate", - "TEP", - "PEP", - ] + species_dropdown.options = SPECIES_DECOY species_dropdown.disabled = False save_button.disabled = False @@ -1575,9 +1586,8 @@ def on_peak_dropdown_change(event): def on_species_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": selected_option = event["new"] - if selected_option != "": - new_key = peak_dropdown.value - self.selected_values[new_key] = selected_option + new_key = peak_dropdown.value + self.selected_values[new_key] = selected_option # Attach the function to the second dropdown's change event species_dropdown.observe(on_species_dropdown_change) @@ -1593,10 +1603,23 @@ def on_save_button_click(b): fid.identities = [ value for value in self.selected_values.values() ] + reset_button.disabled = False # Attach the function to the save button's click event save_button.on_click(on_save_button_click) + # Define a function to handle the reset event + def on_reset_button_click(b): + with selection_output: + selection_output.clear_output(wait=True) + print("\nCleared selections!") + for fid in self.fids: + fid.identities = [] + self.selected_values = {} + + # Attach the function to the reset click event + 
reset_button.on_click(on_reset_button_click) + # Create a container for both the title and the dropdown container = VBox( [ @@ -1605,6 +1628,7 @@ def on_save_button_click(b): peak_dropdown, species_dropdown, save_button, + reset_button, selection_output, ] ) diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index 97d15e9..9e64e47 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -32,6 +32,10 @@ Also preparation of EnzymeML doc - __name__ - Type: string - Description: A descriptive name for the overarching experiment. +- enzymeml_species + - Type: https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant + - Description: A species object from an EnzymeML document. + - Multiple: True - fid - Type: [FID](#fid) - Description: A single NMR spectrum. @@ -45,13 +49,24 @@ Also preparation of EnzymeML doc Container for a single NMR spectrum. -- data - - Type: float - - Description: Spectral data from numpy array. +- raw_data + - Type: string + - Description: Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`. + - Multiple: True +- processed_data + - Type: string,float + - Description: Processed data array. - Multiple: True -- parameters +- nmr_parameters - Type: [Parameters](#parameters) - Description: Contains commonly-used NMR parameters. +- processing_steps + - Type: [ProcessingSteps](#processingsteps) + - Description: Contains the processing steps performed, as well as the parameters used for them. +- peak_identities + - Type: [Identity](#identity) + - Description: Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species. + - Multiple: True ### Parameters @@ -92,6 +107,82 @@ Container for relevant NMR parameters. 
- Description: sw_left +### ProcessingSteps + +Container for processing steps performed, as well as parameter for them. + +- is_apodised + - Type: boolean + - Description: Whether or not Apodisation (line-broadening) has been performed. +- apodisation_frequency + - Type: float + - Description: Degree of Apodisation (line-broadening) in Hz. +- is_zero_filled + - Type: boolean + - Description: Whether or not Zero-filling has been performed. + - Default: False +- is_fourier_transformed + - Type: boolean + - Description: Whether or not Fourier transform has been performed. + - Default: False +- fourier_transform_type + - Type: string + - Description: The type of Fourier transform used. +- is_phased + - Type: boolean + - Description: Whether or not Phasing was performed. + - Default: False +- zero_order_phase + - Type: float + - Description: Zero-order phase used for Phasing. +- first_order_phase + - Type: float + - Description: First-order phase used for Phasing. +- is_only_real + - Type: boolean + - Description: Whether or not the imaginary part has been discarded. + - Default: False +- is_normalised + - Type: boolean + - Description: Whether or not Normalisation was performed. + - Default: False +- max_value + - Type: float + - Description: Maximum value of the dataset used for Normalisation. +- is_deconvoluted + - Type: boolean + - Description: Whether or not Deconvolution was performed. + - Default: False +- is_baseline_corrected + - Type: boolean + - Description: Whether or not Baseline correction was performed. + - Default: False + + +### Identity + +Container mapping one or more peaks to the respective species. 
+ +- name + - Type: string + - Description: Descriptive name for the species +- species_id + - Type: string + - Description: ID of an EnzymeML species +- associated_peaks + - Type: float + - Description: Peaks belonging to the given species + - Multiple: True +- associated_ranges + - Type: frozenset + - Description: Sets of ranges belonging to the given peaks + - Multiple: True +- associated_integrals + - Type: float + - Description: Integrals resulting from the given peaks and ranges of a species + - Multiple: True + + ### FIDArray Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5} From cb0def5a7746067b5e63105a3fd29c79d980f0ee Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 26 Sep 2023 01:38:07 +0200 Subject: [PATCH 11/54] Minor bug fix --- nmrpy/datamodel/core/__init__.py | 12 -- nmrpy/datamodel/core/citation.py | 8 +- nmrpy/datamodel/core/experiment.py | 15 ++- nmrpy/datamodel/core/fid.py | 15 +-- nmrpy/datamodel/core/identity.py | 14 +-- nmrpy/datamodel/core/nmrpy.py | 2 +- nmrpy/datamodel/core/publication.py | 2 +- nmrpy/datamodel/schemes/datamodel_schema.md | 127 +------------------- specifications/nmrpy.md | 4 +- 9 files changed, 33 insertions(+), 166 deletions(-) diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index 8479b41..e3b9f18 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -10,16 +10,10 @@ from .publication import Publication from .cv import CV from .term import Term -from .vessel import Vessel -from .abstractspecies import AbstractSpecies -from .protein import Protein -from .reactant import Reactant from .fileformats import FileFormats from .subjects import Subjects from .publicationtypes import PublicationTypes from .identifiertypes import IdentifierTypes -from .sboterm import SBOTerm -from .datatypes import DataTypes __doc__ = "" @@ -36,14 +30,8 @@ 
"Publication", "CV", "Term", - "Vessel", - "AbstractSpecies", - "Protein", - "Reactant", "FileFormats", "Subjects", "PublicationTypes", "IdentifierTypes", - "SBOTerm", - "DataTypes", ] diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index c574cf3..bfeb06a 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -5,15 +5,15 @@ from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from typing import Any from pydantic import AnyUrl +from typing import Any -from .publication import Publication from .term import Term -from .publicationtypes import PublicationTypes +from .identifiertypes import IdentifierTypes +from .publication import Publication from .subjects import Subjects +from .publicationtypes import PublicationTypes from .person import Person -from .identifiertypes import IdentifierTypes @forge_signature diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index 793de75..e52a0a3 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -6,17 +6,18 @@ from sdRDM.base.utils import forge_signature, IDGenerator +from .fidarray import FIDArray +from .fid import FID +from .parameters import Parameters from .processingsteps import ProcessingSteps from .identity import Identity -from .parameters import Parameters -from .fid import FID -from .fidarray import FIDArray @forge_signature class Experiment(sdRDM.DataModel): """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed - Also preparation of EnzymeML doc""" + Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant + """ id: Optional[str] = Field( description="Unique 
identifier of the given object.", @@ -29,6 +30,12 @@ class Experiment(sdRDM.DataModel): description="A descriptive name for the overarching experiment.", ) + enzymeml_species: List[str] = Field( + description="A species object from an EnzymeML document.", + default_factory=ListPlus, + multiple=True, + ) + fid: List[FID] = Field( description="A single NMR spectrum.", default_factory=ListPlus, diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fid.py index c92eb0b..42f07bf 100644 --- a/nmrpy/datamodel/core/fid.py +++ b/nmrpy/datamodel/core/fid.py @@ -7,12 +7,9 @@ from pydantic.types import FrozenSet +from .parameters import Parameters from .processingsteps import ProcessingSteps -from .abstractspecies import AbstractSpecies -from .protein import Protein from .identity import Identity -from .parameters import Parameters -from .reactant import Reactant @forge_signature @@ -64,8 +61,8 @@ class FID(sdRDM.DataModel): def add_to_peak_identities( self, - name: str, - enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = None, + name: Optional[str] = None, + species_id: Optional[str] = None, associated_peaks: List[float] = ListPlus(), associated_ranges: List[FrozenSet] = ListPlus(), associated_integrals: List[float] = ListPlus(), @@ -76,8 +73,8 @@ def add_to_peak_identities( Args: id (str): Unique identifier of the 'Identity' object. Defaults to 'None'. - name (): Descriptive name for the species. - enzymeml_species (): A species object from an EnzymeML document.. Defaults to None + name (): Descriptive name for the species. Defaults to None + species_id (): ID of an EnzymeML species. Defaults to None associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus() associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() @@ -85,7 +82,7 @@ def add_to_peak_identities( params = { "name": name, - "enzymeml_species": enzymeml_species, + "species_id": species_id, "associated_peaks": associated_peaks, "associated_ranges": associated_ranges, "associated_integrals": associated_integrals, diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py index a3860a9..91df6fc 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/identity.py @@ -1,16 +1,12 @@ import sdRDM -from typing import Optional, Union, List +from typing import List, Optional from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator from pydantic.types import FrozenSet -from .abstractspecies import AbstractSpecies -from .protein import Protein -from .reactant import Reactant - @forge_signature class Identity(sdRDM.DataModel): @@ -22,14 +18,14 @@ class Identity(sdRDM.DataModel): xml="@id", ) - name: str = Field( - ..., + name: Optional[str] = Field( + default=None, description="Descriptive name for the species", ) - enzymeml_species: Union[AbstractSpecies, Protein, Reactant, None] = Field( + species_id: Optional[str] = Field( default=None, - description="A species object from an EnzymeML document.", + description="ID of an EnzymeML species", ) associated_peaks: List[float] = Field( diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index 0971fef..fe123a6 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -6,8 +6,8 @@ from datetime import datetime as Datetime -from .citation import Citation from .experiment import Experiment +from .citation import Citation @forge_signature diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index 4c41d8c..75be413 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -7,9 +7,9 @@ from pydantic import AnyUrl +from .identifiertypes import 
IdentifierTypes from .publicationtypes import PublicationTypes from .person import Person -from .identifiertypes import IdentifierTypes @forge_signature diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index abbf56d..b916c7a 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -1,36 +1,5 @@ ```mermaid classDiagram - AbstractSpecies <-- Protein - AbstractSpecies <-- Complex - AbstractSpecies <-- Reactant - AbstractSpecies <-- Protein - AbstractSpecies <-- Reactant - EnzymeMLDocument *-- Creator - EnzymeMLDocument *-- Vessel - EnzymeMLDocument *-- Protein - EnzymeMLDocument *-- Complex - EnzymeMLDocument *-- Reactant - EnzymeMLDocument *-- Reaction - EnzymeMLDocument *-- KineticParameter - EnzymeMLDocument *-- Measurement - EnzymeMLDocument *-- File - AbstractSpecies *-- Vessel - Protein *-- SBOTerm - Complex *-- SBOTerm - Reactant *-- SBOTerm - Reaction *-- SBOTerm - Reaction *-- ReactionElement - Reaction *-- KineticModel - ReactionElement *-- SBOTerm - ReactionElement *-- AbstractSpecies - KineticModel *-- SBOTerm - KineticModel *-- KineticParameter - KineticParameter *-- SBOTerm - Measurement *-- MeasurementData - MeasurementData *-- AbstractSpecies - MeasurementData *-- Replicate - Replicate *-- DataTypes - Replicate *-- AbstractSpecies NMRpy *-- Experiment NMRpy *-- Citation Experiment *-- FID @@ -38,9 +7,6 @@ classDiagram FID *-- Parameters FID *-- ProcessingSteps FID *-- Identity - Identity *-- AbstractSpecies - Identity *-- Protein - Identity *-- Reactant Citation *-- Subjects Citation *-- Person Citation *-- Publication @@ -48,9 +14,6 @@ classDiagram Person *-- IdentifierTypes Publication *-- PublicationTypes Publication *-- Person - AbstractSpecies *-- Vessel - Protein *-- SBOTerm - Reactant *-- SBOTerm class NMRpy { +datetime datetime_created* @@ -61,6 +24,7 @@ classDiagram class Experiment { +string name* + +string[0..*] enzymeml_species 
+FID[0..*] fid +FIDArray fid_array } @@ -103,8 +67,8 @@ classDiagram } class Identity { - +string name* - +AbstractSpecies, Protein, Reactant enzymeml_species + +string name + +string species_id +float[0..*] associated_peaks +frozenset[0..*] associated_ranges +float[0..*] associated_integrals @@ -159,41 +123,6 @@ classDiagram +any value } - class Vessel { - +string name* - +posfloat volume* - +string unit* - +StrictBool constant* - +string uri - +string creator_id - } - - class AbstractSpecies { - +string name* - +Vessel vessel_id* - +float init_conc - +StrictBool constant* - +string unit - +string uri - +string creator_id - } - - class Protein { - +string sequence* - +string ecnumber - +string organism - +string organism_tax_id - +string uniprotid - +SBOTerm ontology* - } - - class Reactant { - +string smiles - +string inchi - +string chebi_id - +SBOTerm ontology* - } - class FileFormats { << Enumeration >> +VARIAN @@ -219,54 +148,4 @@ classDiagram +ORCID } - class SBOTerm { - << Enumeration >> - +BIOCHEMICAL_REACTION - +ACID_BASE_REACTION - +CONFORMATIONAL_TRANSITION - +CONVERSION - +DEGRADATION - +DISSOCIATION - +IONISATION - +ISOMERISATION - +NON_COVALENT_BINDING - +REDOX_REACTION - +SPONTANEOUS_REACTION - +PROTEIN - +GENE - +SMALL_MOLECULE - +ION - +RADICAL - +INTERACTOR - +SUBSTRATE - +PRODUCT - +CATALYST - +INHIBITOR - +ESSENTIAL_ACTIVATOR - +NON_ESSENTIAL_ACTIVATOR - +POTENTIATOR - +MACROMOLECULAR_COMPLEX - +PROTEIN_COMPLEX - +DIMER - +MICHAELIS_MENTEN - +K_CAT - +K_M - +V_MAX - } - - class DataTypes { - << Enumeration >> - +CONCENTRATION - +ABSORPTION - +FEED - +BIOMASS - +CONVERSION - +PEAK_AREA - } - - class https://github.com/EnzymeML/enzymeml-specifications/ { - << External Object >> - +Repository - } - ``` \ No newline at end of file diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index 9e64e47..79011fa 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -27,13 +27,13 @@ Root element of the NMRpy data model. 
### Experiment Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed -Also preparation of EnzymeML doc +Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant - __name__ - Type: string - Description: A descriptive name for the overarching experiment. - enzymeml_species - - Type: https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant + - Type: string - Description: A species object from an EnzymeML document. - Multiple: True - fid From 2feaa3db2a0b0389be7520a48d078e12458b3797 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Thu, 2 Nov 2023 13:13:07 +0100 Subject: [PATCH 12/54] Add interface with EnzymeML --- nmrpy/data_objects-copy.py | 2457 +++++++++++++++++ nmrpy/data_objects.py | 56 +- nmrpy/datamodel/core/__init__.py | 5 +- nmrpy/datamodel/core/citation.py | 30 +- nmrpy/datamodel/core/cv.py | 4 +- nmrpy/datamodel/core/experiment.py | 18 +- nmrpy/datamodel/core/fidarray.py | 4 +- nmrpy/datamodel/core/{fid.py => fidobject.py} | 15 +- nmrpy/datamodel/core/identity.py | 37 +- nmrpy/datamodel/core/nmrpy.py | 2 - nmrpy/datamodel/core/person.py | 2 - nmrpy/datamodel/core/publication.py | 11 +- nmrpy/datamodel/core/term.py | 4 +- nmrpy/datamodel/schemes/datamodel_schema.md | 14 +- nmrpy/plotting.py | 109 +- nmrpy/utils.py | 28 + specifications/nmrpy.md | 10 +- 17 files changed, 2692 insertions(+), 114 deletions(-) create mode 100644 nmrpy/data_objects-copy.py rename nmrpy/datamodel/core/{fid.py => fidobject.py} (93%) create mode 100644 nmrpy/utils.py diff --git a/nmrpy/data_objects-copy.py b/nmrpy/data_objects-copy.py new file mode 100644 index 
0000000..a0d72aa --- /dev/null +++ b/nmrpy/data_objects-copy.py @@ -0,0 +1,2457 @@ +from pathlib import Path +import numpy +import scipy +from matplotlib import pyplot +import lmfit +import nmrglue +import numbers +from scipy.optimize import leastsq +from multiprocessing import Pool, cpu_count +from nmrpy.plotting import * +import os +import pickle +from ipywidgets import SelectMultiple +from sdRDM import DataModel + + +class Base: + _complex_dtypes = [ + numpy.dtype("csingle"), + numpy.dtype("cdouble"), + numpy.dtype("clongdouble"), + ] + + _file_formats = ["varian", "bruker", None] + + def __init__(self, *args, **kwargs): + self.id = kwargs.get("id", None) + self._procpar = kwargs.get("procpar", None) + self._params = None + self.fid_path = kwargs.get("fid_path", ".") + self._file_format = None + # self.parameters_object = self.lib.Parameters() + + @property + def lib(self): + try: + self.__lib + except: + self.__lib = DataModel.from_markdown( + path=Path(__file__).parent.parent / "specifications" + ) + return self.__lib + + # @property + # def parameters_object(self): + # return self.__parameter_object + + # @parameters_object.setter + # def parameters_object(self, parameters_object): + # if isinstance(parameters_object, DataModel): + # self.__parameters_object = parameters_object + + @property + def id(self): + return self.__id + + @id.setter + def id(self, id): + if isinstance(id, str) or id is None: + self.__id = id + else: + raise AttributeError("ID must be a string or None.") + + @property + def fid_path(self): + return self.__fid_path + + @fid_path.setter + def fid_path(self, fid_path): + if isinstance(fid_path, str): + self.__fid_path = fid_path + else: + raise AttributeError("fid_path must be a string.") + + @property + def _file_format(self): + return self.__file_format + + @_file_format.setter + def _file_format(self, file_format): + if file_format in self._file_formats: + self.__file_format = file_format + else: + raise AttributeError( + '_file_format 
must be "varian", "bruker", or None.' + ) + + @classmethod + def _is_iter(cls, i): + try: + iter(i) + return True + except TypeError: + return False + + @classmethod + def _is_iter_of_iters(cls, i): + if type(i) == list and len(i) == 0: + return False + elif cls._is_iter(i) and all(cls._is_iter(j) for j in i): + return True + return False + + @classmethod + def _is_flat_iter(cls, i): + if type(i) == list and len(i) == 0: + return True + elif cls._is_iter(i) and not any(cls._is_iter(j) for j in i): + return True + return False + + @property + def _procpar(self): + return self.__procpar + + @_procpar.setter + def _procpar(self, procpar): + if procpar is None: + self.__procpar = procpar + elif isinstance(procpar, dict): + self.__procpar = procpar + self._params = self._extract_procpar(procpar) + # self.parameters_object( + # acquisition_time=self._params.get("at"), + # relaxation_time=self._params.get("d1"), + # repetition_time=self._params.get("rt"), + # spectral_width_ppm=self._params.get("sw"), + # spectral_width_hz=self._params.get("sw_hz"), + # spectrometer_frequency=self._params.get("sfrq"), + # reference_frequency=self._params.get("reffrq"), + # spectral_width_left=self._params.get("sw_left"), + # ) + # for _ in self._params.get("nt"): + # if type(_) is not None: + # self.fid_object.parameters.number_of_transients.append(_) + # for _ in self._params.get("acqtime"): + # if type(_) is not None: + # self.fid_object.parameters.acquisition_times_array.append( + # _ + # ) + else: + raise AttributeError("procpar must be a dictionary or None.") + + @property + def _params(self): + return self.__params + + @_params.setter + def _params(self, params): + if isinstance(params, dict) or params is None: + self.__params = params + else: + raise AttributeError("params must be a dictionary or None.") + + # processing + def _extract_procpar(self, procpar): + if self._file_format == "bruker": + return self._extract_procpar_bruker(procpar) + elif self._file_format == "varian": + 
return self._extract_procpar_varian(procpar) + # else: + # raise AttributeError('Could not parse procpar.') + + @staticmethod + def _extract_procpar_varian(procpar): + """ + Extract some commonely-used NMR parameters (using Varian denotations) + and return a parameter dictionary 'params'. + """ + at = float(procpar["procpar"]["at"]["values"][0]) + d1 = float(procpar["procpar"]["d1"]["values"][0]) + sfrq = float(procpar["procpar"]["sfrq"]["values"][0]) + reffrq = float(procpar["procpar"]["reffrq"]["values"][0]) + rfp = float(procpar["procpar"]["rfp"]["values"][0]) + rfl = float(procpar["procpar"]["rfl"]["values"][0]) + tof = float(procpar["procpar"]["tof"]["values"][0]) + rt = at + d1 + nt = numpy.array( + [procpar["procpar"]["nt"]["values"]], dtype=int + ).flatten() + acqtime = numpy.zeros(nt.shape) + acqtime[0] = rt * nt[0] / 2 + for i in range(1, len(nt)): + acqtime[i] = acqtime[i - 1] + (nt[i - 1] + nt[i]) / 2 * rt + acqtime /= 60.0 # convert to min + sw_hz = float(procpar["procpar"]["sw"]["values"][0]) + sw = round(sw_hz / reffrq, 2) + sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq + params = dict( + at=at, # acquisition time + d1=d1, # relaxation delay + rt=rt, # repetition time (at+d1) + nt=nt, # number of transients + acqtime=acqtime, # acquisition times array (nt, 2nt, .., ntxrt) + sw=sw, # spectral width / ppm + sw_hz=sw_hz, # sw / Hz + sfrq=sfrq, # spectrometer frequency + reffrq=reffrq, # reference frequency + rfp=rfp, # irrelevant + rfl=rfl, # irrelevant + tof=tof, # irrelevant + sw_left=sw_left, # spectral window left + ) + return params + + @staticmethod + def _extract_procpar_bruker(procpar): + """ + Extract some commonly-used NMR parameters (using Bruker denotations) + and return a parameter dictionary 'params'. 
+ """ + d1 = procpar["acqus"]["D"][1] + reffrq = procpar["acqus"]["SFO1"] + nt = procpar["acqus"]["NS"] + sw_hz = procpar["acqus"]["SW_h"] + sw = procpar["acqus"]["SW"] + # lefthand offset of the processed data in ppm + if "procs" in procpar: + sfrq = procpar["procs"]["SF"] + sw_left = procpar["procs"]["OFFSET"] + else: + sfrq = procpar["acqus"]["BF1"] + sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq + at = procpar["acqus"]["TD"] / (2 * sw_hz) + rt = at + d1 + td = procpar["tdelta"] + cumulative = procpar["tcum"] + single = procpar["tsingle"] + tstart = cumulative - 0.5 * single # tstart for acquisition + al = procpar["arraylength"] + a = procpar["arrayset"] + acqtime = numpy.zeros((al)) + acqtime[0] = tstart[a - 1] + for i in range(1, al): + acqtime[i] = acqtime[i - 1] + td + params = dict( + at=at, + d1=d1, + rt=rt, + nt=nt, + acqtime=acqtime, + sw=sw, + sw_hz=sw_hz, + sfrq=sfrq, + reffrq=reffrq, + sw_left=sw_left, + ) + return params + + +class Fid(Base): + """ + The basic FID (Free Induction Decay) class contains all the data for a single spectrum (:attr:`~nmrpy.data_objects.Fid.data`), and the + necessary methods to process these data. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fid_object = self.lib.FID() + self.data = kwargs.get("data", []) + self.peaks = None + self.ranges = None + self._deconvoluted_peaks = None + self._flags = { + "ft": False, + } + + def __str__(self): + return "FID: %s (%i data)" % (self.id, len(self.data)) + + @property + def fid_object(self): + return self.__fid_object + + @fid_object.setter + def fid_object(self, fid_object): + if isinstance(fid_object, DataModel): + self.__fid_object = fid_object + + @property + def data(self): + """ + The spectral data. This is the primary object upon which the processing and analysis functions work. 
+ """ + return self.__data + + @data.setter + def data(self, data): + if Fid._is_valid_dataset(data): + self.__data = numpy.array(data) + # for _ in self.__data: + # if type(_) is not None: + # self.fid_object.data.append(float(_)) + + @property + def _ppm(self): + """ + Index of :attr:`~nmrpy.data_objects.Fid.data` in ppm (parts per million). + """ + if self._params is not None and self.data is not None: + return numpy.linspace( + self._params["sw_left"] - self._params["sw"], + self._params["sw_left"], + len(self.data), + )[::-1] + else: + return None + + @property + def peaks(self): + """ + Picked peaks for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. + """ + return self._peaks + + @peaks.setter + def peaks(self, peaks): + if peaks is not None: + if not Fid._is_flat_iter(peaks): + raise AttributeError("peaks must be a flat iterable") + if not all(isinstance(i, numbers.Number) for i in peaks): + raise AttributeError("peaks must be numbers") + self._peaks = numpy.array(peaks) + else: + self._peaks = peaks + + @property + def ranges(self): + """ + Picked ranges for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. + """ + return self._ranges + + @ranges.setter + def ranges(self, ranges): + if ranges is None: + self._ranges = None + return + if not Fid._is_iter_of_iters(ranges) or ranges is None: + raise AttributeError( + "ranges must be an iterable of iterables or None" + ) + ranges = numpy.array(ranges) + if ranges.shape[1] != 2: + raise AttributeError( + "ranges must be an iterable of 2-length iterables or an empty iterables e.g. 
[[]]" + ) + for r in ranges: + if not all(isinstance(i, numbers.Number) for i in r): + raise AttributeError("ranges must be numbers") + self._ranges = ranges + + @property + def _bl_ppm(self): + return self.__bl_ppm + + @_bl_ppm.setter + def _bl_ppm(self, bl_ppm): + if bl_ppm is not None: + if not Fid._is_flat_iter(bl_ppm): + raise AttributeError( + "baseline indices must be a flat iterable" + ) + if len(bl_ppm) > 0: + if not all(isinstance(i, numbers.Number) for i in bl_ppm): + raise AttributeError("baseline indices must be numbers") + self.__bl_ppm = numpy.sort(list(set(bl_ppm)))[::-1] + else: + self.__bl_ppm = None + else: + self.__bl_ppm = bl_ppm + + @property + def _bl_indices(self): + if self._bl_ppm is not None: + return self._conv_to_index( + self.data, + self._bl_ppm, + self._params["sw_left"], + self._params["sw"], + ) + else: + return None + + @property + def _bl_poly(self): + return self.__bl_poly + + @_bl_poly.setter + def _bl_poly(self, bl_poly): + if bl_poly is not None: + if not Fid._is_flat_iter(bl_poly): + raise AttributeError( + "baseline polynomial must be a flat iterable" + ) + if not all(isinstance(i, numbers.Number) for i in bl_poly): + raise AttributeError("baseline polynomial must be numbers") + self.__bl_poly = numpy.array(bl_poly) + else: + self.__bl_ppm = bl_poly + + @property + def _index_peaks(self): + """ + :attr:`~nmrpy.data_objects.Fid.peaks` converted to indices rather than ppm + """ + if self.peaks is not None: + return self._conv_to_index( + self.data, + self.peaks, + self._params["sw_left"], + self._params["sw"], + ) + else: + return [] + + @property + def _index_ranges(self): + """ + :attr:`~nmrpy.data_objects.Fid.ranges` converted to indices rather than ppm + """ + if self.ranges is not None: + shp = self.ranges.shape + index_ranges = self._conv_to_index( + self.data, + self.ranges.flatten(), + self._params["sw_left"], + self._params["sw"], + ) + return index_ranges.reshape(shp) + else: + return [] + + @property + def 
_grouped_peaklist(self): + """ + :attr:`~nmrpy.data_objects.Fid.peaks` grouped according to :attr:`~nmrpy.data_objects.Fid.ranges` + """ + if self.ranges is not None: + return numpy.array( + [ + [ + peak + for peak in self.peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self.ranges + ], + dtype=object, + ) + else: + return [] + + @property + def _grouped_index_peaklist(self): + """ + :attr:`~nmrpy.data_objects.Fid._index_peaks` grouped according to :attr:`~nmrpy.data_objects.Fid._index_ranges` + """ + if self._index_ranges is not None: + return numpy.array( + [ + [ + peak + for peak in self._index_peaks + if peak > min(peak_range) and peak < max(peak_range) + ] + for peak_range in self._index_ranges + ], + dtype=object, + ) + else: + return [] + + @property + def _deconvoluted_peaks(self): + return self.__deconvoluted_peaks + + @_deconvoluted_peaks.setter + def _deconvoluted_peaks(self, deconvoluted_peaks): + """This is a list of lists of peak parameters with the order [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss]: + + offset: spectral offset + + gauss_sigma: Gaussian sigma + + lorentz_hwhm: Lorentzian half-width-at-half-maximum + + amplitude: height of peak + + frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) + """ + self.__deconvoluted_peaks = deconvoluted_peaks + + @property + def deconvoluted_integrals(self): + """ + An array of integrals for each deconvoluted peak. + """ + if self._deconvoluted_peaks is not None: + integrals = [] + for peak in self._deconvoluted_peaks: + int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) + int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int( + peak[3], peak[2] + ) + integrals.append(int_gauss + int_lorentz) + return integrals + + def _get_plots(self): + """ + Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. 
+ """ + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] + return plots + + def _del_plots(self): + """ + Deletes all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. + """ + plots = self._get_plots() + for plot in plots: + delattr(self, plot.id) + + def _get_widgets(self): + """ + Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. + """ + widgets = [ + id + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Phaser) + or isinstance(self.__dict__[id], Calibrator) + or isinstance(self.__dict__[id], DataPeakSelector) + or isinstance(self.__dict__[id], FidRangeSelector) + ] + return widgets + + def _del_widgets(self): + """ + Deletes all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. + """ + widgets = self._get_widgets() + for w in widgets: + delattr(self, w) + + @classmethod + def _is_valid_dataset(cls, data): + if isinstance(data, str): + raise TypeError("Data must be iterable not a string.") + if not cls._is_iter(data): + raise TypeError("Data must be an iterable.") + if not cls._is_flat_iter(data): + raise TypeError("Data must not be nested.") + if not all(isinstance(i, numbers.Number) for i in data): + raise TypeError("Data must consist of numbers only.") + return True + + @classmethod + def from_data(cls, data): + """ + + Instantiate a new :class:`~nmrpy.data_objects.Fid` object by providing a + spectral data object as argument. Eg. :: + + fid = Fid.from_data(data) + """ + new_instance = cls() + new_instance.data = data + return new_instance + + def zf(self): + """ + + Apply a single degree of zero-filling to data array + :attr:`~nmrpy.data_objects.Fid.data`. + + Note: extends data to double length by appending zeroes. This results + in an artificially increased resolution once Fourier-transformed. 
+ + """ + self.data = numpy.append(self.data, 0 * self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) + + def emhz(self, lb=5.0): + """ + + Apply exponential line-broadening to data array + :attr:`~nmrpy.data_objects.Fid.data`. + + :keyword lb: degree of line-broadening in Hz. + + """ + self.data = ( + numpy.exp( + -numpy.pi + * numpy.arange(len(self.data)) + * (lb / self._params["sw_hz"]) + ) + * self.data + ) + for _ in self.data: + self.fid_object.data.append(float(_)) + + def real(self): + """ + Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. + """ + self.data = numpy.real(self.data) + for _ in self.data: + self.fid_object.data.append(float(_)) + + # GENERAL FUNCTIONS + def ft(self): + """ + Fourier Transform the data array :attr:`~nmrpy.data_objects.Fid.data`. + + Calculates the Discrete Fourier Transform using the Fast Fourier + Transform algorithm as implemented in NumPy (*Cooley, James W., and John W. + Tukey, 1965, 'An algorithm for the machine calculation of complex Fourier + series,' Math. Comput. 19: 297-301.*) + + """ + if self._flags["ft"]: + raise ValueError("Data have already been Fourier Transformed.") + if Fid._is_valid_dataset(self.data): + list_params = (self.data, self._file_format) + self.data = Fid._ft(list_params) + for _ in self.data: + self.fid_object.data.append(float(_)) + self._flags["ft"] = True + + @classmethod + def _ft(cls, list_params): + """ + Class method for Fourier-transforming data using multiprocessing. + list_params is a tuple of (, ). + """ + if len(list_params) != 2: + raise ValueError( + "Wrong number of parameters. 
list_params must contain [, ]" + ) + data, file_format = list_params + if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: + data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) + s = len(data) + if file_format == "varian" or file_format == None: + ft_data = numpy.append( + data[int(s / 2.0) :], data[: int(s / 2.0)] + ) + if file_format == "bruker": + ft_data = numpy.append( + data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] + ) + return ft_data + + @staticmethod + def _conv_to_ppm(data, index, sw_left, sw): + """ + Convert index array to ppm. + """ + if isinstance(index, list): + index = numpy.array(index) + frc_sw = index / float(len(data)) + ppm = sw_left - sw * frc_sw + if Fid._is_iter(ppm): + return numpy.array([round(i, 2) for i in ppm]) + else: + return round(ppm, 2) + + @staticmethod + def _conv_to_index(data, ppm, sw_left, sw): + """ + Convert ppm array to index. + """ + conv_to_int = False + if not Fid._is_iter(ppm): + ppm = [ppm] + conv_to_int = True + if isinstance(ppm, list): + ppm = numpy.array(ppm) + if any(ppm > sw_left) or any(ppm < sw_left - sw): + raise ValueError("ppm must be within spectral width.") + indices = len(data) * (sw_left - ppm) / sw + if conv_to_int: + return int(numpy.ceil(indices)) + return numpy.array(numpy.ceil(indices), dtype=int) + + def phase_correct(self, method="leastsq"): + """ + + Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising + total absolute area. + + :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: + + Nelder-Mead (nelder) + + L-BFGS-B (l-bfgs-b) + + Conjugate Gradient (cg) + + Powell (powell) + + Newton-CG (newton) + """ + if self.data.dtype not in self._complex_dtypes: + raise TypeError("Only complex data can be phase-corrected.") + if not self._flags["ft"]: + raise ValueError( + "Only Fourier-transformed data can be phase-corrected." 
+ ) + print("phasing: %s" % self.id) + self.data = Fid._phase_correct((self.data, method)) + for _ in self.data: + self.fid_object.data.append(float(_)) + + @classmethod + def _phase_correct(cls, list_params): + """ + Class method for phase-correction using multiprocessing. + list_params is a tuple of (, ). + """ + data, method = list_params + p = lmfit.Parameters() + p.add_many( + ("p0", 1.0, True), + ("p1", 0.0, True), + ) + mz = lmfit.minimize( + Fid._phased_data_sum, p, args=([data]), method=method + ) + phased_data = Fid._ps( + data, p0=mz.params["p0"].value, p1=mz.params["p1"].value + ) + # data model + if abs(phased_data.min()) > abs(phased_data.max()): + phased_data *= -1 + if sum(phased_data) < 0.0: + phased_data *= -1 + print("%d\t%d" % (mz.params["p0"].value, mz.params["p1"].value)) + return phased_data + + @classmethod + def _phased_data_sum(cls, pars, data): + err = Fid._ps(data, p0=pars["p0"].value, p1=pars["p1"].value).real + return numpy.array([abs(err).sum()] * 2) + + @classmethod + def _ps(cls, data, p0=0.0, p1=0.0): + """ + Linear phase correction + + :keyword p0: Zero order phase in degrees. + + :keyword p1: First order phase in degrees. 
+ + """ + if not all(isinstance(i, (float, int)) for i in [p0, p1]): + raise TypeError("p0 and p1 must be floats or ints.") + if not data.dtype in Fid._complex_dtypes: + raise TypeError("data must be complex.") + # convert to radians + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 + size = len(data) + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + return ph * data + + def ps(self, p0=0.0, p1=0.0): + """ + Linear phase correction of :attr:`~nmrpy.data_objects.Fid.data` + + :keyword p0: Zero order phase in degrees + + :keyword p1: First order phase in degrees + + """ + if not all(isinstance(i, (float, int)) for i in [p0, p1]): + raise TypeError("p0 and p1 must be floats or ints.") + if not self.data.dtype in self._complex_dtypes: + raise TypeError("data must be complex.") + # convert to radians + p0 = p0 * numpy.pi / 180.0 + p1 = p1 * numpy.pi / 180.0 + size = len(self.data) + ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) + self.data = ph * self.data + for _ in self.data: + self.fid_object.data.append(float(_)) + + def phaser(self): + """ + Instantiate a phase-correction GUI widget which applies to :attr:`~nmrpy.data_objects.Fid.data`. + """ + if not len(self.data): + raise AttributeError("data does not exist.") + if self.data.dtype not in self._complex_dtypes: + raise TypeError("data must be complex.") + if not Fid._is_flat_iter(self.data): + raise AttributeError("data must be 1 dimensional.") + global _phaser_widget + self._phaser_widget = Phaser(self) + + def calibrate(self): + """ + Instantiate a GUI widget to select a peak and calibrate spectrum. + Left-clicking selects a peak. The user is then prompted to enter + the PPM value of that peak for calibration. 
+ """ + plot_label = """ +Left - select peak +""" + plot_title = "Calibration {}".format(self.id) + self._calibrate_widget = Calibrator( + self, + title=plot_title, + label=plot_label, + ) + + def baseline_correct(self, deg=2): + """ + + Perform baseline correction by fitting specified baseline points + (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) with polynomial of specified + degree (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) and subtract this + polynomial from :attr:`~nmrpy.data_objects.Fid.data`. + + + :keyword deg: degree of fitted polynomial + """ + + if self._bl_indices is None: + raise AttributeError( + "No points selected for baseline correction. Run fid.baseliner()" + ) + if not len(self.data): + raise AttributeError("data does not exist.") + if self.data.dtype in self._complex_dtypes: + raise TypeError("data must not be complex.") + if not Fid._is_flat_iter(self.data): + raise AttributeError("data must be 1 dimensional.") + + data = self.data + x = numpy.arange(len(data)) + m = numpy.ones_like(x) + m[self._bl_indices] = 0 + self._bl_poly = [] + ym = numpy.ma.masked_array(data, m) + xm = numpy.ma.masked_array(x, m) + p = numpy.ma.polyfit(xm, ym, deg) + yp = scipy.polyval(p, x) + self._bl_poly = yp + data_bl = data - yp + self.data = numpy.array(data_bl) + for _ in self.data: + self.fid_object.data.append(float(_)) + + def peakpick(self, thresh=0.1): + """ + + Attempt to automatically identify peaks. Picked peaks are assigned to + :attr:`~nmrpy.data_objects.Fid.peaks`. + + :keyword thresh: fractional threshold for peak-picking + """ + peaks_ind = nmrglue.peakpick.pick(self.data, thresh * self.data.max()) + peaks_ind = [i[0] for i in peaks_ind] + peaks_ppm = Fid._conv_to_ppm( + self.data, peaks_ind, self._params["sw_left"], self._params["sw"] + ) + self.peaks = peaks_ppm + print(self.peaks) + + def peakpicker(self): + """ + Instantiate a peak-picking GUI widget. Left-clicking selects a peak. + Right-click-dragging defines a range. 
Ctrl-left click deletes nearest peak; + ctrl-right click deletes range. Peaks are stored in + :attr:`~nmrpy.data_objects.Fid.peaks`; ranges are stored in + :attr:`~nmrpy.data_objects.Fid.ranges`: both are used for deconvolution (see + :meth:`~nmrpy.data_objects.Fid.deconv`). + + """ + plot_label = """ +Left - select peak +Ctrl+Left - delete nearest peak +Drag Right - select range +Ctrl+Right - delete range +Ctrl+Alt+Right - assign +""" + plot_title = "Peak-picking {}".format(self.id) + self._peakpicker_widget = DataPeakSelector( + self, + title=plot_title, + label=plot_label, + ) + + def clear_peaks(self): + """ + Clear peaks stored in :attr:`~nmrpy.data_objects.Fid.peaks`. + """ + self.peaks = None + + def clear_ranges(self): + """ + Clear ranges stored in :attr:`~nmrpy.data_objects.Fid.ranges`. + """ + self.ranges = None + + def baseliner(self): + """ + Instantiate a baseline-correction GUI widget. Right-click-dragging + defines a range. Ctrl-Right click deletes previously selected range. Indices + selected are stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`, which is used + for baseline-correction (see + :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
+ + """ + plot_label = """ +Drag Right - select range +Ctrl+Right - delete range +Ctrl+Alt+Right - assign +""" + plot_title = "Baseline correction {}".format(self.id) + self._baseliner_widget = FidRangeSelector( + self, + title=plot_title, + label=plot_label, + ) + + @classmethod + def _f_gauss(cls, offset, amplitude, gauss_sigma, x): + return amplitude * numpy.exp( + -((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0) + ) + + @classmethod + def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): + # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) + return ( + amplitude + * lorentz_hwhm**2.0 + / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) + ) + + @classmethod + def _f_gauss_int(cls, amplitude, gauss_sigma): + return amplitude * numpy.sqrt(2.0 * numpy.pi * gauss_sigma**2.0) + + @classmethod + def _f_lorentz_int(cls, amplitude, lorentz_hwhm): + # empirical integral commented out + # x = numpy.arange(1000*lorentz_hwhm) + # return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) + # this integral forumula from http://magicplot.com/wiki/fit_equations + return amplitude * lorentz_hwhm * numpy.pi + + @classmethod + def _f_pk( + cls, + x, + offset=0.0, + gauss_sigma=1.0, + lorentz_hwhm=1.0, + amplitude=1.0, + frac_gauss=0.0, + ): + """ + + Return the a combined Gaussian/Lorentzian peakshape for deconvolution + of :attr:`~nmrpy.data_objects.Fid.data`. + + :arg x: array of equal length to :attr:`~nmrpy.data_objects.Fid.data` + + + :keyword offset: spectral offset in x + + :keyword gauss_sigma: 2*sigma**2 specifying the width of the Gaussian peakshape + + :keyword lorentz_hwhm: Lorentzian half width at half maximum height + + :keyword amplitude: amplitude of peak + + :keyword frac_gauss: fraction of function to be Gaussian (0 -> 1). 
Note: + specifying a Gaussian fraction of 0 will produce a pure Lorentzian and vice + versa.""" + + # validation + parameters = [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss] + if not all(isinstance(i, numbers.Number) for i in parameters): + raise TypeError("Keyword parameters must be numbers.") + if not cls._is_iter(x): + raise TypeError("x must be an iterable") + if not isinstance(x, numpy.ndarray): + x = numpy.array(x) + if frac_gauss > 1.0: + frac_gauss = 1.0 + if frac_gauss < 0.0: + frac_gauss = 0.0 + + gauss_peak = cls._f_gauss(offset, amplitude, gauss_sigma, x) + lorentz_peak = cls._f_lorentz(offset, amplitude, lorentz_hwhm, x) + peak = frac_gauss * gauss_peak + (1 - frac_gauss) * lorentz_peak + + return peak + + @classmethod + def _f_makep(cls, data, peaks, frac_gauss=None): + """ + Make a set of initial peak parameters for deconvolution. + + + :arg data: data to be fitted + + :arg peaks: selected peak positions (see peakpicker()) + + :returns: an array of peaks, each consisting of the following parameters: + + spectral offset (x) + + gauss: 2*sigma**2 + + lorentz: scale (HWHM) + + amplitude: amplitude of peak + + frac_gauss: fraction of function to be Gaussian (0 -> 1) + """ + if not cls._is_flat_iter(data): + raise TypeError("data must be a flat iterable") + if not cls._is_flat_iter(peaks): + raise TypeError("peaks must be a flat iterable") + if not isinstance(data, numpy.ndarray): + data = numpy.array(data) + + p = [] + for i in peaks: + pamp = 0.9 * abs(data[int(i)]) + single_peak = [i, 10, 0.1, pamp, frac_gauss] + p.append(single_peak) + return numpy.array(p) + + @classmethod + def _f_conv(cls, parameterset_list, data): + """ + Returns the maximum of a convolution of an initial set of lineshapes and the data to be fitted. 
+ + parameterset_list -- a list of parameter lists: n*[[spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, + frac_gauss: fraction of function to be Gaussian (0 -> 1)]] + where n is the number of peaks + data -- 1D spectral array + + """ + + if not cls._is_flat_iter(data): + raise TypeError("data must be a flat iterable") + if not cls._is_iter(parameterset_list): + raise TypeError("parameterset_list must be an iterable") + if not isinstance(data, numpy.ndarray): + data = numpy.array(data) + + data[data == 0.0] = 1e-6 + x = numpy.arange(len(data), dtype="f8") + peaks_init = cls._f_pks(parameterset_list, x) + data_convolution = numpy.convolve(data, peaks_init[::-1]) + auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) + max_data_convolution = numpy.where( + data_convolution == data_convolution.max() + )[0][0] + max_auto_convolution = numpy.where( + auto_convolution == auto_convolution.max() + )[0][0] + return max_data_convolution - max_auto_convolution + + @classmethod + def _f_pks_list(cls, parameterset_list, x): + """ + Return a list of peak evaluations for deconvolution. See _f_pk(). 
+ + Keyword arguments: + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, + frac_gauss: fraction of function to be Gaussian (0 -> 1)] + x -- array of equal length to FID + """ + if not cls._is_iter_of_iters(parameterset_list): + raise TypeError("Parameter set must be an iterable of iterables") + for p in parameterset_list: + if not cls._is_iter(p): + raise TypeError("Parameter set must be an iterable") + if not all(isinstance(i, numbers.Number) for i in p): + raise TypeError("Keyword parameters must be numbers.") + if not cls._is_iter(x): + raise TypeError("x must be an iterable") + if not isinstance(x, numpy.ndarray): + x = numpy.array(x) + return numpy.array([Fid._f_pk(x, *peak) for peak in parameterset_list]) + + @classmethod + def _f_pks(cls, parameterset_list, x): + """ + Return the sum of a series of peak evaluations for deconvolution. See _f_pk(). + + Keyword arguments: + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, + frac_gauss: fraction of function to be Gaussian (0 -> 1)] + x -- array of equal length to FID + """ + + if not cls._is_iter_of_iters(parameterset_list): + raise TypeError("Parameter set must be an iterable of iterables") + for p in parameterset_list: + if not cls._is_iter(p): + raise TypeError("Parameter set must be an iterable") + if not all(isinstance(i, numbers.Number) for i in p): + raise TypeError("Keyword parameters must be numbers.") + if not cls._is_iter(x): + raise TypeError("x must be an iterable") + if not isinstance(x, numpy.ndarray): + x = numpy.array(x) + + peaks = x * 0.0 + for p in parameterset_list: + peak = cls._f_pk( + x, + offset=p[0], + gauss_sigma=p[1], + lorentz_hwhm=p[2], + amplitude=p[3], + frac_gauss=p[4], + ) + peaks += peak + return peaks + + @classmethod + def _f_res(cls, p, data): + """ + Objective function for 
deconvolution. Returns residuals of the devonvolution fit. + + x -- array of equal length to FID + + Keyword arguments: + p -- lmfit parameters object: + offset_n -- spectral offset in x + sigma_n -- gaussian 2*sigma**2 + hwhm_n -- lorentzian half width at half maximum height + amplitude_n -- amplitude of peak + frac_gauss_n -- fraction of function to be Gaussian (0 -> 1) + where n is the peak number (zero-indexed) + data -- spectrum array + + """ + if not isinstance(p, lmfit.parameter.Parameters): + raise TypeError( + "Parameters must be of type lmfit.parameter.Parameters." + ) + if not cls._is_flat_iter(data): + raise TypeError("data must be a flat iterable.") + if not isinstance(data, numpy.ndarray): + data = numpy.array(data) + + params = Fid._parameters_to_list(p) + x = numpy.arange(len(data), dtype="f8") + res = data - cls._f_pks(params, x) + return res + + @classmethod + def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): + """Fit a section of spectral data with a combination of Gaussian/Lorentzian peaks for deconvolution. + + Keyword arguments: + peaks -- selected peak positions (see peakpicker()) + frac_gauss -- fraction of fitted function to be Gaussian (1 - Guassian, 0 - Lorentzian) + + returns: + fits -- list of fitted peak parameter sets + + Note: peaks are fitted by default using the Levenberg-Marquardt algorithm[1]. Other fitting algorithms are available (http://cars9.uchicago.edu/software/python/lmfit/fitting.html#choosing-different-fitting-methods). + + [1] Marquardt, Donald W. 'An algorithm for least-squares estimation of nonlinear parameters.' Journal of the Society for Industrial & Applied Mathematics 11.2 (1963): 431-441. 
+ """ + data = numpy.real(data) + if not cls._is_flat_iter(data): + raise TypeError("data must be a flat iterable") + if not cls._is_flat_iter(peaks): + raise TypeError("peaks must be a flat iterable") + if any(peak > (len(data) - 1) for peak in peaks): + raise ValueError("peaks must be within the length of data.") + if not isinstance(data, numpy.ndarray): + data = numpy.array(data) + p = cls._f_makep(data, peaks, frac_gauss=0.5) + init_ref = cls._f_conv(p, data) + if any(peaks + init_ref < 0) or any(peaks + init_ref > len(data) - 1): + init_ref = 0 + if frac_gauss == None: + p = cls._f_makep(data, peaks + init_ref, frac_gauss=0.5) + else: + p = cls._f_makep(data, peaks + init_ref, frac_gauss=frac_gauss) + + params = lmfit.Parameters() + for parset in range(len(p)): + current_parset = dict( + zip( + ["offset", "sigma", "hwhm", "amplitude", "frac_gauss"], + p[parset], + ) + ) + for k, v in current_parset.items(): + par_name = "%s_%i" % (k, parset) + params.add(name=par_name, value=v, vary=True, min=0.0) + if "offset" in par_name: + params[par_name].max = len(data) - 1 + if "frac_gauss" in par_name: + params[par_name].max = 1.0 + if frac_gauss is not None: + params[par_name].vary = False + # if 'sigma' in par_name or 'hwhm' in par_name: + # params[par_name].max = 0.01*current_parset['amplitude'] + if "amplitude" in par_name: + params[par_name].max = 2.0 * data.max() + + try: + mz = lmfit.minimize( + cls._f_res, params, args=([data]), method=method + ) + fits = Fid._parameters_to_list(mz.params) + except: + fits = None + return fits + + @classmethod + def _parameters_to_list(cls, p): + n_pks = int(len(p) / 5) + params = [] + for i in range(n_pks): + current_params = [ + p["%s_%s" % (par, i)].value + for par in [ + "offset", + "sigma", + "hwhm", + "amplitude", + "frac_gauss", + ] + ] + params.append(current_params) + return params + + @classmethod + def _deconv_datum(cls, list_parameters): + if len(list_parameters) != 5: + raise ValueError("list_parameters must consist 
of five objects.") + if ( + type(list_parameters[1]) == list and len(list_parameters[1]) == 0 + ) or ( + type(list_parameters[2]) == list and len(list_parameters[2]) == 0 + ): + return [] + + datum, peaks, ranges, frac_gauss, method = list_parameters + + if not cls._is_iter_of_iters(ranges): + raise TypeError("ranges must be an iterable of iterables") + if not all(len(rng) == 2 for rng in ranges): + raise ValueError("ranges must contain two values.") + if not all(rng[0] != rng[1] for rng in ranges): + raise ValueError("data_index must contain different values.") + if not isinstance(datum, numpy.ndarray): + datum = numpy.array(datum) + if datum.dtype in cls._complex_dtypes: + raise TypeError("data must be not be complex.") + + fit = [] + for j in zip(peaks, ranges): + d_slice = datum[j[1][0] : j[1][1]] + p_slice = j[0] - j[1][0] + f = cls._f_fitp( + d_slice, p_slice, frac_gauss=frac_gauss, method=method + ) + f = numpy.array(f).transpose() + f[0] += j[1][0] + f = f.transpose() + fit.append(f) + return fit + + def deconv(self, method="leastsq", frac_gauss=0.0): + """ + + Deconvolute :attr:`~nmrpy.data_obects.Fid.data` object by fitting a + series of peaks to the spectrum. These peaks are generated using the parameters + in :attr:`~nmrpy.data_objects.Fid.peaks`. :attr:`~nmrpy.data_objects.Fid.ranges` + splits :attr:`~nmrpy.data_objects.Fid.data` up into smaller portions. This + significantly speeds up deconvolution time. + + :keyword frac_gauss: (0-1) determines the Gaussian fraction of the peaks. Setting this argument to None will fit this parameter as well. + + :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: + + Nelder-Mead (nelder) + + L-BFGS-B (l-bfgs-b) + + Conjugate Gradient (cg) + + Powell (powell) + + Newton-CG (newton) + + """ + + if not len(self.data): + raise AttributeError("data does not exist.") + if self.data.dtype in self._complex_dtypes: + raise TypeError("data must be not be complex.") + if self.peaks is None: + raise AttributeError("peaks must be picked.") + if self.ranges is None: + raise AttributeError("ranges must be specified.") + print("deconvoluting {}".format(self.id)) + list_parameters = [ + self.data, + self._grouped_index_peaklist, + self._index_ranges, + frac_gauss, + method, + ] + self._deconvoluted_peaks = numpy.array( + [j for i in Fid._deconv_datum(list_parameters) for j in i] + ) + print("deconvolution completed") + + def plot_ppm(self, **kwargs): + """ + Plot :attr:`~nmrpy.data_objects.Fid.data`. + + :keyword upper_ppm: upper spectral bound in ppm + + :keyword lower_ppm: lower spectral bound in ppm + + :keyword lw: linewidth of plot + + :keyword colour: colour of the plot + """ + plt = Plot() + plt._plot_ppm(self, **kwargs) + setattr(self, plt.id, plt) + pyplot.show() + + def plot_deconv(self, **kwargs): + """ + Plot :attr:`~nmrpy.data_objects.Fid.data` with deconvoluted peaks overlaid. 
+ + :keyword upper_ppm: upper spectral bound in ppm + + :keyword lower_ppm: lower spectral bound in ppm + + :keyword lw: linewidth of plot + + :keyword colour: colour of the plot + + :keyword peak_colour: colour of the deconvoluted peaks + + :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks + """ + if not len(self._deconvoluted_peaks): + raise AttributeError("deconvolution not yet performed") + plt = Plot() + plt._plot_deconv(self, **kwargs) + setattr(self, plt.id, plt) + pyplot.show() + + +class FidArray(Base): + """ + + This object collects several :class:`~nmrpy.data_objects.Fid` objects into + an array, and it contains all the processing methods necessary for bulk + processing of these FIDs. It should be considered the parent object for any + project. The class methods :meth:`~nmrpy.data_objects.FidArray.from_path` and + :meth:`~nmrpy.data_objects.FidArray.from_data` will instantiate a new + :class:`~nmrpy.data_objects.FidArray` object from a Varian/Bruker .fid path or + an iterable of data respectively. Each :class:`~nmrpy.data_objects.Fid` object + in the array will appear as an attribute of + :class:`~nmrpy.data_objects.FidArray` with a unique ID of the form 'fidXX', + where 'XX' is an increasing integer . 
+ + """ + + def __init__(self): + _now = str(datetime.now()) + self.data_model = self.lib.NMRpy( + datetime_created=_now, + datetime_modified=_now, + ) + del _now + self._force_pyenzyme = False + + @property + def force_pyenzyme(self): + return self._force_pyenzyme + + @force_pyenzyme.setter + def force_pyenzyme(self): + raise PermissionError("Forbidden!") + + @force_pyenzyme.deleter + def force_pyenzyme(self): + raise PermissionError("Forbidden!") + + @property + def data_model(self): + return self.__data_model + + @data_model.setter + def data_model(self, data_model: DataModel): + if not isinstance(data_model, DataModel): + raise AttributeError( + f"Parameter `data_model` has to be of type `sdrdm.DataModel`, got {type(data_model)} instead." + ) + self.__data_model = data_model + self.__data_model.datetime_modified = str(datetime.now()) + + @data_model.deleter + def data_model(self): + del self.__data_model + print("The current data model has been deleted.") + + def __str__(self): + return "FidArray of {} FID(s)".format(len(self.data)) + + def get_fid(self, id): + """ + Return an :class:`~nmrpy.data_objects.Fid` object owned by this object, identified by unique ID. Eg.:: + + fid12 = fid_array.get_fid('fid12') + + :arg id: a string id for an :class:`~nmrpy.data_objects.Fid` + """ + try: + return getattr(self, id) + except AttributeError: + print("{} does not exist.".format(id)) + + def get_fids(self): + """ + Return a list of all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. + """ + fids = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Fid) + ] + return fids + + def _get_plots(self): + """ + Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. 
+ """ + plots = [ + self.__dict__[id] + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Plot) + ] + return plots + + def _del_plots(self): + """ + Deletes all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. + """ + plots = self._get_plots() + for plot in plots: + delattr(self, plot.id) + + def _get_widgets(self): + """ + Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.FidArray`. + """ + widgets = [ + id + for id in sorted(self.__dict__) + if isinstance(self.__dict__[id], Phaser) + or isinstance(self.__dict__[id], RangeCalibrator) + or isinstance(self.__dict__[id], DataPeakRangeSelector) + or isinstance(self.__dict__[id], FidArrayRangeSelector) + or isinstance(self.__dict__[id], DataTraceRangeSelector) + or isinstance(self.__dict__[id], DataTraceSelector) + ] + return widgets + + def _del_widgets(self): + """ + Deletes all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. + """ + widgets = self._get_widgets() + for w in widgets: + delattr(self, w) + + @property + def data(self): + """ + An array of all :attr:`~nmrpy.data_objects.Fid.data` objects belonging to the :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. + """ + data = numpy.array([fid.data for fid in self.get_fids()]) + return data + + @property + def t(self): + """ + An array of the acquisition time for each FID. 
+ """ + nfids = len(self.get_fids()) + t = None + if nfids > 0: + try: + t = self._params["acqtime"] + except: + t = numpy.arange(len(self.get_fids())) + return t + + @property + def deconvoluted_integrals(self): + """ + Collected :class:`~nmrpy.data_objects.Fid.deconvoluted_integrals` + """ + deconvoluted_integrals = [] + for fid in self.get_fids(): + deconvoluted_integrals.append(fid.deconvoluted_integrals) + return numpy.array(deconvoluted_integrals) + + @property + def _deconvoluted_peaks(self): + """ + Collected :class:`~nmrpy.data_objects.Fid._deconvoluted_peaks` + """ + deconvoluted_peaks = [] + for fid in self.get_fids(): + try: + deconvoluted_peaks.append(fid._deconvoluted_peaks) + except: + deconvoluted_peaks.append([]) + return numpy.array(deconvoluted_peaks) + + def add_fid(self, fid): + """ + Add an :class:`~nmrpy.data_objects.Fid` object to this :class:`~nmrpy.data_objects.FidArray`, using a unique id. + + :arg fid: an :class:`~nmrpy.data_objects.Fid` instance + """ + if isinstance(fid, Fid): + setattr(self, fid.id, fid) + else: + raise AttributeError("FidArray requires Fid object.") + + def del_fid(self, fid_id): + """ + Delete an :class:`~nmrpy.data_objects.Fid` object belonging to this :class:`~nmrpy.data_objects.FidArray`, using a unique id. + + :arg fid_id: a string id for an :class:`~nmrpy.data_objects.Fid` + """ + if hasattr(self, fid_id): + if isinstance(getattr(self, fid_id), Fid): + fids = [f.id for f in self.get_fids()] + idx = fids.index(fid_id) + delattr(self, fid_id) + if hasattr(self, "_params") and self._params is not None: + at = list(self._params["acqtime"]) + at.pop(idx) + self._params["acqtime"] = at + else: + raise AttributeError("{} is not an FID object.".format(fid_id)) + else: + raise AttributeError("FID {} does not exist.".format(fid_id)) + + def add_fids(self, fids): + """ + Add a list of :class:`~nmrpy.data_objects.Fid` objects to this :class:`~nmrpy.data_objects.FidArray`. 
+ + :arg fids: a list of :class:`~nmrpy.data_objects.Fid` instances + """ + if FidArray._is_iter(fids): + num_fids = len(fids) + zero_fill = str(len(str(num_fids))) + for fid_index in range(num_fids): + try: + fid = fids[fid_index] + id_str = "fid{0:0" + zero_fill + "d}" + fid.id = id_str.format(fid_index) + self.add_fid(fid) + except AttributeError as e: + print(e) + + @classmethod + def from_data(cls, data): + """ + Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a 2D data set of spectral arrays. + + :arg data: a 2D data array + """ + if not cls._is_iter_of_iters(data): + raise TypeError("data must be an iterable of iterables.") + fid_array = cls() + fids = [] + for fid_index, datum in zip(range(len(data)), data): + fid_id = "fid%i" % fid_index + fid = Fid(id=fid_id, data=datum) + fids.append(fid) + fid_array.add_fids(fids) + return fid_array + + @classmethod + def from_path(cls, fid_path=".", file_format=None, arrayset=None): + """ + Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a .fid directory. 
+ + :keyword fid_path: filepath to .fid directory + + :keyword file_format: 'varian' or 'bruker', usually unnecessary + + :keyword arrayset: (int) array set for interleaved spectra, + user is prompted if not specified + """ + if not file_format: + try: + with open(fid_path, "rb") as f: + return pickle.load(f) + except: + print("Not NMRPy data file.") + importer = Importer(fid_path=fid_path) + importer.import_fid(arrayset=arrayset) + elif file_format == "varian": + importer = VarianImporter(fid_path=fid_path) + importer.import_fid() + elif file_format == "bruker": + importer = BrukerImporter(fid_path=fid_path) + importer.import_fid(arrayset=arrayset) + elif file_format == "nmrpy": + with open(fid_path, "rb") as f: + return pickle.load(f) + + if cls._is_iter(importer.data): + fid_array = cls.from_data(importer.data) + fid_array._file_format = importer._file_format + fid_array.fid_path = fid_path + fid_array._procpar = importer._procpar + for fid in fid_array.get_fids(): + fid._file_format = fid_array._file_format + fid.fid_path = fid_array.fid_path + fid._procpar = fid_array._procpar + return fid_array + else: + raise IOError("Data could not be imported.") + + def zf_fids(self): + """ + Zero-fill all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` + """ + for fid in self.get_fids(): + fid.zf() + + def emhz_fids(self, lb=5.0): + """ + Apply line-broadening (apodisation) to all :class:`nmrpy.~data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` + + :keyword lb: degree of line-broadening in Hz. + """ + for fid in self.get_fids(): + fid.emhz(lb=lb) + + def ft_fids(self, mp=True, cpus=None): + """ + Fourier-transform all FIDs. 
+ + :keyword mp: parallelise over multiple processors, significantly reducing computation time + + :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True + """ + if mp: + fids = self.get_fids() + list_params = [[fid.data, fid._file_format] for fid in fids] + ft_data = self._generic_mp(Fid._ft, list_params, cpus) + for fid, datum in zip(fids, ft_data): + fid.data = datum + fid._flags["ft"] = True + else: + for fid in self.get_fids(): + fid.ft() + print("Fourier-transformation completed") + + def real_fids(self): + """ + Discard imaginary component of FID data sets. + + """ + for fid in self.get_fids(): + fid.real() + + def norm_fids(self): + """ + Normalise FIDs by maximum data value in :attr:`~nmrpy.data_objects.FidArray.data`. + + """ + dmax = self.data.max() + for fid in self.get_fids(): + fid.data = fid.data / dmax + + def phase_correct_fids(self, method="leastsq", mp=True, cpus=None): + """ + Apply automatic phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` + + :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` + + :keyword mp: parallelise the phasing process over multiple processors, significantly reducing computation time + + :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True + """ + if mp: + fids = self.get_fids() + if not all(fid.data.dtype in self._complex_dtypes for fid in fids): + raise TypeError("Only complex data can be phase-corrected.") + if not all(fid._flags["ft"] for fid in fids): + raise ValueError( + "Only Fourier-transformed data can be phase-corrected." 
+ ) + list_params = [[fid.data, method] for fid in fids] + phased_data = self._generic_mp( + Fid._phase_correct, list_params, cpus + ) + for fid, datum in zip(fids, phased_data): + fid.data = datum + else: + for fid in self.get_fids(): + fid.phase_correct(method=method) + print("phase-correction completed") + + def baseliner_fids(self): + """ + + Instantiate a baseline-correction GUI widget. Right-click-dragging + defines a range. Ctrl-Right click deletes previously selected range. Indices + selected are stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`, which is used + for baseline-correction (see + :meth:`~nmrpy.data_objects.Fid.baseline_correction`). + + """ + plot_label = """ +Drag Right - select range +Ctrl+Right - delete range +Ctrl+Alt+Right - assign +""" + plot_title = "Select data for baseline-correction" + self._baseliner_widget = FidArrayRangeSelector( + self, title=plot_title, label=plot_label, voff=0.01 + ) + + def baseline_correct_fids(self, deg=2): + """ + Apply baseline-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` + + :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) + """ + for fid in self.get_fids(): + try: + fid.baseline_correct(deg=deg) + except: + print( + "failed for {}. 
Perhaps first run baseliner_fids()".format( + fid.id + ) + ) + print("baseline-correction completed") + + @property + def _data_traces(self): + return self.__data_traces + + @_data_traces.setter + def _data_traces(self, data_traces): + self.__data_traces = data_traces + + @property + def _index_traces(self): + return self.__index_traces + + @_index_traces.setter + def _index_traces(self, index_traces): + self.__index_traces = index_traces + + @property + def _trace_mask(self): + return self.__trace_mask + + @_trace_mask.setter + def _trace_mask(self, trace_mask): + self.__trace_mask = trace_mask + + @property + def _trace_mean_ppm(self): + return self.__trace_mean_ppm + + @_trace_mean_ppm.setter + def _trace_mean_ppm(self, trace_mean_ppm): + trace_mean_ppm + self.__trace_mean_ppm = trace_mean_ppm + + @property + def integral_traces(self): + """ + Returns the dictionary of integral traces generated by + :meth:`~nmrpy.FidArray.select_integral_traces`. + """ + return self._integral_traces + + @integral_traces.setter + def integral_traces(self, integral_traces): + self._integral_traces = integral_traces + + def deconv_fids( + self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0 + ): + """ + Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. + + :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` + + :keyword mp: parallelise the phasing process over multiple processors, significantly reduces computation time + + :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True, default is n-1 cores + """ + if mp: + fids = self.get_fids() + if not all(fid._flags["ft"] for fid in fids): + raise ValueError( + "Only Fourier-transformed data can be deconvoluted." 
+ ) + list_params = [ + [ + fid.data, + fid._grouped_index_peaklist, + fid._index_ranges, + frac_gauss, + method, + ] + for fid in fids + ] + deconv_datum = self._generic_mp( + Fid._deconv_datum, list_params, cpus + ) + for fid, datum in zip(fids, deconv_datum): + fid._deconvoluted_peaks = numpy.array( + [j for i in datum for j in i] + ) + else: + for fid in self.get_fids(): + fid.deconv(frac_gauss=frac_gauss) + print("deconvolution completed") + + def get_masked_integrals(self): + """ + After peakpicker_traces() and deconv_fids() this function returns a masked integral array. + """ + result = [] + try: + ints = [list(i) for i in self.deconvoluted_integrals] + for i in self._trace_mask: + ints_current = numpy.zeros_like(i, dtype="f8") + for j in range(len(i)): + if i[j] != -1: + ints_current[j] = ints[j].pop(0) + result.append(ints_current) + except AttributeError: + print("peakpicker_traces() or deconv_fids() probably not yet run.") + return result + + def ps_fids(self, p0=0.0, p1=0.0): + """ + Apply manual phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` + + :keyword p0: Zero order phase in degrees + + :keyword p1: First order phase in degrees + """ + for fid in self.get_fids(): + fid.ps(p0=p0, p1=p1) + + @staticmethod + def _generic_mp(fcn, iterable, cpus): + if cpus is None: + cpus = cpu_count() - 1 + proc_pool = Pool(cpus) + result = proc_pool.map(fcn, iterable) + proc_pool.close() + proc_pool.join() + return result + + def plot_array(self, **kwargs): + """ + Plot :attr:`~nmrpy.data_objects.FidArray.data`. 
+ + :keyword upper_index: upper index of array (None) + + :keyword lower_index: lower index of array (None) + + :keyword upper_ppm: upper spectral bound in ppm (None) + + :keyword lower_ppm: lower spectral bound in ppm (None) + + :keyword lw: linewidth of plot (0.5) + + :keyword azim: starting azimuth of plot (-90) + + :keyword elev: starting elevation of plot (40) + + :keyword filled: True=filled vertices, False=lines (False) + + :keyword show_zticks: show labels on z axis (False) + + :keyword labels: under development (None) + + :keyword colour: plot spectra with colour spectrum, False=black (True) + + :keyword filename: save plot to .pdf file (None) + """ + plt = Plot() + plt._plot_array(self.data, self._params, **kwargs) + setattr(self, plt.id, plt) + + def plot_deconv_array(self, **kwargs): + """ + Plot all :attr:`~nmrpy.data_objects.Fid.data` with deconvoluted peaks overlaid. + + :keyword upper_index: upper index of Fids to plot + + :keyword lower_index: lower index of Fids to plot + + :keyword upper_ppm: upper spectral bound in ppm + + :keyword lower_ppm: lower spectral bound in ppm + + :keyword data_colour: colour of the plotted data ('k') + + :keyword summed_peak_colour: colour of the plotted summed peaks ('r') + + :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks ('g') + + :keyword data_filled: fill state of the plotted data (False) + + :keyword summed_peak_filled: fill state of the plotted summed peaks (True) + + :keyword residual_filled: fill state of the plotted residuals (False) + + :keyword figsize: [x, y] size of plot ([15, 7.5]) + + :keyword lw: linewidth of plot (0.3) + + :keyword azim: azimuth of 3D axes (-90) + + :keyword elev: elevation of 3D axes (20) + + + """ + plt = Plot() + plt._plot_deconv_array(self.get_fids(), **kwargs) + setattr(self, plt.id, plt) + + def calibrate( + self, fid_number=None, assign_only_to_index=False, voff=0.02 + ): + """ + Instantiate a GUI widget to select a peak and 
calibrate + spectra in a :class:`~nmrpy.data_objects.FidArray`. + Left-clicking selects a peak. The user is then prompted to enter + the PPM value of that peak for calibration; this will be applied + to all :class:`~nmrpy.data_objects.Fid` + objects owned by this :class:`~nmrpy.data_objects.FidArray`. See + also :meth:`~nmrpy.data_objects.Fid.calibrate`. + + :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for calibration. If None, the whole data array is plotted. + + :keyword assign_only_to_index: if True, assigns calibration only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number; if False, assigns to all. + + :keyword voff: vertical offset for spectra + """ + plot_label = """ +Left - select peak +""" + self._calibrate_widget = RangeCalibrator( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) + + def peakpicker( + self, fid_number=None, assign_only_to_index=True, voff=0.02 + ): + """ + + Instantiate peak-picker widget for + :attr:`~nmrpy.data_objects.Fid.data`, and apply selected + :attr:`~nmrpy.data_objects.Fid.peaks` and + :attr:`~nmrpy.data_objects.Fid.ranges` to all :class:`~nmrpy.data_objects.Fid` + objects owned by this :class:`~nmrpy.data_objects.FidArray`. See + :meth:`~nmrpy.data_objects.Fid.peakpicker`. + + :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for peak-picking. If None, data array is plotted. + + :keyword assign_only_to_index: if True, assigns selections only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number, if False, assigns to all. 
+ + :keyword voff: vertical offset for spectra + """ + + plot_label = """ +Left - select peak +Ctrl+Left - delete nearest peak +Drag Right - select range +Ctrl+Right - delete range +Ctrl+Alt+Right - assign +""" + self._peakpicker_widget = DataPeakRangeSelector( + self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) + + def peakpicker_traces(self, voff=0.02, lw=1): + """ + Instantiates a widget to pick peaks and ranges employing a polygon + shape (or 'trace'). This is useful for picking peaks that are subject to drift and peaks + that appear (or disappear) during the course of an experiment. + + :keyword voff: vertical offset fraction (0.01) + + :keyword lw: linewidth of plot (1) + + """ + if self.data is None: + raise AttributeError("No FIDs.") + plot_label = """ +Left - add trace point +Right - finalize trace +Ctrl+Left - delete nearest trace +Drag Right - select range +Ctrl+Right - delete range +Ctrl+Alt+Right - assign +""" + self._peakpicker_widget = DataTraceRangeSelector( + self, + voff=voff, + lw=lw, + label=plot_label, + ) + + def clear_peaks(self): + """ + Calls :meth:`~nmrpy.data_objects.Fid.clear_peaks` on every :class:`~nmrpy.data_objects.Fid` + object in this :class:`~nmrpy.data_objects.FidArray`. + """ + for fid in self.get_fids(): + fid.peaks = None + + def clear_ranges(self): + """ + Calls :meth:`~nmrpy.data_objects.Fid.clear_ranges` on every :class:`~nmrpy.data_objects.Fid` + object in this :class:`~nmrpy.data_objects.FidArray`. 
+ """ + for fid in self.get_fids(): + fid.ranges = None + + def _generate_trace_mask(self, traces): + ppm = [numpy.round(numpy.mean(i[0]), 2) for i in traces] + self._trace_mean_ppm = ppm + tt = [i[1] for i in traces] + ln = len(self.data) + filled_tt = [] + for i in tt: + rng = numpy.arange(ln) + if len(i) < ln: + rng[~(~(rng < min(i)) * ~(rng > max(i)))] = -1 + filled_tt.append(rng) + filled_tt = numpy.array(filled_tt) + return filled_tt + + def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): + traces = [dict(zip(i[1], i[0])) for i in traces] + fids = self.get_fids() + fids_i = range(len(self.data)) + for i in fids_i: + peaks = [] + for j in traces: + if i in j: + peak = j[i] + for rng in spans: + if peak >= min(rng) and peak <= max(rng): + peaks.append(peak) + fids[i].peaks = peaks + ranges = [] + for rng in spans: + if any((peaks > min(rng)) * (peaks < max(rng))): + ranges.append(rng) + if ranges == []: + ranges = None + fids[i].ranges = ranges + + def _get_all_summed_peakshapes(self): + """ + Returns peakshapes for all FIDs + """ + peaks = [] + for fid in self.get_fids(): + # x = numpy.arange(len(self.get_fids()[0].data)) + x = numpy.arange(len(self.get_fids()[0].data)) + peaks.append(Fid._f_pks(fid._deconvoluted_peaks, x)) + return peaks + + def _get_all_list_peakshapes(self): + """ + Returns peakshapes for all FIDs + """ + peaks = [] + for fid in self.get_fids(): + # x = numpy.arange(len(self.get_fids()[0].data)) + x = numpy.arange(len(self.get_fids()[0].data)) + peaks.append(Fid._f_pks_list(fid._deconvoluted_peaks, x)) + return peaks + + def _get_truncated_peak_shapes_for_plotting(self): + """ + Produces a set of truncated deconvoluted peaks for plotting. 
+ """ + peakshapes = self._get_all_list_peakshapes() + ppms = [fid._ppm for fid in self.get_fids()] + peakshapes_short_x = [] + peakshapes_short_y = [] + for ps, ppm in zip(peakshapes, ppms): + pk_y = [] + pk_x = [] + for pk in ps: + pk_ind = pk > 0.1 * pk.max() + pk_x.append(ppm[pk_ind]) + pk_y.append(pk[pk_ind]) + peakshapes_short_x.append(pk_x) + peakshapes_short_y.append(pk_y) + return peakshapes_short_x, peakshapes_short_y + + def select_integral_traces(self, voff=0.02, lw=1): + """ + + Instantiate a trace-selection widget to identify deconvoluted peaks. + This can be useful when data are subject to drift. Selected traces on the data + array are translated into a set of nearest deconvoluted peaks, and saved in a + dictionary: :attr:`~nmrpy.data_objects.FidArray.integral_traces`. + + :keyword voff: vertical offset fraction (0.01) + + :keyword lw: linewidth of plot (1) + """ + if self.data is None: + raise AttributeError("No FIDs.") + if (self.deconvoluted_integrals == None).any(): + raise AttributeError("No integrals.") + peakshapes = self._get_all_summed_peakshapes() + # pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() + plot_label = """ +Left - add trace point +Right - finalize trace +Ctrl+Left - delete nearest trace +Ctrl+Alt+Right - assign +""" + self._select_trace_widget = DataTraceSelector( + self, + extra_data=peakshapes, + extra_data_colour="b", + voff=voff, + label=plot_label, + lw=lw, + ) + + def get_integrals_from_traces(self): + """ + Returns a dictionary of integral values for all + :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary + :attr:`~nmrpy.data_objects.FidArray.integral_traces`. + """ + if ( + self.deconvoluted_integrals is None + or None in self.deconvoluted_integrals + ): + raise AttributeError("No integrals.") + if not hasattr(self, "_integral_traces"): + raise AttributeError( + "No integral traces. First run select_integral_traces()." 
+ ) + integrals_set = {} + decon_set = self.deconvoluted_integrals + for i, tr in self.integral_traces.items(): + tr_keys = numpy.array([fid for fid in tr.keys()]) + tr_vals = numpy.array([val for val in tr.values()]) + tr_sort = numpy.argsort(tr_keys) + tr_keys = tr_keys[tr_sort] + tr_vals = tr_vals[tr_sort] + integrals = decon_set[tr_keys, tr_vals] + integrals_set[i] = integrals + return integrals_set + + def assign_integrals(self, integrals_set: list) -> dict: + print("~~~ Method under contruction ~~~") + widget_list = [] + for i, j in enumerate(integrals_set): + widget_list.append((i, list(j))) + return SelectMultiple(options=widget_list, description="Integrals:") + + def save_to_file(self, filename=None, overwrite=False): + """ + Save :class:`~nmrpy.data_objects.FidArray` object to file, including all objects owned. + + :keyword filename: filename to save :class:`~nmrpy.data_objects.FidArray` to + + :keyword overwrite: if True, overwrite existing file + + """ + if filename is None: + basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] + filename = basename + ".nmrpy" + if not isinstance(filename, str): + raise TypeError("filename must be a string.") + if filename[-6:] != ".nmrpy": + filename += ".nmrpy" + if os.path.isfile(filename) and not overwrite: + print("File " + filename + " exists, set overwrite=True to force.") + return 1 + # delete all matplotlib plots to reduce file size + self._del_plots() + for fid in self.get_fids(): + fid._del_plots() + # delete all widgets (can't be pickled) + self._del_widgets() + for fid in self.get_fids(): + fid._del_widgets() + with open(filename, "wb") as f: + pickle.dump(self, f) + + def save_data(self, file_format: str, filename=None, overwrite=False): + print("~~~ Method under contruction ~~~") + if self.force_pyenzyme: + import pyenzyme as pe + + enzymeml = pe.EnzymeMLDocument( + name=self.data_mode.experiment.name + if hasattr(self.data_model.experiment, "name") + else "NMR experiment" + ) + ... 
+ return 1 + if file_format.lower() == ("enzymeml" or "nmrml"): + # model = self.data_model.convert_to( + # template=Path(__file__).parent.parent / "links/enzymeml.toml" + # ) + enzymeml = DataModel.from_git( + url="https://github.com/EnzymeML/enzymeml-specifications.git", + tag="markdown-parser-refactor", + ) + doc = enzymeml.EnzymeMLDocument( + name=( + self.data_model.experiment.name + if hasattr(self.data_model.experiment, "name") + else "NMR experiment" + ), + created=self.data_model.datetime_created, + modified=self.data_model.datetime_modified, + ) + model = doc.xml() + elif file_format.lower() == "xml": + model = self.data_model.xml() + elif file_format.lower() == "json": + model = self.data_model.json() + elif file_format.lower() == "yaml": + model = self.data_model.yaml() + elif file_format.lower() == "hdf5": + model = self.data_model.hdf5() + else: + raise AttributeError( + f"Parameter `file_format` expected to be one of `enzymeml`; `nmrml`; `xml`; `json`; `yaml`; `hdf5`, got {file_format} instead." + ) + if not filename: + basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] + filename = basename + "." + file_format.lower() + if os.path.isfile(filename) and not overwrite: + print("File " + filename + " exists, set overwrite=True to force.") + return 1 + with open(filename, "w") as f: + f.write(model) + + +class Importer(Base): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.data = None + + @property + def data(self): + return self.__data + + @data.setter + def data(self, data): + if data is None: + self.__data = data + elif data.dtype in self._complex_dtypes: + if Importer._is_iter_of_iters(data): + self.__data = data + elif Importer._is_iter(data): + self.__data = numpy.array([data]) + else: + raise TypeError("data must be iterable.") + else: + raise TypeError("data must be complex.") + + def import_fid(self, arrayset=None): + """ + This will first attempt to import Bruker data. Failing that, Varian. 
+ """ + try: + print("Attempting Bruker") + brukerimporter = BrukerImporter(fid_path=self.fid_path) + brukerimporter.import_fid(arrayset=arrayset) + self.data = brukerimporter.data + self._procpar = brukerimporter._procpar + self._file_format = brukerimporter._file_format + return + except (FileNotFoundError, OSError): + print("fid_path does not specify a valid .fid directory.") + return + except (TypeError, IndexError): + print("probably not Bruker data") + try: + print("Attempting Varian") + varianimporter = VarianImporter(fid_path=self.fid_path) + varianimporter.import_fid() + self._procpar = varianimporter._procpar + self.data = varianimporter.data + self._file_format = varianimporter._file_format + return + except TypeError: + print("probably not Varian data") + + +class VarianImporter(Importer): + def import_fid(self): + try: + procpar, data = nmrglue.varian.read(self.fid_path) + self.data = data + self._procpar = procpar + self._file_format = "varian" + except FileNotFoundError: + print("fid_path does not specify a valid .fid directory.") + except OSError: + print("fid_path does not specify a valid .fid directory.") + + +class BrukerImporter(Importer): + def import_fid(self, arrayset=None): + try: + dirs = [ + int(i) + for i in os.listdir(self.fid_path) + if os.path.isdir(self.fid_path + os.path.sep + i) + ] + dirs.sort() + dirs = [str(i) for i in dirs] + alldata = [] + for d in dirs: + procpar, data = nmrglue.bruker.read( + self.fid_path + os.path.sep + d + ) + alldata.append((procpar, data)) + self.alldata = alldata + incr = 1 + while True: + if len(alldata) == 1: + break + if alldata[incr][1].shape == alldata[0][1].shape: + break + incr += 1 + if incr > 1: + if arrayset == None: + print( + "Total of " + + str(incr) + + " alternating FidArrays found." + ) + arrayset = input("Which one to import? 
") + arrayset = int(arrayset) + else: + arrayset = arrayset + if arrayset < 1 or arrayset > incr: + raise ValueError( + "Select a value between 1 and " + str(incr) + "." + ) + else: + arrayset = 1 + self.incr = incr + procpar = alldata[arrayset - 1][0] + data = numpy.vstack( + [d[1] for d in alldata[(arrayset - 1) :: incr]] + ) + self.data = data + self._procpar = procpar + self._file_format = "bruker" + self.data = nmrglue.bruker.remove_digital_filter( + procpar, self.data + ) + ( + self._procpar["tdelta"], + self._procpar["tcum"], + self._procpar["tsingle"], + ) = self._get_time_delta() + self._procpar["arraylength"] = self.data.shape[0] + self._procpar["arrayset"] = arrayset + except FileNotFoundError: + print("fid_path does not specify a valid .fid directory.") + except OSError: + print("fid_path does not specify a valid .fid directory.") + + def _get_time_delta(self): + td = 0.0 + tcum = [] + tsingle = [] + for i in range(self.incr): + pp = self.alldata[i][0]["acqus"] + sw_hz = pp["SW_h"] + at = pp["TD"] / (2 * sw_hz) + d1 = pp["D"][1] + nt = pp["NS"] + tot = (at + d1) * nt / 60.0 # convert to mins + td += tot + tcum.append(td) + tsingle.append(tot) + return (td, numpy.array(tcum), numpy.array(tsingle)) + + +if __name__ == "__main__": + pass diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 670d810..bd4325d 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -12,6 +12,7 @@ import pickle from ipywidgets import SelectMultiple from sdRDM import DataModel +from sdRDM.base.importedmodules import ImportedModules from nmrpy.datamodel.core import * @@ -250,12 +251,11 @@ def __init__(self, *args, **kwargs): self._flags = { "ft": False, } - self.fid_object = FID( + self.fid_object = FIDObject( raw_data=[(str(datum)) for datum in self.data], processed_data=[], nmr_parameters=Parameters(), processing_steps=ProcessingSteps(), - peak_identities=[], ) def __str__(self): @@ -1416,9 +1416,11 @@ def assign_identities(self): containing species 
defined in EnzymeML. When satisfied with assignment, press Assign button to apply. """ - + raise NotImplementedError widget_title = "Assign identities for {}".format(self.id) - self._assigner_widget = IdentityAssigner(fid=self, title=widget_title) + self._assigner_widget = IdentityAssigner( + fid=self, title=widget_title, available_species=[] + ) def clear_identities(self): """ @@ -1483,6 +1485,41 @@ def data_model(self): del self.__data_model print("The current data model has been deleted.") + @property + def enzymeml_document(self): + return self.__enzymeml_document + + @enzymeml_document.setter + def enzymeml_document(self, enzymeml_document: DataModel): + if not isinstance(enzymeml_document, DataModel): + raise AttributeError( + f"Parameter `enzymeml_document` has to be of type `sdrdm.DataModel`, got {type(enzymeml_document)} instead." + ) + self.__enzymeml_document = enzymeml_document + self.__enzymeml_document.modified = datetime.now() + + @enzymeml_document.deleter + def enzymeml_document(self): + del self.__enzymeml_document + print("The current EnzymeML document has been deleted.") + + @property + def enzymeml_library(self): + return self.__enzymeml_library + + @enzymeml_library.setter + def enzymeml_library(self, enzymeml_library: ImportedModules): + if not isinstance(enzymeml_library, ImportedModules): + raise AttributeError( + f"Parameter `enzymeml_library` has to be of type `sdrdm.base.importedmodules.ImportedModules`, got {type(enzymeml_library)} instead." 
+ ) + self.__enzymeml_library = enzymeml_library + + @enzymeml_library.deleter + def enzymeml_library(self): + del self.__enzymeml_library + print("The current EnzymeML library has been deleted.") + def __str__(self): return "FidArray of {} FID(s)".format(len(self.data)) @@ -1694,6 +1731,17 @@ def _setup_params(fid_array): del fid_array._params['nt'] del fid_array._params['acqtime'] + def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: + """Parse an EnzymeML document and its library from specified + file path. + + Args: + path_to_enzymeml_document (str): Path to file containing an EnzymeML document + """ + self.enzymeml_document, self.enzymeml_library = DataModel.parse( + path_to_enzymeml_document + ) + @classmethod def from_data(cls, data): """ diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index e3b9f18..7027d43 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -1,6 +1,6 @@ from .nmrpy import NMRpy from .experiment import Experiment -from .fid import FID +from .fidobject import FIDObject from .parameters import Parameters from .processingsteps import ProcessingSteps from .identity import Identity @@ -16,11 +16,10 @@ from .identifiertypes import IdentifierTypes __doc__ = "" - __all__ = [ "NMRpy", "Experiment", - "FID", + "FIDObject", "Parameters", "ProcessingSteps", "Identity", diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index bfeb06a..be319dd 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -1,19 +1,15 @@ import sdRDM -from typing import List, Optional -from pydantic import Field +from typing import Any, List, Optional +from pydantic import AnyUrl, Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - -from pydantic import AnyUrl -from typing import Any - -from .term import Term +from .publicationtypes import PublicationTypes +from .person import 
Person from .identifiertypes import IdentifierTypes +from .term import Term from .publication import Publication from .subjects import Subjects -from .publicationtypes import PublicationTypes -from .person import Person @forge_signature @@ -111,7 +107,6 @@ def add_to_authors( identifier_type (): Recognized identifier for the person.. Defaults to None identifier_value (): Value of the identifier for the person.. Defaults to None """ - params = { "last_name": last_name, "first_name": first_name, @@ -121,12 +116,9 @@ def add_to_authors( "identifier_type": identifier_type, "identifier_value": identifier_value, } - if id is not None: params["id"] = id - self.authors.append(Person(**params)) - return self.authors[-1] def add_to_keywords( @@ -147,19 +139,15 @@ def add_to_keywords( term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None value (): Value of the term, if applicable.. Defaults to None """ - params = { "name": name, "accession": accession, "term_cv_reference": term_cv_reference, "value": value, } - if id is not None: params["id"] = id - self.keywords.append(Term(**params)) - return self.keywords[-1] def add_to_topics( @@ -180,19 +168,15 @@ def add_to_topics( term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None value (): Value of the term, if applicable.. Defaults to None """ - params = { "name": name, "accession": accession, "term_cv_reference": term_cv_reference, "value": value, } - if id is not None: params["id"] = id - self.topics.append(Term(**params)) - return self.topics[-1] def add_to_related_publications( @@ -215,7 +199,6 @@ def add_to_related_publications( year (): Year of publication.. Defaults to None doi (): The DOI pointing to the publication.. 
Defaults to None """ - params = { "type": type, "title": title, @@ -223,10 +206,7 @@ def add_to_related_publications( "year": year, "doi": doi, } - if id is not None: params["id"] = id - self.related_publications.append(Publication(**params)) - return self.related_publications[-1] diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py index e7e070a..84b73d0 100644 --- a/nmrpy/datamodel/core/cv.py +++ b/nmrpy/datamodel/core/cv.py @@ -1,11 +1,9 @@ import sdRDM from typing import Optional -from pydantic import Field +from pydantic import AnyUrl, Field from sdRDM.base.utils import forge_signature, IDGenerator -from pydantic import AnyUrl - @forge_signature class CV(sdRDM.DataModel): diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index e52a0a3..3863bf7 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -4,13 +4,11 @@ from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - - -from .fidarray import FIDArray -from .fid import FID from .parameters import Parameters +from .fidarray import FIDArray from .processingsteps import ProcessingSteps from .identity import Identity +from .fidobject import FIDObject @forge_signature @@ -36,7 +34,7 @@ class Experiment(sdRDM.DataModel): multiple=True, ) - fid: List[FID] = Field( + fid: List[FIDObject] = Field( description="A single NMR spectrum.", default_factory=ListPlus, multiple=True, @@ -57,17 +55,16 @@ def add_to_fid( id: Optional[str] = None, ) -> None: """ - This method adds an object of type 'FID' to attribute fid + This method adds an object of type 'FIDObject' to attribute fid Args: - id (str): Unique identifier of the 'FID' object. Defaults to 'None'. + id (str): Unique identifier of the 'FIDObject' object. Defaults to 'None'. raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. 
Defaults to ListPlus() processed_data (): Processed data array.. Defaults to ListPlus() nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() """ - params = { "raw_data": raw_data, "processed_data": processed_data, @@ -75,10 +72,7 @@ def add_to_fid( "processing_steps": processing_steps, "peak_identities": peak_identities, } - if id is not None: params["id"] = id - - self.fid.append(FID(**params)) - + self.fid.append(FIDObject(**params)) return self.fid[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py index 5fb1400..1400e9c 100644 --- a/nmrpy/datamodel/core/fidarray.py +++ b/nmrpy/datamodel/core/fidarray.py @@ -8,7 +8,7 @@ @forge_signature class FIDArray(sdRDM.DataModel): - """Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5}""" + """Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. 
Setup time for experiment, Default 0.5}""" id: Optional[str] = Field( description="Unique identifier of the given object.", @@ -17,7 +17,7 @@ class FIDArray(sdRDM.DataModel): ) fids: List[str] = Field( - description="List of `FID.id` belonging to this array.", + description="List of `FIDObject.id` belonging to this array.", multiple=True, default_factory=ListPlus, ) diff --git a/nmrpy/datamodel/core/fid.py b/nmrpy/datamodel/core/fidobject.py similarity index 93% rename from nmrpy/datamodel/core/fid.py rename to nmrpy/datamodel/core/fidobject.py index 42f07bf..f2253f9 100644 --- a/nmrpy/datamodel/core/fid.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -4,21 +4,18 @@ from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - -from pydantic.types import FrozenSet - from .parameters import Parameters from .processingsteps import ProcessingSteps -from .identity import Identity +from .identity import Identity, AssociatedRanges @forge_signature -class FID(sdRDM.DataModel): +class FIDObject(sdRDM.DataModel): """Container for a single NMR spectrum.""" id: Optional[str] = Field( description="Unique identifier of the given object.", - default_factory=IDGenerator("fidINDEX"), + default_factory=IDGenerator("fidobjectINDEX"), xml="@id", ) @@ -64,7 +61,7 @@ def add_to_peak_identities( name: Optional[str] = None, species_id: Optional[str] = None, associated_peaks: List[float] = ListPlus(), - associated_ranges: List[FrozenSet] = ListPlus(), + associated_ranges: List[AssociatedRanges] = ListPlus(), associated_integrals: List[float] = ListPlus(), id: Optional[str] = None, ) -> None: @@ -79,7 +76,6 @@ def add_to_peak_identities( associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() """ - params = { "name": name, "species_id": species_id, @@ -87,10 +83,7 @@ def add_to_peak_identities( "associated_ranges": associated_ranges, "associated_integrals": associated_integrals, } - if id is not None: params["id"] = id - self.peak_identities.append(Identity(**params)) - return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py index 91df6fc..2034bcf 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/identity.py @@ -5,7 +5,18 @@ from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from pydantic.types import FrozenSet + +@forge_signature +class AssociatedRanges(sdRDM.DataModel): + """Small type for attribute 'associated_ranges'""" + + id: Optional[str] = Field( + description="Unique identifier of the given object.", + default_factory=IDGenerator("associatedrangesINDEX"), + xml="@id", + ) + start: Optional[float] = Field() + end: Optional[float] = Field() @forge_signature @@ -34,9 +45,9 @@ class Identity(sdRDM.DataModel): multiple=True, ) - associated_ranges: List[FrozenSet] = Field( - description="Sets of ranges belonging to the given peaks", + associated_ranges: List[AssociatedRanges] = Field( default_factory=ListPlus, + description="Sets of ranges belonging to the given peaks", multiple=True, ) @@ -45,3 +56,23 @@ class Identity(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) + + def add_to_associated_ranges( + self, + start: Optional[float] = None, + end: Optional[float] = None, + id: Optional[str] = None, + ) -> None: + """ + This method adds an object of type 'AssociatedRanges' to attribute associated_ranges + + Args: + id (str): Unique identifier of the 'AssociatedRanges' object. Defaults to 'None'. + start (): . Defaults to None + end (): . 
Defaults to None + """ + params = {"start": start, "end": end} + if id is not None: + params["id"] = id + self.associated_ranges.append(AssociatedRanges(**params)) + return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index fe123a6..89c4731 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -3,9 +3,7 @@ from typing import Optional from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator - from datetime import datetime as Datetime - from .experiment import Experiment from .citation import Citation diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py index c854aeb..f9618a8 100644 --- a/nmrpy/datamodel/core/person.py +++ b/nmrpy/datamodel/core/person.py @@ -4,8 +4,6 @@ from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - - from .identifiertypes import IdentifierTypes diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index 75be413..f2f967e 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -1,15 +1,12 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import AnyUrl, Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - -from pydantic import AnyUrl - -from .identifiertypes import IdentifierTypes from .publicationtypes import PublicationTypes from .person import Person +from .identifiertypes import IdentifierTypes @forge_signature @@ -72,7 +69,6 @@ def add_to_authors( identifier_type (): Recognized identifier for the person.. Defaults to None identifier_value (): Value of the identifier for the person.. 
Defaults to None """ - params = { "last_name": last_name, "first_name": first_name, @@ -82,10 +78,7 @@ def add_to_authors( "identifier_type": identifier_type, "identifier_value": identifier_value, } - if id is not None: params["id"] = id - self.authors.append(Person(**params)) - return self.authors[-1] diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py index 82c67e8..022161f 100644 --- a/nmrpy/datamodel/core/term.py +++ b/nmrpy/datamodel/core/term.py @@ -1,11 +1,9 @@ import sdRDM -from typing import Optional +from typing import Any, Optional from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator -from typing import Any - @forge_signature class Term(sdRDM.DataModel): diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index b916c7a..eab511f 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -2,11 +2,11 @@ classDiagram NMRpy *-- Experiment NMRpy *-- Citation - Experiment *-- FID + Experiment *-- FIDObject Experiment *-- FIDArray - FID *-- Parameters - FID *-- ProcessingSteps - FID *-- Identity + FIDObject *-- Parameters + FIDObject *-- ProcessingSteps + FIDObject *-- Identity Citation *-- Subjects Citation *-- Person Citation *-- Publication @@ -25,11 +25,11 @@ classDiagram class Experiment { +string name* +string[0..*] enzymeml_species - +FID[0..*] fid + +FIDObject[0..*] fid +FIDArray fid_array } - class FID { + class FIDObject { +string[0..*] raw_data +string, float[0..*] processed_data +Parameters nmr_parameters @@ -70,7 +70,7 @@ classDiagram +string name +string species_id +float[0..*] associated_peaks - +frozenset[0..*] associated_ranges + +AssociatedRanges[0..*] associated_ranges +float[0..*] associated_integrals } diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 396c7fd..eeb6837 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -23,14 +23,7 @@ from IPython.display import display import 
asyncio - -SPECIES_DECOY = [ - "TEP", - "PEP", - "3PG", - "2PG", - "Phosphate", -] +from .utils import get_species_from_enzymeml class Plot: @@ -1400,9 +1393,10 @@ class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): class IdentityAssigner: - def __init__(self, fid, title): + def __init__(self, fid, title, available_species): self.fid = fid self.title = title + self.available_species = available_species self.selected_values = {} if fid.peaks is [] or fid.peaks is None: raise RuntimeError( @@ -1443,7 +1437,7 @@ def __init__(self, fid, title): # Define a method to handle the peak dropdown's change event def on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = SPECIES_DECOY + species_dropdown.options = self.available_species species_dropdown.disabled = False save_button.disabled = False @@ -1510,6 +1504,13 @@ class IdentityRangeAssigner: def __init__(self, fid_array): self.fid_array = fid_array self.fids = fid_array.get_fids() + self.available_peaks = [] + self.available_species = [ + species.name + for species in get_species_from_enzymeml( + self.fid_array.enzymeml_document + ) + ] self.selected_fid = None self.selected_values = {} for fid in self.fids: @@ -1565,9 +1566,10 @@ def on_combobox_change(event): if selected_option in combobox.options: peak_dropdown.disabled = False self.selected_fid = self.fid_array.get_fid(selected_option) - peak_dropdown.options = [ + self.available_peaks = [ str(peak) for peak in self.selected_fid.peaks ] + peak_dropdown.options = self.available_peaks # Attach the method to the combobox's change event: combobox.observe(on_combobox_change) @@ -1575,34 +1577,84 @@ def on_combobox_change(event): # Define a method to handle the peak dropdown's change event def on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = SPECIES_DECOY + species_dropdown.options = self.available_species 
species_dropdown.disabled = False - save_button.disabled = False # Attach the method to the dropdown's change event peak_dropdown.observe(on_peak_dropdown_change) - # Define a method to handle the species dropdown's change event + # Define a method to handle the peak dropdown's change event def on_species_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - selected_option = event["new"] - new_key = peak_dropdown.value - self.selected_values[new_key] = selected_option + save_button.disabled = False - # Attach the function to the second dropdown's change event + # Attach the method to the dropdown's change event species_dropdown.observe(on_species_dropdown_change) # Define a function to handle the save button click event def on_save_button_click(b): with selection_output: selection_output.clear_output(wait=True) + # Fetch the values from the species dropdown and peak + # dropdown and add them to a dictionary with species as + # keys + if species_dropdown.value not in self.selected_values: + self.selected_values[species_dropdown.value] = [] + self.selected_values[species_dropdown.value].append( + float(peak_dropdown.value) + ) + # Remove the assigned peaks from the options of the peak + # dropdown, as they cannot belong to two species at once + # and disable the peak dropdown if all are assigned + self.available_peaks.remove(peak_dropdown.value) + peak_dropdown.options = self.available_peaks + if not self.available_peaks: + peak_dropdown.disabled = True + # Iterate over the dictionary of assigned peaks and + # print the saved selections. print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") - for fid in self.fids: - fid.identities = [ - value for value in self.selected_values.values() - ] + # Iterate over every FID, check if a given species + # already exists as an identity in the FID data + # model. 
If it does not, create a new Identity for + # the species and add name and associated peaks to + # it. If it already exists, check if the current + # value is already in the associated peaks and if + # not, append it. + for fid in self.fids: + identity_exists = False + for identity in fid.fid_object.peak_identities: + if identity.name == key: + for peak in value: + if peak not in identity.associated_peaks: + identity.associated_peaks.append(peak) + peak_index = list(fid.peaks).index( + peak + ) + associated_range = list( + list(fid.ranges)[peak_index] + ) + identity.add_to_associated_ranges( + start=associated_range[0], + end=associated_range[1], + ) + identity_exists = True + if not identity_exists: + peak_index = list(fid.peaks).index(value) + associated_range = list( + list(fid.ranges)[peak_index] + ) + fid.fid_object.add_to_peak_identities( + name=key, + associated_peaks=value, + associated_ranges=[ + { + "start": associated_range[0], + "end": associated_range[1], + } + ], + ) reset_button.disabled = False # Attach the function to the save button's click event @@ -1612,10 +1664,21 @@ def on_save_button_click(b): def on_reset_button_click(b): with selection_output: selection_output.clear_output(wait=True) + # Iterate over every FID and clear the Identities in the + # respective data model and reset the selection dict print("\nCleared selections!") for fid in self.fids: - fid.identities = [] + fid.fid_object.peak_identities = [] self.selected_values = {} + # Refill the list of available peaks as before, + # re-enable the peak dropdown, and disable the reset + # button again + self.available_peaks = [ + str(peak) for peak in self.selected_fid.peaks + ] + peak_dropdown.options = self.available_peaks + peak_dropdown.disabled = False + reset_button.disabled = True # Attach the function to the reset click event reset_button.on_click(on_reset_button_click) diff --git a/nmrpy/utils.py b/nmrpy/utils.py new file mode 100644 index 0000000..5732fb1 --- /dev/null +++ 
b/nmrpy/utils.py @@ -0,0 +1,28 @@ +from sdRDM import DataModel + + +def get_species_from_enzymeml(enzymeml_document: DataModel) -> list: + """Iterate over various species elements in EnzymeML document, + extract them, and return them as a list. + + Args: + enzymeml_document (DataModel): An EnzymeML data model. + + Raises: + AttributeError: If enzymeml_document is not of type `sdRDM.DataModel`. + + Returns: + list: Available species in EnzymeML document. + """ + if not isinstance(enzymeml_document, DataModel): + raise AttributeError( + f"Parameter `enzymeml_document` has to be of type `sdrdm.DataModel`, got {type(enzymeml_document)} instead." + ) + available_species = [] + for protein in enzymeml_document.proteins: + available_species.append(protein) + for complex in enzymeml_document.complexes: + available_species.append(complex) + for reactant in enzymeml_document.reactants: + available_species.append(reactant) + return available_species diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index 79011fa..2208435 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -37,7 +37,7 @@ Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specificat - Description: A species object from an EnzymeML document. - Multiple: True - fid - - Type: [FID](#fid) + - Type: [FIDObject](#fidobject) - Description: A single NMR spectrum. - Multiple: True - fid_array @@ -45,7 +45,7 @@ Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specificat - Description: Multiple NMR spectra to be processed together. -### FID +### FIDObject Container for a single NMR spectrum. @@ -174,7 +174,7 @@ Container mapping one or more peaks to the respective species. 
- Description: Peaks belonging to the given species - Multiple: True - associated_ranges - - Type: frozenset + - Type: {start: float, end: float} - Description: Sets of ranges belonging to the given peaks - Multiple: True - associated_integrals @@ -185,11 +185,11 @@ Container mapping one or more peaks to the respective species. ### FIDArray -Container for processing of multiple spectra. Must reference the respective `FID` objects by `id`. {Add reference back. Setup time for experiment, Default 0.5} +Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. Setup time for experiment, Default 0.5} - __fids__ - Type: string - - Description: List of `FID.id` belonging to this array. + - Description: List of `FIDObject.id` belonging to this array. - Multiple: True From 9f7fd0cc46a2ca3fb8f7e8b3f9a0d0751dcf98f5 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Nov 2023 14:13:09 +0100 Subject: [PATCH 13/54] Update IdentityAssigner --- .github/workflows/generate_api.yaml | 34 +-- nmrpy/data_objects.py | 144 +++++-------- nmrpy/datamodel/core/citation.py | 6 +- nmrpy/datamodel/core/experiment.py | 12 +- nmrpy/datamodel/core/fidobject.py | 2 +- nmrpy/datamodel/core/publication.py | 2 +- nmrpy/datamodel/schemes/datamodel_schema.md | 1 - nmrpy/plotting.py | 216 ++++++++++---------- specifications/nmrpy.md | 4 - 9 files changed, 172 insertions(+), 249 deletions(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index 59da890..a3d1a46 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -5,34 +5,10 @@ on: push jobs: generate-api: runs-on: ubuntu-latest - env: - LIB_NAME: ${{secrets.LIB_NAME}} steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python - uses: actions/setup-python@v2 + - name: Generate sdRDM library + uses: JR-1991/generate-sdrdm-api@main with: - python-version: 3.9 - - - name: Install sdRDM - run: 
python3 -m pip install git+https://github.com/JR-1991/software-driven-rdm.git@linking-refactor - - - name: Generate API - env: - URL: ${{github.repositoryUrl}} - COMMIT: ${{github.sha}} - run: sdrdm generate --path ./specifications/ --out ./nmrpy/ --name "LIB_NAME" --url "$URL" --commit "$COMMIT" - - - name: Push source code - run: | - if [[ `git status --porcelain` ]]; then - git add "$LIB_NAME" - git config --global user.name 'sdRDM Bot' - git config --global user.email 'sdRDM@bot.com' - git commit -am "API update" - git push - else - echo "Nothing changed!" - fi + library_name: "datamodel" + output_path: "./nmrpy/" + schema: "true" diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index bd4325d..0c5851f 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -83,9 +83,7 @@ def _file_format(self, file_format): if file_format in self._file_formats: self.__file_format = file_format else: - raise AttributeError( - '_file_format must be "varian", "bruker", or None.' - ) + raise AttributeError('_file_format must be "varian", "bruker", or None.') @classmethod def _is_iter(cls, i): @@ -250,6 +248,7 @@ def __init__(self, *args, **kwargs): self._deconvoluted_peaks = None self._flags = { "ft": False, + "assigned": False, } self.fid_object = FIDObject( raw_data=[(str(datum)) for datum in self.data], @@ -257,6 +256,7 @@ def __init__(self, *args, **kwargs): nmr_parameters=Parameters(), processing_steps=ProcessingSteps(), ) + self.enzymeml_species = None def __str__(self): return "FID: %s (%i data)" % (self.id, len(self.data)) @@ -278,6 +278,14 @@ def processing_steps(self): def processing_steps(self, processing_steps): raise PermissionError("Forbidden!") + @property + def enzymeml_species(self): + return self.__enzymeml_species + + @enzymeml_species.setter + def enzymeml_species(self, enzymeml_species): + self.__enzymeml_species = enzymeml_species + @property def data(self): """ @@ -335,9 +343,7 @@ def ranges(self, ranges): self._ranges = None return if not 
Fid._is_iter_of_iters(ranges) or ranges is None: - raise AttributeError( - "ranges must be an iterable of iterables or None" - ) + raise AttributeError("ranges must be an iterable of iterables or None") ranges = numpy.array(ranges) if ranges.shape[1] != 2: raise AttributeError( @@ -377,9 +383,7 @@ def _bl_ppm(self): def _bl_ppm(self, bl_ppm): if bl_ppm is not None: if not Fid._is_flat_iter(bl_ppm): - raise AttributeError( - "baseline indices must be a flat iterable" - ) + raise AttributeError("baseline indices must be a flat iterable") if len(bl_ppm) > 0: if not all(isinstance(i, numbers.Number) for i in bl_ppm): raise AttributeError("baseline indices must be numbers") @@ -409,9 +413,7 @@ def _bl_poly(self): def _bl_poly(self, bl_poly): if bl_poly is not None: if not Fid._is_flat_iter(bl_poly): - raise AttributeError( - "baseline polynomial must be a flat iterable" - ) + raise AttributeError("baseline polynomial must be a flat iterable") if not all(isinstance(i, numbers.Number) for i in bl_poly): raise AttributeError("baseline polynomial must be numbers") self.__bl_poly = numpy.array(bl_poly) @@ -519,9 +521,7 @@ def deconvoluted_integrals(self): integrals = [] for peak in self._deconvoluted_peaks: int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) - int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int( - peak[3], peak[2] - ) + int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) integrals.append(int_gauss + int_lorentz) return integrals @@ -616,9 +616,7 @@ def emhz(self, lb=5.0): """ self.data = ( numpy.exp( - -numpy.pi - * numpy.arange(len(self.data)) - * (lb / self._params["sw_hz"]) + -numpy.pi * numpy.arange(len(self.data)) * (lb / self._params["sw_hz"]) ) * self.data ) @@ -670,9 +668,7 @@ def _ft(cls, list_params): data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) s = len(data) if file_format == "varian" or file_format == None: - ft_data = numpy.append( - data[int(s / 2.0) :], data[: int(s / 2.0)] - ) + ft_data = 
numpy.append(data[int(s / 2.0) :], data[: int(s / 2.0)]) if file_format == "bruker": ft_data = numpy.append( data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] @@ -1098,12 +1094,12 @@ def _f_conv(cls, parameterset_list, data): peaks_init = cls._f_pks(parameterset_list, x) data_convolution = numpy.convolve(data, peaks_init[::-1]) auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) - max_data_convolution = numpy.where( - data_convolution == data_convolution.max() - )[0][0] - max_auto_convolution = numpy.where( - auto_convolution == auto_convolution.max() - )[0][0] + max_data_convolution = numpy.where(data_convolution == data_convolution.max())[ + 0 + ][0] + max_auto_convolution = numpy.where(auto_convolution == auto_convolution.max())[ + 0 + ][0] return max_data_convolution - max_auto_convolution @classmethod @@ -1190,9 +1186,7 @@ def _f_res(cls, p, data): """ if not isinstance(p, lmfit.parameter.Parameters): - raise TypeError( - "Parameters must be of type lmfit.parameter.Parameters." 
- ) + raise TypeError("Parameters must be of type lmfit.parameter.Parameters.") if not cls._is_flat_iter(data): raise TypeError("data must be a flat iterable.") if not isinstance(data, numpy.ndarray): @@ -1259,9 +1253,7 @@ def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): params[par_name].max = 2.0 * data.max() try: - mz = lmfit.minimize( - cls._f_res, params, args=([data]), method=method - ) + mz = lmfit.minimize(cls._f_res, params, args=([data]), method=method) fits = Fid._parameters_to_list(mz.params) except: fits = None @@ -1289,9 +1281,7 @@ def _parameters_to_list(cls, p): def _deconv_datum(cls, list_parameters): if len(list_parameters) != 5: raise ValueError("list_parameters must consist of five objects.") - if ( - type(list_parameters[1]) == list and len(list_parameters[1]) == 0 - ) or ( + if (type(list_parameters[1]) == list and len(list_parameters[1]) == 0) or ( type(list_parameters[2]) == list and len(list_parameters[2]) == 0 ): return [] @@ -1313,9 +1303,7 @@ def _deconv_datum(cls, list_parameters): for j in zip(peaks, ranges): d_slice = datum[j[1][0] : j[1][1]] p_slice = j[0] - j[1][0] - f = cls._f_fitp( - d_slice, p_slice, frac_gauss=frac_gauss, method=method - ) + f = cls._f_fitp(d_slice, p_slice, frac_gauss=frac_gauss, method=method) f = numpy.array(f).transpose() f[0] += j[1][0] f = f.transpose() @@ -1416,11 +1404,9 @@ def assign_identities(self): containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. 
""" - raise NotImplementedError + # raise NotImplementedError widget_title = "Assign identities for {}".format(self.id) - self._assigner_widget = IdentityAssigner( - fid=self, title=widget_title, available_species=[] - ) + self._assigner_widget = IdentityAssigner(fid=self, title=widget_title) def clear_identities(self): """ @@ -1452,6 +1438,7 @@ def __init__(self): datetime_created=_now, datetime_modified=_now, ) + self.__data_model.experiment = Experiment(name="This is still a test") del _now self._force_pyenzyme = False @@ -1469,7 +1456,12 @@ def force_pyenzyme(self): @property def data_model(self): - return self.__data_model + _data_model = self.__data_model + if not _data_model.experiment: + _data_model.experiment = Experiment(name="This is still a test") + for fid in self.get_fids(): + _data_model.experiment.fid.append(fid.fid_object) + return _data_model @data_model.setter def data_model(self, data_model: DataModel): @@ -1497,6 +1489,11 @@ def enzymeml_document(self, enzymeml_document: DataModel): ) self.__enzymeml_document = enzymeml_document self.__enzymeml_document.modified = datetime.now() + for fid in self.get_fids(): + fid.enzymeml_species = [ + species.name + for species in get_species_from_enzymeml(self.__enzymeml_document) + ] @enzymeml_document.deleter def enzymeml_document(self): @@ -1860,9 +1857,7 @@ def norm_fids(self): dmax = self.data.max() for fid in self.get_fids(): fid.data = fid.data / dmax - fid.fid_object.processed_data = [ - float(datum) for datum in fid.data - ] + fid.fid_object.processed_data = [float(datum) for datum in fid.data] fid.fid_object.processing_steps.is_normalised = True fid.fid_object.processing_steps.max_value = float(dmax) @@ -1926,9 +1921,7 @@ def baseline_correct_fids(self, deg=2): fid.baseline_correct(deg=deg) except: print( - "failed for {}. Perhaps first run baseliner_fids()".format( - fid.id - ) + "failed for {}. 
Perhaps first run baseliner_fids()".format(fid.id) ) print("baseline-correction completed") @@ -1977,9 +1970,7 @@ def integral_traces(self): def integral_traces(self, integral_traces): self._integral_traces = integral_traces - def deconv_fids( - self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0 - ): + def deconv_fids(self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0): """ Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. @@ -1992,9 +1983,7 @@ def deconv_fids( if mp: fids = self.get_fids() if not all(fid._flags["ft"] for fid in fids): - raise ValueError( - "Only Fourier-transformed data can be deconvoluted." - ) + raise ValueError("Only Fourier-transformed data can be deconvoluted.") list_params = [ [ fid.data, @@ -2005,13 +1994,9 @@ def deconv_fids( ] for fid in fids ] - deconv_datum = self._generic_mp( - Fid._deconv_datum, list_params, cpus - ) + deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) for fid, datum in zip(fids, deconv_datum): - fid._deconvoluted_peaks = numpy.array( - [j for i in datum for j in i] - ) + fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) fid.fid_object.processing_steps.is_deconvoluted = True else: for fid in self.get_fids(): @@ -2126,9 +2111,7 @@ def plot_deconv_array(self, **kwargs): plt._plot_deconv_array(self.get_fids(), **kwargs) setattr(self, plt.id, plt) - def calibrate( - self, fid_number=None, assign_only_to_index=False, voff=0.02 - ): + def calibrate(self, fid_number=None, assign_only_to_index=False, voff=0.02): """ Instantiate a GUI widget to select a peak and calibrate spectra in a :class:`~nmrpy.data_objects.FidArray`. 
@@ -2155,9 +2138,7 @@ def calibrate( label=plot_label, ) - def peakpicker( - self, fid_number=None, assign_only_to_index=True, voff=0.02 - ): + def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): """ Instantiate peak-picker widget for @@ -2348,10 +2329,7 @@ def get_integrals_from_traces(self): :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary :attr:`~nmrpy.data_objects.FidArray.integral_traces`. """ - if ( - self.deconvoluted_integrals is None - or None in self.deconvoluted_integrals - ): + if self.deconvoluted_integrals is None or None in self.deconvoluted_integrals: raise AttributeError("No integrals.") if not hasattr(self, "_integral_traces"): raise AttributeError( @@ -2565,9 +2543,7 @@ def import_fid(self, arrayset=None): dirs = [str(i) for i in dirs] alldata = [] for d in dirs: - procpar, data = nmrglue.bruker.read( - self.fid_path + os.path.sep + d - ) + procpar, data = nmrglue.bruker.read(self.fid_path + os.path.sep + d) alldata.append((procpar, data)) self.alldata = alldata incr = 1 @@ -2579,32 +2555,22 @@ def import_fid(self, arrayset=None): incr += 1 if incr > 1: if arrayset == None: - print( - "Total of " - + str(incr) - + " alternating FidArrays found." - ) + print("Total of " + str(incr) + " alternating FidArrays found.") arrayset = input("Which one to import? ") arrayset = int(arrayset) else: arrayset = arrayset if arrayset < 1 or arrayset > incr: - raise ValueError( - "Select a value between 1 and " + str(incr) + "." 
- ) + raise ValueError("Select a value between 1 and " + str(incr) + ".") else: arrayset = 1 self.incr = incr procpar = alldata[arrayset - 1][0] - data = numpy.vstack( - [d[1] for d in alldata[(arrayset - 1) :: incr]] - ) + data = numpy.vstack([d[1] for d in alldata[(arrayset - 1) :: incr]]) self.data = data self._procpar = procpar self._file_format = "bruker" - self.data = nmrglue.bruker.remove_digital_filter( - procpar, self.data - ) + self.data = nmrglue.bruker.remove_digital_filter(procpar, self.data) ( self._procpar["tdelta"], self._procpar["tcum"], diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index be319dd..a07a549 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -4,12 +4,12 @@ from pydantic import AnyUrl, Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .publicationtypes import PublicationTypes from .person import Person from .identifiertypes import IdentifierTypes -from .term import Term -from .publication import Publication from .subjects import Subjects +from .publication import Publication +from .publicationtypes import PublicationTypes +from .term import Term @forge_signature diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index 3863bf7..ab503c2 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -4,11 +4,11 @@ from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .parameters import Parameters -from .fidarray import FIDArray +from .fidobject import FIDObject from .processingsteps import ProcessingSteps +from .parameters import Parameters from .identity import Identity -from .fidobject import FIDObject +from .fidarray import FIDArray @forge_signature @@ -28,12 +28,6 @@ class Experiment(sdRDM.DataModel): description="A descriptive name for the overarching experiment.", 
) - enzymeml_species: List[str] = Field( - description="A species object from an EnzymeML document.", - default_factory=ListPlus, - multiple=True, - ) - fid: List[FIDObject] = Field( description="A single NMR spectrum.", default_factory=ListPlus, diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py index f2253f9..d0fda84 100644 --- a/nmrpy/datamodel/core/fidobject.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -4,8 +4,8 @@ from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .parameters import Parameters from .processingsteps import ProcessingSteps +from .parameters import Parameters from .identity import Identity, AssociatedRanges diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index f2f967e..396d3da 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -4,9 +4,9 @@ from pydantic import AnyUrl, Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .publicationtypes import PublicationTypes from .person import Person from .identifiertypes import IdentifierTypes +from .publicationtypes import PublicationTypes @forge_signature diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index eab511f..85785fa 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -24,7 +24,6 @@ classDiagram class Experiment { +string name* - +string[0..*] enzymeml_species +FIDObject[0..*] fid +FIDArray fid_array } diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index eeb6837..da58de4 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -119,9 +119,7 @@ def _deconv_generator( if not Plot._is_flat_iter(data): raise AttributeError("data must be flat iterable.") - peakshapes = fid._f_pks_list( - fid._deconvoluted_peaks, numpy.arange(len(data)) - 
) + peakshapes = fid._f_pks_list(fid._deconvoluted_peaks, numpy.arange(len(data))) if not Plot._is_iter_of_iters(peakshapes): raise AttributeError("data must be flat iterable.") @@ -173,9 +171,7 @@ def _plot_deconv( residual, upper_ppm, lower_ppm, - ) = self._deconv_generator( - fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm - ) + ) = self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm) self.fig = plt.figure(figsize=[9, 5]) ax = self.fig.add_subplot(111) @@ -226,9 +222,7 @@ def _plot_deconv_array( generated_deconvs = [] for fid in fids: generated_deconvs.append( - self._deconv_generator( - fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm - ) + self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm) ) params = fids[0]._params @@ -496,12 +490,8 @@ def __init__(self, fid): xtcks[-1] = xtcks[-1] - 1 self.ax.set_xticks(xtcks) self.ax.set_xlabel("PPM (%.2f MHz)" % (self.fid._params["reffrq"])) - self.ax.set_xticklabels( - [numpy.round(self.fid._ppm[int(i)], 1) for i in xtcks] - ) - ylims = numpy.array([-1.6, 1.6]) * max( - abs(numpy.array(self.ax.get_ylim())) - ) + self.ax.set_xticklabels([numpy.round(self.fid._ppm[int(i)], 1) for i in xtcks]) + ylims = numpy.array([-1.6, 1.6]) * max(abs(numpy.array(self.ax.get_ylim()))) self.ax.set_ylim(ylims) self.ax.grid() self.visible = True @@ -682,9 +672,7 @@ def press(self, event): self.psm.line.set_data(self.psm.xs, self.psm.ys) if self.show_tracedata: self.psm._yline.set_data(self.psm._xs, self.psm._ys) - elif ( - event.button == self.psm.btn_del and event.key == self.psm.key_mod - ): + elif event.button == self.psm.btn_del and event.key == self.psm.key_mod: if len(self.psm._visual_lines) > 0: x = event.xdata y = event.ydata @@ -905,14 +893,10 @@ def press(self, event): self.lsm.peaks = sorted(self.lsm.peaks)[::-1] # self.ax.draw_artist(self.lsm.peaklines[x]) # Ctrl+left - elif ( - event.button == self.lsm.btn_del and event.key == self.lsm.key_mod - ): + elif event.button == self.lsm.btn_del and 
event.key == self.lsm.key_mod: # find and delete nearest peakline if len(self.lsm.peaks) > 0: - delete_peak = numpy.argmin( - [abs(i - x) for i in self.lsm.peaks] - ) + delete_peak = numpy.argmin([abs(i - x) for i in self.lsm.peaks]) old_peak = self.lsm.peaks.pop(delete_peak) try: peakline = self.lsm.peaklines.pop(old_peak) @@ -997,9 +981,7 @@ def press(self, event): if event.button == self.ssm.btn_add and event.key != self.ssm.key_mod: self.buttonDown = True self.pressv = event.xdata - elif ( - event.button == self.ssm.btn_add and event.key == self.ssm.key_mod - ): + elif event.button == self.ssm.btn_add and event.key == self.ssm.key_mod: # find and delete range if len(self.ssm.ranges) > 0: x = event.xdata @@ -1033,9 +1015,7 @@ def release(self, event): # if (vmax >= i[1]) and (vmax <= i[0]): # spantest = True if span > self.ssm.minspan and spantest is False: - self.ssm.ranges.append( - [numpy.round(vmin, 2), numpy.round(vmax, 2)] - ) + self.ssm.ranges.append([numpy.round(vmin, 2), numpy.round(vmax, 2)]) self.ssm.rangespans.append(self.makespan(vmin, span)) with self.out: print("range {} -> {}".format(vmax, vmin)) @@ -1180,15 +1160,9 @@ def __init__( super().__init__() # calling parent init # self.canvas.blit(self.ax.bbox) - self.cidmotion = self.canvas.mpl_connect( - "motion_notify_event", self.onmove - ) - self.cidpress = self.canvas.mpl_connect( - "button_press_event", self.press - ) - self.cidrelease = self.canvas.mpl_connect( - "button_release_event", self.release - ) + self.cidmotion = self.canvas.mpl_connect("motion_notify_event", self.onmove) + self.cidpress = self.canvas.mpl_connect("button_press_event", self.press) + self.cidrelease = self.canvas.mpl_connect("button_release_event", self.release) self.ciddraw = self.canvas.mpl_connect("draw_event", self.on_draw) # cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) # cursor.horizOn = False @@ -1248,9 +1222,7 @@ def _make_basic_fig(self, *args, **kwargs): self.data.shape[1] ) ] - 
self.y_indices = ( - numpy.arange(len(self.data)) * self.voff * self.data.max() - ) + self.y_indices = numpy.arange(len(self.data)) * self.voff * self.data.max() # this is reversed for zorder # extra_data if self.extra_data is not None: @@ -1265,9 +1237,7 @@ def _make_basic_fig(self, *args, **kwargs): ) # data for i, j in zip(range(len(self.data))[::-1], self.data[::-1]): - self.ax.plot( - self.ppm[::-1], j + self.y_indices[i], color=cl[i], lw=1 - ) + self.ax.plot(self.ppm[::-1], j + self.y_indices[i], color=cl[i], lw=1) self.ax.set_xlabel("ppm") self.ylims = numpy.array(self.ax.get_ylim()) # numpy.array([self.ax.get_ylim()[0], self.data.max() + abs(self.ax.get_ylim()[0])]) @@ -1393,10 +1363,11 @@ class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): class IdentityAssigner: - def __init__(self, fid, title, available_species): + def __init__(self, fid, title): self.fid = fid self.title = title - self.available_species = available_species + self.available_peaks = [str(peak) for peak in self.fid.peaks] + self.available_species = self.fid.enzymeml_species self.selected_values = {} if fid.peaks is [] or fid.peaks is None: raise RuntimeError( @@ -1408,7 +1379,7 @@ def __init__(self, fid, title, available_species): # Create the dropdown widget for the peaks peak_dropdown = Dropdown( - options=[str(peak) for peak in fid.peaks], + options=self.available_peaks, description="Select a peak:", layout={"width": "max-content"}, style={"description_width": "initial"}, @@ -1416,16 +1387,16 @@ def __init__(self, fid, title, available_species): # Create the dropdown widget for the species species_dropdown = Dropdown( - options=[], + options=self.available_species, description="Select a species:", layout={"width": "max-content"}, style={"description_width": "initial"}, - disabled=True, ) # Create the button to save selection to dict save_button = Button( - description="Save selection", icon="file-arrow-down", disabled=True + description="Save selection", + 
icon="file-arrow-down", ) # Create a reset button @@ -1434,36 +1405,66 @@ def __init__(self, fid, title, available_species): # Create an output widget to display the selection selection_output = Output() - # Define a method to handle the peak dropdown's change event - def on_peak_dropdown_change(event): - if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = self.available_species - species_dropdown.disabled = False - save_button.disabled = False - - # Attach the function to the dropdown's change event - peak_dropdown.observe(on_peak_dropdown_change) - - # Define a method to handle the species dropdown's change event - def on_species_dropdown_change(event): - if event["type"] == "change" and event["name"] == "value": - selected_option = event["new"] - new_key = peak_dropdown.value - self.selected_values[new_key] = selected_option - - # Attach the function to the second dropdown's change event - species_dropdown.observe(on_species_dropdown_change) - # Define a function to handle the save button click event def on_save_button_click(b): with selection_output: selection_output.clear_output(wait=True) + # Fetch the values from the species dropdown and peak + # dropdown and add them to a dictionary with species as + # keys + if species_dropdown.value not in self.selected_values: + self.selected_values[species_dropdown.value] = [] + self.selected_values[species_dropdown.value].append( + float(peak_dropdown.value) + ) + # Remove the assigned peaks from the options of the peak + # dropdown, as they cannot belong to two species at once + # and disable the peak dropdown if all are assigned + self.available_peaks.remove(peak_dropdown.value) + peak_dropdown.options = self.available_peaks + if not self.available_peaks: + peak_dropdown.disabled = True + save_button.disabled = True + # Iterate over the dictionary of assigned peaks and + # print the saved selections. 
print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") - self.fid.identities = [ - value for value in self.selected_values.values() - ] + # Check if a given species already exists as an + # identity in the FID data model. If it does not, + # create a new Identity for the species and add name + # and associated peaks to it. If it already exists, + # check if the currentvalue is already in the + # associated peaks and if not, append it. + identity_exists = False + for identity in self.fid.fid_object.peak_identities: + if identity.name == key: + for peak in value: + if peak not in identity.associated_peaks: + identity.associated_peaks.append(peak) + peak_index = list(self.fid.peaks).index(peak) + associated_range = list( + list(self.fid.ranges)[peak_index] + ) + identity.add_to_associated_ranges( + start=float(associated_range[0]), + end=float(associated_range[1]), + ) + identity_exists = True + if not identity_exists: + peak_index = list(self.fid.peaks).index(value) + associated_range = list(list(self.fid.ranges)[peak_index]) + self.fid.fid_object.add_to_peak_identities( + name=key, + associated_peaks=value, + associated_ranges=[ + { + "start": float(associated_range[0]), + "end": float(associated_range[1]), + } + ], + ) + self.fid._flags["assigned"] = True reset_button.disabled = False # Attach the function to the save button's click event @@ -1473,9 +1474,20 @@ def on_save_button_click(b): def on_reset_button_click(b): with selection_output: selection_output.clear_output(wait=True) + # Clear the Identities in the data model and reset the + # selection dict print("\nCleared selections!") - self.fid.identities = [] + fid.fid_object.peak_identities = [] self.selected_values = {} + # Refill the list of available peaks as before, + # re-enable the peak dropdown, and disable the reset + # button again + self.available_peaks = [str(peak) for peak in self.fid.peaks] + peak_dropdown.options = self.available_peaks + 
peak_dropdown.disabled = False + save_button.disabled = False + self.fid._flags["assigned"] = False + reset_button.disabled = True # Attach the function to the reset click event reset_button.on_click(on_reset_button_click) @@ -1507,9 +1519,7 @@ def __init__(self, fid_array): self.available_peaks = [] self.available_species = [ species.name - for species in get_species_from_enzymeml( - self.fid_array.enzymeml_document - ) + for species in get_species_from_enzymeml(self.fid_array.enzymeml_document) ] self.selected_fid = None self.selected_values = {} @@ -1623,35 +1633,33 @@ def on_save_button_click(b): # value is already in the associated peaks and if # not, append it. for fid in self.fids: + if fid._flags["assigned"]: + continue identity_exists = False for identity in fid.fid_object.peak_identities: if identity.name == key: for peak in value: if peak not in identity.associated_peaks: identity.associated_peaks.append(peak) - peak_index = list(fid.peaks).index( - peak - ) + peak_index = list(fid.peaks).index(peak) associated_range = list( list(fid.ranges)[peak_index] ) identity.add_to_associated_ranges( - start=associated_range[0], - end=associated_range[1], + start=float(associated_range[0]), + end=float(associated_range[1]), ) identity_exists = True if not identity_exists: peak_index = list(fid.peaks).index(value) - associated_range = list( - list(fid.ranges)[peak_index] - ) + associated_range = list(list(fid.ranges)[peak_index]) fid.fid_object.add_to_peak_identities( name=key, associated_peaks=value, associated_ranges=[ { - "start": associated_range[0], - "end": associated_range[1], + "start": float(associated_range[0]), + "end": float(associated_range[1]), } ], ) @@ -1673,9 +1681,7 @@ def on_reset_button_click(b): # Refill the list of available peaks as before, # re-enable the peak dropdown, and disable the reset # button again - self.available_peaks = [ - str(peak) for peak in self.selected_fid.peaks - ] + self.available_peaks = [str(peak) for peak in 
self.selected_fid.peaks] peak_dropdown.options = self.available_peaks peak_dropdown.disabled = False reset_button.disabled = True @@ -1742,12 +1748,8 @@ def assign(self): data_traces = self.integral_selector.psm.data_lines index_traces = self.integral_selector.psm.index_lines - self.fid_array._data_traces = [ - dict(zip(i[1], i[0])) for i in data_traces - ] - self.fid_array._index_traces = [ - dict(zip(i[1], i[0])) for i in index_traces - ] + self.fid_array._data_traces = [dict(zip(i[1], i[0])) for i in data_traces] + self.fid_array._index_traces = [dict(zip(i[1], i[0])) for i in index_traces] decon_peaks = [] for i in self.fid_array._deconvoluted_peaks: @@ -1827,13 +1829,9 @@ def assign(self): traces = [[i[0], j[1]] for i, j in zip(data_traces, index_traces)] self.fid_array.traces = traces - self.fid_array._trace_mask = self.fid_array._generate_trace_mask( - traces - ) + self.fid_array._trace_mask = self.fid_array._generate_trace_mask(traces) - self.fid_array._set_all_peaks_ranges_from_traces_and_spans( - traces, spans - ) + self.fid_array._set_all_peaks_ranges_from_traces_and_spans(traces, spans) plt.close(self.peak_selector.fig) @@ -1983,9 +1981,7 @@ def __init__( if fid.data is [] or fid.data is None: raise ValueError("data must exist.") if not fid._flags["ft"]: - raise ValueError( - "Only Fourier-transformed data can be calibrated." 
- ) + raise ValueError("Only Fourier-transformed data can be calibrated.") data = fid.data params = fid._params @@ -1994,9 +1990,7 @@ def __init__( sw = params["sw"] ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] - self.peak_selector = PeakDataSelector( - data, params, title=title, label=label - ) + self.peak_selector = PeakDataSelector(data, params, title=title, label=label) self.peak_selector.process = self.process self.textinput = FloatText( @@ -2062,9 +2056,7 @@ def __init__( if fid_array.data is [] or fid_array.data is None: raise ValueError("data must exist.") if any(not fid._flags["ft"] for fid in self.fids): - raise ValueError( - "Only Fourier-transformed data can be calibrated." - ) + raise ValueError("Only Fourier-transformed data can be calibrated.") data = fid_array.data if y_indices is not None: data = fid_array.data[numpy.array(self.fid_number)] diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index 2208435..0adcf22 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -32,10 +32,6 @@ Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specificat - __name__ - Type: string - Description: A descriptive name for the overarching experiment. -- enzymeml_species - - Type: string - - Description: A species object from an EnzymeML document. - - Multiple: True - fid - Type: [FIDObject](#fidobject) - Description: A single NMR spectrum. 
From aaea4f96d8f5234cd66ff9a97cd0b4c1d6807c64 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Nov 2023 14:16:28 +0100 Subject: [PATCH 14/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index a3d1a46..e4d5975 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -10,5 +10,5 @@ jobs: uses: JR-1991/generate-sdrdm-api@main with: library_name: "datamodel" - output_path: "./nmrpy/" + out_dir: "./nmrpy/" schema: "true" From c1bec905ca77af8da722ab74f1517ff2666e64a6 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Nov 2023 14:27:15 +0100 Subject: [PATCH 15/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index e4d5975..9307ef9 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Generate sdRDM library - uses: JR-1991/generate-sdrdm-api@main + uses: torogi94/generate-sdrdm-api@main with: library_name: "datamodel" out_dir: "./nmrpy/" From ce11a7320ade9621c9860448ced7bab97cd19e5f Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Nov 2023 14:36:17 +0100 Subject: [PATCH 16/54] Update generate_api.yaml --- .github/workflows/generate_api.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml index 9307ef9..c9e17c0 100644 --- a/.github/workflows/generate_api.yaml +++ b/.github/workflows/generate_api.yaml @@ -11,4 +11,3 @@ jobs: with: library_name: "datamodel" out_dir: "./nmrpy/" - schema: "true" From 030dcc53c89edc1b06ed7716cc7667ec0db23a92 Mon Sep 17 00:00:00 2001 From: sdRDM Bot Date: Mon, 20 Nov 2023 13:36:58 +0000 Subject: 
[PATCH 17/54] API update --- nmrpy/datamodel/__init__.py | 4 ++-- nmrpy/datamodel/core/citation.py | 12 ++++++++---- nmrpy/datamodel/core/cv.py | 6 +++++- nmrpy/datamodel/core/experiment.py | 8 ++++++-- nmrpy/datamodel/core/fidarray.py | 6 +++++- nmrpy/datamodel/core/fidobject.py | 8 ++++++-- nmrpy/datamodel/core/identity.py | 10 +++++++++- nmrpy/datamodel/core/nmrpy.py | 8 ++++++-- nmrpy/datamodel/core/parameters.py | 6 +++++- nmrpy/datamodel/core/person.py | 6 +++++- nmrpy/datamodel/core/processingsteps.py | 6 +++++- nmrpy/datamodel/core/publication.py | 8 ++++++-- nmrpy/datamodel/core/term.py | 6 +++++- 13 files changed, 73 insertions(+), 21 deletions(-) diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py index d866ab2..08352f9 100644 --- a/nmrpy/datamodel/__init__.py +++ b/nmrpy/datamodel/__init__.py @@ -1,3 +1,3 @@ -__URL__ = "" -__COMMIT__ = "" +__URL__ = "https://github.com/NMRPy/nmrpy" +__COMMIT__ = "dec2cda6676f8d04070715fe079ed786515ea918" diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py index a07a549..c7834f3 100644 --- a/nmrpy/datamodel/core/citation.py +++ b/nmrpy/datamodel/core/citation.py @@ -1,15 +1,15 @@ import sdRDM from typing import Any, List, Optional -from pydantic import AnyUrl, Field +from pydantic import AnyUrl, Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .person import Person from .identifiertypes import IdentifierTypes -from .subjects import Subjects -from .publication import Publication +from .person import Person from .publicationtypes import PublicationTypes +from .subjects import Subjects from .term import Term +from .publication import Publication @forge_signature @@ -82,6 +82,10 @@ class Citation(sdRDM.DataModel): default="CC BY 4.0", description="License information for this dataset. 
Defaults to `CC BY 4.0`.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) def add_to_authors( self, diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py index 84b73d0..ccc6ca5 100644 --- a/nmrpy/datamodel/core/cv.py +++ b/nmrpy/datamodel/core/cv.py @@ -1,7 +1,7 @@ import sdRDM from typing import Optional -from pydantic import AnyUrl, Field +from pydantic import AnyUrl, Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator @@ -29,3 +29,7 @@ class CV(sdRDM.DataModel): ..., description="URL pointing to the CV used.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index ab503c2..d698bcf 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -1,14 +1,14 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator from .fidobject import FIDObject from .processingsteps import ProcessingSteps -from .parameters import Parameters from .identity import Identity from .fidarray import FIDArray +from .parameters import Parameters @forge_signature @@ -38,6 +38,10 @@ class Experiment(sdRDM.DataModel): default=None, description="Multiple NMR spectra to be processed together.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) def add_to_fid( self, diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py index 1400e9c..fae2599 100644 --- 
a/nmrpy/datamodel/core/fidarray.py +++ b/nmrpy/datamodel/core/fidarray.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -21,3 +21,7 @@ class FIDArray(sdRDM.DataModel): multiple=True, default_factory=ListPlus, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py index d0fda84..513846b 100644 --- a/nmrpy/datamodel/core/fidobject.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -1,12 +1,12 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator from .processingsteps import ProcessingSteps -from .parameters import Parameters from .identity import Identity, AssociatedRanges +from .parameters import Parameters @forge_signature @@ -55,6 +55,10 @@ class FIDObject(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) def add_to_peak_identities( self, diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py index 2034bcf..3f28d1a 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/identity.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -17,6 +17,10 @@ class AssociatedRanges(sdRDM.DataModel): ) start: 
Optional[float] = Field() end: Optional[float] = Field() + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) @forge_signature @@ -56,6 +60,10 @@ class Identity(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) def add_to_associated_ranges( self, diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index 89c4731..9f68800 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -1,11 +1,11 @@ import sdRDM from typing import Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator from datetime import datetime as Datetime -from .experiment import Experiment from .citation import Citation +from .experiment import Experiment @forge_signature @@ -40,3 +40,7 @@ class NMRpy(sdRDM.DataModel): " dataset." 
), ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py index 66f0c37..f2983ef 100644 --- a/nmrpy/datamodel/core/parameters.py +++ b/nmrpy/datamodel/core/parameters.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -67,3 +67,7 @@ class Parameters(sdRDM.DataModel): default=None, description="sw_left", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py index f9618a8..0fc6d07 100644 --- a/nmrpy/datamodel/core/person.py +++ b/nmrpy/datamodel/core/person.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator from .identifiertypes import IdentifierTypes @@ -52,3 +52,7 @@ class Person(sdRDM.DataModel): default=None, description="Value of the identifier for the person.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py index e4c8830..8fdea85 100644 --- a/nmrpy/datamodel/core/processingsteps.py +++ b/nmrpy/datamodel/core/processingsteps.py @@ -1,7 +1,7 @@ import sdRDM from typing import Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from 
sdRDM.base.utils import forge_signature, IDGenerator @@ -79,3 +79,7 @@ class ProcessingSteps(sdRDM.DataModel): default=False, description="Whether or not Baseline correction was performed.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py index 396d3da..f3f8e50 100644 --- a/nmrpy/datamodel/core/publication.py +++ b/nmrpy/datamodel/core/publication.py @@ -1,11 +1,11 @@ import sdRDM from typing import List, Optional -from pydantic import AnyUrl, Field +from pydantic import AnyUrl, Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator -from .person import Person from .identifiertypes import IdentifierTypes +from .person import Person from .publicationtypes import PublicationTypes @@ -44,6 +44,10 @@ class Publication(sdRDM.DataModel): default=None, description="The DOI pointing to the publication.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) def add_to_authors( self, diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py index 022161f..eabbad0 100644 --- a/nmrpy/datamodel/core/term.py +++ b/nmrpy/datamodel/core/term.py @@ -1,7 +1,7 @@ import sdRDM from typing import Any, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator @@ -39,3 +39,7 @@ class Term(sdRDM.DataModel): default=None, description="Value of the term, if applicable.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="dec2cda6676f8d04070715fe079ed786515ea918" + ) From 
41c363ea69f721d702edb28c96c3486164cf8652 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Fri, 19 Apr 2024 13:41:07 +0200 Subject: [PATCH 18/54] Remove Citation info from data model --- nmrpy/data_objects.py | 52 ++++- nmrpy/datamodel/__init__.py | 4 +- nmrpy/datamodel/core/__init__.py | 13 +- nmrpy/datamodel/core/abstractspecies.py | 71 ------- nmrpy/datamodel/core/citation.py | 216 -------------------- nmrpy/datamodel/core/complexdatapoint.py | 32 --- nmrpy/datamodel/core/cv.py | 8 +- nmrpy/datamodel/core/datatypes.py | 10 - nmrpy/datamodel/core/experiment.py | 14 +- nmrpy/datamodel/core/fidarray.py | 6 +- nmrpy/datamodel/core/fidobject.py | 19 +- nmrpy/datamodel/core/identifiertypes.py | 5 - nmrpy/datamodel/core/identity.py | 21 +- nmrpy/datamodel/core/nmrpy.py | 17 +- nmrpy/datamodel/core/parameters.py | 6 +- nmrpy/datamodel/core/person.py | 58 ------ nmrpy/datamodel/core/processingsteps.py | 6 +- nmrpy/datamodel/core/protein.py | 57 ------ nmrpy/datamodel/core/publication.py | 88 -------- nmrpy/datamodel/core/publicationtypes.py | 5 - nmrpy/datamodel/core/reactant.py | 49 ----- nmrpy/datamodel/core/sboterm.py | 35 ---- nmrpy/datamodel/core/subjects.py | 8 - nmrpy/datamodel/core/term.py | 10 +- nmrpy/datamodel/core/vessel.py | 52 ----- nmrpy/datamodel/schemes/datamodel_schema.md | 59 ------ nmrpy/plotting.py | 120 ++++++++++- nmrpy/utils.py | 56 +++++ specifications/nmrpy.md | 129 ------------ 29 files changed, 270 insertions(+), 956 deletions(-) delete mode 100644 nmrpy/datamodel/core/abstractspecies.py delete mode 100644 nmrpy/datamodel/core/citation.py delete mode 100644 nmrpy/datamodel/core/complexdatapoint.py delete mode 100644 nmrpy/datamodel/core/datatypes.py delete mode 100644 nmrpy/datamodel/core/identifiertypes.py delete mode 100644 nmrpy/datamodel/core/person.py delete mode 100644 nmrpy/datamodel/core/protein.py delete mode 100644 nmrpy/datamodel/core/publication.py delete mode 100644 nmrpy/datamodel/core/publicationtypes.py delete mode 100644 
nmrpy/datamodel/core/reactant.py delete mode 100644 nmrpy/datamodel/core/sboterm.py delete mode 100644 nmrpy/datamodel/core/subjects.py delete mode 100644 nmrpy/datamodel/core/vessel.py diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 0c5851f..1f231b8 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -14,6 +14,7 @@ from sdRDM import DataModel from sdRDM.base.importedmodules import ImportedModules from nmrpy.datamodel.core import * +from nmrpy.utils import create_enzymeml class Base: @@ -519,10 +520,24 @@ def deconvoluted_integrals(self): """ if self._deconvoluted_peaks is not None: integrals = [] + i = 0 for peak in self._deconvoluted_peaks: int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) integrals.append(int_gauss + int_lorentz) + + for peak_identity in self.fid_object.peak_identities: + if peak_identity.name == self.identities[i]: + try: + peak_identity.associated_integrals.append( + float(integrals[i]) + ) + except: + peak_identity.associated_integrals = [] + peak_identity.associated_integrals.append( + float(integrals[i]) + ) + i += 1 return integrals def _get_plots(self): @@ -1517,6 +1532,24 @@ def enzymeml_library(self): del self.__enzymeml_library print("The current EnzymeML library has been deleted.") + @property + def concentrations(self): + """ + An array of the concentration for each FID. 
+ """ + return self.__c + + @concentrations.setter + def concentrations(self, c): + if not isinstance(c, dict): + raise TypeError("c must be a dictionary.") + self.__c = c + + @concentrations.deleter + def concentrations(self): + del self.__c + print("The current concentrations have been deleted.") + def __str__(self): return "FidArray of {} FID(s)".format(len(self.data)) @@ -1576,6 +1609,7 @@ def _get_widgets(self): or isinstance(self.__dict__[id], FidArrayRangeSelector) or isinstance(self.__dict__[id], DataTraceRangeSelector) or isinstance(self.__dict__[id], DataTraceSelector) + or isinstance(self.__dict__[id], IdentityRangeAssigner) ] return widgets @@ -2381,14 +2415,22 @@ def save_to_file(self, filename=None, overwrite=False): self._del_widgets() for fid in self.get_fids(): fid._del_widgets() + # delete EnzymeML library & document (can't be pickled) + try: + del self.enzymeml_library + del self.enzymeml_document + except: + pass with open(filename, "wb") as f: pickle.dump(self, f) # TODO: Will probably create a measurement object for each FID(?) # and add them to the EnzymeML document provided # Issue: How to get species for IdentityAssigner? __init__()? - def add_to_enzymeml(self, enzymeml_document=None) -> None: - ... 
+ def to_enzymeml(self, enzymeml_document: DataModel = None) -> DataModel: + if not enzymeml_document: + enzymeml_document = self.enzymeml_document + return create_enzymeml(self, enzymeml_document) # TODO: Refactor save_data method # possibly make saving to EnzymeML a get_measurements method @@ -2465,6 +2507,12 @@ def clear_identities(self): for fid in self.get_fids(): fid.identities = None + def calculate_concentrations(self): + integrals = self.deconvoluted_integrals.transpose() + self._concentration_widget = ConcentrationCalculator( + fid_array=self, integrals=integrals + ) + class Importer(Base): def __init__(self, *args, **kwargs): diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py index 08352f9..d866ab2 100644 --- a/nmrpy/datamodel/__init__.py +++ b/nmrpy/datamodel/__init__.py @@ -1,3 +1,3 @@ -__URL__ = "https://github.com/NMRPy/nmrpy" -__COMMIT__ = "dec2cda6676f8d04070715fe079ed786515ea918" +__URL__ = "" +__COMMIT__ = "" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index 7027d43..f22d8a0 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -5,17 +5,12 @@ from .processingsteps import ProcessingSteps from .identity import Identity from .fidarray import FIDArray -from .citation import Citation -from .person import Person -from .publication import Publication from .cv import CV from .term import Term from .fileformats import FileFormats -from .subjects import Subjects -from .publicationtypes import PublicationTypes -from .identifiertypes import IdentifierTypes __doc__ = "" + __all__ = [ "NMRpy", "Experiment", @@ -24,13 +19,7 @@ "ProcessingSteps", "Identity", "FIDArray", - "Citation", - "Person", - "Publication", "CV", "Term", "FileFormats", - "Subjects", - "PublicationTypes", - "IdentifierTypes", ] diff --git a/nmrpy/datamodel/core/abstractspecies.py b/nmrpy/datamodel/core/abstractspecies.py deleted file mode 100644 index 57e50eb..0000000 --- 
a/nmrpy/datamodel/core/abstractspecies.py +++ /dev/null @@ -1,71 +0,0 @@ -import sdRDM - -from typing import Optional, Union -from pydantic import Field, validator -from sdRDM.base.utils import forge_signature, IDGenerator - -from pydantic import StrictBool - -from .vessel import Vessel - - -@forge_signature -class AbstractSpecies(sdRDM.DataModel): - """This object is used to inherit basic attributes common to all species used in the data model.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("abstractspeciesINDEX"), - xml="@id", - ) - - name: str = Field( - ..., - description="None", - ) - - vessel_id: Union[Vessel, str] = Field( - ..., - reference="Vessel.id", - description="None", - ) - - init_conc: Optional[float] = Field( - default=None, - description="None", - ) - - constant: StrictBool = Field( - ..., - description="None", - ) - - unit: Optional[str] = Field( - default=None, - description="None", - ) - - uri: Optional[str] = Field( - default=None, - description="None", - ) - - creator_id: Optional[str] = Field( - default=None, - description="None", - ) - - @validator("vessel_id") - def get_vessel_id_reference(cls, value): - """Extracts the ID from a given object to create a reference""" - - from .vessel import Vessel - - if isinstance(value, Vessel): - return value.id - elif isinstance(value, str): - return value - else: - raise TypeError( - f"Expected types [Vessel, str] got '{type(value).__name__}' instead." 
- ) diff --git a/nmrpy/datamodel/core/citation.py b/nmrpy/datamodel/core/citation.py deleted file mode 100644 index c7834f3..0000000 --- a/nmrpy/datamodel/core/citation.py +++ /dev/null @@ -1,216 +0,0 @@ -import sdRDM - -from typing import Any, List, Optional -from pydantic import AnyUrl, Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator -from .identifiertypes import IdentifierTypes -from .person import Person -from .publicationtypes import PublicationTypes -from .subjects import Subjects -from .term import Term -from .publication import Publication - - -@forge_signature -class Citation(sdRDM.DataModel): - """Container for various types of metadata primarily used in the publication and citation of the dataset.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("citationINDEX"), - xml="@id", - ) - - title: Optional[str] = Field( - default=None, - description="Title the dataset should have when published.", - ) - - doi: Optional[AnyUrl] = Field( - default=None, - description="DOI pointing to the published dataset", - ) - - description: Optional[str] = Field( - default=None, - description="Description the dataset should have when published.", - ) - - authors: List[Person] = Field( - description="List of authors for this dataset.", - default_factory=ListPlus, - multiple=True, - ) - - subjects: List[Subjects] = Field( - description="List of subjects this dataset belongs to.", - default_factory=ListPlus, - multiple=True, - ) - - keywords: List[Term] = Field( - description="List of CV-based keywords describing the dataset.", - default_factory=ListPlus, - multiple=True, - ) - - topics: List[Term] = Field( - description="List of CV-based topics the dataset addresses.", - default_factory=ListPlus, - multiple=True, - ) - - related_publications: List[Publication] = Field( - description="List of publications relating to this dataset.", - 
default_factory=ListPlus, - multiple=True, - ) - - notes: Optional[str] = Field( - default=None, - description="Additional notes about the dataset.", - ) - - funding: List[str] = Field( - description="Funding information for this dataset.", - default_factory=ListPlus, - multiple=True, - ) - - license: Optional[str] = Field( - default="CC BY 4.0", - description="License information for this dataset. Defaults to `CC BY 4.0`.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) - - def add_to_authors( - self, - last_name: str, - first_name: str, - middle_names: List[str] = ListPlus(), - affiliation: Optional[str] = None, - email: Optional[str] = None, - identifier_type: Optional[IdentifierTypes] = None, - identifier_value: Optional[str] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Person' to attribute authors - - Args: - id (str): Unique identifier of the 'Person' object. Defaults to 'None'. - last_name (): Family name of the person.. - first_name (): Given name of the person.. - middle_names (): List of middle names of the person.. Defaults to ListPlus() - affiliation (): Institution the Person belongs to.. Defaults to None - email (): Email address of the person.. Defaults to None - identifier_type (): Recognized identifier for the person.. Defaults to None - identifier_value (): Value of the identifier for the person.. 
Defaults to None - """ - params = { - "last_name": last_name, - "first_name": first_name, - "middle_names": middle_names, - "affiliation": affiliation, - "email": email, - "identifier_type": identifier_type, - "identifier_value": identifier_value, - } - if id is not None: - params["id"] = id - self.authors.append(Person(**params)) - return self.authors[-1] - - def add_to_keywords( - self, - name: str, - accession: str, - term_cv_reference: Optional[str] = None, - value: Optional[Any] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Term' to attribute keywords - - Args: - id (str): Unique identifier of the 'Term' object. Defaults to 'None'. - name (): The preferred name of the term associated with the given accession number.. - accession (): Accession number of the term in the controlled vocabulary.. - term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None - value (): Value of the term, if applicable.. Defaults to None - """ - params = { - "name": name, - "accession": accession, - "term_cv_reference": term_cv_reference, - "value": value, - } - if id is not None: - params["id"] = id - self.keywords.append(Term(**params)) - return self.keywords[-1] - - def add_to_topics( - self, - name: str, - accession: str, - term_cv_reference: Optional[str] = None, - value: Optional[Any] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Term' to attribute topics - - Args: - id (str): Unique identifier of the 'Term' object. Defaults to 'None'. - name (): The preferred name of the term associated with the given accession number.. - accession (): Accession number of the term in the controlled vocabulary.. - term_cv_reference (): Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.. Defaults to None - value (): Value of the term, if applicable.. 
Defaults to None - """ - params = { - "name": name, - "accession": accession, - "term_cv_reference": term_cv_reference, - "value": value, - } - if id is not None: - params["id"] = id - self.topics.append(Term(**params)) - return self.topics[-1] - - def add_to_related_publications( - self, - type: PublicationTypes, - title: str, - authors: List[Person] = ListPlus(), - year: Optional[int] = None, - doi: Optional[AnyUrl] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Publication' to attribute related_publications - - Args: - id (str): Unique identifier of the 'Publication' object. Defaults to 'None'. - type (): Nature of the publication.. - title (): Title of the publication.. - authors (): Authors of the publication.. Defaults to ListPlus() - year (): Year of publication.. Defaults to None - doi (): The DOI pointing to the publication.. Defaults to None - """ - params = { - "type": type, - "title": title, - "authors": authors, - "year": year, - "doi": doi, - } - if id is not None: - params["id"] = id - self.related_publications.append(Publication(**params)) - return self.related_publications[-1] diff --git a/nmrpy/datamodel/core/complexdatapoint.py b/nmrpy/datamodel/core/complexdatapoint.py deleted file mode 100644 index 44ce1c2..0000000 --- a/nmrpy/datamodel/core/complexdatapoint.py +++ /dev/null @@ -1,32 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class ComplexDataPoint(sdRDM.DataModel): - """Container for a complex number from the Free Induction Decay.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("complexdatapointINDEX"), - xml="@id", - ) - - real_part: Optional[float] = Field( - default=None, - description=( - "Real part of the complex number. Equivalent to `z.real` with `z` being a" - " `complex` number in Python." 
- ), - ) - - imaginary_part: Optional[float] = Field( - default=None, - description=( - "Imaginary part of the complex number. Equivalent to `z.imag` with `z`" - " being a `complex` number in Python." - ), - ) diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py index ccc6ca5..e7e070a 100644 --- a/nmrpy/datamodel/core/cv.py +++ b/nmrpy/datamodel/core/cv.py @@ -1,9 +1,11 @@ import sdRDM from typing import Optional -from pydantic import AnyUrl, Field, PrivateAttr +from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator +from pydantic import AnyUrl + @forge_signature class CV(sdRDM.DataModel): @@ -29,7 +31,3 @@ class CV(sdRDM.DataModel): ..., description="URL pointing to the CV used.", ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/datatypes.py b/nmrpy/datamodel/core/datatypes.py deleted file mode 100644 index 92b2754..0000000 --- a/nmrpy/datamodel/core/datatypes.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class DataTypes(Enum): - CONCENTRATION = "conc" - ABSORPTION = "abs" - FEED = "feed" - BIOMASS = "biomass" - CONVERSION = "conversion" - PEAK_AREA = "peak-area" diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index d698bcf..e379b4f 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -1,14 +1,16 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator + + from .fidobject import FIDObject from .processingsteps import ProcessingSteps -from .identity import Identity from .fidarray import FIDArray from .parameters import Parameters +from .identity import Identity @forge_signature @@ -38,10 +40,6 @@ 
class Experiment(sdRDM.DataModel): default=None, description="Multiple NMR spectra to be processed together.", ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) def add_to_fid( self, @@ -63,6 +61,7 @@ def add_to_fid( processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() """ + params = { "raw_data": raw_data, "processed_data": processed_data, @@ -70,7 +69,10 @@ def add_to_fid( "processing_steps": processing_steps, "peak_identities": peak_identities, } + if id is not None: params["id"] = id + self.fid.append(FIDObject(**params)) + return self.fid[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py index fae2599..1400e9c 100644 --- a/nmrpy/datamodel/core/fidarray.py +++ b/nmrpy/datamodel/core/fidarray.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -21,7 +21,3 @@ class FIDArray(sdRDM.DataModel): multiple=True, default_factory=ListPlus, ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py index 513846b..61e4f00 100644 --- a/nmrpy/datamodel/core/fidobject.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -1,12 +1,15 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import 
forge_signature, IDGenerator + + from .processingsteps import ProcessingSteps -from .identity import Identity, AssociatedRanges from .parameters import Parameters +from .identity import Identity +from .identity import AssociatedRanges @forge_signature @@ -35,16 +38,16 @@ class FIDObject(sdRDM.DataModel): ) nmr_parameters: Optional[Parameters] = Field( - default=Parameters(), description="Contains commonly-used NMR parameters.", + default_factory=Parameters, ) processing_steps: Optional[ProcessingSteps] = Field( - default=ProcessingSteps(), description=( "Contains the processing steps performed, as well as the parameters used" " for them." ), + default_factory=ProcessingSteps, ) peak_identities: List[Identity] = Field( @@ -55,10 +58,6 @@ class FIDObject(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) def add_to_peak_identities( self, @@ -80,6 +79,7 @@ def add_to_peak_identities( associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() """ + params = { "name": name, "species_id": species_id, @@ -87,7 +87,10 @@ def add_to_peak_identities( "associated_ranges": associated_ranges, "associated_integrals": associated_integrals, } + if id is not None: params["id"] = id + self.peak_identities.append(Identity(**params)) + return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/identifiertypes.py b/nmrpy/datamodel/core/identifiertypes.py deleted file mode 100644 index f4bf8fe..0000000 --- a/nmrpy/datamodel/core/identifiertypes.py +++ /dev/null @@ -1,5 +0,0 @@ -from enum import Enum - - -class IdentifierTypes(Enum): - ORCID = "ORCID" diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py index 3f28d1a..17c838b 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/identity.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -15,12 +15,10 @@ class AssociatedRanges(sdRDM.DataModel): default_factory=IDGenerator("associatedrangesINDEX"), xml="@id", ) + start: Optional[float] = Field() + end: Optional[float] = Field() - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) @forge_signature @@ -60,10 +58,6 @@ class Identity(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) def add_to_associated_ranges( self, @@ -79,8 +73,15 @@ def add_to_associated_ranges( start (): . Defaults to None end (): . 
Defaults to None """ - params = {"start": start, "end": end} + + params = { + "start": start, + "end": end, + } + if id is not None: params["id"] = id + self.associated_ranges.append(AssociatedRanges(**params)) + return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index 9f68800..ab19951 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -1,10 +1,11 @@ import sdRDM from typing import Optional -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator + from datetime import datetime as Datetime -from .citation import Citation + from .experiment import Experiment @@ -32,15 +33,3 @@ class NMRpy(sdRDM.DataModel): default=None, description="List of experiments associated with this dataset.", ) - - citation: Optional[Citation] = Field( - default=Citation(), - description=( - "Relevant information regarding the publication and citation of this" - " dataset." 
- ), - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py index f2983ef..66f0c37 100644 --- a/nmrpy/datamodel/core/parameters.py +++ b/nmrpy/datamodel/core/parameters.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -67,7 +67,3 @@ class Parameters(sdRDM.DataModel): default=None, description="sw_left", ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/person.py b/nmrpy/datamodel/core/person.py deleted file mode 100644 index 0fc6d07..0000000 --- a/nmrpy/datamodel/core/person.py +++ /dev/null @@ -1,58 +0,0 @@ -import sdRDM - -from typing import List, Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator -from .identifiertypes import IdentifierTypes - - -@forge_signature -class Person(sdRDM.DataModel): - """Container for information regarding a person that worked on an experiment.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("personINDEX"), - xml="@id", - ) - - last_name: str = Field( - ..., - description="Family name of the person.", - ) - - first_name: str = Field( - ..., - description="Given name of the person.", - ) - - middle_names: List[str] = Field( - description="List of middle names of the person.", - default_factory=ListPlus, - multiple=True, - ) - - affiliation: Optional[str] = Field( - default=None, - description="Institution 
the Person belongs to.", - ) - - email: Optional[str] = Field( - default=None, - description="Email address of the person.", - ) - - identifier_type: Optional[IdentifierTypes] = Field( - default=None, - description="Recognized identifier for the person.", - ) - - identifier_value: Optional[str] = Field( - default=None, - description="Value of the identifier for the person.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py index 8fdea85..e4c8830 100644 --- a/nmrpy/datamodel/core/processingsteps.py +++ b/nmrpy/datamodel/core/processingsteps.py @@ -1,7 +1,7 @@ import sdRDM from typing import Optional -from pydantic import Field, PrivateAttr +from pydantic import Field from sdRDM.base.utils import forge_signature, IDGenerator @@ -79,7 +79,3 @@ class ProcessingSteps(sdRDM.DataModel): default=False, description="Whether or not Baseline correction was performed.", ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/protein.py b/nmrpy/datamodel/core/protein.py deleted file mode 100644 index efcc389..0000000 --- a/nmrpy/datamodel/core/protein.py +++ /dev/null @@ -1,57 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field -from sdRDM.base.utils import forge_signature, IDGenerator - - -from .sboterm import SBOTerm - - -@forge_signature -class Protein(sdRDM.DataModel): - """This objects describes the proteins that were used or produced in the course of the experiment.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("proteinINDEX"), - xml="@id", - ) - - sequence: str = Field( - ..., - 
description="Amino acid sequence of the protein", - template_alias="Sequence", - ) - - ecnumber: Optional[str] = Field( - default=None, - description="EC number of the protein.", - regex="(\d+.)(\d+.)(\d+.)(\d+)", - template_alias="EC Number", - ) - - organism: Optional[str] = Field( - default=None, - description="Organism the protein was expressed in.", - template_alias="Source organism", - ) - - organism_tax_id: Optional[str] = Field( - default=None, - description="Taxonomy identifier of the expression host.", - ) - - uniprotid: Optional[str] = Field( - default=None, - description=( - "Unique identifier referencing a protein entry at UniProt. Use this" - " identifier to initialize the object from the UniProt database." - ), - template_alias="UniProt ID", - ) - - ontology: SBOTerm = Field( - description="None", - default=SBOTerm.CATALYST, - ) diff --git a/nmrpy/datamodel/core/publication.py b/nmrpy/datamodel/core/publication.py deleted file mode 100644 index f3f8e50..0000000 --- a/nmrpy/datamodel/core/publication.py +++ /dev/null @@ -1,88 +0,0 @@ -import sdRDM - -from typing import List, Optional -from pydantic import AnyUrl, Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator -from .identifiertypes import IdentifierTypes -from .person import Person -from .publicationtypes import PublicationTypes - - -@forge_signature -class Publication(sdRDM.DataModel): - """Container for citation information of a relevant publication.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("publicationINDEX"), - xml="@id", - ) - - type: PublicationTypes = Field( - ..., - description="Nature of the publication.", - ) - - title: str = Field( - ..., - description="Title of the publication.", - ) - - authors: List[Person] = Field( - description="Authors of the publication.", - multiple=True, - default_factory=ListPlus, - ) - - year: Optional[int] = 
Field( - default=None, - description="Year of publication.", - ) - - doi: Optional[AnyUrl] = Field( - default=None, - description="The DOI pointing to the publication.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) - - def add_to_authors( - self, - last_name: str, - first_name: str, - middle_names: List[str] = ListPlus(), - affiliation: Optional[str] = None, - email: Optional[str] = None, - identifier_type: Optional[IdentifierTypes] = None, - identifier_value: Optional[str] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Person' to attribute authors - - Args: - id (str): Unique identifier of the 'Person' object. Defaults to 'None'. - last_name (): Family name of the person.. - first_name (): Given name of the person.. - middle_names (): List of middle names of the person.. Defaults to ListPlus() - affiliation (): Institution the Person belongs to.. Defaults to None - email (): Email address of the person.. Defaults to None - identifier_type (): Recognized identifier for the person.. Defaults to None - identifier_value (): Value of the identifier for the person.. 
Defaults to None - """ - params = { - "last_name": last_name, - "first_name": first_name, - "middle_names": middle_names, - "affiliation": affiliation, - "email": email, - "identifier_type": identifier_type, - "identifier_value": identifier_value, - } - if id is not None: - params["id"] = id - self.authors.append(Person(**params)) - return self.authors[-1] diff --git a/nmrpy/datamodel/core/publicationtypes.py b/nmrpy/datamodel/core/publicationtypes.py deleted file mode 100644 index f5974ef..0000000 --- a/nmrpy/datamodel/core/publicationtypes.py +++ /dev/null @@ -1,5 +0,0 @@ -from enum import Enum - - -class PublicationTypes(Enum): - ARTICLE = "Journal article" diff --git a/nmrpy/datamodel/core/reactant.py b/nmrpy/datamodel/core/reactant.py deleted file mode 100644 index faf65c1..0000000 --- a/nmrpy/datamodel/core/reactant.py +++ /dev/null @@ -1,49 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field -from sdRDM.base.utils import forge_signature, IDGenerator - - -from .sboterm import SBOTerm - - -@forge_signature -class Reactant(sdRDM.DataModel): - """This objects describes the reactants that were used or produced in the course of the experiment.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("reactantINDEX"), - xml="@id", - ) - - smiles: Optional[str] = Field( - default=None, - description=( - "Simplified Molecular Input Line Entry System (SMILES) encoding of the" - " reactant." - ), - template_alias="SMILES", - ) - - inchi: Optional[str] = Field( - default=None, - description=( - "International Chemical Identifier (InChI) encoding of the reactant." - ), - template_alias="InCHI", - ) - - chebi_id: Optional[str] = Field( - default=None, - description=( - "Unique identifier of the CHEBI database. Use this identifier to initialize" - " the object from the CHEBI database." 
- ), - ) - - ontology: SBOTerm = Field( - description="None", - default=SBOTerm.SMALL_MOLECULE, - ) diff --git a/nmrpy/datamodel/core/sboterm.py b/nmrpy/datamodel/core/sboterm.py deleted file mode 100644 index 74d2eb6..0000000 --- a/nmrpy/datamodel/core/sboterm.py +++ /dev/null @@ -1,35 +0,0 @@ -from enum import Enum - - -class SBOTerm(Enum): - BIOCHEMICAL_REACTION = "SBO:0000176" - ACID_BASE_REACTION = "SBO:0000208" - CONFORMATIONAL_TRANSITION = "SBO:0000181" - CONVERSION = "SBO:0000182" - DEGRADATION = "SBO:0000179" - DISSOCIATION = "SBO:0000180" - IONISATION = "SBO:0000209" - ISOMERISATION = "SBO:0000377" - NON_COVALENT_BINDING = "SBO:0000177" - REDOX_REACTION = "SBO:0000200" - SPONTANEOUS_REACTION = "SBO:0000672" - PROTEIN = "SBO:0000252" - GENE = "SBO:0000251" - SMALL_MOLECULE = "SBO:0000247" - ION = "SBO:0000327" - RADICAL = "SBO:0000328" - INTERACTOR = "SBO:0000336" - SUBSTRATE = "SBO:0000015" - PRODUCT = "SBO:0000011" - CATALYST = "SBO:0000013" - INHIBITOR = "SBO:0000020" - ESSENTIAL_ACTIVATOR = "SBO:0000461" - NON_ESSENTIAL_ACTIVATOR = "SBO:0000462" - POTENTIATOR = "SBO:0000021" - MACROMOLECULAR_COMPLEX = "SBO:0000296" - PROTEIN_COMPLEX = "SBO:0000297" - DIMER = "SBO:0000607" - MICHAELIS_MENTEN = "SBO:0000028" - K_CAT = "SBO:0000025" - K_M = "SBO:0000027" - V_MAX = "SBO:0000186" diff --git a/nmrpy/datamodel/core/subjects.py b/nmrpy/datamodel/core/subjects.py deleted file mode 100644 index d343f01..0000000 --- a/nmrpy/datamodel/core/subjects.py +++ /dev/null @@ -1,8 +0,0 @@ -from enum import Enum - - -class Subjects(Enum): - BIOLOGY = "Biology" - CHEMISTRY = "Chemistry" - IT = "Computer and Information Science" - PHYSICS = "Physics" diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py index eabbad0..82c67e8 100644 --- a/nmrpy/datamodel/core/term.py +++ b/nmrpy/datamodel/core/term.py @@ -1,9 +1,11 @@ import sdRDM -from typing import Any, Optional -from pydantic import Field, PrivateAttr +from typing import Optional +from pydantic import 
Field from sdRDM.base.utils import forge_signature, IDGenerator +from typing import Any + @forge_signature class Term(sdRDM.DataModel): @@ -39,7 +41,3 @@ class Term(sdRDM.DataModel): default=None, description="Value of the term, if applicable.", ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="dec2cda6676f8d04070715fe079ed786515ea918" - ) diff --git a/nmrpy/datamodel/core/vessel.py b/nmrpy/datamodel/core/vessel.py deleted file mode 100644 index 5dc6fb5..0000000 --- a/nmrpy/datamodel/core/vessel.py +++ /dev/null @@ -1,52 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field -from sdRDM.base.utils import forge_signature, IDGenerator - -from pydantic import StrictBool -from pydantic import PositiveFloat - - -@forge_signature -class Vessel(sdRDM.DataModel): - """This object describes vessels in which the experiment has been carried out. These can include any type of vessel used in biocatalytic experiments.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("vesselINDEX"), - xml="@id", - ) - - name: str = Field( - ..., - description="Name of the used vessel.", - template_alias="Name", - ) - - volume: PositiveFloat = Field( - ..., - description="Volumetric value of the vessel.", - template_alias="Volume value", - ) - - unit: str = Field( - ..., - description="Volumetric unit of the vessel.", - template_alias="Volume unit", - ) - - constant: StrictBool = Field( - description="Whether the volume of the vessel is constant or not.", - default=True, - ) - - uri: Optional[str] = Field( - default=None, - description="URI of the vessel.", - ) - - creator_id: Optional[str] = Field( - default=None, - description="Unique identifier of the author.", - ) diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index 85785fa..0cf4664 100644 --- 
a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -1,25 +1,16 @@ ```mermaid classDiagram NMRpy *-- Experiment - NMRpy *-- Citation Experiment *-- FIDObject Experiment *-- FIDArray FIDObject *-- Parameters FIDObject *-- ProcessingSteps FIDObject *-- Identity - Citation *-- Subjects - Citation *-- Person - Citation *-- Publication - Citation *-- Term - Person *-- IdentifierTypes - Publication *-- PublicationTypes - Publication *-- Person class NMRpy { +datetime datetime_created* +datetime datetime_modified +Experiment experiment - +Citation citation } class Experiment { @@ -77,38 +68,6 @@ classDiagram +string[0..*] fids* } - class Citation { - +string title - +URL doi - +string description - +Person[0..*] authors - +Subjects[0..*] subjects - +Term[0..*] keywords - +Term[0..*] topics - +Publication[0..*] related_publications - +string notes - +string[0..*] funding - +string license - } - - class Person { - +string last_name* - +string first_name* - +string[0..*] middle_names - +string affiliation - +string email - +IdentifierTypes identifier_type - +string identifier_value - } - - class Publication { - +PublicationTypes type* - +string title* - +Person[0..*] authors* - +integer year - +URL doi - } - class CV { +string vocabulary* +string version* @@ -129,22 +88,4 @@ classDiagram +NONE } - class Subjects { - << Enumeration >> - +BIOLOGY - +CHEMISTRY - +IT - +PHYSICS - } - - class PublicationTypes { - << Enumeration >> - +ARTICLE - } - - class IdentifierTypes { - << Enumeration >> - +ORCID - } - ``` \ No newline at end of file diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index da58de4..ebed113 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -19,11 +19,13 @@ Label, Button, Combobox, + Text, + HTML, ) from IPython.display import display import asyncio -from .utils import get_species_from_enzymeml +from .utils import get_species_from_enzymeml, get_ordered_list_of_species_names class Plot: @@ -185,7 +187,9 @@ 
def _plot_deconv( ax.text( ppm[numpy.argmax(peak)], label_pad + peak.max(), - str(n), + get_ordered_list_of_species_names(fid)[n] + if fid.fid_object.peak_identities + else str(n), ha="center", ) ax.invert_xaxis() @@ -1464,6 +1468,7 @@ def on_save_button_click(b): } ], ) + self.fid.identities = get_ordered_list_of_species_names(self.fid) self.fid._flags["assigned"] = True reset_button.disabled = False @@ -1486,6 +1491,7 @@ def on_reset_button_click(b): peak_dropdown.options = self.available_peaks peak_dropdown.disabled = False save_button.disabled = False + self.fid.identities = None self.fid._flags["assigned"] = False reset_button.disabled = True @@ -1663,6 +1669,7 @@ def on_save_button_click(b): } ], ) + fid.identities = get_ordered_list_of_species_names(fid) reset_button.disabled = False # Attach the function to the save button's click event @@ -1677,6 +1684,7 @@ def on_reset_button_click(b): print("\nCleared selections!") for fid in self.fids: fid.fid_object.peak_identities = [] + fid.identities = None self.selected_values = {} # Refill the list of available peaks as before, # re-enable the peak dropdown, and disable the reset @@ -2203,5 +2211,113 @@ def assign(self): plt.close(self.span_selector.fig) +class ConcentrationCalculator: + def __init__(self, fid_array, integrals): + self.fid_array = fid_array + self.integrals = integrals + self.fids = fid_array.get_fids() + self.available_species = get_ordered_list_of_species_names( + self.fid_array.get_fid("fid00") + ) + self.equation = "" + + # Create the label widget for the title + title_label = Label( + value="[WORK IN PROGRESS] Calculate concentrations from peak integrals for all FIDs [WORK IN PROGRESS]" + ) + + # Create the dropdown widget for the internal standard + standard_dropdown = Dropdown( + options=self.available_species, + description="Select the internal standard:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + # Create a text input widget for the concentration 
equation + concentration_equation = Text( + value="", + placeholder="Enter the equation for the concentration here", + description="Concentration equation:", + layout={"width": "auto"}, + style={ + "description_width": "initial", + }, + ) + + # Create an HTML widget to display a legend for the concentration equation + legend_html = HTML( + value="'c_s': Concentration of species
'c_n': Concentration of internal standard
'x_s': Peak integral of species
'x_n': Peak integral of internal standard
Example: c_s = c_n * x_s / x_n", + description="Legend:", + ) + + # Create a button to calculate the concentrations + calculate_button = Button( + description="Calculate concentrations", + icon="calculator", + layout={"width": "max-content"}, + disabled=True, + ) + + # Create an output widget to display the calculation progress + output = Output() + + # Define a method to handle the text input widget's change event + def on_text_change(event): + if event["type"] == "change" and event["name"] == "value": + calculate_button.disabled = False + + # Attach the method to the text input widget's change event + concentration_equation.observe(on_text_change) + + # Define a method to handle the calculate button's click event + def on_calculate_button_click(b): + with output: + output.clear_output(wait=True) + # Fetch the values from the standard dropdown and the + # text widget and add them to a dictionary with species as + # keys + print("\nCalculating concentrations...") + if ( + not concentration_equation.value.replace(" ", "") + == "c_s=c_n*x_s/x_n" + ): + raise NotImplementedError( + "Only the example formula is currently supported." + ) + else: + # TODO: Currently hard-coded for the example data + standard_index = self.available_species.index( + standard_dropdown.value + ) + self.fid_array.concentrations = { + species: 5 + * concentration + / self.integrals[standard_index].mean() + for species, concentration in zip( + self.available_species, self.integrals + ) + } + print(f"Done! 
Get concentrations with `FidArray.concentrations`.") + + # Attach the function to the calculate button's click event + calculate_button.on_click(on_calculate_button_click) + + # Create the container + container = VBox( + [ + title_label, + standard_dropdown, + concentration_equation, + legend_html, + calculate_button, + output, + ] + ) + + # Display the container + display(container) + + if __name__ == "__main__": pass diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 5732fb1..8d206d9 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -26,3 +26,59 @@ def get_species_from_enzymeml(enzymeml_document: DataModel) -> list: for reactant in enzymeml_document.reactants: available_species.append(reactant) return available_species + + +def get_ordered_list_of_species_names(fid: "Fid") -> list: + """Iterate over the identites in a given FID object and extract a + list of species names ordered by peak index, multiple occurences + thus allowed. + + Args: + fid (Fid): The FID object from which to get the species names. + + Returns: + list: List of species names in desecending order by peak index. + """ + list_of_tuples = [] + # Iterate over the identies and then over their associated peaks of + # a given FID object and append a tuple of the identity's name and + # corresponding peak (one tuple per peak) to a list of tuples. + for identity in fid.fid_object.peak_identities: + for peak in identity.associated_peaks: + list_of_tuples.append((identity.name, peak)) + # Use the `sorted` function with a custom key to sort the list of + # tuples by the second element of each tuple (the peak) from highest + # value to lowest (reverse=True). + list_of_tuples = sorted(list_of_tuples, key=lambda x: x[1], reverse=True) + # Create and return an ordered list of only the species names from + # the sorted list of tuples. 
+ ordered_list_of_species_names = [t[0] for t in list_of_tuples] + return ordered_list_of_species_names + + +def create_enzymeml(fid_array: "FidArray", enzymeml_document: DataModel) -> DataModel: + # Specify EnzymeML version + URL = "https://github.com/EnzymeML/enzymeml-specifications.git" + COMMIT = "5e5f05b9dc76134305b8f9cef65271e35563ac76" + + EnzymeML = DataModel.from_git(URL, COMMIT) + SBOTerm = EnzymeML.enums.SBOTerm + DataTypes = EnzymeML.enums.DataTypes + + measurement = EnzymeML.Measurement( + name=fid_array.data_model.experiment.name, + temperature=enzymeml_document.reactions[0].temperature, + temperature_unit=enzymeml_document.reactions[0].temperature_unit, + ph=enzymeml_document.reactions[0].ph, + global_time=fid_array.t.tolist(), + global_time_unit="min", + ) + + enzymeml_document.measurements.append(measurement) + + return enzymeml_document + + # for species, concentrations in fid_array.concentrations.items(): + # new_species = EnzymeML.MeasurementData( + # init_conc=enzymeml_document.reactants + # ) diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index 0adcf22..d433e21 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -19,9 +19,6 @@ Root element of the NMRpy data model. - experiment - Type: [Experiment](#experiment) - Description: List of experiments associated with this dataset. -- citation - - Type: [Citation](#citation) - - Description: Relevant information regarding the publication and citation of this dataset. ### Experiment @@ -189,102 +186,6 @@ Container for processing of multiple spectra. Must reference the respective `FID - Multiple: True -### Citation - -Container for various types of metadata primarily used in the publication and citation of the dataset. - -- title - - Type: string - - Description: Title the dataset should have when published. 
-- doi - - Type: URL - - Description: DOI pointing to the published dataset -- description - - Type: string - - Description: Description the dataset should have when published. -- authors - - Type: [Person](#person) - - Description: List of authors for this dataset. - - Multiple: True -- subjects - - Type: [Subjects](#subjects) - - Description: List of subjects this dataset belongs to. - - Multiple: True -- keywords - - Type: [Term](#term) - - Description: List of CV-based keywords describing the dataset. - - Multiple: True -- topics - - Type: [Term](#term) - - Description: List of CV-based topics the dataset addresses. - - Multiple: True -- related_publications - - Type: [Publication](#publication) - - Description: List of publications relating to this dataset. - - Multiple: True -- notes - - Type: string - - Description: Additional notes about the dataset. -- funding - - Type: string - - Description: Funding information for this dataset. - - Multiple: True -- license - - Type: string - - Description: License information for this dataset. Defaults to `CC BY 4.0`. - - Default: CC BY 4.0 - - -### Person - -Container for information regarding a person that worked on an experiment. - -- __last_name__ - - Type: string - - Description: Family name of the person. -- __first_name__ - - Type: string - - Description: Given name of the person. -- middle_names - - Type: string - - Description: List of middle names of the person. - - Multiple: True -- affiliation - - Type: string - - Description: Institution the Person belongs to. -- email - - Type: string - - Description: Email address of the person. -- identifier_type - - Type: [IdentifierTypes](#identifiertypes) - - Description: Recognized identifier for the person. -- identifier_value - - Type: string - - Description: Value of the identifier for the person. - - -### Publication - -Container for citation information of a relevant publication. 
- -- __type__ - - Type: [PublicationTypes](#publicationtypes) - - Description: Nature of the publication. -- __title__ - - Type: string - - Description: Title of the publication. -- __authors__ - - Type: [Person](#person) - - Description: Authors of the publication. - - Multiple: True -- year - - Type: integer - - Description: Year of publication. -- doi - - Type: URL - - Description: The DOI pointing to the publication. - - ## Utility objects @@ -334,33 +235,3 @@ VARIAN = "varian" BRUKER = "bruker" NONE = None ``` - - -### Subjects - -Enumeration containing common subjects (research fields) that implement NMR. - -```python -BIOLOGY = "Biology" -CHEMISTRY = "Chemistry" -IT = "Computer and Information Science" -PHYSICS = "Physics" -``` - - -### PublicationTypes - -Enumeration containing accepted types of publication. - -```python -ARTICLE = "Journal article" -``` - - -### IdentifierTypes - -Enumeration containing recognized identifiers for persons. - -```python -ORCID = "ORCID" -``` From ecbaea23e73c0fabc5d87789a591e13c466e6aa2 Mon Sep 17 00:00:00 2001 From: sdRDM Bot Date: Fri, 19 Apr 2024 11:42:03 +0000 Subject: [PATCH 19/54] API update --- nmrpy/datamodel/__init__.py | 4 ++-- nmrpy/datamodel/core/__init__.py | 1 - nmrpy/datamodel/core/cv.py | 8 ++++--- nmrpy/datamodel/core/experiment.py | 16 ++++++------- nmrpy/datamodel/core/fidarray.py | 6 ++++- nmrpy/datamodel/core/fidobject.py | 32 ++++++++----------------- nmrpy/datamodel/core/identity.py | 21 ++++++++-------- nmrpy/datamodel/core/nmrpy.py | 8 ++++--- nmrpy/datamodel/core/parameters.py | 6 ++++- nmrpy/datamodel/core/processingsteps.py | 6 ++++- nmrpy/datamodel/core/term.py | 19 +++++++-------- 11 files changed, 62 insertions(+), 65 deletions(-) diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py index d866ab2..74154e7 100644 --- a/nmrpy/datamodel/__init__.py +++ b/nmrpy/datamodel/__init__.py @@ -1,3 +1,3 @@ -__URL__ = "" -__COMMIT__ = "" +__URL__ = "https://github.com/NMRPy/nmrpy" 
+__COMMIT__ = "478f8467aed0bc8b72d82a7fb9e649202e3b1026" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index f22d8a0..79d1e45 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -10,7 +10,6 @@ from .fileformats import FileFormats __doc__ = "" - __all__ = [ "NMRpy", "Experiment", diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py index e7e070a..1663e05 100644 --- a/nmrpy/datamodel/core/cv.py +++ b/nmrpy/datamodel/core/cv.py @@ -1,11 +1,9 @@ import sdRDM from typing import Optional -from pydantic import Field +from pydantic import AnyUrl, Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator -from pydantic import AnyUrl - @forge_signature class CV(sdRDM.DataModel): @@ -31,3 +29,7 @@ class CV(sdRDM.DataModel): ..., description="URL pointing to the CV used.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index e379b4f..e0c3a55 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -1,16 +1,14 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - - from .fidobject import FIDObject +from .identity import Identity +from .parameters import Parameters from .processingsteps import ProcessingSteps from .fidarray import FIDArray -from .parameters import Parameters -from .identity import Identity @forge_signature @@ -40,6 +38,10 @@ class Experiment(sdRDM.DataModel): default=None, description="Multiple NMR spectra to be processed together.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: 
Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) def add_to_fid( self, @@ -61,7 +63,6 @@ def add_to_fid( processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() """ - params = { "raw_data": raw_data, "processed_data": processed_data, @@ -69,10 +70,7 @@ def add_to_fid( "processing_steps": processing_steps, "peak_identities": peak_identities, } - if id is not None: params["id"] = id - self.fid.append(FIDObject(**params)) - return self.fid[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py index 1400e9c..ddb5523 100644 --- a/nmrpy/datamodel/core/fidarray.py +++ b/nmrpy/datamodel/core/fidarray.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -21,3 +21,7 @@ class FIDArray(sdRDM.DataModel): multiple=True, default_factory=ListPlus, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py index 61e4f00..4e72e76 100644 --- a/nmrpy/datamodel/core/fidobject.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -1,15 +1,12 @@ import sdRDM from typing import Optional, Union, List -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator - - -from .processingsteps import ProcessingSteps +from .identity import Identity, AssociatedRanges from .parameters import Parameters -from .identity import Identity -from 
.identity import AssociatedRanges +from .processingsteps import ProcessingSteps @forge_signature @@ -23,10 +20,7 @@ class FIDObject(sdRDM.DataModel): ) raw_data: List[str] = Field( - description=( - "Complex spectral data from numpy array as string of format" - " `{array.real}+{array.imag}j`." - ), + description="Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.", default_factory=ListPlus, multiple=True, ) @@ -43,21 +37,19 @@ class FIDObject(sdRDM.DataModel): ) processing_steps: Optional[ProcessingSteps] = Field( - description=( - "Contains the processing steps performed, as well as the parameters used" - " for them." - ), + description="Contains the processing steps performed, as well as the parameters used for them.", default_factory=ProcessingSteps, ) peak_identities: List[Identity] = Field( - description=( - "Container holding and mapping integrals resulting from peaks and their" - " ranges to EnzymeML species." - ), + description="Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.", default_factory=ListPlus, multiple=True, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) def add_to_peak_identities( self, @@ -79,7 +71,6 @@ def add_to_peak_identities( associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() """ - params = { "name": name, "species_id": species_id, @@ -87,10 +78,7 @@ def add_to_peak_identities( "associated_ranges": associated_ranges, "associated_integrals": associated_integrals, } - if id is not None: params["id"] = id - self.peak_identities.append(Identity(**params)) - return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py index 17c838b..e646fc6 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/identity.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -15,10 +15,12 @@ class AssociatedRanges(sdRDM.DataModel): default_factory=IDGenerator("associatedrangesINDEX"), xml="@id", ) - start: Optional[float] = Field() - end: Optional[float] = Field() + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) @forge_signature @@ -58,6 +60,10 @@ class Identity(sdRDM.DataModel): default_factory=ListPlus, multiple=True, ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) def add_to_associated_ranges( self, @@ -73,15 +79,8 @@ def add_to_associated_ranges( start (): . Defaults to None end (): . 
Defaults to None """ - - params = { - "start": start, - "end": end, - } - + params = {"start": start, "end": end} if id is not None: params["id"] = id - self.associated_ranges.append(AssociatedRanges(**params)) - return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index ab19951..12e9194 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -1,11 +1,9 @@ import sdRDM from typing import Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator - from datetime import datetime as Datetime - from .experiment import Experiment @@ -33,3 +31,7 @@ class NMRpy(sdRDM.DataModel): default=None, description="List of experiments associated with this dataset.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py index 66f0c37..1ba4443 100644 --- a/nmrpy/datamodel/core/parameters.py +++ b/nmrpy/datamodel/core/parameters.py @@ -1,7 +1,7 @@ import sdRDM from typing import List, Optional -from pydantic import Field +from pydantic import Field, PrivateAttr from sdRDM.base.listplus import ListPlus from sdRDM.base.utils import forge_signature, IDGenerator @@ -67,3 +67,7 @@ class Parameters(sdRDM.DataModel): default=None, description="sw_left", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py index e4c8830..aa642a0 100644 --- a/nmrpy/datamodel/core/processingsteps.py +++ b/nmrpy/datamodel/core/processingsteps.py @@ -1,7 +1,7 @@ import sdRDM from typing import Optional -from pydantic import 
Field +from pydantic import Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator @@ -79,3 +79,7 @@ class ProcessingSteps(sdRDM.DataModel): default=False, description="Whether or not Baseline correction was performed.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py index 82c67e8..5505ae1 100644 --- a/nmrpy/datamodel/core/term.py +++ b/nmrpy/datamodel/core/term.py @@ -1,11 +1,9 @@ import sdRDM -from typing import Optional -from pydantic import Field +from typing import Any, Optional +from pydantic import Field, PrivateAttr from sdRDM.base.utils import forge_signature, IDGenerator -from typing import Any - @forge_signature class Term(sdRDM.DataModel): @@ -19,9 +17,7 @@ class Term(sdRDM.DataModel): name: str = Field( ..., - description=( - "The preferred name of the term associated with the given accession number." - ), + description="The preferred name of the term associated with the given accession number.", ) accession: str = Field( @@ -31,13 +27,14 @@ class Term(sdRDM.DataModel): term_cv_reference: Optional[str] = Field( default=None, - description=( - "Reference to the `CV.id` of a controlled vocabulary that has been defined" - " for this dataset." 
- ), + description="Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.", ) value: Optional[Any] = Field( default=None, description="Value of the term, if applicable.", ) + __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") + __commit__: Optional[str] = PrivateAttr( + default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" + ) From 6b3e30af9d833b6f470b1f44943ad67408024e6e Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 24 Jun 2024 13:05:36 +0200 Subject: [PATCH 20/54] Remove depricated data model --- nmrpy/datamodel/__init__.py | 3 - nmrpy/datamodel/core/__init__.py | 24 ------ nmrpy/datamodel/core/cv.py | 35 -------- nmrpy/datamodel/core/experiment.py | 76 ----------------- nmrpy/datamodel/core/fidarray.py | 27 ------ nmrpy/datamodel/core/fidobject.py | 84 ------------------- nmrpy/datamodel/core/fileformats.py | 7 -- nmrpy/datamodel/core/identity.py | 86 ------------------- nmrpy/datamodel/core/nmrpy.py | 37 --------- nmrpy/datamodel/core/parameters.py | 73 ----------------- nmrpy/datamodel/core/processingsteps.py | 85 ------------------- nmrpy/datamodel/core/term.py | 40 --------- nmrpy/datamodel/schemes/datamodel_schema.md | 91 --------------------- 13 files changed, 668 deletions(-) delete mode 100644 nmrpy/datamodel/__init__.py delete mode 100644 nmrpy/datamodel/core/__init__.py delete mode 100644 nmrpy/datamodel/core/cv.py delete mode 100644 nmrpy/datamodel/core/experiment.py delete mode 100644 nmrpy/datamodel/core/fidarray.py delete mode 100644 nmrpy/datamodel/core/fidobject.py delete mode 100644 nmrpy/datamodel/core/fileformats.py delete mode 100644 nmrpy/datamodel/core/identity.py delete mode 100644 nmrpy/datamodel/core/nmrpy.py delete mode 100644 nmrpy/datamodel/core/parameters.py delete mode 100644 nmrpy/datamodel/core/processingsteps.py delete mode 100644 nmrpy/datamodel/core/term.py delete mode 100644 nmrpy/datamodel/schemes/datamodel_schema.md diff --git 
a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py deleted file mode 100644 index 74154e7..0000000 --- a/nmrpy/datamodel/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ - -__URL__ = "https://github.com/NMRPy/nmrpy" -__COMMIT__ = "478f8467aed0bc8b72d82a7fb9e649202e3b1026" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py deleted file mode 100644 index 79d1e45..0000000 --- a/nmrpy/datamodel/core/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from .nmrpy import NMRpy -from .experiment import Experiment -from .fidobject import FIDObject -from .parameters import Parameters -from .processingsteps import ProcessingSteps -from .identity import Identity -from .fidarray import FIDArray -from .cv import CV -from .term import Term -from .fileformats import FileFormats - -__doc__ = "" -__all__ = [ - "NMRpy", - "Experiment", - "FIDObject", - "Parameters", - "ProcessingSteps", - "Identity", - "FIDArray", - "CV", - "Term", - "FileFormats", -] diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py deleted file mode 100644 index 1663e05..0000000 --- a/nmrpy/datamodel/core/cv.py +++ /dev/null @@ -1,35 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import AnyUrl, Field, PrivateAttr -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class CV(sdRDM.DataModel): - """lorem ipsum""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("cvINDEX"), - xml="@id", - ) - - vocabulary: str = Field( - ..., - description="Name of the CV used.", - ) - - version: str = Field( - ..., - description="Version of the CV used.", - ) - - url: AnyUrl = Field( - ..., - description="URL pointing to the CV used.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/core/experiment.py 
b/nmrpy/datamodel/core/experiment.py deleted file mode 100644 index e0c3a55..0000000 --- a/nmrpy/datamodel/core/experiment.py +++ /dev/null @@ -1,76 +0,0 @@ -import sdRDM - -from typing import Optional, Union, List -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator -from .fidobject import FIDObject -from .identity import Identity -from .parameters import Parameters -from .processingsteps import ProcessingSteps -from .fidarray import FIDArray - - -@forge_signature -class Experiment(sdRDM.DataModel): - """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed - Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant - """ - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("experimentINDEX"), - xml="@id", - ) - - name: str = Field( - ..., - description="A descriptive name for the overarching experiment.", - ) - - fid: List[FIDObject] = Field( - description="A single NMR spectrum.", - default_factory=ListPlus, - multiple=True, - ) - - fid_array: Optional[FIDArray] = Field( - default=None, - description="Multiple NMR spectra to be processed together.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) - - def add_to_fid( - self, - raw_data: List[str] = ListPlus(), - processed_data: List[Union[str, float]] = ListPlus(), - nmr_parameters: Optional[Parameters] = None, - processing_steps: Optional[ProcessingSteps] = None, - peak_identities: List[Identity] = ListPlus(), - id: Optional[str] = None, - ) -> None: - 
""" - This method adds an object of type 'FIDObject' to attribute fid - - Args: - id (str): Unique identifier of the 'FIDObject' object. Defaults to 'None'. - raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. Defaults to ListPlus() - processed_data (): Processed data array.. Defaults to ListPlus() - nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None - processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None - peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() - """ - params = { - "raw_data": raw_data, - "processed_data": processed_data, - "nmr_parameters": nmr_parameters, - "processing_steps": processing_steps, - "peak_identities": peak_identities, - } - if id is not None: - params["id"] = id - self.fid.append(FIDObject(**params)) - return self.fid[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py deleted file mode 100644 index ddb5523..0000000 --- a/nmrpy/datamodel/core/fidarray.py +++ /dev/null @@ -1,27 +0,0 @@ -import sdRDM - -from typing import List, Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class FIDArray(sdRDM.DataModel): - """Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. 
Setup time for experiment, Default 0.5}""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("fidarrayINDEX"), - xml="@id", - ) - - fids: List[str] = Field( - description="List of `FIDObject.id` belonging to this array.", - multiple=True, - default_factory=ListPlus, - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py deleted file mode 100644 index 4e72e76..0000000 --- a/nmrpy/datamodel/core/fidobject.py +++ /dev/null @@ -1,84 +0,0 @@ -import sdRDM - -from typing import Optional, Union, List -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator -from .identity import Identity, AssociatedRanges -from .parameters import Parameters -from .processingsteps import ProcessingSteps - - -@forge_signature -class FIDObject(sdRDM.DataModel): - """Container for a single NMR spectrum.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("fidobjectINDEX"), - xml="@id", - ) - - raw_data: List[str] = Field( - description="Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.", - default_factory=ListPlus, - multiple=True, - ) - - processed_data: List[Union[str, float]] = Field( - description="Processed data array.", - default_factory=ListPlus, - multiple=True, - ) - - nmr_parameters: Optional[Parameters] = Field( - description="Contains commonly-used NMR parameters.", - default_factory=Parameters, - ) - - processing_steps: Optional[ProcessingSteps] = Field( - description="Contains the processing steps performed, as well as the parameters used for them.", - default_factory=ProcessingSteps, - ) - - peak_identities: 
List[Identity] = Field( - description="Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.", - default_factory=ListPlus, - multiple=True, - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) - - def add_to_peak_identities( - self, - name: Optional[str] = None, - species_id: Optional[str] = None, - associated_peaks: List[float] = ListPlus(), - associated_ranges: List[AssociatedRanges] = ListPlus(), - associated_integrals: List[float] = ListPlus(), - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'Identity' to attribute peak_identities - - Args: - id (str): Unique identifier of the 'Identity' object. Defaults to 'None'. - name (): Descriptive name for the species. Defaults to None - species_id (): ID of an EnzymeML species. Defaults to None - associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus() - associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() - associated_integrals (): Integrals resulting from the given peaks and ranges of a species. 
Defaults to ListPlus() - """ - params = { - "name": name, - "species_id": species_id, - "associated_peaks": associated_peaks, - "associated_ranges": associated_ranges, - "associated_integrals": associated_integrals, - } - if id is not None: - params["id"] = id - self.peak_identities.append(Identity(**params)) - return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/fileformats.py b/nmrpy/datamodel/core/fileformats.py deleted file mode 100644 index bf80f78..0000000 --- a/nmrpy/datamodel/core/fileformats.py +++ /dev/null @@ -1,7 +0,0 @@ -from enum import Enum - - -class FileFormats(Enum): - VARIAN = "varian" - BRUKER = "bruker" - NONE = None diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py deleted file mode 100644 index e646fc6..0000000 --- a/nmrpy/datamodel/core/identity.py +++ /dev/null @@ -1,86 +0,0 @@ -import sdRDM - -from typing import List, Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class AssociatedRanges(sdRDM.DataModel): - """Small type for attribute 'associated_ranges'""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("associatedrangesINDEX"), - xml="@id", - ) - start: Optional[float] = Field() - end: Optional[float] = Field() - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) - - -@forge_signature -class Identity(sdRDM.DataModel): - """Container mapping one or more peaks to the respective species.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("identityINDEX"), - xml="@id", - ) - - name: Optional[str] = Field( - default=None, - description="Descriptive name for the species", - ) - - species_id: Optional[str] = Field( 
- default=None, - description="ID of an EnzymeML species", - ) - - associated_peaks: List[float] = Field( - description="Peaks belonging to the given species", - default_factory=ListPlus, - multiple=True, - ) - - associated_ranges: List[AssociatedRanges] = Field( - default_factory=ListPlus, - description="Sets of ranges belonging to the given peaks", - multiple=True, - ) - - associated_integrals: List[float] = Field( - description="Integrals resulting from the given peaks and ranges of a species", - default_factory=ListPlus, - multiple=True, - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) - - def add_to_associated_ranges( - self, - start: Optional[float] = None, - end: Optional[float] = None, - id: Optional[str] = None, - ) -> None: - """ - This method adds an object of type 'AssociatedRanges' to attribute associated_ranges - - Args: - id (str): Unique identifier of the 'AssociatedRanges' object. Defaults to 'None'. - start (): . Defaults to None - end (): . 
Defaults to None - """ - params = {"start": start, "end": end} - if id is not None: - params["id"] = id - self.associated_ranges.append(AssociatedRanges(**params)) - return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py deleted file mode 100644 index 12e9194..0000000 --- a/nmrpy/datamodel/core/nmrpy.py +++ /dev/null @@ -1,37 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.utils import forge_signature, IDGenerator -from datetime import datetime as Datetime -from .experiment import Experiment - - -@forge_signature -class NMRpy(sdRDM.DataModel): - """Root element of the NMRpy data model.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("nmrpyINDEX"), - xml="@id", - ) - - datetime_created: Datetime = Field( - ..., - description="Date and time this dataset has been created.", - ) - - datetime_modified: Optional[Datetime] = Field( - default=None, - description="Date and time this dataset has last been modified.", - ) - - experiment: Optional[Experiment] = Field( - default=None, - description="List of experiments associated with this dataset.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py deleted file mode 100644 index 1ba4443..0000000 --- a/nmrpy/datamodel/core/parameters.py +++ /dev/null @@ -1,73 +0,0 @@ -import sdRDM - -from typing import List, Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class Parameters(sdRDM.DataModel): - """Container for relevant NMR parameters.""" - - id: Optional[str] = Field( - description="Unique identifier of 
the given object.", - default_factory=IDGenerator("parametersINDEX"), - xml="@id", - ) - - acquisition_time: Optional[float] = Field( - default=None, - description="at", - ) - - relaxation_time: Optional[float] = Field( - default=None, - description="d1", - ) - - repetition_time: Optional[float] = Field( - default=None, - description="rt = at + d1", - ) - - number_of_transients: List[float] = Field( - description="nt", - default_factory=ListPlus, - multiple=True, - ) - - acquisition_times_array: List[float] = Field( - description="acqtime = [nt, 2nt, ..., rt x nt]", - default_factory=ListPlus, - multiple=True, - ) - - spectral_width_ppm: Optional[float] = Field( - default=None, - description="sw", - ) - - spectral_width_hz: Optional[float] = Field( - default=None, - description="sw_hz", - ) - - spectrometer_frequency: Optional[float] = Field( - default=None, - description="sfrq", - ) - - reference_frequency: Optional[float] = Field( - default=None, - description="reffrq", - ) - - spectral_width_left: Optional[float] = Field( - default=None, - description="sw_left", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py deleted file mode 100644 index aa642a0..0000000 --- a/nmrpy/datamodel/core/processingsteps.py +++ /dev/null @@ -1,85 +0,0 @@ -import sdRDM - -from typing import Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class ProcessingSteps(sdRDM.DataModel): - """Container for processing steps performed, as well as parameter for them.""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("processingstepsINDEX"), - xml="@id", - ) - - is_apodised: Optional[bool] = Field( - default=None, - 
description="Whether or not Apodisation (line-broadening) has been performed.", - ) - - apodisation_frequency: Optional[float] = Field( - default=None, - description="Degree of Apodisation (line-broadening) in Hz.", - ) - - is_zero_filled: Optional[bool] = Field( - default=False, - description="Whether or not Zero-filling has been performed.", - ) - - is_fourier_transformed: Optional[bool] = Field( - default=False, - description="Whether or not Fourier transform has been performed.", - ) - - fourier_transform_type: Optional[str] = Field( - default=None, - description="The type of Fourier transform used.", - ) - - is_phased: Optional[bool] = Field( - default=False, - description="Whether or not Phasing was performed.", - ) - - zero_order_phase: Optional[float] = Field( - default=None, - description="Zero-order phase used for Phasing.", - ) - - first_order_phase: Optional[float] = Field( - default=None, - description="First-order phase used for Phasing.", - ) - - is_only_real: Optional[bool] = Field( - default=False, - description="Whether or not the imaginary part has been discarded.", - ) - - is_normalised: Optional[bool] = Field( - default=False, - description="Whether or not Normalisation was performed.", - ) - - max_value: Optional[float] = Field( - default=None, - description="Maximum value of the dataset used for Normalisation.", - ) - - is_deconvoluted: Optional[bool] = Field( - default=False, - description="Whether or not Deconvolution was performed.", - ) - - is_baseline_corrected: Optional[bool] = Field( - default=False, - description="Whether or not Baseline correction was performed.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py deleted file mode 100644 index 5505ae1..0000000 --- a/nmrpy/datamodel/core/term.py +++ /dev/null @@ -1,40 +0,0 @@ 
-import sdRDM - -from typing import Any, Optional -from pydantic import Field, PrivateAttr -from sdRDM.base.utils import forge_signature, IDGenerator - - -@forge_signature -class Term(sdRDM.DataModel): - """lorem ipsum {Add reference back to term_cv_reference.}""" - - id: Optional[str] = Field( - description="Unique identifier of the given object.", - default_factory=IDGenerator("termINDEX"), - xml="@id", - ) - - name: str = Field( - ..., - description="The preferred name of the term associated with the given accession number.", - ) - - accession: str = Field( - ..., - description="Accession number of the term in the controlled vocabulary.", - ) - - term_cv_reference: Optional[str] = Field( - default=None, - description="Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset.", - ) - - value: Optional[Any] = Field( - default=None, - description="Value of the term, if applicable.", - ) - __repo__: Optional[str] = PrivateAttr(default="https://github.com/NMRPy/nmrpy") - __commit__: Optional[str] = PrivateAttr( - default="478f8467aed0bc8b72d82a7fb9e649202e3b1026" - ) diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md deleted file mode 100644 index 0cf4664..0000000 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ /dev/null @@ -1,91 +0,0 @@ -```mermaid -classDiagram - NMRpy *-- Experiment - Experiment *-- FIDObject - Experiment *-- FIDArray - FIDObject *-- Parameters - FIDObject *-- ProcessingSteps - FIDObject *-- Identity - - class NMRpy { - +datetime datetime_created* - +datetime datetime_modified - +Experiment experiment - } - - class Experiment { - +string name* - +FIDObject[0..*] fid - +FIDArray fid_array - } - - class FIDObject { - +string[0..*] raw_data - +string, float[0..*] processed_data - +Parameters nmr_parameters - +ProcessingSteps processing_steps - +Identity[0..*] peak_identities - } - - class Parameters { - +float acquisition_time - +float relaxation_time - +float 
repetition_time - +float[0..*] number_of_transients - +float[0..*] acquisition_times_array - +float spectral_width_ppm - +float spectral_width_hz - +float spectrometer_frequency - +float reference_frequency - +float spectral_width_left - } - - class ProcessingSteps { - +boolean is_apodised - +float apodisation_frequency - +boolean is_zero_filled - +boolean is_fourier_transformed - +string fourier_transform_type - +boolean is_phased - +float zero_order_phase - +float first_order_phase - +boolean is_only_real - +boolean is_normalised - +float max_value - +boolean is_deconvoluted - +boolean is_baseline_corrected - } - - class Identity { - +string name - +string species_id - +float[0..*] associated_peaks - +AssociatedRanges[0..*] associated_ranges - +float[0..*] associated_integrals - } - - class FIDArray { - +string[0..*] fids* - } - - class CV { - +string vocabulary* - +string version* - +URL url* - } - - class Term { - +string name* - +string accession* - +string term_cv_reference - +any value - } - - class FileFormats { - << Enumeration >> - +VARIAN - +BRUKER - +NONE - } - -``` \ No newline at end of file From 56d2cf7cada4f3f4302eab2770dd829b4841fdda Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 24 Sep 2024 01:15:15 +0200 Subject: [PATCH 21/54] Add apply_to_enzymeml() method --- nmrpy/data_objects-copy.py | 2457 ------------------- nmrpy/data_objects.py | 198 +- nmrpy/datamodel/__init__.py | 12 + nmrpy/datamodel/core/__init__.py | 10 + nmrpy/datamodel/core/cv.py | 55 + nmrpy/datamodel/core/experiment.py | 106 + nmrpy/datamodel/core/fidarray.py | 46 + nmrpy/datamodel/core/fidobject.py | 133 + nmrpy/datamodel/core/fileformats.py | 8 + nmrpy/datamodel/core/identity.py | 162 ++ nmrpy/datamodel/core/nmrpy.py | 60 + nmrpy/datamodel/core/parameters.py | 111 + nmrpy/datamodel/core/processingsteps.py | 128 + nmrpy/datamodel/core/term.py | 68 + nmrpy/datamodel/schemes/datamodel_schema.md | 92 + nmrpy/plotting.py | 192 +- nmrpy/utils.py | 104 +- 
specifications/nmrpy.md | 6 +- 18 files changed, 1287 insertions(+), 2661 deletions(-) delete mode 100644 nmrpy/data_objects-copy.py create mode 100644 nmrpy/datamodel/__init__.py create mode 100644 nmrpy/datamodel/core/__init__.py create mode 100644 nmrpy/datamodel/core/cv.py create mode 100644 nmrpy/datamodel/core/experiment.py create mode 100644 nmrpy/datamodel/core/fidarray.py create mode 100644 nmrpy/datamodel/core/fidobject.py create mode 100644 nmrpy/datamodel/core/fileformats.py create mode 100644 nmrpy/datamodel/core/identity.py create mode 100644 nmrpy/datamodel/core/nmrpy.py create mode 100644 nmrpy/datamodel/core/parameters.py create mode 100644 nmrpy/datamodel/core/processingsteps.py create mode 100644 nmrpy/datamodel/core/term.py create mode 100644 nmrpy/datamodel/schemes/datamodel_schema.md diff --git a/nmrpy/data_objects-copy.py b/nmrpy/data_objects-copy.py deleted file mode 100644 index a0d72aa..0000000 --- a/nmrpy/data_objects-copy.py +++ /dev/null @@ -1,2457 +0,0 @@ -from pathlib import Path -import numpy -import scipy -from matplotlib import pyplot -import lmfit -import nmrglue -import numbers -from scipy.optimize import leastsq -from multiprocessing import Pool, cpu_count -from nmrpy.plotting import * -import os -import pickle -from ipywidgets import SelectMultiple -from sdRDM import DataModel - - -class Base: - _complex_dtypes = [ - numpy.dtype("csingle"), - numpy.dtype("cdouble"), - numpy.dtype("clongdouble"), - ] - - _file_formats = ["varian", "bruker", None] - - def __init__(self, *args, **kwargs): - self.id = kwargs.get("id", None) - self._procpar = kwargs.get("procpar", None) - self._params = None - self.fid_path = kwargs.get("fid_path", ".") - self._file_format = None - # self.parameters_object = self.lib.Parameters() - - @property - def lib(self): - try: - self.__lib - except: - self.__lib = DataModel.from_markdown( - path=Path(__file__).parent.parent / "specifications" - ) - return self.__lib - - # @property - # def 
parameters_object(self): - # return self.__parameter_object - - # @parameters_object.setter - # def parameters_object(self, parameters_object): - # if isinstance(parameters_object, DataModel): - # self.__parameters_object = parameters_object - - @property - def id(self): - return self.__id - - @id.setter - def id(self, id): - if isinstance(id, str) or id is None: - self.__id = id - else: - raise AttributeError("ID must be a string or None.") - - @property - def fid_path(self): - return self.__fid_path - - @fid_path.setter - def fid_path(self, fid_path): - if isinstance(fid_path, str): - self.__fid_path = fid_path - else: - raise AttributeError("fid_path must be a string.") - - @property - def _file_format(self): - return self.__file_format - - @_file_format.setter - def _file_format(self, file_format): - if file_format in self._file_formats: - self.__file_format = file_format - else: - raise AttributeError( - '_file_format must be "varian", "bruker", or None.' - ) - - @classmethod - def _is_iter(cls, i): - try: - iter(i) - return True - except TypeError: - return False - - @classmethod - def _is_iter_of_iters(cls, i): - if type(i) == list and len(i) == 0: - return False - elif cls._is_iter(i) and all(cls._is_iter(j) for j in i): - return True - return False - - @classmethod - def _is_flat_iter(cls, i): - if type(i) == list and len(i) == 0: - return True - elif cls._is_iter(i) and not any(cls._is_iter(j) for j in i): - return True - return False - - @property - def _procpar(self): - return self.__procpar - - @_procpar.setter - def _procpar(self, procpar): - if procpar is None: - self.__procpar = procpar - elif isinstance(procpar, dict): - self.__procpar = procpar - self._params = self._extract_procpar(procpar) - # self.parameters_object( - # acquisition_time=self._params.get("at"), - # relaxation_time=self._params.get("d1"), - # repetition_time=self._params.get("rt"), - # spectral_width_ppm=self._params.get("sw"), - # spectral_width_hz=self._params.get("sw_hz"), - # 
spectrometer_frequency=self._params.get("sfrq"), - # reference_frequency=self._params.get("reffrq"), - # spectral_width_left=self._params.get("sw_left"), - # ) - # for _ in self._params.get("nt"): - # if type(_) is not None: - # self.fid_object.parameters.number_of_transients.append(_) - # for _ in self._params.get("acqtime"): - # if type(_) is not None: - # self.fid_object.parameters.acquisition_times_array.append( - # _ - # ) - else: - raise AttributeError("procpar must be a dictionary or None.") - - @property - def _params(self): - return self.__params - - @_params.setter - def _params(self, params): - if isinstance(params, dict) or params is None: - self.__params = params - else: - raise AttributeError("params must be a dictionary or None.") - - # processing - def _extract_procpar(self, procpar): - if self._file_format == "bruker": - return self._extract_procpar_bruker(procpar) - elif self._file_format == "varian": - return self._extract_procpar_varian(procpar) - # else: - # raise AttributeError('Could not parse procpar.') - - @staticmethod - def _extract_procpar_varian(procpar): - """ - Extract some commonely-used NMR parameters (using Varian denotations) - and return a parameter dictionary 'params'. 
- """ - at = float(procpar["procpar"]["at"]["values"][0]) - d1 = float(procpar["procpar"]["d1"]["values"][0]) - sfrq = float(procpar["procpar"]["sfrq"]["values"][0]) - reffrq = float(procpar["procpar"]["reffrq"]["values"][0]) - rfp = float(procpar["procpar"]["rfp"]["values"][0]) - rfl = float(procpar["procpar"]["rfl"]["values"][0]) - tof = float(procpar["procpar"]["tof"]["values"][0]) - rt = at + d1 - nt = numpy.array( - [procpar["procpar"]["nt"]["values"]], dtype=int - ).flatten() - acqtime = numpy.zeros(nt.shape) - acqtime[0] = rt * nt[0] / 2 - for i in range(1, len(nt)): - acqtime[i] = acqtime[i - 1] + (nt[i - 1] + nt[i]) / 2 * rt - acqtime /= 60.0 # convert to min - sw_hz = float(procpar["procpar"]["sw"]["values"][0]) - sw = round(sw_hz / reffrq, 2) - sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq - params = dict( - at=at, # acquisition time - d1=d1, # relaxation delay - rt=rt, # repetition time (at+d1) - nt=nt, # number of transients - acqtime=acqtime, # acquisition times array (nt, 2nt, .., ntxrt) - sw=sw, # spectral width / ppm - sw_hz=sw_hz, # sw / Hz - sfrq=sfrq, # spectrometer frequency - reffrq=reffrq, # reference frequency - rfp=rfp, # irrelevant - rfl=rfl, # irrelevant - tof=tof, # irrelevant - sw_left=sw_left, # spectral window left - ) - return params - - @staticmethod - def _extract_procpar_bruker(procpar): - """ - Extract some commonly-used NMR parameters (using Bruker denotations) - and return a parameter dictionary 'params'. 
- """ - d1 = procpar["acqus"]["D"][1] - reffrq = procpar["acqus"]["SFO1"] - nt = procpar["acqus"]["NS"] - sw_hz = procpar["acqus"]["SW_h"] - sw = procpar["acqus"]["SW"] - # lefthand offset of the processed data in ppm - if "procs" in procpar: - sfrq = procpar["procs"]["SF"] - sw_left = procpar["procs"]["OFFSET"] - else: - sfrq = procpar["acqus"]["BF1"] - sw_left = (0.5 + 1e6 * (sfrq - reffrq) / sw_hz) * sw_hz / sfrq - at = procpar["acqus"]["TD"] / (2 * sw_hz) - rt = at + d1 - td = procpar["tdelta"] - cumulative = procpar["tcum"] - single = procpar["tsingle"] - tstart = cumulative - 0.5 * single # tstart for acquisition - al = procpar["arraylength"] - a = procpar["arrayset"] - acqtime = numpy.zeros((al)) - acqtime[0] = tstart[a - 1] - for i in range(1, al): - acqtime[i] = acqtime[i - 1] + td - params = dict( - at=at, - d1=d1, - rt=rt, - nt=nt, - acqtime=acqtime, - sw=sw, - sw_hz=sw_hz, - sfrq=sfrq, - reffrq=reffrq, - sw_left=sw_left, - ) - return params - - -class Fid(Base): - """ - The basic FID (Free Induction Decay) class contains all the data for a single spectrum (:attr:`~nmrpy.data_objects.Fid.data`), and the - necessary methods to process these data. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.fid_object = self.lib.FID() - self.data = kwargs.get("data", []) - self.peaks = None - self.ranges = None - self._deconvoluted_peaks = None - self._flags = { - "ft": False, - } - - def __str__(self): - return "FID: %s (%i data)" % (self.id, len(self.data)) - - @property - def fid_object(self): - return self.__fid_object - - @fid_object.setter - def fid_object(self, fid_object): - if isinstance(fid_object, DataModel): - self.__fid_object = fid_object - - @property - def data(self): - """ - The spectral data. This is the primary object upon which the processing and analysis functions work. 
- """ - return self.__data - - @data.setter - def data(self, data): - if Fid._is_valid_dataset(data): - self.__data = numpy.array(data) - # for _ in self.__data: - # if type(_) is not None: - # self.fid_object.data.append(float(_)) - - @property - def _ppm(self): - """ - Index of :attr:`~nmrpy.data_objects.Fid.data` in ppm (parts per million). - """ - if self._params is not None and self.data is not None: - return numpy.linspace( - self._params["sw_left"] - self._params["sw"], - self._params["sw_left"], - len(self.data), - )[::-1] - else: - return None - - @property - def peaks(self): - """ - Picked peaks for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. - """ - return self._peaks - - @peaks.setter - def peaks(self, peaks): - if peaks is not None: - if not Fid._is_flat_iter(peaks): - raise AttributeError("peaks must be a flat iterable") - if not all(isinstance(i, numbers.Number) for i in peaks): - raise AttributeError("peaks must be numbers") - self._peaks = numpy.array(peaks) - else: - self._peaks = peaks - - @property - def ranges(self): - """ - Picked ranges for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. - """ - return self._ranges - - @ranges.setter - def ranges(self, ranges): - if ranges is None: - self._ranges = None - return - if not Fid._is_iter_of_iters(ranges) or ranges is None: - raise AttributeError( - "ranges must be an iterable of iterables or None" - ) - ranges = numpy.array(ranges) - if ranges.shape[1] != 2: - raise AttributeError( - "ranges must be an iterable of 2-length iterables or an empty iterables e.g. 
[[]]" - ) - for r in ranges: - if not all(isinstance(i, numbers.Number) for i in r): - raise AttributeError("ranges must be numbers") - self._ranges = ranges - - @property - def _bl_ppm(self): - return self.__bl_ppm - - @_bl_ppm.setter - def _bl_ppm(self, bl_ppm): - if bl_ppm is not None: - if not Fid._is_flat_iter(bl_ppm): - raise AttributeError( - "baseline indices must be a flat iterable" - ) - if len(bl_ppm) > 0: - if not all(isinstance(i, numbers.Number) for i in bl_ppm): - raise AttributeError("baseline indices must be numbers") - self.__bl_ppm = numpy.sort(list(set(bl_ppm)))[::-1] - else: - self.__bl_ppm = None - else: - self.__bl_ppm = bl_ppm - - @property - def _bl_indices(self): - if self._bl_ppm is not None: - return self._conv_to_index( - self.data, - self._bl_ppm, - self._params["sw_left"], - self._params["sw"], - ) - else: - return None - - @property - def _bl_poly(self): - return self.__bl_poly - - @_bl_poly.setter - def _bl_poly(self, bl_poly): - if bl_poly is not None: - if not Fid._is_flat_iter(bl_poly): - raise AttributeError( - "baseline polynomial must be a flat iterable" - ) - if not all(isinstance(i, numbers.Number) for i in bl_poly): - raise AttributeError("baseline polynomial must be numbers") - self.__bl_poly = numpy.array(bl_poly) - else: - self.__bl_ppm = bl_poly - - @property - def _index_peaks(self): - """ - :attr:`~nmrpy.data_objects.Fid.peaks` converted to indices rather than ppm - """ - if self.peaks is not None: - return self._conv_to_index( - self.data, - self.peaks, - self._params["sw_left"], - self._params["sw"], - ) - else: - return [] - - @property - def _index_ranges(self): - """ - :attr:`~nmrpy.data_objects.Fid.ranges` converted to indices rather than ppm - """ - if self.ranges is not None: - shp = self.ranges.shape - index_ranges = self._conv_to_index( - self.data, - self.ranges.flatten(), - self._params["sw_left"], - self._params["sw"], - ) - return index_ranges.reshape(shp) - else: - return [] - - @property - def 
_grouped_peaklist(self): - """ - :attr:`~nmrpy.data_objects.Fid.peaks` grouped according to :attr:`~nmrpy.data_objects.Fid.ranges` - """ - if self.ranges is not None: - return numpy.array( - [ - [ - peak - for peak in self.peaks - if peak > min(peak_range) and peak < max(peak_range) - ] - for peak_range in self.ranges - ], - dtype=object, - ) - else: - return [] - - @property - def _grouped_index_peaklist(self): - """ - :attr:`~nmrpy.data_objects.Fid._index_peaks` grouped according to :attr:`~nmrpy.data_objects.Fid._index_ranges` - """ - if self._index_ranges is not None: - return numpy.array( - [ - [ - peak - for peak in self._index_peaks - if peak > min(peak_range) and peak < max(peak_range) - ] - for peak_range in self._index_ranges - ], - dtype=object, - ) - else: - return [] - - @property - def _deconvoluted_peaks(self): - return self.__deconvoluted_peaks - - @_deconvoluted_peaks.setter - def _deconvoluted_peaks(self, deconvoluted_peaks): - """This is a list of lists of peak parameters with the order [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss]: - - offset: spectral offset - - gauss_sigma: Gaussian sigma - - lorentz_hwhm: Lorentzian half-width-at-half-maximum - - amplitude: height of peak - - frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) - """ - self.__deconvoluted_peaks = deconvoluted_peaks - - @property - def deconvoluted_integrals(self): - """ - An array of integrals for each deconvoluted peak. - """ - if self._deconvoluted_peaks is not None: - integrals = [] - for peak in self._deconvoluted_peaks: - int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) - int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int( - peak[3], peak[2] - ) - integrals.append(int_gauss + int_lorentz) - return integrals - - def _get_plots(self): - """ - Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. 
- """ - plots = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Plot) - ] - return plots - - def _del_plots(self): - """ - Deletes all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. - """ - plots = self._get_plots() - for plot in plots: - delattr(self, plot.id) - - def _get_widgets(self): - """ - Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. - """ - widgets = [ - id - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Phaser) - or isinstance(self.__dict__[id], Calibrator) - or isinstance(self.__dict__[id], DataPeakSelector) - or isinstance(self.__dict__[id], FidRangeSelector) - ] - return widgets - - def _del_widgets(self): - """ - Deletes all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. - """ - widgets = self._get_widgets() - for w in widgets: - delattr(self, w) - - @classmethod - def _is_valid_dataset(cls, data): - if isinstance(data, str): - raise TypeError("Data must be iterable not a string.") - if not cls._is_iter(data): - raise TypeError("Data must be an iterable.") - if not cls._is_flat_iter(data): - raise TypeError("Data must not be nested.") - if not all(isinstance(i, numbers.Number) for i in data): - raise TypeError("Data must consist of numbers only.") - return True - - @classmethod - def from_data(cls, data): - """ - - Instantiate a new :class:`~nmrpy.data_objects.Fid` object by providing a - spectral data object as argument. Eg. :: - - fid = Fid.from_data(data) - """ - new_instance = cls() - new_instance.data = data - return new_instance - - def zf(self): - """ - - Apply a single degree of zero-filling to data array - :attr:`~nmrpy.data_objects.Fid.data`. - - Note: extends data to double length by appending zeroes. This results - in an artificially increased resolution once Fourier-transformed. 
- - """ - self.data = numpy.append(self.data, 0 * self.data) - for _ in self.data: - self.fid_object.data.append(float(_)) - - def emhz(self, lb=5.0): - """ - - Apply exponential line-broadening to data array - :attr:`~nmrpy.data_objects.Fid.data`. - - :keyword lb: degree of line-broadening in Hz. - - """ - self.data = ( - numpy.exp( - -numpy.pi - * numpy.arange(len(self.data)) - * (lb / self._params["sw_hz"]) - ) - * self.data - ) - for _ in self.data: - self.fid_object.data.append(float(_)) - - def real(self): - """ - Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. - """ - self.data = numpy.real(self.data) - for _ in self.data: - self.fid_object.data.append(float(_)) - - # GENERAL FUNCTIONS - def ft(self): - """ - Fourier Transform the data array :attr:`~nmrpy.data_objects.Fid.data`. - - Calculates the Discrete Fourier Transform using the Fast Fourier - Transform algorithm as implemented in NumPy (*Cooley, James W., and John W. - Tukey, 1965, 'An algorithm for the machine calculation of complex Fourier - series,' Math. Comput. 19: 297-301.*) - - """ - if self._flags["ft"]: - raise ValueError("Data have already been Fourier Transformed.") - if Fid._is_valid_dataset(self.data): - list_params = (self.data, self._file_format) - self.data = Fid._ft(list_params) - for _ in self.data: - self.fid_object.data.append(float(_)) - self._flags["ft"] = True - - @classmethod - def _ft(cls, list_params): - """ - Class method for Fourier-transforming data using multiprocessing. - list_params is a tuple of (, ). - """ - if len(list_params) != 2: - raise ValueError( - "Wrong number of parameters. 
list_params must contain [, ]" - ) - data, file_format = list_params - if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: - data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) - s = len(data) - if file_format == "varian" or file_format == None: - ft_data = numpy.append( - data[int(s / 2.0) :], data[: int(s / 2.0)] - ) - if file_format == "bruker": - ft_data = numpy.append( - data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] - ) - return ft_data - - @staticmethod - def _conv_to_ppm(data, index, sw_left, sw): - """ - Convert index array to ppm. - """ - if isinstance(index, list): - index = numpy.array(index) - frc_sw = index / float(len(data)) - ppm = sw_left - sw * frc_sw - if Fid._is_iter(ppm): - return numpy.array([round(i, 2) for i in ppm]) - else: - return round(ppm, 2) - - @staticmethod - def _conv_to_index(data, ppm, sw_left, sw): - """ - Convert ppm array to index. - """ - conv_to_int = False - if not Fid._is_iter(ppm): - ppm = [ppm] - conv_to_int = True - if isinstance(ppm, list): - ppm = numpy.array(ppm) - if any(ppm > sw_left) or any(ppm < sw_left - sw): - raise ValueError("ppm must be within spectral width.") - indices = len(data) * (sw_left - ppm) / sw - if conv_to_int: - return int(numpy.ceil(indices)) - return numpy.array(numpy.ceil(indices), dtype=int) - - def phase_correct(self, method="leastsq"): - """ - - Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising - total absolute area. - - :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: - - Nelder-Mead (nelder) - - L-BFGS-B (l-bfgs-b) - - Conjugate Gradient (cg) - - Powell (powell) - - Newton-CG (newton) - """ - if self.data.dtype not in self._complex_dtypes: - raise TypeError("Only complex data can be phase-corrected.") - if not self._flags["ft"]: - raise ValueError( - "Only Fourier-transformed data can be phase-corrected." 
- ) - print("phasing: %s" % self.id) - self.data = Fid._phase_correct((self.data, method)) - for _ in self.data: - self.fid_object.data.append(float(_)) - - @classmethod - def _phase_correct(cls, list_params): - """ - Class method for phase-correction using multiprocessing. - list_params is a tuple of (, ). - """ - data, method = list_params - p = lmfit.Parameters() - p.add_many( - ("p0", 1.0, True), - ("p1", 0.0, True), - ) - mz = lmfit.minimize( - Fid._phased_data_sum, p, args=([data]), method=method - ) - phased_data = Fid._ps( - data, p0=mz.params["p0"].value, p1=mz.params["p1"].value - ) - # data model - if abs(phased_data.min()) > abs(phased_data.max()): - phased_data *= -1 - if sum(phased_data) < 0.0: - phased_data *= -1 - print("%d\t%d" % (mz.params["p0"].value, mz.params["p1"].value)) - return phased_data - - @classmethod - def _phased_data_sum(cls, pars, data): - err = Fid._ps(data, p0=pars["p0"].value, p1=pars["p1"].value).real - return numpy.array([abs(err).sum()] * 2) - - @classmethod - def _ps(cls, data, p0=0.0, p1=0.0): - """ - Linear phase correction - - :keyword p0: Zero order phase in degrees. - - :keyword p1: First order phase in degrees. 
- - """ - if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError("p0 and p1 must be floats or ints.") - if not data.dtype in Fid._complex_dtypes: - raise TypeError("data must be complex.") - # convert to radians - p0 = p0 * numpy.pi / 180.0 - p1 = p1 * numpy.pi / 180.0 - size = len(data) - ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) - return ph * data - - def ps(self, p0=0.0, p1=0.0): - """ - Linear phase correction of :attr:`~nmrpy.data_objects.Fid.data` - - :keyword p0: Zero order phase in degrees - - :keyword p1: First order phase in degrees - - """ - if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError("p0 and p1 must be floats or ints.") - if not self.data.dtype in self._complex_dtypes: - raise TypeError("data must be complex.") - # convert to radians - p0 = p0 * numpy.pi / 180.0 - p1 = p1 * numpy.pi / 180.0 - size = len(self.data) - ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) - self.data = ph * self.data - for _ in self.data: - self.fid_object.data.append(float(_)) - - def phaser(self): - """ - Instantiate a phase-correction GUI widget which applies to :attr:`~nmrpy.data_objects.Fid.data`. - """ - if not len(self.data): - raise AttributeError("data does not exist.") - if self.data.dtype not in self._complex_dtypes: - raise TypeError("data must be complex.") - if not Fid._is_flat_iter(self.data): - raise AttributeError("data must be 1 dimensional.") - global _phaser_widget - self._phaser_widget = Phaser(self) - - def calibrate(self): - """ - Instantiate a GUI widget to select a peak and calibrate spectrum. - Left-clicking selects a peak. The user is then prompted to enter - the PPM value of that peak for calibration. 
- """ - plot_label = """ -Left - select peak -""" - plot_title = "Calibration {}".format(self.id) - self._calibrate_widget = Calibrator( - self, - title=plot_title, - label=plot_label, - ) - - def baseline_correct(self, deg=2): - """ - - Perform baseline correction by fitting specified baseline points - (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) with polynomial of specified - degree (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) and subtract this - polynomial from :attr:`~nmrpy.data_objects.Fid.data`. - - - :keyword deg: degree of fitted polynomial - """ - - if self._bl_indices is None: - raise AttributeError( - "No points selected for baseline correction. Run fid.baseliner()" - ) - if not len(self.data): - raise AttributeError("data does not exist.") - if self.data.dtype in self._complex_dtypes: - raise TypeError("data must not be complex.") - if not Fid._is_flat_iter(self.data): - raise AttributeError("data must be 1 dimensional.") - - data = self.data - x = numpy.arange(len(data)) - m = numpy.ones_like(x) - m[self._bl_indices] = 0 - self._bl_poly = [] - ym = numpy.ma.masked_array(data, m) - xm = numpy.ma.masked_array(x, m) - p = numpy.ma.polyfit(xm, ym, deg) - yp = scipy.polyval(p, x) - self._bl_poly = yp - data_bl = data - yp - self.data = numpy.array(data_bl) - for _ in self.data: - self.fid_object.data.append(float(_)) - - def peakpick(self, thresh=0.1): - """ - - Attempt to automatically identify peaks. Picked peaks are assigned to - :attr:`~nmrpy.data_objects.Fid.peaks`. - - :keyword thresh: fractional threshold for peak-picking - """ - peaks_ind = nmrglue.peakpick.pick(self.data, thresh * self.data.max()) - peaks_ind = [i[0] for i in peaks_ind] - peaks_ppm = Fid._conv_to_ppm( - self.data, peaks_ind, self._params["sw_left"], self._params["sw"] - ) - self.peaks = peaks_ppm - print(self.peaks) - - def peakpicker(self): - """ - Instantiate a peak-picking GUI widget. Left-clicking selects a peak. - Right-click-dragging defines a range. 
Ctrl-left click deletes nearest peak; - ctrl-right click deletes range. Peaks are stored in - :attr:`~nmrpy.data_objects.Fid.peaks`; ranges are stored in - :attr:`~nmrpy.data_objects.Fid.ranges`: both are used for deconvolution (see - :meth:`~nmrpy.data_objects.Fid.deconv`). - - """ - plot_label = """ -Left - select peak -Ctrl+Left - delete nearest peak -Drag Right - select range -Ctrl+Right - delete range -Ctrl+Alt+Right - assign -""" - plot_title = "Peak-picking {}".format(self.id) - self._peakpicker_widget = DataPeakSelector( - self, - title=plot_title, - label=plot_label, - ) - - def clear_peaks(self): - """ - Clear peaks stored in :attr:`~nmrpy.data_objects.Fid.peaks`. - """ - self.peaks = None - - def clear_ranges(self): - """ - Clear ranges stored in :attr:`~nmrpy.data_objects.Fid.ranges`. - """ - self.ranges = None - - def baseliner(self): - """ - Instantiate a baseline-correction GUI widget. Right-click-dragging - defines a range. Ctrl-Right click deletes previously selected range. Indices - selected are stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`, which is used - for baseline-correction (see - :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
- - """ - plot_label = """ -Drag Right - select range -Ctrl+Right - delete range -Ctrl+Alt+Right - assign -""" - plot_title = "Baseline correction {}".format(self.id) - self._baseliner_widget = FidRangeSelector( - self, - title=plot_title, - label=plot_label, - ) - - @classmethod - def _f_gauss(cls, offset, amplitude, gauss_sigma, x): - return amplitude * numpy.exp( - -((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0) - ) - - @classmethod - def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): - # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) - return ( - amplitude - * lorentz_hwhm**2.0 - / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) - ) - - @classmethod - def _f_gauss_int(cls, amplitude, gauss_sigma): - return amplitude * numpy.sqrt(2.0 * numpy.pi * gauss_sigma**2.0) - - @classmethod - def _f_lorentz_int(cls, amplitude, lorentz_hwhm): - # empirical integral commented out - # x = numpy.arange(1000*lorentz_hwhm) - # return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) - # this integral forumula from http://magicplot.com/wiki/fit_equations - return amplitude * lorentz_hwhm * numpy.pi - - @classmethod - def _f_pk( - cls, - x, - offset=0.0, - gauss_sigma=1.0, - lorentz_hwhm=1.0, - amplitude=1.0, - frac_gauss=0.0, - ): - """ - - Return the a combined Gaussian/Lorentzian peakshape for deconvolution - of :attr:`~nmrpy.data_objects.Fid.data`. - - :arg x: array of equal length to :attr:`~nmrpy.data_objects.Fid.data` - - - :keyword offset: spectral offset in x - - :keyword gauss_sigma: 2*sigma**2 specifying the width of the Gaussian peakshape - - :keyword lorentz_hwhm: Lorentzian half width at half maximum height - - :keyword amplitude: amplitude of peak - - :keyword frac_gauss: fraction of function to be Gaussian (0 -> 1). 
Note: - specifying a Gaussian fraction of 0 will produce a pure Lorentzian and vice - versa.""" - - # validation - parameters = [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss] - if not all(isinstance(i, numbers.Number) for i in parameters): - raise TypeError("Keyword parameters must be numbers.") - if not cls._is_iter(x): - raise TypeError("x must be an iterable") - if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - if frac_gauss > 1.0: - frac_gauss = 1.0 - if frac_gauss < 0.0: - frac_gauss = 0.0 - - gauss_peak = cls._f_gauss(offset, amplitude, gauss_sigma, x) - lorentz_peak = cls._f_lorentz(offset, amplitude, lorentz_hwhm, x) - peak = frac_gauss * gauss_peak + (1 - frac_gauss) * lorentz_peak - - return peak - - @classmethod - def _f_makep(cls, data, peaks, frac_gauss=None): - """ - Make a set of initial peak parameters for deconvolution. - - - :arg data: data to be fitted - - :arg peaks: selected peak positions (see peakpicker()) - - :returns: an array of peaks, each consisting of the following parameters: - - spectral offset (x) - - gauss: 2*sigma**2 - - lorentz: scale (HWHM) - - amplitude: amplitude of peak - - frac_gauss: fraction of function to be Gaussian (0 -> 1) - """ - if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") - if not cls._is_flat_iter(peaks): - raise TypeError("peaks must be a flat iterable") - if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - - p = [] - for i in peaks: - pamp = 0.9 * abs(data[int(i)]) - single_peak = [i, 10, 0.1, pamp, frac_gauss] - p.append(single_peak) - return numpy.array(p) - - @classmethod - def _f_conv(cls, parameterset_list, data): - """ - Returns the maximum of a convolution of an initial set of lineshapes and the data to be fitted. 
- - parameterset_list -- a list of parameter lists: n*[[spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, - frac_gauss: fraction of function to be Gaussian (0 -> 1)]] - where n is the number of peaks - data -- 1D spectral array - - """ - - if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") - if not cls._is_iter(parameterset_list): - raise TypeError("parameterset_list must be an iterable") - if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - - data[data == 0.0] = 1e-6 - x = numpy.arange(len(data), dtype="f8") - peaks_init = cls._f_pks(parameterset_list, x) - data_convolution = numpy.convolve(data, peaks_init[::-1]) - auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) - max_data_convolution = numpy.where( - data_convolution == data_convolution.max() - )[0][0] - max_auto_convolution = numpy.where( - auto_convolution == auto_convolution.max() - )[0][0] - return max_data_convolution - max_auto_convolution - - @classmethod - def _f_pks_list(cls, parameterset_list, x): - """ - Return a list of peak evaluations for deconvolution. See _f_pk(). 
- - Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, - frac_gauss: fraction of function to be Gaussian (0 -> 1)] - x -- array of equal length to FID - """ - if not cls._is_iter_of_iters(parameterset_list): - raise TypeError("Parameter set must be an iterable of iterables") - for p in parameterset_list: - if not cls._is_iter(p): - raise TypeError("Parameter set must be an iterable") - if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError("Keyword parameters must be numbers.") - if not cls._is_iter(x): - raise TypeError("x must be an iterable") - if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - return numpy.array([Fid._f_pk(x, *peak) for peak in parameterset_list]) - - @classmethod - def _f_pks(cls, parameterset_list, x): - """ - Return the sum of a series of peak evaluations for deconvolution. See _f_pk(). - - Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, - frac_gauss: fraction of function to be Gaussian (0 -> 1)] - x -- array of equal length to FID - """ - - if not cls._is_iter_of_iters(parameterset_list): - raise TypeError("Parameter set must be an iterable of iterables") - for p in parameterset_list: - if not cls._is_iter(p): - raise TypeError("Parameter set must be an iterable") - if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError("Keyword parameters must be numbers.") - if not cls._is_iter(x): - raise TypeError("x must be an iterable") - if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - - peaks = x * 0.0 - for p in parameterset_list: - peak = cls._f_pk( - x, - offset=p[0], - gauss_sigma=p[1], - lorentz_hwhm=p[2], - amplitude=p[3], - frac_gauss=p[4], - ) - peaks += peak - return peaks - - @classmethod - def _f_res(cls, p, data): - """ - Objective function for 
deconvolution. Returns residuals of the devonvolution fit. - - x -- array of equal length to FID - - Keyword arguments: - p -- lmfit parameters object: - offset_n -- spectral offset in x - sigma_n -- gaussian 2*sigma**2 - hwhm_n -- lorentzian half width at half maximum height - amplitude_n -- amplitude of peak - frac_gauss_n -- fraction of function to be Gaussian (0 -> 1) - where n is the peak number (zero-indexed) - data -- spectrum array - - """ - if not isinstance(p, lmfit.parameter.Parameters): - raise TypeError( - "Parameters must be of type lmfit.parameter.Parameters." - ) - if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable.") - if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - - params = Fid._parameters_to_list(p) - x = numpy.arange(len(data), dtype="f8") - res = data - cls._f_pks(params, x) - return res - - @classmethod - def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): - """Fit a section of spectral data with a combination of Gaussian/Lorentzian peaks for deconvolution. - - Keyword arguments: - peaks -- selected peak positions (see peakpicker()) - frac_gauss -- fraction of fitted function to be Gaussian (1 - Guassian, 0 - Lorentzian) - - returns: - fits -- list of fitted peak parameter sets - - Note: peaks are fitted by default using the Levenberg-Marquardt algorithm[1]. Other fitting algorithms are available (http://cars9.uchicago.edu/software/python/lmfit/fitting.html#choosing-different-fitting-methods). - - [1] Marquardt, Donald W. 'An algorithm for least-squares estimation of nonlinear parameters.' Journal of the Society for Industrial & Applied Mathematics 11.2 (1963): 431-441. 
- """ - data = numpy.real(data) - if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") - if not cls._is_flat_iter(peaks): - raise TypeError("peaks must be a flat iterable") - if any(peak > (len(data) - 1) for peak in peaks): - raise ValueError("peaks must be within the length of data.") - if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - p = cls._f_makep(data, peaks, frac_gauss=0.5) - init_ref = cls._f_conv(p, data) - if any(peaks + init_ref < 0) or any(peaks + init_ref > len(data) - 1): - init_ref = 0 - if frac_gauss == None: - p = cls._f_makep(data, peaks + init_ref, frac_gauss=0.5) - else: - p = cls._f_makep(data, peaks + init_ref, frac_gauss=frac_gauss) - - params = lmfit.Parameters() - for parset in range(len(p)): - current_parset = dict( - zip( - ["offset", "sigma", "hwhm", "amplitude", "frac_gauss"], - p[parset], - ) - ) - for k, v in current_parset.items(): - par_name = "%s_%i" % (k, parset) - params.add(name=par_name, value=v, vary=True, min=0.0) - if "offset" in par_name: - params[par_name].max = len(data) - 1 - if "frac_gauss" in par_name: - params[par_name].max = 1.0 - if frac_gauss is not None: - params[par_name].vary = False - # if 'sigma' in par_name or 'hwhm' in par_name: - # params[par_name].max = 0.01*current_parset['amplitude'] - if "amplitude" in par_name: - params[par_name].max = 2.0 * data.max() - - try: - mz = lmfit.minimize( - cls._f_res, params, args=([data]), method=method - ) - fits = Fid._parameters_to_list(mz.params) - except: - fits = None - return fits - - @classmethod - def _parameters_to_list(cls, p): - n_pks = int(len(p) / 5) - params = [] - for i in range(n_pks): - current_params = [ - p["%s_%s" % (par, i)].value - for par in [ - "offset", - "sigma", - "hwhm", - "amplitude", - "frac_gauss", - ] - ] - params.append(current_params) - return params - - @classmethod - def _deconv_datum(cls, list_parameters): - if len(list_parameters) != 5: - raise ValueError("list_parameters must consist 
of five objects.") - if ( - type(list_parameters[1]) == list and len(list_parameters[1]) == 0 - ) or ( - type(list_parameters[2]) == list and len(list_parameters[2]) == 0 - ): - return [] - - datum, peaks, ranges, frac_gauss, method = list_parameters - - if not cls._is_iter_of_iters(ranges): - raise TypeError("ranges must be an iterable of iterables") - if not all(len(rng) == 2 for rng in ranges): - raise ValueError("ranges must contain two values.") - if not all(rng[0] != rng[1] for rng in ranges): - raise ValueError("data_index must contain different values.") - if not isinstance(datum, numpy.ndarray): - datum = numpy.array(datum) - if datum.dtype in cls._complex_dtypes: - raise TypeError("data must be not be complex.") - - fit = [] - for j in zip(peaks, ranges): - d_slice = datum[j[1][0] : j[1][1]] - p_slice = j[0] - j[1][0] - f = cls._f_fitp( - d_slice, p_slice, frac_gauss=frac_gauss, method=method - ) - f = numpy.array(f).transpose() - f[0] += j[1][0] - f = f.transpose() - fit.append(f) - return fit - - def deconv(self, method="leastsq", frac_gauss=0.0): - """ - - Deconvolute :attr:`~nmrpy.data_obects.Fid.data` object by fitting a - series of peaks to the spectrum. These peaks are generated using the parameters - in :attr:`~nmrpy.data_objects.Fid.peaks`. :attr:`~nmrpy.data_objects.Fid.ranges` - splits :attr:`~nmrpy.data_objects.Fid.data` up into smaller portions. This - significantly speeds up deconvolution time. - - :keyword frac_gauss: (0-1) determines the Gaussian fraction of the peaks. Setting this argument to None will fit this parameter as well. - - :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: - - Nelder-Mead (nelder) - - L-BFGS-B (l-bfgs-b) - - Conjugate Gradient (cg) - - Powell (powell) - - Newton-CG (newton) - - """ - - if not len(self.data): - raise AttributeError("data does not exist.") - if self.data.dtype in self._complex_dtypes: - raise TypeError("data must be not be complex.") - if self.peaks is None: - raise AttributeError("peaks must be picked.") - if self.ranges is None: - raise AttributeError("ranges must be specified.") - print("deconvoluting {}".format(self.id)) - list_parameters = [ - self.data, - self._grouped_index_peaklist, - self._index_ranges, - frac_gauss, - method, - ] - self._deconvoluted_peaks = numpy.array( - [j for i in Fid._deconv_datum(list_parameters) for j in i] - ) - print("deconvolution completed") - - def plot_ppm(self, **kwargs): - """ - Plot :attr:`~nmrpy.data_objects.Fid.data`. - - :keyword upper_ppm: upper spectral bound in ppm - - :keyword lower_ppm: lower spectral bound in ppm - - :keyword lw: linewidth of plot - - :keyword colour: colour of the plot - """ - plt = Plot() - plt._plot_ppm(self, **kwargs) - setattr(self, plt.id, plt) - pyplot.show() - - def plot_deconv(self, **kwargs): - """ - Plot :attr:`~nmrpy.data_objects.Fid.data` with deconvoluted peaks overlaid. 
- - :keyword upper_ppm: upper spectral bound in ppm - - :keyword lower_ppm: lower spectral bound in ppm - - :keyword lw: linewidth of plot - - :keyword colour: colour of the plot - - :keyword peak_colour: colour of the deconvoluted peaks - - :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks - """ - if not len(self._deconvoluted_peaks): - raise AttributeError("deconvolution not yet performed") - plt = Plot() - plt._plot_deconv(self, **kwargs) - setattr(self, plt.id, plt) - pyplot.show() - - -class FidArray(Base): - """ - - This object collects several :class:`~nmrpy.data_objects.Fid` objects into - an array, and it contains all the processing methods necessary for bulk - processing of these FIDs. It should be considered the parent object for any - project. The class methods :meth:`~nmrpy.data_objects.FidArray.from_path` and - :meth:`~nmrpy.data_objects.FidArray.from_data` will instantiate a new - :class:`~nmrpy.data_objects.FidArray` object from a Varian/Bruker .fid path or - an iterable of data respectively. Each :class:`~nmrpy.data_objects.Fid` object - in the array will appear as an attribute of - :class:`~nmrpy.data_objects.FidArray` with a unique ID of the form 'fidXX', - where 'XX' is an increasing integer . 
- - """ - - def __init__(self): - _now = str(datetime.now()) - self.data_model = self.lib.NMRpy( - datetime_created=_now, - datetime_modified=_now, - ) - del _now - self._force_pyenzyme = False - - @property - def force_pyenzyme(self): - return self._force_pyenzyme - - @force_pyenzyme.setter - def force_pyenzyme(self): - raise PermissionError("Forbidden!") - - @force_pyenzyme.deleter - def force_pyenzyme(self): - raise PermissionError("Forbidden!") - - @property - def data_model(self): - return self.__data_model - - @data_model.setter - def data_model(self, data_model: DataModel): - if not isinstance(data_model, DataModel): - raise AttributeError( - f"Parameter `data_model` has to be of type `sdrdm.DataModel`, got {type(data_model)} instead." - ) - self.__data_model = data_model - self.__data_model.datetime_modified = str(datetime.now()) - - @data_model.deleter - def data_model(self): - del self.__data_model - print("The current data model has been deleted.") - - def __str__(self): - return "FidArray of {} FID(s)".format(len(self.data)) - - def get_fid(self, id): - """ - Return an :class:`~nmrpy.data_objects.Fid` object owned by this object, identified by unique ID. Eg.:: - - fid12 = fid_array.get_fid('fid12') - - :arg id: a string id for an :class:`~nmrpy.data_objects.Fid` - """ - try: - return getattr(self, id) - except AttributeError: - print("{} does not exist.".format(id)) - - def get_fids(self): - """ - Return a list of all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. - """ - fids = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Fid) - ] - return fids - - def _get_plots(self): - """ - Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. 
- """ - plots = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Plot) - ] - return plots - - def _del_plots(self): - """ - Deletes all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. - """ - plots = self._get_plots() - for plot in plots: - delattr(self, plot.id) - - def _get_widgets(self): - """ - Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.FidArray`. - """ - widgets = [ - id - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Phaser) - or isinstance(self.__dict__[id], RangeCalibrator) - or isinstance(self.__dict__[id], DataPeakRangeSelector) - or isinstance(self.__dict__[id], FidArrayRangeSelector) - or isinstance(self.__dict__[id], DataTraceRangeSelector) - or isinstance(self.__dict__[id], DataTraceSelector) - ] - return widgets - - def _del_widgets(self): - """ - Deletes all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. - """ - widgets = self._get_widgets() - for w in widgets: - delattr(self, w) - - @property - def data(self): - """ - An array of all :attr:`~nmrpy.data_objects.Fid.data` objects belonging to the :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. - """ - data = numpy.array([fid.data for fid in self.get_fids()]) - return data - - @property - def t(self): - """ - An array of the acquisition time for each FID. 
- """ - nfids = len(self.get_fids()) - t = None - if nfids > 0: - try: - t = self._params["acqtime"] - except: - t = numpy.arange(len(self.get_fids())) - return t - - @property - def deconvoluted_integrals(self): - """ - Collected :class:`~nmrpy.data_objects.Fid.deconvoluted_integrals` - """ - deconvoluted_integrals = [] - for fid in self.get_fids(): - deconvoluted_integrals.append(fid.deconvoluted_integrals) - return numpy.array(deconvoluted_integrals) - - @property - def _deconvoluted_peaks(self): - """ - Collected :class:`~nmrpy.data_objects.Fid._deconvoluted_peaks` - """ - deconvoluted_peaks = [] - for fid in self.get_fids(): - try: - deconvoluted_peaks.append(fid._deconvoluted_peaks) - except: - deconvoluted_peaks.append([]) - return numpy.array(deconvoluted_peaks) - - def add_fid(self, fid): - """ - Add an :class:`~nmrpy.data_objects.Fid` object to this :class:`~nmrpy.data_objects.FidArray`, using a unique id. - - :arg fid: an :class:`~nmrpy.data_objects.Fid` instance - """ - if isinstance(fid, Fid): - setattr(self, fid.id, fid) - else: - raise AttributeError("FidArray requires Fid object.") - - def del_fid(self, fid_id): - """ - Delete an :class:`~nmrpy.data_objects.Fid` object belonging to this :class:`~nmrpy.data_objects.FidArray`, using a unique id. - - :arg fid_id: a string id for an :class:`~nmrpy.data_objects.Fid` - """ - if hasattr(self, fid_id): - if isinstance(getattr(self, fid_id), Fid): - fids = [f.id for f in self.get_fids()] - idx = fids.index(fid_id) - delattr(self, fid_id) - if hasattr(self, "_params") and self._params is not None: - at = list(self._params["acqtime"]) - at.pop(idx) - self._params["acqtime"] = at - else: - raise AttributeError("{} is not an FID object.".format(fid_id)) - else: - raise AttributeError("FID {} does not exist.".format(fid_id)) - - def add_fids(self, fids): - """ - Add a list of :class:`~nmrpy.data_objects.Fid` objects to this :class:`~nmrpy.data_objects.FidArray`. 
- - :arg fids: a list of :class:`~nmrpy.data_objects.Fid` instances - """ - if FidArray._is_iter(fids): - num_fids = len(fids) - zero_fill = str(len(str(num_fids))) - for fid_index in range(num_fids): - try: - fid = fids[fid_index] - id_str = "fid{0:0" + zero_fill + "d}" - fid.id = id_str.format(fid_index) - self.add_fid(fid) - except AttributeError as e: - print(e) - - @classmethod - def from_data(cls, data): - """ - Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a 2D data set of spectral arrays. - - :arg data: a 2D data array - """ - if not cls._is_iter_of_iters(data): - raise TypeError("data must be an iterable of iterables.") - fid_array = cls() - fids = [] - for fid_index, datum in zip(range(len(data)), data): - fid_id = "fid%i" % fid_index - fid = Fid(id=fid_id, data=datum) - fids.append(fid) - fid_array.add_fids(fids) - return fid_array - - @classmethod - def from_path(cls, fid_path=".", file_format=None, arrayset=None): - """ - Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a .fid directory. 
- - :keyword fid_path: filepath to .fid directory - - :keyword file_format: 'varian' or 'bruker', usually unnecessary - - :keyword arrayset: (int) array set for interleaved spectra, - user is prompted if not specified - """ - if not file_format: - try: - with open(fid_path, "rb") as f: - return pickle.load(f) - except: - print("Not NMRPy data file.") - importer = Importer(fid_path=fid_path) - importer.import_fid(arrayset=arrayset) - elif file_format == "varian": - importer = VarianImporter(fid_path=fid_path) - importer.import_fid() - elif file_format == "bruker": - importer = BrukerImporter(fid_path=fid_path) - importer.import_fid(arrayset=arrayset) - elif file_format == "nmrpy": - with open(fid_path, "rb") as f: - return pickle.load(f) - - if cls._is_iter(importer.data): - fid_array = cls.from_data(importer.data) - fid_array._file_format = importer._file_format - fid_array.fid_path = fid_path - fid_array._procpar = importer._procpar - for fid in fid_array.get_fids(): - fid._file_format = fid_array._file_format - fid.fid_path = fid_array.fid_path - fid._procpar = fid_array._procpar - return fid_array - else: - raise IOError("Data could not be imported.") - - def zf_fids(self): - """ - Zero-fill all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` - """ - for fid in self.get_fids(): - fid.zf() - - def emhz_fids(self, lb=5.0): - """ - Apply line-broadening (apodisation) to all :class:`nmrpy.~data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` - - :keyword lb: degree of line-broadening in Hz. - """ - for fid in self.get_fids(): - fid.emhz(lb=lb) - - def ft_fids(self, mp=True, cpus=None): - """ - Fourier-transform all FIDs. 
- - :keyword mp: parallelise over multiple processors, significantly reducing computation time - - :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True - """ - if mp: - fids = self.get_fids() - list_params = [[fid.data, fid._file_format] for fid in fids] - ft_data = self._generic_mp(Fid._ft, list_params, cpus) - for fid, datum in zip(fids, ft_data): - fid.data = datum - fid._flags["ft"] = True - else: - for fid in self.get_fids(): - fid.ft() - print("Fourier-transformation completed") - - def real_fids(self): - """ - Discard imaginary component of FID data sets. - - """ - for fid in self.get_fids(): - fid.real() - - def norm_fids(self): - """ - Normalise FIDs by maximum data value in :attr:`~nmrpy.data_objects.FidArray.data`. - - """ - dmax = self.data.max() - for fid in self.get_fids(): - fid.data = fid.data / dmax - - def phase_correct_fids(self, method="leastsq", mp=True, cpus=None): - """ - Apply automatic phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` - - :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` - - :keyword mp: parallelise the phasing process over multiple processors, significantly reducing computation time - - :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True - """ - if mp: - fids = self.get_fids() - if not all(fid.data.dtype in self._complex_dtypes for fid in fids): - raise TypeError("Only complex data can be phase-corrected.") - if not all(fid._flags["ft"] for fid in fids): - raise ValueError( - "Only Fourier-transformed data can be phase-corrected." 
- ) - list_params = [[fid.data, method] for fid in fids] - phased_data = self._generic_mp( - Fid._phase_correct, list_params, cpus - ) - for fid, datum in zip(fids, phased_data): - fid.data = datum - else: - for fid in self.get_fids(): - fid.phase_correct(method=method) - print("phase-correction completed") - - def baseliner_fids(self): - """ - - Instantiate a baseline-correction GUI widget. Right-click-dragging - defines a range. Ctrl-Right click deletes previously selected range. Indices - selected are stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`, which is used - for baseline-correction (see - :meth:`~nmrpy.data_objects.Fid.baseline_correction`). - - """ - plot_label = """ -Drag Right - select range -Ctrl+Right - delete range -Ctrl+Alt+Right - assign -""" - plot_title = "Select data for baseline-correction" - self._baseliner_widget = FidArrayRangeSelector( - self, title=plot_title, label=plot_label, voff=0.01 - ) - - def baseline_correct_fids(self, deg=2): - """ - Apply baseline-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` - - :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) - """ - for fid in self.get_fids(): - try: - fid.baseline_correct(deg=deg) - except: - print( - "failed for {}. 
Perhaps first run baseliner_fids()".format( - fid.id - ) - ) - print("baseline-correction completed") - - @property - def _data_traces(self): - return self.__data_traces - - @_data_traces.setter - def _data_traces(self, data_traces): - self.__data_traces = data_traces - - @property - def _index_traces(self): - return self.__index_traces - - @_index_traces.setter - def _index_traces(self, index_traces): - self.__index_traces = index_traces - - @property - def _trace_mask(self): - return self.__trace_mask - - @_trace_mask.setter - def _trace_mask(self, trace_mask): - self.__trace_mask = trace_mask - - @property - def _trace_mean_ppm(self): - return self.__trace_mean_ppm - - @_trace_mean_ppm.setter - def _trace_mean_ppm(self, trace_mean_ppm): - trace_mean_ppm - self.__trace_mean_ppm = trace_mean_ppm - - @property - def integral_traces(self): - """ - Returns the dictionary of integral traces generated by - :meth:`~nmrpy.FidArray.select_integral_traces`. - """ - return self._integral_traces - - @integral_traces.setter - def integral_traces(self, integral_traces): - self._integral_traces = integral_traces - - def deconv_fids( - self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0 - ): - """ - Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. - - :keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` - - :keyword mp: parallelise the phasing process over multiple processors, significantly reduces computation time - - :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True, default is n-1 cores - """ - if mp: - fids = self.get_fids() - if not all(fid._flags["ft"] for fid in fids): - raise ValueError( - "Only Fourier-transformed data can be deconvoluted." 
- ) - list_params = [ - [ - fid.data, - fid._grouped_index_peaklist, - fid._index_ranges, - frac_gauss, - method, - ] - for fid in fids - ] - deconv_datum = self._generic_mp( - Fid._deconv_datum, list_params, cpus - ) - for fid, datum in zip(fids, deconv_datum): - fid._deconvoluted_peaks = numpy.array( - [j for i in datum for j in i] - ) - else: - for fid in self.get_fids(): - fid.deconv(frac_gauss=frac_gauss) - print("deconvolution completed") - - def get_masked_integrals(self): - """ - After peakpicker_traces() and deconv_fids() this function returns a masked integral array. - """ - result = [] - try: - ints = [list(i) for i in self.deconvoluted_integrals] - for i in self._trace_mask: - ints_current = numpy.zeros_like(i, dtype="f8") - for j in range(len(i)): - if i[j] != -1: - ints_current[j] = ints[j].pop(0) - result.append(ints_current) - except AttributeError: - print("peakpicker_traces() or deconv_fids() probably not yet run.") - return result - - def ps_fids(self, p0=0.0, p1=0.0): - """ - Apply manual phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` - - :keyword p0: Zero order phase in degrees - - :keyword p1: First order phase in degrees - """ - for fid in self.get_fids(): - fid.ps(p0=p0, p1=p1) - - @staticmethod - def _generic_mp(fcn, iterable, cpus): - if cpus is None: - cpus = cpu_count() - 1 - proc_pool = Pool(cpus) - result = proc_pool.map(fcn, iterable) - proc_pool.close() - proc_pool.join() - return result - - def plot_array(self, **kwargs): - """ - Plot :attr:`~nmrpy.data_objects.FidArray.data`. 
- - :keyword upper_index: upper index of array (None) - - :keyword lower_index: lower index of array (None) - - :keyword upper_ppm: upper spectral bound in ppm (None) - - :keyword lower_ppm: lower spectral bound in ppm (None) - - :keyword lw: linewidth of plot (0.5) - - :keyword azim: starting azimuth of plot (-90) - - :keyword elev: starting elevation of plot (40) - - :keyword filled: True=filled vertices, False=lines (False) - - :keyword show_zticks: show labels on z axis (False) - - :keyword labels: under development (None) - - :keyword colour: plot spectra with colour spectrum, False=black (True) - - :keyword filename: save plot to .pdf file (None) - """ - plt = Plot() - plt._plot_array(self.data, self._params, **kwargs) - setattr(self, plt.id, plt) - - def plot_deconv_array(self, **kwargs): - """ - Plot all :attr:`~nmrpy.data_objects.Fid.data` with deconvoluted peaks overlaid. - - :keyword upper_index: upper index of Fids to plot - - :keyword lower_index: lower index of Fids to plot - - :keyword upper_ppm: upper spectral bound in ppm - - :keyword lower_ppm: lower spectral bound in ppm - - :keyword data_colour: colour of the plotted data ('k') - - :keyword summed_peak_colour: colour of the plotted summed peaks ('r') - - :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks ('g') - - :keyword data_filled: fill state of the plotted data (False) - - :keyword summed_peak_filled: fill state of the plotted summed peaks (True) - - :keyword residual_filled: fill state of the plotted residuals (False) - - :keyword figsize: [x, y] size of plot ([15, 7.5]) - - :keyword lw: linewidth of plot (0.3) - - :keyword azim: azimuth of 3D axes (-90) - - :keyword elev: elevation of 3D axes (20) - - - """ - plt = Plot() - plt._plot_deconv_array(self.get_fids(), **kwargs) - setattr(self, plt.id, plt) - - def calibrate( - self, fid_number=None, assign_only_to_index=False, voff=0.02 - ): - """ - Instantiate a GUI widget to select a peak and 
calibrate - spectra in a :class:`~nmrpy.data_objects.FidArray`. - Left-clicking selects a peak. The user is then prompted to enter - the PPM value of that peak for calibration; this will be applied - to all :class:`~nmrpy.data_objects.Fid` - objects owned by this :class:`~nmrpy.data_objects.FidArray`. See - also :meth:`~nmrpy.data_objects.Fid.calibrate`. - - :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for calibration. If None, the whole data array is plotted. - - :keyword assign_only_to_index: if True, assigns calibration only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number; if False, assigns to all. - - :keyword voff: vertical offset for spectra - """ - plot_label = """ -Left - select peak -""" - self._calibrate_widget = RangeCalibrator( - self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) - - def peakpicker( - self, fid_number=None, assign_only_to_index=True, voff=0.02 - ): - """ - - Instantiate peak-picker widget for - :attr:`~nmrpy.data_objects.Fid.data`, and apply selected - :attr:`~nmrpy.data_objects.Fid.peaks` and - :attr:`~nmrpy.data_objects.Fid.ranges` to all :class:`~nmrpy.data_objects.Fid` - objects owned by this :class:`~nmrpy.data_objects.FidArray`. See - :meth:`~nmrpy.data_objects.Fid.peakpicker`. - - :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for peak-picking. If None, data array is plotted. - - :keyword assign_only_to_index: if True, assigns selections only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number, if False, assigns to all. 
- - :keyword voff: vertical offset for spectra - """ - - plot_label = """ -Left - select peak -Ctrl+Left - delete nearest peak -Drag Right - select range -Ctrl+Right - delete range -Ctrl+Alt+Right - assign -""" - self._peakpicker_widget = DataPeakRangeSelector( - self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) - - def peakpicker_traces(self, voff=0.02, lw=1): - """ - Instantiates a widget to pick peaks and ranges employing a polygon - shape (or 'trace'). This is useful for picking peaks that are subject to drift and peaks - that appear (or disappear) during the course of an experiment. - - :keyword voff: vertical offset fraction (0.01) - - :keyword lw: linewidth of plot (1) - - """ - if self.data is None: - raise AttributeError("No FIDs.") - plot_label = """ -Left - add trace point -Right - finalize trace -Ctrl+Left - delete nearest trace -Drag Right - select range -Ctrl+Right - delete range -Ctrl+Alt+Right - assign -""" - self._peakpicker_widget = DataTraceRangeSelector( - self, - voff=voff, - lw=lw, - label=plot_label, - ) - - def clear_peaks(self): - """ - Calls :meth:`~nmrpy.data_objects.Fid.clear_peaks` on every :class:`~nmrpy.data_objects.Fid` - object in this :class:`~nmrpy.data_objects.FidArray`. - """ - for fid in self.get_fids(): - fid.peaks = None - - def clear_ranges(self): - """ - Calls :meth:`~nmrpy.data_objects.Fid.clear_ranges` on every :class:`~nmrpy.data_objects.Fid` - object in this :class:`~nmrpy.data_objects.FidArray`. 
- """ - for fid in self.get_fids(): - fid.ranges = None - - def _generate_trace_mask(self, traces): - ppm = [numpy.round(numpy.mean(i[0]), 2) for i in traces] - self._trace_mean_ppm = ppm - tt = [i[1] for i in traces] - ln = len(self.data) - filled_tt = [] - for i in tt: - rng = numpy.arange(ln) - if len(i) < ln: - rng[~(~(rng < min(i)) * ~(rng > max(i)))] = -1 - filled_tt.append(rng) - filled_tt = numpy.array(filled_tt) - return filled_tt - - def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): - traces = [dict(zip(i[1], i[0])) for i in traces] - fids = self.get_fids() - fids_i = range(len(self.data)) - for i in fids_i: - peaks = [] - for j in traces: - if i in j: - peak = j[i] - for rng in spans: - if peak >= min(rng) and peak <= max(rng): - peaks.append(peak) - fids[i].peaks = peaks - ranges = [] - for rng in spans: - if any((peaks > min(rng)) * (peaks < max(rng))): - ranges.append(rng) - if ranges == []: - ranges = None - fids[i].ranges = ranges - - def _get_all_summed_peakshapes(self): - """ - Returns peakshapes for all FIDs - """ - peaks = [] - for fid in self.get_fids(): - # x = numpy.arange(len(self.get_fids()[0].data)) - x = numpy.arange(len(self.get_fids()[0].data)) - peaks.append(Fid._f_pks(fid._deconvoluted_peaks, x)) - return peaks - - def _get_all_list_peakshapes(self): - """ - Returns peakshapes for all FIDs - """ - peaks = [] - for fid in self.get_fids(): - # x = numpy.arange(len(self.get_fids()[0].data)) - x = numpy.arange(len(self.get_fids()[0].data)) - peaks.append(Fid._f_pks_list(fid._deconvoluted_peaks, x)) - return peaks - - def _get_truncated_peak_shapes_for_plotting(self): - """ - Produces a set of truncated deconvoluted peaks for plotting. 
- """ - peakshapes = self._get_all_list_peakshapes() - ppms = [fid._ppm for fid in self.get_fids()] - peakshapes_short_x = [] - peakshapes_short_y = [] - for ps, ppm in zip(peakshapes, ppms): - pk_y = [] - pk_x = [] - for pk in ps: - pk_ind = pk > 0.1 * pk.max() - pk_x.append(ppm[pk_ind]) - pk_y.append(pk[pk_ind]) - peakshapes_short_x.append(pk_x) - peakshapes_short_y.append(pk_y) - return peakshapes_short_x, peakshapes_short_y - - def select_integral_traces(self, voff=0.02, lw=1): - """ - - Instantiate a trace-selection widget to identify deconvoluted peaks. - This can be useful when data are subject to drift. Selected traces on the data - array are translated into a set of nearest deconvoluted peaks, and saved in a - dictionary: :attr:`~nmrpy.data_objects.FidArray.integral_traces`. - - :keyword voff: vertical offset fraction (0.01) - - :keyword lw: linewidth of plot (1) - """ - if self.data is None: - raise AttributeError("No FIDs.") - if (self.deconvoluted_integrals == None).any(): - raise AttributeError("No integrals.") - peakshapes = self._get_all_summed_peakshapes() - # pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() - plot_label = """ -Left - add trace point -Right - finalize trace -Ctrl+Left - delete nearest trace -Ctrl+Alt+Right - assign -""" - self._select_trace_widget = DataTraceSelector( - self, - extra_data=peakshapes, - extra_data_colour="b", - voff=voff, - label=plot_label, - lw=lw, - ) - - def get_integrals_from_traces(self): - """ - Returns a dictionary of integral values for all - :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary - :attr:`~nmrpy.data_objects.FidArray.integral_traces`. - """ - if ( - self.deconvoluted_integrals is None - or None in self.deconvoluted_integrals - ): - raise AttributeError("No integrals.") - if not hasattr(self, "_integral_traces"): - raise AttributeError( - "No integral traces. First run select_integral_traces()." 
- ) - integrals_set = {} - decon_set = self.deconvoluted_integrals - for i, tr in self.integral_traces.items(): - tr_keys = numpy.array([fid for fid in tr.keys()]) - tr_vals = numpy.array([val for val in tr.values()]) - tr_sort = numpy.argsort(tr_keys) - tr_keys = tr_keys[tr_sort] - tr_vals = tr_vals[tr_sort] - integrals = decon_set[tr_keys, tr_vals] - integrals_set[i] = integrals - return integrals_set - - def assign_integrals(self, integrals_set: list) -> dict: - print("~~~ Method under contruction ~~~") - widget_list = [] - for i, j in enumerate(integrals_set): - widget_list.append((i, list(j))) - return SelectMultiple(options=widget_list, description="Integrals:") - - def save_to_file(self, filename=None, overwrite=False): - """ - Save :class:`~nmrpy.data_objects.FidArray` object to file, including all objects owned. - - :keyword filename: filename to save :class:`~nmrpy.data_objects.FidArray` to - - :keyword overwrite: if True, overwrite existing file - - """ - if filename is None: - basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename + ".nmrpy" - if not isinstance(filename, str): - raise TypeError("filename must be a string.") - if filename[-6:] != ".nmrpy": - filename += ".nmrpy" - if os.path.isfile(filename) and not overwrite: - print("File " + filename + " exists, set overwrite=True to force.") - return 1 - # delete all matplotlib plots to reduce file size - self._del_plots() - for fid in self.get_fids(): - fid._del_plots() - # delete all widgets (can't be pickled) - self._del_widgets() - for fid in self.get_fids(): - fid._del_widgets() - with open(filename, "wb") as f: - pickle.dump(self, f) - - def save_data(self, file_format: str, filename=None, overwrite=False): - print("~~~ Method under contruction ~~~") - if self.force_pyenzyme: - import pyenzyme as pe - - enzymeml = pe.EnzymeMLDocument( - name=self.data_mode.experiment.name - if hasattr(self.data_model.experiment, "name") - else "NMR experiment" - ) - ... 
- return 1 - if file_format.lower() == ("enzymeml" or "nmrml"): - # model = self.data_model.convert_to( - # template=Path(__file__).parent.parent / "links/enzymeml.toml" - # ) - enzymeml = DataModel.from_git( - url="https://github.com/EnzymeML/enzymeml-specifications.git", - tag="markdown-parser-refactor", - ) - doc = enzymeml.EnzymeMLDocument( - name=( - self.data_model.experiment.name - if hasattr(self.data_model.experiment, "name") - else "NMR experiment" - ), - created=self.data_model.datetime_created, - modified=self.data_model.datetime_modified, - ) - model = doc.xml() - elif file_format.lower() == "xml": - model = self.data_model.xml() - elif file_format.lower() == "json": - model = self.data_model.json() - elif file_format.lower() == "yaml": - model = self.data_model.yaml() - elif file_format.lower() == "hdf5": - model = self.data_model.hdf5() - else: - raise AttributeError( - f"Parameter `file_format` expected to be one of `enzymeml`; `nmrml`; `xml`; `json`; `yaml`; `hdf5`, got {file_format} instead." - ) - if not filename: - basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename + "." + file_format.lower() - if os.path.isfile(filename) and not overwrite: - print("File " + filename + " exists, set overwrite=True to force.") - return 1 - with open(filename, "w") as f: - f.write(model) - - -class Importer(Base): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.data = None - - @property - def data(self): - return self.__data - - @data.setter - def data(self, data): - if data is None: - self.__data = data - elif data.dtype in self._complex_dtypes: - if Importer._is_iter_of_iters(data): - self.__data = data - elif Importer._is_iter(data): - self.__data = numpy.array([data]) - else: - raise TypeError("data must be iterable.") - else: - raise TypeError("data must be complex.") - - def import_fid(self, arrayset=None): - """ - This will first attempt to import Bruker data. Failing that, Varian. 
- """ - try: - print("Attempting Bruker") - brukerimporter = BrukerImporter(fid_path=self.fid_path) - brukerimporter.import_fid(arrayset=arrayset) - self.data = brukerimporter.data - self._procpar = brukerimporter._procpar - self._file_format = brukerimporter._file_format - return - except (FileNotFoundError, OSError): - print("fid_path does not specify a valid .fid directory.") - return - except (TypeError, IndexError): - print("probably not Bruker data") - try: - print("Attempting Varian") - varianimporter = VarianImporter(fid_path=self.fid_path) - varianimporter.import_fid() - self._procpar = varianimporter._procpar - self.data = varianimporter.data - self._file_format = varianimporter._file_format - return - except TypeError: - print("probably not Varian data") - - -class VarianImporter(Importer): - def import_fid(self): - try: - procpar, data = nmrglue.varian.read(self.fid_path) - self.data = data - self._procpar = procpar - self._file_format = "varian" - except FileNotFoundError: - print("fid_path does not specify a valid .fid directory.") - except OSError: - print("fid_path does not specify a valid .fid directory.") - - -class BrukerImporter(Importer): - def import_fid(self, arrayset=None): - try: - dirs = [ - int(i) - for i in os.listdir(self.fid_path) - if os.path.isdir(self.fid_path + os.path.sep + i) - ] - dirs.sort() - dirs = [str(i) for i in dirs] - alldata = [] - for d in dirs: - procpar, data = nmrglue.bruker.read( - self.fid_path + os.path.sep + d - ) - alldata.append((procpar, data)) - self.alldata = alldata - incr = 1 - while True: - if len(alldata) == 1: - break - if alldata[incr][1].shape == alldata[0][1].shape: - break - incr += 1 - if incr > 1: - if arrayset == None: - print( - "Total of " - + str(incr) - + " alternating FidArrays found." - ) - arrayset = input("Which one to import? 
") - arrayset = int(arrayset) - else: - arrayset = arrayset - if arrayset < 1 or arrayset > incr: - raise ValueError( - "Select a value between 1 and " + str(incr) + "." - ) - else: - arrayset = 1 - self.incr = incr - procpar = alldata[arrayset - 1][0] - data = numpy.vstack( - [d[1] for d in alldata[(arrayset - 1) :: incr]] - ) - self.data = data - self._procpar = procpar - self._file_format = "bruker" - self.data = nmrglue.bruker.remove_digital_filter( - procpar, self.data - ) - ( - self._procpar["tdelta"], - self._procpar["tcum"], - self._procpar["tsingle"], - ) = self._get_time_delta() - self._procpar["arraylength"] = self.data.shape[0] - self._procpar["arrayset"] = arrayset - except FileNotFoundError: - print("fid_path does not specify a valid .fid directory.") - except OSError: - print("fid_path does not specify a valid .fid directory.") - - def _get_time_delta(self): - td = 0.0 - tcum = [] - tsingle = [] - for i in range(self.incr): - pp = self.alldata[i][0]["acqus"] - sw_hz = pp["SW_h"] - at = pp["TD"] / (2 * sw_hz) - d1 = pp["D"][1] - nt = pp["NS"] - tot = (at + d1) * nt / 60.0 # convert to mins - td += tot - tcum.append(td) - tsingle.append(tot) - return (td, numpy.array(tcum), numpy.array(tsingle)) - - -if __name__ == "__main__": - pass diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 1f231b8..234bda3 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -15,6 +15,9 @@ from sdRDM.base.importedmodules import ImportedModules from nmrpy.datamodel.core import * from nmrpy.utils import create_enzymeml +import pyenzyme as pe +import pyenzyme.equations as peq +from pyenzyme.model import EnzymeMLDocument class Base: @@ -33,26 +36,6 @@ def __init__(self, *args, **kwargs): self.fid_path = kwargs.get("fid_path", ".") self._file_format = None - # Probably not required anymore - # @property - # def lib(self): - # try: - # self.__lib - # except: - # self.__lib = DataModel.from_markdown( - # path=Path(__file__).parent.parent / 
"specifications" - # ) - # return self.__lib - - # @property - # def parameters_object(self): - # return self.__parameter_object - - # @parameters_object.setter - # def parameters_object(self, parameters_object): - # if isinstance(parameters_object, DataModel): - # self.__parameters_object = parameters_object - @property def id(self): return self.__id @@ -525,19 +508,54 @@ def deconvoluted_integrals(self): int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) integrals.append(int_gauss + int_lorentz) + print(f"peak {i} integral: {integrals[i]}") for peak_identity in self.fid_object.peak_identities: - if peak_identity.name == self.identities[i]: + print(f"peak_identity: {peak_identity.name}") + if i in peak_identity.associated_indices: + print( + f"peak index {i} found in in associated indices {peak_identity.associated_indices} for peak identity {peak_identity.name}" + ) + if integrals[i] in peak_identity.associated_integrals: + print( + f"integral {integrals[i]} already in associated integrals" + ) + pass try: peak_identity.associated_integrals.append( float(integrals[i]) ) + print( + f"added integral {integrals[i]} to associated integrals" + ) except: peak_identity.associated_integrals = [] + print( + f"created new associated integrals list for peak identity {peak_identity.name}" + ) peak_identity.associated_integrals.append( float(integrals[i]) ) - i += 1 + print( + f"added integral {integrals[i]} to associated integrals" + ) + i += 1 + print(f"incremented i to {i}") + + # for peak_identity in self.fid_object.peak_identities: + # if peak_identity.name == self.identities[i]: + # if integrals[i] in peak_identity.associated_integrals: + # pass + # try: + # peak_identity.associated_integrals.append( + # float(integrals[i]) + # ) + # except: + # peak_identity.associated_integrals = [] + # peak_identity.associated_integrals.append( + # float(integrals[i]) + # ) + # i += 1 return integrals def 
_get_plots(self): @@ -972,18 +990,12 @@ def baseliner(self): @classmethod def _f_gauss(cls, offset, amplitude, gauss_sigma, x): - return amplitude * numpy.exp( - -((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0) - ) + return amplitude * numpy.exp(-((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0)) @classmethod def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) - return ( - amplitude - * lorentz_hwhm**2.0 - / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) - ) + return amplitude * lorentz_hwhm**2.0 / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) @classmethod def _f_gauss_int(cls, amplitude, gauss_sigma): @@ -1369,6 +1381,9 @@ def deconv(self, method="leastsq", frac_gauss=0.0): self._deconvoluted_peaks = numpy.array( [j for i in Fid._deconv_datum(list_parameters) for j in i] ) + + print(self.deconvoluted_integrals) + print("deconvolution completed") def plot_ppm(self, **kwargs): @@ -1497,16 +1512,17 @@ def enzymeml_document(self): return self.__enzymeml_document @enzymeml_document.setter - def enzymeml_document(self, enzymeml_document: DataModel): - if not isinstance(enzymeml_document, DataModel): + def enzymeml_document(self, enzymeml_document: EnzymeMLDocument): + if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( - f"Parameter `enzymeml_document` has to be of type `sdrdm.DataModel`, got {type(enzymeml_document)} instead." + f"Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead." 
) self.__enzymeml_document = enzymeml_document self.__enzymeml_document.modified = datetime.now() + self.__data_model.experiment.name = self.__enzymeml_document.name for fid in self.get_fids(): fid.enzymeml_species = [ - species.name + (species.name, species.id) for species in get_species_from_enzymeml(self.__enzymeml_document) ] @@ -1515,23 +1531,6 @@ def enzymeml_document(self): del self.__enzymeml_document print("The current EnzymeML document has been deleted.") - @property - def enzymeml_library(self): - return self.__enzymeml_library - - @enzymeml_library.setter - def enzymeml_library(self, enzymeml_library: ImportedModules): - if not isinstance(enzymeml_library, ImportedModules): - raise AttributeError( - f"Parameter `enzymeml_library` has to be of type `sdrdm.base.importedmodules.ImportedModules`, got {type(enzymeml_library)} instead." - ) - self.__enzymeml_library = enzymeml_library - - @enzymeml_library.deleter - def enzymeml_library(self): - del self.__enzymeml_library - print("The current EnzymeML library has been deleted.") - @property def concentrations(self): """ @@ -1769,9 +1768,7 @@ def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: Args: path_to_enzymeml_document (str): Path to file containing an EnzymeML document """ - self.enzymeml_document, self.enzymeml_library = DataModel.parse( - path_to_enzymeml_document - ) + self.enzymeml_document = pe.load_enzymeml(path_to_enzymeml_document) @classmethod def from_data(cls, data): @@ -2029,12 +2026,40 @@ def deconv_fids(self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0): for fid in fids ] deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) + for fid, datum in zip(fids, deconv_datum): fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) + # Iterate over newly deconvoluted peaks and calculate integrals + integrals = [] + i = 0 + for peak in fid._deconvoluted_peaks: + int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) + int_lorentz = (1 - 
peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) + integrals.append(int_gauss + int_lorentz) + # Iterate over peak identities and assign integrals based on the peak and integral indices + for peak_identity in fid.fid_object.peak_identities: + if i in peak_identity.associated_indices: + # Check if the integral has already been assigned to the peak identity + if integrals[i] in peak_identity.associated_integrals: + pass + # If not, assign the integral to the peak identity + try: + peak_identity.associated_integrals.append( + float(integrals[i]) + ) + except: + peak_identity.associated_integrals = [] + peak_identity.associated_integrals.append( + float(integrals[i]) + ) + i += 1 fid.fid_object.processing_steps.is_deconvoluted = True + else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) + fid.fid_object.processing_steps.is_deconvoluted = True + print("deconvolution completed") def get_masked_integrals(self): @@ -2424,69 +2449,13 @@ def save_to_file(self, filename=None, overwrite=False): with open(filename, "wb") as f: pickle.dump(self, f) - # TODO: Will probably create a measurement object for each FID(?) - # and add them to the EnzymeML document provided - # Issue: How to get species for IdentityAssigner? __init__()? - def to_enzymeml(self, enzymeml_document: DataModel = None) -> DataModel: + def apply_to_enzymeml( + self, enzymeml_document: EnzymeMLDocument = None + ) -> EnzymeMLDocument: if not enzymeml_document: enzymeml_document = self.enzymeml_document return create_enzymeml(self, enzymeml_document) - # TODO: Refactor save_data method - # possibly make saving to EnzymeML a get_measurements method - def save_data(self, file_format: str, filename=None, overwrite=False): - print("~~~ Method under contruction ~~~") - if self.force_pyenzyme: - try: - import pyenzyme as pe - except: - self.force_pyenzyme = False - raise ModuleNotFoundError( - "PyEnzyme is not installed in your current environment. Use EnzymeML data model instead or install PyEnzyme." 
- ) - enzymeml = pe.EnzymeMLDocument( - name=self.data_model.experiment.name - if hasattr(self.data_model.experiment, "name") - else "NMR experiment" - ) - ... - return 1 - if file_format.lower() == ("enzymeml" or "nmrml"): - enzymeml = DataModel.from_git( - url="https://github.com/EnzymeML/enzymeml-specifications.git", - tag="linking-refactor", - ) - doc = enzymeml.EnzymeMLDocument( - name=( - self.data_model.experiment.name - if hasattr(self.data_model.experiment, "name") - else "NMR experiment" - ), - created=self.data_model.datetime_created, - modified=self.data_model.datetime_modified, - ) - model = doc.xml() - elif file_format.lower() == "xml": - model = self.data_model.xml() - elif file_format.lower() == "json": - model = self.data_model.json() - elif file_format.lower() == "yaml": - model = self.data_model.yaml() - elif file_format.lower() == "hdf5": - model = self.data_model.hdf5() - else: - raise AttributeError( - f"Parameter `file_format` expected to be one of `enzymeml`; `nmrml`; `xml`; `json`; `yaml`; `hdf5`, got {file_format} instead." - ) - if not filename: - basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename + "." + file_format.lower() - if os.path.isfile(filename) and not overwrite: - print("File " + filename + " exists, set overwrite=True to force.") - return 1 - with open(filename, "w") as f: - f.write(model) - def assign_identities(self): """ Instantiate a identity-assignment GUI widget. 
Select a FID by @@ -2508,9 +2477,8 @@ def clear_identities(self): fid.identities = None def calculate_concentrations(self): - integrals = self.deconvoluted_integrals.transpose() self._concentration_widget = ConcentrationCalculator( - fid_array=self, integrals=integrals + fid_array=self, enzymeml_document=self.enzymeml_document ) diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py new file mode 100644 index 0000000..98449d3 --- /dev/null +++ b/nmrpy/datamodel/__init__.py @@ -0,0 +1,12 @@ +import os + +from .core.cv import CV +from .core.experiment import Experiment +from .core.fidarray import FIDArray +from .core.fidobject import FIDObject +from .core.fileformats import FileFormats +from .core.identity import Identity +from .core.nmrpy import NMRpy +from .core.parameters import Parameters +from .core.processingsteps import ProcessingSteps +from .core.term import Term diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py new file mode 100644 index 0000000..262cc64 --- /dev/null +++ b/nmrpy/datamodel/core/__init__.py @@ -0,0 +1,10 @@ +from .cv import CV +from .experiment import Experiment +from .fidarray import FIDArray +from .fidobject import FIDObject +from .fileformats import FileFormats +from .identity import Identity +from .nmrpy import NMRpy +from .parameters import Parameters +from .processingsteps import ProcessingSteps +from .term import Term diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py new file mode 100644 index 0000000..d686157 --- /dev/null +++ b/nmrpy/datamodel/core/cv.py @@ -0,0 +1,55 @@ +from typing import Dict, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import AnyUrl, PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + + +class CV( + sdRDM.DataModel, + search_mode="unordered", +): + """lorem ipsum""" + + id: Optional[str] = attr( 
+ name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + vocabulary: str = element( + description="Name of the CV used.", + tag="vocabulary", + json_schema_extra=dict(), + ) + + version: str = element( + description="Version of the CV used.", + tag="version", + json_schema_extra=dict(), + ) + + url: AnyUrl = element( + description="URL pointing to the CV used.", + tag="url", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py new file mode 100644 index 0000000..84ecc87 --- /dev/null +++ b/nmrpy/datamodel/core/experiment.py @@ -0,0 +1,106 @@ +from typing import Dict, List, Optional, Union +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + +from .fidarray import FIDArray +from .fidobject import FIDObject +from .identity import Identity +from .parameters import Parameters +from .processingsteps import ProcessingSteps + + +class Experiment( + sdRDM.DataModel, + search_mode="unordered", +): + """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed + Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, 
https://github.com/EnzymeML/enzymeml-specifications/@Reactant + """ + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + name: str = element( + description="A descriptive name for the overarching experiment.", + tag="name", + json_schema_extra=dict(), + ) + + fid: List[FIDObject] = element( + description="A single NMR spectrum.", + default_factory=ListPlus, + tag="fid", + json_schema_extra=dict( + multiple=True, + ), + ) + + fid_array: Optional[FIDArray] = element( + description="Multiple NMR spectra to be processed together.", + default=None, + tag="fid_array", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self + + def add_to_fid( + self, + raw_data: List[str] = ListPlus(), + processed_data: List[Union[str, float]] = ListPlus(), + nmr_parameters: Optional[Parameters] = None, + processing_steps: Optional[ProcessingSteps] = None, + peak_identities: List[Identity] = ListPlus(), + id: Optional[str] = None, + **kwargs, + ) -> FIDObject: + """ + This method adds an object of type 'FIDObject' to attribute fid + + Args: + id (str): Unique identifier of the 'FIDObject' object. Defaults to 'None'. + raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. Defaults to ListPlus() + processed_data (): Processed data array.. Defaults to ListPlus() + nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None + processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. 
Defaults to None + peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() + """ + + params = { + "raw_data": raw_data, + "processed_data": processed_data, + "nmr_parameters": nmr_parameters, + "processing_steps": processing_steps, + "peak_identities": peak_identities, + } + + if id is not None: + params["id"] = id + + obj = FIDObject(**params) + + self.fid.append(obj) + + return self.fid[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py new file mode 100644 index 0000000..7b83f32 --- /dev/null +++ b/nmrpy/datamodel/core/fidarray.py @@ -0,0 +1,46 @@ +from typing import Dict, List, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + + +class FIDArray( + sdRDM.DataModel, + search_mode="unordered", +): + """Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. 
Setup time for experiment, Default 0.5}""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + fids: List[str] = element( + description="List of `FIDObject.id` belonging to this array.", + default_factory=ListPlus, + tag="fids", + json_schema_extra=dict( + multiple=True, + ), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py new file mode 100644 index 0000000..0b0e5f9 --- /dev/null +++ b/nmrpy/datamodel/core/fidobject.py @@ -0,0 +1,133 @@ +from typing import Dict, List, Optional, Union +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + +from .identity import AssociatedRanges, Identity +from .parameters import Parameters +from .processingsteps import ProcessingSteps + + +class FIDObject( + sdRDM.DataModel, + search_mode="unordered", +): + """Container for a single NMR spectrum.""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + raw_data: List[str] = element( + description=( + "Complex spectral data from numpy array as string of format" + " `{array.real}+{array.imag}j`." 
+ ), + default_factory=ListPlus, + tag="raw_data", + json_schema_extra=dict( + multiple=True, + ), + ) + + processed_data: List[Union[str, float]] = element( + description="Processed data array.", + default_factory=ListPlus, + tag="processed_data", + json_schema_extra=dict( + multiple=True, + ), + ) + + nmr_parameters: Optional[Parameters] = element( + description="Contains commonly-used NMR parameters.", + default_factory=Parameters, + tag="nmr_parameters", + json_schema_extra=dict(), + ) + + processing_steps: Optional[ProcessingSteps] = element( + description=( + "Contains the processing steps performed, as well as the parameters used" + " for them." + ), + default_factory=ProcessingSteps, + tag="processing_steps", + json_schema_extra=dict(), + ) + + peak_identities: List[Identity] = element( + description=( + "Container holding and mapping integrals resulting from peaks and their" + " ranges to EnzymeML species." + ), + default_factory=ListPlus, + tag="peak_identities", + json_schema_extra=dict( + multiple=True, + ), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self + + def add_to_peak_identities( + self, + name: Optional[str] = None, + species_id: Optional[str] = None, + associated_peaks: List[float] = ListPlus(), + associated_ranges: List[AssociatedRanges] = ListPlus(), + associated_indices: List[int] = ListPlus(), + associated_integrals: List[float] = ListPlus(), + id: Optional[str] = None, + **kwargs, + ) -> Identity: + """ + This method adds an object of type 'Identity' to attribute peak_identities + + Args: + id (str): Unique identifier of the 'Identity' object. Defaults to 'None'. 
+ name (): Descriptive name for the species. Defaults to None + species_id (): ID of an EnzymeML species. Defaults to None + associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus() + associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() + associated_indices (): Indices in the NMR spectrum (counted from left to right) belonging to the given peaks. Defaults to ListPlus() + associated_integrals (): Integrals resulting from the given peaks and ranges of a species. Defaults to ListPlus() + """ + + params = { + "name": name, + "species_id": species_id, + "associated_peaks": associated_peaks, + "associated_ranges": associated_ranges, + "associated_indices": associated_indices, + "associated_integrals": associated_integrals, + } + + if id is not None: + params["id"] = id + + obj = Identity(**params) + + self.peak_identities.append(obj) + + return self.peak_identities[-1] diff --git a/nmrpy/datamodel/core/fileformats.py b/nmrpy/datamodel/core/fileformats.py new file mode 100644 index 0000000..476dc52 --- /dev/null +++ b/nmrpy/datamodel/core/fileformats.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class FileFormats(Enum): + + VARIAN = "varian" + BRUKER = "bruker" + NONE = None diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/identity.py new file mode 100644 index 0000000..26bf547 --- /dev/null +++ b/nmrpy/datamodel/core/identity.py @@ -0,0 +1,162 @@ +from typing import Dict, List, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature +from sdRDM.tools.utils import elem2dict + + +class AssociatedRanges( + sdRDM.DataModel, + search_mode="unordered", +): + """Small type for attribute 'associated_ranges'""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique 
identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + start: Optional[float] = element( + default=None, + tag="start", + json_schema_extra=dict(), + ) + + end: Optional[float] = element( + default=None, + tag="end", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self + + +@forge_signature +class Identity( + sdRDM.DataModel, + search_mode="unordered", +): + """Container mapping one or more peaks to the respective species.""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + name: Optional[str] = element( + description="Descriptive name for the species", + default=None, + tag="name", + json_schema_extra=dict(), + ) + + species_id: Optional[str] = element( + description="ID of an EnzymeML species", + default=None, + tag="species_id", + json_schema_extra=dict(), + ) + + associated_peaks: List[float] = element( + description="Peaks belonging to the given species", + default_factory=ListPlus, + tag="associated_peaks", + json_schema_extra=dict( + multiple=True, + ), + ) + + associated_ranges: List[AssociatedRanges] = element( + description="Sets of ranges belonging to the given peaks", + default_factory=ListPlus, + tag="associated_ranges", + json_schema_extra=dict( + multiple=True, + ), + ) + + associated_indices: List[int] = element( + description=( + "Indices in the NMR spectrum (counted from left to right) belonging to the" + " given peaks" + ), + default_factory=ListPlus, + tag="associated_indices", + json_schema_extra=dict( + multiple=True, + ), + ) 
+ + associated_integrals: List[float] = element( + description="Integrals resulting from the given peaks and ranges of a species", + default_factory=ListPlus, + tag="associated_integrals", + json_schema_extra=dict( + multiple=True, + ), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self + + def add_to_associated_ranges( + self, + start: Optional[float] = None, + end: Optional[float] = None, + id: Optional[str] = None, + **kwargs, + ) -> AssociatedRanges: + """ + This method adds an object of type 'AssociatedRanges' to attribute associated_ranges + + Args: + id (str): Unique identifier of the 'AssociatedRanges' object. Defaults to 'None'. + start (): . Defaults to None + end (): . 
Defaults to None + """ + + params = { + "start": start, + "end": end, + } + + if id is not None: + params["id"] = id + + obj = AssociatedRanges(**params) + + self.associated_ranges.append(obj) + + return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py new file mode 100644 index 0000000..d32755c --- /dev/null +++ b/nmrpy/datamodel/core/nmrpy.py @@ -0,0 +1,60 @@ +from datetime import datetime as Datetime +from typing import Dict, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + +from .experiment import Experiment + + +class NMRpy( + sdRDM.DataModel, + search_mode="unordered", +): + """Root element of the NMRpy data model.""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + datetime_created: Datetime = element( + description="Date and time this dataset has been created.", + tag="datetime_created", + json_schema_extra=dict(), + ) + + datetime_modified: Optional[Datetime] = element( + description="Date and time this dataset has last been modified.", + default=None, + tag="datetime_modified", + json_schema_extra=dict(), + ) + + experiment: Optional[Experiment] = element( + description="List of experiments associated with this dataset.", + default=None, + tag="experiment", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return 
self diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py new file mode 100644 index 0000000..eb3dbfb --- /dev/null +++ b/nmrpy/datamodel/core/parameters.py @@ -0,0 +1,111 @@ +from typing import Dict, List, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + + +class Parameters( + sdRDM.DataModel, + search_mode="unordered", +): + """Container for relevant NMR parameters.""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + acquisition_time: Optional[float] = element( + description="at", + default=None, + tag="acquisition_time", + json_schema_extra=dict(), + ) + + relaxation_time: Optional[float] = element( + description="d1", + default=None, + tag="relaxation_time", + json_schema_extra=dict(), + ) + + repetition_time: Optional[float] = element( + description="rt = at + d1", + default=None, + tag="repetition_time", + json_schema_extra=dict(), + ) + + number_of_transients: List[float] = element( + description="nt", + default_factory=ListPlus, + tag="number_of_transients", + json_schema_extra=dict( + multiple=True, + ), + ) + + acquisition_times_array: List[float] = element( + description="acqtime = [nt, 2nt, ..., rt x nt]", + default_factory=ListPlus, + tag="acquisition_times_array", + json_schema_extra=dict( + multiple=True, + ), + ) + + spectral_width_ppm: Optional[float] = element( + description="sw", + default=None, + tag="spectral_width_ppm", + json_schema_extra=dict(), + ) + + spectral_width_hz: Optional[float] = element( + description="sw_hz", + default=None, + tag="spectral_width_hz", + json_schema_extra=dict(), + ) + + spectrometer_frequency: Optional[float] = element( + description="sfrq", + default=None, + 
tag="spectrometer_frequency", + json_schema_extra=dict(), + ) + + reference_frequency: Optional[float] = element( + description="reffrq", + default=None, + tag="reference_frequency", + json_schema_extra=dict(), + ) + + spectral_width_left: Optional[float] = element( + description="sw_left", + default=None, + tag="spectral_width_left", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py new file mode 100644 index 0000000..c2ec8d5 --- /dev/null +++ b/nmrpy/datamodel/core/processingsteps.py @@ -0,0 +1,128 @@ +from typing import Dict, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + + +class ProcessingSteps( + sdRDM.DataModel, + search_mode="unordered", +): + """Container for processing steps performed, as well as parameter for them.""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + is_apodised: Optional[bool] = element( + description="Whether or not Apodisation (line-broadening) has been performed.", + default=None, + tag="is_apodised", + json_schema_extra=dict(), + ) + + apodisation_frequency: Optional[float] = element( + description="Degree of Apodisation (line-broadening) in Hz.", + default=None, + tag="apodisation_frequency", + json_schema_extra=dict(), + ) + + 
is_zero_filled: Optional[bool] = element( + description="Whether or not Zero-filling has been performed.", + default=False, + tag="is_zero_filled", + json_schema_extra=dict(), + ) + + is_fourier_transformed: Optional[bool] = element( + description="Whether or not Fourier transform has been performed.", + default=False, + tag="is_fourier_transformed", + json_schema_extra=dict(), + ) + + fourier_transform_type: Optional[str] = element( + description="The type of Fourier transform used.", + default=None, + tag="fourier_transform_type", + json_schema_extra=dict(), + ) + + is_phased: Optional[bool] = element( + description="Whether or not Phasing was performed.", + default=False, + tag="is_phased", + json_schema_extra=dict(), + ) + + zero_order_phase: Optional[float] = element( + description="Zero-order phase used for Phasing.", + default=None, + tag="zero_order_phase", + json_schema_extra=dict(), + ) + + first_order_phase: Optional[float] = element( + description="First-order phase used for Phasing.", + default=None, + tag="first_order_phase", + json_schema_extra=dict(), + ) + + is_only_real: Optional[bool] = element( + description="Whether or not the imaginary part has been discarded.", + default=False, + tag="is_only_real", + json_schema_extra=dict(), + ) + + is_normalised: Optional[bool] = element( + description="Whether or not Normalisation was performed.", + default=False, + tag="is_normalised", + json_schema_extra=dict(), + ) + + max_value: Optional[float] = element( + description="Maximum value of the dataset used for Normalisation.", + default=None, + tag="max_value", + json_schema_extra=dict(), + ) + + is_deconvoluted: Optional[bool] = element( + description="Whether or not Deconvolution was performed.", + default=False, + tag="is_deconvoluted", + json_schema_extra=dict(), + ) + + is_baseline_corrected: Optional[bool] = element( + description="Whether or not Baseline correction was performed.", + default=False, + tag="is_baseline_corrected", + 
json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py new file mode 100644 index 0000000..c28d57e --- /dev/null +++ b/nmrpy/datamodel/core/term.py @@ -0,0 +1,68 @@ +from typing import Dict, Optional +from uuid import uuid4 + +import sdRDM +from lxml.etree import _Element +from pydantic import PrivateAttr, model_validator +from pydantic_xml import attr, element +from sdRDM.base.listplus import ListPlus +from sdRDM.tools.utils import elem2dict + + +class Term( + sdRDM.DataModel, + search_mode="unordered", +): + """lorem ipsum {Add reference back to term_cv_reference.}""" + + id: Optional[str] = attr( + name="id", + alias="@id", + description="Unique identifier of the given object.", + default_factory=lambda: str(uuid4()), + ) + + name: str = element( + description=( + "The preferred name of the term associated with the given accession number." + ), + tag="name", + json_schema_extra=dict(), + ) + + accession: str = element( + description="Accession number of the term in the controlled vocabulary.", + tag="accession", + json_schema_extra=dict(), + ) + + term_cv_reference: Optional[str] = element( + description=( + "Reference to the `CV.id` of a controlled vocabulary that has been defined" + " for this dataset." 
+ ), + default=None, + tag="term_cv_reference", + json_schema_extra=dict(), + ) + + value: Optional[str] = element( + description="Value of the term, if applicable.", + default=None, + tag="value", + json_schema_extra=dict(), + ) + + _raw_xml_data: Dict = PrivateAttr(default_factory=dict) + + @model_validator(mode="after") + def _parse_raw_xml_data(self): + for attr, value in self: + if isinstance(value, (ListPlus, list)) and all( + isinstance(i, _Element) for i in value + ): + self._raw_xml_data[attr] = [elem2dict(i) for i in value] + elif isinstance(value, _Element): + self._raw_xml_data[attr] = elem2dict(value) + + return self diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md new file mode 100644 index 0000000..a037ba8 --- /dev/null +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -0,0 +1,92 @@ +```mermaid +classDiagram + NMRpy *-- Experiment + Experiment *-- FIDObject + Experiment *-- FIDArray + FIDObject *-- Parameters + FIDObject *-- ProcessingSteps + FIDObject *-- Identity + + class NMRpy { + +datetime datetime_created* + +datetime datetime_modified + +Experiment experiment + } + + class Experiment { + +string name* + +FIDObject[0..*] fid + +FIDArray fid_array + } + + class FIDObject { + +string[0..*] raw_data + +string, float[0..*] processed_data + +Parameters nmr_parameters + +ProcessingSteps processing_steps + +Identity[0..*] peak_identities + } + + class Parameters { + +float acquisition_time + +float relaxation_time + +float repetition_time + +float[0..*] number_of_transients + +float[0..*] acquisition_times_array + +float spectral_width_ppm + +float spectral_width_hz + +float spectrometer_frequency + +float reference_frequency + +float spectral_width_left + } + + class ProcessingSteps { + +boolean is_apodised + +float apodisation_frequency + +boolean is_zero_filled + +boolean is_fourier_transformed + +string fourier_transform_type + +boolean is_phased + +float zero_order_phase + +float 
first_order_phase + +boolean is_only_real + +boolean is_normalised + +float max_value + +boolean is_deconvoluted + +boolean is_baseline_corrected + } + + class Identity { + +string name + +string species_id + +float[0..*] associated_peaks + +AssociatedRanges[0..*] associated_ranges + +int[0..*] associated_indices + +float[0..*] associated_integrals + } + + class FIDArray { + +string[0..*] fids* + } + + class CV { + +string vocabulary* + +string version* + +URL url* + } + + class Term { + +string name* + +string accession* + +string term_cv_reference + +string value + } + + class FileFormats { + << Enumeration >> + +VARIAN + +BRUKER + +NONE + } + +``` \ No newline at end of file diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index ebed113..5cac9d4 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -25,7 +25,13 @@ from IPython.display import display import asyncio -from .utils import get_species_from_enzymeml, get_ordered_list_of_species_names +import sympy as sp + +from .utils import ( + get_species_from_enzymeml, + get_ordered_list_of_species_names, + get_initial_concentration_by_species_id, +) class Plot: @@ -187,9 +193,11 @@ def _plot_deconv( ax.text( ppm[numpy.argmax(peak)], label_pad + peak.max(), - get_ordered_list_of_species_names(fid)[n] - if fid.fid_object.peak_identities - else str(n), + ( + get_ordered_list_of_species_names(fid)[n] + if fid.fid_object.peak_identities + else str(n) + ), ha="center", ) ax.invert_xaxis() @@ -1372,6 +1380,7 @@ def __init__(self, fid, title): self.title = title self.available_peaks = [str(peak) for peak in self.fid.peaks] self.available_species = self.fid.enzymeml_species + self.species_names = [name for name, _ in self.available_species] self.selected_values = {} if fid.peaks is [] or fid.peaks is None: raise RuntimeError( @@ -1391,7 +1400,7 @@ def __init__(self, fid, title): # Create the dropdown widget for the species species_dropdown = Dropdown( - options=self.available_species, + options=self.species_names, 
description="Select a species:", layout={"width": "max-content"}, style={"description_width": "initial"}, @@ -1447,26 +1456,33 @@ def on_save_button_click(b): if peak not in identity.associated_peaks: identity.associated_peaks.append(peak) peak_index = list(self.fid.peaks).index(peak) - associated_range = list( - list(self.fid.ranges)[peak_index] - ) - identity.add_to_associated_ranges( - start=float(associated_range[0]), - end=float(associated_range[1]), - ) + # associated_range = list( + # list(self.fid.ranges)[peak_index] + # ) + # identity.add_to_associated_ranges( + # start=float(associated_range[0]), + # end=float(associated_range[1]), + # ) + identity.add_to_associated_indices(peak_index) identity_exists = True if not identity_exists: peak_index = list(self.fid.peaks).index(value) - associated_range = list(list(self.fid.ranges)[peak_index]) + # associated_range = list(list(self.fid.ranges)[peak_index]) self.fid.fid_object.add_to_peak_identities( name=key, + species_id=next( + species_id + for name, species_id in self.available_species + if name == key + ), associated_peaks=value, - associated_ranges=[ - { - "start": float(associated_range[0]), - "end": float(associated_range[1]), - } - ], + # associated_ranges=[ + # { + # "start": float(associated_range[0]), + # "end": float(associated_range[1]), + # } + # ], + associated_indices=[peak_index], ) self.fid.identities = get_ordered_list_of_species_names(self.fid) self.fid._flags["assigned"] = True @@ -1524,9 +1540,10 @@ def __init__(self, fid_array): self.fids = fid_array.get_fids() self.available_peaks = [] self.available_species = [ - species.name + (species.name, species.id) for species in get_species_from_enzymeml(self.fid_array.enzymeml_document) ] + self.species_names = [name for name, _ in self.available_species] self.selected_fid = None self.selected_values = {} for fid in self.fids: @@ -1593,7 +1610,7 @@ def on_combobox_change(event): # Define a method to handle the peak dropdown's change event def 
on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = self.available_species + species_dropdown.options = self.species_names species_dropdown.disabled = False # Attach the method to the dropdown's change event @@ -1648,26 +1665,33 @@ def on_save_button_click(b): if peak not in identity.associated_peaks: identity.associated_peaks.append(peak) peak_index = list(fid.peaks).index(peak) - associated_range = list( - list(fid.ranges)[peak_index] - ) - identity.add_to_associated_ranges( - start=float(associated_range[0]), - end=float(associated_range[1]), - ) + # associated_range = list( + # list(fid.ranges)[peak_index] + # ) + # identity.add_to_associated_ranges( + # start=float(associated_range[0]), + # end=float(associated_range[1]), + # ) + identity.add_to_associated_indices(peak_index) identity_exists = True if not identity_exists: peak_index = list(fid.peaks).index(value) - associated_range = list(list(fid.ranges)[peak_index]) + # associated_range = list(list(fid.ranges)[peak_index]) fid.fid_object.add_to_peak_identities( name=key, + species_id=next( + species_id + for name, species_id in self.available_species + if name == key + ), associated_peaks=value, - associated_ranges=[ - { - "start": float(associated_range[0]), - "end": float(associated_range[1]), - } - ], + # associated_ranges=[ + # { + # "start": float(associated_range[0]), + # "end": float(associated_range[1]), + # } + # ], + associated_indices=[peak_index], ) fid.identities = get_ordered_list_of_species_names(fid) reset_button.disabled = False @@ -2212,18 +2236,22 @@ def assign(self): class ConcentrationCalculator: - def __init__(self, fid_array, integrals): + def __init__(self, fid_array, enzymeml_document): self.fid_array = fid_array - self.integrals = integrals - self.fids = fid_array.get_fids() + self.enzymeml_document = enzymeml_document self.available_species = get_ordered_list_of_species_names( - self.fid_array.get_fid("fid00") + 
self.fid_array.get_fids()[0] ) - self.equation = "" + self.c_n = sp.symbols("c_n") + self.x_s = sp.symbols("x_s") + self.x_n = sp.symbols("x_n") + self.c_n_value = float("nan") + self.x_s_value = float("nan") + self.x_n_value = float("nan") # Create the label widget for the title title_label = Label( - value="[WORK IN PROGRESS] Calculate concentrations from peak integrals for all FIDs [WORK IN PROGRESS]" + value="Calculate concentrations from peak integrals for all FIDs" ) # Create the dropdown widget for the internal standard @@ -2238,7 +2266,7 @@ def __init__(self, fid_array, integrals): concentration_equation = Text( value="", placeholder="Enter the equation for the concentration here", - description="Concentration equation:", + description="Concentration equation: c_s =", layout={"width": "auto"}, style={ "description_width": "initial", @@ -2278,26 +2306,72 @@ def on_calculate_button_click(b): # text widget and add them to a dictionary with species as # keys print("\nCalculating concentrations...") - if ( - not concentration_equation.value.replace(" ", "") - == "c_s=c_n*x_s/x_n" - ): - raise NotImplementedError( - "Only the example formula is currently supported." 
+ + equation = sp.sympify(concentration_equation.value) + print(f"`equation` is {equation}.") + # Create a dictionary to store the concentrations for each species + self.fid_array.concentrations = { + species: [] for species in self.available_species + } + + for fid in self.fid_array.get_fids(): + # Get data from the internal standard using next() + standard = next( + ( + identity + for identity in fid.fid_object.peak_identities + if identity.name == standard_dropdown.value + ), + None, + ) + # Extract the initial concentration of the standard from the EnzymeML document by its ID + self.c_n_value = get_initial_concentration_by_species_id( + enzymeml_document=self.enzymeml_document, + species_id=standard.species_id, ) - else: - # TODO: Currently hard-coded for the example data - standard_index = self.available_species.index( - standard_dropdown.value + # Set the peak integral values for the standard + self.x_n_value = sum(standard.associated_integrals) + self.x_s_value = self.x_n_value + # Calculate the concentration of the standard and append it to the list of concentrations + concentration = equation.subs( + { + self.c_n: self.c_n_value, + self.x_s: self.x_s_value, + self.x_n: self.x_n_value, + } ) - self.fid_array.concentrations = { - species: 5 - * concentration - / self.integrals[standard_index].mean() - for species, concentration in zip( - self.available_species, self.integrals + current_concentration = concentration.evalf() + print(f"adding {current_concentration} to {standard.name}.") + self.fid_array.concentrations[standard.name].append( + current_concentration + ) + print( + f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." 
+ ) + + # Iterate over all identities but the standard + for identity in fid.fid_object.peak_identities: + if identity.name == standard_dropdown.value: + pass + # Calculate the peak integral value for the species + self.x_s_value = sum(identity.associated_integrals) + # Calculate the concentration of the species and append it to the list of concentrations + concentration = equation.subs( + { + self.c_n: self.c_n_value, + self.x_s: self.x_s_value, + self.x_n: self.x_n_value, + } ) - } + current_concentration = concentration.evalf() + print(f"adding {current_concentration} to {identity.name}.") + self.fid_array.concentrations[identity.name].append( + current_concentration + ) + print( + f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." + ) + print(f"Done! Get concentrations with `FidArray.concentrations`.") # Attach the function to the calculate button's click event diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 8d206d9..ddbcd97 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -1,30 +1,33 @@ -from sdRDM import DataModel +import sympy as sp +import pyenzyme as pe +from pyenzyme.model import EnzymeMLDocument -def get_species_from_enzymeml(enzymeml_document: DataModel) -> list: + +def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: """Iterate over various species elements in EnzymeML document, extract them, and return them as a list. Args: - enzymeml_document (DataModel): An EnzymeML data model. + enzymeml_document (EnzymeMLDocument): An EnzymeML data model. Raises: - AttributeError: If enzymeml_document is not of type `sdRDM.DataModel`. + AttributeError: If enzymeml_document is not of type `EnzymeMLDocument`. Returns: list: Available species in EnzymeML document. """ - if not isinstance(enzymeml_document, DataModel): + if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( - f"Parameter `enzymeml_document` has to be of type `sdrdm.DataModel`, got {type(enzymeml_document)} instead." 
+ f"Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead." ) available_species = [] for protein in enzymeml_document.proteins: available_species.append(protein) for complex in enzymeml_document.complexes: available_species.append(complex) - for reactant in enzymeml_document.reactants: - available_species.append(reactant) + for small_molecule in enzymeml_document.small_molecules: + available_species.append(small_molecule) return available_species @@ -56,29 +59,72 @@ def get_ordered_list_of_species_names(fid: "Fid") -> list: return ordered_list_of_species_names -def create_enzymeml(fid_array: "FidArray", enzymeml_document: DataModel) -> DataModel: - # Specify EnzymeML version - URL = "https://github.com/EnzymeML/enzymeml-specifications.git" - COMMIT = "5e5f05b9dc76134305b8f9cef65271e35563ac76" +def get_initial_concentration_by_species_id( + enzymeml_document: EnzymeMLDocument, species_id: str +) -> float: + """Get the initial concentration of a species in an EnzymeML + document by its `species_id`. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML data model. + species_id (str): The `species_id` of the species for which to get the initial concentration. - EnzymeML = DataModel.from_git(URL, COMMIT) - SBOTerm = EnzymeML.enums.SBOTerm - DataTypes = EnzymeML.enums.DataTypes + Returns: + float: The initial concentration of the species. 
+ """ + intial_concentration = float("nan") + for measurement in enzymeml_document.measurements: + for measurement_datum in measurement.species: + if measurement_datum.species_id == species_id: + intial_concentration = measurement_datum.init_conc + return intial_concentration - measurement = EnzymeML.Measurement( - name=fid_array.data_model.experiment.name, - temperature=enzymeml_document.reactions[0].temperature, - temperature_unit=enzymeml_document.reactions[0].temperature_unit, - ph=enzymeml_document.reactions[0].ph, - global_time=fid_array.t.tolist(), - global_time_unit="min", - ) - enzymeml_document.measurements.append(measurement) +def get_species_id_by_name( + enzymeml_document: EnzymeMLDocument, species_name: str +) -> str: + """Get the `species_id` of a species in an EnzymeML document by its name. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML data model. + species_name (str): The name of the species for which to get the `species_id`. + + Returns: + str: The `species_id` of the species. + """ + species_id = None + for species in get_species_from_enzymeml(enzymeml_document): + if species.name == species_name: + species_id = species.id + return species_id - return enzymeml_document - # for species, concentrations in fid_array.concentrations.items(): - # new_species = EnzymeML.MeasurementData( - # init_conc=enzymeml_document.reactants - # ) +def create_enzymeml( + fid_array: "FidArray", enzymeml_document: EnzymeMLDocument +) -> EnzymeMLDocument: + """Create an EnzymeML document from a given FidArray object. + + Args: + fid_array (FidArray): The FidArray object from which to create the EnzymeML document. + enzymeml_document (EnzymeMLDocument): The EnzymeML document to which to add the data. + + Returns: + EnzymeMLDocument: The EnzymeML document with the added data. + """ + + if not enzymeml_document.measurements: + raise AttributeError( + "EnzymeML document does not contain measurement metadata. Please add a measurement to the document first." 
+ ) + + global_time = (fid_array.t.tolist(),) + for measured_species in fid_array.concentrations.items(): + for available_species in enzymeml_document.measurements[0].species: + if not available_species.species_id == get_species_id_by_name( + enzymeml_document, measured_species[0] + ): + pass + available_species.time = [float(x) for x in global_time[0]] + available_species.data = [float(x) for x in measured_species[1]] + + return enzymeml_document diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index d433e21..ab67e6e 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -170,6 +170,10 @@ Container mapping one or more peaks to the respective species. - Type: {start: float, end: float} - Description: Sets of ranges belonging to the given peaks - Multiple: True +- associated_indices + - Type: int + - Description: Indices in the NMR spectrum (counted from left to right) belonging to the given peaks + - Multiple: True - associated_integrals - Type: float - Description: Integrals resulting from the given peaks and ranges of a species @@ -218,7 +222,7 @@ lorem ipsum {Add reference back to term_cv_reference.} - Type: string - Description: Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset. - value - - Type: any + - Type: string - Description: Value of the term, if applicable. 
From d7e5cf5c78b31284bc1b2493b1841e3ed3a30618 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 24 Sep 2024 03:03:00 +0200 Subject: [PATCH 22/54] Update plotting.py --- nmrpy/plotting.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 5cac9d4..4a5ee19 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -2341,13 +2341,13 @@ def on_calculate_button_click(b): } ) current_concentration = concentration.evalf() - print(f"adding {current_concentration} to {standard.name}.") + # print(f"adding {current_concentration} to {standard.name}.") self.fid_array.concentrations[standard.name].append( current_concentration ) - print( - f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." - ) + # print( + # f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." + # ) # Iterate over all identities but the standard for identity in fid.fid_object.peak_identities: @@ -2364,13 +2364,13 @@ def on_calculate_button_click(b): } ) current_concentration = concentration.evalf() - print(f"adding {current_concentration} to {identity.name}.") + # print(f"adding {current_concentration} to {identity.name}.") self.fid_array.concentrations[identity.name].append( current_concentration ) - print( - f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." - ) + # print( + # f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." + # ) print(f"Done! 
Get concentrations with `FidArray.concentrations`.") From 6afc8910856aea57ae377fc83d1e0e46a22afb9b Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Thu, 16 Jan 2025 09:48:21 +0100 Subject: [PATCH 23/54] Restructure data model & reflect changes in lib - Change FIDObject to be Peak-based instead of Identity-based - Update PeakAssigner and PeakRangeAssigner to reflect new data model - Add PeakIndexer and PeakRangeIndexer and add handlers to FID and FIDArray --- .github/workflows/generate_api.yaml | 13 - nmrpy/data_objects.py | 236 +++---- nmrpy/datamodel/__init__.py | 8 +- nmrpy/datamodel/core/__init__.py | 17 +- nmrpy/datamodel/core/cv.py | 55 -- nmrpy/datamodel/core/experiment.py | 36 +- nmrpy/datamodel/core/fidarray.py | 46 -- nmrpy/datamodel/core/fidobject.py | 57 +- nmrpy/datamodel/core/nmrpy.py | 6 +- nmrpy/datamodel/core/parameters.py | 54 +- nmrpy/datamodel/core/{identity.py => peak.py} | 102 +-- nmrpy/datamodel/core/processingsteps.py | 4 +- nmrpy/datamodel/core/term.py | 68 -- nmrpy/datamodel/schemes/datamodel_schema.md | 36 +- nmrpy/plotting.py | 655 ++++++++++++++---- nmrpy/utils.py | 42 +- specifications/nmrpy.md | 139 +--- 17 files changed, 830 insertions(+), 744 deletions(-) delete mode 100644 .github/workflows/generate_api.yaml delete mode 100644 nmrpy/datamodel/core/cv.py delete mode 100644 nmrpy/datamodel/core/fidarray.py rename nmrpy/datamodel/core/{identity.py => peak.py} (50%) delete mode 100644 nmrpy/datamodel/core/term.py diff --git a/.github/workflows/generate_api.yaml b/.github/workflows/generate_api.yaml deleted file mode 100644 index c9e17c0..0000000 --- a/.github/workflows/generate_api.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: Generate API - -on: push - -jobs: - generate-api: - runs-on: ubuntu-latest - steps: - - name: Generate sdRDM library - uses: torogi94/generate-sdrdm-api@main - with: - library_name: "datamodel" - out_dir: "./nmrpy/" diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 234bda3..b496733 100644 --- 
a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -13,7 +13,14 @@ from ipywidgets import SelectMultiple from sdRDM import DataModel from sdRDM.base.importedmodules import ImportedModules -from nmrpy.datamodel.core import * +from nmrpy.datamodel.core import ( + NMRpy, + Experiment, + FIDObject, + Parameters, + ProcessingSteps, + Peak, +) from nmrpy.utils import create_enzymeml import pyenzyme as pe import pyenzyme.equations as peq @@ -228,7 +235,7 @@ def __init__(self, *args, **kwargs): self.data = kwargs.get("data", []) self.peaks = None self.ranges = None - self.identities = None + self.species = None self._deconvoluted_peaks = None self._flags = { "ft": False, @@ -339,25 +346,23 @@ def ranges(self, ranges): self._ranges = ranges @property - def identities(self): + def species(self): """ - Assigned identities corresponding to the various peaks in :attr:`~nmrpy.data_objects.Fid.peaks`. + Assigned species corresponding to the various peaks in :attr:`~nmrpy.data_objects.Fid.peaks`. """ - return self._identities + return self._species - @identities.setter - def identities(self, identities): - if identities is None: - self._identities = None + @species.setter + def species(self, species): + if species is None: + self._species = None return - if identities is not None: - # if not Fid._is_flat_iter(identities): - # raise AttributeError("identitites must be a flat iterable") - if not all(isinstance(i, str) for i in identities): - raise AttributeError("identities must be strings") - self._identities = numpy.array(identities) - else: - self._identities = identities + if species is not None: + if not all((i is None) or isinstance(i, str) for i in species): + raise AttributeError("species must be strings") + if not len(species) == len(self.peaks): + raise AttributeError("species must have the same length as peaks") + self._species = numpy.array(species, dtype=object) @property def _bl_ppm(self): @@ -508,54 +513,20 @@ def deconvoluted_integrals(self): int_gauss = peak[-1] 
* Fid._f_gauss_int(peak[3], peak[1]) int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) integrals.append(int_gauss + int_lorentz) - print(f"peak {i} integral: {integrals[i]}") - - for peak_identity in self.fid_object.peak_identities: - print(f"peak_identity: {peak_identity.name}") - if i in peak_identity.associated_indices: - print( - f"peak index {i} found in in associated indices {peak_identity.associated_indices} for peak identity {peak_identity.name}" - ) - if integrals[i] in peak_identity.associated_integrals: + print(f"Peak {i} integral: {integrals[i]}") + + for peak_object in self.fid_object.peaks: + print(f"Peak object: {peak_object.peak_index}") + if i == peak_object.peak_index: + if integrals[i] == peak_object.peak_integral: print( - f"integral {integrals[i]} already in associated integrals" + f"Integral {integrals[i]} already stored in peak object" ) pass - try: - peak_identity.associated_integrals.append( - float(integrals[i]) - ) - print( - f"added integral {integrals[i]} to associated integrals" - ) - except: - peak_identity.associated_integrals = [] - print( - f"created new associated integrals list for peak identity {peak_identity.name}" - ) - peak_identity.associated_integrals.append( - float(integrals[i]) - ) - print( - f"added integral {integrals[i]} to associated integrals" - ) + peak_object.peak_integral = float(integrals[i]) + print(f"Added integral {integrals[i]} to peak object") i += 1 print(f"incremented i to {i}") - - # for peak_identity in self.fid_object.peak_identities: - # if peak_identity.name == self.identities[i]: - # if integrals[i] in peak_identity.associated_integrals: - # pass - # try: - # peak_identity.associated_integrals.append( - # float(integrals[i]) - # ) - # except: - # peak_identity.associated_integrals = [] - # peak_identity.associated_integrals.append( - # float(integrals[i]) - # ) - # i += 1 return integrals def _get_plots(self): @@ -961,11 +932,11 @@ def clear_ranges(self): """ self.ranges = None - 
def clear_identitites(self): + def clear_species(self): """ - Clear identities stored in :attr:`~nmrpy.data_objects.Fid.identities`. + Clear species stored in :attr:`~nmrpy.data_objects.Fid.species`. """ - self.identities = None + self.species = None def baseliner(self): """ @@ -1282,7 +1253,7 @@ def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): try: mz = lmfit.minimize(cls._f_res, params, args=([data]), method=method) fits = Fid._parameters_to_list(mz.params) - except: + except Exception: fits = None return fits @@ -1426,24 +1397,33 @@ def plot_deconv(self, **kwargs): setattr(self, plt.id, plt) pyplot.show() - def assign_identities(self): + def assign_peaks(self): """ - Instantiate a identity-assignment GUI widget. Select peaks from + Instantiate a species-assignment GUI widget. Select peaks from dropdown menu containing :attr:`~nmrpy.data_objects.Fid.peaks`. Attach a species to the selected peak from second dropdown menu containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. """ # raise NotImplementedError - widget_title = "Assign identities for {}".format(self.id) - self._assigner_widget = IdentityAssigner(fid=self, title=widget_title) + widget_title = "Assign species for {}".format(self.id) + self._assigner_widget = PeakAssigner(fid=self, title=widget_title) - def clear_identities(self): + def index_peaks(self, list_of_species: list[str]): """ - Clear assigned identities stored in - :attr:`~nmrpy.data_objects.Fid.identities`. + Index peaks based on species. """ - self.identities = None + widget_title = "Index peaks for {}".format(self.id) + self._indexer_widget = PeakIndexer( + fid=self, species_list=list_of_species, title=widget_title + ) + + def clear_peaks(self): + """ + Clear assigned species stored in + :attr:`~nmrpy.data_objects.Fid.species`. 
+ """ + self.species = None class FidArray(Base): @@ -1463,35 +1443,14 @@ class FidArray(Base): """ def __init__(self): - _now = str(datetime.now()) self.data_model = NMRpy( - datetime_created=_now, - datetime_modified=_now, + datetime_created=str(datetime.now()), + experiment=Experiment(name="NMR experiment"), ) - self.__data_model.experiment = Experiment(name="This is still a test") - del _now - self._force_pyenzyme = False - - @property - def force_pyenzyme(self): - return self._force_pyenzyme - - @force_pyenzyme.setter - def force_pyenzyme(self): - raise PermissionError("Forbidden!") - - @force_pyenzyme.deleter - def force_pyenzyme(self): - raise PermissionError("Forbidden!") @property def data_model(self): - _data_model = self.__data_model - if not _data_model.experiment: - _data_model.experiment = Experiment(name="This is still a test") - for fid in self.get_fids(): - _data_model.experiment.fid.append(fid.fid_object) - return _data_model + return self.__data_model @data_model.setter def data_model(self, data_model: DataModel): @@ -1521,10 +1480,7 @@ def enzymeml_document(self, enzymeml_document: EnzymeMLDocument): self.__enzymeml_document.modified = datetime.now() self.__data_model.experiment.name = self.__enzymeml_document.name for fid in self.get_fids(): - fid.enzymeml_species = [ - (species.name, species.id) - for species in get_species_from_enzymeml(self.__enzymeml_document) - ] + fid.enzymeml_species = get_species_from_enzymeml(self.__enzymeml_document) @enzymeml_document.deleter def enzymeml_document(self): @@ -1608,7 +1564,7 @@ def _get_widgets(self): or isinstance(self.__dict__[id], FidArrayRangeSelector) or isinstance(self.__dict__[id], DataTraceRangeSelector) or isinstance(self.__dict__[id], DataTraceSelector) - or isinstance(self.__dict__[id], IdentityRangeAssigner) + or isinstance(self.__dict__[id], PeakRangeAssigner) ] return widgets @@ -1666,7 +1622,7 @@ def _deconvoluted_peaks(self): for fid in self.get_fids(): try: 
deconvoluted_peaks.append(fid._deconvoluted_peaks) - except: + except Exception: deconvoluted_peaks.append([]) return numpy.array(deconvoluted_peaks) @@ -1768,7 +1724,9 @@ def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: Args: path_to_enzymeml_document (str): Path to file containing an EnzymeML document """ - self.enzymeml_document = pe.load_enzymeml(path_to_enzymeml_document) + self.enzymeml_document = pe.read_enzymeml( + cls=pe.EnzymeMLDocument, path=path_to_enzymeml_document + ) @classmethod def from_data(cls, data): @@ -1804,7 +1762,7 @@ def from_path(cls, fid_path=".", file_format=None, arrayset=None): try: with open(fid_path, "rb") as f: return pickle.load(f) - except: + except Exception: print("Not NMRPy data file.") importer = Importer(fid_path=fid_path) importer.import_fid(arrayset=arrayset) @@ -1950,7 +1908,7 @@ def baseline_correct_fids(self, deg=2): for fid in self.get_fids(): try: fid.baseline_correct(deg=deg) - except: + except Exception: print( "failed for {}. 
Perhaps first run baseliner_fids()".format(fid.id) ) @@ -2036,22 +1994,14 @@ def deconv_fids(self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0): int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) integrals.append(int_gauss + int_lorentz) - # Iterate over peak identities and assign integrals based on the peak and integral indices - for peak_identity in fid.fid_object.peak_identities: - if i in peak_identity.associated_indices: + # Iterate over peaks and assign integrals based on the peak and integral indices + for peak_object in fid.fid_object.peaks: + if i == peak_object.peak_index: # Check if the integral has already been assigned to the peak identity - if integrals[i] in peak_identity.associated_integrals: + if integrals[i] == peak_object.peak_integral: pass - # If not, assign the integral to the peak identity - try: - peak_identity.associated_integrals.append( - float(integrals[i]) - ) - except: - peak_identity.associated_integrals = [] - peak_identity.associated_integrals.append( - float(integrals[i]) - ) + # If not, assign the integral to the peak + peak_object.peak_integral = float(integrals[i]) i += 1 fid.fid_object.processing_steps.is_deconvoluted = True @@ -2406,8 +2356,8 @@ def get_integrals_from_traces(self): integrals_set[i] = integrals return integrals_set - def assign_integrals(self, integrals_set: list) -> dict: - print("~~~ Method under contruction ~~~") + def assign_integrals(self, integrals_set: list) -> dict: # deprecated? 
+ print("~~~ Method under contruction ~~~") # TODO: make pretty widget_list = [] for i, j in enumerate(integrals_set): widget_list.append((i, list(j))) @@ -2444,7 +2394,7 @@ def save_to_file(self, filename=None, overwrite=False): try: del self.enzymeml_library del self.enzymeml_document - except: + except Exception: pass with open(filename, "wb") as f: pickle.dump(self, f) @@ -2456,9 +2406,9 @@ def apply_to_enzymeml( enzymeml_document = self.enzymeml_document return create_enzymeml(self, enzymeml_document) - def assign_identities(self): + def assign_peaks(self, index_list=None): """ - Instantiate a identity-assignment GUI widget. Select a FID by + Instantiate a peak-assignment GUI widget. Select a FID by its ID from the combobox. Select peaks from dropdown menu containing :attr:`~nmrpy.data_objects.Fid.peaks`. Attach a species to the selected peak from second dropdown menu @@ -2466,17 +2416,43 @@ def assign_identities(self): assignment, press Assign button to apply. """ - self._assigner_widget = IdentityRangeAssigner(fid_array=self) + self._assigner_widget = PeakRangeAssigner(fid_array=self, index_list=index_list) + + def index_peaks(self, species_list, index_list=None): + """ + Instantiate a peak-indexing GUI widget. Select a FID by ID to base the peak + assignments on, then assign species names to peaks. The assignments will be + applied to all FIDs in the array. - def clear_identities(self): + Parameters + ---------- + species_list : list + List of species names to assign to peaks + index_list : list, optional + List of indices to select FIDs from the array. If None, all FIDs + will be included. 
+ + Example + ------- + >>> species = ['ATP', 'ADP', 'Pi', 'Glucose'] + >>> fid_array.index_peaks(species) # Use all FIDs + >>> fid_array.index_peaks(species, [0, 1, 2]) # Use only first three FIDs + """ + from nmrpy.plotting import PeakRangeIndexer + + self._indexer_widget = PeakRangeIndexer( + fid_array=self, species_list=species_list, index_list=index_list + ) + + def clear_peaks(self): """ - Clear assigned identities stored in - :attr:`~nmrpy.data_objects.Fid.identities`. + Clear assigned peaks stored in + :attr:`~nmrpy.data_objects.Fid.species`. """ for fid in self.get_fids(): - fid.identities = None + fid.species = None - def calculate_concentrations(self): + def calculate_concentrations(self): # TODO: Write docstring self._concentration_widget = ConcentrationCalculator( fid_array=self, enzymeml_document=self.enzymeml_document ) diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py index 98449d3..0907ff5 100644 --- a/nmrpy/datamodel/__init__.py +++ b/nmrpy/datamodel/__init__.py @@ -1,12 +1,12 @@ import os -from .core.cv import CV from .core.experiment import Experiment -from .core.fidarray import FIDArray from .core.fidobject import FIDObject from .core.fileformats import FileFormats -from .core.identity import Identity from .core.nmrpy import NMRpy from .core.parameters import Parameters +from .core.peak import Peak from .core.processingsteps import ProcessingSteps -from .core.term import Term + +__URL__ = "" +__COMMIT__ = "" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py index 262cc64..c8593df 100644 --- a/nmrpy/datamodel/core/__init__.py +++ b/nmrpy/datamodel/core/__init__.py @@ -1,10 +1,19 @@ -from .cv import CV from .experiment import Experiment -from .fidarray import FIDArray from .fidobject import FIDObject from .fileformats import FileFormats -from .identity import Identity from .nmrpy import NMRpy from .parameters import Parameters +from .peak import Peak from .processingsteps import ProcessingSteps 
-from .term import Term + +__doc__ = "" + +__all__ = [ + "NMRpy", + "Experiment", + "FIDObject", + "Parameters", + "ProcessingSteps", + "Peak", + "FileFormats", +] diff --git a/nmrpy/datamodel/core/cv.py b/nmrpy/datamodel/core/cv.py deleted file mode 100644 index d686157..0000000 --- a/nmrpy/datamodel/core/cv.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Dict, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import AnyUrl, PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.tools.utils import elem2dict - - -class CV( - sdRDM.DataModel, - search_mode="unordered", -): - """lorem ipsum""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - vocabulary: str = element( - description="Name of the CV used.", - tag="vocabulary", - json_schema_extra=dict(), - ) - - version: str = element( - description="Version of the CV used.", - tag="version", - json_schema_extra=dict(), - ) - - url: AnyUrl = element( - description="URL pointing to the CV used.", - tag="url", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py index 84ecc87..1d9257a 100644 --- a/nmrpy/datamodel/core/experiment.py +++ b/nmrpy/datamodel/core/experiment.py @@ -6,22 +6,21 @@ from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus +from 
sdRDM.base.utils import forge_signature from sdRDM.tools.utils import elem2dict -from .fidarray import FIDArray from .fidobject import FIDObject -from .identity import Identity from .parameters import Parameters +from .peak import Peak from .processingsteps import ProcessingSteps +@forge_signature class Experiment( sdRDM.DataModel, search_mode="unordered", ): - """Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed - Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant - """ + """Container for a single NMR experiment (e.g., one time-course), containing one or more FID objects in the `fid_array` field. Following the specifications of the EnzymeML standard, the `name` field is mandatory.""" id: Optional[str] = attr( name="id", @@ -36,22 +35,15 @@ class Experiment( json_schema_extra=dict(), ) - fid: List[FIDObject] = element( - description="A single NMR spectrum.", + fid_array: List[FIDObject] = element( + description="List of individual FidObjects.", default_factory=ListPlus, - tag="fid", + tag="fid_array", json_schema_extra=dict( multiple=True, ), ) - fid_array: Optional[FIDArray] = element( - description="Multiple NMR spectra to be processed together.", - default=None, - tag="fid_array", - json_schema_extra=dict(), - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") @@ -66,18 +58,18 @@ def _parse_raw_xml_data(self): return self - def add_to_fid( + def add_to_fid_array( self, raw_data: List[str] = ListPlus(), processed_data: List[Union[str, float]] = ListPlus(), nmr_parameters: Optional[Parameters] = None, processing_steps: Optional[ProcessingSteps] = None, - peak_identities: List[Identity] = ListPlus(), + peaks: List[Peak] = ListPlus(), id: 
Optional[str] = None, **kwargs, ) -> FIDObject: """ - This method adds an object of type 'FIDObject' to attribute fid + This method adds an object of type 'FIDObject' to attribute fid_array Args: id (str): Unique identifier of the 'FIDObject' object. Defaults to 'None'. @@ -85,7 +77,7 @@ def add_to_fid( processed_data (): Processed data array.. Defaults to ListPlus() nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None - peak_identities (): Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species.. Defaults to ListPlus() + peaks (): Container holding the peaks found in the NMR spectrum associated with species from an EnzymeML document.. Defaults to ListPlus() """ params = { @@ -93,7 +85,7 @@ def add_to_fid( "processed_data": processed_data, "nmr_parameters": nmr_parameters, "processing_steps": processing_steps, - "peak_identities": peak_identities, + "peaks": peaks, } if id is not None: @@ -101,6 +93,6 @@ def add_to_fid( obj = FIDObject(**params) - self.fid.append(obj) + self.fid_array.append(obj) - return self.fid[-1] + return self.fid_array[-1] diff --git a/nmrpy/datamodel/core/fidarray.py b/nmrpy/datamodel/core/fidarray.py deleted file mode 100644 index 7b83f32..0000000 --- a/nmrpy/datamodel/core/fidarray.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Dict, List, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.tools.utils import elem2dict - - -class FIDArray( - sdRDM.DataModel, - search_mode="unordered", -): - """Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. 
Setup time for experiment, Default 0.5}""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - fids: List[str] = element( - description="List of `FIDObject.id` belonging to this array.", - default_factory=ListPlus, - tag="fids", - json_schema_extra=dict( - multiple=True, - ), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py index 0b0e5f9..cefcd67 100644 --- a/nmrpy/datamodel/core/fidobject.py +++ b/nmrpy/datamodel/core/fidobject.py @@ -6,18 +6,20 @@ from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature from sdRDM.tools.utils import elem2dict -from .identity import AssociatedRanges, Identity from .parameters import Parameters +from .peak import Peak, PeakRange from .processingsteps import ProcessingSteps +@forge_signature class FIDObject( sdRDM.DataModel, search_mode="unordered", ): - """Container for a single NMR spectrum.""" + """Container for a single NMR spectrum, containing both raw data with relevant instrument parameters and processed data with processing steps applied. The `raw_data` field contains the complex spectral array as unaltered free induction decay from the NMR instrument. Every processing step is documented in the `processing_steps` field, together with any relevant parameters to reproduce the processing. 
Therefore, and to minimize redundancy, only the current state of the data is stored in the `processed_data` field. The `peaks` field is a list of `Peak` objects, each representing one single peak in the NMR spectrum.""" id: Optional[str] = attr( name="id", @@ -64,13 +66,13 @@ class FIDObject( json_schema_extra=dict(), ) - peak_identities: List[Identity] = element( + peaks: List[Peak] = element( description=( - "Container holding and mapping integrals resulting from peaks and their" - " ranges to EnzymeML species." + "Container holding the peaks found in the NMR spectrum associated with" + " species from an EnzymeML document." ), default_factory=ListPlus, - tag="peak_identities", + tag="peaks", json_schema_extra=dict( multiple=True, ), @@ -90,44 +92,41 @@ def _parse_raw_xml_data(self): return self - def add_to_peak_identities( + def add_to_peaks( self, - name: Optional[str] = None, + peak_index: int, + peak_position: Optional[float] = None, + peak_range: Optional[PeakRange] = None, + peak_integral: Optional[float] = None, species_id: Optional[str] = None, - associated_peaks: List[float] = ListPlus(), - associated_ranges: List[AssociatedRanges] = ListPlus(), - associated_indices: List[int] = ListPlus(), - associated_integrals: List[float] = ListPlus(), id: Optional[str] = None, **kwargs, - ) -> Identity: + ) -> Peak: """ - This method adds an object of type 'Identity' to attribute peak_identities + This method adds an object of type 'Peak' to attribute peaks Args: - id (str): Unique identifier of the 'Identity' object. Defaults to 'None'. - name (): Descriptive name for the species. Defaults to None - species_id (): ID of an EnzymeML species. Defaults to None - associated_peaks (): Peaks belonging to the given species. Defaults to ListPlus() - associated_ranges (): Sets of ranges belonging to the given peaks. Defaults to ListPlus() - associated_indices (): Indices in the NMR spectrum (counted from left to right) belonging to the given peaks. 
Defaults to ListPlus() - associated_integrals (): Integrals resulting from the given peaks and ranges of a species. Defaults to ListPlus() + id (str): Unique identifier of the 'Peak' object. Defaults to 'None'. + peak_index (): Index of the peak in the NMR spectrum, counted from left to right.. + peak_position (): Position of the peak in the NMR spectrum.. Defaults to None + peak_range (): Range of the peak, given as a start and end value.. Defaults to None + peak_integral (): Integral of the peak, resulting from the position and range given.. Defaults to None + species_id (): ID of an EnzymeML species.. Defaults to None """ params = { - "name": name, + "peak_index": peak_index, + "peak_position": peak_position, + "peak_range": peak_range, + "peak_integral": peak_integral, "species_id": species_id, - "associated_peaks": associated_peaks, - "associated_ranges": associated_ranges, - "associated_indices": associated_indices, - "associated_integrals": associated_integrals, } if id is not None: params["id"] = id - obj = Identity(**params) + obj = Peak(**params) - self.peak_identities.append(obj) + self.peaks.append(obj) - return self.peak_identities[-1] + return self.peaks[-1] diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py index d32755c..7bd6795 100644 --- a/nmrpy/datamodel/core/nmrpy.py +++ b/nmrpy/datamodel/core/nmrpy.py @@ -7,16 +7,18 @@ from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature from sdRDM.tools.utils import elem2dict from .experiment import Experiment +@forge_signature class NMRpy( sdRDM.DataModel, search_mode="unordered", ): - """Root element of the NMRpy data model.""" + """Root element of the NMRpy data model. Following the specifications of the EnzymeML standard, the `datetime_created` field is mandatory. 
Since each NMRpy instance is meant to hold a single experiment (e.g., one time-course), the data model reflects this by only allowing a single `experiment` object.""" id: Optional[str] = attr( name="id", @@ -39,7 +41,7 @@ class NMRpy( ) experiment: Optional[Experiment] = element( - description="List of experiments associated with this dataset.", + description="Experiment object associated with this dataset.", default=None, tag="experiment", json_schema_extra=dict(), diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py index eb3dbfb..75a0819 100644 --- a/nmrpy/datamodel/core/parameters.py +++ b/nmrpy/datamodel/core/parameters.py @@ -6,14 +6,16 @@ from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature from sdRDM.tools.utils import elem2dict +@forge_signature class Parameters( sdRDM.DataModel, search_mode="unordered", ): - """Container for relevant NMR parameters.""" + """Container for relevant NMR parameters. While not exhaustive, these parameters are commonly relevant for (pre-)processing and analysis of NMR data.""" id: Optional[str] = attr( name="id", @@ -23,28 +25,40 @@ class Parameters( ) acquisition_time: Optional[float] = element( - description="at", + description=( + "Duration of the FID signal acquisition period after the excitation pulse." + " Abrreviated as `at`." + ), default=None, tag="acquisition_time", json_schema_extra=dict(), ) relaxation_time: Optional[float] = element( - description="d1", + description=( + "Inter-scan delay allowing spins to relax back toward equilibrium before" + " the next pulse. Abbreviated as `d1`." + ), default=None, tag="relaxation_time", json_schema_extra=dict(), ) repetition_time: Optional[float] = element( - description="rt = at + d1", + description=( + "Total duration of a single scan cycle, combining acquisition and" + " relaxation delays (`rt = at + d1`)." 
+ ), default=None, tag="repetition_time", json_schema_extra=dict(), ) number_of_transients: List[float] = element( - description="nt", + description=( + "Number of individual FIDs averaged to improve signal-to-noise ratio." + " Abbreviated as `nt`." + ), default_factory=ListPlus, tag="number_of_transients", json_schema_extra=dict( @@ -53,7 +67,10 @@ class Parameters( ) acquisition_times_array: List[float] = element( - description="acqtime = [nt, 2nt, ..., rt x nt]", + description=( + "Array of sampled time points corresponding to the collected FID data" + " (`acqtime = [nt, 2nt, ..., rt x nt]`)." + ), default_factory=ListPlus, tag="acquisition_times_array", json_schema_extra=dict( @@ -62,35 +79,50 @@ class Parameters( ) spectral_width_ppm: Optional[float] = element( - description="sw", + description=( + "Frequency range of the acquired spectrum expressed in parts per million" + " (ppm). Abbreviated as `sw`." + ), default=None, tag="spectral_width_ppm", json_schema_extra=dict(), ) spectral_width_hz: Optional[float] = element( - description="sw_hz", + description=( + "Frequency range of the acquired spectrum expressed in Hertz (Hz)." + " Abbreviated as `sw_hz`." + ), default=None, tag="spectral_width_hz", json_schema_extra=dict(), ) spectrometer_frequency: Optional[float] = element( - description="sfrq", + description=( + "Operating resonance frequency for the observed nucleus, defining the" + " chemical shift reference scale. Abbreviated as `sfrq`." + ), default=None, tag="spectrometer_frequency", json_schema_extra=dict(), ) reference_frequency: Optional[float] = element( - description="reffrq", + description=( + "Calibration frequency used to align and standardize the chemical shift" + " scale. Abbreviated as `reffrq`." 
+ ), default=None, tag="reference_frequency", json_schema_extra=dict(), ) spectral_width_left: Optional[float] = element( - description="sw_left", + description=( + "Offset parameter defining the left boundary of the spectral window" + " relative to the reference frequency. Abbreviated as `sw_left`." + ), default=None, tag="spectral_width_left", json_schema_extra=dict(), diff --git a/nmrpy/datamodel/core/identity.py b/nmrpy/datamodel/core/peak.py similarity index 50% rename from nmrpy/datamodel/core/identity.py rename to nmrpy/datamodel/core/peak.py index 26bf547..bea1897 100644 --- a/nmrpy/datamodel/core/identity.py +++ b/nmrpy/datamodel/core/peak.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional +from typing import Dict, Optional from uuid import uuid4 import sdRDM @@ -10,11 +10,12 @@ from sdRDM.tools.utils import elem2dict -class AssociatedRanges( +@forge_signature +class PeakRange( sdRDM.DataModel, search_mode="unordered", ): - """Small type for attribute 'associated_ranges'""" + """Small type for attribute 'peak_range'""" id: Optional[str] = attr( name="id", @@ -51,11 +52,11 @@ def _parse_raw_xml_data(self): @forge_signature -class Identity( +class Peak( sdRDM.DataModel, search_mode="unordered", ): - """Container mapping one or more peaks to the respective species.""" + """Container for a single peak in the NMR spectrum, associated with a species from an EnzymeML document. To ensure unambiguity of every peak, the `peak_index` field (counted from left to right in the NMR spectrum) is mandatory. Species from EnzymeML are identified by their `species_id` as found in the EnzymeML document.""" id: Optional[str] = attr( name="id", @@ -64,57 +65,42 @@ class Identity( default_factory=lambda: str(uuid4()), ) - name: Optional[str] = element( - description="Descriptive name for the species", - default=None, - tag="name", + peak_index: int = element( + description=( + "Index of the peak in the NMR spectrum, counted from left to right." 
+ ), + tag="peak_index", json_schema_extra=dict(), ) - species_id: Optional[str] = element( - description="ID of an EnzymeML species", + peak_position: Optional[float] = element( + description="Position of the peak in the NMR spectrum.", default=None, - tag="species_id", + tag="peak_position", json_schema_extra=dict(), ) - associated_peaks: List[float] = element( - description="Peaks belonging to the given species", - default_factory=ListPlus, - tag="associated_peaks", - json_schema_extra=dict( - multiple=True, - ), - ) - - associated_ranges: List[AssociatedRanges] = element( - description="Sets of ranges belonging to the given peaks", - default_factory=ListPlus, - tag="associated_ranges", - json_schema_extra=dict( - multiple=True, - ), + peak_range: Optional[PeakRange] = element( + description="Range of the peak, given as a start and end value.", + default_factory=PeakRange, + tag="peak_range", + json_schema_extra=dict(), ) - associated_indices: List[int] = element( + peak_integral: Optional[float] = element( description=( - "Indices in the NMR spectrum (counted from left to right) belonging to the" - " given peaks" - ), - default_factory=ListPlus, - tag="associated_indices", - json_schema_extra=dict( - multiple=True, + "Integral of the peak, resulting from the position and range given." 
), + default=None, + tag="peak_integral", + json_schema_extra=dict(), ) - associated_integrals: List[float] = element( - description="Integrals resulting from the given peaks and ranges of a species", - default_factory=ListPlus, - tag="associated_integrals", - json_schema_extra=dict( - multiple=True, - ), + species_id: Optional[str] = element( + description="ID of an EnzymeML species.", + default=None, + tag="species_id", + json_schema_extra=dict(), ) _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @@ -130,33 +116,3 @@ def _parse_raw_xml_data(self): self._raw_xml_data[attr] = elem2dict(value) return self - - def add_to_associated_ranges( - self, - start: Optional[float] = None, - end: Optional[float] = None, - id: Optional[str] = None, - **kwargs, - ) -> AssociatedRanges: - """ - This method adds an object of type 'AssociatedRanges' to attribute associated_ranges - - Args: - id (str): Unique identifier of the 'AssociatedRanges' object. Defaults to 'None'. - start (): . Defaults to None - end (): . Defaults to None - """ - - params = { - "start": start, - "end": end, - } - - if id is not None: - params["id"] = id - - obj = AssociatedRanges(**params) - - self.associated_ranges.append(obj) - - return self.associated_ranges[-1] diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py index c2ec8d5..2d29cb5 100644 --- a/nmrpy/datamodel/core/processingsteps.py +++ b/nmrpy/datamodel/core/processingsteps.py @@ -6,14 +6,16 @@ from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus +from sdRDM.base.utils import forge_signature from sdRDM.tools.utils import elem2dict +@forge_signature class ProcessingSteps( sdRDM.DataModel, search_mode="unordered", ): - """Container for processing steps performed, as well as parameter for them.""" + """Container for processing steps performed, as well as parameter for them. 
Processing steps that are reflected are apodisation, zero-filling, Fourier transformation, phasing, normalisation, deconvolution, and baseline correction.""" id: Optional[str] = attr( name="id", diff --git a/nmrpy/datamodel/core/term.py b/nmrpy/datamodel/core/term.py deleted file mode 100644 index c28d57e..0000000 --- a/nmrpy/datamodel/core/term.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Dict, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.tools.utils import elem2dict - - -class Term( - sdRDM.DataModel, - search_mode="unordered", -): - """lorem ipsum {Add reference back to term_cv_reference.}""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - name: str = element( - description=( - "The preferred name of the term associated with the given accession number." - ), - tag="name", - json_schema_extra=dict(), - ) - - accession: str = element( - description="Accession number of the term in the controlled vocabulary.", - tag="accession", - json_schema_extra=dict(), - ) - - term_cv_reference: Optional[str] = element( - description=( - "Reference to the `CV.id` of a controlled vocabulary that has been defined" - " for this dataset." 
- ), - default=None, - tag="term_cv_reference", - json_schema_extra=dict(), - ) - - value: Optional[str] = element( - description="Value of the term, if applicable.", - default=None, - tag="value", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/nmrpy/datamodel/schemes/datamodel_schema.md index a037ba8..9d10e79 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/nmrpy/datamodel/schemes/datamodel_schema.md @@ -2,10 +2,9 @@ classDiagram NMRpy *-- Experiment Experiment *-- FIDObject - Experiment *-- FIDArray FIDObject *-- Parameters FIDObject *-- ProcessingSteps - FIDObject *-- Identity + FIDObject *-- Peak class NMRpy { +datetime datetime_created* @@ -15,8 +14,7 @@ classDiagram class Experiment { +string name* - +FIDObject[0..*] fid - +FIDArray fid_array + +FIDObject[0..*] fid_array } class FIDObject { @@ -24,7 +22,7 @@ classDiagram +string, float[0..*] processed_data +Parameters nmr_parameters +ProcessingSteps processing_steps - +Identity[0..*] peak_identities + +Peak[0..*] peaks } class Parameters { @@ -56,30 +54,12 @@ classDiagram +boolean is_baseline_corrected } - class Identity { - +string name + class Peak { + +int peak_index* + +float peak_position + +PeakRange peak_range + +float peak_integral +string species_id - +float[0..*] associated_peaks - +AssociatedRanges[0..*] associated_ranges - +int[0..*] associated_indices - +float[0..*] associated_integrals - } - - class FIDArray { - +string[0..*] fids* - } - - class CV { - +string vocabulary* - +string version* - +URL url* - } - - class Term { - +string name* 
- +string accession* - +string term_cv_reference - +string value } class FileFormats { diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 4a5ee19..fd5ea80 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1,36 +1,38 @@ -import nmrpy.data_objects -import logging, traceback -import numpy -from matplotlib import pyplot as plt +import asyncio +import logging +import traceback from datetime import datetime -from matplotlib.figure import Figure -from matplotlib.collections import PolyCollection - -from matplotlib.patches import Rectangle -from matplotlib.transforms import blended_transform_factory -from matplotlib.widgets import Cursor -from matplotlib.backend_bases import NavigationToolbar2, Event +import numpy +import sympy as sp +from IPython.display import display from ipywidgets import ( - FloatText, - Output, - VBox, - Dropdown, - Label, + HTML, Button, Combobox, + Dropdown, + FloatText, + Label, + Output, Text, - HTML, + VBox, ) -from IPython.display import display -import asyncio +from matplotlib import pyplot as plt +from matplotlib.backend_bases import Event, NavigationToolbar2 +from matplotlib.collections import PolyCollection +from matplotlib.figure import Figure +from matplotlib.patches import Rectangle +from matplotlib.transforms import blended_transform_factory +from matplotlib.widgets import Cursor -import sympy as sp +import nmrpy.data_objects from .utils import ( - get_species_from_enzymeml, - get_ordered_list_of_species_names, + format_species_string, get_initial_concentration_by_species_id, + get_ordered_list_of_species_names, + get_species_from_enzymeml, + get_species_name_by_id, ) @@ -169,6 +171,7 @@ def _plot_deconv( summed_peak_colour="r", residual_colour="g", lw=1, + show_labels=False, ): # validation takes place in self._deconv_generator ( @@ -190,16 +193,17 @@ def _plot_deconv( for n in range(len(peakshapes)): peak = peakshapes[n] ax.plot(ppm, peak, "-", color=peak_colour, lw=lw) - ax.text( - ppm[numpy.argmax(peak)], - label_pad 
+ peak.max(), - ( - get_ordered_list_of_species_names(fid)[n] - if fid.fid_object.peak_identities - else str(n) - ), - ha="center", - ) + if (fid._flags["assigned"]) and (show_labels): + ax.text( + ppm[numpy.argmax(peak)], + label_pad + peak.max(), + ( + get_ordered_list_of_species_names(fid)[n] + if fid.fid_object.peaks + else str(n) + ), + ha="center", + ) ax.invert_xaxis() ax.set_xlim([upper_ppm, lower_ppm]) ax.grid() @@ -913,7 +917,7 @@ def press(self, event): try: peakline = self.lsm.peaklines.pop(old_peak) peakline.remove() - except: + except Exception: with self.out: print("Could not remove peakline") self.canvas.draw() @@ -1374,18 +1378,13 @@ class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): pass -class IdentityAssigner: +class PeakAssigner: def __init__(self, fid, title): self.fid = fid self.title = title self.available_peaks = [str(peak) for peak in self.fid.peaks] - self.available_species = self.fid.enzymeml_species - self.species_names = [name for name, _ in self.available_species] self.selected_values = {} - if fid.peaks is [] or fid.peaks is None: - raise RuntimeError( - f"`fid.peaks` is required but still empty. Please either assign them manually or using the `peakpicker` method." - ) + self._validate_and_update_fid(self.fid) # Create the label widget for the title title_label = Label(value=title) @@ -1400,7 +1399,9 @@ def __init__(self, fid, title): # Create the dropdown widget for the species species_dropdown = Dropdown( - options=self.species_names, + options=[ + format_species_string(species) for species in self.fid.enzymeml_species + ], description="Select a species:", layout={"width": "max-content"}, style={"description_width": "initial"}, @@ -1443,49 +1444,15 @@ def on_save_button_click(b): print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") - # Check if a given species already exists as an - # identity in the FID data model. 
If it does not, - # create a new Identity for the species and add name - # and associated peaks to it. If it already exists, - # check if the currentvalue is already in the - # associated peaks and if not, append it. - identity_exists = False - for identity in self.fid.fid_object.peak_identities: - if identity.name == key: - for peak in value: - if peak not in identity.associated_peaks: - identity.associated_peaks.append(peak) - peak_index = list(self.fid.peaks).index(peak) - # associated_range = list( - # list(self.fid.ranges)[peak_index] - # ) - # identity.add_to_associated_ranges( - # start=float(associated_range[0]), - # end=float(associated_range[1]), - # ) - identity.add_to_associated_indices(peak_index) - identity_exists = True - if not identity_exists: - peak_index = list(self.fid.peaks).index(value) - # associated_range = list(list(self.fid.ranges)[peak_index]) - self.fid.fid_object.add_to_peak_identities( - name=key, - species_id=next( - species_id - for name, species_id in self.available_species - if name == key - ), - associated_peaks=value, - # associated_ranges=[ - # { - # "start": float(associated_range[0]), - # "end": float(associated_range[1]), - # } - # ], - associated_indices=[peak_index], - ) - self.fid.identities = get_ordered_list_of_species_names(self.fid) - self.fid._flags["assigned"] = True + # Iterate over every FID, find the peak in the dict + # of selected values, and assign the species ID to + # the peak object + for peak in self.fid.fid_object.peaks: + if peak.peak_position not in value: + continue + peak.species_id = key.split(" ")[0] + self.fid.species[peak.peak_index] = peak.species_id + self.fid._flags["assigned"] = True reset_button.disabled = False # Attach the function to the save button's click event @@ -1495,10 +1462,11 @@ def on_save_button_click(b): def on_reset_button_click(b): with selection_output: selection_output.clear_output(wait=True) - # Clear the Identities in the data model and reset the + # Clear the Peaks in the 
data model and reset the # selection dict print("\nCleared selections!") - fid.fid_object.peak_identities = [] + for peak_object in self.fid.fid_object.peaks: + peak_object.species_id = None self.selected_values = {} # Refill the list of available peaks as before, # re-enable the peak dropdown, and disable the reset @@ -1507,7 +1475,6 @@ def on_reset_button_click(b): peak_dropdown.options = self.available_peaks peak_dropdown.disabled = False save_button.disabled = False - self.fid.identities = None self.fid._flags["assigned"] = False reset_button.disabled = True @@ -1529,31 +1496,73 @@ def on_reset_button_click(b): # Display the container display(container) + def _validate_and_update_fid(self, fid): + """ + Ensures fid has valid peaks and ranges, + initializes its .species array, and + syncs up with the data model if needed. + """ + if fid.peaks is [] or fid.peaks is None: + raise RuntimeError( + "`fid.peaks` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + + if fid.ranges is [] or fid.ranges is None: + raise RuntimeError( + "`fid.ranges` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + + if len(fid.peaks) != len(fid.ranges): + raise RuntimeError( + "To assign peaks, `fid.peaks` and `fid.ranges` must be of the same length." 
+ ) + + # Prepare a numpy array to store species + fid.species = numpy.empty(len(fid.peaks), dtype=object) + + # Sync data model: update existing peak objects or create new ones + for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): + if i < len(fid.fid_object.peaks): + # Peak already exists, update it + fid.fid_object.peaks[i].peak_position = float(peak_val) + fid.fid_object.peaks[i].peak_range = { + "start": float(range_val[0]), + "end": float(range_val[1]), + } + else: + # Create a new peak in the data model + fid.fid_object.add_to_peaks( + peak_index=i, + peak_position=float(peak_val), + peak_range={ + "start": float(range_val[0]), + "end": float(range_val[1]), + }, + ) + -class IdentityRangeAssigner: +class PeakRangeAssigner: """Wow, such documentation. for fid in [self.fids[i] for i in self.fid_number]: """ - def __init__(self, fid_array): + def __init__(self, fid_array, index_list): self.fid_array = fid_array - self.fids = fid_array.get_fids() + self.fids = self._build_fids(index_list) self.available_peaks = [] - self.available_species = [ - (species.name, species.id) - for species in get_species_from_enzymeml(self.fid_array.enzymeml_document) - ] - self.species_names = [name for name, _ in self.available_species] + self.available_species = get_species_from_enzymeml( + self.fid_array.enzymeml_document + ) self.selected_fid = None self.selected_values = {} + # Validate and/or update each Fid for fid in self.fids: - if fid.peaks is [] or fid.peaks is None: - raise RuntimeError( - f"`fid.peaks` is required but still empty. Please either assign them manually or using the `peakpicker` method." 
- ) + self._validate_and_update_fid(fid) # Create the label widget for the title - title_label = Label(value="Assign identities for all FIDs") + title_label = Label(value="Assign peaks for all FIDs") # Create the combobox for the selection of the FID ID combobox = Combobox( @@ -1610,7 +1619,9 @@ def on_combobox_change(event): # Define a method to handle the peak dropdown's change event def on_peak_dropdown_change(event): if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = self.species_names + species_dropdown.options = [ + format_species_string(species) for species in self.available_species + ] species_dropdown.disabled = False # Attach the method to the dropdown's change event @@ -1648,52 +1659,16 @@ def on_save_button_click(b): print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") - # Iterate over every FID, check if a given species - # already exists as an identity in the FID data - # model. If it does not, create a new Identity for - # the species and add name and associated peaks to - # it. If it already exists, check if the current - # value is already in the associated peaks and if - # not, append it. 
+ # Iterate over every FID, find the peak in the dict + # of selected values, and assign the species ID to + # the peak object for fid in self.fids: - if fid._flags["assigned"]: - continue - identity_exists = False - for identity in fid.fid_object.peak_identities: - if identity.name == key: - for peak in value: - if peak not in identity.associated_peaks: - identity.associated_peaks.append(peak) - peak_index = list(fid.peaks).index(peak) - # associated_range = list( - # list(fid.ranges)[peak_index] - # ) - # identity.add_to_associated_ranges( - # start=float(associated_range[0]), - # end=float(associated_range[1]), - # ) - identity.add_to_associated_indices(peak_index) - identity_exists = True - if not identity_exists: - peak_index = list(fid.peaks).index(value) - # associated_range = list(list(fid.ranges)[peak_index]) - fid.fid_object.add_to_peak_identities( - name=key, - species_id=next( - species_id - for name, species_id in self.available_species - if name == key - ), - associated_peaks=value, - # associated_ranges=[ - # { - # "start": float(associated_range[0]), - # "end": float(associated_range[1]), - # } - # ], - associated_indices=[peak_index], - ) - fid.identities = get_ordered_list_of_species_names(fid) + for peak in fid.fid_object.peaks: + if peak.peak_position not in value: + continue + peak.species_id = key.split(" ")[0] + fid.species[peak.peak_index] = peak.species_id + fid._flags["assigned"] = True reset_button.disabled = False # Attach the function to the save button's click event @@ -1703,12 +1678,13 @@ def on_save_button_click(b): def on_reset_button_click(b): with selection_output: selection_output.clear_output(wait=True) - # Iterate over every FID and clear the Identities in the + # Iterate over every FID and clear the Peaks in the # respective data model and reset the selection dict print("\nCleared selections!") for fid in self.fids: - fid.fid_object.peak_identities = [] - fid.identities = None + fid._flags["assigned"] = False + for peak_object 
in fid.fid_object.peaks: + peak_object.species_id = None self.selected_values = {} # Refill the list of available peaks as before, # re-enable the peak dropdown, and disable the reset @@ -1737,6 +1713,385 @@ def on_reset_button_click(b): # Display the container display(container) + def _build_fids(self, index_list) -> list: + """ + Returns a list of FIDs from fid_array: + - If index_list is empty, returns all FIDs. + - Otherwise, does bounds checks and builds IDs with correct formatting. + """ + if not index_list: + # No specific indices, grab all FIDs + return self.fid_array.get_fids() + + # 1) Basic bounds check + total_fids = len(self.fid_array.get_fids()) + for i in index_list: + if i >= total_fids: + raise IndexError( + f"Index {i} is out of bounds (there are {total_fids} FIDs)." + ) + + # 2) Determine how many digits for the ID + n_digits = len(str(total_fids - 1)) # e.g., 2 if up to 99, 3 if up to 999 + if n_digits == 1: + fid_format = "fid{}" + else: + fid_format = f"fid{{:0{n_digits}d}}" + + # 3) Build the list of FIDs + fids = [] + for i in index_list: + fid_id = fid_format.format(i) + fids.append(self.fid_array.get_fid(fid_id)) + + return fids + + def _validate_and_update_fid(self, fid): + """ + Ensures fid has valid peaks and ranges, + initializes its .species array, and + syncs up with the data model if needed. + """ + if fid.peaks is [] or fid.peaks is None: + raise RuntimeError( + "`fid.peaks` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + + if fid.ranges is [] or fid.ranges is None: + raise RuntimeError( + "`fid.ranges` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + + if len(fid.peaks) != len(fid.ranges): + raise RuntimeError( + "To assign peaks, `fid.peaks` and `fid.ranges` must be of the same length." 
+ ) + + # Prepare a numpy array to store species + fid.species = numpy.empty(len(fid.peaks), dtype=object) + + # Sync data model: update existing peak objects or create new ones + for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): + if i < len(fid.fid_object.peaks): + # Peak already exists, update it + fid.fid_object.peaks[i].peak_position = float(peak_val) + fid.fid_object.peaks[i].peak_range = { + "start": float(range_val[0]), + "end": float(range_val[1]), + } + else: + # Create a new peak in the data model + fid.fid_object.add_to_peaks( + peak_index=i, + peak_position=float(peak_val), + peak_range={ + "start": float(range_val[0]), + "end": float(range_val[1]), + }, + ) + + +class PeakIndexer: + """Interactive widget for assigning species names to peaks in a FID.""" + + def __init__(self, fid, species_list, title="Assign peaks"): + self.fid = fid + self.title = title + self.species_list = species_list + self.selected_values = {} + + # Validate and initialize + self._validate_fid(self.fid) + self.available_peaks = [str(peak) for peak in self.fid.peaks] + + # Create and layout widgets + self._create_widgets() + self._setup_callbacks() + self._layout_widgets() + + def _create_widgets(self): + """Create all widget components""" + self.title_label = Label(value=self.title) + + self.peak_dropdown = Dropdown( + options=self.available_peaks, + description="Select a peak:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.species_dropdown = Dropdown( + options=self.species_list, + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.save_button = Button( + description="Save selection", + icon="file-arrow-down", + ) + + self.reset_button = Button(description="Reset selection", disabled=True) + + self.selection_output = Output() + + def _setup_callbacks(self): + """Set up all widget callbacks""" + self.save_button.on_click(self._handle_save) + 
self.reset_button.on_click(self._handle_reset) + + def _layout_widgets(self): + """Create widget layout and display""" + self.container = VBox( + [ + self.title_label, + self.peak_dropdown, + self.species_dropdown, + self.save_button, + self.reset_button, + self.selection_output, + ] + ) + display(self.container) + + def _handle_save(self, b): + """Handle save button click""" + with self.selection_output: + self.selection_output.clear_output(wait=True) + + species = self.species_dropdown.value + peak_value = float(self.peak_dropdown.value) + + # Update selected values + if species not in self.selected_values: + self.selected_values[species] = [] + self.selected_values[species].append(peak_value) + + # Update available peaks + self.available_peaks.remove(str(peak_value)) + self.peak_dropdown.options = self.available_peaks + + if not self.available_peaks: + self.peak_dropdown.disabled = True + self.save_button.disabled = True + + # Update species array in FID + peak_idx = self.fid.peaks.tolist().index(peak_value) + self.fid.species[peak_idx] = species + + self._display_selections() + self.reset_button.disabled = False + + def _handle_reset(self, b): + """Handle reset button click""" + with self.selection_output: + self.selection_output.clear_output(wait=True) + print("\nCleared selections!") + + # Reset state + self.fid.species = numpy.empty(len(self.fid.peaks), dtype=object) + self.selected_values = {} + self.available_peaks = [str(peak) for peak in self.fid.peaks] + + # Reset widgets + self.peak_dropdown.options = self.available_peaks + self.peak_dropdown.disabled = False + self.save_button.disabled = False + self.reset_button.disabled = True + + def _display_selections(self): + """Display current selections""" + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") + + def _validate_fid(self, fid): + """Validates FID has peaks and initializes species array""" + if fid.peaks is None or len(fid.peaks) == 0: + raise 
RuntimeError( + "`fid.peaks` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + + # Initialize empty species array + fid.species = numpy.empty(len(fid.peaks), dtype=object) + + +class PeakRangeIndexer: + """Interactive widget for assigning species names to peaks across all FIDs in a FidArray.""" + + def __init__(self, fid_array, species_list, index_list=None): + self.fid_array = fid_array + self.fids = self._build_fids(index_list) + self.species_list = species_list + self.available_peaks = [] + self.selected_fid = None + self.selected_values = {} + + # Validate and/or update each Fid + for fid in self.fids: + self._validate_and_update_fid(fid) + + # Create and layout widgets + self._create_widgets() + self._setup_callbacks() + self._layout_widgets() + + def _build_fids(self, index_list): + """Build list of FIDs to work with based on optional index_list.""" + if index_list is None: + return self.fid_array.get_fids() + return [self.fid_array.get_fids()[i] for i in index_list] + + def _validate_and_update_fid(self, fid): + """Validate FID has peaks and initialize species array if needed.""" + if fid.peaks is None or len(fid.peaks) == 0: + raise RuntimeError( + f"FID {fid.id} requires peaks but they are empty. " + "Please assign them manually or with the `peakpicker` method." 
+ ) + if fid.species is None: + fid.species = numpy.empty(len(fid.peaks), dtype=object) + + def _create_widgets(self): + """Create all widget components""" + self.title_label = Label(value="Assign peaks for all FIDs") + + self.combobox = Combobox( + options=[fid.id for fid in self.fids], + description="Select FID to base entire array on:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.peak_dropdown = Dropdown( + options=[], + description="Select a peak:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + self.species_dropdown = Dropdown( + options=self.species_list, # Now using the species_list + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + self.save_button = Button( + description="Save selection", + icon="file-arrow-down", + disabled=True, + ) + + self.reset_button = Button( + description="Reset selection", + disabled=True, + ) + + self.selection_output = Output() + + def _setup_callbacks(self): + """Set up all widget callbacks""" + self.combobox.observe(self._handle_fid_selection, names="value") + self.save_button.on_click(self._handle_save) + self.reset_button.on_click(self._handle_reset) + + def _layout_widgets(self): + """Create widget layout and display""" + self.container = VBox( + [ + self.title_label, + self.combobox, + self.peak_dropdown, + self.species_dropdown, + self.save_button, + self.reset_button, + self.selection_output, + ] + ) + display(self.container) + + def _handle_fid_selection(self, change): + """Handle FID selection from combobox""" + if change["type"] == "change" and change["name"] == "value": + # Find selected FID + self.selected_fid = next( + fid for fid in self.fids if fid.id == change["new"] + ) + + # Update available peaks + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] + self.peak_dropdown.options = self.available_peaks + + # 
Enable dropdowns + self.peak_dropdown.disabled = False + self.species_dropdown.disabled = False + self.save_button.disabled = False + + def _handle_save(self, b): + """Handle save button click""" + with self.selection_output: + self.selection_output.clear_output(wait=True) + + species = self.species_dropdown.value + peak_value = float(self.peak_dropdown.value) + + # Update selected values + if species not in self.selected_values: + self.selected_values[species] = [] + self.selected_values[species].append(peak_value) + + # Update available peaks + self.available_peaks.remove(str(peak_value)) + self.peak_dropdown.options = self.available_peaks + + if not self.available_peaks: + self.peak_dropdown.disabled = True + self.save_button.disabled = True + + # Update species array in all FIDs + for fid in self.fids: + try: + peak_idx = fid.peaks.tolist().index(peak_value) + fid.species[peak_idx] = species + except ValueError: + continue # Skip if peak not found in this FID + + self._display_selections() + self.reset_button.disabled = False + + def _handle_reset(self, b): + """Handle reset button click""" + with self.selection_output: + self.selection_output.clear_output(wait=True) + print("\nCleared selections!") + + # Reset state for all FIDs + for fid in self.fids: + fid.species = numpy.empty(len(fid.peaks), dtype=object) + + self.selected_values = {} + + if self.selected_fid: + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] + self.peak_dropdown.options = self.available_peaks + self.peak_dropdown.disabled = False + self.save_button.disabled = False + + self.reset_button.disabled = True + + def _display_selections(self): + """Display current selections""" + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") + class DataTraceSelector: """ @@ -1797,7 +2152,7 @@ def assign(self): for fid, indx in trace.items(): try: integrals[fid] = numpy.argmin(abs(decon_peaks[fid] - indx)) - except: + except 
Exception: integrals[fid] = None trace_dict[t] = integrals last_fid = len(self.fid_array.get_fids()) - 1 diff --git a/nmrpy/utils.py b/nmrpy/utils.py index ddbcd97..0968303 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -43,12 +43,11 @@ def get_ordered_list_of_species_names(fid: "Fid") -> list: list: List of species names in desecending order by peak index. """ list_of_tuples = [] - # Iterate over the identies and then over their associated peaks of - # a given FID object and append a tuple of the identity's name and + # Iterate over the peak objects and then over their associated peaks + # of a given FID object and append a tuple of the identity's name and # corresponding peak (one tuple per peak) to a list of tuples. - for identity in fid.fid_object.peak_identities: - for peak in identity.associated_peaks: - list_of_tuples.append((identity.name, peak)) + for peak_object in fid.fid_object.peaks: + list_of_tuples.append((peak_object.species_id, peak_object.peak_position)) # Use the `sorted` function with a custom key to sort the list of # tuples by the second element of each tuple (the peak) from highest # value to lowest (reverse=True). @@ -99,6 +98,39 @@ def get_species_id_by_name( return species_id +def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) -> str: + """Get the name of a species in an EnzymeML document by its `species_id`. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML data model. + species_id (str): The `species_id` of the species for which to get the name. + + Returns: + str: The name of the species. + """ + species_name = None + for species in get_species_from_enzymeml(enzymeml_document): + if species.id == species_id: + species_name = species.name + return species_name + + +def format_species_string(enzymeml_species) -> str: + """Format a species object from an EnzymeML document as a string + for display in widgets. + + Args: + enzymeml_species: A species object from an EnzymeML document. 
+ + Returns: + str: The formatted species string. + """ + if enzymeml_species.name: + return f"{enzymeml_species.id} ({enzymeml_species.name})" + else: + return f"{enzymeml_species.id}" + + def create_enzymeml( fid_array: "FidArray", enzymeml_document: EnzymeMLDocument ) -> EnzymeMLDocument: diff --git a/specifications/nmrpy.md b/specifications/nmrpy.md index ab67e6e..2edbf70 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -1,14 +1,12 @@ # NMRpy data model -Python object model specifications based on the [software-driven-rdm](https://github.com/JR-1991/software-driven-rdm) Python library. - +Python object model specifications based on the [software-driven-rdm](https://github.com/JR-1991/software-driven-rdm) Python library. The NMRpy data model is designed to store both raw and processed NMR data, as well as the parameters used for processing. As NMRpy is primarily used for the analysis of time-course data, often for determining (enzyme) kinetics, the data model is designed for maximum compatibility with the [EnzymeML](https://enzymeml.github.io/services/) standard, which provides a standardised data exchange format for kinetics data from biocatalysis, enzymology, and beyond. Therefore, relevant fields that are mandatory in the EnzymeML standard are also mandatory in this NMRpy data model. ## Core objects - ### NMRpy -Root element of the NMRpy data model. +Root element of the NMRpy data model. Following the specifications of the EnzymeML standard, the `datetime_created` field is mandatory. Since each NMRpy instance is meant to hold a single experiment (e.g., one time-course), the data model reflects this by only allowing a single `experiment` object. - __datetime_created__ - Type: datetime @@ -18,29 +16,23 @@ Root element of the NMRpy data model. - Description: Date and time this dataset has last been modified. - experiment - Type: [Experiment](#experiment) - - Description: List of experiments associated with this dataset. 
- + - Description: Experiment object associated with this dataset. ### Experiment -Rohdaten -> Zwischenschritte nur nennen + interessante Parameter -> Endergebnis; Peaklist + Rangelist; rapidly pulsed (if then +calibration factor) vs fully relaxed -Also preparation of EnzymeML doc https://github.com/EnzymeML/enzymeml-specifications/@AbstractSpecies, https://github.com/EnzymeML/enzymeml-specifications/@Protein, https://github.com/EnzymeML/enzymeml-specifications/@Reactant +Container for a single NMR experiment (e.g., one time-course), containing one or more FID objects in the `fid_array` field. Following the specifications of the EnzymeML standard, the `name` field is mandatory. - __name__ - Type: string - Description: A descriptive name for the overarching experiment. -- fid +- fid_array - Type: [FIDObject](#fidobject) - - Description: A single NMR spectrum. + - Description: List of individual FidObjects. - Multiple: True -- fid_array - - Type: [FIDArray](#fidarray) - - Description: Multiple NMR spectra to be processed together. - ### FIDObject -Container for a single NMR spectrum. +Container for a single NMR spectrum, containing both raw data with relevant instrument parameters and processed data with processing steps applied. The `raw_data` field contains the complex spectral array as unaltered free induction decay from the NMR instrument. Every processing step is documented in the `processing_steps` field, together with any relevant parameters to reproduce the processing. Therefore, and to minimize redundancy, only the current state of the data is stored in the `processed_data` field. The `peaks` field is a list of `Peak` objects, each representing one single peak in the NMR spectrum. - raw_data - Type: string @@ -56,53 +48,51 @@ Container for a single NMR spectrum. - processing_steps - Type: [ProcessingSteps](#processingsteps) - Description: Contains the processing steps performed, as well as the parameters used for them. 
-- peak_identities - - Type: [Identity](#identity) - - Description: Container holding and mapping integrals resulting from peaks and their ranges to EnzymeML species. +- peaks + - Type: [Peak](#peak) + - Description: Container holding the peaks found in the NMR spectrum associated with species from an EnzymeML document. - Multiple: True - ### Parameters -Container for relevant NMR parameters. +Container for relevant NMR parameters. While not exhaustive, these parameters are commonly relevant for (pre-)processing and analysis of NMR data. - acquisition_time - Type: float - - Description: at + - Description: Duration of the FID signal acquisition period after the excitation pulse. Abrreviated as `at`. - relaxation_time - Type: float - - Description: d1 + - Description: Inter-scan delay allowing spins to relax back toward equilibrium before the next pulse. Abbreviated as `d1`. - repetition_time - Type: float - - Description: rt = at + d1 + - Description: Total duration of a single scan cycle, combining acquisition and relaxation delays (`rt = at + d1`). - number_of_transients - Type: float - - Description: nt + - Description: Number of individual FIDs averaged to improve signal-to-noise ratio. Abbreviated as `nt`. - Multiple: True - acquisition_times_array - Type: float - - Description: acqtime = [nt, 2nt, ..., rt x nt] + - Description: Array of sampled time points corresponding to the collected FID data (`acqtime = [nt, 2nt, ..., rt x nt]`). - Multiple: True - spectral_width_ppm - Type: float - - Description: sw + - Description: Frequency range of the acquired spectrum expressed in parts per million (ppm). Abbreviated as `sw`. - spectral_width_hz - Type: float - - Description: sw_hz + - Description: Frequency range of the acquired spectrum expressed in Hertz (Hz). Abbreviated as `sw_hz`. - spectrometer_frequency - Type: float - - Description: sfrq + - Description: Operating resonance frequency for the observed nucleus, defining the chemical shift reference scale. 
Abbreviated as `sfrq`. - reference_frequency - Type: float - - Description: reffrq + - Description: Calibration frequency used to align and standardize the chemical shift scale. Abbreviated as `reffrq`. - spectral_width_left - Type: float - - Description: sw_left - + - Description: Offset parameter defining the left boundary of the spectral window relative to the reference frequency. Abbreviated as `sw_left`. ### ProcessingSteps -Container for processing steps performed, as well as parameter for them. +Container for processing steps performed, as well as parameter for them. Processing steps that are reflected are apodisation, zero-filling, Fourier transformation, phasing, normalisation, deconvolution, and baseline correction. - is_apodised - Type: boolean @@ -151,88 +141,31 @@ Container for processing steps performed, as well as parameter for them. - Description: Whether or not Baseline correction was performed. - Default: False +### Peak -### Identity - -Container mapping one or more peaks to the respective species. +Container for a single peak in the NMR spectrum, associated with a species from an EnzymeML document. To ensure unambiguity of every peak, the `peak_index` field (counted from left to right in the NMR spectrum) is mandatory. Species from EnzymeML are identified by their `species_id` as found in the EnzymeML document. -- name - - Type: string - - Description: Descriptive name for the species -- species_id - - Type: string - - Description: ID of an EnzymeML species -- associated_peaks +- __peak_index__ + - Type: int + - Description: Index of the peak in the NMR spectrum, counted from left to right. +- peak_position - Type: float - - Description: Peaks belonging to the given species - - Multiple: True -- associated_ranges + - Description: Position of the peak in the NMR spectrum. 
+- peak_range - Type: {start: float, end: float} - - Description: Sets of ranges belonging to the given peaks - - Multiple: True -- associated_indices - - Type: int - - Description: Indices in the NMR spectrum (counted from left to right) belonging to the given peaks - - Multiple: True -- associated_integrals + - Description: Range of the peak, given as a start and end value. +- peak_integral - Type: float - - Description: Integrals resulting from the given peaks and ranges of a species - - Multiple: True - - -### FIDArray - -Container for processing of multiple spectra. Must reference the respective `FIDObject` by `id`. {Add reference back. Setup time for experiment, Default 0.5} - -- __fids__ - - Type: string - - Description: List of `FIDObject.id` belonging to this array. - - Multiple: True - - -## Utility objects - - -### CV - -lorem ipsum - -- __vocabulary__ - - Type: string - - Description: Name of the CV used. -- __version__ - - Type: string - - Description: Version of the CV used. -- __url__ - - Type: URL - - Description: URL pointing to the CV used. - - -### Term - -lorem ipsum {Add reference back to term_cv_reference.} - -- __name__ - - Type: string - - Description: The preferred name of the term associated with the given accession number. -- __accession__ - - Type: string - - Description: Accession number of the term in the controlled vocabulary. -- term_cv_reference - - Type: string - - Description: Reference to the `CV.id` of a controlled vocabulary that has been defined for this dataset. -- value + - Description: Integral of the peak, resulting from the position and range given. +- species_id - Type: string - - Description: Value of the term, if applicable. - - + - Description: ID of an EnzymeML species. ## Enumerations - ### FileFormats -Enumeration containing the file formats accepted by the NMRpy library. +Enumeration containing the file formats accepted by the NMRpy library. 
`NONE` corresponds either to a pickled .nmrpy file or a pre-loaded nmrglue array. ```python VARIAN = "varian" From b7bacb069edabb50480b313015b40e9894015617 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Thu, 16 Jan 2025 09:49:52 +0100 Subject: [PATCH 24/54] Delete .vscode directory --- .vscode/settings.json | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index d99f2f3..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter" - }, - "python.formatting.provider": "none" -} \ No newline at end of file From cb07ff9690642002e8959dbbc162f4ef09e7a56f Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Jan 2025 15:28:03 +0100 Subject: [PATCH 25/54] Update from sdRDM to md-models - Remove old sdRDM-based library - Add new md-models-based library - Consolidate PeakAssigner and IndentityAssigner into PeakAssigner - Consolidate PeakRangeAssigner and IdentityRangeAssigner into PeakRangeAssigner - Update Markdown specification for data model --- nmrpy/data_objects.py | 62 +- nmrpy/datamodel/__init__.py | 12 - nmrpy/datamodel/core/__init__.py | 19 - nmrpy/datamodel/core/experiment.py | 98 --- nmrpy/datamodel/core/fidobject.py | 132 --- nmrpy/datamodel/core/fileformats.py | 8 - nmrpy/datamodel/core/nmrpy.py | 62 -- nmrpy/datamodel/core/parameters.py | 143 --- nmrpy/datamodel/core/peak.py | 118 --- nmrpy/datamodel/core/processingsteps.py | 130 --- nmrpy/nmrpy_model.py | 824 ++++++++++++++++++ nmrpy/plotting.py | 744 ++++++---------- nmrpy/utils.py | 4 +- .../datamodel_schema.md | 12 +- specifications/nmrpy.md | 23 +- 15 files changed, 1129 insertions(+), 1262 deletions(-) delete mode 100644 nmrpy/datamodel/__init__.py delete mode 100644 nmrpy/datamodel/core/__init__.py delete mode 100644 nmrpy/datamodel/core/experiment.py 
delete mode 100644 nmrpy/datamodel/core/fidobject.py delete mode 100644 nmrpy/datamodel/core/fileformats.py delete mode 100644 nmrpy/datamodel/core/nmrpy.py delete mode 100644 nmrpy/datamodel/core/parameters.py delete mode 100644 nmrpy/datamodel/core/peak.py delete mode 100644 nmrpy/datamodel/core/processingsteps.py create mode 100644 nmrpy/nmrpy_model.py rename {nmrpy/datamodel/schemes => specifications}/datamodel_schema.md (91%) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index b496733..691ab03 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -11,20 +11,20 @@ import os import pickle from ipywidgets import SelectMultiple -from sdRDM import DataModel -from sdRDM.base.importedmodules import ImportedModules -from nmrpy.datamodel.core import ( +from nmrpy.nmrpy_model import ( NMRpy, Experiment, FIDObject, Parameters, ProcessingSteps, Peak, + PeakRange, ) from nmrpy.utils import create_enzymeml import pyenzyme as pe import pyenzyme.equations as peq from pyenzyme.model import EnzymeMLDocument +from typing import List, Union class Base: @@ -258,7 +258,7 @@ def fid_object(self): @fid_object.setter def fid_object(self, fid_object): - if isinstance(fid_object, DataModel): + if isinstance(fid_object, FIDObject): self.__fid_object = fid_object @property @@ -1397,7 +1397,7 @@ def plot_deconv(self, **kwargs): setattr(self, plt.id, plt) pyplot.show() - def assign_peaks(self): + def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): """ Instantiate a species-assignment GUI widget. Select peaks from dropdown menu containing :attr:`~nmrpy.data_objects.Fid.peaks`. @@ -1405,17 +1405,10 @@ def assign_peaks(self): containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. 
""" - # raise NotImplementedError - widget_title = "Assign species for {}".format(self.id) - self._assigner_widget = PeakAssigner(fid=self, title=widget_title) - - def index_peaks(self, list_of_species: list[str]): - """ - Index peaks based on species. - """ - widget_title = "Index peaks for {}".format(self.id) - self._indexer_widget = PeakIndexer( - fid=self, species_list=list_of_species, title=widget_title + self._assigner_widget = PeakAssigner( + fid=self, + species_list=species_list, + title="Assign species for {}".format(self.id), ) def clear_peaks(self): @@ -1450,13 +1443,15 @@ def __init__(self): @property def data_model(self): + for fid in self.get_fids(): + self.__data_model.experiment.fid_array.append(fid.fid_object) return self.__data_model @data_model.setter - def data_model(self, data_model: DataModel): - if not isinstance(data_model, DataModel): + def data_model(self, data_model: NMRpy): + if not isinstance(data_model, NMRpy): raise AttributeError( - f"Parameter `data_model` has to be of type `sdrdm.DataModel`, got {type(data_model)} instead." + f"Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead." ) self.__data_model = data_model self.__data_model.datetime_modified = str(datetime.now()) @@ -2406,7 +2401,7 @@ def apply_to_enzymeml( enzymeml_document = self.enzymeml_document return create_enzymeml(self, enzymeml_document) - def assign_peaks(self, index_list=None): + def assign_peaks(self, species_list=None, index_list=None): """ Instantiate a peak-assignment GUI widget. Select a FID by its ID from the combobox. Select peaks from dropdown menu @@ -2415,32 +2410,7 @@ def assign_peaks(self, index_list=None): containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. """ - - self._assigner_widget = PeakRangeAssigner(fid_array=self, index_list=index_list) - - def index_peaks(self, species_list, index_list=None): - """ - Instantiate a peak-indexing GUI widget. 
Select a FID by ID to base the peak - assignments on, then assign species names to peaks. The assignments will be - applied to all FIDs in the array. - - Parameters - ---------- - species_list : list - List of species names to assign to peaks - index_list : list, optional - List of indices to select FIDs from the array. If None, all FIDs - will be included. - - Example - ------- - >>> species = ['ATP', 'ADP', 'Pi', 'Glucose'] - >>> fid_array.index_peaks(species) # Use all FIDs - >>> fid_array.index_peaks(species, [0, 1, 2]) # Use only first three FIDs - """ - from nmrpy.plotting import PeakRangeIndexer - - self._indexer_widget = PeakRangeIndexer( + self._assigner_widget = PeakRangeAssigner( fid_array=self, species_list=species_list, index_list=index_list ) diff --git a/nmrpy/datamodel/__init__.py b/nmrpy/datamodel/__init__.py deleted file mode 100644 index 0907ff5..0000000 --- a/nmrpy/datamodel/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -import os - -from .core.experiment import Experiment -from .core.fidobject import FIDObject -from .core.fileformats import FileFormats -from .core.nmrpy import NMRpy -from .core.parameters import Parameters -from .core.peak import Peak -from .core.processingsteps import ProcessingSteps - -__URL__ = "" -__COMMIT__ = "" diff --git a/nmrpy/datamodel/core/__init__.py b/nmrpy/datamodel/core/__init__.py deleted file mode 100644 index c8593df..0000000 --- a/nmrpy/datamodel/core/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .experiment import Experiment -from .fidobject import FIDObject -from .fileformats import FileFormats -from .nmrpy import NMRpy -from .parameters import Parameters -from .peak import Peak -from .processingsteps import ProcessingSteps - -__doc__ = "" - -__all__ = [ - "NMRpy", - "Experiment", - "FIDObject", - "Parameters", - "ProcessingSteps", - "Peak", - "FileFormats", -] diff --git a/nmrpy/datamodel/core/experiment.py b/nmrpy/datamodel/core/experiment.py deleted file mode 100644 index 1d9257a..0000000 --- 
a/nmrpy/datamodel/core/experiment.py +++ /dev/null @@ -1,98 +0,0 @@ -from typing import Dict, List, Optional, Union -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - -from .fidobject import FIDObject -from .parameters import Parameters -from .peak import Peak -from .processingsteps import ProcessingSteps - - -@forge_signature -class Experiment( - sdRDM.DataModel, - search_mode="unordered", -): - """Container for a single NMR experiment (e.g., one time-course), containing one or more FID objects in the `fid_array` field. Following the specifications of the EnzymeML standard, the `name` field is mandatory.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - name: str = element( - description="A descriptive name for the overarching experiment.", - tag="name", - json_schema_extra=dict(), - ) - - fid_array: List[FIDObject] = element( - description="List of individual FidObjects.", - default_factory=ListPlus, - tag="fid_array", - json_schema_extra=dict( - multiple=True, - ), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self - - def add_to_fid_array( - self, - raw_data: List[str] = ListPlus(), - processed_data: List[Union[str, float]] = ListPlus(), - nmr_parameters: Optional[Parameters] = None, - processing_steps: Optional[ProcessingSteps] = None, - peaks: 
List[Peak] = ListPlus(), - id: Optional[str] = None, - **kwargs, - ) -> FIDObject: - """ - This method adds an object of type 'FIDObject' to attribute fid_array - - Args: - id (str): Unique identifier of the 'FIDObject' object. Defaults to 'None'. - raw_data (): Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`.. Defaults to ListPlus() - processed_data (): Processed data array.. Defaults to ListPlus() - nmr_parameters (): Contains commonly-used NMR parameters.. Defaults to None - processing_steps (): Contains the processing steps performed, as well as the parameters used for them.. Defaults to None - peaks (): Container holding the peaks found in the NMR spectrum associated with species from an EnzymeML document.. Defaults to ListPlus() - """ - - params = { - "raw_data": raw_data, - "processed_data": processed_data, - "nmr_parameters": nmr_parameters, - "processing_steps": processing_steps, - "peaks": peaks, - } - - if id is not None: - params["id"] = id - - obj = FIDObject(**params) - - self.fid_array.append(obj) - - return self.fid_array[-1] diff --git a/nmrpy/datamodel/core/fidobject.py b/nmrpy/datamodel/core/fidobject.py deleted file mode 100644 index cefcd67..0000000 --- a/nmrpy/datamodel/core/fidobject.py +++ /dev/null @@ -1,132 +0,0 @@ -from typing import Dict, List, Optional, Union -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - -from .parameters import Parameters -from .peak import Peak, PeakRange -from .processingsteps import ProcessingSteps - - -@forge_signature -class FIDObject( - sdRDM.DataModel, - search_mode="unordered", -): - """Container for a single NMR spectrum, containing both raw data with relevant instrument parameters and processed data with processing steps 
applied. The `raw_data` field contains the complex spectral array as unaltered free induction decay from the NMR instrument. Every processing step is documented in the `processing_steps` field, together with any relevant parameters to reproduce the processing. Therefore, and to minimize redundancy, only the current state of the data is stored in the `processed_data` field. The `peaks` field is a list of `Peak` objects, each representing one single peak in the NMR spectrum.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - raw_data: List[str] = element( - description=( - "Complex spectral data from numpy array as string of format" - " `{array.real}+{array.imag}j`." - ), - default_factory=ListPlus, - tag="raw_data", - json_schema_extra=dict( - multiple=True, - ), - ) - - processed_data: List[Union[str, float]] = element( - description="Processed data array.", - default_factory=ListPlus, - tag="processed_data", - json_schema_extra=dict( - multiple=True, - ), - ) - - nmr_parameters: Optional[Parameters] = element( - description="Contains commonly-used NMR parameters.", - default_factory=Parameters, - tag="nmr_parameters", - json_schema_extra=dict(), - ) - - processing_steps: Optional[ProcessingSteps] = element( - description=( - "Contains the processing steps performed, as well as the parameters used" - " for them." - ), - default_factory=ProcessingSteps, - tag="processing_steps", - json_schema_extra=dict(), - ) - - peaks: List[Peak] = element( - description=( - "Container holding the peaks found in the NMR spectrum associated with" - " species from an EnzymeML document." 
- ), - default_factory=ListPlus, - tag="peaks", - json_schema_extra=dict( - multiple=True, - ), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self - - def add_to_peaks( - self, - peak_index: int, - peak_position: Optional[float] = None, - peak_range: Optional[PeakRange] = None, - peak_integral: Optional[float] = None, - species_id: Optional[str] = None, - id: Optional[str] = None, - **kwargs, - ) -> Peak: - """ - This method adds an object of type 'Peak' to attribute peaks - - Args: - id (str): Unique identifier of the 'Peak' object. Defaults to 'None'. - peak_index (): Index of the peak in the NMR spectrum, counted from left to right.. - peak_position (): Position of the peak in the NMR spectrum.. Defaults to None - peak_range (): Range of the peak, given as a start and end value.. Defaults to None - peak_integral (): Integral of the peak, resulting from the position and range given.. Defaults to None - species_id (): ID of an EnzymeML species.. 
Defaults to None - """ - - params = { - "peak_index": peak_index, - "peak_position": peak_position, - "peak_range": peak_range, - "peak_integral": peak_integral, - "species_id": species_id, - } - - if id is not None: - params["id"] = id - - obj = Peak(**params) - - self.peaks.append(obj) - - return self.peaks[-1] diff --git a/nmrpy/datamodel/core/fileformats.py b/nmrpy/datamodel/core/fileformats.py deleted file mode 100644 index 476dc52..0000000 --- a/nmrpy/datamodel/core/fileformats.py +++ /dev/null @@ -1,8 +0,0 @@ -from enum import Enum - - -class FileFormats(Enum): - - VARIAN = "varian" - BRUKER = "bruker" - NONE = None diff --git a/nmrpy/datamodel/core/nmrpy.py b/nmrpy/datamodel/core/nmrpy.py deleted file mode 100644 index 7bd6795..0000000 --- a/nmrpy/datamodel/core/nmrpy.py +++ /dev/null @@ -1,62 +0,0 @@ -from datetime import datetime as Datetime -from typing import Dict, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - -from .experiment import Experiment - - -@forge_signature -class NMRpy( - sdRDM.DataModel, - search_mode="unordered", -): - """Root element of the NMRpy data model. Following the specifications of the EnzymeML standard, the `datetime_created` field is mandatory. 
Since each NMRpy instance is meant to hold a single experiment (e.g., one time-course), the data model reflects this by only allowing a single `experiment` object.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - datetime_created: Datetime = element( - description="Date and time this dataset has been created.", - tag="datetime_created", - json_schema_extra=dict(), - ) - - datetime_modified: Optional[Datetime] = element( - description="Date and time this dataset has last been modified.", - default=None, - tag="datetime_modified", - json_schema_extra=dict(), - ) - - experiment: Optional[Experiment] = element( - description="Experiment object associated with this dataset.", - default=None, - tag="experiment", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/core/parameters.py b/nmrpy/datamodel/core/parameters.py deleted file mode 100644 index 75a0819..0000000 --- a/nmrpy/datamodel/core/parameters.py +++ /dev/null @@ -1,143 +0,0 @@ -from typing import Dict, List, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - - -@forge_signature -class Parameters( - sdRDM.DataModel, - search_mode="unordered", -): - """Container for relevant NMR parameters. 
While not exhaustive, these parameters are commonly relevant for (pre-)processing and analysis of NMR data.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - acquisition_time: Optional[float] = element( - description=( - "Duration of the FID signal acquisition period after the excitation pulse." - " Abrreviated as `at`." - ), - default=None, - tag="acquisition_time", - json_schema_extra=dict(), - ) - - relaxation_time: Optional[float] = element( - description=( - "Inter-scan delay allowing spins to relax back toward equilibrium before" - " the next pulse. Abbreviated as `d1`." - ), - default=None, - tag="relaxation_time", - json_schema_extra=dict(), - ) - - repetition_time: Optional[float] = element( - description=( - "Total duration of a single scan cycle, combining acquisition and" - " relaxation delays (`rt = at + d1`)." - ), - default=None, - tag="repetition_time", - json_schema_extra=dict(), - ) - - number_of_transients: List[float] = element( - description=( - "Number of individual FIDs averaged to improve signal-to-noise ratio." - " Abbreviated as `nt`." - ), - default_factory=ListPlus, - tag="number_of_transients", - json_schema_extra=dict( - multiple=True, - ), - ) - - acquisition_times_array: List[float] = element( - description=( - "Array of sampled time points corresponding to the collected FID data" - " (`acqtime = [nt, 2nt, ..., rt x nt]`)." - ), - default_factory=ListPlus, - tag="acquisition_times_array", - json_schema_extra=dict( - multiple=True, - ), - ) - - spectral_width_ppm: Optional[float] = element( - description=( - "Frequency range of the acquired spectrum expressed in parts per million" - " (ppm). Abbreviated as `sw`." 
- ), - default=None, - tag="spectral_width_ppm", - json_schema_extra=dict(), - ) - - spectral_width_hz: Optional[float] = element( - description=( - "Frequency range of the acquired spectrum expressed in Hertz (Hz)." - " Abbreviated as `sw_hz`." - ), - default=None, - tag="spectral_width_hz", - json_schema_extra=dict(), - ) - - spectrometer_frequency: Optional[float] = element( - description=( - "Operating resonance frequency for the observed nucleus, defining the" - " chemical shift reference scale. Abbreviated as `sfrq`." - ), - default=None, - tag="spectrometer_frequency", - json_schema_extra=dict(), - ) - - reference_frequency: Optional[float] = element( - description=( - "Calibration frequency used to align and standardize the chemical shift" - " scale. Abbreviated as `reffrq`." - ), - default=None, - tag="reference_frequency", - json_schema_extra=dict(), - ) - - spectral_width_left: Optional[float] = element( - description=( - "Offset parameter defining the left boundary of the spectral window" - " relative to the reference frequency. Abbreviated as `sw_left`." 
- ), - default=None, - tag="spectral_width_left", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/core/peak.py b/nmrpy/datamodel/core/peak.py deleted file mode 100644 index bea1897..0000000 --- a/nmrpy/datamodel/core/peak.py +++ /dev/null @@ -1,118 +0,0 @@ -from typing import Dict, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - - -@forge_signature -class PeakRange( - sdRDM.DataModel, - search_mode="unordered", -): - """Small type for attribute 'peak_range'""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - start: Optional[float] = element( - default=None, - tag="start", - json_schema_extra=dict(), - ) - - end: Optional[float] = element( - default=None, - tag="end", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self - - -@forge_signature -class Peak( - sdRDM.DataModel, - search_mode="unordered", -): - 
"""Container for a single peak in the NMR spectrum, associated with a species from an EnzymeML document. To ensure unambiguity of every peak, the `peak_index` field (counted from left to right in the NMR spectrum) is mandatory. Species from EnzymeML are identified by their `species_id` as found in the EnzymeML document.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - peak_index: int = element( - description=( - "Index of the peak in the NMR spectrum, counted from left to right." - ), - tag="peak_index", - json_schema_extra=dict(), - ) - - peak_position: Optional[float] = element( - description="Position of the peak in the NMR spectrum.", - default=None, - tag="peak_position", - json_schema_extra=dict(), - ) - - peak_range: Optional[PeakRange] = element( - description="Range of the peak, given as a start and end value.", - default_factory=PeakRange, - tag="peak_range", - json_schema_extra=dict(), - ) - - peak_integral: Optional[float] = element( - description=( - "Integral of the peak, resulting from the position and range given." 
- ), - default=None, - tag="peak_integral", - json_schema_extra=dict(), - ) - - species_id: Optional[str] = element( - description="ID of an EnzymeML species.", - default=None, - tag="species_id", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/datamodel/core/processingsteps.py b/nmrpy/datamodel/core/processingsteps.py deleted file mode 100644 index 2d29cb5..0000000 --- a/nmrpy/datamodel/core/processingsteps.py +++ /dev/null @@ -1,130 +0,0 @@ -from typing import Dict, Optional -from uuid import uuid4 - -import sdRDM -from lxml.etree import _Element -from pydantic import PrivateAttr, model_validator -from pydantic_xml import attr, element -from sdRDM.base.listplus import ListPlus -from sdRDM.base.utils import forge_signature -from sdRDM.tools.utils import elem2dict - - -@forge_signature -class ProcessingSteps( - sdRDM.DataModel, - search_mode="unordered", -): - """Container for processing steps performed, as well as parameter for them. 
Processing steps that are reflected are apodisation, zero-filling, Fourier transformation, phasing, normalisation, deconvolution, and baseline correction.""" - - id: Optional[str] = attr( - name="id", - alias="@id", - description="Unique identifier of the given object.", - default_factory=lambda: str(uuid4()), - ) - - is_apodised: Optional[bool] = element( - description="Whether or not Apodisation (line-broadening) has been performed.", - default=None, - tag="is_apodised", - json_schema_extra=dict(), - ) - - apodisation_frequency: Optional[float] = element( - description="Degree of Apodisation (line-broadening) in Hz.", - default=None, - tag="apodisation_frequency", - json_schema_extra=dict(), - ) - - is_zero_filled: Optional[bool] = element( - description="Whether or not Zero-filling has been performed.", - default=False, - tag="is_zero_filled", - json_schema_extra=dict(), - ) - - is_fourier_transformed: Optional[bool] = element( - description="Whether or not Fourier transform has been performed.", - default=False, - tag="is_fourier_transformed", - json_schema_extra=dict(), - ) - - fourier_transform_type: Optional[str] = element( - description="The type of Fourier transform used.", - default=None, - tag="fourier_transform_type", - json_schema_extra=dict(), - ) - - is_phased: Optional[bool] = element( - description="Whether or not Phasing was performed.", - default=False, - tag="is_phased", - json_schema_extra=dict(), - ) - - zero_order_phase: Optional[float] = element( - description="Zero-order phase used for Phasing.", - default=None, - tag="zero_order_phase", - json_schema_extra=dict(), - ) - - first_order_phase: Optional[float] = element( - description="First-order phase used for Phasing.", - default=None, - tag="first_order_phase", - json_schema_extra=dict(), - ) - - is_only_real: Optional[bool] = element( - description="Whether or not the imaginary part has been discarded.", - default=False, - tag="is_only_real", - json_schema_extra=dict(), - ) - - 
is_normalised: Optional[bool] = element( - description="Whether or not Normalisation was performed.", - default=False, - tag="is_normalised", - json_schema_extra=dict(), - ) - - max_value: Optional[float] = element( - description="Maximum value of the dataset used for Normalisation.", - default=None, - tag="max_value", - json_schema_extra=dict(), - ) - - is_deconvoluted: Optional[bool] = element( - description="Whether or not Deconvolution was performed.", - default=False, - tag="is_deconvoluted", - json_schema_extra=dict(), - ) - - is_baseline_corrected: Optional[bool] = element( - description="Whether or not Baseline correction was performed.", - default=False, - tag="is_baseline_corrected", - json_schema_extra=dict(), - ) - - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) - - @model_validator(mode="after") - def _parse_raw_xml_data(self): - for attr, value in self: - if isinstance(value, (ListPlus, list)) and all( - isinstance(i, _Element) for i in value - ): - self._raw_xml_data[attr] = [elem2dict(i) for i in value] - elif isinstance(value, _Element): - self._raw_xml_data[attr] = elem2dict(value) - - return self diff --git a/nmrpy/nmrpy_model.py b/nmrpy/nmrpy_model.py new file mode 100644 index 0000000..b814365 --- /dev/null +++ b/nmrpy/nmrpy_model.py @@ -0,0 +1,824 @@ +## This is a generated file. Do not modify it manually! 
+ +from __future__ import annotations +from pydantic import BaseModel, Field, ConfigDict +from typing import Optional, Generic, TypeVar +from enum import Enum +from uuid import uuid4 +from datetime import date, datetime + +# Filter Wrapper definition used to filter a list of objects +# based on their attributes +Cls = TypeVar("Cls") + + +class FilterWrapper(Generic[Cls]): + """Wrapper class to filter a list of objects based on their attributes""" + + def __init__(self, collection: list[Cls], **kwargs): + self.collection = collection + self.kwargs = kwargs + + def filter(self) -> list[Cls]: + for key, value in self.kwargs.items(): + self.collection = [ + item for item in self.collection if self._fetch_attr(key, item) == value + ] + return self.collection + + def _fetch_attr(self, name: str, item: Cls): + try: + return getattr(item, name) + except AttributeError: + raise AttributeError(f"{item} does not have attribute {name}") + + +# JSON-LD Helper Functions +def add_namespace(obj, prefix: str | None, iri: str | None): + """Adds a namespace to the JSON-LD context + + Args: + prefix (str): The prefix to add + iri (str): The IRI to add + """ + if prefix is None and iri is None: + return + elif prefix and iri is None: + raise ValueError("If prefix is provided, iri must also be provided") + elif iri and prefix is None: + raise ValueError("If iri is provided, prefix must also be provided") + + obj.ld_context[prefix] = iri # type: ignore + + +def validate_prefix(term: str | dict, prefix: str): + """Validates that a term is prefixed with a given prefix + + Args: + term (str): The term to validate + prefix (str): The prefix to validate against + + Returns: + bool: True if the term is prefixed with the prefix, False otherwise + """ + + if isinstance(term, dict) and not term["@id"].startswith(prefix + ":"): + raise ValueError(f"Term {term} is not prefixed with {prefix}") + elif isinstance(term, str) and not term.startswith(prefix + ":"): + raise ValueError(f"Term {term} is not 
prefixed with {prefix}") + + +# Model Definitions + + +class NMRpy(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + datetime_created: str + datetime_modified: Optional[str] = Field(default=None) + experiment: Optional[Experiment] = Field(default=None) + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", default_factory=lambda: "md:NMRpy/" + str(uuid4()) + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:NMRpy", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term 
(str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. + + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + +class Experiment(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + name: str + fid_array: list[FIDObject] = Field(default_factory=list) + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", + default_factory=lambda: "md:Experiment/" + str(uuid4()), + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:Experiment", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def filter_fid_array(self, **kwargs) -> list[FIDObject]: + """Filters the fid_array attribute based on the given kwargs + + Args: + **kwargs: The attributes to filter by. 
+ + Returns: + list[FIDObject]: The filtered list of FIDObject objects + """ + + return FilterWrapper[FIDObject](self.fid_array, **kwargs).filter() + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. 
+ + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + def add_to_fid_array( + self, + raw_data: list[str] = [], + processed_data: list[Union[None, str, float]] = [], + nmr_parameters: Optional[Parameters] = None, + processing_steps: Optional[ProcessingSteps] = None, + peaks: list[Peak] = [], + **kwargs, + ): + params = { + "raw_data": raw_data, + "processed_data": processed_data, + "nmr_parameters": nmr_parameters, + "processing_steps": processing_steps, + "peaks": peaks, + } + + if "id" in kwargs: + params["id"] = kwargs["id"] + + self.fid_array.append(FIDObject(**params)) + + return self.fid_array[-1] + + +class FIDObject(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + raw_data: list[str] = Field(default_factory=list) + processed_data: list[str] = Field(default_factory=list) + nmr_parameters: Optional[Parameters] = Field(default=None) + processing_steps: Optional[ProcessingSteps] = Field(default=None) + peaks: list[Peak] = Field(default_factory=list) + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", + default_factory=lambda: "md:FIDObject/" + str(uuid4()), + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:FIDObject", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def filter_peaks(self, **kwargs) -> list[Peak]: + """Filters the peaks attribute based on the given kwargs + + Args: + **kwargs: The attributes to filter by. 
+ + Returns: + list[Peak]: The filtered list of Peak objects + """ + + return FilterWrapper[Peak](self.peaks, **kwargs).filter() + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. 
+ + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + def add_to_peaks( + self, + peak_index: int, + peak_position: Optional[float] = None, + peak_range: Optional[PeakRange] = None, + peak_integral: Optional[float] = None, + species_id: Optional[str] = None, + **kwargs, + ): + params = { + "peak_index": peak_index, + "peak_position": peak_position, + "peak_range": peak_range, + "peak_integral": peak_integral, + "species_id": species_id, + } + + if "id" in kwargs: + params["id"] = kwargs["id"] + + self.peaks.append(Peak(**params)) + + return self.peaks[-1] + + +class Parameters(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + acquisition_time: Optional[float] = Field(default=None) + relaxation_time: Optional[float] = Field(default=None) + repetition_time: Optional[float] = Field(default=None) + number_of_transients: list[float] = Field(default_factory=list) + acquisition_times_array: list[float] = Field(default_factory=list) + spectral_width_ppm: Optional[float] = Field(default=None) + spectral_width_hz: Optional[float] = Field(default=None) + spectrometer_frequency: Optional[float] = Field(default=None) + reference_frequency: Optional[float] = Field(default=None) + spectral_width_left: Optional[float] = Field(default=None) + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", + default_factory=lambda: "md:Parameters/" + str(uuid4()), + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:Parameters", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str 
| None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. 
+ + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + +class ProcessingSteps(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + is_apodised: Optional[bool] = Field(default=None) + apodisation_frequency: Optional[float] = Field(default=None) + is_zero_filled: bool = False + is_fourier_transformed: bool = False + fourier_transform_type: Optional[str] = Field(default=None) + is_phased: bool = False + zero_order_phase: Optional[float] = Field(default=None) + first_order_phase: Optional[float] = Field(default=None) + is_only_real: bool = False + is_normalised: bool = False + max_value: Optional[float] = Field(default=None) + is_deconvoluted: bool = False + is_baseline_corrected: bool = False + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", + default_factory=lambda: "md:ProcessingSteps/" + str(uuid4()), + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:ProcessingSteps", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + 
AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. + + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + +class Peak(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + peak_index: int + peak_position: Optional[float] = Field(default=None) + peak_range: Optional[PeakRange] = Field(default=None) + peak_integral: Optional[float] = Field(default=None) + species_id: Optional[str] = Field(default=None) + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", default_factory=lambda: "md:Peak/" + str(uuid4()) + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:Peak", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the 
JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. 
+ + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + +class PeakRange(BaseModel): + + model_config: ConfigDict = ConfigDict( # type: ignore + validate_assigment=True, + ) # type: ignore + + start: float + end: float + + # JSON-LD fields + ld_id: str = Field( + serialization_alias="@id", + default_factory=lambda: "md:PeakRange/" + str(uuid4()), + ) + ld_type: list[str] = Field( + serialization_alias="@type", + default_factory=lambda: [ + "md:PeakRange", + ], + ) + ld_context: dict[str, str | dict] = Field( + serialization_alias="@context", + default_factory=lambda: { + "md": "http://mdmodel.net/", + }, + ) + + def set_attr_term( + self, + attr: str, + term: str | dict, + prefix: str | None = None, + iri: str | None = None, + ): + """Sets the term for a given attribute in the JSON-LD object + + Example: + # Using an IRI term + >> obj.set_attr_term("name", "http://schema.org/givenName") + + # Using a prefix and term + >> obj.set_attr_term("name", "schema:givenName", "schema", "http://schema.org") + + # Usinng a dictionary term + >> obj.set_attr_term("name", {"@id": "http://schema.org/givenName", "@type": "@id"}) + + Args: + attr (str): The attribute to set the term for + term (str | dict): The term to set for the attribute + + Raises: + AssertionError: If the attribute is not found in the model + """ + + assert ( + attr in self.model_fields + ), f"Attribute {attr} not found in {self.__class__.__name__}" + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_context[attr] = term + + def add_type_term( + self, term: str, prefix: str | None = None, iri: str | None = None + ): + """Adds a term to the @type field of the JSON-LD object + + Example: + # Using a term + >> obj.add_type_term("https://schema.org/Person") + + # Using a prefixed term + >> 
obj.add_type_term("schema:Person", "schema", "https://schema.org/Person") + + Args: + term (str): The term to add to the @type field + prefix (str, optional): The prefix to use for the term. Defaults to None. + iri (str, optional): The IRI to use for the term prefix. Defaults to None. + + Raises: + ValueError: If prefix is provided but iri is not + ValueError: If iri is provided but prefix is not + """ + + if prefix: + validate_prefix(term, prefix) + + add_namespace(self, prefix, iri) + self.ld_type.append(term) + + +class FileFormats(Enum): + BRUKER = "bruker" + NONE = "None" + VARIAN = "varian" diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index fd5ea80..626e3b5 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -2,6 +2,7 @@ import logging import traceback from datetime import datetime +from typing import List, Union import numpy import sympy as sp @@ -25,6 +26,8 @@ from matplotlib.transforms import blended_transform_factory from matplotlib.widgets import Cursor +from pyenzyme.model import EnzymeMLDocument + import nmrpy.data_objects from .utils import ( @@ -1379,400 +1382,90 @@ class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): class PeakAssigner: - def __init__(self, fid, title): - self.fid = fid - self.title = title - self.available_peaks = [str(peak) for peak in self.fid.peaks] - self.selected_values = {} - self._validate_and_update_fid(self.fid) + """Interactive widget for assigning species to peaks in a FID.""" - # Create the label widget for the title - title_label = Label(value=title) - - # Create the dropdown widget for the peaks - peak_dropdown = Dropdown( - options=self.available_peaks, - description="Select a peak:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) - - # Create the dropdown widget for the species - species_dropdown = Dropdown( - options=[ - format_species_string(species) for species in self.fid.enzymeml_species - ], - description="Select a species:", - layout={"width": 
"max-content"}, - style={"description_width": "initial"}, - ) - - # Create the button to save selection to dict - save_button = Button( - description="Save selection", - icon="file-arrow-down", - ) - - # Create a reset button - reset_button = Button(description="Reset selection", disabled=True) - - # Create an output widget to display the selection - selection_output = Output() - - # Define a function to handle the save button click event - def on_save_button_click(b): - with selection_output: - selection_output.clear_output(wait=True) - # Fetch the values from the species dropdown and peak - # dropdown and add them to a dictionary with species as - # keys - if species_dropdown.value not in self.selected_values: - self.selected_values[species_dropdown.value] = [] - self.selected_values[species_dropdown.value].append( - float(peak_dropdown.value) - ) - # Remove the assigned peaks from the options of the peak - # dropdown, as they cannot belong to two species at once - # and disable the peak dropdown if all are assigned - self.available_peaks.remove(peak_dropdown.value) - peak_dropdown.options = self.available_peaks - if not self.available_peaks: - peak_dropdown.disabled = True - save_button.disabled = True - # Iterate over the dictionary of assigned peaks and - # print the saved selections. 
- print("\nSaved selections:") - for key, value in self.selected_values.items(): - print(f"{key}: {value}") - # Iterate over every FID, find the peak in the dict - # of selected values, and assign the species ID to - # the peak object - for peak in self.fid.fid_object.peaks: - if peak.peak_position not in value: - continue - peak.species_id = key.split(" ")[0] - self.fid.species[peak.peak_index] = peak.species_id - self.fid._flags["assigned"] = True - reset_button.disabled = False - - # Attach the function to the save button's click event - save_button.on_click(on_save_button_click) - - # Define a function to handle the reset event - def on_reset_button_click(b): - with selection_output: - selection_output.clear_output(wait=True) - # Clear the Peaks in the data model and reset the - # selection dict - print("\nCleared selections!") - for peak_object in self.fid.fid_object.peaks: - peak_object.species_id = None - self.selected_values = {} - # Refill the list of available peaks as before, - # re-enable the peak dropdown, and disable the reset - # button again - self.available_peaks = [str(peak) for peak in self.fid.peaks] - peak_dropdown.options = self.available_peaks - peak_dropdown.disabled = False - save_button.disabled = False - self.fid._flags["assigned"] = False - reset_button.disabled = True - - # Attach the function to the reset click event - reset_button.on_click(on_reset_button_click) - - # Create a container for both the title and the dropdown - container = VBox( - [ - title_label, - peak_dropdown, - species_dropdown, - save_button, - reset_button, - selection_output, - ] - ) - - # Display the container - display(container) - - def _validate_and_update_fid(self, fid): + def __init__(self, fid, species_list=None, title="Assign species"): """ - Ensures fid has valid peaks and ranges, - initializes its .species array, and - syncs up with the data model if needed. + Initialize peak assigner widget. 
+ + Parameters + ---------- + fid : Fid + The FID object to assign peaks for + species_source : Union[List[str], EnzymeMLDocument], optional + Either a list of species names or an EnzymeML document. + If None, will try to use fid.enzymeml_document + title : str, optional + Title for the widget """ - if fid.peaks is [] or fid.peaks is None: - raise RuntimeError( - "`fid.peaks` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." - ) - - if fid.ranges is [] or fid.ranges is None: - raise RuntimeError( - "`fid.ranges` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." - ) - - if len(fid.peaks) != len(fid.ranges): - raise RuntimeError( - "To assign peaks, `fid.peaks` and `fid.ranges` must be of the same length." - ) - - # Prepare a numpy array to store species - fid.species = numpy.empty(len(fid.peaks), dtype=object) - - # Sync data model: update existing peak objects or create new ones - for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): - if i < len(fid.fid_object.peaks): - # Peak already exists, update it - fid.fid_object.peaks[i].peak_position = float(peak_val) - fid.fid_object.peaks[i].peak_range = { - "start": float(range_val[0]), - "end": float(range_val[1]), - } - else: - # Create a new peak in the data model - fid.fid_object.add_to_peaks( - peak_index=i, - peak_position=float(peak_val), - peak_range={ - "start": float(range_val[0]), - "end": float(range_val[1]), - }, - ) - - -class PeakRangeAssigner: - """Wow, such documentation. 
- for fid in [self.fids[i] for i in self.fid_number]: - """ - - def __init__(self, fid_array, index_list): - self.fid_array = fid_array - self.fids = self._build_fids(index_list) - self.available_peaks = [] - self.available_species = get_species_from_enzymeml( - self.fid_array.enzymeml_document - ) - self.selected_fid = None + self.fid = fid + self.title = title self.selected_values = {} - # Validate and/or update each Fid - for fid in self.fids: - self._validate_and_update_fid(fid) - - # Create the label widget for the title - title_label = Label(value="Assign peaks for all FIDs") - - # Create the combobox for the selection of the FID ID - combobox = Combobox( - options=[fid.id for fid in self.fids], - description="Select FID to base entire array on:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) - - # Create the dropdown widget for the peaks - peak_dropdown = Dropdown( - options=[], - description="Select a peak:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - disabled=True, - ) - - # Create the dropdown widget for the species - species_dropdown = Dropdown( - options=[], - description="Select a species:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - disabled=True, - ) - - # Create the button to save selection to dict - save_button = Button( - description="Save selection", icon="file-arrow-down", disabled=True - ) - - # Create a reset button - reset_button = Button(description="Reset selection", disabled=True) - - # Create an output widget to display the selection - selection_output = Output() - - # Define a method to handle selection in combobox - def on_combobox_change(event): - if event["type"] == "change" and event["name"] == "value": - selected_option = event["new"] - if selected_option in combobox.options: - peak_dropdown.disabled = False - self.selected_fid = self.fid_array.get_fid(selected_option) - self.available_peaks = [ - str(peak) for peak in 
self.selected_fid.peaks - ] - peak_dropdown.options = self.available_peaks - - # Attach the method to the combobox's change event: - combobox.observe(on_combobox_change) - - # Define a method to handle the peak dropdown's change event - def on_peak_dropdown_change(event): - if event["type"] == "change" and event["name"] == "value": - species_dropdown.options = [ - format_species_string(species) for species in self.available_species - ] - species_dropdown.disabled = False - - # Attach the method to the dropdown's change event - peak_dropdown.observe(on_peak_dropdown_change) - - # Define a method to handle the peak dropdown's change event - def on_species_dropdown_change(event): - if event["type"] == "change" and event["name"] == "value": - save_button.disabled = False - - # Attach the method to the dropdown's change event - species_dropdown.observe(on_species_dropdown_change) - - # Define a function to handle the save button click event - def on_save_button_click(b): - with selection_output: - selection_output.clear_output(wait=True) - # Fetch the values from the species dropdown and peak - # dropdown and add them to a dictionary with species as - # keys - if species_dropdown.value not in self.selected_values: - self.selected_values[species_dropdown.value] = [] - self.selected_values[species_dropdown.value].append( - float(peak_dropdown.value) - ) - # Remove the assigned peaks from the options of the peak - # dropdown, as they cannot belong to two species at once - # and disable the peak dropdown if all are assigned - self.available_peaks.remove(peak_dropdown.value) - peak_dropdown.options = self.available_peaks - if not self.available_peaks: - peak_dropdown.disabled = True - # Iterate over the dictionary of assigned peaks and - # print the saved selections. 
- print("\nSaved selections:") - for key, value in self.selected_values.items(): - print(f"{key}: {value}") - # Iterate over every FID, find the peak in the dict - # of selected values, and assign the species ID to - # the peak object - for fid in self.fids: - for peak in fid.fid_object.peaks: - if peak.peak_position not in value: - continue - peak.species_id = key.split(" ")[0] - fid.species[peak.peak_index] = peak.species_id - fid._flags["assigned"] = True - reset_button.disabled = False - - # Attach the function to the save button's click event - save_button.on_click(on_save_button_click) - - # Define a function to handle the reset event - def on_reset_button_click(b): - with selection_output: - selection_output.clear_output(wait=True) - # Iterate over every FID and clear the Peaks in the - # respective data model and reset the selection dict - print("\nCleared selections!") - for fid in self.fids: - fid._flags["assigned"] = False - for peak_object in fid.fid_object.peaks: - peak_object.species_id = None - self.selected_values = {} - # Refill the list of available peaks as before, - # re-enable the peak dropdown, and disable the reset - # button again - self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - peak_dropdown.options = self.available_peaks - peak_dropdown.disabled = False - reset_button.disabled = True - - # Attach the function to the reset click event - reset_button.on_click(on_reset_button_click) - # Create a container for both the title and the dropdown - container = VBox( - [ - title_label, - combobox, - peak_dropdown, - species_dropdown, - save_button, - reset_button, - selection_output, - ] - ) + # Determine species source and mode + self._setup_species_source(species_list) - # Display the container - display(container) + # Validate and initialize + self._validate_fid(self.fid) + self._setup_fid(self.fid) + self.available_peaks = [str(peak) for peak in self.fid.peaks] - def _build_fids(self, index_list) -> list: - """ - 
Returns a list of FIDs from fid_array: - - If index_list is empty, returns all FIDs. - - Otherwise, does bounds checks and builds IDs with correct formatting. - """ - if not index_list: - # No specific indices, grab all FIDs - return self.fid_array.get_fids() + # Create and layout widgets + self._create_widgets() + self._setup_callbacks() + self._layout_widgets() - # 1) Basic bounds check - total_fids = len(self.fid_array.get_fids()) - for i in index_list: - if i >= total_fids: - raise IndexError( - f"Index {i} is out of bounds (there are {total_fids} FIDs)." + def _setup_species_source(self, species_source): + # Configure species source and create list of available species + # Check for default case first + if species_source is None: + if not hasattr(self.fid, "enzymeml_species"): + raise ValueError( + "No species list provided and FID has no enzymeml_species" ) - - # 2) Determine how many digits for the ID - n_digits = len(str(total_fids - 1)) # e.g., 2 if up to 99, 3 if up to 999 - if n_digits == 1: - fid_format = "fid{}" + self.available_species = self.fid.enzymeml_species + return + # Check for EnzymeML document + elif isinstance(species_source, EnzymeMLDocument): + self.available_species = get_species_from_enzymeml(species_source) + return + # Check for list of strings + elif isinstance(species_source, list): + self.available_species = species_source + return + # If we get here, the input was invalid else: - fid_format = f"fid{{:0{n_digits}d}}" - - # 3) Build the list of FIDs - fids = [] - for i in index_list: - fid_id = fid_format.format(i) - fids.append(self.fid_array.get_fid(fid_id)) - - return fids + raise ValueError( + "species_list must be a list of species names, " + "an EnzymeML document, or None if FID has enzymeml_species" + ) - def _validate_and_update_fid(self, fid): - """ - Ensures fid has valid peaks and ranges, - initializes its .species array, and - syncs up with the data model if needed. 
- """ - if fid.peaks is [] or fid.peaks is None: + def _validate_fid(self, fid): + # Validates FID has peaks and ranges and len(peaks) == len(ranges) + if fid.peaks is None or len(fid.peaks) == 0: raise RuntimeError( "`fid.peaks` is required but still empty. " "Please assign them manually or with the `peakpicker` method." ) - - if fid.ranges is [] or fid.ranges is None: + if fid.ranges is None or len(fid.ranges) == 0: raise RuntimeError( "`fid.ranges` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." + "Please assign them manually or with the `rangepicker` method." ) - if len(fid.peaks) != len(fid.ranges): raise RuntimeError( - "To assign peaks, `fid.peaks` and `fid.ranges` must be of the same length." + "`fid.peaks` and `fid.ranges` must have the same length, as " + "each peak must have a range assigned to it." ) - # Prepare a numpy array to store species + def _setup_fid(self, fid): + # Initialize species array and creates or updates Peak objects + # in data model if species from EnyzmeML are used. 
+ + # Initialize empty species array fid.species = numpy.empty(len(fid.peaks), dtype=object) - # Sync data model: update existing peak objects or create new ones + # Create or update Peak objects in data model for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): if i < len(fid.fid_object.peaks): # Peak already exists, update it @@ -1782,7 +1475,7 @@ def _validate_and_update_fid(self, fid): "end": float(range_val[1]), } else: - # Create a new peak in the data model + # Peak does not yet exist, create it fid.fid_object.add_to_peaks( peak_index=i, peak_position=float(peak_val), @@ -1792,59 +1485,37 @@ def _validate_and_update_fid(self, fid): }, ) - -class PeakIndexer: - """Interactive widget for assigning species names to peaks in a FID.""" - - def __init__(self, fid, species_list, title="Assign peaks"): - self.fid = fid - self.title = title - self.species_list = species_list - self.selected_values = {} - - # Validate and initialize - self._validate_fid(self.fid) - self.available_peaks = [str(peak) for peak in self.fid.peaks] - - # Create and layout widgets - self._create_widgets() - self._setup_callbacks() - self._layout_widgets() - def _create_widgets(self): - """Create all widget components""" + # Create all widget components self.title_label = Label(value=self.title) - self.peak_dropdown = Dropdown( options=self.available_peaks, description="Select a peak:", layout={"width": "max-content"}, style={"description_width": "initial"}, ) - self.species_dropdown = Dropdown( - options=self.species_list, + options=[ + format_species_string(species) for species in self.available_species + ], description="Select a species:", layout={"width": "max-content"}, style={"description_width": "initial"}, ) - self.save_button = Button( description="Save selection", icon="file-arrow-down", ) - self.reset_button = Button(description="Reset selection", disabled=True) - self.selection_output = Output() def _setup_callbacks(self): - """Set up all widget callbacks""" + # 
Set up all widget callbacks self.save_button.on_click(self._handle_save) self.reset_button.on_click(self._handle_reset) def _layout_widgets(self): - """Create widget layout and display""" + # Create widget layout and display self.container = VBox( [ self.title_label, @@ -1858,7 +1529,7 @@ def _layout_widgets(self): display(self.container) def _handle_save(self, b): - """Handle save button click""" + # Handle save button click with self.selection_output: self.selection_output.clear_output(wait=True) @@ -1879,20 +1550,24 @@ def _handle_save(self, b): self.save_button.disabled = True # Update species array in FID - peak_idx = self.fid.peaks.tolist().index(peak_value) - self.fid.species[peak_idx] = species - + for species_id, peak_position in self.selected_values.items(): + self._update_fid(peak_position, species_id) self._display_selections() + + # Re-enable the reset button self.reset_button.disabled = False def _handle_reset(self, b): - """Handle reset button click""" + # Handle reset button click with self.selection_output: self.selection_output.clear_output(wait=True) print("\nCleared selections!") # Reset state + self.fid._flags["assigned"] = False self.fid.species = numpy.empty(len(self.fid.peaks), dtype=object) + for peak_object in self.fid.fid_object.peaks: + peak_object.species_id = None self.selected_values = {} self.available_peaks = [str(peak) for peak in self.fid.peaks] @@ -1902,71 +1577,161 @@ def _handle_reset(self, b): self.save_button.disabled = False self.reset_button.disabled = True + def _update_fid(self, peak_position, species_id): + # Assign the species ID to the peak object and set the assigned + # flag to True. 
+ for peak in self.fid.fid_object.peaks: + if peak.peak_position not in peak_position: + continue + peak.species_id = species_id.split(" ")[0] + self.fid.species[peak.peak_index] = peak.species_id + self.fid._flags["assigned"] = True + def _display_selections(self): - """Display current selections""" + # Display current selections print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") - def _validate_fid(self, fid): - """Validates FID has peaks and initializes species array""" - if fid.peaks is None or len(fid.peaks) == 0: - raise RuntimeError( - "`fid.peaks` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." - ) - - # Initialize empty species array - fid.species = numpy.empty(len(fid.peaks), dtype=object) +class PeakRangeAssigner: + """Interactive widget for assigning species to peaks for all FIDs in + a FidArray based on one selected FID. + """ -class PeakRangeIndexer: - """Interactive widget for assigning species names to peaks across all FIDs in a FidArray.""" - - def __init__(self, fid_array, species_list, index_list=None): + def __init__(self, fid_array, species_list=None, index_list=None): self.fid_array = fid_array - self.fids = self._build_fids(index_list) - self.species_list = species_list - self.available_peaks = [] self.selected_fid = None self.selected_values = {} - # Validate and/or update each Fid + # Determine species source and mode + self._setup_species_source(species_list) + + # Validate and initialize + self.fids = self._build_fids(index_list) for fid in self.fids: - self._validate_and_update_fid(fid) + self._validate_fid(fid) + self._setup_fid(fid) # Create and layout widgets self._create_widgets() self._setup_callbacks() self._layout_widgets() + def _setup_species_source(self, species_source): + # Configure species source and create list of available species + + # Check for default case first + if species_source is None: + if not hasattr(self.fid_array, 
"enzymeml_document"): + raise ValueError( + "No species list provided and FIDArray has no enzymeml_document" + ) + self.available_species = get_species_from_enzymeml( + self.fid_array.enzymeml_document + ) + return + # Check for EnzymeML document + elif isinstance(species_source, EnzymeMLDocument): + self.available_species = get_species_from_enzymeml(species_source) + return + # Check for list of strings + elif isinstance(species_source, list): + self.available_species = species_source + return + # If we get here, the input was invalid + else: + raise ValueError( + "species_list must be a list of species names, an EnzymeML " + "document, or None if FIDArray has enzymeml_document" + ) + def _build_fids(self, index_list): - """Build list of FIDs to work with based on optional index_list.""" - if index_list is None: + # Create the list of FIDs available to the widget based on + # the index_list. As the formatting of the FID IDs is + # dependent on the number of FIDs available, + # If no specific indices are provided, grab all FIDs + if not index_list: return self.fid_array.get_fids() - return [self.fid_array.get_fids()[i] for i in index_list] - def _validate_and_update_fid(self, fid): - """Validate FID has peaks and initialize species array if needed.""" + # Hand + # 1) Basic bounds check + total_fids = len(self.fid_array.get_fids()) + for i in index_list: + if i >= total_fids: + raise IndexError( + f"Index {i} is out of bounds (there are {total_fids} FIDs)." 
+ ) + + # 2) Determine how many digits for the ID + n_digits = len(str(total_fids - 1)) # e.g., 2 if up to 99, 3 if up to 999 + if n_digits == 1: + fid_format = "fid{}" + else: + fid_format = f"fid{{:0{n_digits}d}}" + + # 3) Build the list of FIDs + fids = [] + for i in index_list: + fid_id = fid_format.format(i) + fids.append(self.fid_array.get_fid(fid_id)) + + return fids + + def _validate_fid(self, fid): + # Validate that FID has peaks and ranges and that their + # lengths are the same if fid.peaks is None or len(fid.peaks) == 0: raise RuntimeError( - f"FID {fid.id} requires peaks but they are empty. " + "`fid.peaks` is required but still empty. " "Please assign them manually or with the `peakpicker` method." ) - if fid.species is None: - fid.species = numpy.empty(len(fid.peaks), dtype=object) + if fid.ranges is None or len(fid.ranges) == 0: + raise RuntimeError( + "`fid.ranges` is required but still empty. " + "Please assign them manually or with the `rangepicker` method." + ) + if len(fid.peaks) != len(fid.ranges): + raise RuntimeError( + "`fid.peaks` and `fid.ranges` must have the same length, as " + "each peak must have a range assigned to it." 
+ ) + + def _setup_fid(self, fid): + # Initialize species array and create or update Peak objects in + # data model + + # Initialize empty species array + fid.species = numpy.empty(len(fid.peaks), dtype=object) + # Create or update Peak objects in data model + for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): + if i < len(fid.fid_object.peaks): + # Peak already exists, update it + fid.fid_object.peaks[i].peak_position = float(peak_val) + fid.fid_object.peaks[i].peak_range = { + "start": float(range_val[0]), + "end": float(range_val[1]), + } + else: + # Peak does not yet exist, create it + fid.fid_object.add_to_peaks( + peak_index=i, + peak_position=float(peak_val), + peak_range={ + "start": float(range_val[0]), + "end": float(range_val[1]), + }, + ) def _create_widgets(self): - """Create all widget components""" + # Create all widget components self.title_label = Label(value="Assign peaks for all FIDs") - self.combobox = Combobox( options=[fid.id for fid in self.fids], description="Select FID to base entire array on:", layout={"width": "max-content"}, style={"description_width": "initial"}, ) - self.peak_dropdown = Dropdown( options=[], description="Select a peak:", @@ -1974,36 +1739,29 @@ def _create_widgets(self): style={"description_width": "initial"}, disabled=True, ) - self.species_dropdown = Dropdown( - options=self.species_list, # Now using the species_list + options=[], description="Select a species:", layout={"width": "max-content"}, style={"description_width": "initial"}, disabled=True, ) - self.save_button = Button( - description="Save selection", - icon="file-arrow-down", - disabled=True, - ) - - self.reset_button = Button( - description="Reset selection", - disabled=True, + description="Save selection", icon="file-arrow-down", disabled=True ) - + self.reset_button = Button(description="Reset selection", disabled=True) self.selection_output = Output() def _setup_callbacks(self): - """Set up all widget callbacks""" - 
self.combobox.observe(self._handle_fid_selection, names="value") + # Set up all widget callbacks + self.combobox.observe(self._handle_combobox_change) + self.peak_dropdown.observe(self._handle_peak_change) + self.species_dropdown.observe(self._handle_species_change) self.save_button.on_click(self._handle_save) self.reset_button.on_click(self._handle_reset) def _layout_widgets(self): - """Create widget layout and display""" + # Create widget layout and display self.container = VBox( [ self.title_label, @@ -2017,25 +1775,35 @@ def _layout_widgets(self): ) display(self.container) - def _handle_fid_selection(self, change): - """Handle FID selection from combobox""" - if change["type"] == "change" and change["name"] == "value": - # Find selected FID - self.selected_fid = next( - fid for fid in self.fids if fid.id == change["new"] - ) - - # Update available peaks - self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - self.peak_dropdown.options = self.available_peaks + def _handle_combobox_change(self, event): + # Enable the peak dropdown when a FID is selected + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option in self.combobox.options: + self.peak_dropdown.disabled = False + self.selected_fid = self.fid_array.get_fid(selected_option) + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] + self.peak_dropdown.options = self.available_peaks + if self.peak_dropdown.options: + self.peak_dropdown.value = self.peak_dropdown.options[0] - # Enable dropdowns - self.peak_dropdown.disabled = False + def _handle_peak_change(self, event): + # Format the species options for disply and enable the species + # dropdown when a peak is selected + if event["type"] == "change" and event["name"] == "value": self.species_dropdown.disabled = False + self.species_dropdown.options = [ + format_species_string(species) for species in self.available_species + ] + if self.species_dropdown.options: + 
self.species_dropdown.value = self.species_dropdown.options[0] + + def _handle_species_change(self, event): + # Enable the save button when a species is selected + if event["type"] == "change" and event["name"] == "value": self.save_button.disabled = False def _handle_save(self, b): - """Handle save button click""" with self.selection_output: self.selection_output.clear_output(wait=True) @@ -2048,46 +1816,54 @@ def _handle_save(self, b): self.selected_values[species].append(peak_value) # Update available peaks - self.available_peaks.remove(str(peak_value)) + self.available_peaks.remove(self.peak_dropdown.value) self.peak_dropdown.options = self.available_peaks if not self.available_peaks: self.peak_dropdown.disabled = True - self.save_button.disabled = True - # Update species array in all FIDs - for fid in self.fids: - try: - peak_idx = fid.peaks.tolist().index(peak_value) - fid.species[peak_idx] = species - except ValueError: - continue # Skip if peak not found in this FID + # Update FIDs + for species_id, peak_position in self.selected_values.items(): + for fid in self.fids: + self._update_fid(fid, peak_position, species_id) + # Print the selected values self._display_selections() + + # Re-enable the reset button self.reset_button.disabled = False def _handle_reset(self, b): - """Handle reset button click""" + # Reset the widget state with self.selection_output: self.selection_output.clear_output(wait=True) print("\nCleared selections!") - - # Reset state for all FIDs + # Reset FIDs' state for fid in self.fids: + fid._flags["assigned"] = False fid.species = numpy.empty(len(fid.peaks), dtype=object) - + for peak_object in fid.fid_object.peaks: + peak_object.species_id = None self.selected_values = {} + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - if self.selected_fid: - self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - self.peak_dropdown.options = self.available_peaks - self.peak_dropdown.disabled = False - 
self.save_button.disabled = False - + # Reset widgets + self.peak_dropdown.options = self.available_peaks + self.peak_dropdown.disabled = False self.reset_button.disabled = True + def _update_fid(self, fid, peak_position, species_id): + # Assign the species ID to the peak object and set the assigned + # flag to True. + for peak in fid.fid_object.peaks: + if peak.peak_position not in peak_position: + continue + peak.species_id = species_id.split(" ")[0] + fid.species[peak.peak_index] = peak.species_id + fid._flags["assigned"] = True + def _display_selections(self): - """Display current selections""" + # Display current selections print("\nSaved selections:") for key, value in self.selected_values.items(): print(f"{key}: {value}") diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 0968303..96a9ab7 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -125,7 +125,9 @@ def format_species_string(enzymeml_species) -> str: Returns: str: The formatted species string. """ - if enzymeml_species.name: + if isinstance(enzymeml_species, str): + return enzymeml_species + elif enzymeml_species.name: return f"{enzymeml_species.id} ({enzymeml_species.name})" else: return f"{enzymeml_species.id}" diff --git a/nmrpy/datamodel/schemes/datamodel_schema.md b/specifications/datamodel_schema.md similarity index 91% rename from nmrpy/datamodel/schemes/datamodel_schema.md rename to specifications/datamodel_schema.md index 9d10e79..93d3a5d 100644 --- a/nmrpy/datamodel/schemes/datamodel_schema.md +++ b/specifications/datamodel_schema.md @@ -5,10 +5,11 @@ classDiagram FIDObject *-- Parameters FIDObject *-- ProcessingSteps FIDObject *-- Peak - + Peak *-- PeakRange + class NMRpy { - +datetime datetime_created* - +datetime datetime_modified + +string datetime_created* + +string datetime_modified +Experiment experiment } @@ -61,6 +62,11 @@ classDiagram +float peak_integral +string species_id } + + class PeakRange { + +float start + +float end + } class FileFormats { << Enumeration >> diff --git 
a/specifications/nmrpy.md b/specifications/nmrpy.md index 2edbf70..624c631 100644 --- a/specifications/nmrpy.md +++ b/specifications/nmrpy.md @@ -1,6 +1,6 @@ # NMRpy data model -Python object model specifications based on the [software-driven-rdm](https://github.com/JR-1991/software-driven-rdm) Python library. The NMRpy data model is designed to store both raw and processed NMR data, as well as the parameters used for processing. As NMRpy is primarily used for the analysis of time-course data, often for determining (enzyme) kinetics, the data model is designed for maximum compatibility with the [EnzymeML](https://enzymeml.github.io/services/) standard, which provides a standardised data exchange format for kinetics data from biocatalysis, enzymology, and beyond. Therefore, relevant fields that are mandatory in the EnzymeML standard are also mandatory in this NMRpy data model. +Python object model specifications based on the [md-models](https://github.com/FAIRChemistry/md-models) Rust library. The NMRpy data model is designed to store both raw and processed NMR data, as well as the parameters used for processing. As NMRpy is primarily used for the analysis of time-course data, often for determining (enzyme) kinetics, the data model is designed for maximum compatibility with the [EnzymeML](https://enzymeml.github.io/services/) standard, which provides a standardised data exchange format for kinetics data from biocatalysis, enzymology, and beyond. Therefore, relevant fields that are mandatory in the EnzymeML standard are also mandatory in this NMRpy data model. ## Core objects @@ -9,10 +9,10 @@ Python object model specifications based on the [software-driven-rdm](https://gi Root element of the NMRpy data model. Following the specifications of the EnzymeML standard, the `datetime_created` field is mandatory. Since each NMRpy instance is meant to hold a single experiment (e.g., one time-course), the data model reflects this by only allowing a single `experiment` object. 
- __datetime_created__ - - Type: datetime + - Type: string - Description: Date and time this dataset has been created. - datetime_modified - - Type: datetime + - Type: string - Description: Date and time this dataset has last been modified. - experiment - Type: [Experiment](#experiment) @@ -39,7 +39,7 @@ Container for a single NMR spectrum, containing both raw data with relevant inst - Description: Complex spectral data from numpy array as string of format `{array.real}+{array.imag}j`. - Multiple: True - processed_data - - Type: string,float + - Type: string, float - Description: Processed data array. - Multiple: True - nmr_parameters @@ -146,13 +146,13 @@ Container for processing steps performed, as well as parameter for them. Process Container for a single peak in the NMR spectrum, associated with a species from an EnzymeML document. To ensure unambiguity of every peak, the `peak_index` field (counted from left to right in the NMR spectrum) is mandatory. Species from EnzymeML are identified by their `species_id` as found in the EnzymeML document. - __peak_index__ - - Type: int + - Type: integer - Description: Index of the peak in the NMR spectrum, counted from left to right. - peak_position - Type: float - Description: Position of the peak in the NMR spectrum. - peak_range - - Type: {start: float, end: float} + - Type: [PeakRange](#peakrange) - Description: Range of the peak, given as a start and end value. - peak_integral - Type: float @@ -161,6 +161,17 @@ Container for a single peak in the NMR spectrum, associated with a species from - Type: string - Description: ID of an EnzymeML species. +### PeakRange + +Container for the peak range of one peak. + +- __start__ + - Type: float + - Description: Start value of the peak range. +- __end__ + - Type: float + - Description: End value of the peak range. 
+ ## Enumerations ### FileFormats From 58e94babd8fe85741613018ad1825a9ffe9ab144 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 20 Jan 2025 15:46:25 +0100 Subject: [PATCH 26/54] Temporarily remove conc. calculation - Remove features from FidArray.calculate_concentrations() and raise NotImplementedError when called - Remove features from ConcentrationCalculator and raise NotImplementedError when initialised --- nmrpy/data_objects.py | 6 +- nmrpy/plotting.py | 157 +----------------------------------------- 2 files changed, 6 insertions(+), 157 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 691ab03..d1fa041 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -2422,9 +2422,9 @@ def clear_peaks(self): for fid in self.get_fids(): fid.species = None - def calculate_concentrations(self): # TODO: Write docstring - self._concentration_widget = ConcentrationCalculator( - fid_array=self, enzymeml_document=self.enzymeml_document + def calculate_concentrations(self): + raise NotImplementedError( + "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." 
) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 626e3b5..3c58629 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -2367,162 +2367,11 @@ def assign(self): class ConcentrationCalculator: - def __init__(self, fid_array, enzymeml_document): - self.fid_array = fid_array - self.enzymeml_document = enzymeml_document - self.available_species = get_ordered_list_of_species_names( - self.fid_array.get_fids()[0] - ) - self.c_n = sp.symbols("c_n") - self.x_s = sp.symbols("x_s") - self.x_n = sp.symbols("x_n") - self.c_n_value = float("nan") - self.x_s_value = float("nan") - self.x_n_value = float("nan") - - # Create the label widget for the title - title_label = Label( - value="Calculate concentrations from peak integrals for all FIDs" - ) - - # Create the dropdown widget for the internal standard - standard_dropdown = Dropdown( - options=self.available_species, - description="Select the internal standard:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) - - # Create a text input widget for the concentration equation - concentration_equation = Text( - value="", - placeholder="Enter the equation for the concentration here", - description="Concentration equation: c_s =", - layout={"width": "auto"}, - style={ - "description_width": "initial", - }, - ) - - # Create an HTML widget to display a legend for the concentration equation - legend_html = HTML( - value="'c_s': Concentration of species
'c_n': Concentration of internal standard
'x_s': Peak integral of species
'x_n': Peak integral of internal standard
Example: c_s = c_n * x_s / x_n", - description="Legend:", - ) - - # Create a button to calculate the concentrations - calculate_button = Button( - description="Calculate concentrations", - icon="calculator", - layout={"width": "max-content"}, - disabled=True, - ) - - # Create an output widget to display the calculation progress - output = Output() - - # Define a method to handle the text input widget's change event - def on_text_change(event): - if event["type"] == "change" and event["name"] == "value": - calculate_button.disabled = False - - # Attach the method to the text input widget's change event - concentration_equation.observe(on_text_change) - - # Define a method to handle the calculate button's click event - def on_calculate_button_click(b): - with output: - output.clear_output(wait=True) - # Fetch the values from the standard dropdown and the - # text widget and add them to a dictionary with species as - # keys - print("\nCalculating concentrations...") - - equation = sp.sympify(concentration_equation.value) - print(f"`equation` is {equation}.") - # Create a dictionary to store the concentrations for each species - self.fid_array.concentrations = { - species: [] for species in self.available_species - } - - for fid in self.fid_array.get_fids(): - # Get data from the internal standard using next() - standard = next( - ( - identity - for identity in fid.fid_object.peak_identities - if identity.name == standard_dropdown.value - ), - None, - ) - # Extract the initial concentration of the standard from the EnzymeML document by its ID - self.c_n_value = get_initial_concentration_by_species_id( - enzymeml_document=self.enzymeml_document, - species_id=standard.species_id, - ) - # Set the peak integral values for the standard - self.x_n_value = sum(standard.associated_integrals) - self.x_s_value = self.x_n_value - # Calculate the concentration of the standard and append it to the list of concentrations - concentration = equation.subs( - { - self.c_n: 
self.c_n_value, - self.x_s: self.x_s_value, - self.x_n: self.x_n_value, - } - ) - current_concentration = concentration.evalf() - # print(f"adding {current_concentration} to {standard.name}.") - self.fid_array.concentrations[standard.name].append( - current_concentration - ) - # print( - # f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." - # ) - - # Iterate over all identities but the standard - for identity in fid.fid_object.peak_identities: - if identity.name == standard_dropdown.value: - pass - # Calculate the peak integral value for the species - self.x_s_value = sum(identity.associated_integrals) - # Calculate the concentration of the species and append it to the list of concentrations - concentration = equation.subs( - { - self.c_n: self.c_n_value, - self.x_s: self.x_s_value, - self.x_n: self.x_n_value, - } - ) - current_concentration = concentration.evalf() - # print(f"adding {current_concentration} to {identity.name}.") - self.fid_array.concentrations[identity.name].append( - current_concentration - ) - # print( - # f"`self.fid_array.concentrations` is {self.fid_array.concentrations}." - # ) - - print(f"Done! Get concentrations with `FidArray.concentrations`.") - - # Attach the function to the calculate button's click event - calculate_button.on_click(on_calculate_button_click) - - # Create the container - container = VBox( - [ - title_label, - standard_dropdown, - concentration_equation, - legend_html, - calculate_button, - output, - ] + def __init__(self): + raise NotImplementedError( + "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." 
) - # Display the container - display(container) - if __name__ == "__main__": pass From 037e60f358706b50a486e664a2c037f28c8a2f1f Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 28 Jan 2025 16:01:20 +0100 Subject: [PATCH 27/54] Revert linting changes --- nmrpy/data_objects.py | 1409 +++++++++++----------- nmrpy/plotting.py | 2570 +++++++++++++++++++---------------------- 2 files changed, 1848 insertions(+), 2131 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index d1fa041..250a122 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1,4 +1,3 @@ -from pathlib import Path import numpy import scipy from matplotlib import pyplot @@ -10,7 +9,7 @@ from nmrpy.plotting import * import os import pickle -from ipywidgets import SelectMultiple + from nmrpy.nmrpy_model import ( NMRpy, Experiment, @@ -20,27 +19,25 @@ Peak, PeakRange, ) -from nmrpy.utils import create_enzymeml +from nmrpy.utils import create_enzymeml, get_species_from_enzymeml import pyenzyme as pe -import pyenzyme.equations as peq from pyenzyme.model import EnzymeMLDocument -from typing import List, Union -class Base: +class Base(): _complex_dtypes = [ - numpy.dtype("csingle"), - numpy.dtype("cdouble"), - numpy.dtype("clongdouble"), - ] + numpy.dtype('csingle'), + numpy.dtype('cdouble'), + numpy.dtype('clongdouble'), + ] - _file_formats = ["varian", "bruker", None] + _file_formats = ['varian', 'bruker', None] def __init__(self, *args, **kwargs): - self.id = kwargs.get("id", None) - self._procpar = kwargs.get("procpar", None) + self.id = kwargs.get('id', None) + self._procpar = kwargs.get('procpar', None) self._params = None - self.fid_path = kwargs.get("fid_path", ".") + self.fid_path = kwargs.get('fid_path', '.') self._file_format = None @property @@ -52,8 +49,8 @@ def id(self, id): if isinstance(id, str) or id is None: self.__id = id else: - raise AttributeError("ID must be a string or None.") - + raise AttributeError('ID must be a string or None.') + @property def 
fid_path(self): return self.__fid_path @@ -63,7 +60,7 @@ def fid_path(self, fid_path): if isinstance(fid_path, str): self.__fid_path = fid_path else: - raise AttributeError("fid_path must be a string.") + raise AttributeError('fid_path must be a string.') @property def _file_format(self): @@ -107,12 +104,12 @@ def _procpar(self): @_procpar.setter def _procpar(self, procpar): if procpar is None: - self.__procpar = procpar + self.__procpar = procpar elif isinstance(procpar, dict): - self.__procpar = procpar + self.__procpar = procpar self._params = self._extract_procpar(procpar) else: - raise AttributeError("procpar must be a dictionary or None.") + raise AttributeError('procpar must be a dictionary or None.') @property def _params(self): @@ -123,16 +120,16 @@ def _params(self, params): if isinstance(params, dict) or params is None: self.__params = params else: - raise AttributeError("params must be a dictionary or None.") + raise AttributeError('params must be a dictionary or None.') - # processing + #processing def _extract_procpar(self, procpar): - if self._file_format == "bruker": + if self._file_format == 'bruker': return self._extract_procpar_bruker(procpar) - elif self._file_format == "varian": + elif self._file_format == 'varian': return self._extract_procpar_varian(procpar) - # else: - # raise AttributeError('Could not parse procpar.') + #else: + # raise AttributeError('Could not parse procpar.') @staticmethod def _extract_procpar_varian(procpar): @@ -178,20 +175,20 @@ def _extract_procpar_varian(procpar): return params @staticmethod - def _extract_procpar_bruker(procpar): + def _extract_procpar_bruker(procpar): """ Extract some commonly-used NMR parameters (using Bruker denotations) and return a parameter dictionary 'params'. 
""" - d1 = procpar["acqus"]["D"][1] - reffrq = procpar["acqus"]["SFO1"] - nt = procpar["acqus"]["NS"] - sw_hz = procpar["acqus"]["SW_h"] - sw = procpar["acqus"]["SW"] + d1 = procpar['acqus']['D'][1] + reffrq = procpar['acqus']['SFO1'] + nt = procpar['acqus']['NS'] + sw_hz = procpar['acqus']['SW_h'] + sw = procpar['acqus']['SW'] # lefthand offset of the processed data in ppm - if "procs" in procpar: - sfrq = procpar["procs"]["SF"] - sw_left = procpar["procs"]["OFFSET"] + if 'procs' in procpar: + sfrq = procpar['procs']['SF'] + sw_left = procpar['procs']['OFFSET'] else: sfrq = procpar['acqus']['BF1'] sw_left = (0.5+1e6*(sfrq-reffrq)/sw_hz)*sw_hz/sfrq @@ -220,27 +217,21 @@ def _extract_procpar_bruker(procpar): sfrq=sfrq, reffrq=reffrq, sw_left=sw_left, - ) + ) return params - class Fid(Base): - """ + ''' The basic FID (Free Induction Decay) class contains all the data for a single spectrum (:attr:`~nmrpy.data_objects.Fid.data`), and the necessary methods to process these data. - """ + ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.data = kwargs.get("data", []) + self.data = kwargs.get('data', []) self.peaks = None self.ranges = None self.species = None - self._deconvoluted_peaks = None - self._flags = { - "ft": False, - "assigned": False, - } self.fid_object = FIDObject( raw_data=[(str(datum)) for datum in self.data], processed_data=[], @@ -248,34 +239,14 @@ def __init__(self, *args, **kwargs): processing_steps=ProcessingSteps(), ) self.enzymeml_species = None + self._deconvoluted_peaks = None + self._flags = { + "ft": False, + "assigned": False, + } def __str__(self): - return "FID: %s (%i data)" % (self.id, len(self.data)) - - @property - def fid_object(self): - return self.__fid_object - - @fid_object.setter - def fid_object(self, fid_object): - if isinstance(fid_object, FIDObject): - self.__fid_object = fid_object - - @property - def processing_steps(self): - return self.__processing_steps - - @processing_steps.setter - def 
processing_steps(self, processing_steps): - raise PermissionError("Forbidden!") - - @property - def enzymeml_species(self): - return self.__enzymeml_species - - @enzymeml_species.setter - def enzymeml_species(self, enzymeml_species): - self.__enzymeml_species = enzymeml_species + return 'FID: %s (%i data)'%(self.id, len(self.data)) @property def data(self): @@ -283,8 +254,8 @@ def data(self): The spectral data. This is the primary object upon which the processing and analysis functions work. """ return self.__data - - @data.setter + + @data.setter def data(self, data): if Fid._is_valid_dataset(data): self.__data = numpy.array(data) @@ -295,11 +266,7 @@ def _ppm(self): Index of :attr:`~nmrpy.data_objects.Fid.data` in ppm (parts per million). """ if self._params is not None and self.data is not None: - return numpy.linspace( - self._params["sw_left"] - self._params["sw"], - self._params["sw_left"], - len(self.data), - )[::-1] + return numpy.linspace(self._params['sw_left']-self._params['sw'], self._params['sw_left'], len(self.data))[::-1] else: return None @@ -309,14 +276,14 @@ def peaks(self): Picked peaks for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. """ return self._peaks - - @peaks.setter + + @peaks.setter def peaks(self, peaks): if peaks is not None: if not Fid._is_flat_iter(peaks): - raise AttributeError("peaks must be a flat iterable") + raise AttributeError('peaks must be a flat iterable') if not all(isinstance(i, numbers.Number) for i in peaks): - raise AttributeError("peaks must be numbers") + raise AttributeError('peaks must be numbers') self._peaks = numpy.array(peaks) else: self._peaks = peaks @@ -327,22 +294,20 @@ def ranges(self): Picked ranges for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. 
""" return self._ranges - - @ranges.setter + + @ranges.setter def ranges(self, ranges): if ranges is None: self._ranges = None return if not Fid._is_iter_of_iters(ranges) or ranges is None: - raise AttributeError("ranges must be an iterable of iterables or None") + raise AttributeError('ranges must be an iterable of iterables or None') ranges = numpy.array(ranges) if ranges.shape[1] != 2: - raise AttributeError( - "ranges must be an iterable of 2-length iterables or an empty iterables e.g. [[]]" - ) + raise AttributeError('ranges must be an iterable of 2-length iterables or an empty iterables e.g. [[]]') for r in ranges: if not all(isinstance(i, numbers.Number) for i in r): - raise AttributeError("ranges must be numbers") + raise AttributeError('ranges must be numbers') self._ranges = ranges @property @@ -359,23 +324,40 @@ def species(self, species): return if species is not None: if not all((i is None) or isinstance(i, str) for i in species): - raise AttributeError("species must be strings") + raise AttributeError('species must be strings') if not len(species) == len(self.peaks): - raise AttributeError("species must have the same length as peaks") + raise AttributeError('species must have the same length as peaks') self._species = numpy.array(species, dtype=object) + @property + def fid_object(self): + return self.__fid_object + + @fid_object.setter + def fid_object(self, fid_object): + if isinstance(fid_object, FIDObject): + self.__fid_object = fid_object + + @property + def enzymeml_species(self): + return self.__enzymeml_species + + @enzymeml_species.setter + def enzymeml_species(self, enzymeml_species): + self.__enzymeml_species = enzymeml_species + @property def _bl_ppm(self): return self.__bl_ppm - - @_bl_ppm.setter + + @_bl_ppm.setter def _bl_ppm(self, bl_ppm): if bl_ppm is not None: if not Fid._is_flat_iter(bl_ppm): - raise AttributeError("baseline indices must be a flat iterable") + raise AttributeError('baseline indices must be a flat iterable') if 
len(bl_ppm) > 0: if not all(isinstance(i, numbers.Number) for i in bl_ppm): - raise AttributeError("baseline indices must be numbers") + raise AttributeError('baseline indices must be numbers') self.__bl_ppm = numpy.sort(list(set(bl_ppm)))[::-1] else: self.__bl_ppm = None @@ -385,26 +367,21 @@ def _bl_ppm(self, bl_ppm): @property def _bl_indices(self): if self._bl_ppm is not None: - return self._conv_to_index( - self.data, - self._bl_ppm, - self._params["sw_left"], - self._params["sw"], - ) + return self._conv_to_index(self.data, self._bl_ppm, self._params['sw_left'], self._params['sw']) else: return None @property def _bl_poly(self): return self.__bl_poly - - @_bl_poly.setter + + @_bl_poly.setter def _bl_poly(self, bl_poly): if bl_poly is not None: if not Fid._is_flat_iter(bl_poly): - raise AttributeError("baseline polynomial must be a flat iterable") + raise AttributeError('baseline polynomial must be a flat iterable') if not all(isinstance(i, numbers.Number) for i in bl_poly): - raise AttributeError("baseline polynomial must be numbers") + raise AttributeError('baseline polynomial must be numbers') self.__bl_poly = numpy.array(bl_poly) else: self.__bl_ppm = bl_poly @@ -415,14 +392,9 @@ def _index_peaks(self): :attr:`~nmrpy.data_objects.Fid.peaks` converted to indices rather than ppm """ if self.peaks is not None: - return self._conv_to_index( - self.data, - self.peaks, - self._params["sw_left"], - self._params["sw"], - ) + return self._conv_to_index(self.data, self.peaks, self._params['sw_left'], self._params['sw']) else: - return [] + return [] @property def _index_ranges(self): @@ -431,15 +403,10 @@ def _index_ranges(self): """ if self.ranges is not None: shp = self.ranges.shape - index_ranges = self._conv_to_index( - self.data, - self.ranges.flatten(), - self._params["sw_left"], - self._params["sw"], - ) + index_ranges = self._conv_to_index(self.data, self.ranges.flatten(), self._params['sw_left'], self._params['sw']) return index_ranges.reshape(shp) else: - 
return [] + return [] @property def _grouped_peaklist(self): @@ -447,37 +414,18 @@ def _grouped_peaklist(self): :attr:`~nmrpy.data_objects.Fid.peaks` grouped according to :attr:`~nmrpy.data_objects.Fid.ranges` """ if self.ranges is not None: - return numpy.array( - [ - [ - peak - for peak in self.peaks - if peak > min(peak_range) and peak < max(peak_range) - ] - for peak_range in self.ranges - ], - dtype=object, - ) + return numpy.array([[peak for peak in self.peaks if peak > min(peak_range) and peak < max(peak_range)] + for peak_range in self.ranges], dtype=object) else: return [] - @property def _grouped_index_peaklist(self): """ :attr:`~nmrpy.data_objects.Fid._index_peaks` grouped according to :attr:`~nmrpy.data_objects.Fid._index_ranges` """ if self._index_ranges is not None: - return numpy.array( - [ - [ - peak - for peak in self._index_peaks - if peak > min(peak_range) and peak < max(peak_range) - ] - for peak_range in self._index_ranges - ], - dtype=object, - ) + return numpy.array([[peak for peak in self._index_peaks if peak > min(peak_range) and peak < max(peak_range)] + for peak_range in self._index_ranges], dtype=object) else: return [] @@ -489,17 +437,17 @@ def _deconvoluted_peaks(self): def _deconvoluted_peaks(self, deconvoluted_peaks): """This is a list of lists of peak parameters with the order [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss]: - offset: spectral offset + offset: spectral offset - gauss_sigma: Gaussian sigma + gauss_sigma: Gaussian sigma - lorentz_hwhm: Lorentzian half-width-at-half-maximum + lorentz_hwhm: Lorentzian half-width-at-half-maximum - amplitude: height of peak + amplitude: height of peak - frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) - """ - self.__deconvoluted_peaks = deconvoluted_peaks + frac_gauss: fraction of peak to be Gaussian (Lorentzian fraction is 1-frac_gauss) + """ + self.__deconvoluted_peaks = deconvoluted_peaks @property def deconvoluted_integrals(self): @@ -508,36 
+456,22 @@ def deconvoluted_integrals(self): """ if self._deconvoluted_peaks is not None: integrals = [] - i = 0 - for peak in self._deconvoluted_peaks: - int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) - int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) - integrals.append(int_gauss + int_lorentz) - print(f"Peak {i} integral: {integrals[i]}") - - for peak_object in self.fid_object.peaks: - print(f"Peak object: {peak_object.peak_index}") - if i == peak_object.peak_index: - if integrals[i] == peak_object.peak_integral: - print( - f"Integral {integrals[i]} already stored in peak object" - ) - pass - peak_object.peak_integral = float(integrals[i]) - print(f"Added integral {integrals[i]} to peak object") - i += 1 - print(f"incremented i to {i}") + for i, peak in enumerate(self._deconvoluted_peaks): + int_gauss = peak[-1]*Fid._f_gauss_int(peak[3], peak[1]) + int_lorentz = (1-peak[-1])*Fid._f_lorentz_int(peak[3], peak[2]) + integral = int_gauss+int_lorentz + integrals.append(integral) + # Update data model + peak_object = self.fid_object.peaks[i] + if peak_object.peak_integral != integral: + peak_object.peak_integral = float(integral) return integrals - + def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. """ - plots = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Plot) - ] + plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] return plots def _del_plots(self): @@ -553,12 +487,12 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.Fid`. 
""" widgets = [ - id - for id in sorted(self.__dict__) + id for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], Calibrator) or isinstance(self.__dict__[id], DataPeakSelector) or isinstance(self.__dict__[id], FidRangeSelector) + or isinstance(self.__dict__[id], PeakAssigner) ] return widgets @@ -573,14 +507,15 @@ def _del_widgets(self): @classmethod def _is_valid_dataset(cls, data): if isinstance(data, str): - raise TypeError("Data must be iterable not a string.") + raise TypeError('Data must be iterable not a string.') if not cls._is_iter(data): - raise TypeError("Data must be an iterable.") + raise TypeError('Data must be an iterable.') if not cls._is_flat_iter(data): - raise TypeError("Data must not be nested.") + raise TypeError('Data must not be nested.') if not all(isinstance(i, numbers.Number) for i in data): - raise TypeError("Data must consist of numbers only.") - return True + raise TypeError('Data must consist of numbers only.') + return True + @classmethod def from_data(cls, data): @@ -589,11 +524,11 @@ def from_data(cls, data): Instantiate a new :class:`~nmrpy.data_objects.Fid` object by providing a spectral data object as argument. Eg. :: - fid = Fid.from_data(data) + fid = Fid.from_data(data) """ new_instance = cls() new_instance.data = data - return new_instance + return new_instance def zf(self): """ @@ -605,7 +540,8 @@ def zf(self): in an artificially increased resolution once Fourier-transformed. """ - self.data = numpy.append(self.data, 0 * self.data) + self.data = numpy.append(self.data, 0*self.data) + # Update data model self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_zero_filled = True @@ -618,12 +554,8 @@ def emhz(self, lb=5.0): :keyword lb: degree of line-broadening in Hz. 
""" - self.data = ( - numpy.exp( - -numpy.pi * numpy.arange(len(self.data)) * (lb / self._params["sw_hz"]) - ) - * self.data - ) + self.data = numpy.exp(-numpy.pi*numpy.arange(len(self.data)) * (lb/self._params['sw_hz'])) * self.data + # Update data model self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_apodised = True self.fid_object.processing_steps.apodisation_frequency = lb @@ -633,9 +565,10 @@ def real(self): Discard imaginary component of :attr:`~nmrpy.data_objects.Fid.data`. """ self.data = numpy.real(self.data) + # Update data model self.fid_object.processed_data = [float(datum) for datum in self.data] self.fid_object.processing_steps.is_only_real = True - + # GENERAL FUNCTIONS def ft(self): """ @@ -647,15 +580,16 @@ def ft(self): series,' Math. Comput. 19: 297-301.*) """ - if self._flags["ft"]: - raise ValueError("Data have already been Fourier Transformed.") + if self._flags['ft']: + raise ValueError('Data have already been Fourier Transformed.') if Fid._is_valid_dataset(self.data): list_params = (self.data, self._file_format) self.data = Fid._ft(list_params) - self._flags["ft"] = True + self._flags['ft'] = True + # Update data model self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_fourier_transformed = True - self.fid_object.processing_steps.fourier_transform_type = "FFT" + self.fid_object.processing_steps.fourier_transform_type = 'FFT' @classmethod def _ft(cls, list_params): @@ -664,34 +598,31 @@ def _ft(cls, list_params): list_params is a tuple of (, ). """ if len(list_params) != 2: - raise ValueError( - "Wrong number of parameters. list_params must contain [, ]" - ) + raise ValueError('Wrong number of parameters. 
list_params must contain [, ]') data, file_format = list_params if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) s = len(data) - if file_format == "varian" or file_format == None: - ft_data = numpy.append(data[int(s / 2.0) :], data[: int(s / 2.0)]) - if file_format == "bruker": - ft_data = numpy.append( - data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] - ) + if file_format == 'varian' or file_format == None: + ft_data = numpy.append(data[int(s / 2.0):], data[: int(s / 2.0)]) + if file_format == 'bruker': + ft_data = numpy.append(data[int(s / 2.0):: -1], data[s: int(s / 2.0): -1]) return ft_data + @staticmethod def _conv_to_ppm(data, index, sw_left, sw): - """ - Convert index array to ppm. - """ - if isinstance(index, list): - index = numpy.array(index) - frc_sw = index / float(len(data)) - ppm = sw_left - sw * frc_sw - if Fid._is_iter(ppm): - return numpy.array([round(i, 2) for i in ppm]) - else: - return round(ppm, 2) + """ + Convert index array to ppm. + """ + if isinstance(index, list): + index = numpy.array(index) + frc_sw = index/float(len(data)) + ppm = sw_left-sw*frc_sw + if Fid._is_iter(ppm): + return numpy.array([round(i, 2) for i in ppm]) + else: + return round(ppm, 2) @staticmethod def _conv_to_index(data, ppm, sw_left, sw): @@ -714,20 +645,17 @@ def _conv_to_index(data, ppm, sw_left, sw): def phase_correct(self, method='leastsq', verbose = True): """ - Automatically phase-correct :attr:`~nmrpy.data_objects.Fid.data` by minimising - total absolute area. + :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. Additional options include: + + Nelder-Mead (nelder) - :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: - - Nelder-Mead (nelder) - - L-BFGS-B (l-bfgs-b) + L-BFGS-B (l-bfgs-b) - Conjugate Gradient (cg) + Conjugate Gradient (cg) - Powell (powell) + Powell (powell) - Newton-CG (newton) + Newton-CG (newton) :keyword verbose: prints out phase angles if True (default) """ @@ -765,49 +693,50 @@ def _phase_correct(cls, list_params): @classmethod def _phased_data_sum(cls, pars, data): - err = Fid._ps(data, p0=pars["p0"].value, p1=pars["p1"].value).real - return numpy.array([abs(err).sum()] * 2) + err = Fid._ps(data, p0=pars['p0'].value, p1=pars['p1'].value).real + return numpy.array([abs(err).sum()]*2) @classmethod def _ps(cls, data, p0=0.0, p1=0.0): - """ - Linear phase correction - - :keyword p0: Zero order phase in degrees. - - :keyword p1: First order phase in degrees. + """ + Linear phase correction + + :keyword p0: Zero order phase in degrees. + + :keyword p1: First order phase in degrees. - """ - if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError("p0 and p1 must be floats or ints.") - if not data.dtype in Fid._complex_dtypes: - raise TypeError("data must be complex.") - # convert to radians - p0 = p0 * numpy.pi / 180.0 - p1 = p1 * numpy.pi / 180.0 - size = len(data) - ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) - return ph * data + """ + if not all(isinstance(i, (float, int)) for i in [p0, p1]): + raise TypeError('p0 and p1 must be floats or ints.') + if not data.dtype in Fid._complex_dtypes: + raise TypeError('data must be complex.') + # convert to radians + p0 = p0*numpy.pi/180.0 + p1 = p1*numpy.pi/180.0 + size = len(data) + ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) + return ph*data def ps(self, p0=0.0, p1=0.0): """ Linear phase correction of :attr:`~nmrpy.data_objects.Fid.data` - + :keyword p0: Zero order phase in degrees :keyword p1: First order phase in degrees - + """ if not all(isinstance(i, (float, int)) for i in [p0, p1]): - raise TypeError("p0 and p1 must be floats or 
ints.") + raise TypeError('p0 and p1 must be floats or ints.') if not self.data.dtype in self._complex_dtypes: - raise TypeError("data must be complex.") + raise TypeError('data must be complex.') # convert to radians - p0 = p0 * numpy.pi / 180.0 - p1 = p1 * numpy.pi / 180.0 + p0 = p0*numpy.pi/180.0 + p1 = p1*numpy.pi/180.0 size = len(self.data) - ph = numpy.exp(1.0j * (p0 + (p1 * numpy.arange(size) / size))) - self.data = ph * self.data + ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) + self.data = ph*self.data + # Update data model self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_phased = True self.fid_object.processing_steps.zero_order_phase = p0 @@ -818,29 +747,29 @@ def phaser(self): Instantiate a phase-correction GUI widget which applies to :attr:`~nmrpy.data_objects.Fid.data`. """ if not len(self.data): - raise AttributeError("data does not exist.") + raise AttributeError('data does not exist.') if self.data.dtype not in self._complex_dtypes: - raise TypeError("data must be complex.") + raise TypeError('data must be complex.') if not Fid._is_flat_iter(self.data): - raise AttributeError("data must be 1 dimensional.") + raise AttributeError('data must be 1 dimensional.') global _phaser_widget self._phaser_widget = Phaser(self) def calibrate(self): """ - Instantiate a GUI widget to select a peak and calibrate spectrum. - Left-clicking selects a peak. The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate spectrum. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration. 
""" - plot_label = """ + plot_label = \ +''' Left - select peak -""" +''' plot_title = "Calibration {}".format(self.id) - self._calibrate_widget = Calibrator( - self, - title=plot_title, - label=plot_label, - ) + self._calibrate_widget = Calibrator(self, + title=plot_title, + label=plot_label, + ) def baseline_correct(self, deg=2): """ @@ -849,22 +778,20 @@ def baseline_correct(self, deg=2): (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) with polynomial of specified degree (stored in :attr:`~nmrpy.data_objects.Fid._bl_ppm`) and subtract this polynomial from :attr:`~nmrpy.data_objects.Fid.data`. - + :keyword deg: degree of fitted polynomial """ if self._bl_indices is None: - raise AttributeError( - "No points selected for baseline correction. Run fid.baseliner()" - ) + raise AttributeError('No points selected for baseline correction. Run fid.baseliner()') if not len(self.data): - raise AttributeError("data does not exist.") + raise AttributeError('data does not exist.') if self.data.dtype in self._complex_dtypes: - raise TypeError("data must not be complex.") + raise TypeError('data must not be complex.') if not Fid._is_flat_iter(self.data): - raise AttributeError("data must be 1 dimensional.") - + raise AttributeError('data must be 1 dimensional.') + data = self.data x = numpy.arange(len(data)) m = numpy.ones_like(x) @@ -875,24 +802,23 @@ def baseline_correct(self, deg=2): p = numpy.ma.polyfit(xm, ym, deg) yp = numpy.polyval(p, x) self._bl_poly = yp - data_bl = data - yp + data_bl = data-yp self.data = numpy.array(data_bl) - self.fid_object.processed_data = [float(datum) for datum in self.data] + # Update data model + self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_baseline_corrected = True def peakpick(self, thresh=0.1): - """ + """ Attempt to automatically identify peaks. Picked peaks are assigned to :attr:`~nmrpy.data_objects.Fid.peaks`. 
:keyword thresh: fractional threshold for peak-picking """ - peaks_ind = nmrglue.peakpick.pick(self.data, thresh * self.data.max()) + peaks_ind = nmrglue.peakpick.pick(self.data, thresh*self.data.max()) peaks_ind = [i[0] for i in peaks_ind] - peaks_ppm = Fid._conv_to_ppm( - self.data, peaks_ind, self._params["sw_left"], self._params["sw"] - ) + peaks_ppm = Fid._conv_to_ppm(self.data, peaks_ind, self._params['sw_left'], self._params['sw']) self.peaks = peaks_ppm print(self.peaks) @@ -906,19 +832,19 @@ def peakpicker(self): :meth:`~nmrpy.data_objects.Fid.deconv`). """ - plot_label = """ + plot_label = \ +''' Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -""" +''' plot_title = "Peak-picking {}".format(self.id) - self._peakpicker_widget = DataPeakSelector( - self, - title=plot_title, - label=plot_label, - ) + self._peakpicker_widget = DataPeakSelector(self, + title=plot_title, + label=plot_label, + ) def clear_peaks(self): """ @@ -947,56 +873,48 @@ def baseliner(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). 
""" - plot_label = """ + plot_label = \ +''' Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -""" +''' plot_title = "Baseline correction {}".format(self.id) - self._baseliner_widget = FidRangeSelector( - self, - title=plot_title, - label=plot_label, - ) - + self._baseliner_widget = FidRangeSelector(self, + title=plot_title, + label=plot_label, + ) + @classmethod def _f_gauss(cls, offset, amplitude, gauss_sigma, x): - return amplitude * numpy.exp(-((offset - x) ** 2.0) / (2.0 * gauss_sigma**2.0)) - + return amplitude*numpy.exp(-((offset-x)**2.0)/(2.0*gauss_sigma**2.0)) + @classmethod def _f_lorentz(cls, offset, amplitude, lorentz_hwhm, x): - # return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) - return amplitude * lorentz_hwhm**2.0 / (lorentz_hwhm**2.0 + (x - offset) ** 2.0) + #return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+4.0*(offset-x)**2.0) + return amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-offset)**2.0) @classmethod def _f_gauss_int(cls, amplitude, gauss_sigma): - return amplitude * numpy.sqrt(2.0 * numpy.pi * gauss_sigma**2.0) + return amplitude*numpy.sqrt(2.0*numpy.pi*gauss_sigma**2.0) @classmethod def _f_lorentz_int(cls, amplitude, lorentz_hwhm): - # empirical integral commented out - # x = numpy.arange(1000*lorentz_hwhm) - # return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) - # this integral forumula from http://magicplot.com/wiki/fit_equations - return amplitude * lorentz_hwhm * numpy.pi + #empirical integral commented out + #x = numpy.arange(1000*lorentz_hwhm) + #return numpy.sum(amplitude*lorentz_hwhm**2.0/(lorentz_hwhm**2.0+(x-len(x)/2)**2.0)) + #this integral forumula from http://magicplot.com/wiki/fit_equations + return amplitude*lorentz_hwhm*numpy.pi @classmethod - def _f_pk( - cls, - x, - offset=0.0, - gauss_sigma=1.0, - lorentz_hwhm=1.0, - amplitude=1.0, - frac_gauss=0.0, - ): + def _f_pk(cls, x, offset=0.0, gauss_sigma=1.0, lorentz_hwhm=1.0, 
amplitude=1.0, frac_gauss=0.0): """ Return the a combined Gaussian/Lorentzian peakshape for deconvolution of :attr:`~nmrpy.data_objects.Fid.data`. - + :arg x: array of equal length to :attr:`~nmrpy.data_objects.Fid.data` - + :keyword offset: spectral offset in x @@ -1008,37 +926,39 @@ def _f_pk( :keyword frac_gauss: fraction of function to be Gaussian (0 -> 1). Note: specifying a Gaussian fraction of 0 will produce a pure Lorentzian and vice - versa.""" - - # validation + versa. """ + + #validation parameters = [offset, gauss_sigma, lorentz_hwhm, amplitude, frac_gauss] if not all(isinstance(i, numbers.Number) for i in parameters): - raise TypeError("Keyword parameters must be numbers.") + raise TypeError('Keyword parameters must be numbers.') if not cls._is_iter(x): - raise TypeError("x must be an iterable") + raise TypeError('x must be an iterable') if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) if frac_gauss > 1.0: frac_gauss = 1.0 if frac_gauss < 0.0: frac_gauss = 0.0 - + gauss_peak = cls._f_gauss(offset, amplitude, gauss_sigma, x) lorentz_peak = cls._f_lorentz(offset, amplitude, lorentz_hwhm, x) - peak = frac_gauss * gauss_peak + (1 - frac_gauss) * lorentz_peak - + peak = frac_gauss*gauss_peak + (1-frac_gauss)*lorentz_peak + return peak + + @classmethod def _f_makep(cls, data, peaks, frac_gauss=None): """ Make a set of initial peak parameters for deconvolution. 
- + :arg data: data to be fitted :arg peaks: selected peak positions (see peakpicker()) - + :returns: an array of peaks, each consisting of the following parameters: spectral offset (x) @@ -1052,15 +972,15 @@ def _f_makep(cls, data, peaks, frac_gauss=None): frac_gauss: fraction of function to be Gaussian (0 -> 1) """ if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") + raise TypeError('data must be a flat iterable') if not cls._is_flat_iter(peaks): - raise TypeError("peaks must be a flat iterable") + raise TypeError('peaks must be a flat iterable') if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + p = [] for i in peaks: - pamp = 0.9 * abs(data[int(i)]) + pamp = 0.9*abs(data[int(i)]) single_peak = [i, 10, 0.1, pamp, frac_gauss] p.append(single_peak) return numpy.array(p) @@ -1069,99 +989,95 @@ def _f_makep(cls, data, peaks, frac_gauss=None): def _f_conv(cls, parameterset_list, data): """ Returns the maximum of a convolution of an initial set of lineshapes and the data to be fitted. 
- - parameterset_list -- a list of parameter lists: n*[[spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + + parameterset_list -- a list of parameter lists: n*[[spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)]] where n is the number of peaks data -- 1D spectral array - + """ if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") + raise TypeError('data must be a flat iterable') if not cls._is_iter(parameterset_list): - raise TypeError("parameterset_list must be an iterable") + raise TypeError('parameterset_list must be an iterable') if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + data[data == 0.0] = 1e-6 - x = numpy.arange(len(data), dtype="f8") + x = numpy.arange(len(data), dtype='f8') peaks_init = cls._f_pks(parameterset_list, x) data_convolution = numpy.convolve(data, peaks_init[::-1]) auto_convolution = numpy.convolve(peaks_init, peaks_init[::-1]) - max_data_convolution = numpy.where(data_convolution == data_convolution.max())[ - 0 - ][0] - max_auto_convolution = numpy.where(auto_convolution == auto_convolution.max())[ - 0 - ][0] + max_data_convolution = numpy.where(data_convolution == data_convolution.max())[0][0] + max_auto_convolution = numpy.where(auto_convolution == auto_convolution.max())[0][0] return max_data_convolution - max_auto_convolution - @classmethod + @classmethod def _f_pks_list(cls, parameterset_list, x): """ Return a list of peak evaluations for deconvolution. See _f_pk(). 
- + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ if not cls._is_iter_of_iters(parameterset_list): - raise TypeError("Parameter set must be an iterable of iterables") + raise TypeError('Parameter set must be an iterable of iterables') for p in parameterset_list: if not cls._is_iter(p): - raise TypeError("Parameter set must be an iterable") + raise TypeError('Parameter set must be an iterable') if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError("Keyword parameters must be numbers.") + raise TypeError('Keyword parameters must be numbers.') if not cls._is_iter(x): - raise TypeError("x must be an iterable") + raise TypeError('x must be an iterable') if not isinstance(x, numpy.ndarray): - x = numpy.array(x) + x = numpy.array(x) return numpy.array([Fid._f_pk(x, *peak) for peak in parameterset_list]) + - @classmethod + @classmethod def _f_pks(cls, parameterset_list, x): """ Return the sum of a series of peak evaluations for deconvolution. See _f_pk(). 
- + Keyword arguments: - parameterset_list -- a list of parameter lists: [spectral offset (x), - gauss: 2*sigma**2, - lorentz: scale (HWHM), - amplitude: amplitude of peak, + parameterset_list -- a list of parameter lists: [spectral offset (x), + gauss: 2*sigma**2, + lorentz: scale (HWHM), + amplitude: amplitude of peak, frac_gauss: fraction of function to be Gaussian (0 -> 1)] x -- array of equal length to FID """ - + if not cls._is_iter_of_iters(parameterset_list): - raise TypeError("Parameter set must be an iterable of iterables") + raise TypeError('Parameter set must be an iterable of iterables') for p in parameterset_list: if not cls._is_iter(p): - raise TypeError("Parameter set must be an iterable") + raise TypeError('Parameter set must be an iterable') if not all(isinstance(i, numbers.Number) for i in p): - raise TypeError("Keyword parameters must be numbers.") + raise TypeError('Keyword parameters must be numbers.') if not cls._is_iter(x): - raise TypeError("x must be an iterable") + raise TypeError('x must be an iterable') if not isinstance(x, numpy.ndarray): - x = numpy.array(x) - - peaks = x * 0.0 + x = numpy.array(x) + + peaks = x*0.0 for p in parameterset_list: - peak = cls._f_pk( - x, - offset=p[0], - gauss_sigma=p[1], - lorentz_hwhm=p[2], - amplitude=p[3], - frac_gauss=p[4], - ) + peak = cls._f_pk(x, + offset=p[0], + gauss_sigma=p[1], + lorentz_hwhm=p[2], + amplitude=p[3], + frac_gauss=p[4], + ) peaks += peak return peaks @@ -1169,9 +1085,9 @@ def _f_pks(cls, parameterset_list, x): def _f_res(cls, p, data): """ Objective function for deconvolution. Returns residuals of the devonvolution fit. 
- + x -- array of equal length to FID - + Keyword arguments: p -- lmfit parameters object: offset_n -- spectral offset in x @@ -1181,126 +1097,115 @@ def _f_res(cls, p, data): frac_gauss_n -- fraction of function to be Gaussian (0 -> 1) where n is the peak number (zero-indexed) data -- spectrum array - + """ if not isinstance(p, lmfit.parameter.Parameters): - raise TypeError("Parameters must be of type lmfit.parameter.Parameters.") + raise TypeError('Parameters must be of type lmfit.parameter.Parameters.') if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable.") + raise TypeError('data must be a flat iterable.') if not isinstance(data, numpy.ndarray): - data = numpy.array(data) - + data = numpy.array(data) + params = Fid._parameters_to_list(p) - x = numpy.arange(len(data), dtype="f8") - res = data - cls._f_pks(params, x) + x = numpy.arange(len(data), dtype='f8') + res = data-cls._f_pks(params, x) return res @classmethod - def _f_fitp(cls, data, peaks, frac_gauss=None, method="leastsq"): + def _f_fitp(cls, data, peaks, frac_gauss=None, method='leastsq'): """Fit a section of spectral data with a combination of Gaussian/Lorentzian peaks for deconvolution. - + Keyword arguments: peaks -- selected peak positions (see peakpicker()) frac_gauss -- fraction of fitted function to be Gaussian (1 - Guassian, 0 - Lorentzian) - + returns: fits -- list of fitted peak parameter sets - + Note: peaks are fitted by default using the Levenberg-Marquardt algorithm[1]. Other fitting algorithms are available (http://cars9.uchicago.edu/software/python/lmfit/fitting.html#choosing-different-fitting-methods). - + [1] Marquardt, Donald W. 'An algorithm for least-squares estimation of nonlinear parameters.' Journal of the Society for Industrial & Applied Mathematics 11.2 (1963): 431-441. 
""" data = numpy.real(data) if not cls._is_flat_iter(data): - raise TypeError("data must be a flat iterable") + raise TypeError('data must be a flat iterable') if not cls._is_flat_iter(peaks): - raise TypeError("peaks must be a flat iterable") - if any(peak > (len(data) - 1) for peak in peaks): - raise ValueError("peaks must be within the length of data.") + raise TypeError('peaks must be a flat iterable') + if any(peak > (len(data)-1) for peak in peaks): + raise ValueError('peaks must be within the length of data.') if not isinstance(data, numpy.ndarray): - data = numpy.array(data) + data = numpy.array(data) p = cls._f_makep(data, peaks, frac_gauss=0.5) init_ref = cls._f_conv(p, data) - if any(peaks + init_ref < 0) or any(peaks + init_ref > len(data) - 1): - init_ref = 0 - if frac_gauss == None: - p = cls._f_makep(data, peaks + init_ref, frac_gauss=0.5) + if any(peaks+init_ref < 0) or any(peaks+init_ref > len(data)-1): + init_ref = 0 + if frac_gauss==None: + p = cls._f_makep(data, peaks+init_ref, frac_gauss=0.5) else: - p = cls._f_makep(data, peaks + init_ref, frac_gauss=frac_gauss) - + p = cls._f_makep(data, peaks+init_ref, frac_gauss=frac_gauss) + params = lmfit.Parameters() for parset in range(len(p)): - current_parset = dict( - zip( - ["offset", "sigma", "hwhm", "amplitude", "frac_gauss"], - p[parset], - ) - ) - for k, v in current_parset.items(): - par_name = "%s_%i" % (k, parset) - params.add(name=par_name, value=v, vary=True, min=0.0) - if "offset" in par_name: - params[par_name].max = len(data) - 1 - if "frac_gauss" in par_name: + current_parset = dict(zip(['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss'], p[parset])) + for k,v in current_parset.items(): + par_name = '%s_%i'%(k, parset) + params.add(name=par_name, + value=v, + vary=True, + min=0.0) + if 'offset' in par_name: + params[par_name].max = len(data)-1 + if 'frac_gauss' in par_name: params[par_name].max = 1.0 if frac_gauss is not None: params[par_name].vary = False - # if 'sigma' in par_name 
or 'hwhm' in par_name: - # params[par_name].max = 0.01*current_parset['amplitude'] - if "amplitude" in par_name: - params[par_name].max = 2.0 * data.max() - + #if 'sigma' in par_name or 'hwhm' in par_name: + # params[par_name].max = 0.01*current_parset['amplitude'] + if 'amplitude' in par_name: + params[par_name].max = 2.0*data.max() + try: mz = lmfit.minimize(cls._f_res, params, args=([data]), method=method) fits = Fid._parameters_to_list(mz.params) - except Exception: + except: fits = None return fits @classmethod def _parameters_to_list(cls, p): - n_pks = int(len(p) / 5) + n_pks = int(len(p)/5) params = [] for i in range(n_pks): - current_params = [ - p["%s_%s" % (par, i)].value - for par in [ - "offset", - "sigma", - "hwhm", - "amplitude", - "frac_gauss", - ] - ] + current_params = [p['%s_%s'%(par, i)].value for par in ['offset', 'sigma', 'hwhm', 'amplitude', 'frac_gauss']] params.append(current_params) return params + @classmethod def _deconv_datum(cls, list_parameters): if len(list_parameters) != 5: - raise ValueError("list_parameters must consist of five objects.") - if (type(list_parameters[1]) == list and len(list_parameters[1]) == 0) or ( - type(list_parameters[2]) == list and len(list_parameters[2]) == 0 - ): + raise ValueError('list_parameters must consist of five objects.') + if (type(list_parameters[1]) == list and len(list_parameters[1]) == 0) or \ + (type(list_parameters[2]) == list and len(list_parameters[2]) == 0): return [] datum, peaks, ranges, frac_gauss, method = list_parameters if not cls._is_iter_of_iters(ranges): - raise TypeError("ranges must be an iterable of iterables") + raise TypeError('ranges must be an iterable of iterables') if not all(len(rng) == 2 for rng in ranges): - raise ValueError("ranges must contain two values.") + raise ValueError('ranges must contain two values.') if not all(rng[0] != rng[1] for rng in ranges): - raise ValueError("data_index must contain different values.") + raise ValueError('data_index must contain 
different values.') if not isinstance(datum, numpy.ndarray): - datum = numpy.array(datum) + datum = numpy.array(datum) if datum.dtype in cls._complex_dtypes: - raise TypeError("data must be not be complex.") + raise TypeError('data must be not be complex.') fit = [] for j in zip(peaks, ranges): - d_slice = datum[j[1][0] : j[1][1]] - p_slice = j[0] - j[1][0] + d_slice = datum[j[1][0]:j[1][1]] + p_slice = j[0]-j[1][0] f = cls._f_fitp(d_slice, p_slice, frac_gauss=frac_gauss, method=method) f = numpy.array(f).transpose() f[0] += j[1][0] @@ -1308,7 +1213,7 @@ def _deconv_datum(cls, list_parameters): fit.append(f) return fit - def deconv(self, method="leastsq", frac_gauss=0.0): + def deconv(self, method='leastsq', frac_gauss=0.0): """ Deconvolute :attr:`~nmrpy.data_obects.Fid.data` object by fitting a @@ -1320,42 +1225,34 @@ def deconv(self, method="leastsq", frac_gauss=0.0): :keyword frac_gauss: (0-1) determines the Gaussian fraction of the peaks. Setting this argument to None will fit this parameter as well. :keyword method: The fitting method to use. Default is 'leastsq', the Levenberg-Marquardt algorithm, which is usually sufficient. 
Additional options include: - + Nelder-Mead (nelder) - + L-BFGS-B (l-bfgs-b) - + Conjugate Gradient (cg) - + Powell (powell) - + Newton-CG (newton) - + """ if not len(self.data): - raise AttributeError("data does not exist.") + raise AttributeError('data does not exist.') if self.data.dtype in self._complex_dtypes: - raise TypeError("data must be not be complex.") + raise TypeError('data must be not be complex.') if self.peaks is None: - raise AttributeError("peaks must be picked.") + raise AttributeError('peaks must be picked.') if self.ranges is None: - raise AttributeError("ranges must be specified.") - print("deconvoluting {}".format(self.id)) - list_parameters = [ - self.data, - self._grouped_index_peaklist, - self._index_ranges, - frac_gauss, - method, - ] - self._deconvoluted_peaks = numpy.array( - [j for i in Fid._deconv_datum(list_parameters) for j in i] - ) - + raise AttributeError('ranges must be specified.') + print('deconvoluting {}'.format(self.id)) + list_parameters = [self.data, self._grouped_index_peaklist, self._index_ranges, frac_gauss, method] + self._deconvoluted_peaks = numpy.array([j for i in Fid._deconv_datum(list_parameters) for j in i]) print(self.deconvoluted_integrals) + self.fid_object.processing_steps.is_deconvoluted = True + print('deconvolution completed') - print("deconvolution completed") def plot_ppm(self, **kwargs): """ @@ -1365,7 +1262,7 @@ def plot_ppm(self, **kwargs): :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot """ @@ -1382,7 +1279,7 @@ def plot_deconv(self, **kwargs): :keyword lower_ppm: lower spectral bound in ppm - :keyword lw: linewidth of plot + :keyword lw: linewidth of plot :keyword colour: colour of the plot @@ -1391,7 +1288,7 @@ def plot_deconv(self, **kwargs): :keyword residual_colour: colour of the residual signal after subtracting deconvoluted peaks """ if not len(self._deconvoluted_peaks): - raise 
AttributeError("deconvolution not yet performed") + raise AttributeError('deconvolution not yet performed') plt = Plot() plt._plot_deconv(self, **kwargs) setattr(self, plt.id, plt) @@ -1411,16 +1308,19 @@ def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): title="Assign species for {}".format(self.id), ) - def clear_peaks(self): + def clear_assigned_peaks(self): """ - Clear assigned species stored in - :attr:`~nmrpy.data_objects.Fid.species`. + Clear assigned species stored in :attr:`~nmrpy.data_objects.Fid.species` + and :attr:`~nmrpy.data_objects.Fid.fid_object.peaks.species_id`, as well as + the GUI widget. """ - self.species = None - - + self.clear_species() + for peak in self.fid_object.peaks: + peak.species_id = None + self._assigner_widget = None + class FidArray(Base): - """ + ''' This object collects several :class:`~nmrpy.data_objects.Fid` objects into an array, and it contains all the processing methods necessary for bulk @@ -1433,14 +1333,19 @@ class FidArray(Base): :class:`~nmrpy.data_objects.FidArray` with a unique ID of the form 'fidXX', where 'XX' is an increasing integer . - """ + ''' def __init__(self): self.data_model = NMRpy( datetime_created=str(datetime.now()), experiment=Experiment(name="NMR experiment"), ) + self.enzymeml_document = None + self.concentrations = None + def __str__(self): + return 'FidArray of {} FID(s)'.format(len(self.data)) + @property def data_model(self): for fid in self.get_fids(): @@ -1448,10 +1353,10 @@ def data_model(self): return self.__data_model @data_model.setter - def data_model(self, data_model: NMRpy): + def data_model(self, data_model): if not isinstance(data_model, NMRpy): raise AttributeError( - f"Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead." + f'Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead.' 
) self.__data_model = data_model self.__data_model.datetime_modified = str(datetime.now()) @@ -1459,20 +1364,20 @@ def data_model(self, data_model: NMRpy): @data_model.deleter def data_model(self): del self.__data_model - print("The current data model has been deleted.") + print('The current data model has been deleted.') @property def enzymeml_document(self): return self.__enzymeml_document @enzymeml_document.setter - def enzymeml_document(self, enzymeml_document: EnzymeMLDocument): + def enzymeml_document(self, enzymeml_document): if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( - f"Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead." + f'Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead.' ) self.__enzymeml_document = enzymeml_document - self.__enzymeml_document.modified = datetime.now() + self.__enzymeml_document.modified = str(datetime.now()) self.__data_model.experiment.name = self.__enzymeml_document.name for fid in self.get_fids(): fid.enzymeml_species = get_species_from_enzymeml(self.__enzymeml_document) @@ -1480,28 +1385,29 @@ def enzymeml_document(self, enzymeml_document: EnzymeMLDocument): @enzymeml_document.deleter def enzymeml_document(self): del self.__enzymeml_document - print("The current EnzymeML document has been deleted.") + print('The current EnzymeML document has been deleted.') @property def concentrations(self): - """ - An array of the concentration for each FID. 
- """ - return self.__c + return self.__concentrations @concentrations.setter - def concentrations(self, c): - if not isinstance(c, dict): - raise TypeError("c must be a dictionary.") - self.__c = c + def concentrations(self, concentrations): + if not isinstance(concentrations, dict): + raise TypeError('concentrations must be a dictionary.') + for fid in self.get_fids(): + if not fid.species: + raise ValueError('All FIDs must have species assigned to peaks.') + if not all(species in fid.species for species in concentrations.keys()): + raise ValueError('Keys of concentrations must be species assigned to peaks.') + if not all(len(concentrations[species]) == len(self.t) for species in concentrations.keys()): + raise ValueError('Length of concentrations must match length of FID data.') + self.__concentrations = concentrations @concentrations.deleter def concentrations(self): - del self.__c - print("The current concentrations have been deleted.") - - def __str__(self): - return "FidArray of {} FID(s)".format(len(self.data)) + del self.__concentrations + print('The current concentrations have been deleted.') def get_fid(self, id): """ @@ -1514,28 +1420,20 @@ def get_fid(self, id): try: return getattr(self, id) except AttributeError: - print("{} does not exist.".format(id)) + print('{} does not exist.'.format(id)) def get_fids(self): """ Return a list of all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. """ - fids = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Fid) - ] + fids = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Fid)] return fids def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.FidArray`. 
""" - plots = [ - self.__dict__[id] - for id in sorted(self.__dict__) - if isinstance(self.__dict__[id], Plot) - ] + plots = [self.__dict__[id] for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Plot)] return plots def _del_plots(self): @@ -1551,8 +1449,7 @@ def _get_widgets(self): Return a list of all widget objects (peak selection, etc.) owned by this :class:`~nmrpy.data_objects.FidArray`. """ widgets = [ - id - for id in sorted(self.__dict__) + id for id in sorted(self.__dict__) if isinstance(self.__dict__[id], Phaser) or isinstance(self.__dict__[id], RangeCalibrator) or isinstance(self.__dict__[id], DataPeakRangeSelector) @@ -1560,6 +1457,7 @@ def _get_widgets(self): or isinstance(self.__dict__[id], DataTraceRangeSelector) or isinstance(self.__dict__[id], DataTraceSelector) or isinstance(self.__dict__[id], PeakRangeAssigner) + or isinstance(self.__dict__[id], ConcentrationCalculator) ] return widgets @@ -1617,7 +1515,7 @@ def _deconvoluted_peaks(self): for fid in self.get_fids(): try: deconvoluted_peaks.append(fid._deconvoluted_peaks) - except Exception: + except: deconvoluted_peaks.append([]) return numpy.array(deconvoluted_peaks) @@ -1630,7 +1528,7 @@ def add_fid(self, fid): if isinstance(fid, Fid): setattr(self, fid.id, fid) else: - raise AttributeError("FidArray requires Fid object.") + raise AttributeError('FidArray requires Fid object.') def del_fid(self, fid_id): """ @@ -1653,14 +1551,14 @@ def del_fid(self, fid_id): # Old NMRpy _params structure self._params['acqtime'] = at else: - raise AttributeError("{} is not an FID object.".format(fid_id)) + raise AttributeError('{} is not an FID object.'.format(fid_id)) else: - raise AttributeError("FID {} does not exist.".format(fid_id)) + raise AttributeError('FID {} does not exist.'.format(fid_id)) def add_fids(self, fids): """ Add a list of :class:`~nmrpy.data_objects.Fid` objects to this :class:`~nmrpy.data_objects.FidArray`. 
- + :arg fids: a list of :class:`~nmrpy.data_objects.Fid` instances """ if FidArray._is_iter(fids): @@ -1669,7 +1567,7 @@ def add_fids(self, fids): for fid_index in range(num_fids): try: fid = fids[fid_index] - id_str = "fid{0:0" + zero_fill + "d}" + id_str = 'fid{0:0'+zero_fill+'d}' fid.id = id_str.format(fid_index) self.add_fid(fid) except AttributeError as e: @@ -1713,64 +1611,64 @@ def _setup_params(fid_array): del fid_array._params['acqtime'] def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: - """Parse an EnzymeML document and its library from specified - file path. + """ + Parse an EnzymeML document and its library from specified file path. Args: path_to_enzymeml_document (str): Path to file containing an EnzymeML document """ self.enzymeml_document = pe.read_enzymeml( cls=pe.EnzymeMLDocument, path=path_to_enzymeml_document - ) + ) @classmethod def from_data(cls, data): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a 2D data set of spectral arrays. - - :arg data: a 2D data array + + :arg data: a 2D data array """ if not cls._is_iter_of_iters(data): - raise TypeError("data must be an iterable of iterables.") + raise TypeError('data must be an iterable of iterables.') fid_array = cls() fids = [] for fid_index, datum in zip(range(len(data)), data): - fid_id = "fid%i" % fid_index + fid_id = 'fid%i'%fid_index fid = Fid(id=fid_id, data=datum) fids.append(fid) fid_array.add_fids(fids) return fid_array @classmethod - def from_path(cls, fid_path=".", file_format=None, arrayset=None): + def from_path(cls, fid_path='.', file_format=None, arrayset=None): """ Instantiate a new :class:`~nmrpy.data_objects.FidArray` object from a .fid directory. 
:keyword fid_path: filepath to .fid directory :keyword file_format: 'varian' or 'bruker', usually unnecessary - - :keyword arrayset: (int) array set for interleaved spectra, - user is prompted if not specified + + :keyword arrayset: (int) array set for interleaved spectra, + user is prompted if not specified """ if not file_format: try: - with open(fid_path, "rb") as f: + with open(fid_path, 'rb') as f: return pickle.load(f) - except Exception: - print("Not NMRPy data file.") + except: + print('Not NMRPy data file.') importer = Importer(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == "varian": + elif file_format == 'varian': importer = VarianImporter(fid_path=fid_path) importer.import_fid() - elif file_format == "bruker": + elif file_format == 'bruker': importer = BrukerImporter(fid_path=fid_path) importer.import_fid(arrayset=arrayset) - elif file_format == "nmrpy": - with open(fid_path, "rb") as f: + elif file_format == 'nmrpy': + with open(fid_path, 'rb') as f: return pickle.load(f) - + if cls._is_iter(importer.data): fid_array = cls.from_data(importer.data) fid_array._file_format = importer._file_format @@ -1783,17 +1681,17 @@ def from_path(cls, fid_path=".", file_format=None, arrayset=None): cls._setup_params(fid_array) return fid_array else: - raise IOError("Data could not be imported.") + raise IOError('Data could not be imported.') def zf_fids(self): - """ + """ Zero-fill all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` """ for fid in self.get_fids(): fid.zf() def emhz_fids(self, lb=5.0): - """ + """ Apply line-broadening (apodisation) to all :class:`nmrpy.~data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword lb: degree of line-broadening in Hz. @@ -1802,7 +1700,7 @@ def emhz_fids(self, lb=5.0): fid.emhz(lb=lb) def ft_fids(self, mp=True, cpus=None): - """ + """ Fourier-transform all FIDs. 
:keyword mp: parallelise over multiple processors, significantly reducing computation time @@ -1815,18 +1713,17 @@ def ft_fids(self, mp=True, cpus=None): ft_data = self._generic_mp(Fid._ft, list_params, cpus) for fid, datum in zip(fids, ft_data): fid.data = datum - fid._flags["ft"] = True + fid._flags['ft'] = True fid.fid_object.processed_data = [str(data) for data in datum] fid.fid_object.processing_steps.is_fourier_transformed = True - fid.fid_object.processing_steps.fourier_transform_type = "FFT" - - else: + fid.fid_object.processing_steps.fourier_transform_type = 'FFT' + else: for fid in self.get_fids(): fid.ft() - print("Fourier-transformation completed") + print('Fourier-transformation completed') def real_fids(self): - """ + """ Discard imaginary component of FID data sets. """ @@ -1834,13 +1731,13 @@ def real_fids(self): fid.real() def norm_fids(self): - """ + """ Normalise FIDs by maximum data value in :attr:`~nmrpy.data_objects.FidArray.data`. """ dmax = self.data.max() for fid in self.get_fids(): - fid.data = fid.data / dmax + fid.data = fid.data/dmax fid.fid_object.processed_data = [float(datum) for datum in fid.data] fid.fid_object.processing_steps.is_normalised = True fid.fid_object.processing_steps.max_value = float(dmax) @@ -1857,7 +1754,7 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) :keyword verbose: prints out phase angles if True (default) """ - if mp: + if mp: fids = self.get_fids() if not all(fid.data.dtype in self._complex_dtypes for fid in fids): raise TypeError('Only complex data can be phase-corrected.') @@ -1869,6 +1766,7 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) fid.data = datum fid.fid_object.processed_data = [str(data) for data in datum] fid.fid_object.processing_steps.is_phased = True + fid.fid_object.processing_steps.phase_correction_method = method else: for fid in self.get_fids(): fid.phase_correct(method=method, verbose=verbose) @@ -1884,18 +1782,17 @@ 
def baseliner_fids(self): :meth:`~nmrpy.data_objects.Fid.baseline_correction`). """ - plot_label = """ + plot_label = \ +''' Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -""" - plot_title = "Select data for baseline-correction" - self._baseliner_widget = FidArrayRangeSelector( - self, title=plot_title, label=plot_label, voff=0.01 - ) - +''' + plot_title = 'Select data for baseline-correction' + self._baseliner_widget = FidArrayRangeSelector(self, title=plot_title, label=plot_label, voff=0.01) + def baseline_correct_fids(self, deg=2): - """ + """ Apply baseline-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) @@ -1903,11 +1800,9 @@ def baseline_correct_fids(self, deg=2): for fid in self.get_fids(): try: fid.baseline_correct(deg=deg) - except Exception: - print( - "failed for {}. Perhaps first run baseliner_fids()".format(fid.id) - ) - print("baseline-correction completed") + except: + print('failed for {}. 
Perhaps first run baseliner_fids()'.format(fid.id)) + print('baseline-correction completed') @property def _data_traces(self): @@ -1915,7 +1810,7 @@ def _data_traces(self): @_data_traces.setter def _data_traces(self, data_traces): - self.__data_traces = data_traces + self.__data_traces = data_traces @property def _index_traces(self): @@ -1923,7 +1818,7 @@ def _index_traces(self): @_index_traces.setter def _index_traces(self, index_traces): - self.__index_traces = index_traces + self.__index_traces = index_traces @property def _trace_mask(self): @@ -1931,7 +1826,7 @@ def _trace_mask(self): @_trace_mask.setter def _trace_mask(self, trace_mask): - self.__trace_mask = trace_mask + self.__trace_mask = trace_mask @property def _trace_mean_ppm(self): @@ -1939,8 +1834,8 @@ def _trace_mean_ppm(self): @_trace_mean_ppm.setter def _trace_mean_ppm(self, trace_mean_ppm): - trace_mean_ppm - self.__trace_mean_ppm = trace_mean_ppm + trace_mean_ppm + self.__trace_mean_ppm = trace_mean_ppm @property def integral_traces(self): @@ -1952,10 +1847,10 @@ def integral_traces(self): @integral_traces.setter def integral_traces(self, integral_traces): - self._integral_traces = integral_traces + self._integral_traces = integral_traces - def deconv_fids(self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0): - """ + def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): + """ Apply deconvolution to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`, using the :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` attribute of each respective :class:`~nmrpy.data_objects.Fid`. 
:keyword method: see :meth:`~nmrpy.data_objects.Fid.phase_correct` @@ -1964,48 +1859,29 @@ def deconv_fids(self, mp=True, cpus=None, method="leastsq", frac_gauss=0.0): :keyword cpus: defines number of CPUs to utilise if 'mp' is set to True, default is n-1 cores """ - if mp: + if mp: fids = self.get_fids() - if not all(fid._flags["ft"] for fid in fids): - raise ValueError("Only Fourier-transformed data can be deconvoluted.") - list_params = [ - [ - fid.data, - fid._grouped_index_peaklist, - fid._index_ranges, - frac_gauss, - method, - ] - for fid in fids - ] + if not all(fid._flags['ft'] for fid in fids): + raise ValueError('Only Fourier-transformed data can be deconvoluted.') + list_params = [[fid.data, fid._grouped_index_peaklist, fid._index_ranges, frac_gauss, method] for fid in fids] deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) - for fid, datum in zip(fids, deconv_datum): fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) - # Iterate over newly deconvoluted peaks and calculate integrals integrals = [] - i = 0 - for peak in fid._deconvoluted_peaks: + for i, peak in enumerate(fid._deconvoluted_peaks): int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) int_lorentz = (1 - peak[-1]) * Fid._f_lorentz_int(peak[3], peak[2]) - integrals.append(int_gauss + int_lorentz) - # Iterate over peaks and assign integrals based on the peak and integral indices - for peak_object in fid.fid_object.peaks: - if i == peak_object.peak_index: - # Check if the integral has already been assigned to the peak identity - if integrals[i] == peak_object.peak_integral: - pass - # If not, assign the integral to the peak - peak_object.peak_integral = float(integrals[i]) - i += 1 + integral = int_gauss + int_lorentz + integrals.append(integral) + # Update data model + peak_object = self.fid_object.peaks[i] + if peak_object.peak_integral != integral: + peak_object.peak_integral = float(integral) fid.fid_object.processing_steps.is_deconvoluted = True - 
else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) - fid.fid_object.processing_steps.is_deconvoluted = True - - print("deconvolution completed") + print('deconvolution completed') def get_masked_integrals(self): """ @@ -2015,15 +1891,16 @@ def get_masked_integrals(self): try: ints = [list(i) for i in self.deconvoluted_integrals] for i in self._trace_mask: - ints_current = numpy.zeros_like(i, dtype="f8") + ints_current = numpy.zeros_like(i, dtype='f8') for j in range(len(i)): if i[j] != -1: ints_current[j] = ints[j].pop(0) result.append(ints_current) except AttributeError: - print("peakpicker_traces() or deconv_fids() probably not yet run.") + print('peakpicker_traces() or deconv_fids() probably not yet run.') return result + def ps_fids(self, p0=0.0, p1=0.0): """ Apply manual phase-correction to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray` @@ -2033,18 +1910,19 @@ def ps_fids(self, p0=0.0, p1=0.0): :keyword p1: First order phase in degrees """ for fid in self.get_fids(): - fid.ps(p0=p0, p1=p1) + fid.ps(p0=p0, p1=p1) @staticmethod def _generic_mp(fcn, iterable, cpus): if cpus is None: - cpus = cpu_count() - 1 + cpus = cpu_count()-1 proc_pool = Pool(cpus) result = proc_pool.map(fcn, iterable) proc_pool.close() proc_pool.join() return result + def plot_array(self, **kwargs): """ Plot :attr:`~nmrpy.data_objects.FidArray.data`. @@ -2112,40 +1990,43 @@ def plot_deconv_array(self, **kwargs): """ plt = Plot() - plt._plot_deconv_array(self.get_fids(), **kwargs) + plt._plot_deconv_array(self.get_fids(), + **kwargs) setattr(self, plt.id, plt) + - def calibrate(self, fid_number=None, assign_only_to_index=False, voff=0.02): + def calibrate(self, fid_number=None, assign_only_to_index=False, + voff=0.02): """ - Instantiate a GUI widget to select a peak and calibrate - spectra in a :class:`~nmrpy.data_objects.FidArray`. - Left-clicking selects a peak. 
The user is then prompted to enter + Instantiate a GUI widget to select a peak and calibrate + spectra in a :class:`~nmrpy.data_objects.FidArray`. + Left-clicking selects a peak. The user is then prompted to enter the PPM value of that peak for calibration; this will be applied to all :class:`~nmrpy.data_objects.Fid` objects owned by this :class:`~nmrpy.data_objects.FidArray`. See also :meth:`~nmrpy.data_objects.Fid.calibrate`. - + :keyword fid_number: list or number, index of :class:`~nmrpy.data_objects.Fid` to use for calibration. If None, the whole data array is plotted. :keyword assign_only_to_index: if True, assigns calibration only to :class:`~nmrpy.data_objects.Fid` objects indexed by fid_number; if False, assigns to all. :keyword voff: vertical offset for spectra """ - plot_label = """ + plot_label = \ +''' Left - select peak -""" - self._calibrate_widget = RangeCalibrator( - self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) +''' + self._calibrate_widget = RangeCalibrator(self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label, + ) def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): """ - Instantiate peak-picker widget for + Instantiate peak-picker widget for :attr:`~nmrpy.data_objects.Fid.data`, and apply selected :attr:`~nmrpy.data_objects.Fid.peaks` and :attr:`~nmrpy.data_objects.Fid.ranges` to all :class:`~nmrpy.data_objects.Fid` @@ -2159,22 +2040,23 @@ def peakpicker(self, fid_number=None, assign_only_to_index=True, voff=0.02): :keyword voff: vertical offset for spectra """ - plot_label = """ + plot_label = \ +''' Left - select peak Ctrl+Left - delete nearest peak Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -""" - self._peakpicker_widget = DataPeakRangeSelector( - self, - y_indices=fid_number, - aoti=assign_only_to_index, - voff=voff, - label=plot_label, - ) +''' + self._peakpicker_widget = 
DataPeakRangeSelector(self, + y_indices=fid_number, + aoti=assign_only_to_index, + voff=voff, + label=plot_label) - def peakpicker_traces(self, voff=0.02, lw=1): + def peakpicker_traces(self, + voff=0.02, + lw=1): """ Instantiates a widget to pick peaks and ranges employing a polygon shape (or 'trace'). This is useful for picking peaks that are subject to drift and peaks @@ -2186,21 +2068,22 @@ def peakpicker_traces(self, voff=0.02, lw=1): """ if self.data is None: - raise AttributeError("No FIDs.") - plot_label = """ + raise AttributeError('No FIDs.') + plot_label = \ +''' Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Drag Right - select range Ctrl+Right - delete range Ctrl+Alt+Right - assign -""" +''' self._peakpicker_widget = DataTraceRangeSelector( self, voff=voff, lw=lw, label=plot_label, - ) + ) def clear_peaks(self): """ @@ -2217,22 +2100,30 @@ def clear_ranges(self): """ for fid in self.get_fids(): fid.ranges = None + + def clear_species(self): + """ + Calls :meth:`~nmrpy.data_objects.Fid.clear_species` on every :class:`~nmrpy.data_objects.Fid` + object in this :class:`~nmrpy.data_objects.FidArray`. 
+ """ + for fid in self.get_fids(): + fid.species = None def _generate_trace_mask(self, traces): ppm = [numpy.round(numpy.mean(i[0]), 2) for i in traces] self._trace_mean_ppm = ppm tt = [i[1] for i in traces] - ln = len(self.data) + ln = len(self.data) filled_tt = [] for i in tt: rng = numpy.arange(ln) if len(i) < ln: - rng[~(~(rng < min(i)) * ~(rng > max(i)))] = -1 + rng[~(~(rngmax(i)))] = -1 filled_tt.append(rng) filled_tt = numpy.array(filled_tt) return filled_tt - def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): + def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): traces = [dict(zip(i[1], i[0])) for i in traces] fids = self.get_fids() fids_i = range(len(self.data)) @@ -2244,14 +2135,15 @@ def _set_all_peaks_ranges_from_traces_and_spans(self, traces, spans): for rng in spans: if peak >= min(rng) and peak <= max(rng): peaks.append(peak) - fids[i].peaks = peaks + fids[i].peaks = peaks ranges = [] - for rng in spans: - if any((peaks > min(rng)) * (peaks < max(rng))): + for rng in spans: + if any((peaks>min(rng))*(peaks 0.1 * pk.max() + pk_ind = pk > 0.1*pk.max() pk_x.append(ppm[pk_ind]) pk_y.append(pk[pk_ind]) peakshapes_short_x.append(pk_x) @@ -2307,25 +2199,24 @@ def select_integral_traces(self, voff=0.02, lw=1): :keyword lw: linewidth of plot (1) """ if self.data is None: - raise AttributeError("No FIDs.") - if (self.deconvoluted_integrals == None).any(): - raise AttributeError("No integrals.") + raise AttributeError('No FIDs.') + if (self.deconvoluted_integrals==None).any(): + raise AttributeError('No integrals.') peakshapes = self._get_all_summed_peakshapes() - # pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() - plot_label = """ + #pk_x, pk_y = self._get_truncated_peak_shapes_for_plotting() + plot_label = \ +''' Left - add trace point Right - finalize trace Ctrl+Left - delete nearest trace Ctrl+Alt+Right - assign -""" - self._select_trace_widget = DataTraceSelector( - self, - extra_data=peakshapes, - 
extra_data_colour="b", - voff=voff, +''' + self._select_trace_widget = DataTraceSelector(self, + extra_data=peakshapes, + extra_data_colour='b', + voff=voff, label=plot_label, - lw=lw, - ) + lw=lw) def get_integrals_from_traces(self): """ @@ -2333,14 +2224,13 @@ def get_integrals_from_traces(self): :class:`~nmrpy.data_objects.Fid` objects calculated from trace dictionary :attr:`~nmrpy.data_objects.FidArray.integral_traces`. """ - if self.deconvoluted_integrals is None or None in self.deconvoluted_integrals: - raise AttributeError("No integrals.") - if not hasattr(self, "_integral_traces"): - raise AttributeError( - "No integral traces. First run select_integral_traces()." - ) + if self.deconvoluted_integrals is None or \ + None in self.deconvoluted_integrals: + raise AttributeError('No integrals.') + if not hasattr(self, '_integral_traces'): + raise AttributeError('No integral traces. First run select_integral_traces().') integrals_set = {} - decon_set = self.deconvoluted_integrals + decon_set = self.deconvoluted_integrals for i, tr in self.integral_traces.items(): tr_keys = numpy.array([fid for fid in tr.keys()]) tr_vals = numpy.array([val for val in tr.values()]) @@ -2348,15 +2238,38 @@ def get_integrals_from_traces(self): tr_keys = tr_keys[tr_sort] tr_vals = tr_vals[tr_sort] integrals = decon_set[tr_keys, tr_vals] - integrals_set[i] = integrals + integrals_set[i] = integrals return integrals_set + + def assign_peaks(self, species_list=None, index_list=None): + """ + Instantiate a peak-assignment GUI widget. Select a FID by + its ID from the combobox. Select peaks from dropdown menu + containing :attr:`~nmrpy.data_objects.Fid.peaks`. Attach a + species to the selected peak from second dropdown menu + containing species defined in EnzymeML. When satisfied with + assignment, press Assign button to apply. 
+ """ + self._assigner_widget = PeakRangeAssigner( + fid_array=self, species_list=species_list, index_list=index_list + ) - def assign_integrals(self, integrals_set: list) -> dict: # deprecated? - print("~~~ Method under contruction ~~~") # TODO: make pretty - widget_list = [] - for i, j in enumerate(integrals_set): - widget_list.append((i, list(j))) - return SelectMultiple(options=widget_list, description="Integrals:") + def clear_assigned_peaks(self): + """ + Clear assigned peaks stored in :attr:`~nmrpy.data_objects.Fid.species` + and :attr:`~nmrpy.data_objects.Fid.fid_object.peaks`, as well as + the GUI widget. + """ + for fid in self.get_fids(): + fid.species = None + for peak in fid.fid_object.peaks: + peak.species_id = None + self._assigner_widget = None + + def calculate_concentrations(self): + raise NotImplementedError( + "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." + ) def save_to_file(self, filename=None, overwrite=False): """ @@ -2369,13 +2282,13 @@ def save_to_file(self, filename=None, overwrite=False): """ if filename is None: basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] - filename = basename + ".nmrpy" + filename = basename+'.nmrpy' if not isinstance(filename, str): - raise TypeError("filename must be a string.") - if filename[-6:] != ".nmrpy": - filename += ".nmrpy" + raise TypeError('filename must be a string.') + if filename[-6:] != '.nmrpy': + filename += '.nmrpy' if os.path.isfile(filename) and not overwrite: - print("File " + filename + " exists, set overwrite=True to force.") + print('File '+filename+' exists, set overwrite=True to force.') return 1 # delete all matplotlib plots to reduce file size self._del_plots() @@ -2385,50 +2298,19 @@ def save_to_file(self, filename=None, overwrite=False): self._del_widgets() for fid in self.get_fids(): fid._del_widgets() - # delete EnzymeML library & document (can't be pickled) - try: - del 
self.enzymeml_library - del self.enzymeml_document - except Exception: - pass - with open(filename, "wb") as f: + with open(filename, 'wb') as f: pickle.dump(self, f) - - def apply_to_enzymeml( - self, enzymeml_document: EnzymeMLDocument = None - ) -> EnzymeMLDocument: + + def apply_to_enzymeml(self, enzymeml_document = None) -> EnzymeMLDocument: + """ + Apply the calculated concentrations from the FidArray to an EnzymeMLDocument. + """ if not enzymeml_document: enzymeml_document = self.enzymeml_document return create_enzymeml(self, enzymeml_document) - - def assign_peaks(self, species_list=None, index_list=None): - """ - Instantiate a peak-assignment GUI widget. Select a FID by - its ID from the combobox. Select peaks from dropdown menu - containing :attr:`~nmrpy.data_objects.Fid.peaks`. Attach a - species to the selected peak from second dropdown menu - containing species defined in EnzymeML. When satisfied with - assignment, press Assign button to apply. - """ - self._assigner_widget = PeakRangeAssigner( - fid_array=self, species_list=species_list, index_list=index_list - ) - - def clear_peaks(self): - """ - Clear assigned peaks stored in - :attr:`~nmrpy.data_objects.Fid.species`. - """ - for fid in self.get_fids(): - fid.species = None - - def calculate_concentrations(self): - raise NotImplementedError( - "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." - ) - - + class Importer(Base): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.data = None @@ -2447,16 +2329,17 @@ def data(self, data): elif Importer._is_iter(data): self.__data = numpy.array([data]) else: - raise TypeError("data must be iterable.") + raise TypeError('data must be iterable.') else: - raise TypeError("data must be complex.") + raise TypeError('data must be complex.') + def import_fid(self, arrayset=None): """ This will first attempt to import Bruker data. Failing that, Varian. 
""" try: - print("Attempting Bruker") + print('Attempting Bruker') brukerimporter = BrukerImporter(fid_path=self.fid_path) brukerimporter.import_fid(arrayset=arrayset) self.data = brukerimporter.data @@ -2464,48 +2347,45 @@ def import_fid(self, arrayset=None): self._file_format = brukerimporter._file_format return except (FileNotFoundError, OSError): - print("fid_path does not specify a valid .fid directory.") - return + print('fid_path does not specify a valid .fid directory.') + return except (TypeError, IndexError): - print("probably not Bruker data") - try: - print("Attempting Varian") + print('probably not Bruker data') + try: + print('Attempting Varian') varianimporter = VarianImporter(fid_path=self.fid_path) varianimporter.import_fid() self._procpar = varianimporter._procpar - self.data = varianimporter.data + self.data = varianimporter.data self._file_format = varianimporter._file_format return except TypeError: - print("probably not Varian data") - + print('probably not Varian data') class VarianImporter(Importer): + def import_fid(self): try: procpar, data = nmrglue.varian.read(self.fid_path) - self.data = data + self.data = data self._procpar = procpar - self._file_format = "varian" + self._file_format = 'varian' except FileNotFoundError: - print("fid_path does not specify a valid .fid directory.") + print('fid_path does not specify a valid .fid directory.') except OSError: - print("fid_path does not specify a valid .fid directory.") - - + print('fid_path does not specify a valid .fid directory.') + class BrukerImporter(Importer): + def import_fid(self, arrayset=None): try: - dirs = [ - int(i) - for i in os.listdir(self.fid_path) - if os.path.isdir(self.fid_path + os.path.sep + i) - ] + dirs = [int(i) for i in os.listdir(self.fid_path) if \ + os.path.isdir(self.fid_path+os.path.sep+i)] dirs.sort() dirs = [str(i) for i in dirs] alldata = [] for d in dirs: - procpar, data = nmrglue.bruker.read(self.fid_path + os.path.sep + d) + procpar, data = 
nmrglue.bruker.read(self.fid_path+os.path.sep+d) alldata.append((procpar, data)) self.alldata = alldata incr = 1 @@ -2517,50 +2397,47 @@ def import_fid(self, arrayset=None): incr += 1 if incr > 1: if arrayset == None: - print("Total of " + str(incr) + " alternating FidArrays found.") - arrayset = input("Which one to import? ") + print('Total of '+str(incr)+' alternating FidArrays found.') + arrayset = input('Which one to import? ') arrayset = int(arrayset) else: arrayset = arrayset if arrayset < 1 or arrayset > incr: - raise ValueError("Select a value between 1 and " + str(incr) + ".") + raise ValueError('Select a value between 1 and ' + + str(incr) + '.') else: arrayset = 1 self.incr = incr - procpar = alldata[arrayset - 1][0] - data = numpy.vstack([d[1] for d in alldata[(arrayset - 1) :: incr]]) + procpar = alldata[arrayset-1][0] + data = numpy.vstack([d[1] for d in alldata[(arrayset-1)::incr]]) self.data = data self._procpar = procpar - self._file_format = "bruker" + self._file_format = 'bruker' self.data = nmrglue.bruker.remove_digital_filter(procpar, self.data) - ( - self._procpar["tdelta"], - self._procpar["tcum"], - self._procpar["tsingle"], - ) = self._get_time_delta() - self._procpar["arraylength"] = self.data.shape[0] - self._procpar["arrayset"] = arrayset + self._procpar['tdelta'], self._procpar['tcum'],\ + self._procpar['tsingle'] = self._get_time_delta() + self._procpar['arraylength'] = self.data.shape[0] + self._procpar['arrayset'] = arrayset except FileNotFoundError: - print("fid_path does not specify a valid .fid directory.") + print('fid_path does not specify a valid .fid directory.') except OSError: - print("fid_path does not specify a valid .fid directory.") - + print('fid_path does not specify a valid .fid directory.') + def _get_time_delta(self): td = 0.0 tcum = [] tsingle = [] for i in range(self.incr): - pp = self.alldata[i][0]["acqus"] - sw_hz = pp["SW_h"] - at = pp["TD"] / (2 * sw_hz) - d1 = pp["D"][1] - nt = pp["NS"] - tot = (at + d1) * nt 
/ 60.0 # convert to mins + pp = self.alldata[i][0]['acqus'] + sw_hz = pp['SW_h'] + at = pp['TD']/(2*sw_hz) + d1 = pp['D'][1] + nt = pp['NS'] + tot = (at+d1)*nt/60. # convert to mins td += tot tcum.append(td) tsingle.append(tot) return (td, numpy.array(tcum), numpy.array(tsingle)) - -if __name__ == "__main__": +if __name__ == '__main__': pass diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 3c58629..25aea70 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1,45 +1,28 @@ -import asyncio -import logging -import traceback -from datetime import datetime -from typing import List, Union - +import nmrpy.data_objects +import logging, traceback import numpy -import sympy as sp -from IPython.display import display -from ipywidgets import ( - HTML, - Button, - Combobox, - Dropdown, - FloatText, - Label, - Output, - Text, - VBox, -) from matplotlib import pyplot as plt -from matplotlib.backend_bases import Event, NavigationToolbar2 -from matplotlib.collections import PolyCollection +from datetime import datetime from matplotlib.figure import Figure +from matplotlib.collections import PolyCollection + from matplotlib.patches import Rectangle from matplotlib.transforms import blended_transform_factory from matplotlib.widgets import Cursor +from matplotlib.backend_bases import NavigationToolbar2, Event -from pyenzyme.model import EnzymeMLDocument - -import nmrpy.data_objects +from ipywidgets import FloatText, Output, VBox, Button, Combobox, Dropdown, Label +from IPython.display import display +import asyncio -from .utils import ( +from pyenzyme.model import EnzymeMLDocument +from nmrpy.utils import ( format_species_string, - get_initial_concentration_by_species_id, get_ordered_list_of_species_names, get_species_from_enzymeml, - get_species_name_by_id, ) - -class Plot: +class Plot(): """ Basic 'plot' class containing functions for various types of plots. 
""" @@ -48,7 +31,7 @@ class Plot: def __init__(self): self._time = datetime.now() - self.id = "plot_{}".format(Plot._plot_id_num) + self.id = 'plot_{}'.format(Plot._plot_id_num) Plot._plot_id_num += 1 self.fig = None @@ -69,134 +52,117 @@ def fig(self, fig): if fig is None or isinstance(fig, Figure): self._fig = fig else: - raise TypeError("fig must be of type matplotlib.figure.Figure.") - - def _plot_ppm( - self, - fid, - upper_ppm=None, - lower_ppm=None, - color="k", - lw=1, - filename=None, - ): + raise TypeError('fig must be of type matplotlib.figure.Figure.') + + def _plot_ppm(self, fid, + upper_ppm=None, + lower_ppm=None, + color='k', + lw=1, + filename=None): data = fid.data params = fid._params - ft = fid._flags["ft"] - if not Plot._is_flat_iter(data): - raise AttributeError("data must be flat iterable.") + ft=fid._flags['ft'] + if not Plot._is_flat_iter(data): + raise AttributeError('data must be flat iterable.') if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError("ppm range specified is invalid.") - sw_left = params["sw_left"] - sw = params["sw"] + raise ValueError('ppm range specified is invalid.') + sw_left = params['sw_left'] + sw = params['sw'] if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left - sw + lower_ppm = sw_left-sw - ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] + ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] ppm_bool_index = (ppm < upper_ppm) * (ppm > lower_ppm) ppm = ppm[ppm_bool_index] data = data[ppm_bool_index] - self.fig = plt.figure(figsize=[9, 5]) + self.fig = plt.figure(figsize=[9,5]) ax = self.fig.add_subplot(111) if ft: ax.plot(ppm, data, color=color, lw=lw) ax.invert_xaxis() ax.set_xlim([upper_ppm, lower_ppm]) ax.grid() - ax.set_xlabel("PPM (%.2f MHz)" % (params["reffrq"])) + ax.set_xlabel('PPM (%.2f MHz)'%(params['reffrq'])) elif not ft: - at = params["at"] * 1000 # ms + at = params['at']*1000 # ms 
t = numpy.linspace(0, at, len(data)) ax.plot(t, data, color=color, lw=lw) ax.set_xlim([0, at]) ax.grid() - ax.set_xlabel("Time (ms)") - # self.fig.show() + ax.set_xlabel('Time (ms)') + #self.fig.show() if filename is not None: - self.fig.savefig(filename, format="pdf") - - def _deconv_generator( - self, - fid, - upper_ppm=None, - lower_ppm=None, - ): + self.fig.savefig(filename, format='pdf') + + def _deconv_generator(self, fid, + upper_ppm=None, + lower_ppm=None, + ): + data = fid.data params = fid._params - if not Plot._is_flat_iter(data): - raise AttributeError("data must be flat iterable.") + if not Plot._is_flat_iter(data): + raise AttributeError('data must be flat iterable.') - peakshapes = fid._f_pks_list(fid._deconvoluted_peaks, numpy.arange(len(data))) + peakshapes = fid._f_pks_list(fid._deconvoluted_peaks, numpy.arange(len(data))) - if not Plot._is_iter_of_iters(peakshapes): - raise AttributeError("data must be flat iterable.") + if not Plot._is_iter_of_iters(peakshapes): + raise AttributeError('data must be flat iterable.') if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError("ppm range specified is invalid.") - sw_left = params["sw_left"] - sw = params["sw"] + raise ValueError('ppm range specified is invalid.') + sw_left = params['sw_left'] + sw = params['sw'] if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left - sw + lower_ppm = sw_left-sw - ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] + ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] ppm_bool_index = (ppm <= upper_ppm) * (ppm >= lower_ppm) ppm = ppm[ppm_bool_index] data = data[ppm_bool_index] peakshapes = peakshapes[:, ppm_bool_index] summed_peaks = peakshapes.sum(0) - residual = data - summed_peaks - return ( - ppm, - data, - peakshapes, - summed_peaks, - residual, - upper_ppm, - lower_ppm, - ) + residual = data-summed_peaks + return ppm, data, peakshapes, summed_peaks, 
residual, upper_ppm, lower_ppm + + def _plot_deconv(self, fid, + upper_ppm=None, + lower_ppm=None, + colour='k', + peak_colour='b', + summed_peak_colour='r', + residual_colour='g', + lw=1, + show_labels=False + ): - def _plot_deconv( - self, - fid, - upper_ppm=None, - lower_ppm=None, - colour="k", - peak_colour="b", - summed_peak_colour="r", - residual_colour="g", - lw=1, - show_labels=False, - ): - # validation takes place in self._deconv_generator - ( - ppm, - data, - peakshapes, - summed_peaks, - residual, - upper_ppm, - lower_ppm, - ) = self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm) - - self.fig = plt.figure(figsize=[9, 5]) + #validation takes place in self._deconv_generator + ppm, data, peakshapes, summed_peaks, residual, upper_ppm, \ + lower_ppm = self._deconv_generator(fid, + upper_ppm=upper_ppm, + lower_ppm=lower_ppm) + + self.fig = plt.figure(figsize=[9,5]) ax = self.fig.add_subplot(111) ax.plot(ppm, residual, color=residual_colour, lw=lw) ax.plot(ppm, data, color=colour, lw=lw) - ax.plot(ppm, summed_peaks, "--", color=summed_peak_colour, lw=lw) - label_pad = 0.02 * peakshapes.max() + ax.plot(ppm, summed_peaks, '--', color=summed_peak_colour, lw=lw) + label_pad = 0.02*peakshapes.max() for n in range(len(peakshapes)): peak = peakshapes[n] - ax.plot(ppm, peak, "-", color=peak_colour, lw=lw) - if (fid._flags["assigned"]) and (show_labels): + ax.plot(ppm, peak, '-', color=peak_colour, lw=lw) + ax.text(ppm[numpy.argmax(peak)], label_pad+peak.max(), str(n), ha='center') + if (fid._flags['assigned']) and (show_labels): ax.text( ppm[numpy.argmax(peak)], label_pad + peak.max(), @@ -205,46 +171,42 @@ def _plot_deconv( if fid.fid_object.peaks else str(n) ), - ha="center", + ha='center', ) ax.invert_xaxis() ax.set_xlim([upper_ppm, lower_ppm]) ax.grid() - ax.set_xlabel("PPM (%.2f MHz)" % (fid._params["reffrq"])) - - def _plot_deconv_array( - self, - fids, - upper_index=None, - lower_index=None, - upper_ppm=None, - lower_ppm=None, - 
data_colour="k", - summed_peak_colour="r", - residual_colour="g", - data_filled=False, - summed_peak_filled=True, - residual_filled=False, - figsize=[9, 6], - lw=0.3, - azim=-90, - elev=20, - filename=None, - ): + ax.set_xlabel('PPM (%.2f MHz)'%(fid._params['reffrq'])) + + def _plot_deconv_array(self, fids, + upper_index=None, + lower_index=None, + upper_ppm=None, + lower_ppm=None, + data_colour='k', + summed_peak_colour='r', + residual_colour='g', + data_filled=False, + summed_peak_filled=True, + residual_filled=False, + figsize=[9, 6], + lw=0.3, + azim=-90, + elev=20, + filename=None): + if lower_index is None: lower_index = 0 if upper_index is None: upper_index = len(fids) if lower_index >= upper_index: - raise ValueError("upper_index must exceed lower_index") - fids = fids[lower_index:upper_index] + raise ValueError('upper_index must exceed lower_index') + fids = fids[lower_index: upper_index] generated_deconvs = [] for fid in fids: - generated_deconvs.append( - self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm) - ) - - params = fids[0]._params + generated_deconvs.append(self._deconv_generator(fid, upper_ppm=upper_ppm, lower_ppm=lower_ppm)) + + params = fids[0]._params ppm = generated_deconvs[0][0] data = [i[1] for i in generated_deconvs] peakshapes = [i[2] for i in generated_deconvs] @@ -253,23 +215,21 @@ def _plot_deconv_array( upper_ppm = generated_deconvs[0][5] lower_ppm = generated_deconvs[0][6] - plot_data = numpy.array( - [ - residuals, - data, - summed_peaks, - ] - ) + plot_data = numpy.array([ + residuals, + data, + summed_peaks, + ]) colours_list = [ - [residual_colour] * len(residuals), - [data_colour] * len(data), - [summed_peak_colour] * len(summed_peaks), - ] + [residual_colour]*len(residuals), + [data_colour]*len(data), + [summed_peak_colour]*len(summed_peaks), + ] filled_list = [ - residual_filled, - data_filled, - summed_peak_filled, - ] + residual_filled, + data_filled, + summed_peak_filled, + ] xlabel = 'PPM (%.2f 
MHz)'%(params['reffrq']) ylabel = 'min.' @@ -290,48 +250,49 @@ def _plot_deconv_array( elev=elev, ) if filename is not None: - self.fig.savefig(filename, format="pdf") + self.fig.savefig(filename, format='pdf') plt.show() + + + + def _plot_array(self, data, params, + upper_index=None, + lower_index=None, + upper_ppm=None, + lower_ppm=None, + figsize=(9, 6), + lw=0.3, + azim=-90, + elev=20, + filled=False, + show_zticks=False, + labels=None, + colour=True, + filename=None, + ): - def _plot_array( - self, - data, - params, - upper_index=None, - lower_index=None, - upper_ppm=None, - lower_ppm=None, - figsize=(9, 6), - lw=0.3, - azim=-90, - elev=20, - filled=False, - show_zticks=False, - labels=None, - colour=True, - filename=None, - ): if not Plot._is_iter_of_iters(data): - raise AttributeError("data must be 2D.") + raise AttributeError('data must be 2D.') if upper_ppm is not None and lower_ppm is not None: if upper_ppm == lower_ppm or upper_ppm < lower_ppm: - raise ValueError("ppm range specified is invalid.") + raise ValueError('ppm range specified is invalid.') if upper_index is not None and lower_index is not None: if upper_index == lower_index or upper_index < lower_index: - raise ValueError("index range specified is invalid.") + raise ValueError('index range specified is invalid.') - sw_left = params["sw_left"] - sw = params["sw"] + + sw_left = params['sw_left'] + sw = params['sw'] if upper_index is None: upper_index = len(data) if lower_index is None: lower_index = 0 - + if upper_ppm is None: upper_ppm = sw_left if lower_ppm is None: - lower_ppm = sw_left - sw + lower_ppm = sw_left-sw if "acqtime_array" in params.keys(): # New NMRpy _params structure @@ -354,23 +315,20 @@ def _plot_array( else: colours_list = None - xlabel = "PPM (%.2f MHz)" % (params["reffrq"]) - ylabel = "min." 
- self.fig = self._generic_array_plot( - ppm, - minutes, - [data], - colours_list=colours_list, - filled_list=[filled], - figsize=figsize, - xlabel=xlabel, - ylabel=ylabel, - lw=lw, - azim=azim, - elev=elev, - ) + xlabel = 'PPM (%.2f MHz)'%(params['reffrq']) + ylabel = 'min.' + self.fig = self._generic_array_plot(ppm, minutes, [data], + colours_list=colours_list, + filled_list=[filled], + figsize=figsize, + xlabel=xlabel, + ylabel=ylabel, + lw=lw, + azim=azim, + elev=elev, + ) if filename is not None: - self.fig.savefig(filename, format="pdf") + self.fig.savefig(filename, format='pdf') plt.show() @staticmethod @@ -384,25 +342,21 @@ def _interleave_datasets(data): idata.append(data[x][y]) return idata - def _generic_array_plot( - self, - x, - y, - zlist, - colours_list=None, - filled_list=None, - upper_lim=None, - lower_lim=None, - lw=0.3, - azim=-90, - elev=20, - figsize=[5, 5], - show_zticks=False, - labels=None, - xlabel=None, - ylabel=None, - filename=None, - ): + def _generic_array_plot(self, x, y, zlist, + colours_list=None, + filled_list=None, + upper_lim=None, + lower_lim=None, + lw=0.3, + azim=-90, + elev=20, + figsize=[5,5], + show_zticks=False, + labels=None, + xlabel=None, + ylabel=None, + filename=None, + ): """ Generic function for plotting arrayed data on a set of 3D axes. 
x and y @@ -412,44 +366,46 @@ def _generic_array_plot( """ + + + if colours_list is None: - colours_list = [["k"] * len(y)] * len(zlist) + colours_list = [['k']*len(y)]*len(zlist) if filled_list is None: - filled_list = [False] * len(zlist) + filled_list = [False]*len(zlist) + fig = plt.figure(figsize=figsize) - ax = fig.add_subplot(111, projection="3d", azim=azim, elev=elev) + ax = fig.add_subplot(111, projection='3d', azim=azim, elev=elev) for data_n in range(len(zlist)): data = zlist[data_n] - bh = abs(data.min()) + bh = abs(data.min()) filled = filled_list[data_n] cl = colours_list[data_n] if not filled: - # spectra are plotted in reverse for zorder + #spectra are plotted in reverse for zorder for n in range(len(data))[::-1]: datum = data[n] clr = cl[n] - ax.plot(x, len(datum) * [y[n]], datum, color=clr, lw=lw) + ax.plot(x, len(datum)*[y[n]], datum, color=clr, lw=lw) if filled: verts = [] - plot_data = data + bh + plot_data = data+bh for datum in plot_data: datum[0], datum[-1] = 0, 0 verts.append(list(zip(x, datum))) - - fclr, eclr = ["w"] * len(data), ["k"] * len(data) + + fclr, eclr = ['w']*len(data), ['k']*len(data) fclr = cl - poly = PolyCollection( - verts, + poly = PolyCollection(verts, facecolors=fclr, edgecolors=eclr, - linewidths=[lw] * len(verts), - ) - ax.add_collection3d(poly, zs=y, zdir="y") - - ax.set_zlim([0, 1.1 * max(numpy.array(zlist).flat)]) + linewidths=[lw]*len(verts)) + ax.add_collection3d(poly, zs=y, zdir='y') + + ax.set_zlim([0, 1.1*max(numpy.array(zlist).flat)]) ax.invert_xaxis() if upper_lim is None: upper_lim = x[0] @@ -462,6 +418,7 @@ def _generic_array_plot( if not show_zticks: ax.set_zticklabels([]) return fig + @classmethod def _is_iter(cls, i): @@ -487,54 +444,48 @@ def _is_flat_iter(cls, i): return True return False - class Phaser: """Interactive phase-correction widget""" - def __init__(self, fid): - if not Plot._is_flat_iter(fid.data): - raise ValueError("data must be flat iterable.") + if not Plot._is_flat_iter(fid.data): + 
raise ValueError('data must be flat iterable.') if fid.data is [] or fid.data is None: - raise ValueError("data must exist.") + raise ValueError('data must exist.') self.fid = fid self.fig = plt.figure(figsize=[9, 6]) self.phases = numpy.array([0.0, 0.0]) self.cum_phases = numpy.array([0.0, 0.0]) self.y = 0.0 self.ax = self.fig.add_subplot(111) - self.ax.plot(self.fid.data, color="k", linewidth=1.0) - self.ax.hlines(0, 0, len(self.fid.data) - 1) + self.ax.plot(self.fid.data, color='k', linewidth=1.0) + self.ax.hlines(0, 0, len(self.fid.data)-1) self.ax.set_xlim([0, len(self.fid.data)]) - xtcks = numpy.linspace(0, 1, 11) * len(self.fid.data) - xtcks[-1] = xtcks[-1] - 1 + xtcks = numpy.linspace(0,1,11)*len(self.fid.data) + xtcks[-1] = xtcks[-1]-1 self.ax.set_xticks(xtcks) - self.ax.set_xlabel("PPM (%.2f MHz)" % (self.fid._params["reffrq"])) + self.ax.set_xlabel('PPM (%.2f MHz)'%(self.fid._params['reffrq'])) self.ax.set_xticklabels([numpy.round(self.fid._ppm[int(i)], 1) for i in xtcks]) - ylims = numpy.array([-1.6, 1.6]) * max(abs(numpy.array(self.ax.get_ylim()))) + ylims = numpy.array([-1.6, 1.6])*max(abs(numpy.array(self.ax.get_ylim()))) self.ax.set_ylim(ylims) self.ax.grid() self.visible = True self.canvas = self.ax.figure.canvas - self.canvas.mpl_connect("motion_notify_event", self.onmove) - self.canvas.mpl_connect("button_press_event", self.press) - self.canvas.mpl_connect("button_release_event", self.release) + self.canvas.mpl_connect('motion_notify_event', self.onmove) + self.canvas.mpl_connect('button_press_event', self.press) + self.canvas.mpl_connect('button_release_event', self.release) self.pressv = None self.buttonDown = False self.prev = (0, 0) - self.ax.text( - 0.05 * self.ax.get_xlim()[1], - 0.7 * self.ax.get_ylim()[1], - "phasing\nleft - zero-order\nright - first order", - ) - cursor = Cursor(self.ax, useblit=True, color="k", linewidth=0.5) + self.ax.text(0.05 *self.ax.get_xlim()[1],0.7 *self.ax.get_ylim()[1],'phasing\nleft - zero-order\nright - first 
order') + cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) cursor.horizOn = False self.fig.subplots_adjust(bottom=0.13) - self.text1 = self.fig.text(0.12, 0.02, " ", fontsize="large") + self.text1 = self.fig.text(0.12, 0.02, ' ', fontsize='large') plt.show() def press(self, event): tb = plt.get_current_fig_manager().toolbar - if tb.mode == "": + if tb.mode == '': x, y = event.xdata, event.ydata if event.inaxes is not None: self.buttonDown = True @@ -542,9 +493,7 @@ def press(self, event): self.y = y def release(self, event): - self.text1.set_text( - "cumulative p0: {0:.1f} p1: {1:.1f}".format(*self.cum_phases) - ) + self.text1.set_text('cumulative p0: {0:.1f} p1: {1:.1f}'.format(*self.cum_phases)) self.buttonDown = False return False @@ -553,27 +502,26 @@ def onmove(self, event): return x = event.xdata y = event.ydata - dy = y - self.y + dy = y-self.y self.y = y if self.button == 1: - self.phases[0] = 50 * dy / self.ax.get_ylim()[1] + self.phases[0] = 50*dy/self.ax.get_ylim()[1] self.phases[1] = 0.0 if self.button == 3: - self.phases[1] = 50 * dy / self.ax.get_ylim()[1] + self.phases[1] = 50*dy/self.ax.get_ylim()[1] self.phases[0] = 0.0 self.fid.ps(p0=self.phases[0], p1=self.phases[1]) self.cum_phases += self.phases - self.ax.lines[0].set_data( - numpy.array([numpy.arange(len(self.fid.data)), self.fid.data]) - ) + self.ax.lines[0].set_data(numpy.array([numpy.arange(len(self.fid.data)), self.fid.data])) self.canvas.draw() # _idle() return False class BaseSelectorMixin: + def __init__(self): super().__init__() - + def press(self, event): pass @@ -589,19 +537,17 @@ def redraw(self): def change_visible(self): pass - class PolySelectorMixin(BaseSelectorMixin): + def __init__(self): super().__init__() - class Psm: pass - self.psm = Psm() self.psm.btn_add = 1 self.psm.btn_del = 1 self.psm.btn_cls = 3 - self.psm.key_mod = "control" + self.psm.key_mod = 'control' self.psm.xs = [] self.psm.ys = [] self.psm._xs = [] @@ -618,12 +564,12 @@ class Psm: 
self.psm._yline = None self.psm.lw = 1 self.blocking = False - if not hasattr(self, "show_tracedata"): + if not hasattr(self, 'show_tracedata'): self.show_tracedata = False def redraw(self): super().redraw() - if hasattr(self, "psm"): + if hasattr(self, 'psm'): for i in self.psm._visual_lines: self.ax.draw_artist(i) if self.psm.line is not None: @@ -633,21 +579,20 @@ def redraw(self): def change_visible(self): super().change_visible() - if hasattr(self, "psm"): + if hasattr(self, 'psm'): for i in self.psm._visual_lines: i.set_visible(not i.get_visible()) if self.psm.line is not None: self.psm.line.set_visible(not self.psm.line.get_visible()) - def makepoly( - self, + def makepoly(self, xs=None, ys=None, lw=1, - colour="r", - ms="+", - ls="-", - ): + colour='r', + ms='+', + ls='-', + ): if xs is not None and ys is not None: return self.ax.plot( xs, @@ -656,111 +601,99 @@ def makepoly( color=colour, marker=ms, ls=ls, - ) - + ) + def press(self, event): super().press(event) - if self.check_mode() != "": + if self.check_mode() != '': return if event.xdata is None or event.ydata is None: return if event.button == self.psm.btn_add and event.key != self.psm.key_mod: - self.psm.xs.append(event.xdata) - self.psm.ys.append(event.ydata) - if self.show_tracedata: - self.psm._xs, self.psm._ys = self.get_line_ydata( - self.psm.xs, self.psm.ys - ) - if self.psm.line is None: - (self.psm.line,) = self.makepoly( - self.psm.xs, - self.psm.ys, - lw=self.psm.lw, - ) - self.blocking = True + self.psm.xs.append(event.xdata) + self.psm.ys.append(event.ydata) if self.show_tracedata: - (self.psm._yline,) = self.makepoly( - self.psm._xs, - self.psm._ys, + self.psm._xs, self.psm._ys = self.get_line_ydata(self.psm.xs, self.psm.ys) + if self.psm.line is None: + self.psm.line, = self.makepoly( + self.psm.xs, + self.psm.ys, lw=self.psm.lw, - ms="+", - ls="-", - colour="r", - ) - else: - self.psm.line.set_data(self.psm.xs, self.psm.ys) - if self.show_tracedata: - 
self.psm._yline.set_data(self.psm._xs, self.psm._ys) + ) + self.blocking = True + if self.show_tracedata: + self.psm._yline, = self.makepoly( + self.psm._xs, + self.psm._ys, + lw=self.psm.lw, + ms='+', + ls='-', + colour='r', + ) + else: + self.psm.line.set_data(self.psm.xs, self.psm.ys) + if self.show_tracedata: + self.psm._yline.set_data(self.psm._xs, self.psm._ys) elif event.button == self.psm.btn_del and event.key == self.psm.key_mod: if len(self.psm._visual_lines) > 0: x = event.xdata y = event.ydata - # trace_dist = [[i[0]-x, i[1]-y] for i in self.psm.lines] - trace_dist = [[i[0] - x] for i in self.psm.lines] - # delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2+i[1]**2)) - delete_trace = numpy.argmin( - [min(numpy.sqrt(i[0] ** 2)) for i in trace_dist] - ) + #trace_dist = [[i[0]-x, i[1]-y] for i in self.psm.lines] + trace_dist = [[i[0]-x] for i in self.psm.lines] + #delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2+i[1]**2)) + delete_trace = numpy.argmin([min(numpy.sqrt(i[0]**2)) for i in trace_dist]) self.psm.lines.pop(delete_trace) self.psm.data_lines.pop(delete_trace) trace = self.psm._visual_lines.pop(delete_trace) trace.remove() elif event.button == self.psm.btn_cls and self.psm.line is not None: if len(self.psm.xs) > 1: - self.psm._visual_lines.append( - self.makepoly( - self.psm.xs, - self.psm.ys, + self.psm._visual_lines.append(self.makepoly( + self.psm.xs, + self.psm.ys, lw=self.psm.lw, - colour="b", - )[0] - ) + colour='b', + )[0]) self.psm.lines.append(numpy.array([self.psm.xs, self.psm.ys])) self.psm.xs, self.psm.ys = [], [] self.psm.line.remove() self.psm.line = None self.psm._yline.remove() self.psm._yline = None - self.psm.data_lines.append( - self.get_polygon_neighbours_data(self.psm.lines[-1]) - ) - self.psm.index_lines.append( - self.get_polygon_neighbours_indices(self.psm.lines[-1]) - ) + self.psm.data_lines.append(self.get_polygon_neighbours_data(self.psm.lines[-1])) + 
self.psm.index_lines.append(self.get_polygon_neighbours_indices(self.psm.lines[-1])) self.blocking = False else: self.psm.xs, self.psm.ys = [], [] self.psm.line = None - # self.redraw() - + #self.redraw() + def onmove(self, event): super().onmove(event) self.psm._x = event.xdata self.psm._y = event.ydata if self.psm.line is not None: - xs = self.psm.xs + [self.psm._x] - ys = self.psm.ys + [self.psm._y] + xs = self.psm.xs+[self.psm._x] + ys = self.psm.ys+[self.psm._y] self.psm.line.set_data(xs, ys) if self.show_tracedata: current_x_ydata = self.get_line_ydata( - [self.psm.xs[-1]] + [self.psm._x], - [self.psm.ys[-1]] + [self.psm._y], - ) + [self.psm.xs[-1]]+[self.psm._x], + [self.psm.ys[-1]]+[self.psm._y], + ) self.psm._yline.set_data( - self.psm._xs + current_x_ydata[0], - self.psm._ys + current_x_ydata[1], - ) + self.psm._xs+current_x_ydata[0], + self.psm._ys+current_x_ydata[1], + ) def get_line_ydata(self, xs, ys): xdata = [] ydata = [] - for i in range(len(xs) - 1): - current_xy_data = self.get_polygon_neighbours_data( - [ - xs[i : i + 2], - ys[i : i + 2], - ] - ) + for i in range(len(xs)-1): + current_xy_data = self.get_polygon_neighbours_data([ + xs[i:i+2], + ys[i:i+2], + ]) xdata += current_xy_data[0] ydata += current_xy_data[1] return xdata, ydata @@ -772,17 +705,12 @@ def get_polygon_neighbours_data(self, line): """ line_xs = [] line_ys = [] - for i in range(len(line[0]) - 1): - x1, y1, x2, y2 = ( - line[0][i], - line[1][i], - line[0][i + 1], - line[1][i + 1], - ) + for i in range(len(line[0])-1): + x1, y1, x2, y2 = line[0][i], line[1][i], line[0][i+1], line[1][i+1] x, y, x_index, y_index = self.get_neighbours([x1, x2], [y1, y2]) if x is not None and y is not None: - line_xs = line_xs + list(x) - line_ys = line_ys + list(y) + line_xs = line_xs+list(x) + line_ys = line_ys+list(y) return [line_xs, line_ys] def get_polygon_neighbours_indices(self, line): @@ -792,19 +720,14 @@ def get_polygon_neighbours_indices(self, line): """ line_xs = [] line_ys = [] - for i 
in range(len(line[0]) - 1): - x1, y1, x2, y2 = ( - line[0][i], - line[1][i], - line[0][i + 1], - line[1][i + 1], - ) + for i in range(len(line[0])-1): + x1, y1, x2, y2 = line[0][i], line[1][i], line[0][i+1], line[1][i+1] x, y, x_index, y_index = self.get_neighbours([x1, x2], [y1, y2]) if x_index is not None and y_index is not None: - line_xs = line_xs + list(x_index) - line_ys = line_ys + list(y_index) + line_xs = line_xs+list(x_index) + line_ys = line_ys+list(y_index) return [line_xs, line_ys] - + def get_neighbours(self, xs, ys): """ For a pair of coordinates (xs = [x1, x2], ys = [y1, y2]), return the @@ -816,7 +739,7 @@ def get_neighbours(self, xs, ys): if True not in ymask: return None, None, None, None y_lo = ymask.index(True) - y_hi = len(ymask) - ymask[::-1].index(True) + y_hi = len(ymask)-ymask[::-1].index(True) x_neighbours = [] y_neighbours = [] y_indices = [i for i in range(y_lo, y_hi)] @@ -824,13 +747,13 @@ def get_neighbours(self, xs, ys): y_indices = y_indices[::-1] x_indices = [] for i in y_indices: - x = [self.ppm[0], self.ppm[-1], xs[0], xs[1]] - y = [self.y_indices[i], self.y_indices[i], ys[0], ys[1]] + x = [self.ppm[0], self.ppm[-1], xs[0], xs[1]] + y = [self.y_indices[i], self.y_indices[i], ys[0], ys[1]] x, y = self.get_intersection(x, y) - x = numpy.argmin(abs(self.ppm[::-1] - x)) + x = numpy.argmin(abs(self.ppm[::-1]-x)) x_indices.append(x) x_neighbours.append(self.ppm[::-1][x]) - y_neighbours.append(self.data[i][x] + self.y_indices[i]) + y_neighbours.append(self.data[i][x]+self.y_indices[i]) return x_neighbours, y_neighbours, x_indices, y_indices @staticmethod @@ -842,54 +765,46 @@ def get_intersection(x, y): and [x4, y4] represent the other. 
See https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Given_two_points_on_each_line """ - px = ( - (x[0] * y[1] - y[0] * x[1]) * (x[2] - x[3]) - - (x[0] - x[1]) * (x[2] * y[3] - y[2] * x[3]) - ) / ((x[0] - x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] - x[3])) - py = ( - (x[0] * y[1] - y[0] * x[1]) * (y[2] - y[3]) - - (y[0] - y[1]) * (x[2] * y[3] - y[2] * x[3]) - ) / ((x[0] - x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] - x[3])) + px = (((x[0]*y[1]-y[0]*x[1])*(x[2]-x[3])-(x[0]-x[1])*(x[2]*y[3]-y[2]*x[3]))/((x[0]-x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]-x[3]))) + py = (((x[0]*y[1]-y[0]*x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]*y[3]-y[2]*x[3]))/((x[0]-x[1])*(y[2]-y[3])-(y[0]-y[1])*(x[2]-x[3]))) return px, py - class LineSelectorMixin(BaseSelectorMixin): + def __init__(self): super().__init__() - class Lsm: pass - self.lsm = Lsm() self.lsm.btn_add = 1 self.lsm.btn_del = 1 - self.lsm.key_mod = "control" + self.lsm.key_mod = 'control' self.lsm.peaklines = {} self.lsm.peaks = [] for x in self.peaks: self.lsm.peaks.append(x) self.lsm.peaklines[x] = self.makeline(x) - # self.ax.draw_artist(self.lsm.peaklines[x]) + #self.ax.draw_artist(self.lsm.peaklines[x]) self.lsm.peaks = sorted(self.lsm.peaks)[::-1] - + def makeline(self, x): return self.ax.plot( - [x, x], + [x, x], self.ylims, - color="#CC0000", + color='#CC0000', lw=1, - # animated=True - )[0] + #animated=True + )[0] def redraw(self): super().redraw() - if hasattr(self, "lsm"): + if hasattr(self, 'lsm'): for i, j in self.lsm.peaklines.items(): self.ax.draw_artist(j) def change_visible(self): super().change_visible() - if hasattr(self, "lsm"): + if hasattr(self, 'lsm'): for i, j in self.lsm.peaklines.items(): j.set_visible(True) j.set_visible(not j.get_visible()) @@ -898,33 +813,30 @@ def press(self, event): super().press(event) x = numpy.round(event.xdata, 2) # left - if ( - event.button == self.lsm.btn_add - and event.key != self.lsm.key_mod - and (x >= self.xlims[1]) - and (x <= self.xlims[0]) - ): + if 
event.button == self.lsm.btn_add and \ + event.key != self.lsm.key_mod and \ + (x >= self.xlims[1]) and (x <= self.xlims[0]): with self.out: - print("peak {}".format(x)) + print('peak {}'.format(x)) if x not in self.lsm.peaks: self.lsm.peaks.append(x) self.lsm.peaklines[x] = self.makeline(x) self.lsm.peaks = sorted(self.lsm.peaks)[::-1] - # self.ax.draw_artist(self.lsm.peaklines[x]) - # Ctrl+left + #self.ax.draw_artist(self.lsm.peaklines[x]) + #Ctrl+left elif event.button == self.lsm.btn_del and event.key == self.lsm.key_mod: - # find and delete nearest peakline + #find and delete nearest peakline if len(self.lsm.peaks) > 0: - delete_peak = numpy.argmin([abs(i - x) for i in self.lsm.peaks]) + delete_peak = numpy.argmin([abs(i-x) for i in self.lsm.peaks]) old_peak = self.lsm.peaks.pop(delete_peak) - try: + try: peakline = self.lsm.peaklines.pop(old_peak) peakline.remove() - except Exception: + except: with self.out: - print("Could not remove peakline") + print('Could not remove peakline') self.canvas.draw() - # self.redraw() + #self.redraw() def release(self, event): super().release(event) @@ -934,62 +846,59 @@ def onmove(self, event): class SpanSelectorMixin(BaseSelectorMixin): + def __init__(self): super().__init__() - class Ssm: pass - self.ssm = Ssm() self.ssm.btn_add = 3 self.ssm.btn_del = 3 - self.ssm.key_mod = "control" + self.ssm.key_mod = 'control' self.ssm.minspan = 0 self.ssm.rect = None self.ssm.rangespans = [] - self.ssm.rectprops = dict(facecolor="0.5", alpha=0.2) + self.ssm.rectprops = dict(facecolor='0.5', alpha=0.2) self.ssm.ranges = self.ranges for rng in self.ssm.ranges: - self.ssm.rangespans.append(self.makespan(rng[1], rng[0] - rng[1])) + self.ssm.rangespans.append(self.makespan(rng[1], rng[0]-rng[1])) self.redraw() - trans = blended_transform_factory(self.ax.transData, self.ax.transAxes) + trans = blended_transform_factory( + self.ax.transData, + self.ax.transAxes) w, h = 0, 1 - self.ssm.rect = Rectangle( - [0, 0], - w, - h, - transform=trans, 
- visible=False, - animated=True, - **self.ssm.rectprops, - ) + self.ssm.rect = Rectangle([0, 0], w, h, + transform=trans, + visible=False, + animated=True, + **self.ssm.rectprops + ) self.ax.add_patch(self.ssm.rect) def makespan(self, left, width): - trans = blended_transform_factory(self.ax.transData, self.ax.transAxes) + trans = blended_transform_factory( + self.ax.transData, + self.ax.transAxes) bottom, top = self.ylims - height = top - bottom - rect = Rectangle( - [left, bottom], - width, - height, - transform=trans, - visible=True, - # animated=True, - **self.ssm.rectprops, - ) + height = top-bottom + rect = Rectangle([left, bottom], width, height, + transform=trans, + visible=True, + #animated=True, + **self.ssm.rectprops + ) self.ax.add_patch(rect) return rect def redraw(self): super().redraw() - if hasattr(self, "ssm"): + if hasattr(self, 'ssm'): for i in self.ssm.rangespans: self.ax.draw_artist(i) def change_visible(self): super().change_visible() - if hasattr(self, "ssm"): + if hasattr(self, 'ssm'): for i in self.ssm.rangespans: i.set_visible(not i.get_visible()) @@ -1001,16 +910,13 @@ def press(self, event): self.buttonDown = True self.pressv = event.xdata elif event.button == self.ssm.btn_add and event.key == self.ssm.key_mod: - # find and delete range + #find and delete range if len(self.ssm.ranges) > 0: x = event.xdata rng = 0 while rng < len(self.ssm.ranges): - if ( - x >= (self.ssm.ranges[rng])[1] - and x <= (self.ssm.ranges[rng])[0] - ): - self.ssm.ranges.pop(rng) + if x >= (self.ssm.ranges[rng])[1] and x <= (self.ssm.ranges[rng])[0]: + self.ssm.ranges.pop(rng) rangespan = self.ssm.rangespans.pop(rng) rangespan.remove() break @@ -1027,7 +933,7 @@ def release(self, event): span = vmax - vmin self.pressv = None spantest = False - # if len(self.ssm.ranges) > 0: + #if len(self.ssm.ranges) > 0: # for i in self.ssm.ranges: # if (vmin >= i[1]) and (vmin <= i[0]): # spantest = True @@ -1037,9 +943,10 @@ def release(self, event): 
self.ssm.ranges.append([numpy.round(vmin, 2), numpy.round(vmax, 2)]) self.ssm.rangespans.append(self.makespan(vmin, span)) with self.out: - print("range {} -> {}".format(vmax, vmin)) + print('range {} -> {}'.format(vmax, vmin)) self.ssm.ranges = [numpy.sort(i)[::-1] for i in self.ssm.ranges] + def onmove(self, event): super().onmove(event) if self.pressv is None or self.buttonDown is False: @@ -1049,46 +956,40 @@ def onmove(self, event): v = x minv, maxv = v, self.pressv if minv > maxv: - minv, maxv = maxv, minv + minv, maxv = maxv, minv vmin = self.pressv vmax = event.xdata # or self.prev[0] if vmin > vmax: - vmin, vmax = vmax, vmin + vmin, vmax = vmax, vmin self.ssm.rect.set_visible(self.visible) self.ssm.rect.set_xy([minv, self.ssm.rect.xy[1]]) - self.ssm.rect.set_width(maxv - minv) + self.ssm.rect.set_width(maxv-minv) self.ax.draw_artist(self.ssm.rect) - class PeakSelectorMixin(BaseSelectorMixin): + def __init__(self): super().__init__() - class Psm: pass - self.psm = Psm() self.psm.btn_add = 1 self.psm.peak = None self.psm.newx = None - + def makeline(self, x): return self.ax.plot( - [x, x], + [x, x], self.ylims, - color="#CC0000", + color='#CC0000', lw=1, - )[0] + )[0] def press(self, event): super().press(event) x = numpy.round(event.xdata, 2) # left - if ( - event.button == self.psm.btn_add - and (x >= self.xlims[1]) - and (x <= self.xlims[0]) - ): + if event.button == self.psm.btn_add and (x >= self.xlims[1]) and (x <= self.xlims[0]): self.psm.peak = x self.makeline(x) self.process() @@ -1098,60 +999,55 @@ def release(self, event): def onmove(self, event): super().onmove(event) - + def process(self): pass - - + class AssignMixin(BaseSelectorMixin): + def __init__(self): super().__init__() - class Am: pass - self.am = Am() self.am.btn_assign = 3 - self.am.key_mod1 = "ctrl+alt" - self.am.key_mod2 = "alt+control" + self.am.key_mod1 = 'ctrl+alt' + self.am.key_mod2 = 'alt+control' def press(self, event): super().press(event) - if event.button == 
self.am.btn_assign and ( - event.key == self.am.key_mod1 or event.key == self.am.key_mod2 - ): + if event.button == self.am.btn_assign and (event.key == self.am.key_mod1 \ + or event.key == self.am.key_mod2): with self.out: - print("assigned peaks and ranges") - self.assign() + print('assigned peaks and ranges') + self.assign() def assign(self): pass - - -class DataSelector: + +class DataSelector(): """ Interactive selector widget. can inherit from various mixins for functionality: Line selection: :class:`~nmrpy.plotting.LineSelectorMixin` Span selection: :class:`~nmrpy.plotting.SpanSelectorMixin` Poly selection: :class:`~nmrpy.plotting.PolySelectorMixin` - + This class is not intended to be used without inheriting at least one mixin. """ - def __init__( - self, - data, - params, - extra_data=None, - extra_data_colour="k", - peaks=None, - ranges=None, - title=None, - voff=0.001, - label=None, - ): + def __init__(self, + data, + params, + extra_data=None, + extra_data_colour='k', + peaks=None, + ranges=None, + title=None, + voff=0.001, + label=None, + ): if not Plot._is_iter(data): - raise AttributeError("data must be iterable.") + raise AttributeError('data must be iterable.') self.data = numpy.array(data) self.extra_data = extra_data self.extra_data_colour = extra_data_colour @@ -1174,19 +1070,19 @@ def __init__( self.pressv = None self.buttonDown = False self.prev = (0, 0) - self.blocking = False - # self.canvas.restore_region(self.background) - super().__init__() # calling parent init - # self.canvas.blit(self.ax.bbox) - - self.cidmotion = self.canvas.mpl_connect("motion_notify_event", self.onmove) - self.cidpress = self.canvas.mpl_connect("button_press_event", self.press) - self.cidrelease = self.canvas.mpl_connect("button_release_event", self.release) - self.ciddraw = self.canvas.mpl_connect("draw_event", self.on_draw) - # cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) - # cursor.horizOn = False + self.blocking = False + 
#self.canvas.restore_region(self.background) + super().__init__() #calling parent init + #self.canvas.blit(self.ax.bbox) + + self.cidmotion = self.canvas.mpl_connect('motion_notify_event', self.onmove) + self.cidpress = self.canvas.mpl_connect('button_press_event', self.press) + self.cidrelease = self.canvas.mpl_connect('button_release_event', self.release) + self.ciddraw = self.canvas.mpl_connect('draw_event', self.on_draw) + #cursor = Cursor(self.ax, useblit=True, color='k', linewidth=0.5) + #cursor.horizOn = False # self.canvas.draw() - # self.redraw() + #self.redraw() # plt.show() def disconnect(self): @@ -1198,9 +1094,9 @@ def disconnect(self): def _isnotebook(self): try: shell = get_ipython().__class__.__name__ - if shell == "ZMQInteractiveShell": + if shell == 'ZMQInteractiveShell': return True # Jupyter notebook or qtconsole - elif shell == "TerminalInteractiveShell": + elif shell == 'TerminalInteractiveShell': return False # Terminal running IPython else: return False # Other type (?) 
@@ -1213,21 +1109,16 @@ def _make_basic_fig(self, *args, **kwargs): self.ax = self.fig.add_subplot(111) if len(self.data.shape) == 1: self.ppm = numpy.mgrid[ - self.params["sw_left"] - - self.params["sw"] : self.params["sw_left"] : complex( - self.data.shape[0] - ) + self.params['sw_left'] + - self.params['sw'] : self.params['sw_left'] : complex(self.data.shape[0]) ] # extra_data if self.extra_data is not None: self.ax.plot( - self.ppm[::-1], - self.extra_data, - color=self.extra_data_colour, - lw=1, + self.ppm[::-1], self.extra_data, color=self.extra_data_colour, lw=1 ) # data - self.ax.plot(self.ppm[::-1], self.data, color="k", lw=1) + self.ax.plot(self.ppm[::-1], self.data, color='k', lw=1) elif len(self.data.shape) == 2: cl = dict( zip( @@ -1236,18 +1127,14 @@ def _make_basic_fig(self, *args, **kwargs): ) ) self.ppm = numpy.mgrid[ - self.params["sw_left"] - - self.params["sw"] : self.params["sw_left"] : complex( - self.data.shape[1] - ) + self.params['sw_left'] + - self.params['sw'] : self.params['sw_left'] : complex(self.data.shape[1]) ] self.y_indices = numpy.arange(len(self.data)) * self.voff * self.data.max() # this is reversed for zorder # extra_data if self.extra_data is not None: - for i, j in zip( - range(len(self.extra_data))[::-1], self.extra_data[::-1] - ): + for i, j in zip(range(len(self.extra_data))[::-1], self.extra_data[::-1]): self.ax.plot( self.ppm[::-1], j + self.y_indices[i], @@ -1257,7 +1144,7 @@ def _make_basic_fig(self, *args, **kwargs): # data for i, j in zip(range(len(self.data))[::-1], self.data[::-1]): self.ax.plot(self.ppm[::-1], j + self.y_indices[i], color=cl[i], lw=1) - self.ax.set_xlabel("ppm") + self.ax.set_xlabel('ppm') self.ylims = numpy.array(self.ax.get_ylim()) # numpy.array([self.ax.get_ylim()[0], self.data.max() + abs(self.ax.get_ylim()[0])]) # self.ax.set_ylim(self.ylims)#self.ax.get_ylim()[0], self.data.max()*1.1]) @@ -1265,11 +1152,7 @@ def _make_basic_fig(self, *args, **kwargs): self.xlims = [self.ppm[-1], self.ppm[0]] 
self.ax.set_xlim(self.xlims) self.fig.suptitle(self.title, size=20) - self.ax.text( - 0.95 * self.ax.get_xlim()[0], - 0.7 * self.ax.get_ylim()[1], - self.label, - ) + self.ax.text(0.95 * self.ax.get_xlim()[0], 0.7 * self.ax.get_ylim()[1], self.label) self.ax.set_ylim(self.ylims) self.canvas = self.ax.figure.canvas # self.canvas.draw() @@ -1297,7 +1180,7 @@ def on_zoom(self, event): def press(self, event): tb = plt.get_current_fig_manager().toolbar - if tb.mode == "" and event.xdata is not None: + if tb.mode == '' and event.xdata is not None: x = numpy.round(event.xdata, 2) self.canvas.restore_region(self.background) try: @@ -1305,7 +1188,7 @@ def press(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def release(self, event): if self.pressv is None or not self.buttonDown: @@ -1317,7 +1200,7 @@ def release(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def onmove(self, event): if event.inaxes is None: @@ -1330,288 +1213,530 @@ def onmove(self, event): except Exception as e: logging.error(traceback.format_exc()) self.redraw() - self.canvas.blit(self.ax.bbox) + self.canvas.blit(self.ax.bbox) def make_invisible(self): try: - super().make_invisible() + super().make_invisible() except Exception as e: logging.error(traceback.format_exc()) def make_visible(self): try: - super().make_visible() + super().make_visible() except Exception as e: logging.error(traceback.format_exc()) def redraw(self): try: - super().redraw() + super().redraw() except Exception as e: logging.error(traceback.format_exc()) - + def change_visible(self): try: - super().change_visible() + super().change_visible() except Exception as e: logging.error(traceback.format_exc()) - class IntegralDataSelector(DataSelector, PolySelectorMixin, AssignMixin): show_tracedata = True - -class 
PeakTraceDataSelector( - DataSelector, PolySelectorMixin, SpanSelectorMixin, AssignMixin -): +class PeakTraceDataSelector(DataSelector, PolySelectorMixin, SpanSelectorMixin, AssignMixin): show_tracedata = True - -class LineSpanDataSelector( - DataSelector, LineSelectorMixin, SpanSelectorMixin, AssignMixin -): +class LineSpanDataSelector(DataSelector, LineSelectorMixin, SpanSelectorMixin, AssignMixin): pass - class PeakDataSelector(DataSelector, PeakSelectorMixin): pass - - + class SpanDataSelector(DataSelector, SpanSelectorMixin, AssignMixin): pass +class DataTraceSelector: + """ + Interactive data-selection widget with traces and ranges. Traces are saved + as self.data_traces (WRT data) and self.index_traces (WRT index). + """ + def __init__(self, fid_array, + extra_data=None, + extra_data_colour='b', + voff=1e-3, + lw=1, + label=None, + ): + self.fid_array = fid_array + if fid_array.data is [] or fid_array.data is None: + raise ValueError('data must exist.') + data = fid_array.data + params = fid_array._params + sw_left = params['sw_left'] + sw = params['sw'] -class PeakAssigner: - """Interactive widget for assigning species to peaks in a FID.""" + ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] + + self.integral_selector = IntegralDataSelector( + extra_data, + params, + extra_data=data, + extra_data_colour=extra_data_colour, + peaks=None, + ranges=None, + title='Integral trace selector', + voff=voff, + label=label) + self.integral_selector.assign = self.assign + + def assign(self): + data_traces = self.integral_selector.psm.data_lines + index_traces = self.integral_selector.psm.index_lines + + self.fid_array._data_traces = [dict(zip(i[1], i[0])) for i in data_traces] + self.fid_array._index_traces = [dict(zip(i[1], i[0])) for i in index_traces] - def __init__(self, fid, species_list=None, title="Assign species"): - """ - Initialize peak assigner widget. 
+ decon_peaks = [] + for i in self.fid_array._deconvoluted_peaks: + if len(i): + decon_peaks.append(i.transpose()[0]) + else: + decon_peaks.append(None) - Parameters - ---------- - fid : Fid - The FID object to assign peaks for - species_source : Union[List[str], EnzymeMLDocument], optional - Either a list of species names or an EnzymeML document. - If None, will try to use fid.enzymeml_document - title : str, optional - Title for the widget - """ - self.fid = fid - self.title = title - self.selected_values = {} + trace_dict = {} + for t in range(len(self.fid_array._index_traces)): + trace = self.fid_array._index_traces[t] + integrals = {} + for fid, indx in trace.items(): + try: + integrals[fid] = numpy.argmin(abs(decon_peaks[fid]-indx)) + except: + integrals[fid] = None + trace_dict[t] = integrals + last_fid = (len(self.fid_array.get_fids())-1) + for i in trace_dict: + tmin = min(trace_dict[i]) + tminval = trace_dict[i][tmin] + if tmin > 0: + for j in range(0, tmin): + trace_dict[i][j] = tminval + tmax = max(trace_dict[i]) + tmaxval = trace_dict[i][tmax] + if tmax < last_fid: + for j in range(tmax, last_fid+1): + trace_dict[i][j] = tmaxval + self.fid_array.integral_traces = trace_dict + plt.close(self.integral_selector.fig) - # Determine species source and mode - self._setup_species_source(species_list) +class DataTraceRangeSelector: + """ + Interactive data-selection widget with traces and ranges. Traces are saved + as self.data_traces (WRT data) and self.index_traces (WRT index). Spans are + saves as self.spans. 
+ """ + def __init__(self, fid_array, + peaks=None, + ranges=None, + voff=1e-3, + lw=1, + label=None, + ): + self.fid_array = fid_array + if fid_array.data is [] or fid_array.data is None: + raise ValueError('data must exist.') + data = fid_array.data + params = fid_array._params + sw_left = params['sw_left'] + sw = params['sw'] - # Validate and initialize - self._validate_fid(self.fid) - self._setup_fid(self.fid) - self.available_peaks = [str(peak) for peak in self.fid.peaks] + ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] + + self.peak_selector = PeakTraceDataSelector( + data, + params, + peaks=peaks, + ranges=ranges, + title='Peak and range trace selector', + voff=voff, + label=label) + self.peak_selector.assign = self.assign - # Create and layout widgets - self._create_widgets() - self._setup_callbacks() - self._layout_widgets() + def assign(self): + data_traces = self.peak_selector.psm.data_lines + index_traces = self.peak_selector.psm.index_lines + spans = self.peak_selector.ssm.ranges + + traces = [[i[0], j[1]] for i, j in zip(data_traces, index_traces)] - def _setup_species_source(self, species_source): - # Configure species source and create list of available species - # Check for default case first - if species_source is None: - if not hasattr(self.fid, "enzymeml_species"): - raise ValueError( - "No species list provided and FID has no enzymeml_species" - ) - self.available_species = self.fid.enzymeml_species - return - # Check for EnzymeML document - elif isinstance(species_source, EnzymeMLDocument): - self.available_species = get_species_from_enzymeml(species_source) - return - # Check for list of strings - elif isinstance(species_source, list): - self.available_species = species_source - return - # If we get here, the input was invalid - else: - raise ValueError( - "species_list must be a list of species names, " - "an EnzymeML document, or None if FID has enzymeml_species" - ) + self.fid_array.traces = traces + 
self.fid_array._trace_mask = self.fid_array._generate_trace_mask(traces) - def _validate_fid(self, fid): - # Validates FID has peaks and ranges and len(peaks) == len(ranges) - if fid.peaks is None or len(fid.peaks) == 0: - raise RuntimeError( - "`fid.peaks` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." - ) - if fid.ranges is None or len(fid.ranges) == 0: - raise RuntimeError( - "`fid.ranges` is required but still empty. " - "Please assign them manually or with the `rangepicker` method." - ) - if len(fid.peaks) != len(fid.ranges): - raise RuntimeError( - "`fid.peaks` and `fid.ranges` must have the same length, as " - "each peak must have a range assigned to it." - ) + self.fid_array._set_all_peaks_ranges_from_traces_and_spans( + traces, spans) + plt.close(self.peak_selector.fig) - def _setup_fid(self, fid): - # Initialize species array and creates or updates Peak objects - # in data model if species from EnyzmeML are used. - - # Initialize empty species array - fid.species = numpy.empty(len(fid.peaks), dtype=object) - - # Create or update Peak objects in data model - for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): - if i < len(fid.fid_object.peaks): - # Peak already exists, update it - fid.fid_object.peaks[i].peak_position = float(peak_val) - fid.fid_object.peaks[i].peak_range = { - "start": float(range_val[0]), - "end": float(range_val[1]), - } - else: - # Peak does not yet exist, create it - fid.fid_object.add_to_peaks( - peak_index=i, - peak_position=float(peak_val), - peak_range={ - "start": float(range_val[0]), - "end": float(range_val[1]), - }, - ) +class DataPeakSelector: + """ + Interactive data-selection widget with lines and ranges for a single Fid. + Lines and spans are saved as self.peaks, self.ranges. 
+ """ + def __init__(self, fid, + peaks=None, + ranges=None, + voff=1e-3, + lw=1, + label=None, + title=None, + ): + self.fid = fid + if fid.data is [] or fid.data is None: + raise ValueError('data must exist.') + data = fid.data + params = fid._params + sw_left = params['sw_left'] + sw = params['sw'] + ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] - def _create_widgets(self): - # Create all widget components - self.title_label = Label(value=self.title) - self.peak_dropdown = Dropdown( - options=self.available_peaks, - description="Select a peak:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) - self.species_dropdown = Dropdown( - options=[ - format_species_string(species) for species in self.available_species - ], - description="Select a species:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) - self.save_button = Button( - description="Save selection", - icon="file-arrow-down", - ) - self.reset_button = Button(description="Reset selection", disabled=True) - self.selection_output = Output() + if fid.peaks is not None: + peaks = list(fid.peaks) + if fid.ranges is not None: + ranges = list(fid.ranges) + + self.peak_selector = LineSpanDataSelector( + data, + params, + peaks=peaks, + ranges=ranges, + title=title, + voff=voff, + label=label) + self.peak_selector.assign = self.assign + + def assign(self): + if len(self.peak_selector.ssm.ranges) > 0 and len(self.peak_selector.lsm.peaks) > 0: + self.fid.ranges = self.peak_selector.ssm.ranges + peaks = [] + for peak in self.peak_selector.lsm.peaks: + for rng in self.peak_selector.ssm.ranges: + if peak >= rng[1] and peak <= rng[0]: + peaks.append(peak) + self.fid.peaks = peaks + else: + self.fid.peaks = None + self.fid.ranges = None + plt.close(self.peak_selector.fig) - def _setup_callbacks(self): - # Set up all widget callbacks - self.save_button.on_click(self._handle_save) - self.reset_button.on_click(self._handle_reset) +class 
DataPeakRangeSelector: + """Interactive data-selection widget with lines and ranges. Lines and spans are saved as self.peaks, self.ranges.""" + def __init__(self, fid_array, + peaks=None, + ranges=None, + y_indices=None, + aoti=True, + voff=1e-3, + lw=1, + label=None, + ): + self.fid_array = fid_array + self.fids = fid_array.get_fids() + self.assign_only_to_index = aoti + self.fid_number = y_indices + if self.fid_number is not None: + if not nmrpy.data_objects.Fid._is_iter(self.fid_number): + self.fid_number = [self.fid_number] + else: + self.fid_number = range(len(self.fids)) + if fid_array.data is [] or fid_array.data is None: + raise ValueError('data must exist.') + data = fid_array.data + if y_indices is not None: + data = fid_array.data[numpy.array(self.fid_number)] + params = fid_array._params + sw_left = params['sw_left'] + sw = params['sw'] - def _layout_widgets(self): - # Create widget layout and display - self.container = VBox( - [ - self.title_label, - self.peak_dropdown, - self.species_dropdown, - self.save_button, - self.reset_button, - self.selection_output, - ] - ) - display(self.container) + ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] + + self.peak_selector = LineSpanDataSelector( + data, + params, + peaks=peaks, + ranges=ranges, + title='Peak and range selector', + voff=voff, + label=label) + self.peak_selector.assign = self.assign + + def assign(self): + self.peaks = self.peak_selector.lsm.peaks + self.ranges = self.peak_selector.ssm.ranges + + if len(self.ranges) > 0 and len(self.peaks) > 0: + ranges = self.ranges + peaks = [] + for peak in self.peaks: + for rng in ranges: + if peak >= rng[1] and peak <= rng[0]: + peaks.append(peak) + else: + peaks = None + ranges = None - def _handle_save(self, b): - # Handle save button click - with self.selection_output: - self.selection_output.clear_output(wait=True) + if self.assign_only_to_index: + for fid in [self.fids[i] for i in self.fid_number]: + fid.peaks = peaks + fid.ranges = 
ranges + else: + for fid in self.fids: + fid.peaks = peaks + fid.ranges = ranges + plt.close(self.peak_selector.fig) + +class Calibrator: + """ + Interactive data-selection widget for calibrating PPM of a spectrum. + """ + def __init__(self, fid, + lw=1, + label=None, + title=None, + ): + self.fid = fid + if fid.data is [] or fid.data is None: + raise ValueError('data must exist.') + if not fid._flags['ft']: + raise ValueError('Only Fourier-transformed data can be calibrated.') - species = self.species_dropdown.value - peak_value = float(self.peak_dropdown.value) + data = fid.data + params = fid._params + sw_left = params['sw_left'] + self.sw_left = sw_left + sw = params['sw'] + ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] - # Update selected values - if species not in self.selected_values: - self.selected_values[species] = [] - self.selected_values[species].append(peak_value) + self.peak_selector = PeakDataSelector( + data, + params, + title=title, + label=label) + self.peak_selector.process = self.process + + self.textinput = FloatText(value=0.0, description='New PPM:', + disabled=False, continuous_update=False) + + def _wait_for_change(self, widget, value): + future = asyncio.Future() + def getvalue(change): + # make the new value available + future.set_result(change.new) + widget.unobserve(getvalue, value) + widget.observe(getvalue, value) + return future + + def process(self): + peak = self.peak_selector.psm.peak + self.peak_selector.out.clear_output() + with self.peak_selector.out: + print('current peak ppm: {}'.format(peak)) + display(self.textinput) + async def f(): + newx = await self._wait_for_change(self.textinput, 'value') + offset = newx - peak + self.fid._params['sw_left'] = self.sw_left + offset + with self.peak_selector.out: + print('calibration done.') + plt.close(self.peak_selector.fig) + asyncio.ensure_future(f()) - # Update available peaks - self.available_peaks.remove(str(peak_value)) - self.peak_dropdown.options = 
self.available_peaks +class RangeCalibrator: + """ + Interactive data-selection widget for calibrating PPM of an + array of spectra. + """ + def __init__(self, fid_array, + y_indices=None, + aoti=True, + voff=1e-3, + lw=1, + label=None, + ): + self.fid_array = fid_array + self.fids = fid_array.get_fids() + self.assign_only_to_index = aoti + self.fid_number = y_indices + if self.fid_number is not None: + if not nmrpy.data_objects.Fid._is_iter(self.fid_number): + self.fid_number = [self.fid_number] + else: + self.fid_number = range(len(self.fids)) + if fid_array.data is [] or fid_array.data is None: + raise ValueError('data must exist.') + if any (not fid._flags['ft'] for fid in self.fids): + raise ValueError('Only Fourier-transformed data can be calibrated.') + data = fid_array.data + if y_indices is not None: + data = fid_array.data[numpy.array(self.fid_number)] + params = fid_array._params + sw_left = params['sw_left'] + self.sw_left = sw_left + sw = params['sw'] + ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] - if not self.available_peaks: - self.peak_dropdown.disabled = True - self.save_button.disabled = True + self.peak_selector = PeakDataSelector( + data, + params, + title='FidArray calibration', + voff = voff, + label=label) + self.peak_selector.process = self.process + + self.textinput = FloatText(value=0.0, description='New PPM:', + disabled=False, continuous_update=False) + + def _wait_for_change(self, widget, value): + future = asyncio.Future() + def getvalue(change): + # make the new value available + future.set_result(change.new) + widget.unobserve(getvalue, value) + widget.observe(getvalue, value) + return future + + def process(self): + peak = self.peak_selector.psm.peak + self.peak_selector.out.clear_output() + with self.peak_selector.out: + print('current peak ppm: {}'.format(peak)) + display(self.textinput) + async def f(): + newx = await self._wait_for_change(self.textinput, 'value') + offset = newx - peak + 
self._applycalibration(offset) + with self.peak_selector.out: + print('calibration done.') + plt.close(self.peak_selector.fig) + asyncio.ensure_future(f()) - # Update species array in FID - for species_id, peak_position in self.selected_values.items(): - self._update_fid(peak_position, species_id) - self._display_selections() + def _applycalibration(self, offset): + self.fid_array._params['sw_left'] = self.sw_left + offset + + if self.assign_only_to_index: + for fid in [self.fids[i] for i in self.fid_number]: + fid._params['sw_left'] = self.sw_left + offset + else: + for fid in self.fids: + fid._params['sw_left'] = self.sw_left + offset - # Re-enable the reset button - self.reset_button.disabled = False +class FidArrayRangeSelector: + """Interactive data-selection widget with ranges. Spans are saved as self.ranges.""" + def __init__(self, + fid_array, + ranges=None, + y_indices=None, + voff=1e-3, + lw=1, + title=None, + label=None, + ): + self.fid_array = fid_array + self.fids = fid_array.get_fids() + data = fid_array.data + params = fid_array._params + if data is [] or data is None: + raise ValueError('data must exist.') + if y_indices is not None: + data = data[numpy.array(y_indices)] + sw_left = params['sw_left'] + sw = params['sw'] - def _handle_reset(self, b): - # Handle reset button click - with self.selection_output: - self.selection_output.clear_output(wait=True) - print("\nCleared selections!") + ppm = numpy.linspace(sw_left-sw, sw_left, data.shape[1])[::-1] + + self.span_selector = SpanDataSelector( + data, + params, + ranges=ranges, + title=title, + voff=voff, + label=label) + self.span_selector.assign = self.assign - # Reset state - self.fid._flags["assigned"] = False - self.fid.species = numpy.empty(len(self.fid.peaks), dtype=object) - for peak_object in self.fid.fid_object.peaks: - peak_object.species_id = None - self.selected_values = {} - self.available_peaks = [str(peak) for peak in self.fid.peaks] + def assign(self): + self.ranges = 
self.span_selector.ssm.ranges + for fid in self.fid_array.get_fids(): + bl_ppm = [] + for rng in self.ranges: + peak_ind = (fid._ppm > rng[1]) * (fid._ppm < rng[0]) + cur_peaks = fid._ppm[peak_ind] + bl_ppm.append(cur_peaks) + bl_ppm = numpy.array([j for i in bl_ppm for j in i]) + fid._bl_ppm = bl_ppm + plt.close(self.span_selector.fig) - # Reset widgets - self.peak_dropdown.options = self.available_peaks - self.peak_dropdown.disabled = False - self.save_button.disabled = False - self.reset_button.disabled = True +class FidRangeSelector: + """Interactive data-selection widget with ranges. Spans are saved as self.ranges.""" + def __init__(self, + fid, + title=None, + ranges=None, + y_indices=None, + voff=1e-3, + lw=1, + label=None, + ): + self.fid=fid + data = fid.data + params = fid._params + if data is [] or data is None: + raise ValueError('data must exist.') + if y_indices is not None: + data = data[numpy.array(y_indices)] + sw_left = params['sw_left'] + sw = params['sw'] - def _update_fid(self, peak_position, species_id): - # Assign the species ID to the peak object and set the assigned - # flag to True. 
- for peak in self.fid.fid_object.peaks: - if peak.peak_position not in peak_position: - continue - peak.species_id = species_id.split(" ")[0] - self.fid.species[peak.peak_index] = peak.species_id - self.fid._flags["assigned"] = True + self.ppm = numpy.linspace(sw_left-sw, sw_left, len(data))[::-1] + + self.span_selector = SpanDataSelector( + data, + params, + ranges=ranges, + title=title, + voff=voff, + label=label) + self.span_selector.assign = self.assign - def _display_selections(self): - # Display current selections - print("\nSaved selections:") - for key, value in self.selected_values.items(): - print(f"{key}: {value}") + def assign(self): + self.ranges = self.span_selector.ssm.ranges + bl_ppm = [] + for rng in self.ranges: + peak_ind = (self.ppm > rng[1]) * (self.ppm < rng[0]) + cur_peaks = self.ppm[peak_ind] + bl_ppm.append(cur_peaks) + bl_ppm = numpy.array([j for i in bl_ppm for j in i]) + self.fid._bl_ppm = bl_ppm + plt.close(self.span_selector.fig) +class PeakAssigner: + """Interactive widget for assigning species to peaks in a FID.""" -class PeakRangeAssigner: - """Interactive widget for assigning species to peaks for all FIDs in - a FidArray based on one selected FID. - """ + def __init__(self, fid, species_list=None, title="Assign species"): + """ + Initialize peak assigner widget. 
- def __init__(self, fid_array, species_list=None, index_list=None): - self.fid_array = fid_array - self.selected_fid = None + Args: + fid (Fid): The FID object to assign peaks for + species_list (list): A list of species names + title (str): The title of the widget + """ + self.fid = fid + self.title = title self.selected_values = {} # Determine species source and mode self._setup_species_source(species_list) # Validate and initialize - self.fids = self._build_fids(index_list) - for fid in self.fids: - self._validate_fid(fid) - self._setup_fid(fid) + self._validate_fid(self.fid) + self._setup_fid(self.fid) + self.available_peaks = [str(peak) for peak in self.fid.peaks] # Create and layout widgets self._create_widgets() @@ -1620,16 +1745,13 @@ def __init__(self, fid_array, species_list=None, index_list=None): def _setup_species_source(self, species_source): # Configure species source and create list of available species - # Check for default case first if species_source is None: - if not hasattr(self.fid_array, "enzymeml_document"): + if not hasattr(self.fid, "enzymeml_species"): raise ValueError( - "No species list provided and FIDArray has no enzymeml_document" + "No species list provided and FID has no enzymeml_species" ) - self.available_species = get_species_from_enzymeml( - self.fid_array.enzymeml_document - ) + self.available_species = self.fid.enzymeml_species return # Check for EnzymeML document elif isinstance(species_source, EnzymeMLDocument): @@ -1641,46 +1763,13 @@ def _setup_species_source(self, species_source): return # If we get here, the input was invalid else: - raise ValueError( - "species_list must be a list of species names, an EnzymeML " - "document, or None if FIDArray has enzymeml_document" - ) - - def _build_fids(self, index_list): - # Create the list of FIDs available to the widget based on - # the index_list. 
As the formatting of the FID IDs is - # dependent on the number of FIDs available, - # If no specific indices are provided, grab all FIDs - if not index_list: - return self.fid_array.get_fids() - - # Hand - # 1) Basic bounds check - total_fids = len(self.fid_array.get_fids()) - for i in index_list: - if i >= total_fids: - raise IndexError( - f"Index {i} is out of bounds (there are {total_fids} FIDs)." - ) - - # 2) Determine how many digits for the ID - n_digits = len(str(total_fids - 1)) # e.g., 2 if up to 99, 3 if up to 999 - if n_digits == 1: - fid_format = "fid{}" - else: - fid_format = f"fid{{:0{n_digits}d}}" - - # 3) Build the list of FIDs - fids = [] - for i in index_list: - fid_id = fid_format.format(i) - fids.append(self.fid_array.get_fid(fid_id)) - - return fids + raise ValueError( + "species_list must be a list of species names, " + "an EnzymeML document, or None if FID has enzymeml_species" + ) def _validate_fid(self, fid): - # Validate that FID has peaks and ranges and that their - # lengths are the same + # Validates FID has peaks and ranges and len(peaks) == len(ranges) if fid.peaks is None or len(fid.peaks) == 0: raise RuntimeError( "`fid.peaks` is required but still empty. " @@ -1698,11 +1787,12 @@ def _validate_fid(self, fid): ) def _setup_fid(self, fid): - # Initialize species array and create or update Peak objects in - # data model + # Initialize species array and creates or updates Peak objects + # in data model if species from EnyzmeML are used. 
# Initialize empty species array fid.species = numpy.empty(len(fid.peaks), dtype=object) + # Create or update Peak objects in data model for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): if i < len(fid.fid_object.peaks): @@ -1725,653 +1815,403 @@ def _setup_fid(self, fid): def _create_widgets(self): # Create all widget components - self.title_label = Label(value="Assign peaks for all FIDs") - self.combobox = Combobox( - options=[fid.id for fid in self.fids], - description="Select FID to base entire array on:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - ) + self.title_label = Label(value=self.title) self.peak_dropdown = Dropdown( - options=[], + options=self.available_peaks, description="Select a peak:", layout={"width": "max-content"}, style={"description_width": "initial"}, - disabled=True, ) self.species_dropdown = Dropdown( - options=[], - description="Select a species:", - layout={"width": "max-content"}, - style={"description_width": "initial"}, - disabled=True, - ) - self.save_button = Button( - description="Save selection", icon="file-arrow-down", disabled=True - ) - self.reset_button = Button(description="Reset selection", disabled=True) - self.selection_output = Output() - - def _setup_callbacks(self): - # Set up all widget callbacks - self.combobox.observe(self._handle_combobox_change) - self.peak_dropdown.observe(self._handle_peak_change) - self.species_dropdown.observe(self._handle_species_change) - self.save_button.on_click(self._handle_save) - self.reset_button.on_click(self._handle_reset) - - def _layout_widgets(self): - # Create widget layout and display - self.container = VBox( - [ - self.title_label, - self.combobox, - self.peak_dropdown, - self.species_dropdown, - self.save_button, - self.reset_button, - self.selection_output, - ] - ) - display(self.container) - - def _handle_combobox_change(self, event): - # Enable the peak dropdown when a FID is selected - if event["type"] == "change" and 
event["name"] == "value": - selected_option = event["new"] - if selected_option in self.combobox.options: - self.peak_dropdown.disabled = False - self.selected_fid = self.fid_array.get_fid(selected_option) - self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - self.peak_dropdown.options = self.available_peaks - if self.peak_dropdown.options: - self.peak_dropdown.value = self.peak_dropdown.options[0] - - def _handle_peak_change(self, event): - # Format the species options for disply and enable the species - # dropdown when a peak is selected - if event["type"] == "change" and event["name"] == "value": - self.species_dropdown.disabled = False - self.species_dropdown.options = [ + options=[ format_species_string(species) for species in self.available_species - ] - if self.species_dropdown.options: - self.species_dropdown.value = self.species_dropdown.options[0] - - def _handle_species_change(self, event): - # Enable the save button when a species is selected - if event["type"] == "change" and event["name"] == "value": - self.save_button.disabled = False - - def _handle_save(self, b): - with self.selection_output: - self.selection_output.clear_output(wait=True) - - species = self.species_dropdown.value - peak_value = float(self.peak_dropdown.value) - - # Update selected values - if species not in self.selected_values: - self.selected_values[species] = [] - self.selected_values[species].append(peak_value) - - # Update available peaks - self.available_peaks.remove(self.peak_dropdown.value) - self.peak_dropdown.options = self.available_peaks - - if not self.available_peaks: - self.peak_dropdown.disabled = True - - # Update FIDs - for species_id, peak_position in self.selected_values.items(): - for fid in self.fids: - self._update_fid(fid, peak_position, species_id) - - # Print the selected values - self._display_selections() - - # Re-enable the reset button - self.reset_button.disabled = False - - def _handle_reset(self, b): - # Reset the widget state 
- with self.selection_output: - self.selection_output.clear_output(wait=True) - print("\nCleared selections!") - # Reset FIDs' state - for fid in self.fids: - fid._flags["assigned"] = False - fid.species = numpy.empty(len(fid.peaks), dtype=object) - for peak_object in fid.fid_object.peaks: - peak_object.species_id = None - self.selected_values = {} - self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - - # Reset widgets - self.peak_dropdown.options = self.available_peaks - self.peak_dropdown.disabled = False - self.reset_button.disabled = True - - def _update_fid(self, fid, peak_position, species_id): - # Assign the species ID to the peak object and set the assigned - # flag to True. - for peak in fid.fid_object.peaks: - if peak.peak_position not in peak_position: - continue - peak.species_id = species_id.split(" ")[0] - fid.species[peak.peak_index] = peak.species_id - fid._flags["assigned"] = True - - def _display_selections(self): - # Display current selections - print("\nSaved selections:") - for key, value in self.selected_values.items(): - print(f"{key}: {value}") - - -class DataTraceSelector: - """ - Interactive data-selection widget with traces and ranges. Traces are saved - as self.data_traces (WRT data) and self.index_traces (WRT index). 
- """ - - def __init__( - self, - fid_array, - extra_data=None, - extra_data_colour="b", - voff=1e-3, - lw=1, - label=None, - ): - self.fid_array = fid_array - if fid_array.data is [] or fid_array.data is None: - raise ValueError("data must exist.") - data = fid_array.data - params = fid_array._params - sw_left = params["sw_left"] - sw = params["sw"] - - ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - - self.integral_selector = IntegralDataSelector( - extra_data, - params, - extra_data=data, - extra_data_colour=extra_data_colour, - peaks=None, - ranges=None, - title="Integral trace selector", - voff=voff, - label=label, - ) - self.integral_selector.assign = self.assign - - def assign(self): - data_traces = self.integral_selector.psm.data_lines - index_traces = self.integral_selector.psm.index_lines - - self.fid_array._data_traces = [dict(zip(i[1], i[0])) for i in data_traces] - self.fid_array._index_traces = [dict(zip(i[1], i[0])) for i in index_traces] - - decon_peaks = [] - for i in self.fid_array._deconvoluted_peaks: - if len(i): - decon_peaks.append(i.transpose()[0]) - else: - decon_peaks.append(None) - - trace_dict = {} - for t in range(len(self.fid_array._index_traces)): - trace = self.fid_array._index_traces[t] - integrals = {} - for fid, indx in trace.items(): - try: - integrals[fid] = numpy.argmin(abs(decon_peaks[fid] - indx)) - except Exception: - integrals[fid] = None - trace_dict[t] = integrals - last_fid = len(self.fid_array.get_fids()) - 1 - for i in trace_dict: - tmin = min(trace_dict[i]) - tminval = trace_dict[i][tmin] - if tmin > 0: - for j in range(0, tmin): - trace_dict[i][j] = tminval - tmax = max(trace_dict[i]) - tmaxval = trace_dict[i][tmax] - if tmax < last_fid: - for j in range(tmax, last_fid + 1): - trace_dict[i][j] = tmaxval - self.fid_array.integral_traces = trace_dict - plt.close(self.integral_selector.fig) - - -class DataTraceRangeSelector: - """ - Interactive data-selection widget with traces and ranges. 
Traces are saved - as self.data_traces (WRT data) and self.index_traces (WRT index). Spans are - saves as self.spans. - """ - - def __init__( - self, - fid_array, - peaks=None, - ranges=None, - voff=1e-3, - lw=1, - label=None, - ): - self.fid_array = fid_array - if fid_array.data is [] or fid_array.data is None: - raise ValueError("data must exist.") - data = fid_array.data - params = fid_array._params - sw_left = params["sw_left"] - sw = params["sw"] - - ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] - - self.peak_selector = PeakTraceDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title="Peak and range trace selector", - voff=voff, - label=label, - ) - self.peak_selector.assign = self.assign - - def assign(self): - data_traces = self.peak_selector.psm.data_lines - index_traces = self.peak_selector.psm.index_lines - spans = self.peak_selector.ssm.ranges - - traces = [[i[0], j[1]] for i, j in zip(data_traces, index_traces)] - - self.fid_array.traces = traces - self.fid_array._trace_mask = self.fid_array._generate_trace_mask(traces) - - self.fid_array._set_all_peaks_ranges_from_traces_and_spans(traces, spans) - plt.close(self.peak_selector.fig) - - -class DataPeakSelector: - """ - Interactive data-selection widget with lines and ranges for a single Fid. - Lines and spans are saved as self.peaks, self.ranges. 
- """ - - def __init__( - self, - fid, - peaks=None, - ranges=None, - voff=1e-3, - lw=1, - label=None, - title=None, - ): - self.fid = fid - if fid.data is [] or fid.data is None: - raise ValueError("data must exist.") - data = fid.data - params = fid._params - sw_left = params["sw_left"] - sw = params["sw"] - ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] + ], + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + self.save_button = Button( + description="Save selection", + icon="file-arrow-down", + ) + self.reset_button = Button(description="Reset selection", disabled=True) + self.selection_output = Output() - if fid.peaks is not None: - peaks = list(fid.peaks) - if fid.ranges is not None: - ranges = list(fid.ranges) + def _setup_callbacks(self): + # Set up all widget callbacks + self.save_button.on_click(self._handle_save) + self.reset_button.on_click(self._handle_reset) - self.peak_selector = LineSpanDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title=title, - voff=voff, - label=label, + def _layout_widgets(self): + # Create widget layout and display + self.container = VBox( + [ + self.title_label, + self.peak_dropdown, + self.species_dropdown, + self.save_button, + self.reset_button, + self.selection_output, + ] ) - self.peak_selector.assign = self.assign + display(self.container) - def assign(self): - if ( - len(self.peak_selector.ssm.ranges) > 0 - and len(self.peak_selector.lsm.peaks) > 0 - ): - self.fid.ranges = self.peak_selector.ssm.ranges - peaks = [] - for peak in self.peak_selector.lsm.peaks: - for rng in self.peak_selector.ssm.ranges: - if peak >= rng[1] and peak <= rng[0]: - peaks.append(peak) - self.fid.peaks = peaks - else: - self.fid.peaks = None - self.fid.ranges = None - plt.close(self.peak_selector.fig) + def _handle_save(self, b): + # Handle save button click + with self.selection_output: + self.selection_output.clear_output(wait=True) + species = 
self.species_dropdown.value + peak_value = float(self.peak_dropdown.value) -class DataPeakRangeSelector: - """Interactive data-selection widget with lines and ranges. Lines and spans are saved as self.peaks, self.ranges.""" + # Update selected values + if species not in self.selected_values: + self.selected_values[species] = [] + self.selected_values[species].append(peak_value) - def __init__( - self, - fid_array, - peaks=None, - ranges=None, - y_indices=None, - aoti=True, - voff=1e-3, - lw=1, - label=None, - ): - self.fid_array = fid_array - self.fids = fid_array.get_fids() - self.assign_only_to_index = aoti - self.fid_number = y_indices - if self.fid_number is not None: - if not nmrpy.data_objects.Fid._is_iter(self.fid_number): - self.fid_number = [self.fid_number] - else: - self.fid_number = range(len(self.fids)) - if fid_array.data is [] or fid_array.data is None: - raise ValueError("data must exist.") - data = fid_array.data - if y_indices is not None: - data = fid_array.data[numpy.array(self.fid_number)] - params = fid_array._params - sw_left = params["sw_left"] - sw = params["sw"] + # Update available peaks + self.available_peaks.remove(str(peak_value)) + self.peak_dropdown.options = self.available_peaks - ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] + if not self.available_peaks: + self.peak_dropdown.disabled = True + self.save_button.disabled = True - self.peak_selector = LineSpanDataSelector( - data, - params, - peaks=peaks, - ranges=ranges, - title="Peak and range selector", - voff=voff, - label=label, - ) - self.peak_selector.assign = self.assign + # Update species array in FID + for species_id, peak_position in self.selected_values.items(): + self._update_fid(peak_position, species_id) + self._display_selections() - def assign(self): - self.peaks = self.peak_selector.lsm.peaks - self.ranges = self.peak_selector.ssm.ranges + # Re-enable the reset button + self.reset_button.disabled = False - if len(self.ranges) > 0 and 
len(self.peaks) > 0: - ranges = self.ranges - peaks = [] - for peak in self.peaks: - for rng in ranges: - if peak >= rng[1] and peak <= rng[0]: - peaks.append(peak) - else: - peaks = None - ranges = None + def _handle_reset(self, b): + # Handle reset button click + with self.selection_output: + self.selection_output.clear_output(wait=True) + print("\nCleared selections!") - if self.assign_only_to_index: - for fid in [self.fids[i] for i in self.fid_number]: - fid.peaks = peaks - fid.ranges = ranges - else: - for fid in self.fids: - fid.peaks = peaks - fid.ranges = ranges - plt.close(self.peak_selector.fig) + # Reset state + self.fid._flags["assigned"] = False + self.fid.species = numpy.empty(len(self.fid.peaks), dtype=object) + for peak_object in self.fid.fid_object.peaks: + peak_object.species_id = None + self.selected_values = {} + self.available_peaks = [str(peak) for peak in self.fid.peaks] + # Reset widgets + self.peak_dropdown.options = self.available_peaks + self.peak_dropdown.disabled = False + self.save_button.disabled = False + self.reset_button.disabled = True -class Calibrator: - """ - Interactive data-selection widget for calibrating PPM of a spectrum. - """ + def _update_fid(self, peak_position, species_id): + # Assign the species ID to the peak object and set the assigned + # flag to True. 
+ for peak in self.fid.fid_object.peaks: + if peak.peak_position not in peak_position: + continue + peak.species_id = species_id.split(" ")[0] + self.fid.species[peak.peak_index] = peak.species_id + self.fid._flags["assigned"] = True - def __init__( - self, - fid, - lw=1, - label=None, - title=None, - ): - self.fid = fid - if fid.data is [] or fid.data is None: - raise ValueError("data must exist.") - if not fid._flags["ft"]: - raise ValueError("Only Fourier-transformed data can be calibrated.") + def _display_selections(self): + # Display current selections + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") - data = fid.data - params = fid._params - sw_left = params["sw_left"] - self.sw_left = sw_left - sw = params["sw"] - ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] - self.peak_selector = PeakDataSelector(data, params, title=title, label=label) - self.peak_selector.process = self.process +class PeakRangeAssigner: + """Interactive widget for assigning species to peaks for all FIDs in + a FidArray based on one selected FID. + """ - self.textinput = FloatText( - value=0.0, - description="New PPM:", - disabled=False, - continuous_update=False, - ) + def __init__(self, fid_array, species_list=None, index_list=None): + """ + Initialize peak assigner widget. 
- def _wait_for_change(self, widget, value): - future = asyncio.Future() + Args: + fid_array (FidArray): The FidArray object to assign peaks for + species_list (list): A list of species names + index_list (list): A list of indices of FIDs to assign peaks for + """ + self.fid_array = fid_array + self.selected_fid = None + self.selected_values = {} - def getvalue(change): - # make the new value available - future.set_result(change.new) - widget.unobserve(getvalue, value) + # Determine species source and mode + self._setup_species_source(species_list) - widget.observe(getvalue, value) - return future + # Validate and initialize + self.fids = self._build_fids(index_list) + for fid in self.fids: + self._validate_fid(fid) + self._setup_fid(fid) - def process(self): - peak = self.peak_selector.psm.peak - self.peak_selector.out.clear_output() - with self.peak_selector.out: - print("current peak ppm: {}".format(peak)) - display(self.textinput) + # Create and layout widgets + self._create_widgets() + self._setup_callbacks() + self._layout_widgets() - async def f(): - newx = await self._wait_for_change(self.textinput, "value") - offset = newx - peak - self.fid._params["sw_left"] = self.sw_left + offset - with self.peak_selector.out: - print("calibration done.") - plt.close(self.peak_selector.fig) + def _setup_species_source(self, species_source): + # Configure species source and create list of available species - asyncio.ensure_future(f()) + # Check for default case first + if species_source is None: + if not hasattr(self.fid_array, "enzymeml_document"): + raise ValueError( + "No species list provided and FIDArray has no enzymeml_document" + ) + self.available_species = get_species_from_enzymeml( + self.fid_array.enzymeml_document + ) + return + # Check for EnzymeML document + elif isinstance(species_source, EnzymeMLDocument): + self.available_species = get_species_from_enzymeml(species_source) + return + # Check for list of strings + elif isinstance(species_source, list): + 
self.available_species = species_source + return + # If we get here, the input was invalid + else: + raise ValueError( + "species_list must be a list of species names, an EnzymeML " + "document, or None if FIDArray has enzymeml_document" + ) + def _build_fids(self, index_list): + # Create the list of FIDs available to the widget based on + # the index_list. As the formatting of the FID IDs is + # dependent on the number of FIDs available, + # If no specific indices are provided, grab all FIDs + if not index_list: + return self.fid_array.get_fids() -class RangeCalibrator: - """ - Interactive data-selection widget for calibrating PPM of an - array of spectra. - """ + # Hand + # 1) Basic bounds check + total_fids = len(self.fid_array.get_fids()) + for i in index_list: + if i >= total_fids: + raise IndexError( + f"Index {i} is out of bounds (there are {total_fids} FIDs)." + ) - def __init__( - self, - fid_array, - y_indices=None, - aoti=True, - voff=1e-3, - lw=1, - label=None, - ): - self.fid_array = fid_array - self.fids = fid_array.get_fids() - self.assign_only_to_index = aoti - self.fid_number = y_indices - if self.fid_number is not None: - if not nmrpy.data_objects.Fid._is_iter(self.fid_number): - self.fid_number = [self.fid_number] - else: - self.fid_number = range(len(self.fids)) - if fid_array.data is [] or fid_array.data is None: - raise ValueError("data must exist.") - if any(not fid._flags["ft"] for fid in self.fids): - raise ValueError("Only Fourier-transformed data can be calibrated.") - data = fid_array.data - if y_indices is not None: - data = fid_array.data[numpy.array(self.fid_number)] - params = fid_array._params - sw_left = params["sw_left"] - self.sw_left = sw_left - sw = params["sw"] - ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] + # 2) Determine how many digits for the ID + n_digits = len(str(total_fids - 1)) # e.g., 2 if up to 99, 3 if up to 999 + if n_digits == 1: + fid_format = "fid{}" + else: + fid_format = 
f"fid{{:0{n_digits}d}}" - self.peak_selector = PeakDataSelector( - data, params, title="FidArray calibration", voff=voff, label=label - ) - self.peak_selector.process = self.process + # 3) Build the list of FIDs + fids = [] + for i in index_list: + fid_id = fid_format.format(i) + fids.append(self.fid_array.get_fid(fid_id)) - self.textinput = FloatText( - value=0.0, - description="New PPM:", - disabled=False, - continuous_update=False, - ) + return fids - def _wait_for_change(self, widget, value): - future = asyncio.Future() + def _validate_fid(self, fid): + # Validate that FID has peaks and ranges and that their + # lengths are the same + if fid.peaks is None or len(fid.peaks) == 0: + raise RuntimeError( + "`fid.peaks` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + if fid.ranges is None or len(fid.ranges) == 0: + raise RuntimeError( + "`fid.ranges` is required but still empty. " + "Please assign them manually or with the `rangepicker` method." + ) + if len(fid.peaks) != len(fid.ranges): + raise RuntimeError( + "`fid.peaks` and `fid.ranges` must have the same length, as " + "each peak must have a range assigned to it." 
+ ) - def getvalue(change): - # make the new value available - future.set_result(change.new) - widget.unobserve(getvalue, value) + def _setup_fid(self, fid): + # Initialize species array and create or update Peak objects in + # data model - widget.observe(getvalue, value) - return future + # Initialize empty species array + fid.species = numpy.empty(len(fid.peaks), dtype=object) + # Create or update Peak objects in data model + for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): + if i < len(fid.fid_object.peaks): + # Peak already exists, update it + fid.fid_object.peaks[i].peak_position = float(peak_val) + fid.fid_object.peaks[i].peak_range = { + "start": float(range_val[0]), + "end": float(range_val[1]), + } + else: + # Peak does not yet exist, create it + fid.fid_object.add_to_peaks( + peak_index=i, + peak_position=float(peak_val), + peak_range={ + "start": float(range_val[0]), + "end": float(range_val[1]), + }, + ) - def process(self): - peak = self.peak_selector.psm.peak - self.peak_selector.out.clear_output() - with self.peak_selector.out: - print("current peak ppm: {}".format(peak)) - display(self.textinput) + def _create_widgets(self): + # Create all widget components + self.title_label = Label(value="Assign peaks for all FIDs") + self.combobox = Combobox( + options=[fid.id for fid in self.fids], + description="Select FID to base entire array on:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + self.peak_dropdown = Dropdown( + options=[], + description="Select a peak:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + self.species_dropdown = Dropdown( + options=[], + description="Select a species:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + self.save_button = Button( + description="Save selection", icon="file-arrow-down", disabled=True + ) + self.reset_button = Button(description="Reset 
selection", disabled=True) + self.selection_output = Output() - async def f(): - newx = await self._wait_for_change(self.textinput, "value") - offset = newx - peak - self._applycalibration(offset) - with self.peak_selector.out: - print("calibration done.") - plt.close(self.peak_selector.fig) + def _setup_callbacks(self): + # Set up all widget callbacks + self.combobox.observe(self._handle_combobox_change) + self.peak_dropdown.observe(self._handle_peak_change) + self.species_dropdown.observe(self._handle_species_change) + self.save_button.on_click(self._handle_save) + self.reset_button.on_click(self._handle_reset) - asyncio.ensure_future(f()) + def _layout_widgets(self): + # Create widget layout and display + self.container = VBox( + [ + self.title_label, + self.combobox, + self.peak_dropdown, + self.species_dropdown, + self.save_button, + self.reset_button, + self.selection_output, + ] + ) + display(self.container) - def _applycalibration(self, offset): - self.fid_array._params["sw_left"] = self.sw_left + offset + def _handle_combobox_change(self, event): + # Enable the peak dropdown when a FID is selected + if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + if selected_option in self.combobox.options: + self.peak_dropdown.disabled = False + self.selected_fid = self.fid_array.get_fid(selected_option) + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] + self.peak_dropdown.options = self.available_peaks + if self.peak_dropdown.options: + self.peak_dropdown.value = self.peak_dropdown.options[0] - if self.assign_only_to_index: - for fid in [self.fids[i] for i in self.fid_number]: - fid._params["sw_left"] = self.sw_left + offset - else: - for fid in self.fids: - fid._params["sw_left"] = self.sw_left + offset + def _handle_peak_change(self, event): + # Format the species options for disply and enable the species + # dropdown when a peak is selected + if event["type"] == "change" and event["name"] == 
"value": + self.species_dropdown.disabled = False + self.species_dropdown.options = [ + format_species_string(species) for species in self.available_species + ] + if self.species_dropdown.options: + self.species_dropdown.value = self.species_dropdown.options[0] + def _handle_species_change(self, event): + # Enable the save button when a species is selected + if event["type"] == "change" and event["name"] == "value": + self.save_button.disabled = False -class FidArrayRangeSelector: - """Interactive data-selection widget with ranges. Spans are saved as self.ranges.""" + def _handle_save(self, b): + with self.selection_output: + self.selection_output.clear_output(wait=True) - def __init__( - self, - fid_array, - ranges=None, - y_indices=None, - voff=1e-3, - lw=1, - title=None, - label=None, - ): - self.fid_array = fid_array - self.fids = fid_array.get_fids() - data = fid_array.data - params = fid_array._params - if data is [] or data is None: - raise ValueError("data must exist.") - if y_indices is not None: - data = data[numpy.array(y_indices)] - sw_left = params["sw_left"] - sw = params["sw"] + species = self.species_dropdown.value + peak_value = float(self.peak_dropdown.value) - ppm = numpy.linspace(sw_left - sw, sw_left, data.shape[1])[::-1] + # Update selected values + if species not in self.selected_values: + self.selected_values[species] = [] + self.selected_values[species].append(peak_value) - self.span_selector = SpanDataSelector( - data, params, ranges=ranges, title=title, voff=voff, label=label - ) - self.span_selector.assign = self.assign + # Update available peaks + self.available_peaks.remove(self.peak_dropdown.value) + self.peak_dropdown.options = self.available_peaks - def assign(self): - self.ranges = self.span_selector.ssm.ranges - for fid in self.fid_array.get_fids(): - bl_ppm = [] - for rng in self.ranges: - peak_ind = (fid._ppm > rng[1]) * (fid._ppm < rng[0]) - cur_peaks = fid._ppm[peak_ind] - bl_ppm.append(cur_peaks) - bl_ppm = numpy.array([j for 
i in bl_ppm for j in i]) - fid._bl_ppm = bl_ppm - plt.close(self.span_selector.fig) + if not self.available_peaks: + self.peak_dropdown.disabled = True + # Update FIDs + for species_id, peak_position in self.selected_values.items(): + for fid in self.fids: + self._update_fid(fid, peak_position, species_id) -class FidRangeSelector: - """Interactive data-selection widget with ranges. Spans are saved as self.ranges.""" + # Print the selected values + self._display_selections() - def __init__( - self, - fid, - title=None, - ranges=None, - y_indices=None, - voff=1e-3, - lw=1, - label=None, - ): - self.fid = fid - data = fid.data - params = fid._params - if data is [] or data is None: - raise ValueError("data must exist.") - if y_indices is not None: - data = data[numpy.array(y_indices)] - sw_left = params["sw_left"] - sw = params["sw"] + # Re-enable the reset button + self.reset_button.disabled = False - self.ppm = numpy.linspace(sw_left - sw, sw_left, len(data))[::-1] + def _handle_reset(self, b): + # Reset the widget state + with self.selection_output: + self.selection_output.clear_output(wait=True) + print("\nCleared selections!") + # Reset FIDs' state + for fid in self.fids: + fid._flags["assigned"] = False + fid.species = numpy.empty(len(fid.peaks), dtype=object) + for peak_object in fid.fid_object.peaks: + peak_object.species_id = None + self.selected_values = {} + self.available_peaks = [str(peak) for peak in self.selected_fid.peaks] - self.span_selector = SpanDataSelector( - data, params, ranges=ranges, title=title, voff=voff, label=label - ) - self.span_selector.assign = self.assign + # Reset widgets + self.peak_dropdown.options = self.available_peaks + self.peak_dropdown.disabled = False + self.reset_button.disabled = True - def assign(self): - self.ranges = self.span_selector.ssm.ranges - bl_ppm = [] - for rng in self.ranges: - peak_ind = (self.ppm > rng[1]) * (self.ppm < rng[0]) - cur_peaks = self.ppm[peak_ind] - bl_ppm.append(cur_peaks) - bl_ppm = 
numpy.array([j for i in bl_ppm for j in i]) - self.fid._bl_ppm = bl_ppm - plt.close(self.span_selector.fig) + def _update_fid(self, fid, peak_position, species_id): + # Assign the species ID to the peak object and set the assigned + # flag to True. + for peak in fid.fid_object.peaks: + if peak.peak_position not in peak_position: + continue + peak.species_id = species_id.split(" ")[0] + fid.species[peak.peak_index] = peak.species_id + fid._flags["assigned"] = True + def _display_selections(self): + # Display current selections + print("\nSaved selections:") + for key, value in self.selected_values.items(): + print(f"{key}: {value}") class ConcentrationCalculator: + """ + Widget for calculating concentrations. + """ def __init__(self): raise NotImplementedError( "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." ) - -if __name__ == "__main__": +if __name__ == '__main__': pass From 1909b9a61f0652ba72e7b652de0fde74453c317d Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 28 Jan 2025 17:13:00 +0100 Subject: [PATCH 28/54] Update regular and add optional requirements --- requirements.txt | 1 + setup.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 808213f..47e3e76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ notebook>=6.0.0 ipython>=7.0.0 lmfit>=0.9.10 nmrglue>=0.6 +pydantic>=2.0.0 diff --git a/setup.py b/setup.py index 273da25..d9a5703 100644 --- a/setup.py +++ b/setup.py @@ -43,10 +43,11 @@ NMRPy is developed by Johann Eicher and Johann Rohwer from the Laboratory for Molecular Systems Biology, Dept. of Biochemistry, Stellenbosch University, -South Africa. +South Africa, as well as Torsten Giess from the Insitute of Biochemistry and +Technical Biochemistry, University of Stuttgart, Germany. 
""", - 'author': 'Johann Eicher , Johann Rohwer ', - 'author_email': 'johanneicher@gmail.com, j.m.rohwer@gmail.com', + 'author': 'Johann Eicher , Johann Rohwer , Torsten Giess ', + 'author_email': 'johanneicher@gmail.com, j.m.rohwer@gmail.com, torsten.giess@ibtb.uni-stuttgart.de', 'maintainer': 'Johann Rohwer', 'maintainer_email': 'j.m.rohwer@gmail.com', 'url': 'https://github.com/NMRPy/nmrpy', @@ -56,7 +57,13 @@ 'packages': ['nmrpy', 'nmrpy.tests'], 'package_data': {'nmrpy.tests': mydata_nmrpy_test, 'nmrpy': mydata_nmrpy}, 'license': 'New BSD', - 'name': 'nmrpy' + 'name': 'nmrpy', + 'extras_require': { + 'pyenzyme': [ + 'pyenzyme @ git+https://github.com/EnzymeML/PyEnzyme.git@v2-migration#egg=pyenzyme', + 'sympy' + ] + } } setup(**config) From 921b9ec27fadbcc51d77056a3b93eb22fd607bfd Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 29 Jan 2025 10:27:01 +0100 Subject: [PATCH 29/54] Add graceful handling of optional imports --- nmrpy/data_objects.py | 47 ++++++++++++++++++++++++++++++++++++++----- nmrpy/plotting.py | 15 ++++++++------ nmrpy/utils.py | 37 ++++++++++++++++++++++++++++------ 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 250a122..27e9411 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -19,9 +19,12 @@ Peak, PeakRange, ) -from nmrpy.utils import create_enzymeml, get_species_from_enzymeml -import pyenzyme as pe -from pyenzyme.model import EnzymeMLDocument +try: + import pyenzyme + from pyenzyme.model import EnzymeMLDocument + from nmrpy.utils import create_enzymeml, get_species_from_enzymeml +except ImportError: + pyenzyme = None class Base(): @@ -1301,7 +1304,17 @@ def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): Attach a species to the selected peak from second dropdown menu containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. 
+ + Args: + species_list (list[str] | EnzymeMLDocument): The list of species to assign to the peaks. + + Raises: + RuntimeError: If EnzymeML document is provided but the `pyenzyme` package is not installed. """ + if (pyenzyme is None) and (isinstance(species_list, EnzymeMLDocument)): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) self._assigner_widget = PeakAssigner( fid=self, species_list=species_list, @@ -1372,6 +1385,10 @@ def enzymeml_document(self): @enzymeml_document.setter def enzymeml_document(self, enzymeml_document): + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( f'Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead.' @@ -1616,9 +1633,16 @@ def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: Args: path_to_enzymeml_document (str): Path to file containing an EnzymeML document + + Raises: + RuntimeError: If the `pyenzyme` package is not installed. """ - self.enzymeml_document = pe.read_enzymeml( - cls=pe.EnzymeMLDocument, path=path_to_enzymeml_document + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) + self.enzymeml_document = pyenzyme.read_enzymeml( + cls=pyenzyme.EnzymeMLDocument, path=path_to_enzymeml_document ) @classmethod @@ -2304,7 +2328,20 @@ def save_to_file(self, filename=None, overwrite=False): def apply_to_enzymeml(self, enzymeml_document = None) -> EnzymeMLDocument: """ Apply the calculated concentrations from the FidArray to an EnzymeMLDocument. 
+ + Args: + enzymeml_document (EnzymeMLDocument): The EnzymeML document to apply the concentrations to. + + Returns: + EnzymeMLDocument: The EnzymeML document with the concentrations applied. + + Raises: + RuntimeError: If the `pyenzyme` package is not installed. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) if not enzymeml_document: enzymeml_document = self.enzymeml_document return create_enzymeml(self, enzymeml_document) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 25aea70..4ac8325 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -15,12 +15,15 @@ from IPython.display import display import asyncio -from pyenzyme.model import EnzymeMLDocument -from nmrpy.utils import ( - format_species_string, - get_ordered_list_of_species_names, - get_species_from_enzymeml, -) +from nmrpy.utils import format_species_string +try: + from pyenzyme.model import EnzymeMLDocument + from nmrpy.utils import ( + get_ordered_list_of_species_names, + get_species_from_enzymeml, + ) +except ImportError: + pyenzyme = None class Plot(): """ diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 96a9ab7..1f4840e 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -1,7 +1,10 @@ -import sympy as sp - -import pyenzyme as pe -from pyenzyme.model import EnzymeMLDocument +try: + import sympy + import pyenzyme + from pyenzyme.model import EnzymeMLDocument +except ImportError: + sympy = None + pyenzyme = None def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: @@ -17,6 +20,10 @@ def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: Returns: list: Available species in EnzymeML document. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." 
+ ) if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( f"Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead." @@ -42,6 +49,10 @@ def get_ordered_list_of_species_names(fid: "Fid") -> list: Returns: list: List of species names in desecending order by peak index. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) list_of_tuples = [] # Iterate over the peak objects and then over their associated peaks # of a given FID object and append a tuple of the identity's name and @@ -71,6 +82,10 @@ def get_initial_concentration_by_species_id( Returns: float: The initial concentration of the species. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) intial_concentration = float("nan") for measurement in enzymeml_document.measurements: for measurement_datum in measurement.species: @@ -91,6 +106,10 @@ def get_species_id_by_name( Returns: str: The `species_id` of the species. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) species_id = None for species in get_species_from_enzymeml(enzymeml_document): if species.name == species_name: @@ -108,6 +127,10 @@ def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) Returns: str: The name of the species. """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." 
+ ) species_name = None for species in get_species_from_enzymeml(enzymeml_document): if species.id == species_id: @@ -145,12 +168,14 @@ def create_enzymeml( Returns: EnzymeMLDocument: The EnzymeML document with the added data. """ - + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + ) if not enzymeml_document.measurements: raise AttributeError( "EnzymeML document does not contain measurement metadata. Please add a measurement to the document first." ) - global_time = (fid_array.t.tolist(),) for measured_species in fid_array.concentrations.items(): for available_species in enzymeml_document.measurements[0].species: From 7b5c86f4542d44599ba7c59090ab2cce01d87c26 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 29 Jan 2025 10:46:20 +0100 Subject: [PATCH 30/54] Fix recurring typo in pyenzyme import error --- nmrpy/data_objects.py | 18 +++++++++++++----- nmrpy/utils.py | 12 ++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 27e9411..9d44461 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -347,6 +347,10 @@ def enzymeml_species(self): @enzymeml_species.setter def enzymeml_species(self, enzymeml_species): + if pyenzyme is None: + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." + ) self.__enzymeml_species = enzymeml_species @property @@ -1313,7 +1317,7 @@ def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): """ if (pyenzyme is None) and (isinstance(species_list, EnzymeMLDocument)): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. 
Please install it via `pip install nmrpy[enzymeml]`." ) self._assigner_widget = PeakAssigner( fid=self, @@ -1387,7 +1391,7 @@ def enzymeml_document(self): def enzymeml_document(self, enzymeml_document): if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( @@ -1639,7 +1643,7 @@ def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) self.enzymeml_document = pyenzyme.read_enzymeml( cls=pyenzyme.EnzymeMLDocument, path=path_to_enzymeml_document @@ -2265,7 +2269,7 @@ def get_integrals_from_traces(self): integrals_set[i] = integrals return integrals_set - def assign_peaks(self, species_list=None, index_list=None): + def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None, index_list: list[int] = None): """ Instantiate a peak-assignment GUI widget. Select a FID by its ID from the combobox. Select peaks from dropdown menu @@ -2274,6 +2278,10 @@ def assign_peaks(self, species_list=None, index_list=None): containing species defined in EnzymeML. When satisfied with assignment, press Assign button to apply. """ + if (pyenzyme is None) and (isinstance(species_list, EnzymeMLDocument)): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." 
+ ) self._assigner_widget = PeakRangeAssigner( fid_array=self, species_list=species_list, index_list=index_list ) @@ -2340,7 +2348,7 @@ def apply_to_enzymeml(self, enzymeml_document = None) -> EnzymeMLDocument: """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) if not enzymeml_document: enzymeml_document = self.enzymeml_document diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 1f4840e..c8c994f 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -22,7 +22,7 @@ def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( @@ -51,7 +51,7 @@ def get_ordered_list_of_species_names(fid: "Fid") -> list: """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) list_of_tuples = [] # Iterate over the peak objects and then over their associated peaks @@ -84,7 +84,7 @@ def get_initial_concentration_by_species_id( """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. 
Please install it via `pip install nmrpy[enzymeml]`." ) intial_concentration = float("nan") for measurement in enzymeml_document.measurements: @@ -108,7 +108,7 @@ def get_species_id_by_name( """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) species_id = None for species in get_species_from_enzymeml(enzymeml_document): @@ -129,7 +129,7 @@ def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) species_name = None for species in get_species_from_enzymeml(enzymeml_document): @@ -170,7 +170,7 @@ def create_enzymeml( """ if (pyenzyme is None): raise RuntimeError( - "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[pyenzyme]`." + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." 
) if not enzymeml_document.measurements: raise AttributeError( From fd908398cedec1aded0c0171e0eec00cc198f4fc Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 29 Jan 2025 10:48:34 +0100 Subject: [PATCH 31/54] Remove deprecated data model linking --- links/enzymeml.toml | 22 ---------------------- links/nmrml.toml | 0 2 files changed, 22 deletions(-) delete mode 100644 links/enzymeml.toml delete mode 100644 links/nmrml.toml diff --git a/links/enzymeml.toml b/links/enzymeml.toml deleted file mode 100644 index 4f2a90f..0000000 --- a/links/enzymeml.toml +++ /dev/null @@ -1,22 +0,0 @@ -__model__ = "NMRpy" - -[__sources__] -EnzymeMLDocument = "https://github.com/EnzymeML/enzymeml-specifications.git@markdown-parser-refactor" - -[NMRpy] -datetime_created = "EnzymeMLDocument.created" -datetime_modified = "EnzymeMLDocument.modified" - -[experiment] -name = "EnzymeMLDocument.name" - -[citation] -doi = "EnzymeMLDocument.doi" - -["citation.authors"] -last_name = "EnzymeMLDocument.creators.family_name" -first_name = "EnzymeMLDocument.creators.given_name" -email = "EnzymeMLDocument.creators.mail" - -["citation.related_publications"] -doi = "EnzymeMLDocument.url" diff --git a/links/nmrml.toml b/links/nmrml.toml deleted file mode 100644 index e69de29..0000000 From dfbb0aa410d206ff6133fd1211f34cd930bba244 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 19 Feb 2025 16:13:43 +0100 Subject: [PATCH 32/54] Add None check for new FidArray properties - Initialisation of enzymeml_document and concentrations properties of FidArray class with initial None value led to Error. Check for None added to fix this issue. 
- Add correct optional dependency name to setup.py --- nmrpy/data_objects.py | 8 +++++++- setup.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 9d44461..cd27d44 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1389,11 +1389,14 @@ def enzymeml_document(self): @enzymeml_document.setter def enzymeml_document(self, enzymeml_document): + if enzymeml_document is None: + self.__enzymeml_document = None + return if (pyenzyme is None): raise RuntimeError( "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) - if not isinstance(enzymeml_document, EnzymeMLDocument): + if isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( f'Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead.' ) @@ -1414,6 +1417,9 @@ def concentrations(self): @concentrations.setter def concentrations(self, concentrations): + if concentrations is None: + self.__concentrations = None + return if not isinstance(concentrations, dict): raise TypeError('concentrations must be a dictionary.') for fid in self.get_fids(): diff --git a/setup.py b/setup.py index d9a5703..eb8083e 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ 'license': 'New BSD', 'name': 'nmrpy', 'extras_require': { - 'pyenzyme': [ + 'enzymeml': [ 'pyenzyme @ git+https://github.com/EnzymeML/PyEnzyme.git@v2-migration#egg=pyenzyme', 'sympy' ] From 65ebd0085a7c6a068f2e73b3b93ded5ab927f8d1 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Fri, 21 Feb 2025 15:42:57 +0100 Subject: [PATCH 33/54] Fix various bugs - Move the setup of Peak objects from the Peak(Range)Assigner to the deconvolution methods to prevent uninitialised Peak objects in data model. - Update create_enzymeml() method to reflect changes in pyenzyme library. - Add species property to FidArray, similar to the deconvoluted_integrals property. 
--- nmrpy/data_objects.py | 62 +++++++++++++++++++++++++-- nmrpy/plotting.py | 98 ++----------------------------------------- nmrpy/utils.py | 2 +- 3 files changed, 63 insertions(+), 99 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index cd27d44..54b5522 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1253,6 +1253,7 @@ def deconv(self, method='leastsq', frac_gauss=0.0): raise AttributeError('peaks must be picked.') if self.ranges is None: raise AttributeError('ranges must be specified.') + self._setup_peak_objects() print('deconvoluting {}'.format(self.id)) list_parameters = [self.data, self._grouped_index_peaklist, self._index_ranges, frac_gauss, method] self._deconvoluted_peaks = numpy.array([j for i in Fid._deconv_datum(list_parameters) for j in i]) @@ -1301,6 +1302,48 @@ def plot_deconv(self, **kwargs): setattr(self, plt.id, plt) pyplot.show() + + def _setup_peak_objects(self): + # Create or update Peak objects in data model after validation + # of Fid.peaks and Fid.ranges. + + # Validates FID has peaks and ranges and len(peaks) == len(ranges) + if self.peaks is None or len(self.peaks) == 0: + raise RuntimeError( + "`fid.peaks` is required but still empty. " + "Please assign them manually or with the `peakpicker` method." + ) + if self.ranges is None or len(self.ranges) == 0: + raise RuntimeError( + "`fid.ranges` is required but still empty. " + "Please assign them manually or with the `rangepicker` method." + ) + if len(self.peaks) != len(self.ranges): + raise RuntimeError( + "`fid.peaks` and `fid.ranges` must have the same length, as " + "each peak must have a range assigned to it." 
+ ) + + # Create or update Peak objects in data model + for i, (peak_val, range_val) in enumerate(zip(self.peaks, self.ranges)): + if i < len(self.fid_object.peaks): + # Peak already exists, update it + self.fid_object.peaks[i].peak_position = float(peak_val) + self.fid_object.peaks[i].peak_range = { + "start": float(range_val[0]), + "end": float(range_val[1]), + } + else: + # Peak does not yet exist, create it + self.fid_object.add_to_peaks( + peak_index=i, + peak_position=float(peak_val), + peak_range={ + "start": float(range_val[0]), + "end": float(range_val[1]), + }, + ) + def assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): """ Instantiate a species-assignment GUI widget. Select peaks from @@ -1423,7 +1466,7 @@ def concentrations(self, concentrations): if not isinstance(concentrations, dict): raise TypeError('concentrations must be a dictionary.') for fid in self.get_fids(): - if not fid.species: + if not len(fid.species): raise ValueError('All FIDs must have species assigned to peaks.') if not all(species in fid.species for species in concentrations.keys()): raise ValueError('Keys of concentrations must be species assigned to peaks.') @@ -1532,7 +1575,18 @@ def deconvoluted_integrals(self): for fid in self.get_fids(): deconvoluted_integrals.append(fid.deconvoluted_integrals) return numpy.array(deconvoluted_integrals) - + + @property + def species(self): + """ + Collected :class:`~nmrpy.data_objects.Fid.species` + """ + for i, fid in enumerate(self.get_fids()): + species = [s for s in fid.species] + if i>0: + break + return numpy.array(species) + @property def _deconvoluted_peaks(self): """ @@ -1800,7 +1854,6 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) fid.data = datum fid.fid_object.processed_data = [str(data) for data in datum] fid.fid_object.processing_steps.is_phased = True - fid.fid_object.processing_steps.phase_correction_method = method else: for fid in self.get_fids(): 
fid.phase_correct(method=method, verbose=verbose) @@ -1901,6 +1954,7 @@ def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): deconv_datum = self._generic_mp(Fid._deconv_datum, list_params, cpus) for fid, datum in zip(fids, deconv_datum): fid._deconvoluted_peaks = numpy.array([j for i in datum for j in i]) + fid._setup_peak_objects() integrals = [] for i, peak in enumerate(fid._deconvoluted_peaks): int_gauss = peak[-1] * Fid._f_gauss_int(peak[3], peak[1]) @@ -1908,7 +1962,7 @@ def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): integral = int_gauss + int_lorentz integrals.append(integral) # Update data model - peak_object = self.fid_object.peaks[i] + peak_object = fid.fid_object.peaks[i] if peak_object.peak_integral != integral: peak_object.peak_integral = float(integral) fid.fid_object.processing_steps.is_deconvoluted = True diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 4ac8325..4300eb7 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1737,8 +1737,8 @@ def __init__(self, fid, species_list=None, title="Assign species"): self._setup_species_source(species_list) # Validate and initialize - self._validate_fid(self.fid) - self._setup_fid(self.fid) + self.fid._setup_peak_objects() + self.fid.species = numpy.empty(len(fid.peaks), dtype=object) self.available_peaks = [str(peak) for peak in self.fid.peaks] # Create and layout widgets @@ -1771,51 +1771,6 @@ def _setup_species_source(self, species_source): "an EnzymeML document, or None if FID has enzymeml_species" ) - def _validate_fid(self, fid): - # Validates FID has peaks and ranges and len(peaks) == len(ranges) - if fid.peaks is None or len(fid.peaks) == 0: - raise RuntimeError( - "`fid.peaks` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." - ) - if fid.ranges is None or len(fid.ranges) == 0: - raise RuntimeError( - "`fid.ranges` is required but still empty. 
" - "Please assign them manually or with the `rangepicker` method." - ) - if len(fid.peaks) != len(fid.ranges): - raise RuntimeError( - "`fid.peaks` and `fid.ranges` must have the same length, as " - "each peak must have a range assigned to it." - ) - - def _setup_fid(self, fid): - # Initialize species array and creates or updates Peak objects - # in data model if species from EnyzmeML are used. - - # Initialize empty species array - fid.species = numpy.empty(len(fid.peaks), dtype=object) - - # Create or update Peak objects in data model - for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): - if i < len(fid.fid_object.peaks): - # Peak already exists, update it - fid.fid_object.peaks[i].peak_position = float(peak_val) - fid.fid_object.peaks[i].peak_range = { - "start": float(range_val[0]), - "end": float(range_val[1]), - } - else: - # Peak does not yet exist, create it - fid.fid_object.add_to_peaks( - peak_index=i, - peak_position=float(peak_val), - peak_range={ - "start": float(range_val[0]), - "end": float(range_val[1]), - }, - ) - def _create_widgets(self): # Create all widget components self.title_label = Label(value=self.title) @@ -1949,8 +1904,8 @@ def __init__(self, fid_array, species_list=None, index_list=None): # Validate and initialize self.fids = self._build_fids(index_list) for fid in self.fids: - self._validate_fid(fid) - self._setup_fid(fid) + fid._setup_peak_objects() + fid.species = numpy.empty(len(fid.peaks), dtype=object) # Create and layout widgets self._create_widgets() @@ -2017,51 +1972,6 @@ def _build_fids(self, index_list): return fids - def _validate_fid(self, fid): - # Validate that FID has peaks and ranges and that their - # lengths are the same - if fid.peaks is None or len(fid.peaks) == 0: - raise RuntimeError( - "`fid.peaks` is required but still empty. " - "Please assign them manually or with the `peakpicker` method." 
- ) - if fid.ranges is None or len(fid.ranges) == 0: - raise RuntimeError( - "`fid.ranges` is required but still empty. " - "Please assign them manually or with the `rangepicker` method." - ) - if len(fid.peaks) != len(fid.ranges): - raise RuntimeError( - "`fid.peaks` and `fid.ranges` must have the same length, as " - "each peak must have a range assigned to it." - ) - - def _setup_fid(self, fid): - # Initialize species array and create or update Peak objects in - # data model - - # Initialize empty species array - fid.species = numpy.empty(len(fid.peaks), dtype=object) - # Create or update Peak objects in data model - for i, (peak_val, range_val) in enumerate(zip(fid.peaks, fid.ranges)): - if i < len(fid.fid_object.peaks): - # Peak already exists, update it - fid.fid_object.peaks[i].peak_position = float(peak_val) - fid.fid_object.peaks[i].peak_range = { - "start": float(range_val[0]), - "end": float(range_val[1]), - } - else: - # Peak does not yet exist, create it - fid.fid_object.add_to_peaks( - peak_index=i, - peak_position=float(peak_val), - peak_range={ - "start": float(range_val[0]), - "end": float(range_val[1]), - }, - ) - def _create_widgets(self): # Create all widget components self.title_label = Label(value="Assign peaks for all FIDs") diff --git a/nmrpy/utils.py b/nmrpy/utils.py index c8c994f..ebb6dc3 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -178,7 +178,7 @@ def create_enzymeml( ) global_time = (fid_array.t.tolist(),) for measured_species in fid_array.concentrations.items(): - for available_species in enzymeml_document.measurements[0].species: + for available_species in enzymeml_document.measurements[0].species_data: if not available_species.species_id == get_species_id_by_name( enzymeml_document, measured_species[0] ): From 7e1b1928380050b6b91a4bd16037e386a1ad187e Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 3 Mar 2025 15:40:55 +0100 Subject: [PATCH 34/54] Update requirements.txt Due to an issue with md-models, pydantic>=2.10.0 is 
currently causing errors. In the meantime, a version constraint has been added to the requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 47e3e76..b2ff86d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ notebook>=6.0.0 ipython>=7.0.0 lmfit>=0.9.10 nmrglue>=0.6 -pydantic>=2.0.0 +pydantic>=2.0.0,<2.10.0 From 1b0349d295129cb63f0707994239f3081f14ab39 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 4 Mar 2025 16:21:59 +0100 Subject: [PATCH 35/54] Update data_objects.py Fix type checking bug in enzymeml_document property of FidArray class. --- nmrpy/data_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 54b5522..53246a2 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1439,7 +1439,7 @@ def enzymeml_document(self, enzymeml_document): raise RuntimeError( "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) - if isinstance(enzymeml_document, EnzymeMLDocument): + if not isinstance(enzymeml_document, EnzymeMLDocument): raise AttributeError( f'Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead.'
) From f6659b46fd5a1d6553a2b5c84addb6e58d3a26b6 Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Thu, 24 Apr 2025 21:03:03 +0200 Subject: [PATCH 36/54] fix: Fix `Fid.baseline_correct()` and `FidArray.baseline_correct_fids()` --- nmrpy/data_objects.py | 26 +++++++++++++++++--------- nmrpy/plotting.py | 4 ++-- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 53246a2..f35b94d 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -9,6 +9,8 @@ from nmrpy.plotting import * import os import pickle +from ipywidgets import Output +from IPython.display import display from nmrpy.nmrpy_model import ( NMRpy, @@ -373,7 +375,7 @@ def _bl_ppm(self, bl_ppm): @property def _bl_indices(self): - if self._bl_ppm is not None: + if hasattr(self, '_bl_ppm'): return self._conv_to_index(self.data, self._bl_ppm, self._params['sw_left'], self._params['sw']) else: return None @@ -391,7 +393,7 @@ def _bl_poly(self, bl_poly): raise AttributeError('baseline polynomial must be numbers') self.__bl_poly = numpy.array(bl_poly) else: - self.__bl_ppm = bl_poly + self.__bl_poly = bl_poly @property def _index_peaks(self): @@ -791,13 +793,13 @@ def baseline_correct(self, deg=2): """ if self._bl_indices is None: - raise AttributeError('No points selected for baseline correction. Run fid.baseliner()') + raise AttributeError('No points selected for baseline correction. 
Run fid.baseliner() or fidarray.baseliner_fids()') if not len(self.data): - raise AttributeError('data does not exist.') + raise AttributeError('Data does not exist.') if self.data.dtype in self._complex_dtypes: - raise TypeError('data must not be complex.') + raise TypeError('Data must not be complex.') if not Fid._is_flat_iter(self.data): - raise AttributeError('data must be 1 dimensional.') + raise AttributeError('Data must be 1 dimensional.') data = self.data x = numpy.arange(len(data)) @@ -1884,12 +1886,18 @@ def baseline_correct_fids(self, deg=2): :keyword deg: degree of the baseline polynomial (see :meth:`~nmrpy.data_objects.Fid.baseline_correct`) """ + okay = True for fid in self.get_fids(): try: fid.baseline_correct(deg=deg) - except: - print('failed for {}. Perhaps first run baseliner_fids()'.format(fid.id)) - print('baseline-correction completed') + except TypeError as te: + okay = False + print(f'Failed for {fid.id}. {te}') + except AttributeError as ae: + okay = False + print(f'Failed for {fid.id}. 
{ae}') + if okay: + print('baseline-correction completed') @property def _data_traces(self): diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 4300eb7..fe86b35 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1671,7 +1671,7 @@ def assign(self): cur_peaks = fid._ppm[peak_ind] bl_ppm.append(cur_peaks) bl_ppm = numpy.array([j for i in bl_ppm for j in i]) - fid._bl_ppm = bl_ppm + fid._bl_ppm = bl_ppm.copy() plt.close(self.span_selector.fig) class FidRangeSelector: @@ -1714,7 +1714,7 @@ def assign(self): cur_peaks = self.ppm[peak_ind] bl_ppm.append(cur_peaks) bl_ppm = numpy.array([j for i in bl_ppm for j in i]) - self.fid._bl_ppm = bl_ppm + self.fid._bl_ppm = bl_ppm.copy() plt.close(self.span_selector.fig) class PeakAssigner: From afa951b977cad66435b8895240dd08d938fbb1f5 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Mon, 5 May 2025 12:10:13 +0200 Subject: [PATCH 37/54] Add measurement handling (#14) * Add create_new_enzymeml_measurement() method - FidArray now has a create_new_enzymeml_measurement() method that acts as the interface for creating new Measurement objects either per GUI or script. - FidArray enzymeml_document property now checks for existence of at least one Measurement object in adder method. - plotting.py now has a skeleton MeasurementCreator class that will later be the GUI for creating new Measurement objects. - utils.py now has a utility function that creates an EnzymeML Measurement object from the parameters passed to it. 
* Add multiple Measurement support - Add MeasurementCreator widget as GUI for create_new_enzymeml_measurement() method - Update apply_to_enzymeml() method - Update create_enzymeml() method --- nmrpy/data_objects.py | 83 ++++++- nmrpy/plotting.py | 558 +++++++++++++++++++++++++++++++++++++++++- nmrpy/utils.py | 292 ++++++++++++++++++++-- 3 files changed, 909 insertions(+), 24 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index f35b94d..e0a526e 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -23,8 +23,8 @@ ) try: import pyenzyme - from pyenzyme.model import EnzymeMLDocument - from nmrpy.utils import create_enzymeml, get_species_from_enzymeml + from pyenzyme.model import EnzymeMLDocument, Measurement + from nmrpy.utils import create_enzymeml, create_enzymeml_measurement, fill_enzymeml_measurement, get_species_from_enzymeml except ImportError: pyenzyme = None @@ -1445,6 +1445,10 @@ def enzymeml_document(self, enzymeml_document): raise AttributeError( f'Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead.' ) + if not enzymeml_document.measurements: + raise AttributeError( + 'EnzymeML document must contain at least one measurement.' 
+ ) self.__enzymeml_document = enzymeml_document self.__enzymeml_document.modified = str(datetime.now()) self.__data_model.experiment.name = self.__enzymeml_document.name @@ -1470,8 +1474,9 @@ def concentrations(self, concentrations): for fid in self.get_fids(): if not len(fid.species): raise ValueError('All FIDs must have species assigned to peaks.') - if not all(species in fid.species for species in concentrations.keys()): - raise ValueError('Keys of concentrations must be species assigned to peaks.') + if not set(concentrations.keys()).issubset(fid.species): + invalid_species = set(concentrations.keys()) - set(fid.species) + raise ValueError(f'Invalid species in concentrations: {invalid_species}') if not all(len(concentrations[species]) == len(self.t) for species in concentrations.keys()): raise ValueError('Length of concentrations must match length of FID data.') self.__concentrations = concentrations @@ -2400,13 +2405,63 @@ def save_to_file(self, filename=None, overwrite=False): fid._del_widgets() with open(filename, 'wb') as f: pickle.dump(self, f) - - def apply_to_enzymeml(self, enzymeml_document = None) -> EnzymeMLDocument: + + def create_new_enzymeml_measurement( + self, + gui: bool = True, + template_measurement: bool = True, + template_id: str = None, + keep_ph: bool = True, + keep_temperature: bool = True, + keep_initial: bool = False, + **kwargs + ) -> None: + + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." + ) + if not self.enzymeml_document: + raise AttributeError( + "No EnzymeML document found. Please add one using `parse_enzymeml_document()`." + ) + if len(self.enzymeml_document.measurements) == 0: + raise ValueError( + "No measurements found in EnzymeML document. At least one measurement is required." 
+ ) + if not template_measurement and (keep_ph or keep_temperature or keep_initial): + print("Warning: Without a template measurement, there are no pH, temperature, or initial values to keep.") + + if gui: + # TODO: Implement GUI for creating a new measurement + self._measurement_creator = MeasurementCreator( + fid_array=self, + ) + else: + new_measurement = create_enzymeml_measurement( + self.enzymeml_document, + template_measurement=template_measurement, + template_id=template_id, + ) + new_measurement = fill_enzymeml_measurement( + self.enzymeml_document, + new_measurement, + template_measurement=template_measurement, + template_id=template_id, + keep_ph=keep_ph, + keep_temperature=keep_temperature, + keep_initial=keep_initial, + **kwargs + ) + self.enzymeml_document.measurements.append(new_measurement) + + + def apply_to_enzymeml(self, enzymeml_document = None, measurement_id = None) -> EnzymeMLDocument: """ Apply the calculated concentrations from the FidArray to an EnzymeMLDocument. Args: - enzymeml_document (EnzymeMLDocument): The EnzymeML document to apply the concentrations to. + enzymeml_document (EnzymeMLDocument, optional): The EnzymeML document to apply the concentrations to. Returns: EnzymeMLDocument: The EnzymeML document with the concentrations applied. @@ -2418,9 +2473,21 @@ def apply_to_enzymeml(self, enzymeml_document = None) -> EnzymeMLDocument: raise RuntimeError( "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) + if not self.concentrations: + raise RuntimeError( + "No concentrations found. Please calculate concentrations first." 
+ ) + # If no enzymeml_document is provided, use the one stored in the + # FidArray if not enzymeml_document: enzymeml_document = self.enzymeml_document - return create_enzymeml(self, enzymeml_document) + + # If no measurement_id is provided, use the id of the last + # measurement in the EnzymeML document + if not measurement_id: + measurement_id = self.enzymeml_document.measurements[-1].id + + return create_enzymeml(self, enzymeml_document, measurement_id) class Importer(Base): diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index fe86b35..37edd83 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -11,16 +11,22 @@ from matplotlib.widgets import Cursor from matplotlib.backend_bases import NavigationToolbar2, Event -from ipywidgets import FloatText, Output, VBox, Button, Combobox, Dropdown, Label +from ipywidgets import FloatText, Output, VBox, Button, Combobox, Dropdown, Label, Checkbox, HTML, Tab, BoundedFloatText, Text from IPython.display import display import asyncio from nmrpy.utils import format_species_string try: - from pyenzyme.model import EnzymeMLDocument + import pyenzyme + import pyenzyme.units.predefined as predefined + from pyenzyme.model import EnzymeMLDocument, Measurement from nmrpy.utils import ( get_ordered_list_of_species_names, get_species_from_enzymeml, + format_measurement_string, + create_enzymeml_measurement, + fill_enzymeml_measurement, + InitialConditionTab, ) except ImportError: pyenzyme = None @@ -2125,6 +2131,554 @@ def __init__(self): raise NotImplementedError( "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." ) + +class MeasurementCreator: + """ + Widget for creating a new measurement. 
+ """ + def __init__(self, fid_array): + self.fid_array = fid_array + self.measurements = self.fid_array.enzymeml_document.measurements.copy() + self.template_measurement = None + self.new_measurement = None + self.initialized = False + + self.unit_options_dict = { + name: getattr(predefined, name) + for name in dir(predefined) + if not name.startswith("_") and isinstance(getattr(predefined, name), predefined.UnitDefinition) + } + self.c_units = ["M", "mM", "uM", "nM", "mol", "mmol", "umol", "nmol"] + self.m_units = ["g", "mg", "ug", "ng", "kg"] + self.v_units = ["l", "ml", "ul", "nl"] + self.t_units = ["s", "min", "h", "d"] + self.T_units = ["K", "C"] + + self._initial_name = None + self._initial_id = None + self._current_temp_unit = pyenzyme.units.predefined.K + self._missing_initial_conditions = [] + + self.create_widgets() + self.setup_callbacks() + self.initialize_measurement() + self.layout_widgets() + + def create_widgets(self): + # Create all widget components + self.spacer = HTML(value=" ") + + self.title_html = HTML(value="Create new EnzymeML Measurement") + + self.name_textbox = Text( + value="", + description="Enter name of new measurement:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.id_checkbox = Checkbox( + value=False, + description="Assign a custom ID?", + indent=False, + ) + self.id_textbox = Text( + value="", + description="Enter custom ID:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + self.template_checkbox = Checkbox( + value=False, + description="Use a template measurement?", + indent=False, + ) + self.template_dropdown = Dropdown( + options=[], + description="Select a template measurement:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=True, + ) + + self.ph_checkbox = Checkbox( + value=True, + description="Keep pH?", + indent=False, + disabled=True, + ) + self.ph_textbox = BoundedFloatText( + 
value=7.0, + min=0.0, + max=14.0, + step=0.1, + description="Select new pH:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.temperature_checkbox = Checkbox( + value=True, + description="Keep temperature?", + indent=False, + disabled=True, + ) + self.temperature_textbox = BoundedFloatText( + value=298.15, + min=0.0, + max=1000.0, + step=0.1, + description="Select new temperature:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + self.temperature_unit_dropdown = Dropdown( + options=[ + (unit_name, self.unit_options_dict[unit_name]) + for unit_name in self.T_units + if unit_name in self.unit_options_dict + ], + value=pyenzyme.units.predefined.K, + description="Select temperature unit:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.initial_checkbox = Checkbox( + value=False, + description="Keep initial conditions?", + indent=False, + disabled=True, + ) + self.initial_tabs = self._create_initial_tabs() + + self.warning_html = HTML(value="") + + self.general_tab = VBox( + [ + self.name_textbox, + self.spacer, + self.id_checkbox, + self.id_textbox, + self.spacer, + self.template_checkbox, + self.template_dropdown, + self.spacer, + self.ph_checkbox, + self.ph_textbox, + self.spacer, + self.temperature_checkbox, + self.temperature_textbox, + self.temperature_unit_dropdown, + self.spacer, + self.initial_checkbox, + self.spacer, + self.spacer, + self.warning_html, + ] + ) + + def _create_initial_tabs(self): + initial_tabs = {} + if self.template_measurement: + selected_measurement = self.template_measurement.model_copy(deep=True) + else: + selected_measurement = self.measurements[-1].model_copy(deep=True) + for species_datum in selected_measurement.species_data: + for species in get_species_from_enzymeml(self.fid_array.enzymeml_document): + if species.id == species_datum.species_id: + enzymeml_species = species + break + + initial_condition_tab = 
InitialConditionTab( + species_id = enzymeml_species.id, + title = str(enzymeml_species.id), + header = HTML(value=f"Set initial conditions for {format_species_string(enzymeml_species)}"), + textbox = BoundedFloatText( + value=0.0, + min=0.0, + max=1000.0, + step=0.01, + description="Initial condition:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ), + data_type_dropdown = Dropdown( + options=[(data_type.name.capitalize().replace("_", " "), data_type) for data_type in pyenzyme.DataTypes], + description="Data type of initial condition:", + value=pyenzyme.DataTypes.CONCENTRATION, + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ), + data_unit_dropdown = Dropdown( + options=[ + (unit_name, self.unit_options_dict[unit_name]) + for unit_name in self.c_units + if unit_name in self.unit_options_dict + ], + description="Unit of initial condition:", + value=pyenzyme.units.predefined.mM, + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ), + time_unit_dropdown = Dropdown( + options=[ + (unit_name, self.unit_options_dict[unit_name]) + for unit_name in self.t_units + if unit_name in self.unit_options_dict + ], + description="Unit of time course:", + value=pyenzyme.units.predefined.s, + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + ) + initial_tabs[initial_condition_tab.species_id] = initial_condition_tab + return initial_tabs + + def setup_callbacks(self): + # Set up all widget callbacks + self.name_textbox.observe(self._handle_name_change) + + self.id_checkbox.observe(self._handle_id_check) + self.id_textbox.observe(self._handle_id_change) + + self.template_checkbox.observe(self._handle_template_check) + self.template_dropdown.observe(self._handle_template_change) + + self.ph_checkbox.observe(self._handle_ph_check) + self.ph_textbox.observe(self._handle_ph_change) + + self.temperature_checkbox.observe(self._handle_temperature_check) + 
self.temperature_textbox.observe(self._handle_temperature_change) + self.temperature_unit_dropdown.observe(self._handle_temperature_unit_change) + + self.initial_checkbox.observe(self._handle_initial_check) + for initial_tab in self.initial_tabs.values(): + initial_tab.textbox.observe(lambda event, initial_tab=initial_tab: self._handle_initial_condition_change(event, initial_tab)) + initial_tab.data_type_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_data_type_change(event, initial_tab)) + initial_tab.data_unit_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_data_unit_change(event, initial_tab)) + initial_tab.time_unit_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_time_unit_change(event, initial_tab)) + + def initialize_measurement(self): + # Initialize the new measurement + if self.initialized: + self.fid_array.enzymeml_document.measurements.pop() + + self.new_measurement = create_enzymeml_measurement( + self.fid_array.enzymeml_document, + template_measurement=self.template_measurement, + ) + self.new_measurement.ph = self.ph_textbox.value + self.new_measurement.temperature = self.temperature_textbox.value + self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + self._initial_name = self.new_measurement.name + self._initial_id = self.new_measurement.id + self.fid_array.enzymeml_document.measurements.append(self.new_measurement) + self._initialize_missing_initial_conditions() + self.initialized = True + + def clear_species_data(self, measurement): + # Clear the template species data + measurement.ph = None + measurement.temperature = None + measurement.temperature_unit = None + for species_datum in measurement.species_data: + species_datum.initial = None + species_datum.data_type = None + species_datum.data_unit = None + species_datum.time_unit = None + + def layout_widgets(self): + # Create widget layout and display + tab_children = [self.general_tab] + 
tab_children.extend(initial_tab.as_vbox() for initial_tab in self.initial_tabs.values()) + tab_titles = ["General"] + tab_titles.extend(initial_tab.title for initial_tab in self.initial_tabs.values()) + self.tab = Tab( + children=tab_children, + titles=tab_titles, + ) + self.container = VBox( + [ + self.title_html, + self.tab, + ] + ) + display(self.container) + + def _initialize_missing_initial_conditions(self): + self._missing_initial_conditions = [] + for species in get_species_from_enzymeml(self.fid_array.enzymeml_document): + enzymeml_species = species + self._missing_initial_conditions.append(format_species_string(enzymeml_species)) + self.warning_html.value = f"WARNING: Initial conditions for {', '.join(self._missing_initial_conditions)} are still missing!" + + def _handle_name_change(self, event): + # Enable the name_textbox when the name_checkbox is checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.new_measurement.name = self.name_textbox.value + else: + self.new_measurement.name = self._initial_name + + def _handle_id_check(self, event): + # Enable the id_textbox when the id_checkbox is checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.id_textbox.disabled = False + else: + self.id_textbox.disabled = True + self.id_textbox.value = "" + self.new_measurement.id = self._initial_id + + def _handle_id_change(self, event): + # Enable the id_textbox when the id_checkbox is checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.new_measurement.id = self.id_textbox.value + else: + self.new_measurement.id = self._initial_id + + def _handle_template_check(self, event): + # Enable the template dropdown when the template checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.template_dropdown.options = [ + (format_measurement_string(measurement), measurement.id) + for measurement in 
self.measurements + ] + self.template_dropdown.value = self.template_dropdown.options[0][1] + self.template_dropdown.disabled = False + self.ph_checkbox.disabled = False + self.ph_textbox.disabled = True + self.temperature_checkbox.disabled = False + self.temperature_textbox.disabled = True + self.temperature_unit_dropdown.disabled = True + self.initial_checkbox.disabled = False + else: + self.template_dropdown.options = [] + self.template_dropdown.disabled = True + self.ph_checkbox.disabled = True + self.ph_textbox.disabled = False + self.temperature_checkbox.disabled = True + self.temperature_textbox.disabled = False + self.temperature_unit_dropdown.disabled = False + self.initial_checkbox.disabled = True + self.template_measurement = None + current_name = self.new_measurement.name + current_id = self.new_measurement.id + self.initialize_measurement() + self.new_measurement.name = current_name + self.new_measurement.id = current_id + + def _handle_template_change(self, event): + # Populate template_measurement attribute with measurement of + # selected ID if template_checkbox is checked. 
+ if event["type"] == "change" and event["name"] == "value": + selected_option = event["new"] + for measurement in self.measurements: + if measurement.id == selected_option: + self.template_measurement = measurement.model_copy(deep=True) + if self.new_measurement: + # Preserve current name and ID settings + current_name = self.new_measurement.name + current_id = self.new_measurement.id + + # Create new measurement from template + new_measurement = self.template_measurement.model_copy(deep=True) + self.clear_species_data(new_measurement) + + # Update measurement with preserved values + new_measurement.name = current_name + new_measurement.id = current_id + + # Update both references to point to the same object + self.fid_array.enzymeml_document.measurements[-1] = new_measurement + self.new_measurement = self.fid_array.enzymeml_document.measurements[-1] + break + + def _handle_ph_check(self, event): + # Enable the ph_checkbox when the template checkbox is checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.ph_textbox.disabled = True + self.new_measurement.ph = self.template_measurement.ph + else: + self.ph_textbox.disabled = False + self.new_measurement.ph = self.ph_textbox.value + + def _handle_ph_change(self, event): + # Enable the ph_textbox when the ph_checkbox is checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.new_measurement.ph = self.ph_textbox.value + + def _handle_temperature_check(self, event): + # Enable the temperature_checkbox when the template checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.temperature_textbox.disabled = True + self.temperature_unit_dropdown.disabled = True + self.new_measurement.temperature = self.template_measurement.temperature + self.new_measurement.temperature_unit = self.template_measurement.temperature_unit + else: + self.temperature_textbox.disabled = False + 
self.temperature_unit_dropdown.disabled = False + self.new_measurement.temperature = self.temperature_textbox.value + self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + + def _handle_temperature_change(self, event): + # Enable the temperature_textbox when the temperature_checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + self.new_measurement.temperature = self.temperature_textbox.value + self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + + def _handle_temperature_unit_change(self, event): + # Enable the temperature_unit_dropdown when the template + # checkbox is checked. T_max of 2500 °C (2773.15 K) has been + # chosen according to Hodgkinson P., Modern Methods in Solid- + # state NMR: A Practitioner's Guide (2018), pp. 262, as the + # highest temperature yet reported for NMR experiments. + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + current_value = self.temperature_textbox.value + new_unit = self.temperature_unit_dropdown.value + old_unit = self._current_temp_unit + + if old_unit == new_unit: + return # No conversion needed + + if new_unit == pyenzyme.units.predefined.K: + # Converting from °C to K + converted_value = current_value + 273.15 + self.temperature_textbox.min = 0.0 + self.temperature_textbox.max = 2773.15 + self.temperature_textbox.value = converted_value + self.new_measurement.temperature = converted_value + self.new_measurement.temperature_unit = new_unit + + elif new_unit == pyenzyme.units.predefined.C: + # Converting from K to °C + converted_value = current_value - 273.15 + self.temperature_textbox.min = -273.15 + self.temperature_textbox.max = 2500.0 + self.temperature_textbox.value = converted_value + self.new_measurement.temperature = converted_value + self.new_measurement.temperature_unit = new_unit + + else: + raise ValueError( + f"Invalid temperature unit.
Valid units are K and C, " + f"got {new_unit} instead." + ) + + self._current_temp_unit = new_unit + + def _handle_initial_check(self, event): + # Enable the initial_checkbox when the template checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + for new_datum, template_datum, initial_tab in zip(self.new_measurement.species_data, self.template_measurement.species_data, self.initial_tabs.values()): + initial_tab.textbox.value = template_datum.initial + initial_tab.data_type_dropdown.options = [(template_datum.data_type.name.capitalize().replace("_", " "), template_datum.data_type)] + initial_tab.data_type_dropdown.value = template_datum.data_type + initial_tab.data_unit_dropdown.options = [ + (template_datum.data_unit.name, template_datum.data_unit) + ] + initial_tab.data_unit_dropdown.value = template_datum.data_unit + initial_tab.time_unit_dropdown.options = [ + (template_datum.time_unit.name, template_datum.time_unit) + ] + initial_tab.time_unit_dropdown.value = template_datum.time_unit + new_datum.initial = template_datum.initial + new_datum.data_type = template_datum.data_type + new_datum.data_unit = template_datum.data_unit + new_datum.time_unit = template_datum.time_unit + else: + for new_datum, initial_tab in zip(self.new_measurement.species_data, self.initial_tabs.values()): + initial_tab.textbox.value = 0.0 + initial_tab.data_type_dropdown.options = [(data_type.name.capitalize().replace("_", " "), data_type) for data_type in pyenzyme.DataTypes] + initial_tab.data_type_dropdown.value = pyenzyme.DataTypes.CONCENTRATION + initial_tab.data_unit_dropdown.options = [ + (unit_name, self.unit_options_dict[unit_name]) + for unit_name in self.c_units + if unit_name in self.unit_options_dict + ] + initial_tab.data_unit_dropdown.value = pyenzyme.units.predefined.mM + initial_tab.time_unit_dropdown.options = [ + (unit_name, self.unit_options_dict[unit_name]) + for unit_name in self.t_units + if unit_name in 
self.unit_options_dict + ] + initial_tab.time_unit_dropdown.value = pyenzyme.units.predefined.s + new_datum.initial = None + new_datum.data_type = None + new_datum.data_unit = None + new_datum.time_unit = None + self._initialize_missing_initial_conditions() + + def _handle_initial_condition_change(self, event, initial_tab): + # Enable the initial_checkbox when the template checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + if not self.new_measurement.species_data: + self.new_measurement.add_to_species_data( + species_id=initial_tab.species_id, + initial=event["new"], + data_type=initial_tab.data_type_dropdown.value, + data_unit=initial_tab.data_unit_dropdown.value, + time_unit=initial_tab.time_unit_dropdown.value + ) + for species_datum in self.new_measurement.species_data: + if species_datum.species_id == initial_tab.species_id: + species_datum.initial = event["new"] + for species in get_species_from_enzymeml(self.fid_array.enzymeml_document): + if species.id == species_datum.species_id: + enzymeml_species = species + if format_species_string(enzymeml_species) in self._missing_initial_conditions: + self._missing_initial_conditions.remove(format_species_string(enzymeml_species)) + break + if len(self._missing_initial_conditions) == 0: + self.warning_html.value = "All initial conditions have been set!" + else: + self.warning_html.value = f"WARNING: Initial conditions for {', '.join(self._missing_initial_conditions)} are still missing!" 
+ + def _handle_data_type_change(self, event, initial_tab): + # Enable the data_type_dropdown when the data_type_checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + for species_datum in self.new_measurement.species_data: + if species_datum.species_id == initial_tab.species_id: + species_datum.data_type = event["new"] + + def _handle_data_unit_change(self, event, initial_tab): + # Enable the data_unit_dropdown when the data_unit_checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + for species_datum in self.new_measurement.species_data: + if species_datum.species_id == initial_tab.species_id: + species_datum.data_unit = event["new"] + + def _handle_time_unit_change(self, event, initial_tab): + # Enable the time_unit_dropdown when the time_unit_checkbox is + # checked + if event["type"] == "change" and event["name"] == "value": + if event["new"]: + for species_datum in self.new_measurement.species_data: + if species_datum.species_id == initial_tab.species_id: + species_datum.time_unit = event["new"] + if __name__ == '__main__': pass diff --git a/nmrpy/utils.py b/nmrpy/utils.py index ebb6dc3..2a42ec7 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -1,12 +1,18 @@ +from dataclasses import dataclass + +from ipywidgets import BoundedFloatText, Button, Dropdown, HTML, VBox + try: import sympy import pyenzyme - from pyenzyme.model import EnzymeMLDocument + from pyenzyme.model import EnzymeMLDocument, Measurement except ImportError: sympy = None pyenzyme = None +##### Getters ##### + def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: """Iterate over various species elements in EnzymeML document, extract them, and return them as a list. 
@@ -37,7 +43,6 @@ def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: available_species.append(small_molecule) return available_species - def get_ordered_list_of_species_names(fid: "Fid") -> list: """Iterate over the identites in a given FID object and extract a list of species names ordered by peak index, multiple occurences @@ -68,7 +73,6 @@ def get_ordered_list_of_species_names(fid: "Fid") -> list: ordered_list_of_species_names = [t[0] for t in list_of_tuples] return ordered_list_of_species_names - def get_initial_concentration_by_species_id( enzymeml_document: EnzymeMLDocument, species_id: str ) -> float: @@ -93,7 +97,6 @@ def get_initial_concentration_by_species_id( intial_concentration = measurement_datum.init_conc return intial_concentration - def get_species_id_by_name( enzymeml_document: EnzymeMLDocument, species_name: str ) -> str: @@ -116,7 +119,6 @@ def get_species_id_by_name( species_id = species.id return species_id - def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) -> str: """Get the name of a species in an EnzymeML document by its `species_id`. @@ -138,6 +140,8 @@ def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) return species_name +##### Formatters ##### + def format_species_string(enzymeml_species) -> str: """Format a species object from an EnzymeML document as a string for display in widgets. @@ -155,9 +159,261 @@ def format_species_string(enzymeml_species) -> str: else: return f"{enzymeml_species.id}" +def format_measurement_string(measurement: Measurement) -> str: + """Format a measurement object from an EnzymeML document as a string + for display in widgets. + + Args: + measurement (Measurement): A measurement object from an EnzymeML + document. + + Returns: + str: The formatted measurement string. 
+ """ + if not isinstance(measurement, Measurement): + raise ValueError( + f"Parameter `measurement` has to be of type `Measurement`, got {type(measurement)} instead." + ) + if measurement.name: + return f"{measurement.id} ({measurement.name})" + else: + return f"{measurement.id}" + + +##### Measurement creation helpers ##### + +def create_enzymeml_measurement( + enzymeml_document: EnzymeMLDocument, **kwargs +) -> Measurement: + """Create a new EnzymeML Measurement object from a template within an + EnzymeML document or from scratch. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML document. + **kwargs: Keyword arguments: + template_measurement (bool): Whether to use a template measurement. + template_id (str | None): The ID of the template measurement to + use. Defaults to the last measurement in the EnzymeML document. + + Raises: + ValueError: If the provided template ID is not found in the EnzymeML + document. + + Returns: + Measurement: A new EnzymeML Measurement object. + """ + if kwargs["template_measurement"]: + if kwargs["template_id"]: + for measurement in enzymeml_document.measurements: + if measurement.id == kwargs["template_id"]: + new_measurement = measurement.model_copy() + new_measurement.id = ( + f"measurement{len(enzymeml_document.measurements) + 1}" + ) + new_measurement.name = ( + f"Measurement no. {len(enzymeml_document.measurements) + 1}" + ) + break + else: + raise ValueError( + f"Measurement with ID {kwargs['template_id']} not found." + ) + else: + new_measurement = enzymeml_document.measurements[-1].model_copy() + new_measurement.id = f"measurement{len(enzymeml_document.measurements) + 1}" + new_measurement.name = ( + f"Measurement no. {len(enzymeml_document.measurements) + 1}" + ) + else: + new_measurement = Measurement( + id=f"measurement{len(enzymeml_document.measurements) + 1}", + name=f"Measurement no.
{len(enzymeml_document.measurements) + 1}", + ) + + return new_measurement + +def fill_enzymeml_measurement( + enzymeml_document: EnzymeMLDocument, measurement: Measurement, **kwargs +) -> Measurement: + """Fill a new EnzymeML Measurement object with data. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML document. + measurement (Measurement): The EnzymeML Measurement object to fill. + **kwargs: Keyword arguments: + template_measurement (bool): Whether to use a template measurement. + template_id (str | None): The ID of the template measurement to + use. Defaults to the first measurement in the EnzymeML document. + keep_ph (bool): Whether to keep the pH of the template measurement. + keep_temperature (bool): Whether to keep the temperature of the + template measurement. + keep_initial (bool): Whether to keep the initial concentrations of + the template measurement. + id (str): The ID of the measurement. + name (str): The name of the measurement. + ph (float): The pH of the measurement. + temperature (float): The temperature of the measurement. + temperature_unit (str): The unit of the temperature of the + measurement. + initial (dict): A dictionary with species IDs (as they are defined + in the EnzymeML document) as keys and initial values as values. + data_type (str): The type of data to be stored in the measurement. + data_unit (str): The unit of the data to be stored in the + measurement. + time_unit (str): The unit of the time to be stored in the + measurement. + + Raises: + ValueError: If no value for `ph`, `temperature`, or `initial` is + provided but `keep_ph`, `keep_temperature`, or `keep_initial` is set + to `False`. + ValueError: If a temperature value is provided but no + `temperature_unit`. + ValueError: If the provided `temperature_unit` is not a valid unit. + ValueError: If the value for `initial` is not a dictionary. 
+ ValueError: If `data_type`, `data_unit`, or `time_unit` is provided but + is not a valid EnzymeML data type, data unit, or time unit. + ValueError: If no template measurement is provided but no value for + `data_type`, `data_unit`, or `time_unit` is provided. + + Returns: + Measurement: The filled EnzymeML Measurement object. + """ + + # ID and name + if "id" in kwargs: + measurement.id = kwargs["id"] + if "name" in kwargs: + measurement.name = kwargs["name"] + + # pH + if "ph" in kwargs: + measurement.ph = float(kwargs["ph"]) + elif kwargs["keep_ph"] and kwargs["template_measurement"]: + pass + else: + raise ValueError( + "The `measurement.ph` field is required in the EnzymeML standard. Please provide a pH value using the `ph` keyword argument." + ) + + # Temperature and unit + if "temperature" in kwargs: + measurement.temperature = float(kwargs["temperature"]) + if "temperature_unit" in kwargs: + if hasattr(pyenzyme.units.predefined, kwargs["temperature_unit"]): + measurement.temperature_unit = getattr( + pyenzyme.units.predefined, kwargs["temperature_unit"] + ) + else: + raise ValueError( + "The `temperature_unit` keyword argument must be a valid EnzymeML temperature unit." + ) + else: + raise ValueError( + "The `temperature_unit` keyword argument is required when setting a new temperature value." + ) + elif kwargs["keep_temperature"] and kwargs["template_measurement"]: + pass + else: + raise ValueError( + "The `measurement.temperature` field is required in the EnzymeML standard. Please provide a temperature value using the `temperature` keyword argument." + ) + + # Initial + if "initial" in kwargs: + if not isinstance(kwargs["initial"], dict): + raise ValueError( + "The `initial` keyword argument must be a dictionary with species IDs (as they are defined in the EnzymeML document) as keys and initial values as values." 
+ ) + _data_type = None + _data_unit = None + _time_unit = None + if "data_type" in kwargs: + try: + _data_type = pyenzyme.DataTypes[kwargs["data_type"].upper()] + except ValueError: + raise ValueError( + f"The `data_type` keyword argument must be a valid EnzymeML data type. Valid types are: {', '.join([data_type.name for data_type in pyenzyme.DataTypes])}." + ) + if "data_unit" in kwargs: + if hasattr(pyenzyme.units.predefined, kwargs["data_unit"]): + _data_unit = getattr(pyenzyme.units.predefined, kwargs["data_unit"]) + else: + raise ValueError( + "The `data_unit` keyword argument must be a valid EnzymeML data unit." + ) + if "time_unit" in kwargs: + if hasattr(pyenzyme.units.predefined, kwargs["time_unit"]): + _time_unit = getattr(pyenzyme.units.predefined, kwargs["time_unit"]) + else: + raise ValueError( + "The `time_unit` keyword argument must be a valid EnzymeML time unit." + ) + if kwargs["template_measurement"]: + for species_datum in measurement.species_data: + if species_datum.species_id in kwargs["initial"]: + species_datum.initial = kwargs["initial"][species_datum.species_id] + if _data_type: + species_datum.data_type = _data_type + if _data_unit: + species_datum.data_unit = _data_unit + if _time_unit: + species_datum.time_unit = _time_unit + else: + if not _data_type: + raise ValueError( + "The `data_type` keyword argument is required when creating a new measurement without a template measurement." + ) + if not _data_unit: + raise ValueError( + "The `data_unit` keyword argument is required when creating a new measurement without a template measurement." + ) + if not _time_unit: + raise ValueError( + "The `timec_unit` keyword argument is required when creating a new measurement without a template measurement." 
+ ) + for species_type in ["small_molecules", "proteins", "complexes"]: + for species in getattr(enzymeml_document, species_type): + measurement.add_to_species_data( + species_id=species.id, + initial=kwargs["initial"][species.id], + data_type=_data_type, + data_unit=_data_unit, + time_unit=_time_unit, + ) + elif kwargs["keep_initial"] and kwargs["template_measurement"]: + pass + else: + raise ValueError( + "The `measurement.species_data.initial` field is required in the EnzymeML standard. Please provide a dictionary with species IDs (as they are defined in the EnzymeML document) as keys and initial values as values using the `initial` keyword argument." + ) + + return measurement + +@dataclass +class InitialConditionTab: + species_id: str + title: str + header: HTML + textbox: BoundedFloatText + data_type_dropdown: Dropdown + data_unit_dropdown: Dropdown + time_unit_dropdown: Dropdown + + def as_vbox(self): + return VBox([ + self.header, + self.textbox, + self.data_type_dropdown, + self.data_unit_dropdown, + self.time_unit_dropdown, + ]) + + +##### Serialization ##### def create_enzymeml( - fid_array: "FidArray", enzymeml_document: EnzymeMLDocument + fid_array: "FidArray", enzymeml_document: EnzymeMLDocument, measurement_id: str ) -> EnzymeMLDocument: """Create an EnzymeML document from a given FidArray object. @@ -176,14 +432,22 @@ def create_enzymeml( raise AttributeError( "EnzymeML document does not contain measurement metadata. Please add a measurement to the document first." ) + if not measurement_id: + raise ValueError( + "A measurement ID is required to create an EnzymeML document. Please provide a measurement ID using the `measurement_id` keyword argument." 
+ ) global_time = (fid_array.t.tolist(),) - for measured_species in fid_array.concentrations.items(): - for available_species in enzymeml_document.measurements[0].species_data: - if not available_species.species_id == get_species_id_by_name( - enzymeml_document, measured_species[0] - ): + measurement = next( + measurement for measurement in enzymeml_document.measurements + if measurement.id == measurement_id + ) + print(f"Selected measurement: {measurement}") + for measured_species, concentrations in fid_array.concentrations.items(): + for available_species in measurement.species_data: + if not available_species.species_id == measured_species: pass - available_species.time = [float(x) for x in global_time[0]] - available_species.data = [float(x) for x in measured_species[1]] - + else: + available_species.time = [float(x) for x in global_time[0]] + available_species.data = [float(x) for x in concentrations] + return enzymeml_document From 848e643e07576937e5ee687fbda4b6a7859344c6 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Mon, 5 May 2025 17:17:34 +0200 Subject: [PATCH 38/54] Rework data handling (#15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update EnzymeML species handling - Add species type flags to get_species_from_enzymeml() util function to allow filtering for specific types of species. - Remove unnecessary display of proteins in PeakAssigner and PeakRangeAssigner using the new species type flags. * Add flags for keeping data models upon saving * Change data array handling of NMRpy data model - Add save_data_model() method to serialise the NMRpy data model. - Change handling of data arrays: They are now saved as numpy.ndarrays in each Fid object and only copied as lists into the data model upon serialisation. * Update data_objects.py - Resolve Pydantic serialisation issues (complex → string conversion). 
- Optimise processing loops for faster execution and lower memory usage. * Change keep_data_model flag to False --- nmrpy/data_objects.py | 72 ++++++++++++++++++++++++++++++++++++++----- nmrpy/plotting.py | 19 ++++++++++-- nmrpy/utils.py | 26 +++++++++++----- 3 files changed, 99 insertions(+), 18 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index e0a526e..6941ea7 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -11,6 +11,7 @@ import pickle from ipywidgets import Output from IPython.display import display +from datetime import datetime from nmrpy.nmrpy_model import ( NMRpy, @@ -234,11 +235,12 @@ class Fid(Base): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.data = kwargs.get('data', []) + self.raw_data = self.data.copy() self.peaks = None self.ranges = None self.species = None self.fid_object = FIDObject( - raw_data=[(str(datum)) for datum in self.data], + raw_data=[], processed_data=[], nmr_parameters=Parameters(), processing_steps=ProcessingSteps(), @@ -551,7 +553,6 @@ def zf(self): """ self.data = numpy.append(self.data, 0*self.data) # Update data model - self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_zero_filled = True def emhz(self, lb=5.0): @@ -565,7 +566,6 @@ def emhz(self, lb=5.0): """ self.data = numpy.exp(-numpy.pi*numpy.arange(len(self.data)) * (lb/self._params['sw_hz'])) * self.data # Update data model - self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_apodised = True self.fid_object.processing_steps.apodisation_frequency = lb @@ -575,7 +575,7 @@ def real(self): """ self.data = numpy.real(self.data) # Update data model - self.fid_object.processed_data = [float(datum) for datum in self.data] + self.fid_object.processing_steps.is_only_real = True # GENERAL FUNCTIONS @@ -596,7 +596,6 @@ def ft(self): self.data = Fid._ft(list_params) self._flags['ft'] = True # Update data 
model - self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_fourier_transformed = True self.fid_object.processing_steps.fourier_transform_type = 'FFT' @@ -746,7 +745,6 @@ def ps(self, p0=0.0, p1=0.0): ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) self.data = ph*self.data # Update data model - self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_phased = True self.fid_object.processing_steps.zero_order_phase = p0 self.fid_object.processing_steps.first_order_phase = p1 @@ -814,7 +812,6 @@ def baseline_correct(self, deg=2): data_bl = data-yp self.data = numpy.array(data_bl) # Update data model - self.fid_object.processed_data = [str(datum) for datum in self.data] self.fid_object.processing_steps.is_baseline_corrected = True def peakpick(self, thresh=0.1): @@ -2376,7 +2373,7 @@ def calculate_concentrations(self): "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." ) - def save_to_file(self, filename=None, overwrite=False): + def save_to_file(self, filename=None, overwrite=False, keep_data_model=False, keep_enzymeml=True): """ Save :class:`~nmrpy.data_objects.FidArray` object to file, including all objects owned. 
@@ -2384,6 +2381,9 @@ def save_to_file(self, filename=None, overwrite=False): :keyword overwrite: if True, overwrite existing file + :keyword keep_data_model: if True, keep the NMRpy data model (default is True) + + :keyword keep_enzymeml: if True, keep the EnzymeML document (default is True) """ if filename is None: basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] @@ -2403,9 +2403,65 @@ def save_to_file(self, filename=None, overwrite=False): self._del_widgets() for fid in self.get_fids(): fid._del_widgets() + if not keep_data_model: + self.data_model = None + for fid in self.get_fids(): + fid.fid_object = None + if not keep_enzymeml: + self.enzymeml_document = None + for fid in self.get_fids(): + fid.enzymeml_species = None with open(filename, 'wb') as f: pickle.dump(self, f) + def save_data_model(self, format: str = 'json', filename=None, overwrite=False): + """ + Save the NMRpy data model to a file. + + :keyword format: format of the file to save the data model to (default is 'json') + + :keyword filename: filename to save the data model to + + :keyword overwrite: if True, overwrite existing file + """ + if filename is None: + basename = os.path.split(os.path.splitext(self.fid_path)[0])[-1] + filename = basename+'.'+format + if not isinstance(filename, str): + raise TypeError('filename must be a string.') + if filename[-len(format):] != format: + filename += '.'+format + if not overwrite and os.path.exists(filename): + raise FileExistsError(f'File {filename} already exists. 
Set overwrite=True to force.') + + # Convert raw_data and processed_data to lists for serialisation + for fid in self.get_fids(): + # Raw data is always complex, convert to a list of strings + fid.fid_object.raw_data = [str(datum) for datum in fid.raw_data.copy()] + # If the processed data is still complex, also convert to a + # list of strings + if isinstance(fid.data.flat[0], numpy.complexfloating): + fid.fid_object.processed_data = [str(datum) for datum in fid.data.copy()] + # If the processed data is already real, convert to a list + # of floats instead + else: + fid.fid_object.processed_data = fid.data.tolist() + self.data_model.datetime_modified = datetime.now().isoformat() + + # Save the data model + if format == 'json': + with open(filename, 'w') as f: + json_string = self.data_model.model_dump_json( + indent=2, + by_alias=True, + exclude_none=True + ) + f.write(json_string) + print(f'Data model saved to "{filename}".') + else: + raise ValueError(f'Unsupported format: {format}') + + def create_new_enzymeml_measurement( self, gui: bool = True, diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 37edd83..dae9899 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1764,7 +1764,12 @@ def _setup_species_source(self, species_source): return # Check for EnzymeML document elif isinstance(species_source, EnzymeMLDocument): - self.available_species = get_species_from_enzymeml(species_source) + self.available_species = get_species_from_enzymeml( + species_source, + proteins=False, + complexes=True, + small_molecules=True + ) return # Check for list of strings elif isinstance(species_source, list): @@ -1928,12 +1933,20 @@ def _setup_species_source(self, species_source): "No species list provided and FIDArray has no enzymeml_document" ) self.available_species = get_species_from_enzymeml( - self.fid_array.enzymeml_document + self.fid_array.enzymeml_document, + proteins=False, + complexes=True, + small_molecules=True ) return # Check for EnzymeML document elif 
isinstance(species_source, EnzymeMLDocument): - self.available_species = get_species_from_enzymeml(species_source) + self.available_species = get_species_from_enzymeml( + species_source, + proteins=False, + complexes=True, + small_molecules=True + ) return # Check for list of strings elif isinstance(species_source, list): diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 2a42ec7..5dc8b26 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -13,7 +13,12 @@ ##### Getters ##### -def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: +def get_species_from_enzymeml( + enzymeml_document: EnzymeMLDocument, + proteins: bool = True, + complexes: bool = True, + small_molecules: bool = True +) -> list: """Iterate over various species elements in EnzymeML document, extract them, and return them as a list. @@ -34,13 +39,20 @@ def get_species_from_enzymeml(enzymeml_document: EnzymeMLDocument) -> list: raise AttributeError( f"Parameter `enzymeml_document` has to be of type `EnzymeMLDocument`, got {type(enzymeml_document)} instead." ) + if not proteins and not complexes and not small_molecules: + raise ValueError( + "At least one of the parameters `proteins`, `complexes`, or `small_molecules` must be `True`." 
+ ) available_species = [] - for protein in enzymeml_document.proteins: - available_species.append(protein) - for complex in enzymeml_document.complexes: - available_species.append(complex) - for small_molecule in enzymeml_document.small_molecules: - available_species.append(small_molecule) + if proteins: + for protein in enzymeml_document.proteins: + available_species.append(protein) + if complexes: + for complex in enzymeml_document.complexes: + available_species.append(complex) + if small_molecules: + for small_molecule in enzymeml_document.small_molecules: + available_species.append(small_molecule) return available_species def get_ordered_list_of_species_names(fid: "Fid") -> list: From d674e41de98d4666aa0368c572660d7353f44c45 Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Wed, 21 May 2025 22:36:05 +0200 Subject: [PATCH 39/54] update ruff.toml --- ruff.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ruff.toml b/ruff.toml index 749ef04..6bb05b2 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,2 +1,9 @@ +line-length = 90 + +[lint.flake8-quotes] +docstring-quotes = "double" +inline-quotes = "single" + [format] +# Prefer single quotes over double quotes quote-style = "single" From e0a724c38fdd7e08eb93b83f42e4dccf301eea4c Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Wed, 21 May 2025 22:42:37 +0200 Subject: [PATCH 40/54] fix data_model.setter ; formatting fixes --- nmrpy/data_objects.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 6941ea7..05fea03 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -590,7 +590,7 @@ def ft(self): """ if self._flags['ft']: - raise ValueError('Data have already been Fourier Transformed.') + raise ValueError('Data have already been Fourier Transformed.') if Fid._is_valid_dataset(self.data): list_params = (self.data, self._file_format) self.data = Fid._ft(list_params) @@ -606,17 +606,21 @@ def _ft(cls, list_params): 
list_params is a tuple of (, ). """ if len(list_params) != 2: - raise ValueError('Wrong number of parameters. list_params must contain [, ]') + raise ValueError( + 'Wrong number of parameters. list_params must contain [, ]' + ) data, file_format = list_params if Fid._is_valid_dataset(data) and file_format in Fid._file_formats: data = numpy.array(numpy.fft.fft(data), dtype=data.dtype) s = len(data) if file_format == 'varian' or file_format == None: - ft_data = numpy.append(data[int(s / 2.0):], data[: int(s / 2.0)]) + ft_data = numpy.append(data[int(s / 2.0) :], data[: int(s / 2.0)]) if file_format == 'bruker': - ft_data = numpy.append(data[int(s / 2.0):: -1], data[s: int(s / 2.0): -1]) + ft_data = numpy.append( + data[int(s / 2.0) :: -1], data[s : int(s / 2.0) : -1] + ) return ft_data - + return None @staticmethod def _conv_to_ppm(data, index, sw_left, sw): @@ -1413,12 +1417,13 @@ def data_model(self): @data_model.setter def data_model(self, data_model): - if not isinstance(data_model, NMRpy): + if not isinstance(data_model, NMRpy) and data_model is not None: raise AttributeError( f'Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead.' 
) self.__data_model = data_model - self.__data_model.datetime_modified = str(datetime.now()) + if data_model is not None: + self.__data_model.datetime_modified = str(datetime.now()) @data_model.deleter def data_model(self): From 651684ce26931f589d77b06de3dfe4c0cfcb8eac Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Thu, 22 May 2025 11:37:59 +0200 Subject: [PATCH 41/54] Fix `FidArray.save_to_file()` when deleting data model --- nmrpy/data_objects.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 05fea03..a17d32c 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -345,6 +345,10 @@ def fid_object(self, fid_object): if isinstance(fid_object, FIDObject): self.__fid_object = fid_object + @fid_object.deleter + def fid_object(self): + del self.__fid_object + @property def enzymeml_species(self): return self.__enzymeml_species @@ -2408,10 +2412,12 @@ def save_to_file(self, filename=None, overwrite=False, keep_data_model=False, ke self._del_widgets() for fid in self.get_fids(): fid._del_widgets() + # delete data model if required if not keep_data_model: - self.data_model = None + del self.data_model for fid in self.get_fids(): - fid.fid_object = None + del fid.fid_object + # delete enzymeml document if required if not keep_enzymeml: self.enzymeml_document = None for fid in self.get_fids(): From 38072de86642c1e7366cc1b09d365de1b38f2f37 Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Fri, 23 May 2025 17:14:08 +0200 Subject: [PATCH 42/54] rework calibrate() widget not to make use of asyncio.Future() --- nmrpy/plotting.py | 81 +++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index dae9899..42a9816 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -9,11 +9,9 @@ from matplotlib.patches import Rectangle from matplotlib.transforms import blended_transform_factory from 
matplotlib.widgets import Cursor -from matplotlib.backend_bases import NavigationToolbar2, Event -from ipywidgets import FloatText, Output, VBox, Button, Combobox, Dropdown, Label, Checkbox, HTML, Tab, BoundedFloatText, Text +from ipywidgets import FloatText, Output, VBox, HBox, Button, Combobox, Dropdown, Label, Checkbox, HTML, Tab, BoundedFloatText, Text from IPython.display import display -import asyncio from nmrpy.utils import format_species_string try: @@ -1531,30 +1529,25 @@ def __init__(self, fid, self.textinput = FloatText(value=0.0, description='New PPM:', disabled=False, continuous_update=False) - - def _wait_for_change(self, widget, value): - future = asyncio.Future() - def getvalue(change): - # make the new value available - future.set_result(change.new) - widget.unobserve(getvalue, value) - widget.observe(getvalue, value) - return future - + self.button = Button(description='Apply!', disabled=False, button_style='') + self.button.on_click(self._applycalibration) + def process(self): - peak = self.peak_selector.psm.peak + self.peak = self.peak_selector.psm.peak self.peak_selector.out.clear_output() with self.peak_selector.out: - print('current peak ppm: {}'.format(peak)) - display(self.textinput) - async def f(): - newx = await self._wait_for_change(self.textinput, 'value') - offset = newx - peak - self.fid._params['sw_left'] = self.sw_left + offset - with self.peak_selector.out: - print('calibration done.') - plt.close(self.peak_selector.fig) - asyncio.ensure_future(f()) + print('current peak ppm: {}'.format(self.peak)) + display(HBox([self.textinput, self.button])) + + def _applycalibration(self, event): + newx = self.textinput.value + offset = newx - self.peak + self.fid._params['sw_left'] = self.sw_left + offset + + with self.peak_selector.out: + print('calibration done.') + self.button.disabled = True + plt.close(self.peak_selector.fig) class RangeCalibrator: """ @@ -1600,41 +1593,33 @@ def __init__(self, fid_array, self.textinput = 
FloatText(value=0.0, description='New PPM:', disabled=False, continuous_update=False) - - def _wait_for_change(self, widget, value): - future = asyncio.Future() - def getvalue(change): - # make the new value available - future.set_result(change.new) - widget.unobserve(getvalue, value) - widget.observe(getvalue, value) - return future + self.button = Button(description='Apply!', disabled=False, button_style='') + self.button.on_click(self._applycalibration) def process(self): - peak = self.peak_selector.psm.peak + self.peak = self.peak_selector.psm.peak self.peak_selector.out.clear_output() with self.peak_selector.out: - print('current peak ppm: {}'.format(peak)) - display(self.textinput) - async def f(): - newx = await self._wait_for_change(self.textinput, 'value') - offset = newx - peak - self._applycalibration(offset) - with self.peak_selector.out: - print('calibration done.') - plt.close(self.peak_selector.fig) - asyncio.ensure_future(f()) - - def _applycalibration(self, offset): + print('current peak ppm: {}'.format(self.peak)) + display(HBox([self.textinput, self.button])) + + def _applycalibration(self, event): + newx = self.textinput.value + offset = newx - self.peak self.fid_array._params['sw_left'] = self.sw_left + offset - + if self.assign_only_to_index: for fid in [self.fids[i] for i in self.fid_number]: fid._params['sw_left'] = self.sw_left + offset - else: + else: for fid in self.fids: fid._params['sw_left'] = self.sw_left + offset + with self.peak_selector.out: + print('calibration done.') + self.button.disabled = True + plt.close(self.peak_selector.fig) + class FidArrayRangeSelector: """Interactive data-selection widget with ranges. 
Spans are saved as self.ranges.""" def __init__(self, From c851b6084793841b3cd8744b7189fdf96146f7dd Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Sat, 24 May 2025 15:07:57 +0200 Subject: [PATCH 43/54] revert @data_model.setter changes as data_model is now deleted during save, not set to None --- nmrpy/data_objects.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index a17d32c..a626f41 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1421,13 +1421,12 @@ def data_model(self): @data_model.setter def data_model(self, data_model): - if not isinstance(data_model, NMRpy) and data_model is not None: + if not isinstance(data_model, NMRpy): raise AttributeError( f'Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead.' ) self.__data_model = data_model - if data_model is not None: - self.__data_model.datetime_modified = str(datetime.now()) + self.__data_model.datetime_modified = str(datetime.now()) @data_model.deleter def data_model(self): From e987ad3e93b0b9c8c863fb6e85c507d49acf2f3e Mon Sep 17 00:00:00 2001 From: JM Rohwer Date: Mon, 4 Aug 2025 14:35:27 +0200 Subject: [PATCH 44/54] fix pyenzyme imports, update dependencies --- nmrpy/data_objects.py | 4 ++-- nmrpy/utils.py | 5 +++-- requirements.txt | 14 +++++++------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index a626f41..f75218a 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -24,7 +24,7 @@ ) try: import pyenzyme - from pyenzyme.model import EnzymeMLDocument, Measurement + from pyenzyme import EnzymeMLDocument, Measurement from nmrpy.utils import create_enzymeml, create_enzymeml_measurement, fill_enzymeml_measurement, get_species_from_enzymeml except ImportError: pyenzyme = None @@ -1718,7 +1718,7 @@ def parse_enzymeml_document(self, path_to_enzymeml_document) -> None: "The `pyenzyme` package is required to use NMRpy with 
an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." ) self.enzymeml_document = pyenzyme.read_enzymeml( - cls=pyenzyme.EnzymeMLDocument, path=path_to_enzymeml_document + path=path_to_enzymeml_document ) @classmethod diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 5dc8b26..6d4ded1 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -5,8 +5,9 @@ try: import sympy import pyenzyme - from pyenzyme.model import EnzymeMLDocument, Measurement -except ImportError: + from pyenzyme import EnzymeMLDocument, Measurement +except ImportError as ex: + print(ex) sympy = None pyenzyme = None diff --git a/requirements.txt b/requirements.txt index b2ff86d..dbe38e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ numpy scipy -matplotlib>=3.0.0 -ipympl>=0.4.0 -notebook>=6.0.0 -ipython>=7.0.0 -lmfit>=0.9.10 -nmrglue>=0.6 -pydantic>=2.0.0,<2.10.0 +matplotlib>=3.9.0 +ipympl>=0.9.5 +jupyterlab +ipython>=8.0.0 +lmfit>=1.3.0 +nmrglue>=0.9 +pydantic From b28d5e689131f1b5e42932a66000100869b3311b Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Thu, 7 Aug 2025 15:54:19 +0200 Subject: [PATCH 45/54] Upgrade to pyenzyme v2 --- nmrpy/data_objects.py | 7 ++- nmrpy/plotting.py | 141 +++++++++++++++++++----------------------- nmrpy/utils.py | 16 ++--- setup.py | 2 +- 4 files changed, 78 insertions(+), 88 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index f75218a..3ecff57 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -26,7 +26,8 @@ import pyenzyme from pyenzyme import EnzymeMLDocument, Measurement from nmrpy.utils import create_enzymeml, create_enzymeml_measurement, fill_enzymeml_measurement, get_species_from_enzymeml -except ImportError: +except ImportError as ex: + print(f"Optional dependency import failed for data_objects.py: {ex}") pyenzyme = None @@ -2495,6 +2496,10 @@ def create_new_enzymeml_measurement( raise ValueError( "No measurements found in EnzymeML document. At least one measurement is required." 
) + if any(len(measurement.species_data) == 0 for measurement in self.enzymeml_document.measurements): + raise ValueError( + "No species data found in at least one EnzymeML measurement. Species data is required for each measurement." + ) if not template_measurement and (keep_ph or keep_temperature or keep_initial): print("Warning: Without a template measurement, there are no pH, temperature, or initial values to keep.") diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 42a9816..4bc02ee 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -16,8 +16,7 @@ from nmrpy.utils import format_species_string try: import pyenzyme - import pyenzyme.units.predefined as predefined - from pyenzyme.model import EnzymeMLDocument, Measurement + from pyenzyme import EnzymeMLDocument, Measurement from nmrpy.utils import ( get_ordered_list_of_species_names, get_species_from_enzymeml, @@ -26,7 +25,8 @@ fill_enzymeml_measurement, InitialConditionTab, ) -except ImportError: +except ImportError as ex: + print(f"Optional dependency import failed for plotting.py: {ex}") pyenzyme = None class Plot(): @@ -2141,20 +2141,15 @@ def __init__(self, fid_array): self.new_measurement = None self.initialized = False - self.unit_options_dict = { - name: getattr(predefined, name) - for name in dir(predefined) - if not name.startswith("_") and isinstance(getattr(predefined, name), predefined.UnitDefinition) - } - self.c_units = ["M", "mM", "uM", "nM", "mol", "mmol", "umol", "nmol"] - self.m_units = ["g", "mg", "ug", "ng", "kg"] + self.c_units = ["mol/l", "mmol/l", "umol/l", "nmol/l", "mol", "mmol", "umol", "nmol"] + self.m_units = ["g", "mg", "ug"] self.v_units = ["l", "ml", "ul", "nl"] self.t_units = ["s", "min", "h", "d"] self.T_units = ["K", "C"] self._initial_name = None self._initial_id = None - self._current_temp_unit = pyenzyme.units.predefined.K + self._current_temp_unit = "K" self._missing_initial_conditions = [] self.create_widgets() @@ -2232,14 +2227,12 @@ def create_widgets(self): 
layout={"width": "max-content"}, style={"description_width": "initial"}, ) - self.temperature_unit_dropdown = Dropdown( - options=[ - (unit_name, self.unit_options_dict[unit_name]) - for unit_name in self.T_units - if unit_name in self.unit_options_dict - ], - value=pyenzyme.units.predefined.K, + self.temperature_unit_combobox = Combobox( + options=self.T_units, + value="K", description="Select temperature unit:", + ensure_option=False, + placeholder="Select or type unit", layout={"width": "max-content"}, style={"description_width": "initial"}, ) @@ -2269,7 +2262,7 @@ def create_widgets(self): self.spacer, self.temperature_checkbox, self.temperature_textbox, - self.temperature_unit_dropdown, + self.temperature_unit_combobox, self.spacer, self.initial_checkbox, self.spacer, @@ -2310,25 +2303,21 @@ def _create_initial_tabs(self): layout={"width": "max-content"}, style={"description_width": "initial"}, ), - data_unit_dropdown = Dropdown( - options=[ - (unit_name, self.unit_options_dict[unit_name]) - for unit_name in self.c_units - if unit_name in self.unit_options_dict - ], + data_unit_combobox = Combobox( + options=self.c_units, description="Unit of initial condition:", - value=pyenzyme.units.predefined.mM, + value="mM", + ensure_option=False, + placeholder="Select or type unit", layout={"width": "max-content"}, style={"description_width": "initial"}, ), - time_unit_dropdown = Dropdown( - options=[ - (unit_name, self.unit_options_dict[unit_name]) - for unit_name in self.t_units - if unit_name in self.unit_options_dict - ], + time_unit_combobox = Combobox( + options=self.t_units, description="Unit of time course:", - value=pyenzyme.units.predefined.s, + value="s", + ensure_option=False, + placeholder="Select or type unit", layout={"width": "max-content"}, style={"description_width": "initial"}, ) @@ -2351,14 +2340,14 @@ def setup_callbacks(self): self.temperature_checkbox.observe(self._handle_temperature_check) 
self.temperature_textbox.observe(self._handle_temperature_change) - self.temperature_unit_dropdown.observe(self._handle_temperature_unit_change) + self.temperature_unit_combobox.observe(self._handle_temperature_unit_change) self.initial_checkbox.observe(self._handle_initial_check) for initial_tab in self.initial_tabs.values(): initial_tab.textbox.observe(lambda event, initial_tab=initial_tab: self._handle_initial_condition_change(event, initial_tab)) initial_tab.data_type_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_data_type_change(event, initial_tab)) - initial_tab.data_unit_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_data_unit_change(event, initial_tab)) - initial_tab.time_unit_dropdown.observe(lambda event, initial_tab=initial_tab: self._handle_time_unit_change(event, initial_tab)) + initial_tab.data_unit_combobox.observe(lambda event, initial_tab=initial_tab: self._handle_data_unit_change(event, initial_tab)) + initial_tab.time_unit_combobox.observe(lambda event, initial_tab=initial_tab: self._handle_time_unit_change(event, initial_tab)) def initialize_measurement(self): # Initialize the new measurement @@ -2371,7 +2360,7 @@ def initialize_measurement(self): ) self.new_measurement.ph = self.ph_textbox.value self.new_measurement.temperature = self.temperature_textbox.value - self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + self.new_measurement.temperature_unit = self.temperature_unit_combobox.value self._initial_name = self.new_measurement.name self._initial_id = self.new_measurement.id self.fid_array.enzymeml_document.measurements.append(self.new_measurement) @@ -2384,10 +2373,10 @@ def clear_species_data(self, measurement): measurement.temperature = None measurement.temperature_unit = None for species_datum in measurement.species_data: - species_datum.initial = None - species_datum.data_type = None - species_datum.data_unit = None - species_datum.time_unit = None + del 
species_datum.initial + del species_datum.data_type + del species_datum.data_unit + del species_datum.time_unit def layout_widgets(self): # Create widget layout and display @@ -2455,7 +2444,7 @@ def _handle_template_check(self, event): self.ph_textbox.disabled = True self.temperature_checkbox.disabled = False self.temperature_textbox.disabled = True - self.temperature_unit_dropdown.disabled = True + self.temperature_unit_combobox.disabled = True self.initial_checkbox.disabled = False else: self.template_dropdown.options = [] @@ -2464,7 +2453,7 @@ def _handle_template_check(self, event): self.ph_textbox.disabled = False self.temperature_checkbox.disabled = True self.temperature_textbox.disabled = False - self.temperature_unit_dropdown.disabled = False + self.temperature_unit_combobox.disabled = False self.initial_checkbox.disabled = True self.template_measurement = None current_name = self.new_measurement.name @@ -2521,14 +2510,14 @@ def _handle_temperature_check(self, event): if event["type"] == "change" and event["name"] == "value": if event["new"]: self.temperature_textbox.disabled = True - self.temperature_unit_dropdown.disabled = True + self.temperature_unit_combobox.disabled = True self.new_measurement.temperature = self.template_measurement.temperature self.new_measurement.temperature_unit = self.template_measurement.temperature_unit else: self.temperature_textbox.disabled = False - self.temperature_unit_dropdown.disabled = False + self.temperature_unit_combobox.disabled = False self.new_measurement.temperature = self.temperature_textbox.value - self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + self.new_measurement.temperature_unit = self.temperature_unit_combobox.value def _handle_temperature_change(self, event): # Enable the temperature_textbox when the temperature_checkbox is @@ -2536,7 +2525,7 @@ def _handle_temperature_change(self, event): if event["type"] == "change" and event["name"] == "value": if event["new"]: 
self.new_measurement.temperature = self.temperature_textbox.value - self.new_measurement.temperature_unit = self.temperature_unit_dropdown.value + self.new_measurement.temperature_unit = self.temperature_unit_combobox.value def _handle_temperature_unit_change(self, event): # Enable the temperature_unit_dropdown when the template @@ -2547,13 +2536,13 @@ def _handle_temperature_unit_change(self, event): if event["type"] == "change" and event["name"] == "value": if event["new"]: current_value = self.temperature_textbox.value - new_unit = self.temperature_unit_dropdown.value + new_unit = self.temperature_unit_combobox.value old_unit = self._current_temp_unit if old_unit == new_unit: return # No conversion needed - if new_unit == pyenzyme.units.predefined.K: + if new_unit == "K": # Converting from °C to K converted_value = current_value + 273.15 self.temperature_textbox.min = 0.0 @@ -2562,7 +2551,7 @@ def _handle_temperature_unit_change(self, event): self.new_measurement.temperature = converted_value self.new_measurement.temperature_unit = new_unit - elif new_unit == pyenzyme.units.predefined.C: + elif new_unit == "C": # Converting from K to °C converted_value = current_value - 273.15 self.temperature_textbox.min = -273.15 @@ -2572,7 +2561,7 @@ def _handle_temperature_unit_change(self, event): self.new_measurement.temperature_unit = new_unit else: - raise ValueError( + print( f"Invalid temperature unit. Valid units are K and C, " f"got {new_unit} instead." 
) @@ -2588,35 +2577,23 @@ def _handle_initial_check(self, event): initial_tab.textbox.value = template_datum.initial initial_tab.data_type_dropdown.options = [(template_datum.data_type.name.capitalize().replace("_", " "), template_datum.data_type)] initial_tab.data_type_dropdown.value = template_datum.data_type - initial_tab.data_unit_dropdown.options = [ - (template_datum.data_unit.name, template_datum.data_unit) - ] - initial_tab.data_unit_dropdown.value = template_datum.data_unit - initial_tab.time_unit_dropdown.options = [ - (template_datum.time_unit.name, template_datum.time_unit) - ] - initial_tab.time_unit_dropdown.value = template_datum.time_unit + initial_tab.data_unit_combobox.options = [template_datum.data_unit.name] + initial_tab.data_unit_combobox.value = template_datum.data_unit.name + initial_tab.time_unit_combobox.options = [template_datum.time_unit.name] + initial_tab.time_unit_combobox.value = template_datum.time_unit.name new_datum.initial = template_datum.initial new_datum.data_type = template_datum.data_type - new_datum.data_unit = template_datum.data_unit - new_datum.time_unit = template_datum.time_unit + new_datum.data_unit = template_datum.data_unit.name + new_datum.time_unit = template_datum.time_unit.name else: for new_datum, initial_tab in zip(self.new_measurement.species_data, self.initial_tabs.values()): initial_tab.textbox.value = 0.0 initial_tab.data_type_dropdown.options = [(data_type.name.capitalize().replace("_", " "), data_type) for data_type in pyenzyme.DataTypes] initial_tab.data_type_dropdown.value = pyenzyme.DataTypes.CONCENTRATION - initial_tab.data_unit_dropdown.options = [ - (unit_name, self.unit_options_dict[unit_name]) - for unit_name in self.c_units - if unit_name in self.unit_options_dict - ] - initial_tab.data_unit_dropdown.value = pyenzyme.units.predefined.mM - initial_tab.time_unit_dropdown.options = [ - (unit_name, self.unit_options_dict[unit_name]) - for unit_name in self.t_units - if unit_name in 
self.unit_options_dict - ] - initial_tab.time_unit_dropdown.value = pyenzyme.units.predefined.s + initial_tab.data_unit_combobox.options = self.c_units + initial_tab.data_unit_combobox.value = "mM" + initial_tab.time_unit_combobox.options = self.t_units + initial_tab.time_unit_combobox.value = "s" new_datum.initial = None new_datum.data_type = None new_datum.data_unit = None @@ -2633,8 +2610,8 @@ def _handle_initial_condition_change(self, event, initial_tab): species_id=initial_tab.species_id, initial=event["new"], data_type=initial_tab.data_type_dropdown.value, - data_unit=initial_tab.data_unit_dropdown.value, - time_unit=initial_tab.time_unit_dropdown.value + data_unit=initial_tab.data_unit_combobox.value, + time_unit=initial_tab.time_unit_combobox.value ) for species_datum in self.new_measurement.species_data: if species_datum.species_id == initial_tab.species_id: @@ -2660,22 +2637,28 @@ def _handle_data_type_change(self, event, initial_tab): species_datum.data_type = event["new"] def _handle_data_unit_change(self, event, initial_tab): - # Enable the data_unit_dropdown when the data_unit_checkbox is + # Enable the data_unit_combobox when the data_unit_checkbox is # checked if event["type"] == "change" and event["name"] == "value": if event["new"]: for species_datum in self.new_measurement.species_data: if species_datum.species_id == initial_tab.species_id: - species_datum.data_unit = event["new"] + try: + species_datum.data_unit = event["new"] + except Exception: + print(f"Invalid data unit: {event['new']}") def _handle_time_unit_change(self, event, initial_tab): - # Enable the time_unit_dropdown when the time_unit_checkbox is + # Enable the time_unit_combobox when the time_unit_checkbox is # checked if event["type"] == "change" and event["name"] == "value": if event["new"]: for species_datum in self.new_measurement.species_data: if species_datum.species_id == initial_tab.species_id: - species_datum.time_unit = event["new"] + try: + species_datum.time_unit = 
event["new"] + except Exception: + print(f"Invalid time unit: {event['new']}") if __name__ == '__main__': diff --git a/nmrpy/utils.py b/nmrpy/utils.py index 6d4ded1..d07a40e 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -1,13 +1,13 @@ from dataclasses import dataclass -from ipywidgets import BoundedFloatText, Button, Dropdown, HTML, VBox +from ipywidgets import BoundedFloatText, Button, Combobox, Dropdown, HTML, VBox try: import sympy import pyenzyme from pyenzyme import EnzymeMLDocument, Measurement except ImportError as ex: - print(ex) + print(f"Optional dependency import failed for utils.py: {ex}") sympy = None pyenzyme = None @@ -108,6 +108,7 @@ def get_initial_concentration_by_species_id( for measurement_datum in measurement.species: if measurement_datum.species_id == species_id: intial_concentration = measurement_datum.init_conc + break return intial_concentration def get_species_id_by_name( @@ -150,6 +151,7 @@ def get_species_name_by_id(enzymeml_document: EnzymeMLDocument, species_id: str) for species in get_species_from_enzymeml(enzymeml_document): if species.id == species_id: species_name = species.name + break return species_name @@ -410,16 +412,16 @@ class InitialConditionTab: header: HTML textbox: BoundedFloatText data_type_dropdown: Dropdown - data_unit_dropdown: Dropdown - time_unit_dropdown: Dropdown + data_unit_combobox: Combobox + time_unit_combobox: Combobox def as_vbox(self): return VBox([ self.header, self.textbox, self.data_type_dropdown, - self.data_unit_dropdown, - self.time_unit_dropdown, + self.data_unit_combobox, + self.time_unit_combobox, ]) @@ -449,7 +451,7 @@ def create_enzymeml( raise ValueError( "A measurement ID is required to create an EnzymeML document. Please provide a measurement ID using the `measurement_id` keyword argument." 
) - global_time = (fid_array.t.tolist(),) + global_time = ([float(x) for x in fid_array.t],) measurement = next( measurement for measurement in enzymeml_document.measurements if measurement.id == measurement_id diff --git a/setup.py b/setup.py index eb8083e..79346c1 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ 'name': 'nmrpy', 'extras_require': { 'enzymeml': [ - 'pyenzyme @ git+https://github.com/EnzymeML/PyEnzyme.git@v2-migration#egg=pyenzyme', + 'pyenzyme>=2.1.0', 'sympy' ] } From 3dc3d1262edbf9d05b0b5b47c4791e2e8d5b5a29 Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Wed, 27 Aug 2025 15:56:45 +0200 Subject: [PATCH 46/54] Add t0 handling (#18) - Added T0Tab and T0Logic classes to utils.py - Added T0Adder widget class to plotting.py - Added add_t0_to_enzymeml() method that can be used interactively with a Jupyter widget using gui=True or script-like with gui=False --- nmrpy/data_objects.py | 54 ++++++++++- nmrpy/plotting.py | 206 +++++++++++++++++++++++++++++++++++++++++- nmrpy/utils.py | 152 ++++++++++++++++++++++++++++++- 3 files changed, 403 insertions(+), 9 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 3ecff57..73cc8d6 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -25,7 +25,7 @@ try: import pyenzyme from pyenzyme import EnzymeMLDocument, Measurement - from nmrpy.utils import create_enzymeml, create_enzymeml_measurement, fill_enzymeml_measurement, get_species_from_enzymeml + from nmrpy.utils import T0Logic, create_enzymeml, create_enzymeml_measurement, fill_enzymeml_measurement, get_species_from_enzymeml except ImportError as ex: print(f"Optional dependency import failed for data_objects.py: {ex}") pyenzyme = None @@ -2471,8 +2471,59 @@ def save_data_model(self, format: str = 'json', filename=None, overwrite=False): print(f'Data model saved to "{filename}".') else: raise ValueError(f'Unsupported format: {format}') + + def add_t0_to_enzymeml( + self, + 
gui: bool = True, + measurement_id: Optional[str] = None, + use_t1: bool = True, + t0: Optional[Mapping[str, float]] = None, + offset: Optional[float] = None, + ) -> None: + """ + Add t0 to a measurement in the EnzymeML document either by using + t1 values (zero-shift times) or by providing a dict of t0 data + values, and optionally apply a time-axis offset. + """ + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." + ) + if len(self.enzymeml_document.measurements) == 0: + raise ValueError( + "No measurements found in EnzymeML document. At least one measurement is required." + ) + if gui: + _ = T0Adder( + enzymeml_document=self.enzymeml_document, + measurement_id=measurement_id, + use_t1=use_t1, + t0_values=t0, + offset_enabled=offset is not None, + offset_value=offset or 0.0, + ) + return + + logic = T0Logic(self.enzymeml_document, measurement_id) + + if use_t1: + logic.zero_shift_times() + else: + t0 = t0 or {} + missing = set() + for sid in logic.nonconstant_species_ids(): + if sid in t0: + logic.set_t0_value(sid, float(t0[sid])) + else: + missing.add(sid) + if missing: + print(f"WARNING: {len(missing)} species ID(s) missing in t0: {sorted(missing)}") + if offset is not None: + logic.apply_offset(float(offset)) + logic.update_initials() + def create_new_enzymeml_measurement( self, gui: bool = True, @@ -2504,7 +2555,6 @@ def create_new_enzymeml_measurement( print("Warning: Without a template measurement, there are no pH, temperature, or initial values to keep.") if gui: - # TODO: Implement GUI for creating a new measurement self._measurement_creator = MeasurementCreator( fid_array=self, ) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 4bc02ee..bdfc290 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -1,3 +1,4 @@ +from typing import Mapping, Optional import nmrpy.data_objects import logging, traceback import numpy @@ 
-13,7 +14,7 @@ from ipywidgets import FloatText, Output, VBox, HBox, Button, Combobox, Dropdown, Label, Checkbox, HTML, Tab, BoundedFloatText, Text from IPython.display import display -from nmrpy.utils import format_species_string +from nmrpy.utils import T0Logic, T0Tab, format_species_string try: import pyenzyme from pyenzyme import EnzymeMLDocument, Measurement @@ -2129,14 +2130,213 @@ def __init__(self): raise NotImplementedError( "Widget for calculating concentrations is currently under heavy construction. Please calculate and assign concentrations manually." ) - + +class T0Adder: + """ + Widget for adding t0 to a measurement with optional t1 zero-shift + and time offset. + """ + + def __init__( + self, + fid_array, + measurement_id: Optional[str] = None, + use_t1: bool = True, + t0_values: Optional[Mapping[str, float]] = None, + offset_enabled: bool = False, + offset_value: float = 0.0, + ): + # Logic state + self.logic = T0Logic(fid_array.enzymeml_document, measurement_id) + self.use_t1 = bool(use_t1) + self.offset_enabled = bool(offset_enabled) + self.offset_value = float(offset_value) if offset_enabled else 0.0 + self.t0_values: dict[str, float] = dict(t0_values or {}) + + # Widget state + self.t0_tabs: dict[str, T0Tab] = {} + self._build_widgets() + self._wire_callbacks() + self._refresh_tabs() + display(self.container) + + # Initial state + if self.use_t1: + self.logic.zero_shift_times() + else: + for sid, val in self.t0_values.items(): + self.logic.set_t0_value(sid, val) + + if self.offset_enabled: + self.logic.apply_offset(self.offset_value) + + self.logic.update_initials() + + def _build_widgets(self): + self.title_html = HTML(value="Add t0 to EnzymeML Measurement") + + self.measurement_dropdown = Dropdown( + options=[m.id for m in self.logic.doc.measurements], + value=self.logic.measurement.id, + description="Select a measurement:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + ) + + self.use_t1_checkbox = Checkbox( + 
value=self.use_t1, + description=f"Use t1 values from {self.logic.measurement.id}?", + indent=False, + ) + + self.offset_checkbox = Checkbox( + value=self.offset_enabled, + description="Apply offset to time axis?", + indent=False, + ) + + self.offset_textbox = BoundedFloatText( + value=self.offset_value, + min=0.0, + max=1000.0, + step=0.01, + description=f"Offset in {self.logic.get_time_unit_name()}:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=not self.offset_enabled, + ) + + self.general_tab = VBox( + [ + self.measurement_dropdown, + self.use_t1_checkbox, + self.offset_checkbox, + self.offset_textbox, + ] + ) + + self.tab = Tab(children=[self.general_tab]) + self.tab.set_title(0, "General") + + self.container = VBox([self.title_html, self.tab]) + + def _wire_callbacks(self): + self.measurement_dropdown.observe(self._on_measurement_change, names="value") + self.use_t1_checkbox.observe(self._on_use_t1_change, names="value") + self.offset_checkbox.observe(self._on_offset_toggle, names="value") + self.offset_textbox.observe(self._on_offset_value_change, names="value") + + def _refresh_tabs(self): + self.t0_tabs.clear() + + species_ids = self.logic.nonconstant_species_ids() + new_children = [self.general_tab] + titles = ["General"] + + for sid in species_ids: + start_val = float(self.t0_values.get(sid, 0.0)) + header = HTML(value=f"Set t0 for {sid}") + t0_box = BoundedFloatText( + value=start_val, + min=0.0, + max=1000.0, + step=0.01, + description=f"t0 data in {self.logic.get_data_unit_name(sid)}:", + layout={"width": "max-content"}, + style={"description_width": "initial"}, + disabled=self.use_t1, + ) + tab = T0Tab(sid, sid, header, t0_box) + t0_box.observe(lambda ev, sp=sid: self._on_t0_value_change(ev, sp), names="value") + + self.t0_tabs[sid] = tab + new_children.append(tab.as_vbox()) + titles.append(sid) + + self.tab.children = tuple(new_children) + for i, title in enumerate(titles): + self.tab.set_title(i, title) + 
+ def _on_measurement_change(self, change): + if change["type"] != "change": + return + + self.logic = T0Logic(self.logic.doc, change["new"]) + + self.use_t1_checkbox.description = f"Use t1 values from {self.logic.measurement.id}?" + self.offset_textbox.description = f"Offset in {self.logic.get_time_unit_name()}:" + + self._refresh_tabs() + + if self.use_t1: + self.logic.zero_shift_times() + else: + for sid, val in self.t0_values.items(): + self.logic.set_t0_value(sid, val) + + if self.offset_enabled: + self.logic.apply_offset(self.offset_value) + + self.logic.update_initials() + + def _on_use_t1_change(self, change): + if change["type"] != "change": + return + + self.use_t1 = bool(change["new"]) + + for tab in self.t0_tabs.values(): + tab.t0_data_textbox.disabled = self.use_t1 + if self.use_t1: + tab.t0_data_textbox.value = 0.0 + + if self.use_t1: + self.logic.zero_shift_times() + self.logic.update_initials() + + def _on_offset_toggle(self, change): + if change["type"] != "change": + return + + self.offset_enabled = bool(change["new"]) + self.offset_textbox.disabled = not self.offset_enabled + + if not self.offset_enabled: + self.logic.apply_offset(0.0) + else: + self.logic.apply_offset(float(self.offset_textbox.value or 0.0)) + self.logic.update_initials() + + def _on_offset_value_change(self, change): + if change["type"] != "change": + return + if not self.offset_enabled: + return + + self.offset_value = float(change["new"] or 0.0) + + self.logic.apply_offset(self.offset_value) + self.logic.update_initials() + + def _on_t0_value_change(self, change, species_id: str): + if change["type"] != "change": + return + if self.use_t1: + return + + value = float(change["new"] or 0.0) + self.t0_values[species_id] = value + + self.logic.set_t0_value(species_id, value) + self.logic.update_initials() + class MeasurementCreator: """ Widget for creating a new measurement. 
""" def __init__(self, fid_array): self.fid_array = fid_array - self.measurements = self.fid_array.enzymeml_document.measurements.copy() + self.measurements = self.fid_array.enzymeml_document.measurements.model_copy(deep=True) self.template_measurement = None self.new_measurement = None self.initialized = False diff --git a/nmrpy/utils.py b/nmrpy/utils.py index d07a40e..cf0a4dd 100644 --- a/nmrpy/utils.py +++ b/nmrpy/utils.py @@ -1,11 +1,12 @@ from dataclasses import dataclass +from typing import Optional -from ipywidgets import BoundedFloatText, Button, Combobox, Dropdown, HTML, VBox +from ipywidgets import BoundedFloatText, Button, Checkbox, Combobox, Dropdown, HTML, VBox try: import sympy import pyenzyme - from pyenzyme import EnzymeMLDocument, Measurement + from pyenzyme import EnzymeMLDocument, Measurement, MeasurementData except ImportError as ex: print(f"Optional dependency import failed for utils.py: {ex}") sympy = None @@ -195,13 +196,156 @@ def format_measurement_string(measurement: Measurement) -> str: return f"{measurement.id}" +##### t0 helpers ##### + +class T0Logic: + """Logic handling for the t0 widget.""" + + def __init__( + self, + enzymeml_document: EnzymeMLDocument, + measurement_id: Optional[str] = None, + ): + """Initialize the T0Logic object. + + Args: + enzymeml_document (EnzymeMLDocument): An EnzymeML document. + measurement_id (Optional[str]): The ID of the measurement to use. + If None, the last measurement in the document is used. + + Raises: + ValueError: If no measurements are found in the EnzymeML document. + + Returns: + T0Logic: A T0Logic object. + """ + if not getattr(enzymeml_document, "measurements", None): + raise ValueError("No measurements found in EnzymeML document. 
At least one measurement is required.") + + self.doc = enzymeml_document + self.measurement = self._select_measurement(measurement_id) + self._previous_offset: float = 0.0 + + def _select_measurement(self, measurement_id: Optional[str]): + if measurement_id is None: + return self.doc.measurements[-1] + for m in self.doc.measurements: + if m.id == measurement_id: + return m + raise ValueError(f"Measurement with ID '{measurement_id}' not found in EnzymeML document.") + + def get_data_unit_name(self, species_id: str) -> str: + """Return the display name for the data unit of a given + species_id. Falls back gracefully if missing/unknown. + """ + try: + sd = self.species_data_by_id()[species_id] + unit = getattr(sd, "data_unit", None) + if unit is None: + return "data units" + return getattr(unit, "name", str(unit)) + except Exception: + return "data units" + + def get_time_unit_name(self) -> str: + """Return the display name for the time unit of a given + measurement. Falls back gracefully if missing/unknown. + """ + try: + return self.measurement.species_data[0].time_unit.name + except Exception: + return "time units" + + def nonconstant_species_ids(self) -> list[str]: + """Return IDs of species that are *not* constant, preserving + measurement order. 
+ """ + ids: list[str] = [] + constant: set[str] = set() + for s in get_species_from_enzymeml(self.doc): + if getattr(s, "constant", False): + constant.add(s.id) + for sd in self.measurement.species_data: + if sd.species_id not in constant: + ids.append(sd.species_id) + return ids + + def species_data_by_id(self) -> dict[str, MeasurementData]: + """Return a dictionary of species data by their ID.""" + return {sd.species_id: sd for sd in self.measurement.species_data} + + def zero_shift_times(self) -> None: + """Shift times so that first timepoint per species is 0.0.""" + for sd in self.measurement.species_data: + if not sd.time: + continue + t0 = sd.time[0] + if t0 != 0.0: + sd.time = [t - t0 for t in sd.time] + + def set_t0_value(self, species_id: str, value: float) -> None: + """Ensure there is a data point at time 0.0 for a species; set + its data to `value`. If time already starts at 0.0, only + overwrite data[0]; otherwise insert (0.0, value). + """ + sdb = self.species_data_by_id() + if species_id not in sdb: + # Keep it graceful for batch operations + return + sd = sdb[species_id] + if sd.time and sd.time[0] == 0.0: + if not sd.data: + sd.data = [value] + else: + sd.data[0] = value + else: + sd.time.insert(0, 0.0) + sd.data.insert(0, value) + + def apply_offset(self, new_offset: float) -> None: + """Apply an absolute offset to times (per species), keeping + time[0] as is. 
+ """ + prev = getattr(self, "_previous_offset", 0.0) + for sd in self.measurement.species_data: + if not sd.time: + continue + # Preserve time[0] exactly; shift others by delta + delta = new_offset - prev + if len(sd.time) > 1: + head = sd.time[0] + tail = [t + delta for t in sd.time[1:]] + sd.time = [head, *tail] + self._previous_offset = new_offset + + def update_initials(self) -> None: + """Set `initial` to data[0] for all species where available.""" + for sd in self.measurement.species_data: + if sd.data: + sd.initial = sd.data[0] + + +@dataclass +class T0Tab: + species_id: str + title: str + header: HTML + t0_data_textbox: BoundedFloatText + + def as_vbox(self): + return VBox([ + self.header, + self.t0_data_textbox, + ]) + + ##### Measurement creation helpers ##### def create_enzymeml_measurement( enzymeml_document: EnzymeMLDocument, **kwargs ) -> Measurement: - """Create a new EnzymeML Measurement object from a template within an - EnzymeML document or from scratch. + """Create a new EnzymeML Measurement object from a template within + an EnzymeML document or from scratch. Args: enzymeml_document (EnzymeMLDocument): An EnzymeML document. 
From 6cf0830c156da04272be10415ed7db4a8c78bc04 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 8 Sep 2025 14:37:28 +0200 Subject: [PATCH 47/54] Fix copy() error in plotting.py --- nmrpy/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index bdfc290..41b20fa 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -2336,7 +2336,7 @@ class MeasurementCreator: """ def __init__(self, fid_array): self.fid_array = fid_array - self.measurements = self.fid_array.enzymeml_document.measurements.model_copy(deep=True) + self.measurements = self.fid_array.enzymeml_document.measurements.copy() self.template_measurement = None self.new_measurement = None self.initialized = False From 4f48277a1e6685b273cffa28fd393a1a4e76c9d1 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Tue, 16 Sep 2025 15:48:45 +0200 Subject: [PATCH 48/54] Add explicit super init to FidArray --- nmrpy/data_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 73cc8d6..e0674c7 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1403,7 +1403,8 @@ class FidArray(Base): ''' - def __init__(self): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.data_model = NMRpy( datetime_created=str(datetime.now()), experiment=Experiment(name="NMR experiment"), From d796f8de84dfb5c51a86a7a99dcd51bd76219ecc Mon Sep 17 00:00:00 2001 From: Torsten Giess <92164414+torogi94@users.noreply.github.com> Date: Tue, 16 Sep 2025 17:08:52 +0200 Subject: [PATCH 49/54] Update range handling in Fid (#20) Changed behaviour of Fid._setup_peak_objects(). Multiple peaks per range are handled properly now by using Fid._grouped_peaklist instead of Fid.peaks. Also refactored the method to make it more robust and readable overall. 
--- nmrpy/data_objects.py | 50 ++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index e0674c7..cb5ae06 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -1326,31 +1326,33 @@ def _setup_peak_objects(self): "`fid.ranges` is required but still empty. " "Please assign them manually or with the `rangepicker` method." ) - if len(self.peaks) != len(self.ranges): - raise RuntimeError( - "`fid.peaks` and `fid.ranges` must have the same length, as " - "each peak must have a range assigned to it." - ) - + + def normalize_range(range_group): + start, end = range_group[0], range_group[1] + return { + "start": float(min(start, end)), + "end": float(max(start, end)) + } + # Create or update Peak objects in data model - for i, (peak_val, range_val) in enumerate(zip(self.peaks, self.ranges)): - if i < len(self.fid_object.peaks): - # Peak already exists, update it - self.fid_object.peaks[i].peak_position = float(peak_val) - self.fid_object.peaks[i].peak_range = { - "start": float(range_val[0]), - "end": float(range_val[1]), - } - else: - # Peak does not yet exist, create it - self.fid_object.add_to_peaks( - peak_index=i, - peak_position=float(peak_val), - peak_range={ - "start": float(range_val[0]), - "end": float(range_val[1]), - }, - ) + existing_peaks_count = len(self.fid_object.peaks) + global_index = 0 + for peak_group, range_group in zip(self._grouped_peaklist, self.ranges): + normalized_range = normalize_range(range_group) + + for peak in peak_group: + if global_index < existing_peaks_count: + # Peak already exists, update it + self.fid_object.peaks[global_index].peak_position = float(peak) + self.fid_object.peaks[global_index].peak_range = normalized_range + else: + # Peak does not yet exist, create it + self.fid_object.add_to_peaks( + peak_index=global_index, + peak_position=float(peak), + peak_range=normalized_range, + ) + global_index += 1 def 
assign_peaks(self, species_list: list[str] | EnzymeMLDocument = None): """ From 80e4e1279b9a20469244472363c99d9ce3ac9da1 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Wed, 17 Sep 2025 13:40:40 +0200 Subject: [PATCH 50/54] Add attribute checks for data model existence --- nmrpy/data_objects.py | 77 +++++++++++++++++++++++++++---------------- nmrpy/plotting.py | 3 +- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index cb5ae06..954bb90 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -478,9 +478,10 @@ def deconvoluted_integrals(self): integral = int_gauss+int_lorentz integrals.append(integral) # Update data model - peak_object = self.fid_object.peaks[i] - if peak_object.peak_integral != integral: - peak_object.peak_integral = float(integral) + if getattr(self, 'fid_object', None) is not None: + peak_object = self.fid_object.peaks[i] + if peak_object.peak_integral != integral: + peak_object.peak_integral = float(integral) return integrals def _get_plots(self): @@ -558,7 +559,8 @@ def zf(self): """ self.data = numpy.append(self.data, 0*self.data) # Update data model - self.fid_object.processing_steps.is_zero_filled = True + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_zero_filled = True def emhz(self, lb=5.0): """ @@ -571,8 +573,9 @@ def emhz(self, lb=5.0): """ self.data = numpy.exp(-numpy.pi*numpy.arange(len(self.data)) * (lb/self._params['sw_hz'])) * self.data # Update data model - self.fid_object.processing_steps.is_apodised = True - self.fid_object.processing_steps.apodisation_frequency = lb + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_apodised = True + self.fid_object.processing_steps.apodisation_frequency = lb def real(self): """ @@ -580,8 +583,8 @@ def real(self): """ self.data = numpy.real(self.data) # Update data model - - self.fid_object.processing_steps.is_only_real = True + if 
getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_only_real = True # GENERAL FUNCTIONS def ft(self): @@ -601,8 +604,9 @@ def ft(self): self.data = Fid._ft(list_params) self._flags['ft'] = True # Update data model - self.fid_object.processing_steps.is_fourier_transformed = True - self.fid_object.processing_steps.fourier_transform_type = 'FFT' + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_fourier_transformed = True + self.fid_object.processing_steps.fourier_transform_type = 'FFT' @classmethod def _ft(cls, list_params): @@ -683,8 +687,9 @@ def phase_correct(self, method='leastsq', verbose = True): if verbose: print('phasing: %s'%self.id) self.data = Fid._phase_correct((self.data, method, verbose)) - self.fid_object.processed_data = [str(datum) for datum in self.data] - self.fid_object.processing_steps.is_phased = True + # Update data model + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_phased = True @classmethod def _phase_correct(cls, list_params): @@ -754,9 +759,10 @@ def ps(self, p0=0.0, p1=0.0): ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) self.data = ph*self.data # Update data model - self.fid_object.processing_steps.is_phased = True - self.fid_object.processing_steps.zero_order_phase = p0 - self.fid_object.processing_steps.first_order_phase = p1 + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_phased = True + self.fid_object.processing_steps.zero_order_phase = p0 + self.fid_object.processing_steps.first_order_phase = p1 def phaser(self): """ @@ -821,7 +827,8 @@ def baseline_correct(self, deg=2): data_bl = data-yp self.data = numpy.array(data_bl) # Update data model - self.fid_object.processing_steps.is_baseline_corrected = True + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_baseline_corrected = True def peakpick(self, thresh=0.1): """ @@ -1266,7 
+1273,9 @@ def deconv(self, method='leastsq', frac_gauss=0.0): list_parameters = [self.data, self._grouped_index_peaklist, self._index_ranges, frac_gauss, method] self._deconvoluted_peaks = numpy.array([j for i in Fid._deconv_datum(list_parameters) for j in i]) print(self.deconvoluted_integrals) - self.fid_object.processing_steps.is_deconvoluted = True + # Update data model + if getattr(self, 'fid_object', None) is not None: + self.fid_object.processing_steps.is_deconvoluted = True print('deconvolution completed') @@ -1316,6 +1325,8 @@ def _setup_peak_objects(self): # of Fid.peaks and Fid.ranges. # Validates FID has peaks and ranges and len(peaks) == len(ranges) + if getattr(self, 'fid_object', None) is None: + return if self.peaks is None or len(self.peaks) == 0: raise RuntimeError( "`fid.peaks` is required but still empty. " @@ -1818,9 +1829,11 @@ def ft_fids(self, mp=True, cpus=None): for fid, datum in zip(fids, ft_data): fid.data = datum fid._flags['ft'] = True - fid.fid_object.processed_data = [str(data) for data in datum] - fid.fid_object.processing_steps.is_fourier_transformed = True - fid.fid_object.processing_steps.fourier_transform_type = 'FFT' + # Update data model + if getattr(fid, 'fid_object', None) is not None: + fid.fid_object.processed_data = [str(data) for data in datum] + fid.fid_object.processing_steps.is_fourier_transformed = True + fid.fid_object.processing_steps.fourier_transform_type = 'FFT' else: for fid in self.get_fids(): fid.ft() @@ -1842,9 +1855,11 @@ def norm_fids(self): dmax = self.data.max() for fid in self.get_fids(): fid.data = fid.data/dmax - fid.fid_object.processed_data = [float(datum) for datum in fid.data] - fid.fid_object.processing_steps.is_normalised = True - fid.fid_object.processing_steps.max_value = float(dmax) + # Update data model + if getattr(fid, 'fid_object', None) is not None: + fid.fid_object.processed_data = [float(datum) for datum in fid.data] + fid.fid_object.processing_steps.is_normalised = True + 
fid.fid_object.processing_steps.max_value = float(dmax) def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True): """ @@ -1868,8 +1883,10 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) phased_data = self._generic_mp(Fid._phase_correct, list_params, cpus) for fid, datum in zip(fids, phased_data): fid.data = datum - fid.fid_object.processed_data = [str(data) for data in datum] - fid.fid_object.processing_steps.is_phased = True + # Update data model + if getattr(fid, 'fid_object', None) is not None: + fid.fid_object.processed_data = [str(data) for data in datum] + fid.fid_object.processing_steps.is_phased = True else: for fid in self.get_fids(): fid.phase_correct(method=method, verbose=verbose) @@ -1984,10 +2001,12 @@ def deconv_fids(self, mp=True, cpus=None, method='leastsq', frac_gauss=0.0): integral = int_gauss + int_lorentz integrals.append(integral) # Update data model - peak_object = fid.fid_object.peaks[i] - if peak_object.peak_integral != integral: - peak_object.peak_integral = float(integral) - fid.fid_object.processing_steps.is_deconvoluted = True + if getattr(fid, 'fid_object', None) is not None: + peak_object = fid.fid_object.peaks[i] + if peak_object.peak_integral != integral: + peak_object.peak_integral = float(integral) + if getattr(fid, 'fid_object', None) is not None: + fid.fid_object.processing_steps.is_deconvoluted = True else: for fid in self.get_fids(): fid.deconv(frac_gauss=frac_gauss) diff --git a/nmrpy/plotting.py b/nmrpy/plotting.py index 41b20fa..971ff48 100644 --- a/nmrpy/plotting.py +++ b/nmrpy/plotting.py @@ -170,7 +170,8 @@ def _plot_deconv(self, fid, peak = peakshapes[n] ax.plot(ppm, peak, '-', color=peak_colour, lw=lw) ax.text(ppm[numpy.argmax(peak)], label_pad+peak.max(), str(n), ha='center') - if (fid._flags['assigned']) and (show_labels): + is_assigned = getattr(fid, '_flags', {}).get('assigned', False) + if (is_assigned) and (show_labels): ax.text( ppm[numpy.argmax(peak)], 
label_pad + peak.max(), From c3dca2dcbab4a036b1e02eeb9a3d9e4b53c9b863 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 22 Sep 2025 15:05:57 +0200 Subject: [PATCH 51/54] Fix faulty argument to T0Adder widget --- nmrpy/data_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 954bb90..74e4e11 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -2517,7 +2517,7 @@ def add_t0_to_enzymeml( ) if gui: _ = T0Adder( - enzymeml_document=self.enzymeml_document, + fid_array=self, measurement_id=measurement_id, use_t1=use_t1, t0_values=t0, From 7a3e67db556aba00d8750a50e69a0b7a1b3587ca Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 22 Sep 2025 15:06:32 +0200 Subject: [PATCH 52/54] Add data model test suites --- nmrpy/tests/nmrpy_tests.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/nmrpy/tests/nmrpy_tests.py b/nmrpy/tests/nmrpy_tests.py index 607c715..4d1af23 100644 --- a/nmrpy/tests/nmrpy_tests.py +++ b/nmrpy/tests/nmrpy_tests.py @@ -697,6 +697,11 @@ def test_peakpicker_traces(self): def test_select_integral_traces(self): self.fid_array_varian.select_integral_traces() +class TestDataModels(unittest.TestCase): + def setUp(self): + ... 
+ + class NMRPyTest: def __init__(self, tests='all'): """ @@ -711,6 +716,8 @@ def __init__(self, tests='all'): 'fidarrayutils' - FidArray utilities tests 'plotutils' - plotting utilities tests 'noplot' - all tests except plotting utilities (scripted usage) + 'datamodels' - data model tests + 'nodatamodels' - all tests except data model tests """ runner = unittest.TextTestRunner() baseinit_test = unittest.makeSuite(TestBaseInitialisation) @@ -719,6 +726,7 @@ def __init__(self, tests='all'): fidutils_test = unittest.makeSuite(TestFidUtils) fidarrayutils_test = unittest.makeSuite(TestFidArrayUtils) plotutils_test = unittest.makeSuite(TestPlottingUtils) + datamodels_test = unittest.makeSuite(TestDataModels) suite = baseinit_test if tests == 'all': @@ -727,11 +735,13 @@ def __init__(self, tests='all'): suite.addTests(fidutils_test) suite.addTests(fidarrayutils_test) suite.addTests(plotutils_test) + suite.addTests(datamodels_test) elif tests == 'noplot': suite.addTests(fidinit_test) suite.addTests(fidarrayinit_test) suite.addTests(fidutils_test) suite.addTests(fidarrayutils_test) + suite.addTests(datamodels_test) elif tests == 'fidinit': suite.addTests(fidinit_test) elif tests == 'fidarrayinit': @@ -742,6 +752,14 @@ def __init__(self, tests='all'): suite.addTests(fidarrayutils_test) elif tests == 'plotutils': suite.addTests(plotutils_test) + elif tests == 'datamodels': + suite.addTests(datamodels_test) + elif tests == 'nodatamodels': + suite.addTests(fidinit_test) + suite.addTests(fidarrayinit_test) + suite.addTests(fidutils_test) + suite.addTests(fidarrayutils_test) + suite.addTests(plotutils_test) else: raise ValueError('Please select a valid set of tests to run.') From 2309dc2cb027bbf3a3ce3a2acd2770004fa98be4 Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Thu, 25 Sep 2025 17:27:18 +0200 Subject: [PATCH 53/54] Add data model property unit tests + fixes --- nmrpy/data_objects.py | 42 ++++++-- nmrpy/nmrpy_model.py | 1 + nmrpy/tests/nmrpy_tests.py | 198 
++++++++++++++++++++++++++++++++++++- 3 files changed, 234 insertions(+), 7 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 74e4e11..213bd30 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -333,6 +333,8 @@ def species(self, species): if species is not None: if not all((i is None) or isinstance(i, str) for i in species): raise AttributeError('species must be strings') + if isinstance(species, str): + species = [species] if not len(species) == len(self.peaks): raise AttributeError('species must have the same length as peaks') self._species = numpy.array(species, dtype=object) @@ -345,6 +347,10 @@ def fid_object(self): def fid_object(self, fid_object): if isinstance(fid_object, FIDObject): self.__fid_object = fid_object + elif fid_object is None: + self.__fid_object = None + else: + raise AttributeError('fid_object must be an instance of FIDObject') @fid_object.deleter def fid_object(self): @@ -360,6 +366,13 @@ def enzymeml_species(self, enzymeml_species): raise RuntimeError( "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]`." 
) + if enzymeml_species is None: + self.__enzymeml_species = None + return + if not isinstance(enzymeml_species, list): + enzymeml_species = [enzymeml_species] + if not all(isinstance(i, (pyenzyme.SmallMolecule, pyenzyme.Protein, pyenzyme.Complex)) for i in enzymeml_species): + raise AttributeError('enzymeml_species must be a list of valid EnzymeML species: pyenzyme.SmallMolecule, pyenzyme.Protein, or pyenzyme.Complex') self.__enzymeml_species = enzymeml_species @property @@ -1419,7 +1432,7 @@ class FidArray(Base): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.data_model = NMRpy( - datetime_created=str(datetime.now()), + datetime_created=str(datetime.now().isoformat()), experiment=Experiment(name="NMR experiment"), ) self.enzymeml_document = None @@ -1430,18 +1443,28 @@ def __str__(self): @property def data_model(self): - for fid in self.get_fids(): - self.__data_model.experiment.fid_array.append(fid.fid_object) + try: + for fid in self.get_fids(): + if fid.fid_object.ld_id not in [f.ld_id for f in self.__data_model.experiment.fid_array]: + self.__data_model.experiment.fid_array.append(fid.fid_object) + else: + self.__data_model.experiment.fid_array[self.__data_model.experiment.fid_array.index(fid.fid_object)].ld_id = fid.fid_object.ld_id + self.__data_model.datetime_modified = str(datetime.now().isoformat()) + except AttributeError: + print('Warning: FidArray.data_model is not yet set.') return self.__data_model @data_model.setter def data_model(self, data_model): + if data_model is None: + self.__data_model = None + return if not isinstance(data_model, NMRpy): raise AttributeError( f'Parameter `data_model` has to be of type `NMRpy`, got {type(data_model)} instead.' 
) self.__data_model = data_model - self.__data_model.datetime_modified = str(datetime.now()) + self.__data_model.datetime_modified = str(datetime.now().isoformat()) @data_model.deleter def data_model(self): @@ -1450,6 +1473,10 @@ def data_model(self): @property def enzymeml_document(self): + try: + self.__enzymeml_document.modified = str(datetime.now().isoformat()) + except AttributeError: + print('Warning: FidArray.enzymeml_document is not yet set.') return self.__enzymeml_document @enzymeml_document.setter @@ -1470,7 +1497,7 @@ def enzymeml_document(self, enzymeml_document): 'EnzymeML document must contain at least one measurement.' ) self.__enzymeml_document = enzymeml_document - self.__enzymeml_document.modified = str(datetime.now()) + self.__enzymeml_document.modified = str(datetime.now().isoformat()) self.__data_model.experiment.name = self.__enzymeml_document.name for fid in self.get_fids(): fid.enzymeml_species = get_species_from_enzymeml(self.__enzymeml_document) @@ -1492,13 +1519,16 @@ def concentrations(self, concentrations): if not isinstance(concentrations, dict): raise TypeError('concentrations must be a dictionary.') for fid in self.get_fids(): - if not len(fid.species): + if fid.species is None or not len(fid.species): raise ValueError('All FIDs must have species assigned to peaks.') if not set(concentrations.keys()).issubset(fid.species): invalid_species = set(concentrations.keys()) - set(fid.species) raise ValueError(f'Invalid species in concentrations: {invalid_species}') if not all(len(concentrations[species]) == len(self.t) for species in concentrations.keys()): raise ValueError('Length of concentrations must match length of FID data.') + for v in concentrations.values(): + if not all(isinstance(i, (int, float)) for i in v): + raise ValueError('Concentrations must be a list of integers or floats.') self.__concentrations = concentrations @concentrations.deleter diff --git a/nmrpy/nmrpy_model.py b/nmrpy/nmrpy_model.py index b814365..01cb05c 
100644 --- a/nmrpy/nmrpy_model.py +++ b/nmrpy/nmrpy_model.py @@ -297,6 +297,7 @@ class FIDObject(BaseModel): model_config: ConfigDict = ConfigDict( # type: ignore validate_assigment=True, + extra='forbid', # This will raise ValidationError for unknown fields ) # type: ignore raw_data: list[str] = Field(default_factory=list) diff --git a/nmrpy/tests/nmrpy_tests.py b/nmrpy/tests/nmrpy_tests.py index 4d1af23..6d31c14 100644 --- a/nmrpy/tests/nmrpy_tests.py +++ b/nmrpy/tests/nmrpy_tests.py @@ -3,6 +3,14 @@ import numpy import os +try: + import pyenzyme + from pyenzyme import EnzymeMLDocument, Measurement, MeasurementData +except ImportError as ex: + print(f"Optional dependency import failed for nmrpy_tests.py: {ex}") + pyenzyme = None + + testpath = os.path.dirname(__file__) class TestBaseInitialisation(unittest.TestCase): @@ -699,8 +707,196 @@ def test_select_integral_traces(self): class TestDataModels(unittest.TestCase): def setUp(self): - ... + if (pyenzyme is None): + raise RuntimeError( + "The `pyenzyme` package is required to use NMRpy with an EnzymeML document. Please install it via `pip install nmrpy[enzymeml]` or choose a different set of tests to run." 
+ ) + # Load Bruker test data + path_bruker = os.path.join(testpath, 'test_data', 'bruker1') + self.fid_array = FidArray.from_path(fid_path=path_bruker, file_format='bruker') + self.fid = self.fid_array.get_fids()[0] + + # Load EnzymeML test document + enzml_doc = pyenzyme.EnzymeMLDocument(name='NMRpy test document') + enzml_doc.add_to_creators( + given_name='Foo', + family_name='Bar', + mail='foo.bar@example.com' + ) + enzml_doc.add_to_vessels( + id='test_vessel', + name='Test vessel', + volume=1.0, + unit='ml' + ) + enzml_doc.add_to_small_molecules( + id='test_variable_small_molecule', + name='Test variable small molecule', + vessel_id='test_vessel' + ) + enzml_doc.add_to_small_molecules( + id='test_constant_small_molecule', + name='Test constant small molecule', + constant=True, + vessel_id='test_vessel' + ) + measurement = pyenzyme.Measurement( + id='test_measurement', + name='Test measurement', + ) + for species in getattr(enzml_doc, 'small_molecules'): + measurement.add_to_species_data( + species_id=species.id + ) + enzml_doc.measurements.append(measurement) + self.enzml_doc = enzml_doc + + # Create data model objects + self.data_model = NMRpy( + datetime_created='2025-01-01T00:00:00', + experiment=Experiment(name="Test experiment object") + ) + self.fid_object = FIDObject( + raw_data=[], + processed_data=[], + nmr_parameters=Parameters(), + processing_steps=ProcessingSteps(), + ) + + # Set peaks and ranges for both FidArrays + peaks = [ 4.71, 4.64, 4.17, 0.57] + ranges = [[ 5.29, 3.67], [1.05, 0.27]] + for fid in self.fid_array.get_fids(): + fid.peaks = peaks + fid.ranges = ranges + + # Test Fid properties + def test_fid_species_setter(self): + self.fid.peaks = [1] + self.fid.species = 'string' + self.assertEqual(all(i==j for i, j in zip(self.fid.species, numpy.array(['string'], dtype=object))), True) + self.fid.peaks = [1, 2] + self.fid.species = ['string', 'string2'] + self.assertEqual(all(i==j for i, j in zip(self.fid.species, numpy.array(['string', 
'string2'], dtype=object))), True) + self.fid.peaks = [1, 2, 3] + self.fid.species = None + self.assertEqual(self.fid.species, None) + + def test_failed_fid_species_setter(self): + self.fid.peaks = [1] + with self.assertRaises(TypeError): + self.fid.species = 1 + self.fid.peaks = [1, 2] + with self.assertRaises(AttributeError): + self.fid.species = [1, 'string'] + with self.assertRaises(AttributeError): + self.fid.species = [['string', 'string2']] + with self.assertRaises(AttributeError): + self.fid.species = [['string'], ['string2']] + with self.assertRaises(AttributeError): + self.fid.species = [['string', 'string2'], ['string3', 'string4']] + with self.assertRaises(AttributeError): + self.fid.species = ['string', 'string2', 'string3'] + + def test_fid_fid_object_setter(self): + self.assertIsInstance(self.fid.fid_object, FIDObject) + self.fid.fid_object = None + self.assertEqual(self.fid.fid_object, None) + self.fid.fid_object = self.fid_object + self.assertEqual(self.fid.fid_object, self.fid_object) + + def test_failed_fid_fid_object_setter(self): + with self.assertRaises(AttributeError): + self.fid.fid_object = 1 + with self.assertRaises(AttributeError): + self.fid.fid_object = 'string' + with self.assertRaises(AttributeError): + self.fid.fid_object = [1, 2] + with self.assertRaises(AttributeError): + self.fid.fid_object = {'string': 1} + with self.assertRaises(AttributeError): + self.fid.fid_object = True + + def test_fid_enzymeml_species_setter(self): + self.fid.enzymeml_species = self.enzml_doc.small_molecules + self.assertEqual(self.fid.enzymeml_species, self.enzml_doc.small_molecules) + self.fid.enzymeml_species = self.enzml_doc.small_molecules[0] + self.assertEqual(self.fid.enzymeml_species, [self.enzml_doc.small_molecules[0]]) + + def test_failed_fid_enzymeml_species_setter(self): + with self.assertRaises(AttributeError): + self.fid.enzymeml_species = 1 + with self.assertRaises(AttributeError): + self.fid.enzymeml_species = 'string' + with 
self.assertRaises(AttributeError): + self.fid.enzymeml_species = [1, 2] + with self.assertRaises(AttributeError): + self.fid.enzymeml_species = [self.enzml_doc.small_molecules[0], 'string'] + + # Test FidArray properties + def test_fid_array_data_model_setter(self): + self.assertIsInstance(self.fid_array.data_model, NMRpy) + self.fid_array.data_model = self.data_model + self.assertEqual(self.fid_array.data_model, self.data_model) + self.fid_array.data_model = None + self.assertEqual(self.fid_array.data_model, None) + + def test_failed_fid_array_data_model_setter(self): + with self.assertRaises(AttributeError): + self.fid_array.data_model = 'string' + with self.assertRaises(AttributeError): + self.fid_array.data_model = 1 + with self.assertRaises(AttributeError): + self.fid_array.data_model = [1, 2] + with self.assertRaises(AttributeError): + self.fid_array.data_model = {'string': 1} + with self.assertRaises(AttributeError): + self.fid_array.data_model = True + + def test_fid_array_enzymeml_document_setter(self): + self.fid_array.enzymeml_document = self.enzml_doc + self.assertEqual(self.fid_array.enzymeml_document, self.enzml_doc) + self.fid_array.enzymeml_document = None + self.assertEqual(self.fid_array.enzymeml_document, None) + + def test_failed_fid_array_enzymeml_document_setter(self): + with self.assertRaises(AttributeError): + self.fid_array.enzymeml_document = 'string' + with self.assertRaises(AttributeError): + self.fid_array.enzymeml_document = 1 + with self.assertRaises(AttributeError): + self.fid_array.enzymeml_document = [1, 2] + with self.assertRaises(AttributeError): + self.fid_array.enzymeml_document = {'string': 1} + with self.assertRaises(AttributeError): + self.fid_array.enzymeml_document = True + + def test_fid_array_concentrations_setter(self): + for fid in self.fid_array.get_fids(): + fid.species = ['test_variable_small_molecule', 'test_variable_small_molecule', 'test_variable_small_molecule', 'test_constant_small_molecule'] + 
test_concentrations = {'test_variable_small_molecule': [1], 'test_constant_small_molecule': [1.0]} + self.fid_array.concentrations = test_concentrations + self.assertEqual(self.fid_array.concentrations, test_concentrations) + self.fid_array.concentrations = None + self.assertEqual(self.fid_array.concentrations, None) + + def test_failed_fid_array_concentrations_setter(self): + for fid in self.fid_array.get_fids(): + fid.species = ['test_variable_small_molecule', 'test_variable_small_molecule', 'test_variable_small_molecule', 'test_constant_small_molecule'] + with self.assertRaises(TypeError): + self.fid_array.concentrations = 'string' + with self.assertRaises(TypeError): + self.fid_array.concentrations = 1 + with self.assertRaises(TypeError): + self.fid_array.concentrations = [1, 2] + with self.assertRaises(TypeError): + self.fid_array.concentrations = True + with self.assertRaises(ValueError): + self.fid_array.concentrations = {'test_variable_small_molecule': [1], 'test_constant_small_molecule': ['string']} + with self.assertRaises(ValueError): + self.fid_array.concentrations = {'test_variable_small_molecule': [1], 'test_constant_small_molecule': [1.0, 2.0]} + # Test methods class NMRPyTest: def __init__(self, tests='all'): From 9f55722a1de71c76a1ec893cfef0f4bbd0587d6b Mon Sep 17 00:00:00 2001 From: Torsten Giess Date: Mon, 3 Nov 2025 13:21:35 +0100 Subject: [PATCH 54/54] Fix parameter mapping to data model - Added NMR parameters to fid_object setter - Fixed p0 and p1 phasing parameter assignment --- nmrpy/data_objects.py | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/nmrpy/data_objects.py b/nmrpy/data_objects.py index 213bd30..ca5afd3 100644 --- a/nmrpy/data_objects.py +++ b/nmrpy/data_objects.py @@ -341,6 +341,25 @@ def species(self, species): @property def fid_object(self): + try: + self.__fid_object.raw_data = self.data.tolist() + except AttributeError: + print('Warning: Fid.data is not yet set. 
Raw data will not be updated.') + try: + self.__fid_object.nmr_parameters = Parameters( + acquisition_time=self._params['at'], + relaxation_time=self._params['d1'], + repetition_time=self._params['rt'], + number_of_transients=self._params['nt'], + acquisition_times_array=self._params['acqtime'], + spectral_width_ppm=self._params['sw'], + spectral_width_hz=self._params['sw_hz'], + spectrometer_frequency=self._params['sfrq'], + reference_frequency=self._params['reffrq'], + spectral_width_left=self._params['sw_left'], + ) + except AttributeError: + print('Warning: Fid._params is not yet set. NMR parameters will not be updated.') return self.__fid_object @fid_object.setter @@ -496,7 +515,7 @@ def deconvoluted_integrals(self): if peak_object.peak_integral != integral: peak_object.peak_integral = float(integral) return integrals - + def _get_plots(self): """ Return a list of all :class:`~nmrpy.plotting.Plot` objects owned by this :class:`~nmrpy.data_objects.Fid`. @@ -699,10 +718,13 @@ def phase_correct(self, method='leastsq', verbose = True): raise ValueError('Only Fourier-transformed data can be phase-corrected.') if verbose: print('phasing: %s'%self.id) - self.data = Fid._phase_correct((self.data, method, verbose)) + phased_data, p0, p1 = Fid._phase_correct((self.data, method, verbose)) + self.data = phased_data # Update data model if getattr(self, 'fid_object', None) is not None: self.fid_object.processing_steps.is_phased = True + self.fid_object.processing_steps.zero_order_phase = p0 + self.fid_object.processing_steps.first_order_phase = p1 @classmethod def _phase_correct(cls, list_params): @@ -717,18 +739,18 @@ def _phase_correct(cls, list_params): ('p1', 0.0, True), ) mz = lmfit.minimize(Fid._phased_data_sum, p, args=([data]), method=method) - phased_data = Fid._ps(data, p0=mz.params['p0'].value, p1=mz.params['p1'].value) + phased_data, p0, p1 = Fid._ps(data, p0=mz.params['p0'].value, p1=mz.params['p1'].value) if abs(phased_data.min()) > abs(phased_data.max()): 
phased_data *= -1 if sum(phased_data) < 0.0: phased_data *= -1 if verbose: print('Zero order: %d\tFirst order: %d\t (In degrees)'%(mz.params['p0'].value, mz.params['p1'].value)) - return phased_data + return phased_data, p0, p1 @classmethod def _phased_data_sum(cls, pars, data): - err = Fid._ps(data, p0=pars['p0'].value, p1=pars['p1'].value).real + err = Fid._ps(data, p0=pars['p0'].value, p1=pars['p1'].value)[0].real return numpy.array([abs(err).sum()]*2) @classmethod @@ -750,7 +772,7 @@ def _ps(cls, data, p0=0.0, p1=0.0): p1 = p1*numpy.pi/180.0 size = len(data) ph = numpy.exp(1.0j*(p0+(p1*numpy.arange(size)/size))) - return ph*data + return ph*data, p0, p1 def ps(self, p0=0.0, p1=0.0): """ @@ -1527,7 +1549,7 @@ def concentrations(self, concentrations): if not all(len(concentrations[species]) == len(self.t) for species in concentrations.keys()): raise ValueError('Length of concentrations must match length of FID data.') for v in concentrations.values(): - if not all(isinstance(i, (int, float)) for i in v): + if not all(isinstance(i, (int, float)) for i in v): raise ValueError('Concentrations must be a list of integers or floats.') self.__concentrations = concentrations @@ -1912,11 +1934,13 @@ def phase_correct_fids(self, method='leastsq', mp=True, cpus=None, verbose=True) list_params = [[fid.data, method, verbose] for fid in fids] phased_data = self._generic_mp(Fid._phase_correct, list_params, cpus) for fid, datum in zip(fids, phased_data): - fid.data = datum + fid.data = datum[0] # Update data model if getattr(fid, 'fid_object', None) is not None: fid.fid_object.processed_data = [str(data) for data in datum] fid.fid_object.processing_steps.is_phased = True + fid.fid_object.processing_steps.zero_order_phase = datum[1] + fid.fid_object.processing_steps.first_order_phase = datum[2] else: for fid in self.get_fids(): fid.phase_correct(method=method, verbose=verbose)