corems.mass_spectra.input.andiNetCDF

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Feb 12, 2020"
  3
  4from pathlib import Path
  5from threading import Thread
  6# from io import BytesIO
  7
  8from netCDF4 import Dataset
  9from s3path import S3Path
 10
 11from corems.encapsulation.constant import Labels
 12from corems.encapsulation.factory.parameters import default_parameters
 13from corems.mass_spectra.factory.GC_Class import GCMSBase
 14from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecCentroidLowRes
 15
 16
 17class ReadAndiNetCDF(Thread):
 18    """
 19    A class for reading AndiNetCDF files and extracting mass spectra data.
 20
 21    Parameters
 22    -----------
 23    file_location : str or Path
 24            The location of the AndiNetCDF file.
 25    analyzer : str, optional
 26            The type of analyzer used (default is 'Quadruple').
 27    instrument_label : str, optional
 28            The label of the instrument (default is 'GCMS-Agilent').
 29    auto_process : bool, optional
 30            Whether to automatically process the data (default is True).
 31
 32    Attributes
 33    -----------
 34    file_location : Path
 35            The path to the AndiNetCDF file.
 36    net_cdf_obj : Dataset
 37            The NetCDF dataset object.
 38    ionization_type : str
 39            The ionization type used in the experiment.
 40    experiment_type : str
 41            The type of experiment.
 42    list_scans : range
 43            The range of scan numbers in the dataset.
 44    initial_scan_number : int
 45            The number of the initial scan.
 46    final_scan_number : int
 47            The number of the final scan.
 48    analyzer : str
 49            The type of analyzer used.
 50    instrument_label : str
 51            The label of the instrument.
 52    gcms : GCMSBase
 53            The GCMSBase object for storing mass spectra data.
 54
 55    Methods
 56    --------
 57    * polarity().
 58            Get the polarity of the ionization.
 59    * get_mass_spectrum(mz, abun, rp, d_params).
 60            Add a mass spectrum to the GCMSBase object.
 61    * run().
 62            Populate the GCMSBase object with mass spectra data.
 63    * import_mass_spectra(d_params).
 64            Import mass spectra data from the AndiNetCDF file.
 65    * get_gcms_obj().
 66            Get the GCMSBase object.
 67
 68    """
 69
 70    def __init__(
 71        self,
 72        file_location: str | Path,
 73        analyzer="Quadruple",
 74        instrument_label="GCMS-Agilent",
 75        auto_process=True,
 76    ):
 77        Thread.__init__(self)
 78
 79        if isinstance(file_location, str):
 80            self.file_location = Path(file_location)
 81        else:
 82            self.file_location = file_location
 83
 84        if not self.file_location.exists():
 85            raise FileNotFoundError("File does not exist at %s", file_location)
 86
 87        if isinstance(file_location, S3Path):
 88            bytes_io = self.file_location.open("rb").read()
 89            self.net_cdf_obj = Dataset(
 90                self.file_location.name,
 91                "r",
 92                diskless=True,
 93                memory=bytes_io,
 94                format="NETCDF3_CLASSIC",
 95            )
 96        else:
 97            self.net_cdf_obj = Dataset(
 98                self.file_location, "r", format="NETCDF3_CLASSIC"
 99            )
100
101        self.ionization_type = self.net_cdf_obj.test_ionization_mode
102        self.experiment_type = self.net_cdf_obj.experiment_type
103        self.list_scans = range(
104            len(self.net_cdf_obj.variables.get("actual_scan_number")[:])
105        )
106        self.initial_scan_number = self.list_scans[0]
107        self.final_scan_number = self.list_scans[-1]
108        self.analyzer = analyzer
109        self.instrument_label = instrument_label
110        self.gcms = GCMSBase(self.file_location, analyzer, instrument_label)
111
112    @property
113    def polarity(self):
114        """
115        Get the polarity of the ionization.
116
117        """
118        polarity = str(self.net_cdf_obj.test_ionization_polarity)
119        if polarity == "Positive Polarity":
120            return +1
121        else:
122            return -1
123
124    def get_mass_spectrum(self, mz, abun, rp, d_params):
125        """
126        Add a mass spectrum to the GCMSBase object.
127
128        Parameters
129        -----------
130        mz : array-like
131                The m/z values of the mass spectrum.
132        abun : array-like
133                The abundance values of the mass spectrum.
134        rp : array-like
135                The resolution values of the mass spectrum.
136        d_params : dict
137                Additional parameters for the mass spectrum.
138
139        """
140        data_dict = {
141            Labels.mz: mz,
142            Labels.abundance: abun,
143            Labels.rp: rp,
144            Labels.s2n: None,
145        }
146        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
147        self.gcms.add_mass_spectrum(mass_spec)
148
149    def run(self):
150        """
151        Populate the GCMSBase object with mass spectra data.
152        """
153        d_parameters = default_parameters(self.file_location)
154        self.import_mass_spectra(d_parameters)
155
156    def import_mass_spectra(self, d_params):
157        """
158        Import mass spectra data from the AndiNetCDF file.
159
160        Parameters
161        -----------
162        d_params : dict
163                Additional parameters for the mass spectra.
164
165        """
166        ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:]
167        list_tic = self.net_cdf_obj.variables.get("total_intensity")[:]
168        list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60
169        mass_values = self.net_cdf_obj.variables.get("mass_values")[:]
170        intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:]
171        resolution = self.net_cdf_obj.variables.get("resolution")[:]
172        individual_rp = len(mass_values) == len(resolution)
173        finish_location = -1
174        for scan_index in self.list_scans:
175            datapoints = ms_datapoints_per_scans[scan_index]
176            finish_location += datapoints
177            start_location = finish_location - datapoints + 1
178            d_params["rt"] = list_rt[scan_index]
179            d_params["scan_number"] = scan_index
180            d_params["label"] = Labels.gcms_centroid
181            d_params["polarity"] = self.polarity
182            d_params["analyzer"] = self.analyzer
183            d_params["instrument_label"] = self.instrument_label
184            mz = mass_values[start_location:finish_location]
185            abun = intensity_values[start_location:finish_location]
186            if individual_rp:
187                rp = resolution[start_location:finish_location]
188            else:
189                rp = [resolution[scan_index]] * datapoints
190            self.get_mass_spectrum(mz, abun, rp, d_params)
191        self.gcms.retention_time = list_rt
192        self.gcms.tic = list_tic
193        self.gcms.scans_number = self.list_scans
194
195    def get_gcms_obj(self):
196        """
197        Get the GCMSBase object.
198
199        """
200        return self.gcms
class ReadAndiNetCDF(threading.Thread):
 18class ReadAndiNetCDF(Thread):
 19    """
 20    A class for reading AndiNetCDF files and extracting mass spectra data.
 21
 22    Parameters
 23    -----------
 24    file_location : str or Path
 25            The location of the AndiNetCDF file.
 26    analyzer : str, optional
 27            The type of analyzer used (default is 'Quadruple').
 28    instrument_label : str, optional
 29            The label of the instrument (default is 'GCMS-Agilent').
 30    auto_process : bool, optional
 31            Whether to automatically process the data (default is True).
 32
 33    Attributes
 34    -----------
 35    file_location : Path
 36            The path to the AndiNetCDF file.
 37    net_cdf_obj : Dataset
 38            The NetCDF dataset object.
 39    ionization_type : str
 40            The ionization type used in the experiment.
 41    experiment_type : str
 42            The type of experiment.
 43    list_scans : range
 44            The range of scan numbers in the dataset.
 45    initial_scan_number : int
 46            The number of the initial scan.
 47    final_scan_number : int
 48            The number of the final scan.
 49    analyzer : str
 50            The type of analyzer used.
 51    instrument_label : str
 52            The label of the instrument.
 53    gcms : GCMSBase
 54            The GCMSBase object for storing mass spectra data.
 55
 56    Methods
 57    --------
 58    * polarity().
 59            Get the polarity of the ionization.
 60    * get_mass_spectrum(mz, abun, rp, d_params).
 61            Add a mass spectrum to the GCMSBase object.
 62    * run().
 63            Populate the GCMSBase object with mass spectra data.
 64    * import_mass_spectra(d_params).
 65            Import mass spectra data from the AndiNetCDF file.
 66    * get_gcms_obj().
 67            Get the GCMSBase object.
 68
 69    """
 70
 71    def __init__(
 72        self,
 73        file_location: str | Path,
 74        analyzer="Quadruple",
 75        instrument_label="GCMS-Agilent",
 76        auto_process=True,
 77    ):
 78        Thread.__init__(self)
 79
 80        if isinstance(file_location, str):
 81            self.file_location = Path(file_location)
 82        else:
 83            self.file_location = file_location
 84
 85        if not self.file_location.exists():
 86            raise FileNotFoundError("File does not exist at %s", file_location)
 87
 88        if isinstance(file_location, S3Path):
 89            bytes_io = self.file_location.open("rb").read()
 90            self.net_cdf_obj = Dataset(
 91                self.file_location.name,
 92                "r",
 93                diskless=True,
 94                memory=bytes_io,
 95                format="NETCDF3_CLASSIC",
 96            )
 97        else:
 98            self.net_cdf_obj = Dataset(
 99                self.file_location, "r", format="NETCDF3_CLASSIC"
100            )
101
102        self.ionization_type = self.net_cdf_obj.test_ionization_mode
103        self.experiment_type = self.net_cdf_obj.experiment_type
104        self.list_scans = range(
105            len(self.net_cdf_obj.variables.get("actual_scan_number")[:])
106        )
107        self.initial_scan_number = self.list_scans[0]
108        self.final_scan_number = self.list_scans[-1]
109        self.analyzer = analyzer
110        self.instrument_label = instrument_label
111        self.gcms = GCMSBase(self.file_location, analyzer, instrument_label)
112
113    @property
114    def polarity(self):
115        """
116        Get the polarity of the ionization.
117
118        """
119        polarity = str(self.net_cdf_obj.test_ionization_polarity)
120        if polarity == "Positive Polarity":
121            return +1
122        else:
123            return -1
124
125    def get_mass_spectrum(self, mz, abun, rp, d_params):
126        """
127        Add a mass spectrum to the GCMSBase object.
128
129        Parameters
130        -----------
131        mz : array-like
132                The m/z values of the mass spectrum.
133        abun : array-like
134                The abundance values of the mass spectrum.
135        rp : array-like
136                The resolution values of the mass spectrum.
137        d_params : dict
138                Additional parameters for the mass spectrum.
139
140        """
141        data_dict = {
142            Labels.mz: mz,
143            Labels.abundance: abun,
144            Labels.rp: rp,
145            Labels.s2n: None,
146        }
147        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
148        self.gcms.add_mass_spectrum(mass_spec)
149
150    def run(self):
151        """
152        Populate the GCMSBase object with mass spectra data.
153        """
154        d_parameters = default_parameters(self.file_location)
155        self.import_mass_spectra(d_parameters)
156
157    def import_mass_spectra(self, d_params):
158        """
159        Import mass spectra data from the AndiNetCDF file.
160
161        Parameters
162        -----------
163        d_params : dict
164                Additional parameters for the mass spectra.
165
166        """
167        ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:]
168        list_tic = self.net_cdf_obj.variables.get("total_intensity")[:]
169        list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60
170        mass_values = self.net_cdf_obj.variables.get("mass_values")[:]
171        intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:]
172        resolution = self.net_cdf_obj.variables.get("resolution")[:]
173        individual_rp = len(mass_values) == len(resolution)
174        finish_location = -1
175        for scan_index in self.list_scans:
176            datapoints = ms_datapoints_per_scans[scan_index]
177            finish_location += datapoints
178            start_location = finish_location - datapoints + 1
179            d_params["rt"] = list_rt[scan_index]
180            d_params["scan_number"] = scan_index
181            d_params["label"] = Labels.gcms_centroid
182            d_params["polarity"] = self.polarity
183            d_params["analyzer"] = self.analyzer
184            d_params["instrument_label"] = self.instrument_label
185            mz = mass_values[start_location:finish_location]
186            abun = intensity_values[start_location:finish_location]
187            if individual_rp:
188                rp = resolution[start_location:finish_location]
189            else:
190                rp = [resolution[scan_index]] * datapoints
191            self.get_mass_spectrum(mz, abun, rp, d_params)
192        self.gcms.retention_time = list_rt
193        self.gcms.tic = list_tic
194        self.gcms.scans_number = self.list_scans
195
196    def get_gcms_obj(self):
197        """
198        Get the GCMSBase object.
199
200        """
201        return self.gcms

A class for reading AndiNetCDF files and extracting mass spectra data.

Parameters
  • file_location (str or Path): The location of the AndiNetCDF file.
  • analyzer (str, optional): The type of analyzer used (default is 'Quadruple').
  • instrument_label (str, optional): The label of the instrument (default is 'GCMS-Agilent').
  • auto_process (bool, optional): Whether to automatically process the data (default is True). Note: the constructor shown on this page accepts this argument but does not use it.
Attributes
  • file_location (Path): The path to the AndiNetCDF file.
  • net_cdf_obj (Dataset): The NetCDF dataset object.
  • ionization_type (str): The ionization type used in the experiment.
  • experiment_type (str): The type of experiment.
  • list_scans (range): The range of scan numbers in the dataset.
  • initial_scan_number (int): The number of the initial scan.
  • final_scan_number (int): The number of the final scan.
  • analyzer (str): The type of analyzer used.
  • instrument_label (str): The label of the instrument.
  • gcms (GCMSBase): The GCMSBase object for storing mass spectra data.
Methods
  • polarity (property, accessed without parentheses). Get the polarity of the ionization: +1 for 'Positive Polarity', otherwise -1.
  • get_mass_spectrum(mz, abun, rp, d_params). Add a mass spectrum to the GCMSBase object.
  • run(). Populate the GCMSBase object with mass spectra data.
  • import_mass_spectra(d_params). Import mass spectra data from the AndiNetCDF file.
  • get_gcms_obj(). Get the GCMSBase object.
ReadAndiNetCDF( file_location: str | pathlib.Path, analyzer='Quadruple', instrument_label='GCMS-Agilent', auto_process=True)
 71    def __init__(
 72        self,
 73        file_location: str | Path,
 74        analyzer="Quadruple",
 75        instrument_label="GCMS-Agilent",
 76        auto_process=True,
 77    ):
 78        Thread.__init__(self)
 79
 80        if isinstance(file_location, str):
 81            self.file_location = Path(file_location)
 82        else:
 83            self.file_location = file_location
 84
 85        if not self.file_location.exists():
 86            raise FileNotFoundError("File does not exist at %s", file_location)
 87
 88        if isinstance(file_location, S3Path):
 89            bytes_io = self.file_location.open("rb").read()
 90            self.net_cdf_obj = Dataset(
 91                self.file_location.name,
 92                "r",
 93                diskless=True,
 94                memory=bytes_io,
 95                format="NETCDF3_CLASSIC",
 96            )
 97        else:
 98            self.net_cdf_obj = Dataset(
 99                self.file_location, "r", format="NETCDF3_CLASSIC"
100            )
101
102        self.ionization_type = self.net_cdf_obj.test_ionization_mode
103        self.experiment_type = self.net_cdf_obj.experiment_type
104        self.list_scans = range(
105            len(self.net_cdf_obj.variables.get("actual_scan_number")[:])
106        )
107        self.initial_scan_number = self.list_scans[0]
108        self.final_scan_number = self.list_scans[-1]
109        self.analyzer = analyzer
110        self.instrument_label = instrument_label
111        self.gcms = GCMSBase(self.file_location, analyzer, instrument_label)

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

ionization_type
experiment_type
list_scans
initial_scan_number
final_scan_number
analyzer
instrument_label
gcms
polarity

Get the polarity of the ionization.

def get_mass_spectrum(self, mz, abun, rp, d_params):
125    def get_mass_spectrum(self, mz, abun, rp, d_params):
126        """
127        Add a mass spectrum to the GCMSBase object.
128
129        Parameters
130        -----------
131        mz : array-like
132                The m/z values of the mass spectrum.
133        abun : array-like
134                The abundance values of the mass spectrum.
135        rp : array-like
136                The resolution values of the mass spectrum.
137        d_params : dict
138                Additional parameters for the mass spectrum.
139
140        """
141        data_dict = {
142            Labels.mz: mz,
143            Labels.abundance: abun,
144            Labels.rp: rp,
145            Labels.s2n: None,
146        }
147        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
148        self.gcms.add_mass_spectrum(mass_spec)

Add a mass spectrum to the GCMSBase object.

Parameters
  • mz (array-like): The m/z values of the mass spectrum.
  • abun (array-like): The abundance values of the mass spectrum.
  • rp (array-like): The resolution values of the mass spectrum.
  • d_params (dict): Additional parameters for the mass spectrum.
def run(self):
150    def run(self):
151        """
152        Populate the GCMSBase object with mass spectra data.
153        """
154        d_parameters = default_parameters(self.file_location)
155        self.import_mass_spectra(d_parameters)

Populate the GCMSBase object with mass spectra data.

def import_mass_spectra(self, d_params):
157    def import_mass_spectra(self, d_params):
158        """
159        Import mass spectra data from the AndiNetCDF file.
160
161        Parameters
162        -----------
163        d_params : dict
164                Additional parameters for the mass spectra.
165
166        """
167        ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:]
168        list_tic = self.net_cdf_obj.variables.get("total_intensity")[:]
169        list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60
170        mass_values = self.net_cdf_obj.variables.get("mass_values")[:]
171        intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:]
172        resolution = self.net_cdf_obj.variables.get("resolution")[:]
173        individual_rp = len(mass_values) == len(resolution)
174        finish_location = -1
175        for scan_index in self.list_scans:
176            datapoints = ms_datapoints_per_scans[scan_index]
177            finish_location += datapoints
178            start_location = finish_location - datapoints + 1
179            d_params["rt"] = list_rt[scan_index]
180            d_params["scan_number"] = scan_index
181            d_params["label"] = Labels.gcms_centroid
182            d_params["polarity"] = self.polarity
183            d_params["analyzer"] = self.analyzer
184            d_params["instrument_label"] = self.instrument_label
185            mz = mass_values[start_location:finish_location]
186            abun = intensity_values[start_location:finish_location]
187            if individual_rp:
188                rp = resolution[start_location:finish_location]
189            else:
190                rp = [resolution[scan_index]] * datapoints
191            self.get_mass_spectrum(mz, abun, rp, d_params)
192        self.gcms.retention_time = list_rt
193        self.gcms.tic = list_tic
194        self.gcms.scans_number = self.list_scans

Import mass spectra data from the AndiNetCDF file.

Parameters
  • d_params (dict): Additional parameters for the mass spectra.
def get_gcms_obj(self):
196    def get_gcms_obj(self):
197        """
198        Get the GCMSBase object.
199
200        """
201        return self.gcms

Get the GCMSBase object.

Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id