corems.mass_spectra.input.andiNetCDF
__author__ = "Yuri E. Corilo"
__date__ = "Feb 12, 2020"

from pathlib import Path
from threading import Thread
# from io import BytesIO

from netCDF4 import Dataset
from s3path import S3Path

from corems.encapsulation.constant import Labels
from corems.encapsulation.factory.parameters import default_parameters
from corems.mass_spectra.factory.GC_Class import GCMSBase
from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecCentroidLowRes


class ReadAndiNetCDF(Thread):
    """
    A class for reading AndiNetCDF files and extracting mass spectra data.

    Parameters
    -----------
    file_location : str or Path
        The location of the AndiNetCDF file.
    analyzer : str, optional
        The type of analyzer used (default is 'Quadruple').
    instrument_label : str, optional
        The label of the instrument (default is 'GCMS-Agilent').
    auto_process : bool, optional
        Accepted by the constructor but not read by it (default is True).

    Attributes
    -----------
    file_location : Path
        The path to the AndiNetCDF file.
    net_cdf_obj : Dataset
        The NetCDF dataset object.
    ionization_type : str
        The ionization type used in the experiment.
    experiment_type : str
        The type of experiment.
    list_scans : range
        Zero-based scan indexes, one per entry of 'actual_scan_number'.
    initial_scan_number : int
        The first index of `list_scans`.
    final_scan_number : int
        The last index of `list_scans`.
    analyzer : str
        The type of analyzer used.
    instrument_label : str
        The label of the instrument.
    gcms : GCMSBase
        The GCMSBase object for storing mass spectra data.

    Raises
    ------
    FileNotFoundError
        If `file_location` does not exist.

    Methods
    --------
    * polarity.
        Property with the polarity of the ionization (+1 or -1).
    * get_mass_spectrum(mz, abun, rp, d_params).
        Add a mass spectrum to the GCMSBase object.
    * run().
        Populate the GCMSBase object with mass spectra data.
    * import_mass_spectra(d_params).
        Import mass spectra data from the AndiNetCDF file.
    * get_gcms_obj().
        Get the GCMSBase object.

    """

    def __init__(
        self,
        file_location: str | Path,
        analyzer="Quadruple",
        instrument_label="GCMS-Agilent",
        auto_process=True,
    ):
        Thread.__init__(self)

        if isinstance(file_location, str):
            self.file_location = Path(file_location)
        else:
            self.file_location = file_location

        if not self.file_location.exists():
            # Build the message eagerly: exception constructors do not apply
            # %-style formatting to their arguments.
            raise FileNotFoundError(f"File does not exist at {file_location}")

        if isinstance(file_location, S3Path):
            # Read the whole object into memory and close the handle before
            # handing the bytes to netCDF4.
            with self.file_location.open("rb") as remote_file:
                raw_bytes = remote_file.read()
            self.net_cdf_obj = Dataset(
                self.file_location.name,
                "r",
                diskless=True,
                memory=raw_bytes,
                format="NETCDF3_CLASSIC",
            )
        else:
            self.net_cdf_obj = Dataset(
                self.file_location, "r", format="NETCDF3_CLASSIC"
            )

        self.ionization_type = self.net_cdf_obj.test_ionization_mode
        self.experiment_type = self.net_cdf_obj.experiment_type
        self.list_scans = range(
            len(self.net_cdf_obj.variables.get("actual_scan_number")[:])
        )
        self.initial_scan_number = self.list_scans[0]
        self.final_scan_number = self.list_scans[-1]
        self.analyzer = analyzer
        self.instrument_label = instrument_label
        self.gcms = GCMSBase(self.file_location, analyzer, instrument_label)

    @property
    def polarity(self):
        """
        Get the polarity of the ionization.

        Returns
        -------
        int
            +1 when the file attribute `test_ionization_polarity` reads
            "Positive Polarity", -1 for any other value.

        """
        polarity = str(self.net_cdf_obj.test_ionization_polarity)
        if polarity == "Positive Polarity":
            return +1
        else:
            return -1

    def get_mass_spectrum(self, mz, abun, rp, d_params):
        """
        Add a mass spectrum to the GCMSBase object.

        Parameters
        -----------
        mz : array-like
            The m/z values of the mass spectrum.
        abun : array-like
            The abundance values of the mass spectrum.
        rp : array-like
            The resolution values of the mass spectrum.
        d_params : dict
            Additional parameters for the mass spectrum.

        """
        data_dict = {
            Labels.mz: mz,
            Labels.abundance: abun,
            Labels.rp: rp,
            Labels.s2n: None,
        }
        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
        self.gcms.add_mass_spectrum(mass_spec)

    def run(self):
        """
        Populate the GCMSBase object with mass spectra data.
        """
        d_parameters = default_parameters(self.file_location)
        self.import_mass_spectra(d_parameters)

    def import_mass_spectra(self, d_params):
        """
        Import mass spectra data from the AndiNetCDF file.

        The flat 'mass_values' / 'intensity_values' arrays are cut into one
        chunk per scan using 'point_count', and each chunk is added to
        `self.gcms` as a mass spectrum. Afterwards the retention times, the
        TIC and the scan indexes are stored on `self.gcms`.

        Parameters
        -----------
        d_params : dict
            Additional parameters for the mass spectra. The dict is updated
            in place for every scan and the same object is passed to each
            spectrum.

        """
        variables = self.net_cdf_obj.variables
        ms_datapoints_per_scans = variables.get("point_count")[:]
        list_tic = variables.get("total_intensity")[:]
        # presumably converts seconds to minutes -- confirm against the file spec
        list_rt = variables.get("scan_acquisition_time")[:] / 60
        mass_values = variables.get("mass_values")[:]
        intensity_values = variables.get("intensity_values")[:]
        resolution = variables.get("resolution")[:]

        # When 'resolution' has one entry per data point it is sliced like the
        # m/z values; otherwise it is indexed per scan and repeated.
        individual_rp = len(mass_values) == len(resolution)

        # The polarity is a file-level attribute, so read it once.
        polarity = self.polarity

        start_location = 0
        for scan_index in self.list_scans:
            datapoints = int(ms_datapoints_per_scans[scan_index])
            # Exclusive stop index, so the slice holds exactly `datapoints`
            # values, including the last point of the scan.
            finish_location = start_location + datapoints

            d_params["rt"] = list_rt[scan_index]
            d_params["scan_number"] = scan_index
            d_params["label"] = Labels.gcms_centroid
            d_params["polarity"] = polarity
            d_params["analyzer"] = self.analyzer
            d_params["instrument_label"] = self.instrument_label

            mz = mass_values[start_location:finish_location]
            abun = intensity_values[start_location:finish_location]
            if individual_rp:
                rp = resolution[start_location:finish_location]
            else:
                rp = [resolution[scan_index]] * datapoints

            self.get_mass_spectrum(mz, abun, rp, d_params)
            start_location = finish_location

        self.gcms.retention_time = list_rt
        self.gcms.tic = list_tic
        self.gcms.scans_number = self.list_scans

    def get_gcms_obj(self):
        """
        Get the GCMSBase object.

        """
        return self.gcms
18class ReadAndiNetCDF(Thread): 19 """ 20 A class for reading AndiNetCDF files and extracting mass spectra data. 21 22 Parameters 23 ----------- 24 file_location : str or Path 25 The location of the AndiNetCDF file. 26 analyzer : str, optional 27 The type of analyzer used (default is 'Quadruple'). 28 instrument_label : str, optional 29 The label of the instrument (default is 'GCMS-Agilent'). 30 auto_process : bool, optional 31 Whether to automatically process the data (default is True). 32 33 Attributes 34 ----------- 35 file_location : Path 36 The path to the AndiNetCDF file. 37 net_cdf_obj : Dataset 38 The NetCDF dataset object. 39 ionization_type : str 40 The ionization type used in the experiment. 41 experiment_type : str 42 The type of experiment. 43 list_scans : range 44 The range of scan numbers in the dataset. 45 initial_scan_number : int 46 The number of the initial scan. 47 final_scan_number : int 48 The number of the final scan. 49 analyzer : str 50 The type of analyzer used. 51 instrument_label : str 52 The label of the instrument. 53 gcms : GCMSBase 54 The GCMSBase object for storing mass spectra data. 55 56 Methods 57 -------- 58 * polarity(). 59 Get the polarity of the ionization. 60 * get_mass_spectrum(mz, abun, rp, d_params). 61 Add a mass spectrum to the GCMSBase object. 62 * run(). 63 Populate the GCMSBase object with mass spectra data. 64 * import_mass_spectra(d_params). 65 Import mass spectra data from the AndiNetCDF file. 66 * get_gcms_obj(). 67 Get the GCMSBase object. 
68 69 """ 70 71 def __init__( 72 self, 73 file_location: str | Path, 74 analyzer="Quadruple", 75 instrument_label="GCMS-Agilent", 76 auto_process=True, 77 ): 78 Thread.__init__(self) 79 80 if isinstance(file_location, str): 81 self.file_location = Path(file_location) 82 else: 83 self.file_location = file_location 84 85 if not self.file_location.exists(): 86 raise FileNotFoundError("File does not exist at %s", file_location) 87 88 if isinstance(file_location, S3Path): 89 bytes_io = self.file_location.open("rb").read() 90 self.net_cdf_obj = Dataset( 91 self.file_location.name, 92 "r", 93 diskless=True, 94 memory=bytes_io, 95 format="NETCDF3_CLASSIC", 96 ) 97 else: 98 self.net_cdf_obj = Dataset( 99 self.file_location, "r", format="NETCDF3_CLASSIC" 100 ) 101 102 self.ionization_type = self.net_cdf_obj.test_ionization_mode 103 self.experiment_type = self.net_cdf_obj.experiment_type 104 self.list_scans = range( 105 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) 106 ) 107 self.initial_scan_number = self.list_scans[0] 108 self.final_scan_number = self.list_scans[-1] 109 self.analyzer = analyzer 110 self.instrument_label = instrument_label 111 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) 112 113 @property 114 def polarity(self): 115 """ 116 Get the polarity of the ionization. 117 118 """ 119 polarity = str(self.net_cdf_obj.test_ionization_polarity) 120 if polarity == "Positive Polarity": 121 return +1 122 else: 123 return -1 124 125 def get_mass_spectrum(self, mz, abun, rp, d_params): 126 """ 127 Add a mass spectrum to the GCMSBase object. 128 129 Parameters 130 ----------- 131 mz : array-like 132 The m/z values of the mass spectrum. 133 abun : array-like 134 The abundance values of the mass spectrum. 135 rp : array-like 136 The resolution values of the mass spectrum. 137 d_params : dict 138 Additional parameters for the mass spectrum. 
139 140 """ 141 data_dict = { 142 Labels.mz: mz, 143 Labels.abundance: abun, 144 Labels.rp: rp, 145 Labels.s2n: None, 146 } 147 mass_spec = MassSpecCentroidLowRes(data_dict, d_params) 148 self.gcms.add_mass_spectrum(mass_spec) 149 150 def run(self): 151 """ 152 Populate the GCMSBase object with mass spectra data. 153 """ 154 d_parameters = default_parameters(self.file_location) 155 self.import_mass_spectra(d_parameters) 156 157 def import_mass_spectra(self, d_params): 158 """ 159 Import mass spectra data from the AndiNetCDF file. 160 161 Parameters 162 ----------- 163 d_params : dict 164 Additional parameters for the mass spectra. 165 166 """ 167 ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:] 168 list_tic = self.net_cdf_obj.variables.get("total_intensity")[:] 169 list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60 170 mass_values = self.net_cdf_obj.variables.get("mass_values")[:] 171 intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:] 172 resolution = self.net_cdf_obj.variables.get("resolution")[:] 173 individual_rp = len(mass_values) == len(resolution) 174 finish_location = -1 175 for scan_index in self.list_scans: 176 datapoints = ms_datapoints_per_scans[scan_index] 177 finish_location += datapoints 178 start_location = finish_location - datapoints + 1 179 d_params["rt"] = list_rt[scan_index] 180 d_params["scan_number"] = scan_index 181 d_params["label"] = Labels.gcms_centroid 182 d_params["polarity"] = self.polarity 183 d_params["analyzer"] = self.analyzer 184 d_params["instrument_label"] = self.instrument_label 185 mz = mass_values[start_location:finish_location] 186 abun = intensity_values[start_location:finish_location] 187 if individual_rp: 188 rp = resolution[start_location:finish_location] 189 else: 190 rp = [resolution[scan_index]] * datapoints 191 self.get_mass_spectrum(mz, abun, rp, d_params) 192 self.gcms.retention_time = list_rt 193 self.gcms.tic = list_tic 194 
self.gcms.scans_number = self.list_scans 195 196 def get_gcms_obj(self): 197 """ 198 Get the GCMSBase object. 199 200 """ 201 return self.gcms
A class for reading AndiNetCDF files and extracting mass spectra data.
Parameters
- file_location (str or Path): The location of the AndiNetCDF file.
- analyzer (str, optional): The type of analyzer used (default is 'Quadruple').
- instrument_label (str, optional): The label of the instrument (default is 'GCMS-Agilent').
- auto_process (bool, optional): Whether to automatically process the data (default is True). The constructor accepts this argument but does not currently read it.
Attributes
- file_location (Path): The path to the AndiNetCDF file.
- net_cdf_obj (Dataset): The NetCDF dataset object.
- ionization_type (str): The ionization type used in the experiment.
- experiment_type (str): The type of experiment.
- list_scans (range): The range of scan numbers in the dataset.
- initial_scan_number (int): The number of the initial scan.
- final_scan_number (int): The number of the final scan.
- analyzer (str): The type of analyzer used.
- instrument_label (str): The label of the instrument.
- gcms (GCMSBase): The GCMSBase object for storing mass spectra data.
Methods
- polarity. Property (accessed without parentheses). Get the polarity of the ionization: +1 when the file reports "Positive Polarity", -1 otherwise.
- get_mass_spectrum(mz, abun, rp, d_params). Add a mass spectrum to the GCMSBase object.
- run(). Populate the GCMSBase object with mass spectra data.
- import_mass_spectra(d_params). Import mass spectra data from the AndiNetCDF file.
- get_gcms_obj(). Get the GCMSBase object.
def __init__(
    self,
    file_location: str | Path,
    analyzer="Quadruple",
    instrument_label="GCMS-Agilent",
    auto_process=True,
):
    """
    Open the AndiNetCDF file and read its file-level metadata.

    Parameters
    -----------
    file_location : str or Path
        The location of the AndiNetCDF file. A str is converted to a Path;
        any other object is stored as given.
    analyzer : str, optional
        The type of analyzer used (default is 'Quadruple').
    instrument_label : str, optional
        The label of the instrument (default is 'GCMS-Agilent').
    auto_process : bool, optional
        Accepted but not read by this constructor (default is True).

    Raises
    ------
    FileNotFoundError
        If `file_location` does not exist.

    """
    Thread.__init__(self)

    if isinstance(file_location, str):
        self.file_location = Path(file_location)
    else:
        self.file_location = file_location

    if not self.file_location.exists():
        # Build the message eagerly: exception constructors do not apply
        # %-style formatting to their arguments.
        raise FileNotFoundError(f"File does not exist at {file_location}")

    if isinstance(file_location, S3Path):
        # Read the whole object into memory and close the handle before
        # handing the bytes to netCDF4.
        with self.file_location.open("rb") as remote_file:
            raw_bytes = remote_file.read()
        self.net_cdf_obj = Dataset(
            self.file_location.name,
            "r",
            diskless=True,
            memory=raw_bytes,
            format="NETCDF3_CLASSIC",
        )
    else:
        self.net_cdf_obj = Dataset(
            self.file_location, "r", format="NETCDF3_CLASSIC"
        )

    self.ionization_type = self.net_cdf_obj.test_ionization_mode
    self.experiment_type = self.net_cdf_obj.experiment_type
    # One zero-based index per entry of 'actual_scan_number'.
    self.list_scans = range(
        len(self.net_cdf_obj.variables.get("actual_scan_number")[:])
    )
    self.initial_scan_number = self.list_scans[0]
    self.final_scan_number = self.list_scans[-1]
    self.analyzer = analyzer
    self.instrument_label = instrument_label
    self.gcms = GCMSBase(self.file_location, analyzer, instrument_label)
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def get_mass_spectrum(self, mz, abun, rp, d_params):
    """
    Build one low-resolution centroid spectrum and append it to `self.gcms`.

    Parameters
    -----------
    mz : array-like
        The m/z values of the mass spectrum.
    abun : array-like
        The abundance values of the mass spectrum.
    rp : array-like
        The resolution values of the mass spectrum.
    d_params : dict
        Additional parameters for the mass spectrum.

    """
    # Signal-to-noise is stored as None by this reader.
    spectrum = MassSpecCentroidLowRes(
        {
            Labels.mz: mz,
            Labels.abundance: abun,
            Labels.rp: rp,
            Labels.s2n: None,
        },
        d_params,
    )
    self.gcms.add_mass_spectrum(spectrum)
Add a mass spectrum to the GCMSBase object.
Parameters
- mz (array-like): The m/z values of the mass spectrum.
- abun (array-like): The abundance values of the mass spectrum.
- rp (array-like): The resolution values of the mass spectrum.
- d_params (dict): Additional parameters for the mass spectrum.
def run(self):
    """
    Populate the GCMSBase object with mass spectra data.

    Builds the default parameters for `self.file_location` and passes them
    to `import_mass_spectra`.
    """
    self.import_mass_spectra(default_parameters(self.file_location))
Populate the GCMSBase object with mass spectra data.
def import_mass_spectra(self, d_params):
    """
    Import mass spectra data from the AndiNetCDF file.

    The flat 'mass_values' / 'intensity_values' arrays are cut into one
    chunk per scan using 'point_count', and each chunk is added to
    `self.gcms` as a mass spectrum. Afterwards the retention times, the TIC
    and the scan indexes are stored on `self.gcms`.

    Parameters
    -----------
    d_params : dict
        Additional parameters for the mass spectra. The dict is updated in
        place for every scan and the same object is passed to each spectrum.

    """
    variables = self.net_cdf_obj.variables
    ms_datapoints_per_scans = variables.get("point_count")[:]
    list_tic = variables.get("total_intensity")[:]
    # presumably converts seconds to minutes -- confirm against the file spec
    list_rt = variables.get("scan_acquisition_time")[:] / 60
    mass_values = variables.get("mass_values")[:]
    intensity_values = variables.get("intensity_values")[:]
    resolution = variables.get("resolution")[:]

    # When 'resolution' has one entry per data point it is sliced like the
    # m/z values; otherwise it is indexed per scan and repeated.
    individual_rp = len(mass_values) == len(resolution)

    # The polarity is a file-level attribute, so read it once.
    polarity = self.polarity

    start_location = 0
    for scan_index in self.list_scans:
        datapoints = int(ms_datapoints_per_scans[scan_index])
        # Exclusive stop index, so the slice holds exactly `datapoints`
        # values, including the last point of the scan.
        finish_location = start_location + datapoints

        d_params["rt"] = list_rt[scan_index]
        d_params["scan_number"] = scan_index
        d_params["label"] = Labels.gcms_centroid
        d_params["polarity"] = polarity
        d_params["analyzer"] = self.analyzer
        d_params["instrument_label"] = self.instrument_label

        mz = mass_values[start_location:finish_location]
        abun = intensity_values[start_location:finish_location]
        if individual_rp:
            rp = resolution[start_location:finish_location]
        else:
            rp = [resolution[scan_index]] * datapoints

        self.get_mass_spectrum(mz, abun, rp, d_params)
        start_location = finish_location

    self.gcms.retention_time = list_rt
    self.gcms.tic = list_tic
    self.gcms.scans_number = self.list_scans
Import mass spectra data from the AndiNetCDF file.
Parameters
- d_params (dict): Additional parameters for the mass spectra.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id