corems.mass_spectra.input.boosterHDF5
1__author__ = "Yuri E. Corilo" 2__date__ = "Oct 29, 2019" 3 4from threading import Thread 5from pathlib import Path 6from io import BytesIO 7 8import h5py 9from s3path import S3Path 10 11from corems.encapsulation.constant import Labels 12from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile 13from corems.mass_spectra.factory.lc_class import LCMSBase 14from corems.encapsulation.factory.parameters import default_parameters 15 16 17class ReadHDF_BoosterMassSpectra(Thread): 18 """ 19 Class for reading HDF5 files containing booster mass spectra. 20 21 Parameters 22 ---------- 23 file_location : Path or S3Path 24 The full path to the HDF5 file. 25 analyzer : str, optional 26 The type of analyzer used for the mass spectra. Defaults to "ICR". 27 instrument_label : str, optional 28 The label of the instrument. Defaults to "21T". 29 auto_process : bool, optional 30 Whether to automatically process the mass spectra. Defaults to True. 31 """ 32 33 def __init__( 34 self, 35 file_location: Path | S3Path, 36 analyzer="ICR", 37 instrument_label="21T", 38 auto_process=True, 39 ): 40 """ 41 Initialize the ReadHDF_BoosterMassSpectra class. 42 43 Parameters 44 ---------- 45 file_location : Path or S3Path 46 The full path to the HDF5 file. 47 analyzer : str, optional 48 The type of analyzer used for the mass spectra. Defaults to "ICR". 49 instrument_label : str, optional 50 The label of the instrument. Defaults to "21T". 51 auto_process : bool, optional 52 Whether to automatically process the mass spectra. Defaults to True. 53 """ 54 Thread.__init__(self) 55 56 if isinstance(file_location, str): 57 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed 58 self.file_location = Path(file_location) 59 60 self.lcms = LCMSBase( 61 file_location, analyzer=analyzer, instrument_label=instrument_label 62 ) 63 64 if isinstance(file_location, S3Path): 65 data = BytesIO(file_location.open("rb").read()) 66 else: 67 data = file_location 68 69 self.hdf_obj = h5py.File(data, "r") 70 71 self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())]) 72 73 self.initial_scan_number = self.list_scans[0] 74 75 self.final_scan_number = self.list_scans[-1] 76 77 self.file_location = file_location 78 79 self.auto_process = True 80 81 self.analyzer = analyzer 82 83 self.instrument_label = instrument_label 84 85 def get_polarity(self, file_location: Path | S3Path, scan: int): 86 """ 87 Get the polarity of a scan. 88 89 Parameters 90 ---------- 91 file_location : Path or S3Path 92 The full path to the HDF5 file. 93 scan : int 94 The scan number. 95 96 """ 97 if isinstance(file_location, S3Path): 98 data = BytesIO(file_location.open("rb").read()) 99 else: 100 data = file_location 101 102 self.h5pydata = h5py.File(data, "r") 103 104 self.scans = list(self.h5pydata.keys()) 105 106 polarity = self.get_attr_data(scan, "r_h_polarity") 107 108 if polarity == "negative scan": 109 return -1 110 else: 111 return +1 112 113 def get_attr_data(self, scan, attr_srt): 114 """ 115 Get the attribute data of a scan. 116 117 Parameters 118 ---------- 119 scan : int 120 The scan number. 121 attr_srt : str 122 The attribute name. 123 124 """ 125 return self.hdf_obj[str(scan)].attrs[attr_srt] 126 127 def import_mass_spectra(self, d_params: dict): 128 """ 129 Import the mass spectra from the HDF5 file. 130 131 Parameters 132 ---------- 133 d_params : dict 134 The parameters for importing the mass spectra. 135 """ 136 list_rt, list_tic = list(), list() 137 138 for scan_number in self.list_scans: 139 d_params["rt"] = list_rt.append( 140 self.get_attr_data(scan_number, "r_h_start_time") 141 ) 142 143 d_params["scan_number"] = scan_number 144 145 d_params["label"] = Labels.booster_profile 146 147 d_params["polarity"] = self.get_polarity(self.file_location, scan_number) 148 149 d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0] 150 151 d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1] 152 153 d_params["analyzer"] = self.analyzer 154 155 d_params["instrument_label"] = self.instrument_label 156 157 list_rt.append(d_params["rt"]) 158 159 list_tic.append(self.get_attr_data(scan_number, "r_h_tic")) 160 161 mass_spec = self.get_mass_spectrum(scan_number, d_params) 162 163 self.lcms.add_mass_spectrum(mass_spec) 164 165 self.lcms.retention_time = list_rt 166 self.lcms.tic = list_tic 167 self.lcms.scans_number = self.list_scans 168 169 def get_mass_spectrum(self, scan: int, d_params: dict): 170 """ 171 Get the mass spectrum for a scan. 172 173 Parameters 174 ---------- 175 scan : int 176 The scan number. 177 d_params : dict 178 The parameters for creating the mass spectrum. 179 180 """ 181 booster_data = self.hdf_obj[str(scan)] 182 183 if booster_data.shape[0] != 2: 184 raise NotImplementedError( 185 "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum" 186 ) 187 else: 188 data_dict = { 189 Labels.mz: booster_data[0], 190 Labels.abundance: booster_data[1], 191 Labels.rp: None, 192 Labels.s2n: None, 193 } 194 195 mass_spec = MassSpecProfile( 196 data_dict, d_params, auto_process=self.auto_process 197 ) 198 199 return mass_spec 200 201 def run(self): 202 """ 203 Run the thread to create the LCMS object. 204 """ 205 d_parameters = default_parameters(self.file_location) 206 self.import_mass_spectra(d_parameters) 207 208 def get_lcms_obj(self): 209 """ 210 Get the LCMS object. 211 212 """ 213 if len(self.lcms) > 0: 214 return self.lcms 215 else: 216 raise Exception("Returning an empty LCMS class")
class
ReadHDF_BoosterMassSpectra(threading.Thread):
18class ReadHDF_BoosterMassSpectra(Thread): 19 """ 20 Class for reading HDF5 files containing booster mass spectra. 21 22 Parameters 23 ---------- 24 file_location : Path or S3Path 25 The full path to the HDF5 file. 26 analyzer : str, optional 27 The type of analyzer used for the mass spectra. Defaults to "ICR". 28 instrument_label : str, optional 29 The label of the instrument. Defaults to "21T". 30 auto_process : bool, optional 31 Whether to automatically process the mass spectra. Defaults to True. 32 """ 33 34 def __init__( 35 self, 36 file_location: Path | S3Path, 37 analyzer="ICR", 38 instrument_label="21T", 39 auto_process=True, 40 ): 41 """ 42 Initialize the ReadHDF_BoosterMassSpectra class. 43 44 Parameters 45 ---------- 46 file_location : Path or S3Path 47 The full path to the HDF5 file. 48 analyzer : str, optional 49 The type of analyzer used for the mass spectra. Defaults to "ICR". 50 instrument_label : str, optional 51 The label of the instrument. Defaults to "21T". 52 auto_process : bool, optional 53 Whether to automatically process the mass spectra. Defaults to True. 54 """ 55 Thread.__init__(self) 56 57 if isinstance(file_location, str): 58 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed 59 self.file_location = Path(file_location) 60 61 self.lcms = LCMSBase( 62 file_location, analyzer=analyzer, instrument_label=instrument_label 63 ) 64 65 if isinstance(file_location, S3Path): 66 data = BytesIO(file_location.open("rb").read()) 67 else: 68 data = file_location 69 70 self.hdf_obj = h5py.File(data, "r") 71 72 self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())]) 73 74 self.initial_scan_number = self.list_scans[0] 75 76 self.final_scan_number = self.list_scans[-1] 77 78 self.file_location = file_location 79 80 self.auto_process = True 81 82 self.analyzer = analyzer 83 84 self.instrument_label = instrument_label 85 86 def get_polarity(self, file_location: Path | S3Path, scan: int): 87 """ 88 Get the polarity of a scan. 89 90 Parameters 91 ---------- 92 file_location : Path or S3Path 93 The full path to the HDF5 file. 94 scan : int 95 The scan number. 96 97 """ 98 if isinstance(file_location, S3Path): 99 data = BytesIO(file_location.open("rb").read()) 100 else: 101 data = file_location 102 103 self.h5pydata = h5py.File(data, "r") 104 105 self.scans = list(self.h5pydata.keys()) 106 107 polarity = self.get_attr_data(scan, "r_h_polarity") 108 109 if polarity == "negative scan": 110 return -1 111 else: 112 return +1 113 114 def get_attr_data(self, scan, attr_srt): 115 """ 116 Get the attribute data of a scan. 117 118 Parameters 119 ---------- 120 scan : int 121 The scan number. 122 attr_srt : str 123 The attribute name. 124 125 """ 126 return self.hdf_obj[str(scan)].attrs[attr_srt] 127 128 def import_mass_spectra(self, d_params: dict): 129 """ 130 Import the mass spectra from the HDF5 file. 131 132 Parameters 133 ---------- 134 d_params : dict 135 The parameters for importing the mass spectra. 136 """ 137 list_rt, list_tic = list(), list() 138 139 for scan_number in self.list_scans: 140 d_params["rt"] = list_rt.append( 141 self.get_attr_data(scan_number, "r_h_start_time") 142 ) 143 144 d_params["scan_number"] = scan_number 145 146 d_params["label"] = Labels.booster_profile 147 148 d_params["polarity"] = self.get_polarity(self.file_location, scan_number) 149 150 d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0] 151 152 d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1] 153 154 d_params["analyzer"] = self.analyzer 155 156 d_params["instrument_label"] = self.instrument_label 157 158 list_rt.append(d_params["rt"]) 159 160 list_tic.append(self.get_attr_data(scan_number, "r_h_tic")) 161 162 mass_spec = self.get_mass_spectrum(scan_number, d_params) 163 164 self.lcms.add_mass_spectrum(mass_spec) 165 166 self.lcms.retention_time = list_rt 167 self.lcms.tic = list_tic 168 self.lcms.scans_number = self.list_scans 169 170 def get_mass_spectrum(self, scan: int, d_params: dict): 171 """ 172 Get the mass spectrum for a scan. 173 174 Parameters 175 ---------- 176 scan : int 177 The scan number. 178 d_params : dict 179 The parameters for creating the mass spectrum. 180 181 """ 182 booster_data = self.hdf_obj[str(scan)] 183 184 if booster_data.shape[0] != 2: 185 raise NotImplementedError( 186 "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum" 187 ) 188 else: 189 data_dict = { 190 Labels.mz: booster_data[0], 191 Labels.abundance: booster_data[1], 192 Labels.rp: None, 193 Labels.s2n: None, 194 } 195 196 mass_spec = MassSpecProfile( 197 data_dict, d_params, auto_process=self.auto_process 198 ) 199 200 return mass_spec 201 202 def run(self): 203 """ 204 Run the thread to create the LCMS object. 205 """ 206 d_parameters = default_parameters(self.file_location) 207 self.import_mass_spectra(d_parameters) 208 209 def get_lcms_obj(self): 210 """ 211 Get the LCMS object. 212 213 """ 214 if len(self.lcms) > 0: 215 return self.lcms 216 else: 217 raise Exception("Returning an empty LCMS class")
Class for reading HDF5 files containing booster mass spectra.
Parameters
- file_location (Path or S3Path): The full path to the HDF5 file.
- analyzer (str, optional): The type of analyzer used for the mass spectra. Defaults to "ICR".
- instrument_label (str, optional): The label of the instrument. Defaults to "21T".
- auto_process (bool, optional): Whether to automatically process the mass spectra. Defaults to True.
ReadHDF_BoosterMassSpectra( file_location: pathlib.Path | s3path.S3Path, analyzer='ICR', instrument_label='21T', auto_process=True)
34 def __init__( 35 self, 36 file_location: Path | S3Path, 37 analyzer="ICR", 38 instrument_label="21T", 39 auto_process=True, 40 ): 41 """ 42 Initialize the ReadHDF_BoosterMassSpectra class. 43 44 Parameters 45 ---------- 46 file_location : Path or S3Path 47 The full path to the HDF5 file. 48 analyzer : str, optional 49 The type of analyzer used for the mass spectra. Defaults to "ICR". 50 instrument_label : str, optional 51 The label of the instrument. Defaults to "21T". 52 auto_process : bool, optional 53 Whether to automatically process the mass spectra. Defaults to True. 54 """ 55 Thread.__init__(self) 56 57 if isinstance(file_location, str): 58 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed 59 self.file_location = Path(file_location) 60 61 self.lcms = LCMSBase( 62 file_location, analyzer=analyzer, instrument_label=instrument_label 63 ) 64 65 if isinstance(file_location, S3Path): 66 data = BytesIO(file_location.open("rb").read()) 67 else: 68 data = file_location 69 70 self.hdf_obj = h5py.File(data, "r") 71 72 self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())]) 73 74 self.initial_scan_number = self.list_scans[0] 75 76 self.final_scan_number = self.list_scans[-1] 77 78 self.file_location = file_location 79 80 self.auto_process = True 81 82 self.analyzer = analyzer 83 84 self.instrument_label = instrument_label
Initialize the ReadHDF_BoosterMassSpectra class.
Parameters
- file_location (Path or S3Path): The full path to the HDF5 file.
- analyzer (str, optional): The type of analyzer used for the mass spectra. Defaults to "ICR".
- instrument_label (str, optional): The label of the instrument. Defaults to "21T".
- auto_process (bool, optional): Whether to automatically process the mass spectra. Defaults to True.
def
get_polarity(self, file_location: pathlib.Path | s3path.S3Path, scan: int):
86 def get_polarity(self, file_location: Path | S3Path, scan: int): 87 """ 88 Get the polarity of a scan. 89 90 Parameters 91 ---------- 92 file_location : Path or S3Path 93 The full path to the HDF5 file. 94 scan : int 95 The scan number. 96 97 """ 98 if isinstance(file_location, S3Path): 99 data = BytesIO(file_location.open("rb").read()) 100 else: 101 data = file_location 102 103 self.h5pydata = h5py.File(data, "r") 104 105 self.scans = list(self.h5pydata.keys()) 106 107 polarity = self.get_attr_data(scan, "r_h_polarity") 108 109 if polarity == "negative scan": 110 return -1 111 else: 112 return +1
Get the polarity of a scan.
Parameters
- file_location (Path or S3Path): The full path to the HDF5 file.
- scan (int): The scan number.
def
get_attr_data(self, scan, attr_srt):
114 def get_attr_data(self, scan, attr_srt): 115 """ 116 Get the attribute data of a scan. 117 118 Parameters 119 ---------- 120 scan : int 121 The scan number. 122 attr_srt : str 123 The attribute name. 124 125 """ 126 return self.hdf_obj[str(scan)].attrs[attr_srt]
Get the attribute data of a scan.
Parameters
- scan (int): The scan number.
- attr_srt (str): The attribute name.
def
import_mass_spectra(self, d_params: dict):
128 def import_mass_spectra(self, d_params: dict): 129 """ 130 Import the mass spectra from the HDF5 file. 131 132 Parameters 133 ---------- 134 d_params : dict 135 The parameters for importing the mass spectra. 136 """ 137 list_rt, list_tic = list(), list() 138 139 for scan_number in self.list_scans: 140 d_params["rt"] = list_rt.append( 141 self.get_attr_data(scan_number, "r_h_start_time") 142 ) 143 144 d_params["scan_number"] = scan_number 145 146 d_params["label"] = Labels.booster_profile 147 148 d_params["polarity"] = self.get_polarity(self.file_location, scan_number) 149 150 d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0] 151 152 d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1] 153 154 d_params["analyzer"] = self.analyzer 155 156 d_params["instrument_label"] = self.instrument_label 157 158 list_rt.append(d_params["rt"]) 159 160 list_tic.append(self.get_attr_data(scan_number, "r_h_tic")) 161 162 mass_spec = self.get_mass_spectrum(scan_number, d_params) 163 164 self.lcms.add_mass_spectrum(mass_spec) 165 166 self.lcms.retention_time = list_rt 167 self.lcms.tic = list_tic 168 self.lcms.scans_number = self.list_scans
Import the mass spectra from the HDF5 file.
Parameters
- d_params (dict): The parameters for importing the mass spectra.
def
get_mass_spectrum(self, scan: int, d_params: dict):
170 def get_mass_spectrum(self, scan: int, d_params: dict): 171 """ 172 Get the mass spectrum for a scan. 173 174 Parameters 175 ---------- 176 scan : int 177 The scan number. 178 d_params : dict 179 The parameters for creating the mass spectrum. 180 181 """ 182 booster_data = self.hdf_obj[str(scan)] 183 184 if booster_data.shape[0] != 2: 185 raise NotImplementedError( 186 "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum" 187 ) 188 else: 189 data_dict = { 190 Labels.mz: booster_data[0], 191 Labels.abundance: booster_data[1], 192 Labels.rp: None, 193 Labels.s2n: None, 194 } 195 196 mass_spec = MassSpecProfile( 197 data_dict, d_params, auto_process=self.auto_process 198 ) 199 200 return mass_spec
Get the mass spectrum for a scan.
Parameters
- scan (int): The scan number.
- d_params (dict): The parameters for creating the mass spectrum.
def
run(self):
202 def run(self): 203 """ 204 Run the thread to create the LCMS object. 205 """ 206 d_parameters = default_parameters(self.file_location) 207 self.import_mass_spectra(d_parameters)
Run the thread to create the LCMS object.
def
get_lcms_obj(self):
209 def get_lcms_obj(self): 210 """ 211 Get the LCMS object. 212 213 """ 214 if len(self.lcms) > 0: 215 return self.lcms 216 else: 217 raise Exception("Returning an empty LCMS class")
Get the LCMS object.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id