corems.mass_spectra.input.boosterHDF5

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Oct 29, 2019"
  3
  4from threading import Thread
  5from pathlib import Path
  6from io import BytesIO
  7
  8import h5py
  9from s3path import S3Path
 10
 11from corems.encapsulation.constant import Labels
 12from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile
 13from corems.mass_spectra.factory.lc_class import LCMSBase
 14from corems.encapsulation.factory.parameters import default_parameters
 15
 16
 17class ReadHDF_BoosterMassSpectra(Thread):
 18    """
 19    Class for reading HDF5 files containing booster mass spectra.
 20
 21    Parameters
 22    ----------
 23    file_location : Path or S3Path
 24        The full path to the HDF5 file.
 25    analyzer : str, optional
 26        The type of analyzer used for the mass spectra. Defaults to "ICR".
 27    instrument_label : str, optional
 28        The label of the instrument. Defaults to "21T".
 29    auto_process : bool, optional
 30        Whether to automatically process the mass spectra. Defaults to True.
 31    """
 32
 33    def __init__(
 34        self,
 35        file_location: Path | S3Path,
 36        analyzer="ICR",
 37        instrument_label="21T",
 38        auto_process=True,
 39    ):
 40        """
 41        Initialize the ReadHDF_BoosterMassSpectra class.
 42
 43        Parameters
 44        ----------
 45        file_location : Path or S3Path
 46            The full path to the HDF5 file.
 47        analyzer : str, optional
 48            The type of analyzer used for the mass spectra. Defaults to "ICR".
 49        instrument_label : str, optional
 50            The label of the instrument. Defaults to "21T".
 51        auto_process : bool, optional
 52            Whether to automatically process the mass spectra. Defaults to True.
 53        """
 54        Thread.__init__(self)
 55
 56        if isinstance(file_location, str):
 57            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
 58            self.file_location = Path(file_location)
 59
 60        self.lcms = LCMSBase(
 61            file_location, analyzer=analyzer, instrument_label=instrument_label
 62        )
 63
 64        if isinstance(file_location, S3Path):
 65            data = BytesIO(file_location.open("rb").read())
 66        else:
 67            data = file_location
 68
 69        self.hdf_obj = h5py.File(data, "r")
 70
 71        self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())])
 72
 73        self.initial_scan_number = self.list_scans[0]
 74
 75        self.final_scan_number = self.list_scans[-1]
 76
 77        self.file_location = file_location
 78
 79        self.auto_process = True
 80
 81        self.analyzer = analyzer
 82
 83        self.instrument_label = instrument_label
 84
 85    def get_polarity(self, file_location: Path | S3Path, scan: int):
 86        """
 87        Get the polarity of a scan.
 88
 89        Parameters
 90        ----------
 91        file_location : Path or S3Path
 92            The full path to the HDF5 file.
 93        scan : int
 94            The scan number.
 95
 96        """
 97        if isinstance(file_location, S3Path):
 98            data = BytesIO(file_location.open("rb").read())
 99        else:
100            data = file_location
101
102        self.h5pydata = h5py.File(data, "r")
103
104        self.scans = list(self.h5pydata.keys())
105
106        polarity = self.get_attr_data(scan, "r_h_polarity")
107
108        if polarity == "negative scan":
109            return -1
110        else:
111            return +1
112
113    def get_attr_data(self, scan, attr_srt):
114        """
115        Get the attribute data of a scan.
116
117        Parameters
118        ----------
119        scan : int
120            The scan number.
121        attr_srt : str
122            The attribute name.
123
124        """
125        return self.hdf_obj[str(scan)].attrs[attr_srt]
126
127    def import_mass_spectra(self, d_params: dict):
128        """
129        Import the mass spectra from the HDF5 file.
130
131        Parameters
132        ----------
133        d_params : dict
134            The parameters for importing the mass spectra.
135        """
136        list_rt, list_tic = list(), list()
137
138        for scan_number in self.list_scans:
139            d_params["rt"] = list_rt.append(
140                self.get_attr_data(scan_number, "r_h_start_time")
141            )
142
143            d_params["scan_number"] = scan_number
144
145            d_params["label"] = Labels.booster_profile
146
147            d_params["polarity"] = self.get_polarity(self.file_location, scan_number)
148
149            d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0]
150
151            d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1]
152
153            d_params["analyzer"] = self.analyzer
154
155            d_params["instrument_label"] = self.instrument_label
156
157            list_rt.append(d_params["rt"])
158
159            list_tic.append(self.get_attr_data(scan_number, "r_h_tic"))
160
161            mass_spec = self.get_mass_spectrum(scan_number, d_params)
162
163            self.lcms.add_mass_spectrum(mass_spec)
164
165        self.lcms.retention_time = list_rt
166        self.lcms.tic = list_tic
167        self.lcms.scans_number = self.list_scans
168
169    def get_mass_spectrum(self, scan: int, d_params: dict):
170        """
171        Get the mass spectrum for a scan.
172
173        Parameters
174        ----------
175        scan : int
176            The scan number.
177        d_params : dict
178            The parameters for creating the mass spectrum.
179
180        """
181        booster_data = self.hdf_obj[str(scan)]
182
183        if booster_data.shape[0] != 2:
184            raise NotImplementedError(
185                "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum"
186            )
187        else:
188            data_dict = {
189                Labels.mz: booster_data[0],
190                Labels.abundance: booster_data[1],
191                Labels.rp: None,
192                Labels.s2n: None,
193            }
194
195            mass_spec = MassSpecProfile(
196                data_dict, d_params, auto_process=self.auto_process
197            )
198
199        return mass_spec
200
201    def run(self):
202        """
203        Run the thread to create the LCMS object.
204        """
205        d_parameters = default_parameters(self.file_location)
206        self.import_mass_spectra(d_parameters)
207
208    def get_lcms_obj(self):
209        """
210        Get the LCMS object.
211
212        """
213        if len(self.lcms) > 0:
214            return self.lcms
215        else:
216            raise Exception("Returning an empty LCMS class")
class ReadHDF_BoosterMassSpectra(threading.Thread):
 18class ReadHDF_BoosterMassSpectra(Thread):
 19    """
 20    Class for reading HDF5 files containing booster mass spectra.
 21
 22    Parameters
 23    ----------
 24    file_location : Path or S3Path
 25        The full path to the HDF5 file.
 26    analyzer : str, optional
 27        The type of analyzer used for the mass spectra. Defaults to "ICR".
 28    instrument_label : str, optional
 29        The label of the instrument. Defaults to "21T".
 30    auto_process : bool, optional
 31        Whether to automatically process the mass spectra. Defaults to True.
 32    """
 33
 34    def __init__(
 35        self,
 36        file_location: Path | S3Path,
 37        analyzer="ICR",
 38        instrument_label="21T",
 39        auto_process=True,
 40    ):
 41        """
 42        Initialize the ReadHDF_BoosterMassSpectra class.
 43
 44        Parameters
 45        ----------
 46        file_location : Path or S3Path
 47            The full path to the HDF5 file.
 48        analyzer : str, optional
 49            The type of analyzer used for the mass spectra. Defaults to "ICR".
 50        instrument_label : str, optional
 51            The label of the instrument. Defaults to "21T".
 52        auto_process : bool, optional
 53            Whether to automatically process the mass spectra. Defaults to True.
 54        """
 55        Thread.__init__(self)
 56
 57        if isinstance(file_location, str):
 58            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
 59            self.file_location = Path(file_location)
 60
 61        self.lcms = LCMSBase(
 62            file_location, analyzer=analyzer, instrument_label=instrument_label
 63        )
 64
 65        if isinstance(file_location, S3Path):
 66            data = BytesIO(file_location.open("rb").read())
 67        else:
 68            data = file_location
 69
 70        self.hdf_obj = h5py.File(data, "r")
 71
 72        self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())])
 73
 74        self.initial_scan_number = self.list_scans[0]
 75
 76        self.final_scan_number = self.list_scans[-1]
 77
 78        self.file_location = file_location
 79
 80        self.auto_process = True
 81
 82        self.analyzer = analyzer
 83
 84        self.instrument_label = instrument_label
 85
 86    def get_polarity(self, file_location: Path | S3Path, scan: int):
 87        """
 88        Get the polarity of a scan.
 89
 90        Parameters
 91        ----------
 92        file_location : Path or S3Path
 93            The full path to the HDF5 file.
 94        scan : int
 95            The scan number.
 96
 97        """
 98        if isinstance(file_location, S3Path):
 99            data = BytesIO(file_location.open("rb").read())
100        else:
101            data = file_location
102
103        self.h5pydata = h5py.File(data, "r")
104
105        self.scans = list(self.h5pydata.keys())
106
107        polarity = self.get_attr_data(scan, "r_h_polarity")
108
109        if polarity == "negative scan":
110            return -1
111        else:
112            return +1
113
114    def get_attr_data(self, scan, attr_srt):
115        """
116        Get the attribute data of a scan.
117
118        Parameters
119        ----------
120        scan : int
121            The scan number.
122        attr_srt : str
123            The attribute name.
124
125        """
126        return self.hdf_obj[str(scan)].attrs[attr_srt]
127
128    def import_mass_spectra(self, d_params: dict):
129        """
130        Import the mass spectra from the HDF5 file.
131
132        Parameters
133        ----------
134        d_params : dict
135            The parameters for importing the mass spectra.
136        """
137        list_rt, list_tic = list(), list()
138
139        for scan_number in self.list_scans:
140            d_params["rt"] = list_rt.append(
141                self.get_attr_data(scan_number, "r_h_start_time")
142            )
143
144            d_params["scan_number"] = scan_number
145
146            d_params["label"] = Labels.booster_profile
147
148            d_params["polarity"] = self.get_polarity(self.file_location, scan_number)
149
150            d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0]
151
152            d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1]
153
154            d_params["analyzer"] = self.analyzer
155
156            d_params["instrument_label"] = self.instrument_label
157
158            list_rt.append(d_params["rt"])
159
160            list_tic.append(self.get_attr_data(scan_number, "r_h_tic"))
161
162            mass_spec = self.get_mass_spectrum(scan_number, d_params)
163
164            self.lcms.add_mass_spectrum(mass_spec)
165
166        self.lcms.retention_time = list_rt
167        self.lcms.tic = list_tic
168        self.lcms.scans_number = self.list_scans
169
170    def get_mass_spectrum(self, scan: int, d_params: dict):
171        """
172        Get the mass spectrum for a scan.
173
174        Parameters
175        ----------
176        scan : int
177            The scan number.
178        d_params : dict
179            The parameters for creating the mass spectrum.
180
181        """
182        booster_data = self.hdf_obj[str(scan)]
183
184        if booster_data.shape[0] != 2:
185            raise NotImplementedError(
186                "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum"
187            )
188        else:
189            data_dict = {
190                Labels.mz: booster_data[0],
191                Labels.abundance: booster_data[1],
192                Labels.rp: None,
193                Labels.s2n: None,
194            }
195
196            mass_spec = MassSpecProfile(
197                data_dict, d_params, auto_process=self.auto_process
198            )
199
200        return mass_spec
201
202    def run(self):
203        """
204        Run the thread to create the LCMS object.
205        """
206        d_parameters = default_parameters(self.file_location)
207        self.import_mass_spectra(d_parameters)
208
209    def get_lcms_obj(self):
210        """
211        Get the LCMS object.
212
213        """
214        if len(self.lcms) > 0:
215            return self.lcms
216        else:
217            raise Exception("Returning an empty LCMS class")

Class for reading HDF5 files containing booster mass spectra.

Parameters
  • file_location (Path or S3Path): The full path to the HDF5 file.
  • analyzer (str, optional): The type of analyzer used for the mass spectra. Defaults to "ICR".
  • instrument_label (str, optional): The label of the instrument. Defaults to "21T".
  • auto_process (bool, optional): Whether to automatically process the mass spectra. Defaults to True.
ReadHDF_BoosterMassSpectra( file_location: pathlib.Path | s3path.S3Path, analyzer='ICR', instrument_label='21T', auto_process=True)
34    def __init__(
35        self,
36        file_location: Path | S3Path,
37        analyzer="ICR",
38        instrument_label="21T",
39        auto_process=True,
40    ):
41        """
42        Initialize the ReadHDF_BoosterMassSpectra class.
43
44        Parameters
45        ----------
46        file_location : Path or S3Path
47            The full path to the HDF5 file.
48        analyzer : str, optional
49            The type of analyzer used for the mass spectra. Defaults to "ICR".
50        instrument_label : str, optional
51            The label of the instrument. Defaults to "21T".
52        auto_process : bool, optional
53            Whether to automatically process the mass spectra. Defaults to True.
54        """
55        Thread.__init__(self)
56
57        if isinstance(file_location, str):
58            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
59            self.file_location = Path(file_location)
60
61        self.lcms = LCMSBase(
62            file_location, analyzer=analyzer, instrument_label=instrument_label
63        )
64
65        if isinstance(file_location, S3Path):
66            data = BytesIO(file_location.open("rb").read())
67        else:
68            data = file_location
69
70        self.hdf_obj = h5py.File(data, "r")
71
72        self.list_scans = sorted([int(i) for i in list(self.hdf_obj.keys())])
73
74        self.initial_scan_number = self.list_scans[0]
75
76        self.final_scan_number = self.list_scans[-1]
77
78        self.file_location = file_location
79
80        self.auto_process = True
81
82        self.analyzer = analyzer
83
84        self.instrument_label = instrument_label

Initialize the ReadHDF_BoosterMassSpectra class.

Parameters
  • file_location (Path or S3Path): The full path to the HDF5 file.
  • analyzer (str, optional): The type of analyzer used for the mass spectra. Defaults to "ICR".
  • instrument_label (str, optional): The label of the instrument. Defaults to "21T".
  • auto_process (bool, optional): Whether to automatically process the mass spectra. Defaults to True.
lcms
hdf_obj
list_scans
initial_scan_number
final_scan_number
file_location
auto_process
analyzer
instrument_label
def get_polarity(self, file_location: pathlib.Path | s3path.S3Path, scan: int):
 86    def get_polarity(self, file_location: Path | S3Path, scan: int):
 87        """
 88        Get the polarity of a scan.
 89
 90        Parameters
 91        ----------
 92        file_location : Path or S3Path
 93            The full path to the HDF5 file.
 94        scan : int
 95            The scan number.
 96
 97        """
 98        if isinstance(file_location, S3Path):
 99            data = BytesIO(file_location.open("rb").read())
100        else:
101            data = file_location
102
103        self.h5pydata = h5py.File(data, "r")
104
105        self.scans = list(self.h5pydata.keys())
106
107        polarity = self.get_attr_data(scan, "r_h_polarity")
108
109        if polarity == "negative scan":
110            return -1
111        else:
112            return +1

Get the polarity of a scan.

Parameters
  • file_location (Path or S3Path): The full path to the HDF5 file.
  • scan (int): The scan number.
def get_attr_data(self, scan, attr_srt):
114    def get_attr_data(self, scan, attr_srt):
115        """
116        Get the attribute data of a scan.
117
118        Parameters
119        ----------
120        scan : int
121            The scan number.
122        attr_srt : str
123            The attribute name.
124
125        """
126        return self.hdf_obj[str(scan)].attrs[attr_srt]

Get the attribute data of a scan.

Parameters
  • scan (int): The scan number.
  • attr_srt (str): The attribute name.
def import_mass_spectra(self, d_params: dict):
128    def import_mass_spectra(self, d_params: dict):
129        """
130        Import the mass spectra from the HDF5 file.
131
132        Parameters
133        ----------
134        d_params : dict
135            The parameters for importing the mass spectra.
136        """
137        list_rt, list_tic = list(), list()
138
139        for scan_number in self.list_scans:
140            d_params["rt"] = list_rt.append(
141                self.get_attr_data(scan_number, "r_h_start_time")
142            )
143
144            d_params["scan_number"] = scan_number
145
146            d_params["label"] = Labels.booster_profile
147
148            d_params["polarity"] = self.get_polarity(self.file_location, scan_number)
149
150            d_params["Aterm"] = self.get_attr_data(scan_number, "r_cparams")[0]
151
152            d_params["Bterm"] = self.get_attr_data(scan_number, "r_cparams")[1]
153
154            d_params["analyzer"] = self.analyzer
155
156            d_params["instrument_label"] = self.instrument_label
157
158            list_rt.append(d_params["rt"])
159
160            list_tic.append(self.get_attr_data(scan_number, "r_h_tic"))
161
162            mass_spec = self.get_mass_spectrum(scan_number, d_params)
163
164            self.lcms.add_mass_spectrum(mass_spec)
165
166        self.lcms.retention_time = list_rt
167        self.lcms.tic = list_tic
168        self.lcms.scans_number = self.list_scans

Import the mass spectra from the HDF5 file.

Parameters
  • d_params (dict): The parameters for importing the mass spectra.
def get_mass_spectrum(self, scan: int, d_params: dict):
170    def get_mass_spectrum(self, scan: int, d_params: dict):
171        """
172        Get the mass spectrum for a scan.
173
174        Parameters
175        ----------
176        scan : int
177            The scan number.
178        d_params : dict
179            The parameters for creating the mass spectrum.
180
181        """
182        booster_data = self.hdf_obj[str(scan)]
183
184        if booster_data.shape[0] != 2:
185            raise NotImplementedError(
186                "opening transient, needs read raw file here, get bandwidth, create transient class and then the mass spectrum"
187            )
188        else:
189            data_dict = {
190                Labels.mz: booster_data[0],
191                Labels.abundance: booster_data[1],
192                Labels.rp: None,
193                Labels.s2n: None,
194            }
195
196            mass_spec = MassSpecProfile(
197                data_dict, d_params, auto_process=self.auto_process
198            )
199
200        return mass_spec

Get the mass spectrum for a scan.

Parameters
  • scan (int): The scan number.
  • d_params (dict): The parameters for creating the mass spectrum.
def run(self):
202    def run(self):
203        """
204        Run the thread to create the LCMS object.
205        """
206        d_parameters = default_parameters(self.file_location)
207        self.import_mass_spectra(d_parameters)

Run the thread to create the LCMS object.

def get_lcms_obj(self):
209    def get_lcms_obj(self):
210        """
211        Get the LCMS object.
212
213        """
214        if len(self.lcms) > 0:
215            return self.lcms
216        else:
217            raise Exception("Returning an empty LCMS class")

Get the LCMS object.

Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id