corems.mass_spectra.input.corems_hdf5
__author__ = "Yuri E. Corilo"
__date__ = "Oct 29, 2019"


from threading import Thread
from pathlib import Path

import pandas as pd
import warnings

from corems.chroma_peak.factory.chroma_peak_classes import LCMSMassFeature
from corems.encapsulation.input.parameter_from_json import (
    load_and_set_json_parameters_lcms,
    load_and_set_toml_parameters_lcms,
)
from corems.mass_spectra.factory.lc_class import LCMSBase, MassSpectraBase
from corems.mass_spectra.factory.chromat_data import EIC_Data
from corems.mass_spectra.input.parserbase import SpectraParserInterface
from corems.mass_spectrum.input.coremsHDF5 import ReadCoreMSHDF_MassSpectrum
from corems.molecular_id.factory.spectrum_search_results import SpectrumSearchResults
from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader
from corems.mass_spectra.input.mzml import MZMLSpectraParser


class ReadCoreMSHDFMassSpectra(
    SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread
):
    """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.

    Parameters
    ----------
    file_location : str
        The location of the HDF5 file to read, including the suffix.

    Attributes
    ----------
    file_location : str
        The location of the HDF5 file to read.
    h5pydata : h5py.File
        The HDF5 file object.
    scans : list
        A list of the location of individual mass spectra within the HDF5 file.
    scan_number_list : list
        A list of the scan numbers of the mass spectra within the HDF5 file.
    parameters_location : str
        The location of the parameters file (json or toml).

    Methods
    -------
    * import_mass_spectra(mass_spectra).
        Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
    * get_mass_spectrum_from_scan(scan_number).
        Return mass spectrum data object from scan number.
    * get_mass_spectra_from_scan_list(scan_list, spectrum_mode).
        Return a list of mass spectrum data objects from a list of scan numbers.
    * load().
        Placeholder method to meet the requirements of the SpectraParserInterface.
    * get_ms_raw().
        Return the unprocessed mass spectra saved in the HDF5 file.
    * get_scan_df().
        Return the scan info saved in the HDF5 file as a pandas DataFrame.
    * run(mass_spectra).
        Runs the importer functions to populate a LCMS or MassSpectraBase object.
    * import_scan_info(mass_spectra).
        Imports the scan info from the HDF5 file to populate the _scan_info attribute
        on the LCMS or MassSpectraBase object
    * import_ms_unprocessed(mass_spectra).
        Imports the unprocessed mass spectra from the HDF5 file to populate the
        _ms_unprocessed attribute on the LCMS or MassSpectraBase object
    * import_parameters(mass_spectra).
        Imports the parameters from the HDF5 file to populate the parameters
        attribute on the LCMS or MassSpectraBase object
    * import_mass_features(mass_spectra).
        Imports the mass features from the HDF5 file to populate the mass_features
        attribute on the LCMS or MassSpectraBase object
    * import_eics(mass_spectra).
        Imports the extracted ion chromatograms from the HDF5 file to populate the
        eics attribute on the LCMS or MassSpectraBase object
    * import_spectral_search_results(mass_spectra).
        Imports the spectral search results from the HDF5 file to populate the
        spectral_search_results attribute on the LCMS or MassSpectraBase object
    * get_mass_spectra_obj().
        Return mass spectra data object, populating the _ms list on the LCMS or
        MassSpectraBase object from the HDF5 file
    * get_lcms_obj().
        Return LCMSBase object, populating the majority of the attributes on the
        LCMS object from the HDF5 file
    * get_raw_file_location().
        Return the original raw file location stored in the HDF5 file attributes.
    * add_original_parser(mass_spectra).
        Set the original raw-data parser class on the mass spectra object.

    """

    def __init__(self, file_location: str):
        Thread.__init__(self)
        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

        # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to
        # expect a nested location within the HDF5 file
        self.scans = [
            "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys())
        ]
        self.scan_number_list = sorted(
            [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())]
        )

        # set the location of the parameters file (json or toml): any sibling
        # file sharing the HDF5 file's name but with a different suffix
        add_files = [
            x
            for x in self.file_location.parent.glob(
                self.file_location.name.replace(".hdf5", ".*")
            )
            if x.suffix != ".hdf5"
        ]
        json_files = [x for x in add_files if x.suffix == ".json"]
        toml_files = [x for x in add_files if x.suffix == ".toml"]
        # JSON takes precedence over TOML when both are present
        if json_files:
            self.parameters_location = json_files[0]
        elif toml_files:
            self.parameters_location = toml_files[0]
        else:
            self.parameters_location = None

    def get_mass_spectrum_from_scan(self, scan_number):
        """Return mass spectrum data object from scan number.

        Parameters
        ----------
        scan_number : int
            The scan number to retrieve; must be present in `scan_number_list`.

        Raises
        ------
        Exception
            If the scan number is not found in the HDF5 file.
        """
        if scan_number in self.scan_number_list:
            mass_spec = self.get_mass_spectrum(scan_number)
            return mass_spec
        else:
            raise Exception("Scan number not found in HDF5 file.")

    def get_mass_spectra_from_scan_list(
        self, scan_list, spectrum_mode, auto_process=True
    ):
        """Return a list of mass spectrum data objects from a list of scan numbers.

        Parameters
        ----------
        scan_list : list
            A list of scan numbers to retrieve mass spectra for.
        spectrum_mode : str
            The spectrum mode to use when retrieving the mass spectra.
            Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only
            centroided spectra are saved.
        auto_process : bool
            If True, automatically process the mass spectra when retrieving them.
            Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only
            centroided spectra are saved.

        Returns
        -------
        list
            A list of mass spectrum data objects corresponding to the provided scan numbers.
            Scan numbers missing from the HDF5 file are skipped with a warning.
        """
        mass_spectra_list = []
        for scan_number in scan_list:
            if scan_number in self.scan_number_list:
                mass_spec = self.get_mass_spectrum_from_scan(scan_number)
                mass_spectra_list.append(mass_spec)
            else:
                warnings.warn(f"Scan number {scan_number} not found in HDF5 file.")
        return mass_spectra_list

    def load(self) -> None:
        """Placeholder method to meet the requirements of the SpectraParserInterface."""
        pass

    def get_ms_raw(self, spectra=None, scan_df=None) -> dict:
        """Return the unprocessed mass spectra saved in the HDF5 file.

        Parameters
        ----------
        spectra : optional
            Not used for CoreMS HDF5 files; a warning is emitted if provided.
        scan_df : optional
            Not used for CoreMS HDF5 files; a warning is emitted if provided.

        Returns
        -------
        dict
            One pandas DataFrame (columns 'scan', 'mz', 'intensity') per dataset
            in the 'ms_unprocessed' group, keyed by the dataset name cast to int.
        """
        # Warn if spectra or scan_df are not None that they are not used for
        # CoreMS HDF5 files and should be rerun after instantiation
        if spectra is not None or scan_df is not None:
            warnings.warn(
                "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation."
            )
        ms_unprocessed = {}
        dict_group_load = self.h5pydata["ms_unprocessed"]
        for k in dict_group_load.keys():
            ms_up_int = dict_group_load[k][:]
            ms_unprocessed[int(k)] = pd.DataFrame(
                ms_up_int, columns=["scan", "mz", "intensity"]
            )
        return ms_unprocessed

    def get_scan_df(self) -> pd.DataFrame:
        """Return the scan info saved in the HDF5 file as a pandas DataFrame.

        Returns
        -------
        pd.DataFrame
            One column per dataset in the 'scan_info' group, indexed by the
            'scan' column (which is also kept as a regular column). Columns of
            object dtype are decoded from bytes to str using UTF-8.
        """
        scan_info = {}
        dict_group_load = self.h5pydata["scan_info"]
        for k in dict_group_load.keys():
            scan_info[k] = dict_group_load[k][:]
        scan_df = pd.DataFrame(scan_info)
        scan_df.set_index("scan", inplace=True, drop=False)
        # Object columns are decoded in one pass and written back by column
        str_df = scan_df.select_dtypes([object])
        str_df = str_df.stack().str.decode("utf-8").unstack()
        for col in str_df:
            scan_df[col] = str_df[col]
        return scan_df

    def run(self, mass_spectra, load_raw=True) -> None:
        """Runs the importer functions to populate a LCMS or MassSpectraBase object.

        Notes
        -----
        The following functions are run in order, if the HDF5 file contains the necessary data:
        1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
        2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
        3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
        4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
        5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
        6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
        7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.

        Parameters
        ----------
        mass_spectra : LCMSBase or MassSpectraBase
            The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
        load_raw : bool
            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.

        Returns
        -------
        None, but populates several attributes on the LCMS or MassSpectraBase object.

        """
        if self.parameters_location is not None:
            # Populate the parameters attribute on the LCMS object
            self.import_parameters(mass_spectra)

        if "mass_spectra" in self.h5pydata:
            # Populate the _ms list on the LCMS object
            self.import_mass_spectra(mass_spectra, load_raw=load_raw)

        if "scan_info" in self.h5pydata:
            # Populate the _scan_info attribute on the LCMS object
            self.import_scan_info(mass_spectra)

        if "ms_unprocessed" in self.h5pydata and load_raw:
            # Populate the _ms_unprocessed attribute on the LCMS object
            self.import_ms_unprocessed(mass_spectra)

        if "mass_features" in self.h5pydata:
            # Populate the mass_features attribute on the LCMS object
            self.import_mass_features(mass_spectra)

        if "eics" in self.h5pydata:
            # Populate the eics attribute on the LCMS object
            self.import_eics(mass_spectra)

        if "spectral_search_results" in self.h5pydata:
            # Populate the spectral_search_results attribute on the LCMS object
            self.import_spectral_search_results(mass_spectra)

    def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
        """Imports all mass spectra from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with mass spectra.
        load_raw : bool
            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra.
            Default is True.

        Returns
        -------
        None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
        object with mass spectra from the HDF5 file.
        """
        for scan_number in self.scan_number_list:
            mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw)
            mass_spec.scan_number = scan_number
            mass_spectra.add_mass_spectrum(mass_spec)

    def import_scan_info(self, mass_spectra) -> None:
        """Imports the scan info from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with scan info.

        Returns
        -------
        None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
        object with a pandas DataFrame of the 'scan_info' from the HDF5 file.

        """
        scan_df = self.get_scan_df()
        mass_spectra.scan_df = scan_df

    def import_ms_unprocessed(self, mass_spectra) -> None:
        """Imports the unprocessed mass spectra from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with unprocessed spectra.

        Returns
        -------
        None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
        object with a dictionary of the 'ms_unprocessed' from the HDF5 file.

        """
        ms_unprocessed = self.get_ms_raw()
        mass_spectra._ms_unprocessed = ms_unprocessed

    def import_parameters(self, mass_spectra) -> None:
        """Imports the parameters from the sidecar parameters file (json or toml).

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with parameters.

        Returns
        -------
        None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
        object from the file at `parameters_location`.

        Raises
        ------
        Exception
            If the parameters file is neither a JSON nor a TOML file.

        """
        suffix = self.parameters_location.suffix
        # The branches are mutually exclusive: a JSON file must not fall
        # through to the error raised for unsupported formats.
        if suffix == ".json":
            load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
        elif suffix == ".toml":
            load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
        else:
            raise Exception("Parameters file must be in JSON or TOML format.")

    def import_mass_features(self, mass_spectra) -> None:
        """Imports the mass features from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with mass features.

        Returns
        -------
        None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
        object with a dictionary of the 'mass_features' from the HDF5 file.

        """
        # Attributes consumed by the LCMSMassFeature constructor (or deliberately
        # skipped) and therefore not re-applied with setattr below
        constructor_attrs = {
            "_mz_exp",
            "_mz_cal",
            "_retention_time",
            "_intensity",
            "_apex_scan",
            "_persistence",
        }

        dict_group_load = self.h5pydata["mass_features"]
        for k in dict_group_load.keys():
            feature_group = dict_group_load[k]
            feature_attrs = feature_group.attrs

            # Instantiate the MassFeature object
            mass_feature = LCMSMassFeature(
                mass_spectra,
                mz=feature_attrs["_mz_exp"],
                retention_time=feature_attrs["_retention_time"],
                intensity=feature_attrs["_intensity"],
                apex_scan=feature_attrs["_apex_scan"],
                persistence=feature_attrs["_persistence"],
                id=int(k),
            )

            # Populate additional attributes on the MassFeature object
            for key in feature_attrs.keys() - constructor_attrs:
                setattr(mass_feature, key, feature_attrs[key])

            # Populate attributes on MassFeature object that are lists
            for key in feature_group.keys():
                setattr(mass_feature, key, feature_group[key][:])
                # Convert _noise_score from array to tuple
                if key == "_noise_score":
                    mass_feature._noise_score = tuple(mass_feature._noise_score)

            mass_spectra.mass_features[int(k)] = mass_feature

        # Associate mass features with ms1 and ms2 spectra, if available
        for mass_feature in mass_spectra.mass_features.values():
            if mass_feature.apex_scan in mass_spectra._ms.keys():
                mass_feature.mass_spectrum = mass_spectra._ms[mass_feature.apex_scan]
            if mass_feature.ms2_scan_numbers is not None:
                for ms2_scan in mass_feature.ms2_scan_numbers:
                    if ms2_scan in mass_spectra._ms.keys():
                        mass_feature.ms2_mass_spectra[ms2_scan] = mass_spectra._ms[
                            ms2_scan
                        ]

    def import_eics(self, mass_spectra):
        """Imports the extracted ion chromatograms from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.

        Returns
        -------
        None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
        object with a dictionary of the 'eics' from the HDF5 file.

        """
        dict_group_load = self.h5pydata["eics"]
        for k in dict_group_load.keys():
            eic_group = dict_group_load[k]
            my_eic = EIC_Data(
                scans=eic_group["scans"][:],
                time=eic_group["time"][:],
                eic=eic_group["eic"][:],
            )
            for key in eic_group.keys():
                if key not in ["scans", "time", "eic"]:
                    setattr(my_eic, key, eic_group[key][:])
                    # if key is apexes, convert to a list of tuples
                    if key == "apexes" and len(my_eic.apexes) > 0:
                        my_eic.apexes = [tuple(x) for x in my_eic.apexes]
            # Add to mass_spectra object, keyed by the m/z stored on the group
            mass_spectra.eics[eic_group.attrs["mz"]] = my_eic

        # Add to mass features whose m/z exactly matches an EIC key
        for idx in mass_spectra.mass_features.keys():
            mz = mass_spectra.mass_features[idx].mz
            if mz in mass_spectra.eics.keys():
                mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz]

    def import_spectral_search_results(self, mass_spectra):
        """Imports the spectral search results from the HDF5 file.

        Parameters
        ----------
        mass_spectra : LCMSBase | MassSpectraBase
            The MassSpectraBase or LCMSBase object to populate with spectral search results.

        Returns
        -------
        None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
        object with a dictionary of the 'spectral_search_results' from the HDF5 file.

        """
        overall_results_dict = {}
        ms2_results_load = self.h5pydata["spectral_search_results"]
        for k in ms2_results_load.keys():
            scan_results = {}
            for k2 in ms2_results_load[k].keys():
                result_group = ms2_results_load[k][k2]
                precursor_mz = result_group.attrs["precursor_mz"]
                ms2_search_res = SpectrumSearchResults(
                    query_spectrum=mass_spectra._ms[int(k)],
                    precursor_mz=precursor_mz,
                    spectral_similarity_search_results={},
                )

                for key in result_group.keys() - {"precursor_mz"}:
                    setattr(ms2_search_res, key, list(result_group[key][:]))
                scan_results[precursor_mz] = ms2_search_res
            overall_results_dict[int(k)] = scan_results

        # add to mass_spectra
        mass_spectra.spectral_search_results.update(overall_results_dict)

        # If there are mass features, associate the results with each mass feature
        for mass_feature in mass_spectra.mass_features.values():
            scan_ids = mass_feature.ms2_scan_numbers
            # Same guard as import_mass_features: a feature may carry no MS2 scans
            if scan_ids is None:
                continue
            precursor_mz = mass_feature.mz
            for ms2_scan_id in scan_ids:
                try:
                    search_result = mass_spectra.spectral_search_results[ms2_scan_id][
                        precursor_mz
                    ]
                except KeyError:
                    # No search result saved for this scan / precursor pair
                    continue
                mass_feature.ms2_similarity_results.append(search_result)

    def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
        """
        Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.

        Parameters
        ----------
        load_raw : bool
            If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.

        """
        # Instantiate the MassSpectraBase object
        spectra_obj = MassSpectraBase(
            file_location=self.file_location,
            analyzer=self.analyzer,
            instrument_label=self.instrument_label,
            sample_name=self.sample_name,
        )

        # This will populate the _ms list on the LCMS or MassSpectraBase object
        self.run(spectra_obj, load_raw=load_raw)

        return spectra_obj

    def get_lcms_obj(
        self, load_raw=True, use_original_parser=True, raw_file_path=None
    ) -> LCMSBase:
        """
        Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.

        Parameters
        ----------
        load_raw : bool
            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
        use_original_parser : bool
            If True, use the original parser to populate the LCMS object. Default is True.
        raw_file_path : str
            The location of the raw file to parse if attempting to use original parser.
            Default is None, which attempts to get the raw file path from the HDF5 file.
            If the original file path has moved, this parameter can be used to specify the new location.
        """
        # Instantiate the LCMS object
        lcms_obj = LCMSBase(
            file_location=self.file_location,
            analyzer=self.analyzer,
            instrument_label=self.instrument_label,
            sample_name=self.sample_name,
        )

        # This will populate the majority of the attributes on the LCMS object
        self.run(lcms_obj, load_raw=load_raw)

        # Set final attributes of the LCMS object
        lcms_obj.polarity = self.h5pydata.attrs["polarity"]
        lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan)
        lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time)
        lcms_obj._tic_list = list(lcms_obj.scan_df.tic)

        # If use_original_parser is True, instantiate the original parser and populate the LCMS object
        if use_original_parser:
            lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path)
        else:
            lcms_obj.spectra_parser_class = self.__class__

        return lcms_obj

    def get_raw_file_location(self):
        """
        Get the raw file location from the HDF5 file attributes.

        Returns
        -------
        str
            The raw file location, or None if the 'original_file_location'
            attribute is not present in the HDF5 file.
        """
        if "original_file_location" in self.h5pydata.attrs:
            return self.h5pydata.attrs["original_file_location"]
        else:
            return None

    def add_original_parser(self, mass_spectra, raw_file_path=None):
        """
        Add the original parser to the mass spectra object.

        Parameters
        ----------
        mass_spectra : MassSpectraBase | LCMSBase
            The MassSpectraBase or LCMSBase object to add the original parser to.
        raw_file_path : str
            The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.

        Returns
        -------
        MassSpectraBase | LCMSBase
            The same object, with 'raw_file_location' and 'spectra_parser_class' set.

        Raises
        ------
        ValueError
            If no raw file path is given or stored in the HDF5 file, or if the
            parser type stored in the HDF5 file is not supported.
        """
        # Get the original parser type
        og_parser_type = self.h5pydata.attrs["parser_type"]

        # If raw_file_path is None, get it from the HDF5 file attributes
        if raw_file_path is None:
            raw_file_path = self.get_raw_file_location()
            if raw_file_path is None:
                raise ValueError(
                    "Raw file path not found in HDF5 file attributes, cannot instantiate original parser."
                )

        # Set the raw file path on the mass_spectra object so the parser knows where to find the raw file
        mass_spectra.raw_file_location = raw_file_path

        if og_parser_type == "ImportMassSpectraThermoMSFileReader":
            # Check that the parser can be instantiated with the raw file path
            parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
        elif og_parser_type == "MZMLSpectraParser":
            # Check that the parser can be instantiated with the raw file path
            parser = MZMLSpectraParser(raw_file_path)
        else:
            # Fail with a clear message rather than an unbound-variable error
            raise ValueError(
                f"Unsupported original parser type '{og_parser_type}' in HDF5 file, "
                "cannot instantiate original parser."
            )

        # Set the spectra parser class on the mass_spectra object so the spectra_parser property can be used with the original parser
        mass_spectra.spectra_parser_class = parser.__class__

        return mass_spectra

    def get_creation_time(self):
        """
        Warn that creation time is not available in CoreMS HDF5 files and return None.
        """
        warnings.warn(
            "Creation time is not available in CoreMS HDF5 files, returning None. "
            "This should be accessed through the original parser.",
        )
        return None

    def get_instrument_info(self):
        """
        Warn that instrument info is not available in CoreMS HDF5 files and return None.
        """
        warnings.warn(
            "Instrument info is not available in CoreMS HDF5 files, returning None. "
            "This should be accessed through the original parser.",
        )
        return None
26class ReadCoreMSHDFMassSpectra( 27 SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread 28): 29 """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object. 30 31 Parameters 32 ---------- 33 file_location : str 34 The location of the HDF5 file to read, including the suffix. 35 36 Attributes 37 ---------- 38 file_location : str 39 The location of the HDF5 file to read. 40 h5pydata : h5py.File 41 The HDF5 file object. 42 scans : list 43 A list of the location of individual mass spectra within the HDF5 file. 44 scan_number_list : list 45 A list of the scan numbers of the mass spectra within the HDF5 file. 46 parameters_location : str 47 The location of the parameters file (json or toml). 48 49 Methods 50 ------- 51 * import_mass_spectra(mass_spectra). 52 Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object. 53 * get_mass_spectrum_from_scan(scan_number). 54 Return mass spectrum data object from scan number. 55 * load(). 56 Placeholder method to meet the requirements of the SpectraParserInterface. 57 * run(mass_spectra). 58 Runs the importer functions to populate a LCMS or MassSpectraBase object. 59 * import_scan_info(mass_spectra). 60 Imports the scan info from the HDF5 file to populate the _scan_info attribute 61 on the LCMS or MassSpectraBase object 62 * import_ms_unprocessed(mass_spectra). 63 Imports the unprocessed mass spectra from the HDF5 file to populate the 64 _ms_unprocessed attribute on the LCMS or MassSpectraBase object 65 * import_parameters(mass_spectra). 66 Imports the parameters from the HDF5 file to populate the parameters 67 attribute on the LCMS or MassSpectraBase object 68 * import_mass_features(mass_spectra). 69 Imports the mass features from the HDF5 file to populate the mass_features 70 attribute on the LCMS or MassSpectraBase object 71 * import_eics(mass_spectra). 
72 Imports the extracted ion chromatograms from the HDF5 file to populate the 73 eics attribute on the LCMS or MassSpectraBase object 74 * import_spectral_search_results(mass_spectra). 75 Imports the spectral search results from the HDF5 file to populate the 76 spectral_search_results attribute on the LCMS or MassSpectraBase object 77 * get_mass_spectra_obj(). 78 Return mass spectra data object, populating the _ms list on the LCMS or 79 MassSpectraBase object from the HDF5 file 80 * get_lcms_obj(). 81 Return LCMSBase object, populating the majority of the attributes on the 82 LCMS object from the HDF5 file 83 84 """ 85 86 def __init__(self, file_location: str): 87 Thread.__init__(self) 88 ReadCoreMSHDF_MassSpectrum.__init__(self, file_location) 89 90 # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file 91 self.scans = [ 92 "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys()) 93 ] 94 self.scan_number_list = sorted( 95 [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())] 96 ) 97 98 # set the location of the parameters file (json or toml) 99 add_files = [ 100 x 101 for x in self.file_location.parent.glob( 102 self.file_location.name.replace(".hdf5", ".*") 103 ) 104 if x.suffix != ".hdf5" 105 ] 106 if len([x for x in add_files if x.suffix == ".json"]) > 0: 107 self.parameters_location = [x for x in add_files if x.suffix == ".json"][0] 108 elif len([x for x in add_files if x.suffix == ".toml"]) > 0: 109 self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0] 110 else: 111 self.parameters_location = None 112 113 def get_mass_spectrum_from_scan(self, scan_number): 114 """Return mass spectrum data object from scan number.""" 115 if scan_number in self.scan_number_list: 116 mass_spec = self.get_mass_spectrum(scan_number) 117 return mass_spec 118 else: 119 raise Exception("Scan number not found in HDF5 file.") 120 121 def get_mass_spectra_from_scan_list( 122 
self, scan_list, spectrum_mode, auto_process=True 123 ): 124 """Return a list of mass spectrum data objects from a list of scan numbers. 125 126 Parameters 127 ---------- 128 scan_list : list 129 A list of scan numbers to retrieve mass spectra for. 130 spectrum_mode : str 131 The spectrum mode to use when retrieving the mass spectra. 132 Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only 133 centroided spectra are saved. 134 auto_process : bool 135 If True, automatically process the mass spectra when retrieving them. 136 Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only 137 centroided spectra are saved. 138 139 Returns 140 ------- 141 list 142 A list of mass spectrum data objects corresponding to the provided scan numbers. 143 """ 144 mass_spectra_list = [] 145 for scan_number in scan_list: 146 if scan_number in self.scan_number_list: 147 mass_spec = self.get_mass_spectrum_from_scan(scan_number) 148 mass_spectra_list.append(mass_spec) 149 else: 150 warnings.warn(f"Scan number {scan_number} not found in HDF5 file.") 151 return mass_spectra_list 152 153 def load(self) -> None: 154 """ """ 155 pass 156 157 def get_ms_raw(self, spectra=None, scan_df=None) -> dict: 158 """ """ 159 # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation 160 if spectra is not None or scan_df is not None: 161 SyntaxWarning( 162 "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation." 
163 ) 164 ms_unprocessed = {} 165 dict_group_load = self.h5pydata["ms_unprocessed"] 166 dict_group_keys = dict_group_load.keys() 167 for k in dict_group_keys: 168 ms_up_int = dict_group_load[k][:] 169 ms_unprocessed[int(k)] = pd.DataFrame( 170 ms_up_int, columns=["scan", "mz", "intensity"] 171 ) 172 return ms_unprocessed 173 174 def get_scan_df(self) -> pd.DataFrame: 175 scan_info = {} 176 dict_group_load = self.h5pydata["scan_info"] 177 dict_group_keys = dict_group_load.keys() 178 for k in dict_group_keys: 179 scan_info[k] = dict_group_load[k][:] 180 scan_df = pd.DataFrame(scan_info) 181 scan_df.set_index("scan", inplace=True, drop=False) 182 str_df = scan_df.select_dtypes([object]) 183 str_df = str_df.stack().str.decode("utf-8").unstack() 184 for col in str_df: 185 scan_df[col] = str_df[col] 186 return scan_df 187 188 def run(self, mass_spectra, load_raw=True) -> None: 189 """Runs the importer functions to populate a LCMS or MassSpectraBase object. 190 191 Notes 192 ----- 193 The following functions are run in order, if the HDF5 file contains the necessary data: 194 1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object. 195 2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object. 196 3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object. 197 4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object. 198 5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object. 199 6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object. 200 7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object. 
201 202 Parameters 203 ---------- 204 mass_spectra : LCMSBase or MassSpectraBase 205 The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes. 206 load_raw : bool 207 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. 208 Returns 209 ------- 210 None, but populates several attributes on the LCMS or MassSpectraBase object. 211 212 """ 213 if self.parameters_location is not None: 214 # Populate the parameters attribute on the LCMS object 215 self.import_parameters(mass_spectra) 216 217 if "mass_spectra" in self.h5pydata: 218 # Populate the _ms list on the LCMS object 219 self.import_mass_spectra(mass_spectra, load_raw=load_raw) 220 221 if "scan_info" in self.h5pydata: 222 # Populate the _scan_info attribute on the LCMS object 223 self.import_scan_info(mass_spectra) 224 225 if "ms_unprocessed" in self.h5pydata and load_raw: 226 # Populate the _ms_unprocessed attribute on the LCMS object 227 self.import_ms_unprocessed(mass_spectra) 228 229 if "mass_features" in self.h5pydata: 230 # Populate the mass_features attribute on the LCMS object 231 self.import_mass_features(mass_spectra) 232 233 if "eics" in self.h5pydata: 234 # Populate the eics attribute on the LCMS object 235 self.import_eics(mass_spectra) 236 237 if "spectral_search_results" in self.h5pydata: 238 # Populate the spectral_search_results attribute on the LCMS object 239 self.import_spectral_search_results(mass_spectra) 240 241 def import_mass_spectra(self, mass_spectra, load_raw=True) -> None: 242 """Imports all mass spectra from the HDF5 file. 243 244 Parameters 245 ---------- 246 mass_spectra : LCMSBase | MassSpectraBase 247 The MassSpectraBase or LCMSBase object to populate with mass spectra. 248 load_raw : bool 249 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. 
Default 250 251 Returns 252 ------- 253 None, but populates the '_ms' list on the LCMSBase or MassSpectraBase 254 object with mass spectra from the HDF5 file. 255 """ 256 for scan_number in self.scan_number_list: 257 mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw) 258 mass_spec.scan_number = scan_number 259 mass_spectra.add_mass_spectrum(mass_spec) 260 261 def import_scan_info(self, mass_spectra) -> None: 262 """Imports the scan info from the HDF5 file. 263 264 Parameters 265 ---------- 266 lcms : LCMSBase | MassSpectraBase 267 The MassSpectraBase or LCMSBase objects 268 269 Returns 270 ------- 271 None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase 272 object with a pandas DataFrame of the 'scan_info' from the HDF5 file. 273 274 """ 275 scan_df = self.get_scan_df() 276 mass_spectra.scan_df = scan_df 277 278 def import_ms_unprocessed(self, mass_spectra) -> None: 279 """Imports the unprocessed mass spectra from the HDF5 file. 280 281 Parameters 282 ---------- 283 lcms : LCMSBase | MassSpectraBase 284 The MassSpectraBase or LCMSBase objects 285 286 Returns 287 ------- 288 None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase 289 object with a dictionary of the 'ms_unprocessed' from the HDF5 file. 290 291 """ 292 ms_unprocessed = self.get_ms_raw() 293 mass_spectra._ms_unprocessed = ms_unprocessed 294 295 def import_parameters(self, mass_spectra) -> None: 296 """Imports the parameters from the HDF5 file. 297 298 Parameters 299 ---------- 300 mass_spectra : LCMSBase | MassSpectraBase 301 The MassSpectraBase or LCMSBase object to populate with parameters. 302 303 Returns 304 ------- 305 None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase 306 object with a dictionary of the 'parameters' from the HDF5 file. 
307 308 """ 309 if ".json" == self.parameters_location.suffix: 310 load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location) 311 if ".toml" == self.parameters_location.suffix: 312 load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location) 313 else: 314 raise Exception( 315 "Parameters file must be in JSON format, TOML format is not yet supported." 316 ) 317 318 def import_mass_features(self, mass_spectra) -> None: 319 """Imports the mass features from the HDF5 file. 320 321 Parameters 322 ---------- 323 mass_spectra : LCMSBase | MassSpectraBase 324 The MassSpectraBase or LCMSBase object to populate with mass features. 325 326 Returns 327 ------- 328 None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase 329 object with a dictionary of the 'mass_features' from the HDF5 file. 330 331 """ 332 dict_group_load = self.h5pydata["mass_features"] 333 dict_group_keys = dict_group_load.keys() 334 for k in dict_group_keys: 335 # Instantiate the MassFeature object 336 mass_feature = LCMSMassFeature( 337 mass_spectra, 338 mz=dict_group_load[k].attrs["_mz_exp"], 339 retention_time=dict_group_load[k].attrs["_retention_time"], 340 intensity=dict_group_load[k].attrs["_intensity"], 341 apex_scan=dict_group_load[k].attrs["_apex_scan"], 342 persistence=dict_group_load[k].attrs["_persistence"], 343 id=int(k), 344 ) 345 346 # Populate additional attributes on the MassFeature object 347 for key in dict_group_load[k].attrs.keys() - { 348 "_mz_exp", 349 "_mz_cal", 350 "_retention_time", 351 "_intensity", 352 "_apex_scan", 353 "_persistence", 354 }: 355 setattr(mass_feature, key, dict_group_load[k].attrs[key]) 356 357 # Populate attributes on MassFeature object that are lists 358 for key in dict_group_load[k].keys(): 359 setattr(mass_feature, key, dict_group_load[k][key][:]) 360 # Convert _noise_score from array to tuple 361 if key == "_noise_score": 362 mass_feature._noise_score = tuple(mass_feature._noise_score) 363 
mass_spectra.mass_features[int(k)] = mass_feature 364 365 # Associate mass features with ms1 and ms2 spectra, if available 366 for mf_id in mass_spectra.mass_features.keys(): 367 if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys(): 368 mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[ 369 mass_spectra.mass_features[mf_id].apex_scan 370 ] 371 if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None: 372 for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers: 373 if ms2_scan in mass_spectra._ms.keys(): 374 mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = ( 375 mass_spectra._ms[ms2_scan] 376 ) 377 378 def import_eics(self, mass_spectra): 379 """Imports the extracted ion chromatograms from the HDF5 file. 380 381 Parameters 382 ---------- 383 mass_spectra : LCMSBase | MassSpectraBase 384 The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms. 385 386 Returns 387 ------- 388 None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase 389 object with a dictionary of the 'eics' from the HDF5 file. 
390 391 """ 392 dict_group_load = self.h5pydata["eics"] 393 dict_group_keys = dict_group_load.keys() 394 for k in dict_group_keys: 395 my_eic = EIC_Data( 396 scans=dict_group_load[k]["scans"][:], 397 time=dict_group_load[k]["time"][:], 398 eic=dict_group_load[k]["eic"][:], 399 ) 400 for key in dict_group_load[k].keys(): 401 if key not in ["scans", "time", "eic"]: 402 setattr(my_eic, key, dict_group_load[k][key][:]) 403 # if key is apexes, convert to a tuple of a list 404 if key == "apexes" and len(my_eic.apexes) > 0: 405 my_eic.apexes = [tuple(x) for x in my_eic.apexes] 406 # Add to mass_spectra object 407 mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic 408 409 # Add to mass features 410 for idx in mass_spectra.mass_features.keys(): 411 mz = mass_spectra.mass_features[idx].mz 412 if mz in mass_spectra.eics.keys(): 413 mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz] 414 415 def import_spectral_search_results(self, mass_spectra): 416 """Imports the spectral search results from the HDF5 file. 417 418 Parameters 419 ---------- 420 mass_spectra : LCMSBase | MassSpectraBase 421 The MassSpectraBase or LCMSBase object to populate with spectral search results. 422 423 Returns 424 ------- 425 None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase 426 object with a dictionary of the 'spectral_search_results' from the HDF5 file. 
427 428 """ 429 overall_results_dict = {} 430 ms2_results_load = self.h5pydata["spectral_search_results"] 431 for k in ms2_results_load.keys(): 432 overall_results_dict[int(k)] = {} 433 for k2 in ms2_results_load[k].keys(): 434 ms2_search_res = SpectrumSearchResults( 435 query_spectrum=mass_spectra._ms[int(k)], 436 precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"], 437 spectral_similarity_search_results={}, 438 ) 439 440 for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}: 441 setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:])) 442 overall_results_dict[int(k)][ 443 ms2_results_load[k][k2].attrs["precursor_mz"] 444 ] = ms2_search_res 445 446 # add to mass_spectra 447 mass_spectra.spectral_search_results.update(overall_results_dict) 448 449 # If there are mass features, associate the results with each mass feature 450 if len(mass_spectra.mass_features) > 0: 451 for mass_feature_id, mass_feature in mass_spectra.mass_features.items(): 452 scan_ids = mass_feature.ms2_scan_numbers 453 for ms2_scan_id in scan_ids: 454 precursor_mz = mass_feature.mz 455 try: 456 mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz] 457 except KeyError: 458 pass 459 else: 460 mass_spectra.mass_features[ 461 mass_feature_id 462 ].ms2_similarity_results.append( 463 mass_spectra.spectral_search_results[ms2_scan_id][ 464 precursor_mz 465 ] 466 ) 467 468 def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase: 469 """ 470 Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file. 471 472 Parameters 473 ---------- 474 load_raw : bool 475 If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True. 
476 477 """ 478 # Instantiate the LCMS object 479 spectra_obj = MassSpectraBase( 480 file_location=self.file_location, 481 analyzer=self.analyzer, 482 instrument_label=self.instrument_label, 483 sample_name=self.sample_name, 484 ) 485 486 # This will populate the _ms list on the LCMS or MassSpectraBase object 487 self.run(spectra_obj, load_raw=load_raw) 488 489 return spectra_obj 490 491 def get_lcms_obj( 492 self, load_raw=True, use_original_parser=True, raw_file_path=None 493 ) -> LCMSBase: 494 """ 495 Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file. 496 497 Parameters 498 ---------- 499 load_raw : bool 500 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. 501 use_original_parser : bool 502 If True, use the original parser to populate the LCMS object. Default is True. 503 raw_file_path : str 504 The location of the raw file to parse if attempting to use original parser. 505 Default is None, which attempts to get the raw file path from the HDF5 file. 506 If the original file path has moved, this parameter can be used to specify the new location. 
507 """ 508 # Instantiate the LCMS object 509 lcms_obj = LCMSBase( 510 file_location=self.file_location, 511 analyzer=self.analyzer, 512 instrument_label=self.instrument_label, 513 sample_name=self.sample_name, 514 ) 515 516 # This will populate the majority of the attributes on the LCMS object 517 self.run(lcms_obj, load_raw=load_raw) 518 519 # Set final attributes of the LCMS object 520 lcms_obj.polarity = self.h5pydata.attrs["polarity"] 521 lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan) 522 lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time) 523 lcms_obj._tic_list = list(lcms_obj.scan_df.tic) 524 525 # If use_original_parser is True, instantiate the original parser and populate the LCMS object 526 if use_original_parser: 527 lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path) 528 else: 529 lcms_obj.spectra_parser_class = self.__class__ 530 531 return lcms_obj 532 533 def get_raw_file_location(self): 534 """ 535 Get the raw file location from the HDF5 file attributes. 536 537 Returns 538 ------- 539 str 540 The raw file location. 541 """ 542 if "original_file_location" in self.h5pydata.attrs: 543 return self.h5pydata.attrs["original_file_location"] 544 else: 545 return None 546 547 def add_original_parser(self, mass_spectra, raw_file_path=None): 548 """ 549 Add the original parser to the mass spectra object. 550 551 Parameters 552 ---------- 553 mass_spectra : MassSpectraBase | LCMSBase 554 The MassSpectraBase or LCMSBase object to add the original parser to. 555 raw_file_path : str 556 The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file. 
557 """ 558 # Get the original parser type 559 og_parser_type = self.h5pydata.attrs["parser_type"] 560 561 # If raw_file_path is None, get it from the HDF5 file attributes 562 if raw_file_path is None: 563 raw_file_path = self.get_raw_file_location() 564 if raw_file_path is None: 565 raise ValueError( 566 "Raw file path not found in HDF5 file attributes, cannot instantiate original parser." 567 ) 568 569 # Set the raw file path on the mass_spectra object so the parser knows where to find the raw file 570 mass_spectra.raw_file_location = raw_file_path 571 572 if og_parser_type == "ImportMassSpectraThermoMSFileReader": 573 # Check that the parser can be instantiated with the raw file path 574 parser = ImportMassSpectraThermoMSFileReader(raw_file_path) 575 elif og_parser_type == "MZMLSpectraParser": 576 # Check that the parser can be instantiated with the raw file path 577 parser = MZMLSpectraParser(raw_file_path) 578 579 # Set the spectra parser class on the mass_spectra object so the spectra_parser property can be used with the original parser 580 mass_spectra.spectra_parser_class = parser.__class__ 581 582 return mass_spectra 583 584 def get_creation_time(self): 585 """ 586 Raise a NotImplemented Warning, as creation time is not available in CoreMS HDF5 files and returning None. 587 """ 588 warnings.warn( 589 "Creation time is not available in CoreMS HDF5 files, returning None." 590 "This should be accessed through the original parser.", 591 ) 592 return None 593 594 def get_instrument_info(self): 595 """ 596 Raise a NotImplemented Warning, as instrument info is not available in CoreMS HDF5 files and returning None. 597 """ 598 warnings.warn( 599 "Instrument info is not available in CoreMS HDF5 files, returning None." 600 "This should be accessed through the original parser.", 601 ) 602 return None
Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.
Parameters
- file_location (str): The location of the HDF5 file to read, including the suffix.
Attributes
- file_location (str): The location of the HDF5 file to read.
- h5pydata (h5py.File): The HDF5 file object.
- scans (list): A list of the location of individual mass spectra within the HDF5 file.
- scan_number_list (list): A list of the scan numbers of the mass spectra within the HDF5 file.
- parameters_location (str): The location of the parameters file (json or toml).
Methods
- import_mass_spectra(mass_spectra). Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
- get_mass_spectrum_from_scan(scan_number). Return mass spectrum data object from scan number.
- load(). Placeholder method to meet the requirements of the SpectraParserInterface.
- run(mass_spectra). Runs the importer functions to populate a LCMS or MassSpectraBase object.
- import_scan_info(mass_spectra). Imports the scan info from the HDF5 file to populate the _scan_info attribute on the LCMS or MassSpectraBase object
- import_ms_unprocessed(mass_spectra). Imports the unprocessed mass spectra from the HDF5 file to populate the _ms_unprocessed attribute on the LCMS or MassSpectraBase object
- import_parameters(mass_spectra). Imports the parameters from the HDF5 file to populate the parameters attribute on the LCMS or MassSpectraBase object
- import_mass_features(mass_spectra). Imports the mass features from the HDF5 file to populate the mass_features attribute on the LCMS or MassSpectraBase object
- import_eics(mass_spectra). Imports the extracted ion chromatograms from the HDF5 file to populate the eics attribute on the LCMS or MassSpectraBase object
- import_spectral_search_results(mass_spectra). Imports the spectral search results from the HDF5 file to populate the spectral_search_results attribute on the LCMS or MassSpectraBase object
- get_mass_spectra_obj(). Return mass spectra data object, populating the _ms list on the LCMS or MassSpectraBase object from the HDF5 file
- get_lcms_obj(). Return LCMSBase object, populating the majority of the attributes on the LCMS object from the HDF5 file
def __init__(self, file_location: str):
    """Open the CoreMS HDF5 file and index its spectra and parameters file.

    Parameters
    ----------
    file_location : str
        The location of the HDF5 file to read, including the suffix.
    """
    Thread.__init__(self)
    ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)

    # Spectra are stored in the nested "mass_spectra" group, so the scans
    # attribute set by ReadCoreMSHDF_MassSpectrum is replaced here.
    spectrum_keys = list(self.h5pydata["mass_spectra"].keys())
    self.scans = ["mass_spectra/" + key for key in spectrum_keys]
    self.scan_number_list = sorted(int(float(key)) for key in spectrum_keys)

    # Look beside the HDF5 file for files sharing its name with another
    # suffix; a JSON parameters file takes precedence over a TOML one.
    sibling_pattern = self.file_location.name.replace(".hdf5", ".*")
    siblings = [
        path
        for path in self.file_location.parent.glob(sibling_pattern)
        if path.suffix != ".hdf5"
    ]
    json_files = [path for path in siblings if path.suffix == ".json"]
    toml_files = [path for path in siblings if path.suffix == ".toml"]
    if json_files:
        self.parameters_location = json_files[0]
    elif toml_files:
        self.parameters_location = toml_files[0]
    else:
        self.parameters_location = None
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def get_mass_spectrum_from_scan(self, scan_number):
    """Return mass spectrum data object from scan number.

    Parameters
    ----------
    scan_number : int
        The scan number to look up in scan_number_list.

    Returns
    -------
    The object returned by get_mass_spectrum for the scan number.

    Raises
    ------
    Exception
        If the scan number is not in scan_number_list.
    """
    if scan_number not in self.scan_number_list:
        raise Exception("Scan number not found in HDF5 file.")
    return self.get_mass_spectrum(scan_number)
Return mass spectrum data object from scan number.
def get_mass_spectra_from_scan_list(
    self, scan_list, spectrum_mode, auto_process=True
):
    """Return a list of mass spectrum data objects from a list of scan numbers.

    Parameters
    ----------
    scan_list : list
        A list of scan numbers to retrieve mass spectra for.
    spectrum_mode : str
        Not used for CoreMS HDF5 files; accepted so the signature matches
        other parsers.
    auto_process : bool
        Not used for CoreMS HDF5 files; accepted so the signature matches
        other parsers.

    Returns
    -------
    list
        The mass spectra for the scan numbers found in scan_number_list, in
        the order requested. A warning is issued for each scan number that
        is not found, and that scan is left out of the list.
    """
    found_spectra = []
    for scan_number in scan_list:
        if scan_number not in self.scan_number_list:
            warnings.warn(f"Scan number {scan_number} not found in HDF5 file.")
            continue
        found_spectra.append(self.get_mass_spectrum_from_scan(scan_number))
    return found_spectra
Return a list of mass spectrum data objects from a list of scan numbers.
Parameters
- scan_list (list): A list of scan numbers to retrieve mass spectra for.
- spectrum_mode (str): The spectrum mode to use when retrieving the mass spectra. Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only centroided spectra are saved.
- auto_process (bool): If True, automatically process the mass spectra when retrieving them. Note that this parameter is not used for CoreMS HDF5 files, as the spectra are already processed and only centroided spectra are saved.
Returns
- list: A list of mass spectrum data objects corresponding to the provided scan numbers.
def get_ms_raw(self, spectra=None, scan_df=None) -> dict:
    """Return the unprocessed mass spectra saved in the HDF5 file.

    Parameters
    ----------
    spectra : optional
        Not used for CoreMS HDF5 files. A warning is issued if it is not None.
    scan_df : optional
        Not used for CoreMS HDF5 files. A warning is issued if it is not None.

    Returns
    -------
    dict
        Maps each key of the 'ms_unprocessed' group, converted to int, to a
        pandas DataFrame with the columns 'scan', 'mz' and 'intensity'.
    """
    if spectra is not None or scan_df is not None:
        # Only saved data can be returned, so tell the caller that the
        # arguments they passed have no effect here.
        warnings.warn(
            "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation.",
            SyntaxWarning,
            stacklevel=2,
        )

    raw_group = self.h5pydata["ms_unprocessed"]
    return {
        int(key): pd.DataFrame(
            raw_group[key][:], columns=["scan", "mz", "intensity"]
        )
        for key in raw_group.keys()
    }
def get_scan_df(self) -> pd.DataFrame:
    """Return scan data as a pandas DataFrame.

    Returns
    -------
    pd.DataFrame
        One column per entry of the 'scan_info' group, indexed by the 'scan'
        column (which is also kept as a column). Object-dtype columns are
        replaced by their UTF-8 decoded values.
    """
    info_group = self.h5pydata["scan_info"]
    columns = {name: info_group[name][:] for name in info_group.keys()}
    scan_table = pd.DataFrame(columns).set_index("scan", drop=False)

    # Decode every object-dtype column in one pass by stacking the values
    # into a single Series and unstacking back to the column layout.
    decoded = (
        scan_table.select_dtypes([object]).stack().str.decode("utf-8").unstack()
    )
    for column_name in decoded:
        scan_table[column_name] = decoded[column_name]
    return scan_table
Return scan data as a pandas DataFrame.
def run(self, mass_spectra, load_raw=True) -> None:
    """Runs the importer functions to populate a LCMS or MassSpectraBase object.

    Notes
    -----
    import_parameters() runs first when a parameters file was located.
    The remaining importers then run in this order, each only if the HDF5
    file contains the group of the same name:

    1. import_mass_spectra() for 'mass_spectra'.
    2. import_scan_info() for 'scan_info'.
    3. import_ms_unprocessed() for 'ms_unprocessed' (only when load_raw is true).
    4. import_mass_features() for 'mass_features'.
    5. import_eics() for 'eics'.
    6. import_spectral_search_results() for 'spectral_search_results'.

    Parameters
    ----------
    mass_spectra : LCMSBase or MassSpectraBase
        The LCMS or MassSpectraBase object to populate.
    load_raw : bool
        If True, load raw data (unprocessed) from HDF5 files for overall lcms
        object and individual mass spectra. Default is True.

    Returns
    -------
    None, but populates several attributes on the LCMS or MassSpectraBase object.
    """
    if self.parameters_location is not None:
        self.import_parameters(mass_spectra)

    # (HDF5 group, extra condition, importer) in the order they must run:
    # spectra are loaded before the features and search results that refer
    # to them.
    steps = (
        (
            "mass_spectra",
            True,
            lambda: self.import_mass_spectra(mass_spectra, load_raw=load_raw),
        ),
        ("scan_info", True, lambda: self.import_scan_info(mass_spectra)),
        (
            "ms_unprocessed",
            load_raw,
            lambda: self.import_ms_unprocessed(mass_spectra),
        ),
        ("mass_features", True, lambda: self.import_mass_features(mass_spectra)),
        ("eics", True, lambda: self.import_eics(mass_spectra)),
        (
            "spectral_search_results",
            True,
            lambda: self.import_spectral_search_results(mass_spectra),
        ),
    )
    for group_name, enabled, importer in steps:
        if group_name in self.h5pydata and enabled:
            importer()
Runs the importer functions to populate a LCMS or MassSpectraBase object.
Notes
The following functions are run in order, if the HDF5 file contains the necessary data:
- import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
- import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
- import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
- import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
- import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
- import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
- import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
Parameters
- mass_spectra (LCMSBase or MassSpectraBase): The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
- load_raw (bool): If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
Returns
- None, but populates several attributes on the LCMS or MassSpectraBase object.
def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
    """Imports all mass spectra from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with mass spectra.
    load_raw : bool
        If True, load raw data (unprocessed) from HDF5 files for overall lcms
        object and individual mass spectra. Default is True.

    Returns
    -------
    None, but passes one mass spectrum per entry of scan_number_list, in
    that order, to mass_spectra.add_mass_spectrum.
    """
    for scan in self.scan_number_list:
        spectrum = self.get_mass_spectrum(scan, load_raw=load_raw)
        # Tag the spectrum with the scan number it was requested under
        # before it is handed over.
        spectrum.scan_number = scan
        mass_spectra.add_mass_spectrum(spectrum)
Imports all mass spectra from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with mass spectra.
- load_raw (bool): If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
Returns
- None, but populates the '_ms' list on the LCMSBase or MassSpectraBase object with mass spectra from the HDF5 file.
def import_scan_info(self, mass_spectra) -> None:
    """Imports the scan info from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with scan info.

    Returns
    -------
    None, but sets the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    object to the pandas DataFrame returned by get_scan_df().
    """
    mass_spectra.scan_df = self.get_scan_df()
Imports the scan info from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with scan info.
Returns
- None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
def import_ms_unprocessed(self, mass_spectra) -> None:
    """Imports the unprocessed mass spectra from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with unprocessed
        mass spectra.

    Returns
    -------
    None, but sets the '_ms_unprocessed' attribute on the LCMSBase or
    MassSpectraBase object to the dictionary returned by get_ms_raw().
    """
    mass_spectra._ms_unprocessed = self.get_ms_raw()
Imports the unprocessed mass spectra from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with unprocessed mass spectra.
Returns
- None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
def import_parameters(self, mass_spectra) -> None:
    """Imports the parameters from the parameters file found beside the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with parameters.

    Returns
    -------
    None, but passes the object and parameters_location to the JSON or TOML
    parameter loader, chosen by the file suffix.

    Raises
    ------
    ValueError
        If the suffix of parameters_location is neither '.json' nor '.toml'.
    """
    suffix = self.parameters_location.suffix
    # The branches are mutually exclusive: a JSON file must not fall
    # through to the error raised for unsupported formats.
    if suffix == ".json":
        load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
    elif suffix == ".toml":
        load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
    else:
        raise ValueError(
            f"Parameters file must be in JSON or TOML format, got '{suffix}'."
        )
Imports the parameters from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with parameters.
Returns
- None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase object with a dictionary of the 'parameters' from the HDF5 file.
def import_mass_features(self, mass_spectra) -> None:
    """Imports the mass features from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with mass features.

    Returns
    -------
    None, but adds one LCMSMassFeature per entry of the 'mass_features' group
    to the 'mass_features' dictionary on the object, keyed by the integer id,
    and links each feature to the loaded spectra it refers to.
    """
    # Attributes handed to the constructor below, plus '_mz_cal', are not
    # copied onto the feature a second time.
    skipped_attrs = {
        "_mz_exp",
        "_mz_cal",
        "_retention_time",
        "_intensity",
        "_apex_scan",
        "_persistence",
    }

    features_group = self.h5pydata["mass_features"]
    for feature_key in features_group.keys():
        feature_group = features_group[feature_key]
        feature_attrs = feature_group.attrs
        feature_id = int(feature_key)

        mass_feature = LCMSMassFeature(
            mass_spectra,
            mz=feature_attrs["_mz_exp"],
            retention_time=feature_attrs["_retention_time"],
            intensity=feature_attrs["_intensity"],
            apex_scan=feature_attrs["_apex_scan"],
            persistence=feature_attrs["_persistence"],
            id=feature_id,
        )

        # Remaining scalar attributes are set directly on the feature
        for attr_name in feature_attrs.keys() - skipped_attrs:
            setattr(mass_feature, attr_name, feature_attrs[attr_name])

        # Members of the feature's group are read in full and set as attributes
        for member_name in feature_group.keys():
            setattr(mass_feature, member_name, feature_group[member_name][:])
            if member_name == "_noise_score":
                # Convert _noise_score from array to tuple
                mass_feature._noise_score = tuple(mass_feature._noise_score)

        mass_spectra.mass_features[feature_id] = mass_feature

    # Associate mass features with ms1 and ms2 spectra, if available
    for mass_feature in mass_spectra.mass_features.values():
        if mass_feature.apex_scan in mass_spectra._ms.keys():
            mass_feature.mass_spectrum = mass_spectra._ms[mass_feature.apex_scan]
        if mass_feature.ms2_scan_numbers is None:
            continue
        for ms2_scan in mass_feature.ms2_scan_numbers:
            if ms2_scan in mass_spectra._ms.keys():
                mass_feature.ms2_mass_spectra[ms2_scan] = mass_spectra._ms[ms2_scan]
Imports the mass features from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with mass features.
Returns
- None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase object with a dictionary of the 'mass_features' from the HDF5 file.
def import_eics(self, mass_spectra):
    """Imports the extracted ion chromatograms from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.

    Returns
    -------
    None, but adds one EIC_Data per entry of the 'eics' group to the 'eics'
    dictionary on the object, keyed by the entry's 'mz' attribute, and sets
    '_eic_data' on every mass feature whose mz is a key of that dictionary.
    """
    eics_group = self.h5pydata["eics"]
    for eic_key in eics_group.keys():
        eic_group = eics_group[eic_key]
        eic_data = EIC_Data(
            scans=eic_group["scans"][:],
            time=eic_group["time"][:],
            eic=eic_group["eic"][:],
        )
        for member_name in eic_group.keys():
            if member_name in ("scans", "time", "eic"):
                # Already supplied to the constructor
                continue
            setattr(eic_data, member_name, eic_group[member_name][:])
            if member_name == "apexes" and len(eic_data.apexes) > 0:
                # Store each apex as a tuple
                eic_data.apexes = [tuple(apex) for apex in eic_data.apexes]
        mass_spectra.eics[eic_group.attrs["mz"]] = eic_data

    # Attach each chromatogram to the mass features with exactly that mz
    for mass_feature in mass_spectra.mass_features.values():
        feature_mz = mass_feature.mz
        if feature_mz in mass_spectra.eics.keys():
            mass_feature._eic_data = mass_spectra.eics[feature_mz]
Imports the extracted ion chromatograms from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
Returns
- None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase object with a dictionary of the 'eics' from the HDF5 file.
def import_spectral_search_results(self, mass_spectra):
    """Imports the spectral search results from the HDF5 file.

    Parameters
    ----------
    mass_spectra : LCMSBase | MassSpectraBase
        The MassSpectraBase or LCMSBase object to populate with spectral search results.

    Returns
    -------
    None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    object with a dictionary of the 'spectral_search_results' from the HDF5 file
    (``{scan_number: {precursor_mz: SpectrumSearchResults}}``), and appends matching
    results to ``ms2_similarity_results`` on each mass feature.

    Notes
    -----
    Mass features whose ``ms2_scan_numbers`` is None are skipped, consistent with
    how ``import_mass_features`` treats that attribute.

    """
    stored_results = self.h5pydata["spectral_search_results"]
    results_by_scan = {}
    for scan_key in stored_results.keys():
        scan_number = int(scan_key)
        scan_group = stored_results[scan_key]
        results_by_scan[scan_number] = {}
        for result_key in scan_group.keys():
            result_group = scan_group[result_key]
            precursor_mz = result_group.attrs["precursor_mz"]
            search_result = SpectrumSearchResults(
                query_spectrum=mass_spectra._ms[scan_number],
                precursor_mz=precursor_mz,
                spectral_similarity_search_results={},
            )

            # Each stored dataset becomes a list-valued attribute on the result
            for dataset_name in result_group.keys() - {"precursor_mz"}:
                setattr(
                    search_result,
                    dataset_name,
                    list(result_group[dataset_name][:]),
                )
            results_by_scan[scan_number][precursor_mz] = search_result

    # add to mass_spectra
    mass_spectra.spectral_search_results.update(results_by_scan)

    # If there are mass features, associate the results with each mass feature
    for mass_feature in mass_spectra.mass_features.values():
        ms2_scans = mass_feature.ms2_scan_numbers
        if ms2_scans is None:
            # No MS2 scans recorded for this feature; nothing to associate
            continue
        precursor_mz = mass_feature.mz
        for ms2_scan_id in ms2_scans:
            try:
                match = mass_spectra.spectral_search_results[ms2_scan_id][
                    precursor_mz
                ]
            except KeyError:
                # No search result for this scan / precursor pair
                continue
            mass_feature.ms2_similarity_results.append(match)
Imports the spectral search results from the HDF5 file.
Parameters
- mass_spectra (LCMSBase | MassSpectraBase): The MassSpectraBase or LCMSBase object to populate with spectral search results.
Returns
- None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase object with a dictionary of the 'spectral_search_results' from the HDF5 file.
def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
    """
    Build a MassSpectraBase object and populate it from the HDF5 file.

    Parameters
    ----------
    load_raw : bool
        If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.

    Returns
    -------
    MassSpectraBase
        The newly created object after ``self.run`` has been applied to it.

    """
    # Metadata carried over from this reader onto the new container
    init_kwargs = {
        "file_location": self.file_location,
        "analyzer": self.analyzer,
        "instrument_label": self.instrument_label,
        "sample_name": self.sample_name,
    }
    mass_spectra = MassSpectraBase(**init_kwargs)

    # run() does the actual population of the container from the HDF5 file
    self.run(mass_spectra, load_raw=load_raw)
    return mass_spectra
Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.
Parameters
- load_raw (bool): If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
def get_lcms_obj(
    self, load_raw=True, use_original_parser=True, raw_file_path=None
) -> LCMSBase:
    """
    Build an LCMSBase object and populate it from the HDF5 file.

    Parameters
    ----------
    load_raw : bool
        If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    use_original_parser : bool
        If True, attach the original raw-data parser to the LCMS object via
        ``add_original_parser``; otherwise this reader's class is recorded as the
        spectra parser class. Default is True.
    raw_file_path : str
        The location of the raw file to parse if attempting to use original parser.
        Default is None, which attempts to get the raw file path from the HDF5 file.
        If the original file path has moved, this parameter can be used to specify the new location.

    Returns
    -------
    LCMSBase
        The populated LCMS object.
    """
    lcms = LCMSBase(
        file_location=self.file_location,
        analyzer=self.analyzer,
        instrument_label=self.instrument_label,
        sample_name=self.sample_name,
    )

    # run() fills in most of the attributes of the LCMS object
    self.run(lcms, load_raw=load_raw)

    # Remaining attributes come from the file header and the scan table
    lcms.polarity = self.h5pydata.attrs["polarity"]
    scan_table = lcms.scan_df
    lcms._scans_number_list = list(scan_table.scan)
    lcms._retention_time_list = list(scan_table.scan_time)
    lcms._tic_list = list(scan_table.tic)

    if not use_original_parser:
        # Fall back to this HDF5 reader as the spectra parser
        lcms.spectra_parser_class = self.__class__
        return lcms

    return self.add_original_parser(lcms, raw_file_path=raw_file_path)
Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.
Parameters
- load_raw (bool): If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
- use_original_parser (bool): If True, use the original parser to populate the LCMS object. Default is True.
- raw_file_path (str): The location of the raw file to parse if attempting to use original parser. Default is None, which attempts to get the raw file path from the HDF5 file. If the original file path has moved, this parameter can be used to specify the new location.
def get_raw_file_location(self):
    """
    Look up the raw file location recorded in the HDF5 file attributes.

    Returns
    -------
    str or None
        The value of the 'original_file_location' attribute, or None when the
        HDF5 file does not carry that attribute.
    """
    file_attrs = self.h5pydata.attrs
    return (
        file_attrs["original_file_location"]
        if "original_file_location" in file_attrs
        else None
    )
Get the raw file location from the HDF5 file attributes.
Returns
- str | None: The raw file location, or None if the HDF5 file has no 'original_file_location' attribute.
def add_original_parser(self, mass_spectra, raw_file_path=None):
    """
    Add the original parser to the mass spectra object.

    Parameters
    ----------
    mass_spectra : MassSpectraBase | LCMSBase
        The MassSpectraBase or LCMSBase object to add the original parser to.
    raw_file_path : str
        The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.

    Returns
    -------
    MassSpectraBase | LCMSBase
        The same ``mass_spectra`` object, with ``raw_file_location`` and
        ``spectra_parser_class`` set.

    Raises
    ------
    ValueError
        If no raw file path is given and none is stored in the HDF5 file, or if
        the 'parser_type' stored in the HDF5 file is not a supported parser.
    """
    # Get the original parser type
    og_parser_type = self.h5pydata.attrs["parser_type"]

    # If raw_file_path is None, get it from the HDF5 file attributes
    if raw_file_path is None:
        raw_file_path = self.get_raw_file_location()
        if raw_file_path is None:
            raise ValueError(
                "Raw file path not found in HDF5 file attributes, cannot instantiate original parser."
            )

    # Set the raw file path on the mass_spectra object so the parser knows where to find the raw file
    mass_spectra.raw_file_location = raw_file_path

    # Instantiating the parser checks that it can be built from the raw file path
    if og_parser_type == "ImportMassSpectraThermoMSFileReader":
        parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
    elif og_parser_type == "MZMLSpectraParser":
        parser = MZMLSpectraParser(raw_file_path)
    else:
        # Without this branch an unknown type would surface as an UnboundLocalError
        raise ValueError(
            f"Unsupported original parser type {og_parser_type!r}, "
            "cannot instantiate original parser."
        )

    # Set the spectra parser class on the mass_spectra object so the spectra_parser property can be used with the original parser
    mass_spectra.spectra_parser_class = parser.__class__

    return mass_spectra
Add the original parser to the mass spectra object.
Parameters
- mass_spectra (MassSpectraBase | LCMSBase): The MassSpectraBase or LCMSBase object to add the original parser to.
- raw_file_path (str): The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
def get_creation_time(self):
    """
    Warn that creation time is not available in CoreMS HDF5 files and return None.

    Returns
    -------
    None
        Always; a warning is emitted via ``warnings.warn`` on every call.
    """
    # Adjacent string literals are concatenated, so the separating space
    # between the two sentences must be part of one of the literals.
    warnings.warn(
        "Creation time is not available in CoreMS HDF5 files, returning None. "
        "This should be accessed through the original parser.",
    )
    return None
Emit a warning that creation time is not available in CoreMS HDF5 files, and return None.
def get_instrument_info(self):
    """
    Warn that instrument info is not available in CoreMS HDF5 files and return None.

    Returns
    -------
    None
        Always; a warning is emitted via ``warnings.warn`` on every call.
    """
    # Adjacent string literals are concatenated, so the separating space
    # between the two sentences must be part of one of the literals.
    warnings.warn(
        "Instrument info is not available in CoreMS HDF5 files, returning None. "
        "This should be accessed through the original parser.",
    )
    return None
Emit a warning that instrument info is not available in CoreMS HDF5 files, and return None.
Inherited Members
- corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum
- h5pydata
- load_raw_data
- get_mass_spectrum
- load_settings
- get_dataframe
- get_time_index_to_pull
- get_high_level_attr_data
- get_scan_group_attr_data
- get_raw_data_attr_data
- get_output_parameters
- corems.mass_spectrum.input.baseClass.MassListBaseClass
- file_location
- header_lines
- isCentroid
- isThermoProfile
- headerless
- analyzer
- instrument_label
- sample_name
- parameters
- set_parameter_from_toml
- set_parameter_from_json
- data_type
- delimiter
- encoding_detector
- set_data_type
- clean_data_frame
- check_columns
- read_xml_peaks
- get_xml_polarity
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id