corems.mass_spectrum.output.export
__author__ = "Yuri E. Corilo"
__date__ = "Nov 11, 2019"

import json
from datetime import datetime, timezone
from pathlib import Path
from threading import Thread

import h5py
import toml
# The ``numpy.NaN`` alias was removed in NumPy 2.0; ``numpy.nan`` is available
# in every release, so import it under the name this module already uses.
from numpy import empty
from numpy import nan as NaN
from pandas import DataFrame

from corems.encapsulation.constant import Atoms, Labels
from corems.encapsulation.output import parameter_to_dict
from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecfromFreq


class HighResMassSpecExport(Thread):
    """A class for exporting high-resolution mass spectra.

    Parameters
    ----------
    out_file_path : str
        The output file path.
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    output_type : str, optional
        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.

    Attributes
    ----------
    output_file : Path
        The output file path.
    output_type : str
        The type of output file.
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    atoms_order_list : list
        The list of assigned atoms in the order specified by Atoms.atoms_order list.
    columns_label : list
        The column labels in order.

    Methods
    -------
    * save().
        Save the mass spectrum data to the output file.
    * run().
        Run the export process.
    * get_pandas_df().
        Returns the mass spectrum data as a pandas DataFrame.
    * write_settings(output_path, mass_spectrum).
        Writes the settings of the mass spectrum to a JSON file.
    * to_pandas(write_metadata=True).
        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    * to_excel(write_metadata=True).
        Exports the mass spectrum data to an Excel file.
    * to_csv(write_metadata=True).
        Exports the mass spectrum data to a CSV file.
    * to_json().
        Exports the mass spectrum data to a JSON string.
    * to_hdf().
        Exports the mass spectrum data to an HDF5 file.
    * parameters_to_toml().
        Converts the mass spectrum parameters to a TOML string.
    * parameters_to_json().
        Converts the mass spectrum parameters to a JSON string.
    * get_mass_spec_attrs(mass_spectrum).
        Returns the mass spectrum attributes as a dictionary.
    * get_all_used_atoms_in_order(mass_spectrum).
        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    * list_dict_to_list(mass_spectrum, is_hdf5=False).
        Returns the mass spectrum data as a list of rows (one list per record).
    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False, additional_columns=None).
        Returns the mass spectrum data as a list of dictionaries.

    """

    # Output types accepted by the ``output_type`` setter and handled by save().
    _SUPPORTED_OUTPUT_TYPES = ("excel", "csv", "pandas", "hdf5")

    # Optional per-formula columns that get_pandas_df() can append.
    _ADDITIONAL_COLUMNS = (
        "Aromaticity Index",
        "NOSC",
        "Aromaticity Index (modified)",
    )

    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
        Thread.__init__(self)

        self.output_file = Path(out_file_path)

        # 'excel', 'csv', 'pandas' or 'hdf5' (validated by the property setter)
        self.output_type = output_type

        self.mass_spectrum = mass_spectrum

        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)

        self._init_columns()

    def _init_columns(self):
        """Initialize the columns for the mass spectrum output."""
        # column labels in order
        self.columns_label = [
            "Index",
            "m/z",
            "Calibrated m/z",
            "Calculated m/z",
            "Peak Height",
            "Peak Area",
            "Resolving Power",
            "S/N",
            "Ion Charge",
            "m/z Error (ppm)",
            "m/z Error Score",
            "Isotopologue Similarity",
            "Confidence Score",
            "DBE",
            "O/C",
            "H/C",
            "Heteroatom Class",
            "Ion Type",
            "Adduct",
            "Is Isotopologue",
            "Mono Isotopic Index",
            "Molecular Formula",
        ]

    @property
    def output_type(self):
        """Returns the output type of the mass spectrum."""
        return self._output_type

    @output_type.setter
    def output_type(self, output_type):
        """Validate and store the output type.

        Raises
        ------
        TypeError
            If ``output_type`` is not one of 'excel', 'csv', 'pandas' or 'hdf5'.
        """
        if output_type not in self._SUPPORTED_OUTPUT_TYPES:
            # TypeError is kept (rather than ValueError) so existing callers
            # that catch it keep working.
            raise TypeError(
                'Supported types are "excel", "csv", "pandas" or "hdf5", %s entered'
                % output_type
            )
        self._output_type = output_type

    def save(self):
        """Save the mass spectrum data to the output file.

        Raises
        ------
        ValueError
            If the output type is not supported.
        """

        if self.output_type == "excel":
            self.to_excel()
        elif self.output_type == "csv":
            self.to_csv()
        elif self.output_type == "pandas":
            self.to_pandas()
        elif self.output_type == "hdf5":
            self.to_hdf()
        else:
            # Only reachable if ``_output_type`` was assigned without going
            # through the validating setter.
            raise ValueError(
                "Unknown output type: %s; it can be 'excel', 'csv', 'pandas' or 'hdf5'"
                % self.output_type
            )

    def run(self):
        """Run the export process.

        This method is called when the thread starts.
        It calls the save method to perform the export."""
        self.save()

    def _output_columns(self, mass_spectrum, additional_columns=None):
        """Return the fixed labels, then any extra columns, then the used atoms."""
        extra_columns = list(additional_columns) if additional_columns else []
        return (
            self.columns_label
            + extra_columns
            + self.get_all_used_atoms_in_order(mass_spectrum)
        )

    def _build_dataframe(self):
        """Return the default export table for ``self.mass_spectrum``."""
        columns = self._output_columns(self.mass_spectrum)
        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
        return DataFrame(dict_data_list, columns=columns)

    def _settings_dict(self, mass_spectrum):
        """Return the parameter dictionary extended with spectrum-level metadata."""
        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)

        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
        dict_setting["analyzer"] = mass_spectrum.analyzer
        dict_setting["instrument_label"] = mass_spectrum.instrument_label
        dict_setting["sample_name"] = mass_spectrum.sample_name

        return dict_setting

    @staticmethod
    def _dumps_indented(data):
        """Serialize ``data`` to JSON with the indentation used for HDF5 attributes."""
        return json.dumps(data, sort_keys=False, indent=4, separators=(",", ": "))

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time formatted as it is stored in the output."""
        return datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")

    def get_pandas_df(self, additional_columns=None):
        """Returns the mass spectrum data as a pandas DataFrame.

        Parameters
        ----------
        additional_columns : list, optional
            Additional columns to include in the DataFrame. Defaults to None.
            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.

        Returns
        -------
        DataFrame
            The mass spectrum data as a pandas DataFrame.

        Raises
        ------
        ValueError
            If an entry of ``additional_columns`` is not a supported column.
        """
        if additional_columns is not None:
            # Accept any iterable (e.g. a tuple), not only a list.
            additional_columns = list(additional_columns)
            for column in additional_columns:
                if column not in self._ADDITIONAL_COLUMNS:
                    raise ValueError("Invalid additional column: %s" % column)

        columns = self._output_columns(self.mass_spectrum, additional_columns)
        dict_data_list = self.get_list_dict_data(
            self.mass_spectrum, additional_columns=additional_columns
        )
        df = DataFrame(dict_data_list, columns=columns)
        df.name = self.output_file
        return df

    def write_settings(self, output_path, mass_spectrum):
        """Writes the settings of the mass spectrum to a JSON file.

        The file is written next to ``output_path`` with a ``.json`` suffix.

        Parameters
        ----------
        output_path : str or Path
            The output file path.
        mass_spectrum : MassSpectrum
            The mass spectrum to export.
        """
        dict_setting = self._settings_dict(mass_spectrum)

        # Serialize before opening the file so a serialization error does not
        # leave an empty settings file behind.
        output = json.dumps(
            dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
        )

        with open(
            Path(output_path).with_suffix(".json"),
            "w",
            encoding="utf8",
        ) as outfile:
            outfile.write(output)

    def to_pandas(self, write_metadata=True):
        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.

        Parameters
        ----------
        write_metadata : bool, optional
            Whether to write the metadata to a JSON file. Defaults to True.
        """
        df = self._build_dataframe()

        df.to_pickle(self.output_file.with_suffix(".pkl"))

        if write_metadata:
            self.write_settings(self.output_file, self.mass_spectrum)

    def to_excel(self, write_metadata=True):
        """Exports the mass spectrum data to an Excel file.

        Parameters
        ----------
        write_metadata : bool, optional
            Whether to write the metadata to a JSON file. Defaults to True.
        """
        df = self._build_dataframe()

        df.to_excel(self.output_file.with_suffix(".xlsx"))

        if write_metadata:
            self.write_settings(self.output_file, self.mass_spectrum)

    def to_csv(self, write_metadata=True):
        """Exports the mass spectrum data to a CSV file.

        I/O errors are reported on stdout instead of being raised
        (best-effort export, kept from the original implementation).

        Parameters
        ----------
        write_metadata : bool, optional
            Whether to write the metadata to a JSON file. Defaults to True.
        """
        import csv

        columns = self._output_columns(self.mass_spectrum)

        dict_data_list = self.get_list_dict_data(self.mass_spectrum)

        try:
            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=columns)
                writer.writeheader()
                writer.writerows(dict_data_list)
            if write_metadata:
                self.write_settings(self.output_file, self.mass_spectrum)

        except IOError as ioerror:
            print(ioerror)

    def to_json(self):
        """Exports the mass spectrum data to a JSON string.

        Returns
        -------
        str
            The table serialized with ``DataFrame.to_json(orient="records")``.
        """
        return self._build_dataframe().to_json(orient="records")

    def add_mass_spectrum_to_hdf5(
        self,
        hdf_handle,
        mass_spectrum,
        group_key,
        mass_spectra_group=None,
        export_raw=True,
    ):
        """Adds the mass spectrum data to an HDF5 file.

        Parameters
        ----------
        hdf_handle : h5py.File
            The HDF5 file handle.
        mass_spectrum : MassSpectrum
            The mass spectrum to add to the HDF5 file.
        group_key : str
            The group key (where to add the mass spectrum data within the HDF5 file).
        mass_spectra_group : h5py.Group, optional
            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
        export_raw : bool, optional
            Whether to export the raw data. Defaults to True.
            If False, only the processed data (peaks) is exported (essentially centroided data).
        """
        if mass_spectra_group is None:
            # Check if the file has the necessary attributes and add them if not
            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
            if not hdf_handle.attrs.get("date_utc"):
                hdf_handle.attrs["date_utc"] = self._utc_timestamp()
                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
                hdf_handle.attrs["data_structure"] = "mass_spectrum"
                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name

        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)

        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)

        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)

        columns_labels = self._dumps_indented(self._output_columns(mass_spectrum))

        # From here on, write into the supplied group when there is one.
        if mass_spectra_group is not None:
            hdf_handle = mass_spectra_group

        if group_key not in hdf_handle.keys():
            scan_group = hdf_handle.create_group(group_key)

            # If there is raw data (from profile data) save it
            if not mass_spectrum.is_centroid and export_raw:
                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))

                # Row 0 holds the abundances and row 1 the m/z values; the
                # order is part of the on-disk layout and is left unchanged.
                mz_abun_array[0] = mass_spectrum.abundance_profile
                mz_abun_array[1] = mass_spectrum.mz_exp_profile

                raw_ms_dataset = scan_group.create_dataset(
                    "raw_ms", data=mz_abun_array, dtype="f8"
                )

            else:
                # create empty dataset for missing raw data
                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")

            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)

            if isinstance(mass_spectrum, MassSpecfromFreq):
                raw_ms_dataset.attrs["TransientSetting"] = self._dumps_indented(
                    setting_dicts.get("TransientSetting")
                )

        else:
            scan_group = hdf_handle.get(group_key)

        # The next dataset name is the current number of entries in the group.
        # NOTE(review): a freshly created group already contains "raw_ms", so
        # the first processed dataset is presumably named "1" - confirm
        # against the reader before relying on it.
        index_processed_data = str(len(scan_group.keys()))

        processed_dset = scan_group.create_dataset(
            index_processed_data, data=list_results
        )

        processed_dset.attrs["date_utc"] = self._utc_timestamp()

        processed_dset.attrs["ColumnsLabels"] = columns_labels

        # The attribute names below (including the "Molecula" spelling) are
        # written as-is; they are part of the stored file layout.
        processed_dset.attrs["MoleculaSearchSetting"] = self._dumps_indented(
            setting_dicts.get("MoleculaSearch")
        )

        processed_dset.attrs["MassSpecPeakSetting"] = self._dumps_indented(
            setting_dicts.get("MassSpecPeak")
        )

        processed_dset.attrs["MassSpectrumSetting"] = self._dumps_indented(
            setting_dicts.get("MassSpectrum")
        )

    def to_hdf(self):
        """Exports the mass spectrum data to an HDF5 file.

        The file is opened in append mode and the spectrum is stored under a
        group named after its scan number.
        """

        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
            self.add_mass_spectrum_to_hdf5(
                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
            )

    def parameters_to_toml(self):
        """Converts the mass spectrum parameters to a TOML string.

        Returns
        -------
        str
            The TOML string of the mass spectrum parameters.
        """
        return toml.dumps(self._settings_dict(self.mass_spectrum))

    def parameters_to_json(self):
        """Converts the mass spectrum parameters to a JSON string.

        Returns
        -------
        str
            The JSON string of the mass spectrum parameters.
        """
        return json.dumps(self._settings_dict(self.mass_spectrum))

    def get_mass_spec_attrs(self, mass_spectrum):
        """Returns the mass spectrum attributes as a dictionary.

        Parameters
        ----------
        mass_spectrum : MassSpectrum
            The mass spectrum to export.

        Returns
        -------
        dict
            The mass spectrum attributes.
        """
        return {
            "polarity": mass_spectrum.polarity,
            "rt": mass_spectrum.retention_time,
            "tic": mass_spectrum.tic,
            "mobility_scan": mass_spectrum.mobility_scan,
            "mobility_rt": mass_spectrum.mobility_rt,
            "Aterm": mass_spectrum.Aterm,
            "Bterm": mass_spectrum.Bterm,
            "Cterm": mass_spectrum.Cterm,
            "baseline_noise": mass_spectrum.baseline_noise,
            "baseline_noise_std": mass_spectrum.baseline_noise_std,
        }

    def get_all_used_atoms_in_order(self, mass_spectrum):
        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.

        Parameters
        ----------
        mass_spectrum : MassSpectrum
            The mass spectrum to export.

        Returns
        -------
        list
            The list of assigned atoms in the order specified by Atoms.atoms_order list.
        """
        all_used_atoms = set()
        if mass_spectrum:
            for ms_peak in mass_spectrum:
                if ms_peak:
                    for m_formula in ms_peak:
                        all_used_atoms.update(m_formula.atoms)

        # Sort by position in the reference ordering.
        return sorted(all_used_atoms, key=Atoms.atoms_order.index)

    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
        """Returns the mass spectrum data as a list of rows.

        Each row is a list aligned with ``columns_label`` followed by the used
        atoms; cells for which a record has no value are filled with NaN.

        Parameters
        ----------
        mass_spectrum : MassSpectrum
            The mass spectrum to export.
        is_hdf5 : bool, optional
            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.

        Returns
        -------
        list
            The mass spectrum data as a list of lists (one per record).
        """
        column_labels = self._output_columns(mass_spectrum)

        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)

        all_lines = []
        for dict_res in dict_list:
            result_line = [NaN] * len(column_labels)

            for label, value in dict_res.items():
                result_line[column_labels.index(label)] = value

            all_lines.append(result_line)

        return all_lines

    def get_list_dict_data(
        self,
        mass_spectrum,
        include_no_match=True,
        include_isotopologues=True,
        isotopologue_inline=True,
        no_match_inline=False,
        is_hdf5=False,
        additional_columns=None,
    ):
        """Returns the mass spectrum data as a list of dictionaries.

        Parameters
        ----------
        mass_spectrum : MassSpectrum
            The mass spectrum to export.
        include_no_match : bool, optional
            Whether to include unassigned (no match) data. Defaults to True.
        include_isotopologues : bool, optional
            Whether to include isotopologues. Defaults to True.
        isotopologue_inline : bool, optional
            Whether to include isotopologues inline. Defaults to True.
        no_match_inline : bool, optional
            Whether to include unassigned (no match) data inline. Defaults to False.
        is_hdf5 : bool, optional
            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
            When True, text values are UTF-8 encoded to bytes.
        additional_columns : list, optional
            Extra per-formula columns to add to assigned records. Recognized
            names are 'Aromaticity Index', 'NOSC' and
            'Aromaticity Index (modified)'; any other name yields None.
            Defaults to None.

        Returns
        -------
        list
            The mass spectrum data as a list of dictionaries, without duplicates.
        """

        dict_data_list = []

        # Holds the atom order once computed, so it is not recomputed per row.
        atoms_order_cache = []

        def encode_text(text):
            """Return ``text`` as UTF-8 bytes for HDF5 output, unchanged otherwise."""
            return text.encode("utf-8") if is_hdf5 else text

        def get_atoms_order():
            """Return the atom column order, computing it at most once per call."""
            if self.atoms_order_list is not None:
                return self.atoms_order_list
            if not atoms_order_cache:
                atoms_order_cache.append(
                    self.get_all_used_atoms_in_order(mass_spectrum)
                )
            return atoms_order_cache[0]

        def add_no_match_dict_data(index, ms_peak):
            """
            Export dictionary of mspeak info for unassigned (no match) data
            """
            dict_result = {
                "Index": index,
                "m/z": ms_peak._mz_exp,
                "Calibrated m/z": ms_peak.mz_exp,
                "Peak Height": ms_peak.abundance,
                "Peak Area": ms_peak.area,
                "Resolving Power": ms_peak.resolving_power,
                "S/N": ms_peak.signal_to_noise,
                "Ion Charge": ms_peak.ion_charge,
                "Heteroatom Class": encode_text(Labels.unassigned),
            }

            dict_data_list.append(dict_result)

        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
            """
            Export dictionary of mspeak info for assigned (match) data
            """
            formula_dict = mformula.to_dict()

            dict_result = {
                "Index": index,
                "m/z": ms_peak._mz_exp,
                "Calibrated m/z": ms_peak.mz_exp,
                "Calculated m/z": mformula.mz_calc,
                "Peak Height": ms_peak.abundance,
                "Peak Area": ms_peak.area,
                "Resolving Power": ms_peak.resolving_power,
                "S/N": ms_peak.signal_to_noise,
                "Ion Charge": ms_peak.ion_charge,
                "m/z Error (ppm)": mformula.mz_error,
                "Confidence Score": mformula.confidence_score,
                "Isotopologue Similarity": mformula.isotopologue_similarity,
                "m/z Error Score": mformula.average_mz_error_score,
                "DBE": mformula.dbe,
                "Heteroatom Class": encode_text(mformula.class_label),
                "H/C": mformula.H_C,
                "O/C": mformula.O_C,
                "Ion Type": encode_text(mformula.ion_type.lower()),
                "Is Isotopologue": int(mformula.is_isotopologue),
                "Molecular Formula": encode_text(mformula.string),
            }
            if additional_columns is not None:
                possible_dict = {
                    "Aromaticity Index": mformula.A_I,
                    "NOSC": mformula.nosc,
                    "Aromaticity Index (modified)": mformula.A_I_mod,
                }
                for column in additional_columns:
                    dict_result[column] = possible_dict.get(column)

            if mformula.adduct_atom:
                dict_result["Adduct"] = encode_text(mformula.adduct_atom)

            if mformula.is_isotopologue:
                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic

            for atom in get_atoms_order():
                if atom in formula_dict:
                    dict_result[atom] = formula_dict[atom]

            dict_data_list.append(dict_result)

        def add_no_match_rows():
            """Append one record for every peak without a formula candidate."""
            for index, ms_peak in enumerate(mass_spectrum):
                if not ms_peak:
                    add_no_match_dict_data(index, ms_peak)

        score_methods = mass_spectrum.molecular_search_settings.score_methods
        selected_score_method = (
            mass_spectrum.molecular_search_settings.output_score_method
        )

        if selected_score_method in score_methods:
            # temp set score method as the one chosen in the output
            current_method = mass_spectrum.molecular_search_settings.score_method
            mass_spectrum.molecular_search_settings.score_method = selected_score_method

            try:
                for index, ms_peak in enumerate(mass_spectrum):
                    if ms_peak:
                        m_formula = ms_peak.best_molecular_formula_candidate

                        if m_formula:
                            if not m_formula.is_isotopologue:
                                add_match_dict_data(
                                    index,
                                    ms_peak,
                                    m_formula,
                                    additional_columns=additional_columns,
                                )

                                # list the isotopologues right after their
                                # monoisotopic formula
                                for (
                                    iso_mspeak_index,
                                    iso_mf_formula,
                                ) in m_formula.mspeak_mf_isotopologues_indexes:
                                    iso_ms_peak = mass_spectrum[iso_mspeak_index]
                                    add_match_dict_data(
                                        iso_mspeak_index,
                                        iso_ms_peak,
                                        iso_mf_formula,
                                        additional_columns=additional_columns,
                                    )
                    else:
                        if include_no_match and no_match_inline:
                            add_no_match_dict_data(index, ms_peak)

                if include_no_match and not no_match_inline:
                    add_no_match_rows()
            finally:
                # always restore the caller's score method, even if building
                # the records failed
                mass_spectrum.molecular_search_settings.score_method = current_method

        else:
            min_score = mass_spectrum.molecular_search_settings.output_min_score

            for index, ms_peak in enumerate(mass_spectrum):
                # check if there is a molecular formula candidate for the msPeak

                if ms_peak:
                    for m_formula in ms_peak:
                        # The score cutoff only applies when it is positive.
                        if (
                            not (min_score > 0)
                            or m_formula.confidence_score >= min_score
                        ):
                            if m_formula.is_isotopologue:  # isotopologues inline
                                if include_isotopologues and isotopologue_inline:
                                    add_match_dict_data(
                                        index,
                                        ms_peak,
                                        m_formula,
                                        additional_columns=additional_columns,
                                    )
                            else:
                                add_match_dict_data(
                                    index,
                                    ms_peak,
                                    m_formula,
                                    additional_columns=additional_columns,
                                )  # add monoisotopic peak

                        # cutoff because of low score
                        else:
                            add_no_match_dict_data(index, ms_peak)
                else:
                    # include not_match
                    if include_no_match and no_match_inline:
                        add_no_match_dict_data(index, ms_peak)

            if include_isotopologues and not isotopologue_inline:
                for index, ms_peak in enumerate(mass_spectrum):
                    for m_formula in ms_peak:
                        if m_formula.is_isotopologue:
                            if m_formula.confidence_score >= min_score:
                                add_match_dict_data(
                                    index,
                                    ms_peak,
                                    m_formula,
                                    additional_columns=additional_columns,
                                )

            if include_no_match and not no_match_inline:
                add_no_match_rows()

        # remove duplicated add_match data possibly introduced on the output_score_filter step
        # (records are dicts, hence unhashable, so compare them pairwise)
        unique_records = []
        for record in dict_data_list:
            if record not in unique_records:
                unique_records.append(record)

        return unique_records
20class HighResMassSpecExport(Thread): 21 """A class for exporting high-resolution mass spectra. 22 23 Parameters 24 ---------- 25 out_file_path : str 26 The output file path. 27 mass_spectrum : MassSpectrum 28 The mass spectrum to export. 29 output_type : str, optional 30 The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'. 31 32 Attributes 33 ---------- 34 output_file : Path 35 The output file path. 36 output_type : str 37 The type of output file. 38 mass_spectrum : MassSpectrum 39 The mass spectrum to export. 40 atoms_order_list : list 41 The list of assigned atoms in the order specified by Atoms.atoms_order list. 42 columns_label : list 43 The column labels in order. 44 45 Methods 46 ------- 47 * save(). 48 Save the mass spectrum data to the output file. 49 * run(). 50 Run the export process. 51 * get_pandas_df(). 52 Returns the mass spectrum data as a pandas DataFrame. 53 * write_settings(output_path, mass_spectrum). 54 Writes the settings of the mass spectrum to a JSON file. 55 * to_pandas(write_metadata=True). 56 Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file. 57 * to_excel(write_metadata=True). 58 Exports the mass spectrum data to an Excel file. 59 * to_csv(write_metadata=True). 60 Exports the mass spectrum data to a CSV file. 61 * to_json(). 62 Exports the mass spectrum data to a JSON string. 63 * to_hdf(). 64 Exports the mass spectrum data to an HDF5 file. 65 * parameters_to_toml(). 66 Converts the mass spectrum parameters to a TOML string. 67 * parameters_to_json(). 68 Converts the mass spectrum parameters to a JSON string. 69 * get_mass_spec_attrs(mass_spectrum). 70 Returns the mass spectrum attributes as a dictionary. 71 * get_all_used_atoms_in_order(mass_spectrum). 72 Returns the list of assigned atoms in the order specified by Atoms.atoms_order list. 73 * list_dict_to_list(mass_spectrum, is_hdf5=False). 74 Returns the mass spectrum data as a list of dictionaries. 
75 * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False). 76 Returns the mass spectrum data as a list of dictionaries. 77 78 """ 79 80 def __init__(self, out_file_path, mass_spectrum, output_type="excel"): 81 Thread.__init__(self) 82 83 self.output_file = Path(out_file_path) 84 85 # 'excel', 'csv' or 'pandas' 86 self.output_type = output_type 87 88 self.mass_spectrum = mass_spectrum 89 90 # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list 91 self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum) 92 93 self._init_columns() 94 95 def _init_columns(self): 96 """Initialize the columns for the mass spectrum output.""" 97 # column labels in order 98 self.columns_label = [ 99 "Index", 100 "m/z", 101 "Calibrated m/z", 102 "Calculated m/z", 103 "Peak Height", 104 "Peak Area", 105 "Resolving Power", 106 "S/N", 107 "Ion Charge", 108 "m/z Error (ppm)", 109 "m/z Error Score", 110 "Isotopologue Similarity", 111 "Confidence Score", 112 "DBE", 113 "O/C", 114 "H/C", 115 "Heteroatom Class", 116 "Ion Type", 117 "Adduct", 118 "Is Isotopologue", 119 "Mono Isotopic Index", 120 "Molecular Formula", 121 ] 122 123 @property 124 def output_type(self): 125 """Returns the output type of the mass spectrum.""" 126 return self._output_type 127 128 @output_type.setter 129 def output_type(self, output_type): 130 output_types = ["excel", "csv", "pandas", "hdf5"] 131 if output_type in output_types: 132 self._output_type = output_type 133 else: 134 raise TypeError( 135 'Supported types are "excel", "csv" or "pandas", %s entered' 136 % output_type 137 ) 138 139 def save(self): 140 """Save the mass spectrum data to the output file. 141 142 Raises 143 ------ 144 ValueError 145 If the output type is not supported. 
146 """ 147 148 if self.output_type == "excel": 149 self.to_excel() 150 elif self.output_type == "csv": 151 self.to_csv() 152 elif self.output_type == "pandas": 153 self.to_pandas() 154 elif self.output_type == "hdf5": 155 self.to_hdf() 156 else: 157 raise ValueError( 158 "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'" 159 % self.output_type 160 ) 161 162 def run(self): 163 """Run the export process. 164 165 This method is called when the thread starts. 166 It calls the save method to perform the export.""" 167 self.save() 168 169 def get_pandas_df(self, additional_columns=None): 170 """Returns the mass spectrum data as a pandas DataFrame. 171 172 Parameters 173 ---------- 174 additional_columns : list, optional 175 Additional columns to include in the DataFrame. Defaults to None. 176 Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'. 177 178 Returns 179 ------- 180 DataFrame 181 The mass spectrum data as a pandas DataFrame. 182 """ 183 if additional_columns is not None: 184 possible_additional_columns = [ 185 "Aromaticity Index", 186 "NOSC", 187 "Aromaticity Index (modified)", 188 ] 189 if additional_columns: 190 for column in additional_columns: 191 if column not in possible_additional_columns: 192 raise ValueError("Invalid additional column: %s" % column) 193 columns = ( 194 self.columns_label 195 + additional_columns 196 + self.get_all_used_atoms_in_order(self.mass_spectrum) 197 ) 198 else: 199 columns = self.columns_label + self.get_all_used_atoms_in_order( 200 self.mass_spectrum 201 ) 202 dict_data_list = self.get_list_dict_data( 203 self.mass_spectrum, additional_columns=additional_columns 204 ) 205 df = DataFrame(dict_data_list, columns=columns) 206 df.name = self.output_file 207 return df 208 209 def write_settings(self, output_path, mass_spectrum): 210 """Writes the settings of the mass spectrum to a JSON file. 211 212 Parameters 213 ---------- 214 output_path : str 215 The output file path. 
216 mass_spectrum : MassSpectrum 217 The mass spectrum to export. 218 """ 219 220 import json 221 222 dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum) 223 224 dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum) 225 dict_setting["analyzer"] = mass_spectrum.analyzer 226 dict_setting["instrument_label"] = mass_spectrum.instrument_label 227 dict_setting["sample_name"] = mass_spectrum.sample_name 228 229 with open( 230 output_path.with_suffix(".json"), 231 "w", 232 encoding="utf8", 233 ) as outfile: 234 output = json.dumps( 235 dict_setting, sort_keys=True, indent=4, separators=(",", ": ") 236 ) 237 outfile.write(output) 238 239 def to_pandas(self, write_metadata=True): 240 """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file. 241 242 Parameters 243 ---------- 244 write_metadata : bool, optional 245 Whether to write the metadata to a JSON file. Defaults to True. 246 """ 247 248 columns = self.columns_label + self.get_all_used_atoms_in_order( 249 self.mass_spectrum 250 ) 251 252 dict_data_list = self.get_list_dict_data(self.mass_spectrum) 253 254 df = DataFrame(dict_data_list, columns=columns) 255 256 df.to_pickle(self.output_file.with_suffix(".pkl")) 257 258 if write_metadata: 259 self.write_settings(self.output_file, self.mass_spectrum) 260 261 def to_excel(self, write_metadata=True): 262 """Exports the mass spectrum data to an Excel file. 263 264 Parameters 265 ---------- 266 write_metadata : bool, optional 267 Whether to write the metadata to a JSON file. Defaults to True. 
268 """ 269 270 columns = self.columns_label + self.get_all_used_atoms_in_order( 271 self.mass_spectrum 272 ) 273 274 dict_data_list = self.get_list_dict_data(self.mass_spectrum) 275 276 df = DataFrame(dict_data_list, columns=columns) 277 278 df.to_excel(self.output_file.with_suffix(".xlsx")) 279 280 if write_metadata: 281 self.write_settings(self.output_file, self.mass_spectrum) 282 283 def to_csv(self, write_metadata=True): 284 """Exports the mass spectrum data to a CSV file. 285 286 Parameters 287 ---------- 288 write_metadata : bool, optional 289 Whether to write the metadata to a JSON file. Defaults to True. 290 """ 291 292 columns = self.columns_label + self.get_all_used_atoms_in_order( 293 self.mass_spectrum 294 ) 295 296 dict_data_list = self.get_list_dict_data(self.mass_spectrum) 297 298 import csv 299 300 try: 301 with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile: 302 writer = csv.DictWriter(csvfile, fieldnames=columns) 303 writer.writeheader() 304 for data in dict_data_list: 305 writer.writerow(data) 306 if write_metadata: 307 self.write_settings(self.output_file, self.mass_spectrum) 308 309 except IOError as ioerror: 310 print(ioerror) 311 312 def to_json(self): 313 """Exports the mass spectrum data to a JSON string.""" 314 315 columns = self.columns_label + self.get_all_used_atoms_in_order( 316 self.mass_spectrum 317 ) 318 319 dict_data_list = self.get_list_dict_data(self.mass_spectrum) 320 321 df = DataFrame(dict_data_list, columns=columns) 322 323 # for key, values in dict_data.items(): 324 # if not values: dict_data[key] = NaN 325 326 # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': ')) 327 return df.to_json(orient="records") 328 329 def add_mass_spectrum_to_hdf5( 330 self, 331 hdf_handle, 332 mass_spectrum, 333 group_key, 334 mass_spectra_group=None, 335 export_raw=True, 336 ): 337 """Adds the mass spectrum data to an HDF5 file. 
338 339 Parameters 340 ---------- 341 hdf_handle : h5py.File 342 The HDF5 file handle. 343 mass_spectrum : MassSpectrum 344 The mass spectrum to add to the HDF5 file. 345 group_key : str 346 The group key (where to add the mass spectrum data within the HDF5 file). 347 mass_spectra_group : h5py.Group, optional 348 The mass spectra group. Defaults to None (no group, mass spectrum is added to the root). 349 export_raw : bool, optional 350 Whether to export the raw data. Defaults to True. 351 If False, only the processed data (peaks) is exported (essentially centroided data). 352 """ 353 if mass_spectra_group is None: 354 # Check if the file has the necessary attributes and add them if not 355 # This assumes that if there is a mass_spectra_group, these attributes were already added to the file 356 if not hdf_handle.attrs.get("date_utc"): 357 timenow = str( 358 datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z") 359 ) 360 hdf_handle.attrs["date_utc"] = timenow 361 hdf_handle.attrs["file_name"] = mass_spectrum.filename.name 362 hdf_handle.attrs["data_structure"] = "mass_spectrum" 363 hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer 364 hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label 365 hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name 366 367 list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True) 368 369 dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum) 370 371 setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum) 372 373 columns_labels = json.dumps( 374 self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum), 375 sort_keys=False, 376 indent=4, 377 separators=(",", ": "), 378 ) 379 380 group_key = group_key 381 382 if mass_spectra_group is not None: 383 hdf_handle = mass_spectra_group 384 385 if group_key not in hdf_handle.keys(): 386 scan_group = hdf_handle.create_group(group_key) 387 388 # If there is raw data (from profile data) save it 389 if not mass_spectrum.is_centroid and 
export_raw: 390 mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile))) 391 392 mz_abun_array[0] = mass_spectrum.abundance_profile 393 mz_abun_array[1] = mass_spectrum.mz_exp_profile 394 395 raw_ms_dataset = scan_group.create_dataset( 396 "raw_ms", data=mz_abun_array, dtype="f8" 397 ) 398 399 else: 400 # create empy dataset for missing raw data 401 raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8") 402 403 raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs) 404 405 if isinstance(mass_spectrum, MassSpecfromFreq): 406 raw_ms_dataset.attrs["TransientSetting"] = json.dumps( 407 setting_dicts.get("TransientSetting"), 408 sort_keys=False, 409 indent=4, 410 separators=(",", ": "), 411 ) 412 413 else: 414 scan_group = hdf_handle.get(group_key) 415 416 # if there is not processed data len = 0, otherwise len() will return next index 417 index_processed_data = str(len(scan_group.keys())) 418 419 timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")) 420 421 processed_dset = scan_group.create_dataset( 422 index_processed_data, data=list_results 423 ) 424 425 processed_dset.attrs["date_utc"] = timenow 426 427 processed_dset.attrs["ColumnsLabels"] = columns_labels 428 429 processed_dset.attrs["MoleculaSearchSetting"] = json.dumps( 430 setting_dicts.get("MoleculaSearch"), 431 sort_keys=False, 432 indent=4, 433 separators=(",", ": "), 434 ) 435 436 processed_dset.attrs["MassSpecPeakSetting"] = json.dumps( 437 setting_dicts.get("MassSpecPeak"), 438 sort_keys=False, 439 indent=4, 440 separators=(",", ": "), 441 ) 442 443 processed_dset.attrs["MassSpectrumSetting"] = json.dumps( 444 setting_dicts.get("MassSpectrum"), 445 sort_keys=False, 446 indent=4, 447 separators=(",", ": "), 448 ) 449 450 def to_hdf(self): 451 """Exports the mass spectrum data to an HDF5 file.""" 452 453 with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle: 454 self.add_mass_spectrum_to_hdf5( 455 hdf_handle, self.mass_spectrum, 
str(self.mass_spectrum.scan_number) 456 ) 457 458 def parameters_to_toml(self): 459 """Converts the mass spectrum parameters to a TOML string. 460 461 Returns 462 ------- 463 str 464 The TOML string of the mass spectrum parameters. 465 """ 466 467 dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum) 468 469 dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum) 470 dict_setting["analyzer"] = self.mass_spectrum.analyzer 471 dict_setting["instrument_label"] = self.mass_spectrum.instrument_label 472 dict_setting["sample_name"] = self.mass_spectrum.sample_name 473 474 output = toml.dumps(dict_setting) 475 476 return output 477 478 def parameters_to_json(self): 479 """Converts the mass spectrum parameters to a JSON string. 480 481 Returns 482 ------- 483 str 484 The JSON string of the mass spectrum parameters. 485 """ 486 487 dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum) 488 489 dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum) 490 dict_setting["analyzer"] = self.mass_spectrum.analyzer 491 dict_setting["instrument_label"] = self.mass_spectrum.instrument_label 492 dict_setting["sample_name"] = self.mass_spectrum.sample_name 493 494 output = json.dumps(dict_setting) 495 496 return output 497 498 def get_mass_spec_attrs(self, mass_spectrum): 499 """Returns the mass spectrum attributes as a dictionary. 500 501 Parameters 502 ---------- 503 mass_spectrum : MassSpectrum 504 The mass spectrum to export. 505 506 Returns 507 ------- 508 dict 509 The mass spectrum attributes. 
510 """ 511 512 dict_ms_attrs = {} 513 dict_ms_attrs["polarity"] = mass_spectrum.polarity 514 dict_ms_attrs["rt"] = mass_spectrum.retention_time 515 dict_ms_attrs["tic"] = mass_spectrum.tic 516 dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan 517 dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt 518 dict_ms_attrs["Aterm"] = mass_spectrum.Aterm 519 dict_ms_attrs["Bterm"] = mass_spectrum.Bterm 520 dict_ms_attrs["Cterm"] = mass_spectrum.Cterm 521 dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise 522 dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std 523 524 return dict_ms_attrs 525 526 def get_all_used_atoms_in_order(self, mass_spectrum): 527 """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list. 528 529 Parameters 530 ---------- 531 mass_spectrum : MassSpectrum 532 The mass spectrum to export. 533 534 Returns 535 ------- 536 list 537 The list of assigned atoms in the order specified by Atoms.atoms_order list. 538 """ 539 540 atoms_in_order = Atoms.atoms_order 541 all_used_atoms = set() 542 if mass_spectrum: 543 for ms_peak in mass_spectrum: 544 if ms_peak: 545 for m_formula in ms_peak: 546 for atom in m_formula.atoms: 547 all_used_atoms.add(atom) 548 549 def sort_method(atom): 550 return [atoms_in_order.index(atom)] 551 552 return sorted(all_used_atoms, key=sort_method) 553 554 def list_dict_to_list(self, mass_spectrum, is_hdf5=False): 555 """Returns the mass spectrum data as a list of dictionaries. 556 557 Parameters 558 ---------- 559 mass_spectrum : MassSpectrum 560 The mass spectrum to export. 561 is_hdf5 : bool, optional 562 Whether the mass spectrum is being exported to an HDF5 file. Defaults to False. 563 564 Returns 565 ------- 566 list 567 The mass spectrum data as a list of dictionaries. 
568 """ 569 570 column_labels = self.columns_label + self.get_all_used_atoms_in_order( 571 mass_spectrum 572 ) 573 574 dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5) 575 576 all_lines = [] 577 for dict_res in dict_list: 578 result_line = [NaN] * len(column_labels) 579 580 for label, value in dict_res.items(): 581 label_index = column_labels.index(label) 582 result_line[label_index] = value 583 584 all_lines.append(result_line) 585 586 return all_lines 587 588 def get_list_dict_data( 589 self, 590 mass_spectrum, 591 include_no_match=True, 592 include_isotopologues=True, 593 isotopologue_inline=True, 594 no_match_inline=False, 595 is_hdf5=False, 596 additional_columns=None, 597 ): 598 """Returns the mass spectrum data as a list of dictionaries. 599 600 Parameters 601 ---------- 602 mass_spectrum : MassSpectrum 603 The mass spectrum to export. 604 include_no_match : bool, optional 605 Whether to include unassigned (no match) data. Defaults to True. 606 include_isotopologues : bool, optional 607 Whether to include isotopologues. Defaults to True. 608 isotopologue_inline : bool, optional 609 Whether to include isotopologues inline. Defaults to True. 610 no_match_inline : bool, optional 611 Whether to include unassigned (no match) data inline. Defaults to False. 612 is_hdf5 : bool, optional 613 Whether the mass spectrum is being exported to an HDF5 file. Defaults to False. 614 615 Returns 616 ------- 617 list 618 The mass spectrum data as a list of dictionaries. 
619 """ 620 621 dict_data_list = [] 622 623 if is_hdf5: 624 encode = ".encode('utf-8')" 625 else: 626 encode = "" 627 628 def add_no_match_dict_data(index, ms_peak): 629 """ 630 Export dictionary of mspeak info for unassigned (no match) data 631 """ 632 dict_result = { 633 "Index": index, 634 "m/z": ms_peak._mz_exp, 635 "Calibrated m/z": ms_peak.mz_exp, 636 "Peak Height": ms_peak.abundance, 637 "Peak Area": ms_peak.area, 638 "Resolving Power": ms_peak.resolving_power, 639 "S/N": ms_peak.signal_to_noise, 640 "Ion Charge": ms_peak.ion_charge, 641 "Heteroatom Class": eval("Labels.unassigned{}".format(encode)), 642 } 643 644 dict_data_list.append(dict_result) 645 646 def add_match_dict_data(index, ms_peak, mformula, additional_columns=None): 647 """ 648 Export dictionary of mspeak info for assigned (match) data 649 """ 650 formula_dict = mformula.to_dict() 651 652 dict_result = { 653 "Index": index, 654 "m/z": ms_peak._mz_exp, 655 "Calibrated m/z": ms_peak.mz_exp, 656 "Calculated m/z": mformula.mz_calc, 657 "Peak Height": ms_peak.abundance, 658 "Peak Area": ms_peak.area, 659 "Resolving Power": ms_peak.resolving_power, 660 "S/N": ms_peak.signal_to_noise, 661 "Ion Charge": ms_peak.ion_charge, 662 "m/z Error (ppm)": mformula.mz_error, 663 "Confidence Score": mformula.confidence_score, 664 "Isotopologue Similarity": mformula.isotopologue_similarity, 665 "m/z Error Score": mformula.average_mz_error_score, 666 "DBE": mformula.dbe, 667 "Heteroatom Class": eval("mformula.class_label{}".format(encode)), 668 "H/C": mformula.H_C, 669 "O/C": mformula.O_C, 670 "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)), 671 "Is Isotopologue": int(mformula.is_isotopologue), 672 "Molecular Formula": eval("mformula.string{}".format(encode)), 673 } 674 if additional_columns is not None: 675 possible_dict = { 676 "Aromaticity Index": mformula.A_I, 677 "NOSC": mformula.nosc, 678 "Aromaticity Index (modified)": mformula.A_I_mod, 679 } 680 for column in additional_columns: 681 
dict_result[column] = possible_dict.get(column) 682 683 if mformula.adduct_atom: 684 dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode)) 685 686 if mformula.is_isotopologue: 687 dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic 688 689 if self.atoms_order_list is None: 690 atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum) 691 else: 692 atoms_order_list = self.atoms_order_list 693 694 for atom in atoms_order_list: 695 if atom in formula_dict.keys(): 696 dict_result[atom] = formula_dict.get(atom) 697 698 dict_data_list.append(dict_result) 699 700 score_methods = mass_spectrum.molecular_search_settings.score_methods 701 selected_score_method = ( 702 mass_spectrum.molecular_search_settings.output_score_method 703 ) 704 705 if selected_score_method in score_methods: 706 # temp set score method as the one chosen in the output 707 current_method = mass_spectrum.molecular_search_settings.score_method 708 mass_spectrum.molecular_search_settings.score_method = selected_score_method 709 710 for index, ms_peak in enumerate(mass_spectrum): 711 # print(ms_peak.mz_exp) 712 713 if ms_peak: 714 m_formula = ms_peak.best_molecular_formula_candidate 715 716 if m_formula: 717 if not m_formula.is_isotopologue: 718 add_match_dict_data( 719 index, 720 ms_peak, 721 m_formula, 722 additional_columns=additional_columns, 723 ) 724 725 for ( 726 iso_mspeak_index, 727 iso_mf_formula, 728 ) in m_formula.mspeak_mf_isotopologues_indexes: 729 iso_ms_peak = mass_spectrum[iso_mspeak_index] 730 add_match_dict_data( 731 iso_mspeak_index, 732 iso_ms_peak, 733 iso_mf_formula, 734 additional_columns=additional_columns, 735 ) 736 else: 737 if include_no_match and no_match_inline: 738 add_no_match_dict_data(index, ms_peak) 739 740 if include_no_match and not no_match_inline: 741 for index, ms_peak in enumerate(mass_spectrum): 742 if not ms_peak: 743 add_no_match_dict_data(index, ms_peak) 744 # reset score method as the one chosen in the output 745 
mass_spectrum.molecular_search_settings.score_method = current_method 746 747 else: 748 for index, ms_peak in enumerate(mass_spectrum): 749 # check if there is a molecular formula candidate for the msPeak 750 751 if ms_peak: 752 # m_formula = ms_peak.molecular_formula_lowest_error 753 for m_formula in ms_peak: 754 if mass_spectrum.molecular_search_settings.output_min_score > 0: 755 if ( 756 m_formula.confidence_score 757 >= mass_spectrum.molecular_search_settings.output_min_score 758 ): 759 if m_formula.is_isotopologue: # isotopologues inline 760 if include_isotopologues and isotopologue_inline: 761 add_match_dict_data( 762 index, 763 ms_peak, 764 m_formula, 765 additional_columns=additional_columns, 766 ) 767 else: 768 add_match_dict_data( 769 index, 770 ms_peak, 771 m_formula, 772 additional_columns=additional_columns, 773 ) # add monoisotopic peak 774 775 # cutoff because of low score 776 else: 777 add_no_match_dict_data(index, ms_peak) 778 779 else: 780 if m_formula.is_isotopologue: # isotopologues inline 781 if include_isotopologues and isotopologue_inline: 782 add_match_dict_data( 783 index, 784 ms_peak, 785 m_formula, 786 additional_columns=additional_columns, 787 ) 788 else: 789 add_match_dict_data( 790 index, 791 ms_peak, 792 m_formula, 793 additional_columns=additional_columns, 794 ) # add monoisotopic peak 795 else: 796 # include not_match 797 if include_no_match and no_match_inline: 798 add_no_match_dict_data(index, ms_peak) 799 800 if include_isotopologues and not isotopologue_inline: 801 for index, ms_peak in enumerate(mass_spectrum): 802 for m_formula in ms_peak: 803 if m_formula.is_isotopologue: 804 if ( 805 m_formula.confidence_score 806 >= mass_spectrum.molecular_search_settings.output_min_score 807 ): 808 add_match_dict_data( 809 index, 810 ms_peak, 811 m_formula, 812 additional_columns=additional_columns, 813 ) 814 815 if include_no_match and not no_match_inline: 816 for index, ms_peak in enumerate(mass_spectrum): 817 if not ms_peak: 818 
add_no_match_dict_data(index, ms_peak) 819 820 # remove duplicated add_match data possibly introduced on the output_score_filter step 821 res = [] 822 [res.append(x) for x in dict_data_list if x not in res] 823 824 return res
A class for exporting high-resolution mass spectra.
Parameters
- out_file_path (str): The output file path.
- mass_spectrum (MassSpectrum): The mass spectrum to export.
- output_type (str, optional): The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
Attributes
- output_file (Path): The output file path.
- output_type (str): The type of output file.
- mass_spectrum (MassSpectrum): The mass spectrum to export.
- atoms_order_list (list): The list of assigned atoms in the order specified by Atoms.atoms_order list.
- columns_label (list): The column labels in order.
Methods
- save(). Save the mass spectrum data to the output file.
- run(). Run the export process.
- get_pandas_df(). Returns the mass spectrum data as a pandas DataFrame.
- write_settings(output_path, mass_spectrum). Writes the settings of the mass spectrum to a JSON file.
- to_pandas(write_metadata=True). Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
- to_excel(write_metadata=True). Exports the mass spectrum data to an Excel file.
- to_csv(write_metadata=True). Exports the mass spectrum data to a CSV file.
- to_json(). Exports the mass spectrum data to a JSON string.
- to_hdf(). Exports the mass spectrum data to an HDF5 file.
- parameters_to_toml(). Converts the mass spectrum parameters to a TOML string.
- parameters_to_json(). Converts the mass spectrum parameters to a JSON string.
- get_mass_spec_attrs(mass_spectrum). Returns the mass spectrum attributes as a dictionary.
- get_all_used_atoms_in_order(mass_spectrum). Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
- list_dict_to_list(mass_spectrum, is_hdf5=False). Returns the mass spectrum data as a list of rows (one list per entry, ordered by the column labels, with NaN where an entry has no value for a column).
- get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False, additional_columns=None). Returns the mass spectrum data as a list of dictionaries.
def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
    """Set up the exporter for a single mass spectrum.

    Parameters
    ----------
    out_file_path : str
        The output file path; stored as a ``Path`` in ``output_file``.
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    output_type : str, optional
        The type of output file. Defaults to 'excel'.
    """
    Thread.__init__(self)

    self.output_file = Path(out_file_path)
    self.output_type = output_type
    self.mass_spectrum = mass_spectrum

    # Atoms found in the assigned formulas, ordered by
    # get_all_used_atoms_in_order.
    self.atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)

    self._init_columns()
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def save(self):
    """Save the mass spectrum data to the output file.

    Dispatches on ``self.output_type`` to the matching ``to_*`` exporter.

    Raises
    ------
    ValueError
        If the output type is not one of 'excel', 'csv', 'pandas' or 'hdf5'.
    """
    if self.output_type == "excel":
        self.to_excel()
    elif self.output_type == "csv":
        self.to_csv()
    elif self.output_type == "pandas":
        self.to_pandas()
    elif self.output_type == "hdf5":
        self.to_hdf()
    else:
        # The message lists every supported type, including 'hdf5'.
        raise ValueError(
            f"Unknown output type: {self.output_type}; "
            "it can be 'excel', 'csv', 'pandas' or 'hdf5'"
        )
Save the mass spectrum data to the output file.
Raises
- ValueError: If the output type is not supported.
def run(self):
    """Perform the export when the thread is started.

    Thread entry point; it simply delegates to ``save``.
    """
    self.save()
Run the export process.
This method is called when the thread starts. It calls the save method to perform the export.
def get_pandas_df(self, additional_columns=None):
    """Build a pandas DataFrame holding the mass spectrum data.

    Parameters
    ----------
    additional_columns : list, optional
        Extra columns to include, placed between the standard column
        labels and the atom columns. Defaults to None.
        Accepted names are: 'Aromaticity Index', 'NOSC',
        'Aromaticity Index (modified)'.

    Returns
    -------
    DataFrame
        The mass spectrum data; its ``name`` attribute is set to the
        output file path.

    Raises
    ------
    ValueError
        If a requested additional column is not one of the accepted names.
    """
    extra_labels = []
    if additional_columns is not None:
        allowed = ("Aromaticity Index", "NOSC", "Aromaticity Index (modified)")
        # Reject unknown names before any data is collected.
        for requested in additional_columns:
            if requested not in allowed:
                raise ValueError("Invalid additional column: %s" % requested)
        extra_labels = additional_columns

    atom_labels = self.get_all_used_atoms_in_order(self.mass_spectrum)
    columns = self.columns_label + extra_labels + atom_labels

    rows = self.get_list_dict_data(
        self.mass_spectrum, additional_columns=additional_columns
    )

    frame = DataFrame(rows, columns=columns)
    frame.name = self.output_file
    return frame
Returns the mass spectrum data as a pandas DataFrame.
Parameters
- additional_columns (list, optional): Additional columns to include in the DataFrame. Defaults to None. Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
Returns
- DataFrame: The mass spectrum data as a pandas DataFrame.
def write_settings(self, output_path, mass_spectrum):
    """Write the settings of the mass spectrum to a JSON file.

    The file is written next to ``output_path``, with the suffix replaced
    by ``.json``.

    Parameters
    ----------
    output_path : str or Path
        The output file path.
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    """
    dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)

    dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
    dict_setting["analyzer"] = mass_spectrum.analyzer
    dict_setting["instrument_label"] = mass_spectrum.instrument_label
    dict_setting["sample_name"] = mass_spectrum.sample_name

    # Coerce to Path so a plain string works as well as a Path;
    # ``str`` has no ``with_suffix``.
    settings_file = Path(output_path).with_suffix(".json")

    with open(settings_file, "w", encoding="utf8") as outfile:
        outfile.write(
            json.dumps(
                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
            )
        )
Writes the settings of the mass spectrum to a JSON file.
Parameters
- output_path (Path): The output file path, given as a pathlib.Path; its suffix is replaced with '.json'.
- mass_spectrum (MassSpectrum): The mass spectrum to export.
def to_pandas(self, write_metadata=True):
    """Pickle the mass spectrum data as a pandas DataFrame.

    The pickle is written to the output file path with a ``.pkl`` suffix.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    spectrum = self.mass_spectrum

    labels = self.columns_label + self.get_all_used_atoms_in_order(spectrum)
    rows = self.get_list_dict_data(spectrum)

    pickle_path = self.output_file.with_suffix(".pkl")
    DataFrame(rows, columns=labels).to_pickle(pickle_path)

    if write_metadata:
        self.write_settings(self.output_file, spectrum)
Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
Parameters
- write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_excel(self, write_metadata=True):
    """Write the mass spectrum data to an Excel workbook.

    The workbook is written to the output file path with a ``.xlsx``
    suffix.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    spectrum = self.mass_spectrum

    labels = self.columns_label + self.get_all_used_atoms_in_order(spectrum)
    rows = self.get_list_dict_data(spectrum)

    workbook_path = self.output_file.with_suffix(".xlsx")
    DataFrame(rows, columns=labels).to_excel(workbook_path)

    if write_metadata:
        self.write_settings(self.output_file, spectrum)
Exports the mass spectrum data to an Excel file.
Parameters
- write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_csv(self, write_metadata=True):
    """Export the mass spectrum data to a CSV file.

    The file is written to the output file path with a ``.csv`` suffix,
    encoded as UTF-8. Rows that lack a value for a column get an empty
    field.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    import csv

    columns = self.columns_label + self.get_all_used_atoms_in_order(
        self.mass_spectrum
    )

    dict_data_list = self.get_list_dict_data(self.mass_spectrum)

    try:
        # Explicit UTF-8 matches write_settings and keeps the output
        # independent of the platform's default encoding.
        with open(
            self.output_file.with_suffix(".csv"),
            "w",
            newline="",
            encoding="utf-8",
        ) as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=columns)
            writer.writeheader()
            writer.writerows(dict_data_list)

        if write_metadata:
            self.write_settings(self.output_file, self.mass_spectrum)

    except IOError as ioerror:
        # Best-effort export: report the I/O failure instead of raising.
        print(ioerror)
Exports the mass spectrum data to a CSV file.
Parameters
- write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_json(self):
    """Return the mass spectrum data as a JSON string.

    The data is serialised as a list of records, one object per row,
    keyed by column label.
    """
    spectrum = self.mass_spectrum

    labels = self.columns_label + self.get_all_used_atoms_in_order(spectrum)
    records = self.get_list_dict_data(spectrum)

    return DataFrame(records, columns=labels).to_json(orient="records")
Exports the mass spectrum data to a JSON string.
def add_mass_spectrum_to_hdf5(
    self,
    hdf_handle,
    mass_spectrum,
    group_key,
    mass_spectra_group=None,
    export_raw=True,
):
    """Add the mass spectrum data to an HDF5 file.

    Parameters
    ----------
    hdf_handle : h5py.File
        The HDF5 file handle.
    mass_spectrum : MassSpectrum
        The mass spectrum to add to the HDF5 file.
    group_key : str
        The group key (where to add the mass spectrum data within the
        HDF5 file).
    mass_spectra_group : h5py.Group, optional
        The mass spectra group. Defaults to None (no group, the mass
        spectrum is added to the root and the file-level attributes are
        written if ``date_utc`` is not set yet).
    export_raw : bool, optional
        Whether to export the raw data. Defaults to True.
        If False, only the processed data (peaks) is exported and an
        empty ``raw_ms`` dataset is created.
    """
    timestamp_format = "%d/%m/%Y %H:%M:%S %Z"

    def dump_pretty(payload):
        # Shared JSON layout for the settings and column-label attributes.
        return json.dumps(
            payload, sort_keys=False, indent=4, separators=(",", ": ")
        )

    # File-level attributes are only written for a standalone spectrum;
    # with a mass_spectra_group they are assumed to exist already.
    if mass_spectra_group is None and not hdf_handle.attrs.get("date_utc"):
        file_attrs = hdf_handle.attrs
        file_attrs["date_utc"] = datetime.now(timezone.utc).strftime(
            timestamp_format
        )
        file_attrs["file_name"] = mass_spectrum.filename.name
        file_attrs["data_structure"] = "mass_spectrum"
        file_attrs["analyzer"] = mass_spectrum.analyzer
        file_attrs["instrument_label"] = mass_spectrum.instrument_label
        file_attrs["sample_name"] = mass_spectrum.sample_name

    list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    columns_labels = dump_pretty(
        self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum)
    )

    # Write under the mass spectra group when one is supplied.
    parent = hdf_handle if mass_spectra_group is None else mass_spectra_group

    if group_key in parent.keys():
        scan_group = parent.get(group_key)
    else:
        scan_group = parent.create_group(group_key)

        if not mass_spectrum.is_centroid and export_raw:
            # Profile data is available: store it as a 2 x N array.
            # NOTE: row 0 holds the abundance profile and row 1 the m/z
            # profile; this defines the stored layout.
            mz_abun_array = empty(
                shape=(2, len(mass_spectrum.abundance_profile))
            )
            mz_abun_array[0] = mass_spectrum.abundance_profile
            mz_abun_array[1] = mass_spectrum.mz_exp_profile
            raw_ms_dataset = scan_group.create_dataset(
                "raw_ms", data=mz_abun_array, dtype="f8"
            )
        else:
            # No raw data to store: create an empty placeholder dataset.
            raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")

        raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)

        if isinstance(mass_spectrum, MassSpecfromFreq):
            raw_ms_dataset.attrs["TransientSetting"] = dump_pretty(
                setting_dicts.get("TransientSetting")
            )

    # The processed dataset is named after the number of entries already
    # in the scan group, so each call adds a new dataset.
    index_processed_data = str(len(scan_group.keys()))
    timenow = datetime.now(timezone.utc).strftime(timestamp_format)

    processed_dset = scan_group.create_dataset(
        index_processed_data, data=list_results
    )
    processed_dset.attrs["date_utc"] = timenow
    processed_dset.attrs["ColumnsLabels"] = columns_labels

    for attr_name, setting_key in (
        ("MoleculaSearchSetting", "MoleculaSearch"),
        ("MassSpecPeakSetting", "MassSpecPeak"),
        ("MassSpectrumSetting", "MassSpectrum"),
    ):
        processed_dset.attrs[attr_name] = dump_pretty(
            setting_dicts.get(setting_key)
        )
Adds the mass spectrum data to an HDF5 file.
Parameters
- hdf_handle (h5py.File): The HDF5 file handle.
- mass_spectrum (MassSpectrum): The mass spectrum to add to the HDF5 file.
- group_key (str): The group key (where to add the mass spectrum data within the HDF5 file).
- mass_spectra_group (h5py.Group, optional): The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
- export_raw (bool, optional): Whether to export the raw data. Defaults to True. If False, only the processed data (peaks) is exported (essentially centroided data).
def to_hdf(self):
    """Append the mass spectrum data to an HDF5 file.

    The file is the output file path with a ``.hdf5`` suffix, opened in
    append mode; the spectrum is stored under its scan number.
    """
    hdf5_path = self.output_file.with_suffix(".hdf5")

    with h5py.File(hdf5_path, "a") as hdf_handle:
        scan_key = str(self.mass_spectrum.scan_number)
        self.add_mass_spectrum_to_hdf5(hdf_handle, self.mass_spectrum, scan_key)
Exports the mass spectrum data to an HDF5 file.
def parameters_to_toml(self):
    """Serialise the mass spectrum parameters as TOML.

    Returns
    -------
    str
        The TOML string of the mass spectrum parameters.
    """
    spectrum = self.mass_spectrum
    settings = parameter_to_dict.get_dict_data_ms(spectrum)

    # Add the spectrum attributes and identifying metadata to the settings.
    for key, value in (
        ("MassSpecAttrs", self.get_mass_spec_attrs(spectrum)),
        ("analyzer", spectrum.analyzer),
        ("instrument_label", spectrum.instrument_label),
        ("sample_name", spectrum.sample_name),
    ):
        settings[key] = value

    return toml.dumps(settings)
Converts the mass spectrum parameters to a TOML string.
Returns
- str: The TOML string of the mass spectrum parameters.
def parameters_to_json(self):
    """Serialize the mass spectrum parameters as JSON.

    The settings dictionary produced by
    ``parameter_to_dict.get_dict_data_ms`` is extended with the
    ``MassSpecAttrs``, ``analyzer``, ``instrument_label`` and
    ``sample_name`` entries before being dumped.

    Returns
    -------
    str
        The JSON string of the mass spectrum parameters.
    """

    spectrum = self.mass_spectrum
    settings = parameter_to_dict.get_dict_data_ms(spectrum)

    extra_entries = (
        ("MassSpecAttrs", self.get_mass_spec_attrs(spectrum)),
        ("analyzer", spectrum.analyzer),
        ("instrument_label", spectrum.instrument_label),
        ("sample_name", spectrum.sample_name),
    )
    for key, value in extra_entries:
        settings[key] = value

    return json.dumps(settings)
Converts the mass spectrum parameters to a JSON string.
Returns
- str: The JSON string of the mass spectrum parameters.
def get_mass_spec_attrs(self, mass_spectrum):
    """Collect the exported mass spectrum attributes into a dictionary.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.

    Returns
    -------
    dict
        The mass spectrum attributes, keyed by their export name. Every
        key equals the attribute name it is read from, except ``"rt"``,
        which is read from ``retention_time``.
    """

    # (export key, attribute name) pairs, in output order.
    exported_attributes = (
        ("polarity", "polarity"),
        ("rt", "retention_time"),
        ("tic", "tic"),
        ("mobility_scan", "mobility_scan"),
        ("mobility_rt", "mobility_rt"),
        ("Aterm", "Aterm"),
        ("Bterm", "Bterm"),
        ("Cterm", "Cterm"),
        ("baseline_noise", "baseline_noise"),
        ("baseline_noise_std", "baseline_noise_std"),
    )

    return {
        key: getattr(mass_spectrum, attribute)
        for key, attribute in exported_attributes
    }
Returns the mass spectrum attributes as a dictionary.
Parameters
- mass_spectrum (MassSpectrum): The mass spectrum to export.
Returns
- dict: The mass spectrum attributes.
def get_all_used_atoms_in_order(self, mass_spectrum):
    """List every atom used by an assigned formula, in canonical order.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.

    Returns
    -------
    list
        The list of assigned atoms in the order specified by
        Atoms.atoms_order list. Empty when the spectrum is falsy or no
        peak carries a molecular formula.
    """

    reference_order = Atoms.atoms_order
    found_atoms = set()

    # A falsy spectrum contributes nothing; falsy peaks are skipped.
    for ms_peak in mass_spectrum or ():
        if not ms_peak:
            continue
        for m_formula in ms_peak:
            found_atoms.update(m_formula.atoms)

    # Rank each atom by its position in the reference order.
    return sorted(found_atoms, key=reference_order.index)
Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
Parameters
- mass_spectrum (MassSpectrum): The mass spectrum to export.
Returns
- list: The list of assigned atoms in the order specified by Atoms.atoms_order list.
def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    """Return the mass spectrum data as a list of column-aligned rows.

    Each dictionary produced by ``get_list_dict_data`` becomes one row (a
    list) whose positions follow ``self.columns_label`` followed by the
    atoms returned by ``get_all_used_atoms_in_order``. Columns that a
    result does not provide are filled with NaN.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    is_hdf5 : bool, optional
        Whether the mass spectrum is being exported to an HDF5 file.
        Forwarded to ``get_list_dict_data``. Defaults to False.

    Returns
    -------
    list
        A list of rows; each row is a list with one value per column label.

    Raises
    ------
    ValueError
        If a result dictionary contains a label that is not a column label.
    """

    column_labels = self.columns_label + self.get_all_used_atoms_in_order(
        mass_spectrum
    )

    # Resolve label positions once instead of a linear search per cell.
    # setdefault keeps the first occurrence, matching list.index().
    label_positions = {}
    for position, label in enumerate(column_labels):
        label_positions.setdefault(label, position)

    # Plain float NaN, so this block does not rely on the numpy.NaN alias
    # (removed in NumPy 2.0).
    missing_value = float("nan")
    column_count = len(column_labels)

    all_lines = []
    for dict_res in self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5):
        result_line = [missing_value] * column_count

        for label, value in dict_res.items():
            try:
                position = label_positions[label]
            except KeyError:
                # Same exception type that list.index() raised before.
                raise ValueError(
                    "{!r} is not in the column labels".format(label)
                ) from None
            result_line[position] = value

        all_lines.append(result_line)

    return all_lines
Returns the mass spectrum data as a list of rows, where each row is a list of values aligned to the column labels (columns without a value are NaN).
Parameters
- mass_spectrum (MassSpectrum): The mass spectrum to export.
- is_hdf5 (bool, optional): Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
Returns
- list: The mass spectrum data as a list of rows (one list of column-aligned values per result).
def get_list_dict_data(
    self,
    mass_spectrum,
    include_no_match=True,
    include_isotopologues=True,
    isotopologue_inline=True,
    no_match_inline=False,
    is_hdf5=False,
    additional_columns=None,
):
    """Returns the mass spectrum data as a list of dictionaries.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    include_no_match : bool, optional
        Whether to include unassigned (no match) data. Defaults to True.
    include_isotopologues : bool, optional
        Whether to include isotopologues. Defaults to True.
    isotopologue_inline : bool, optional
        Whether to include isotopologues inline. Defaults to True.
    no_match_inline : bool, optional
        Whether to include unassigned (no match) data inline. Defaults to False.
    is_hdf5 : bool, optional
        Whether the mass spectrum is being exported to an HDF5 file; when
        True, text values are UTF-8 encoded to bytes. Defaults to False.
    additional_columns : list, optional
        Extra per-formula columns to add to assigned rows. Recognized names
        are "Aromaticity Index", "NOSC" and "Aromaticity Index (modified)";
        any other name is added with a value of None. Defaults to None
        (no extra columns).

    Returns
    -------
    list
        The mass spectrum data as a list of dictionaries, with exact
        duplicates removed (first occurrence kept).

    Notes
    -----
    When ``output_score_method`` is one of the configured ``score_methods``,
    ``score_method`` is temporarily switched to it and restored afterwards,
    even if an exception interrupts the export.
    """

    dict_data_list = []

    def encode_text(text):
        """UTF-8 encode a text value when exporting for HDF5."""
        return text.encode("utf-8") if is_hdf5 else text

    # Atom column order, resolved on first use and then reused for every
    # assigned row instead of being recomputed per molecular formula.
    resolved_atoms_order = None

    def get_atoms_order():
        """Return the atom order list, computing it at most once."""
        nonlocal resolved_atoms_order
        if resolved_atoms_order is None:
            if self.atoms_order_list is None:
                resolved_atoms_order = self.get_all_used_atoms_in_order(
                    mass_spectrum
                )
            else:
                resolved_atoms_order = self.atoms_order_list
        return resolved_atoms_order

    def add_no_match_dict_data(index, ms_peak):
        """
        Export dictionary of mspeak info for unassigned (no match) data
        """
        dict_result = {
            "Index": index,
            "m/z": ms_peak._mz_exp,
            "Calibrated m/z": ms_peak.mz_exp,
            "Peak Height": ms_peak.abundance,
            "Peak Area": ms_peak.area,
            "Resolving Power": ms_peak.resolving_power,
            "S/N": ms_peak.signal_to_noise,
            "Ion Charge": ms_peak.ion_charge,
            "Heteroatom Class": encode_text(Labels.unassigned),
        }

        dict_data_list.append(dict_result)

    def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
        """
        Export dictionary of mspeak info for assigned (match) data
        """
        formula_dict = mformula.to_dict()

        dict_result = {
            "Index": index,
            "m/z": ms_peak._mz_exp,
            "Calibrated m/z": ms_peak.mz_exp,
            "Calculated m/z": mformula.mz_calc,
            "Peak Height": ms_peak.abundance,
            "Peak Area": ms_peak.area,
            "Resolving Power": ms_peak.resolving_power,
            "S/N": ms_peak.signal_to_noise,
            "Ion Charge": ms_peak.ion_charge,
            "m/z Error (ppm)": mformula.mz_error,
            "Confidence Score": mformula.confidence_score,
            "Isotopologue Similarity": mformula.isotopologue_similarity,
            "m/z Error Score": mformula.average_mz_error_score,
            "DBE": mformula.dbe,
            "Heteroatom Class": encode_text(mformula.class_label),
            "H/C": mformula.H_C,
            "O/C": mformula.O_C,
            "Ion Type": encode_text(mformula.ion_type.lower()),
            "Is Isotopologue": int(mformula.is_isotopologue),
            "Molecular Formula": encode_text(mformula.string),
        }
        if additional_columns is not None:
            possible_dict = {
                "Aromaticity Index": mformula.A_I,
                "NOSC": mformula.nosc,
                "Aromaticity Index (modified)": mformula.A_I_mod,
            }
            for column in additional_columns:
                dict_result[column] = possible_dict.get(column)

        if mformula.adduct_atom:
            dict_result["Adduct"] = encode_text(mformula.adduct_atom)

        if mformula.is_isotopologue:
            dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic

        for atom in get_atoms_order():
            if atom in formula_dict:
                dict_result[atom] = formula_dict[atom]

        dict_data_list.append(dict_result)

    def add_formula_row(index, ms_peak, m_formula):
        """Add an assigned row; isotopologues only when requested inline."""
        if m_formula.is_isotopologue:
            if include_isotopologues and isotopologue_inline:
                add_match_dict_data(
                    index,
                    ms_peak,
                    m_formula,
                    additional_columns=additional_columns,
                )
        else:
            # monoisotopic peak
            add_match_dict_data(
                index,
                ms_peak,
                m_formula,
                additional_columns=additional_columns,
            )

    def add_trailing_no_match_rows():
        """Append unassigned peaks after the assigned ones, if requested."""
        if include_no_match and not no_match_inline:
            for index, ms_peak in enumerate(mass_spectrum):
                if not ms_peak:
                    add_no_match_dict_data(index, ms_peak)

    score_methods = mass_spectrum.molecular_search_settings.score_methods
    selected_score_method = (
        mass_spectrum.molecular_search_settings.output_score_method
    )

    if selected_score_method in score_methods:
        # temp set score method as the one chosen in the output
        current_method = mass_spectrum.molecular_search_settings.score_method
        mass_spectrum.molecular_search_settings.score_method = selected_score_method

        try:
            for index, ms_peak in enumerate(mass_spectrum):
                if ms_peak:
                    m_formula = ms_peak.best_molecular_formula_candidate

                    if m_formula and not m_formula.is_isotopologue:
                        add_match_dict_data(
                            index,
                            ms_peak,
                            m_formula,
                            additional_columns=additional_columns,
                        )

                        # Isotopologue rows follow their monoisotopic row.
                        for (
                            iso_mspeak_index,
                            iso_mf_formula,
                        ) in m_formula.mspeak_mf_isotopologues_indexes:
                            add_match_dict_data(
                                iso_mspeak_index,
                                mass_spectrum[iso_mspeak_index],
                                iso_mf_formula,
                                additional_columns=additional_columns,
                            )
                elif include_no_match and no_match_inline:
                    add_no_match_dict_data(index, ms_peak)

            add_trailing_no_match_rows()
        finally:
            # Always restore the caller's score method, even on error, so
            # the spectrum settings are not left modified.
            mass_spectrum.molecular_search_settings.score_method = current_method

    else:
        for index, ms_peak in enumerate(mass_spectrum):
            # check if there is a molecular formula candidate for the msPeak
            if ms_peak:
                for m_formula in ms_peak:
                    min_score = (
                        mass_spectrum.molecular_search_settings.output_min_score
                    )
                    if min_score > 0 and not (
                        m_formula.confidence_score >= min_score
                    ):
                        # cutoff because of low score
                        # NOTE(review): this row is added regardless of
                        # include_no_match -- confirm that is intended.
                        add_no_match_dict_data(index, ms_peak)
                    else:
                        add_formula_row(index, ms_peak, m_formula)
            elif include_no_match and no_match_inline:
                # include not_match
                add_no_match_dict_data(index, ms_peak)

        if include_isotopologues and not isotopologue_inline:
            for index, ms_peak in enumerate(mass_spectrum):
                for m_formula in ms_peak:
                    if m_formula.is_isotopologue and (
                        m_formula.confidence_score
                        >= mass_spectrum.molecular_search_settings.output_min_score
                    ):
                        add_match_dict_data(
                            index,
                            ms_peak,
                            m_formula,
                            additional_columns=additional_columns,
                        )

        add_trailing_no_match_rows()

    # remove duplicated add_match data possibly introduced on the
    # output_score_filter step (rows are dicts, hence the equality scan)
    res = []
    for row in dict_data_list:
        if row not in res:
            res.append(row)

    return res
Returns the mass spectrum data as a list of dictionaries.
Parameters
- mass_spectrum (MassSpectrum): The mass spectrum to export.
- include_no_match (bool, optional): Whether to include unassigned (no match) data. Defaults to True.
- include_isotopologues (bool, optional): Whether to include isotopologues. Defaults to True.
- isotopologue_inline (bool, optional): Whether to include isotopologues inline. Defaults to True.
- no_match_inline (bool, optional): Whether to include unassigned (no match) data inline. Defaults to False.
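- is_hdf5 (bool, optional): Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
- additional_columns (list, optional): Extra per-formula columns to add to assigned rows; recognized names are "Aromaticity Index", "NOSC" and "Aromaticity Index (modified)". Defaults to None (no extra columns).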
Returns
- list: The mass spectrum data as a list of dictionaries.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id