corems.mass_spectrum.factory.MassSpectrumClasses
from pathlib import Path

import numpy as np
from lmfit.models import GaussianModel

# from matplotlib import rcParamsDefault, rcParams
from numpy import array, float64, histogram, trapz, where
from pandas import DataFrame

from corems.encapsulation.constant import Labels
from corems.encapsulation.factory.parameters import MSParameters
from corems.encapsulation.input.parameter_from_json import (
    load_and_set_parameters_ms,
    load_and_set_toml_parameters_ms,
)
from corems.mass_spectrum.calc.KendrickGroup import KendrickGrouping
from corems.mass_spectrum.calc.MassSpectrumCalc import MassSpecCalc
from corems.mass_spectrum.calc.MeanResolvingPowerFilter import MeanResolvingPowerFilter
from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak as MSPeak

__author__ = "Yuri E. Corilo"
__date__ = "Jun 12, 2019"


def overrides(interface_class):
    """Return a decorator that checks a method overrides one from an interface class.

    Parameters
    ----------
    interface_class : type
        The class expected to expose an attribute named like the decorated method.

    Returns
    -------
    callable
        A decorator that returns the decorated method unchanged.

    Raises
    ------
    AssertionError
        If ``interface_class`` has no attribute with the decorated method's name.
    """

    def overrider(method):
        # Raise explicitly rather than using ``assert`` so the check still runs
        # when Python is started with -O (which strips assert statements).
        if method.__name__ not in dir(interface_class):
            raise AssertionError(
                "%s does not override a member of %r"
                % (method.__name__, interface_class)
            )
        return method

    return overrider


class MassSpecBase(MassSpecCalc, KendrickGrouping):
    """A mass spectrum base class, stores the profile data and instrument settings.

    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    _mspeaks is populated under the hood by calling process_mass_spec method.
    Iteration is null if _mspeaks is empty.

    Parameters
    ----------
    mz_exp : array_like
        The m/z values of the mass spectrum.
    abundance : array_like
        The abundance values of the mass spectrum.
    d_params : dict
        A dictionary of parameters for the mass spectrum.
    **kwargs
        Additional keyword arguments.

    Attributes
    ----------

    mspeaks : list
        A list of mass peaks.
    is_calibrated : bool
        Whether the mass spectrum is calibrated.
    is_centroid : bool
        Whether the mass spectrum is centroided.
    has_frequency : bool
        Whether the mass spectrum has a frequency domain.
    calibration_order : None or int
        The order of the mass spectrum's calibration.
    calibration_points : None or ndarray
        The calibration points of the mass spectrum.
    calibration_ref_mzs: None or ndarray
        The reference m/z values of the mass spectrum's calibration.
    calibration_meas_mzs : None or ndarray
        The measured m/z values of the mass spectrum's calibration.
    calibration_RMS : None or float
        The root mean square of the mass spectrum's calibration.
    calibration_segment : None or CalibrationSegment
        The calibration segment of the mass spectrum.
    calibration_raw_error_median : None
        Initialized to None by the constructor.
    calibration_raw_error_stdev : None
        Initialized to None by the constructor.
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.
    _dict_nominal_masses_indexes : dict
        A dictionary of nominal masses and their indexes.
    _baseline_noise : float
        The baseline noise of the mass spectrum.
    _baseline_noise_std : float
        The standard deviation of the baseline noise of the mass spectrum.
    _dynamic_range : float or None
        The dynamic range of the mass spectrum.
    _transient_settings : None or TransientSettings
        The transient settings of the mass spectrum.
    _frequency_domain : None or FrequencyDomain
        The frequency domain of the mass spectrum.
    _mz_cal_profile : None or MzCalibrationProfile
        The m/z calibration profile of the mass spectrum.

    Methods
    -------
    * process_mass_spec(). Main function to process the mass spectrum,
    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.

    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
    """

    def __init__(self, mz_exp, abundance, d_params, **kwargs):
        self._abundance = array(abundance, dtype=float64)
        self._mz_exp = array(mz_exp, dtype=float64)

        # objects created after process_mass_spec() function
        self._mspeaks = list()
        self.mspeaks = list()
        self._dict_nominal_masses_indexes = dict()
        # NOTE: both noise defaults are replaced by the d_params values in
        # _set_parameters_objects() below.
        self._baseline_noise = 0.001
        self._baseline_noise_std = 0.001
        self._dynamic_range = None
        # set to None: initialization occurs inside subclass MassSpecfromFreq
        self._transient_settings = None
        self._frequency_domain = None
        self._mz_cal_profile = None
        self.is_calibrated = False

        self._set_parameters_objects(d_params)
        self._init_settings()

        self.is_centroid = False
        self.has_frequency = False

        self.calibration_order = None
        self.calibration_points = None
        self.calibration_ref_mzs = None
        self.calibration_meas_mzs = None
        self.calibration_RMS = None
        self.calibration_segment = None
        self.calibration_raw_error_median = None
        self.calibration_raw_error_stdev = None

    def _init_settings(self):
        """Initializes the settings for the mass spectrum."""
        self._parameters = MSParameters()

    def __len__(self):
        return len(self.mspeaks)

    def __getitem__(self, position) -> MSPeak:
        return self.mspeaks[position]

    def set_indexes(self, list_indexes):
        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.

        Parameters
        ----------
        list_indexes : list of int
            A list of integers representing the indexes of the MSpeaks to iterate over.

        """
        self.mspeaks = [self._mspeaks[i] for i in list_indexes]

        # Re-number the selected peaks so each index matches its new position.
        for i, mspeak in enumerate(self.mspeaks):
            mspeak.index = i

        self._set_nominal_masses_start_final_indexes()

    def reset_indexes(self):
        """Reset the mass spectrum to iterate over all MSpeaks objects.

        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.

        """
        self.mspeaks = self._mspeaks

        for i, mspeak in enumerate(self.mspeaks):
            mspeak.index = i

        self._set_nominal_masses_start_final_indexes()

    def add_mspeak(
        self,
        ion_charge,
        mz_exp,
        abundance,
        resolving_power,
        signal_to_noise,
        massspec_indexes,
        exp_freq=None,
        ms_parent=None,
    ):
        """Add a new MSPeak object to the MassSpectrum object.

        Parameters
        ----------
        ion_charge : int
            The ion charge of the MSPeak.
        mz_exp : float
            The experimental m/z value of the MSPeak.
        abundance : float
            The abundance of the MSPeak.
        resolving_power : float
            The resolving power of the MSPeak.
        signal_to_noise : float
            The signal-to-noise ratio of the MSPeak.
        massspec_indexes : list
            A list of indexes of the MSPeak in the MassSpectrum object.
        exp_freq : float, optional
            The experimental frequency of the MSPeak. Defaults to None.
        ms_parent : MSParent, optional
            The MSParent object associated with the MSPeak. Defaults to None.
        """
        mspeak = MSPeak(
            ion_charge,
            mz_exp,
            abundance,
            resolving_power,
            signal_to_noise,
            massspec_indexes,
            # the new peak's index is its position in the full peak list
            len(self._mspeaks),
            exp_freq=exp_freq,
            ms_parent=ms_parent,
        )

        self._mspeaks.append(mspeak)

    def _set_parameters_objects(self, d_params):
        """Set the parameters of the MassSpectrum object.

        Parameters
        ----------
        d_params : dict
            A dictionary containing the parameters to set.

        Raises
        ------
        TypeError
            If ``d_params`` has no "polarity" entry (``int(None)``).

        Notes
        -----
        This method sets the following parameters of the MassSpectrum object:
            - _calibration_terms
            - label
            - analyzer
            - acquisition_time
            - instrument_label
            - polarity
            - scan_number
            - retention_time
            - mobility_rt
            - mobility_scan
            - _filename
            - _dir_location
            - _baseline_noise
            - _baseline_noise_std
            - sample_name
        """
        self._calibration_terms = (
            d_params.get("Aterm"),
            d_params.get("Bterm"),
            d_params.get("Cterm"),
        )

        self.label = d_params.get(Labels.label)

        self.analyzer = d_params.get("analyzer")

        self.acquisition_time = d_params.get("acquisition_time")

        self.instrument_label = d_params.get("instrument_label")

        self.polarity = int(d_params.get("polarity"))

        self.scan_number = d_params.get("scan_number")

        self.retention_time = d_params.get("rt")

        self.mobility_rt = d_params.get("mobility_rt")

        self.mobility_scan = d_params.get("mobility_scan")

        self._filename = d_params.get("filename_path")

        self._dir_location = d_params.get("dir_location")

        self._baseline_noise = d_params.get("baseline_noise")

        self._baseline_noise_std = d_params.get("baseline_noise_std")

        # A missing, empty or "Unknown" sample name falls back to the file stem.
        sample_name = d_params.get("sample_name")
        if not sample_name or sample_name == "Unknown":
            sample_name = self.filename.stem
        self.sample_name = sample_name

    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
        """Reset calibration terms, recompute the m/z axis and redo peak picking.

        Parameters
        ----------
        Aterm : float
            The A-term calibration coefficient.
        Bterm : float
            The B-term calibration coefficient.
        C : float
            The C-term calibration coefficient.
        fas : float, optional
            The frequency amplitude scaling factor. Default is 0.
            Accepted for interface compatibility; not used by this method.
        """
        self._calibration_terms = (Aterm, Bterm, C)

        # The abundance profile is left untouched; only the m/z axis changes.
        self._mz_exp = self._f_to_mz()
        self.find_peaks()
        self.reset_indexes()

    def clear_molecular_formulas(self):
        """Clear the molecular formulas for all mspeaks in the MassSpectrum.

        Returns
        -------
        numpy.ndarray
            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.

        Raises
        ------
        Exception
            If the mspeaks list is empty (raised by check_mspeaks()).
        """
        self.check_mspeaks()
        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])

    def process_mass_spec(self, keep_profile=True):
        """Process the mass spectrum.

        Parameters
        ----------
        keep_profile : bool, optional
            Whether to keep the profile data after processing. Defaults to True.

        Notes
        -----
        This method does the following:
            - calculates the noise threshold
            - does peak picking (creates mspeak_objs)
            - resets the mspeak_obj indexes
        """
        # If the steps below are run manually, make sure to rerun
        # filter_by_noise_threshold afterwards.
        self.cal_noise_threshold()

        self.find_peaks()
        self.reset_indexes()

        if self.mspeaks:
            self._dynamic_range = self.max_abundance / self.min_abundance
        else:
            self._dynamic_range = 0

        if not keep_profile:
            # Zero the profile arrays in place (lengths are preserved).
            self._abundance *= 0
            self._mz_exp *= 0

    def cal_noise_threshold(self):
        """Calculate the noise threshold of the mass spectrum.

        Simulated profiles get fixed baseline values (0.1, 1). Otherwise the
        baseline noise and its standard deviation come from the log-based
        calculation when ``settings.noise_threshold_method`` is "log", and from
        the default calculation for any other method.
        """
        # ``elif`` keeps the simulated-profile constants from being overwritten
        # by the calculations below, which previously always ran.
        if self.label == Labels.simulated_profile:
            self._baseline_noise, self._baseline_noise_std = 0.1, 1

        elif self.settings.noise_threshold_method == "log":
            self._baseline_noise, self._baseline_noise_std = (
                self.run_log_noise_threshold_calc()
            )

        else:
            self._baseline_noise, self._baseline_noise_std = (
                self.run_noise_threshold_calc()
            )
370 371 @property 372 def parameters(self): 373 """Return the parameters of the mass spectrum.""" 374 return self._parameters 375 376 @parameters.setter 377 def parameters(self, instance_MSParameters): 378 self._parameters = instance_MSParameters 379 380 def set_parameter_from_json(self, parameters_path): 381 """Set the parameters of the mass spectrum from a JSON file. 382 383 Parameters 384 ---------- 385 parameters_path : str 386 The path to the JSON file containing the parameters. 387 """ 388 load_and_set_parameters_ms(self, parameters_path=parameters_path) 389 390 def set_parameter_from_toml(self, parameters_path): 391 load_and_set_toml_parameters_ms(self, parameters_path=parameters_path) 392 393 @property 394 def mspeaks_settings(self): 395 """Return the MS peak settings of the mass spectrum.""" 396 return self.parameters.ms_peak 397 398 @mspeaks_settings.setter 399 def mspeaks_settings(self, instance_MassSpecPeakSetting): 400 self.parameters.ms_peak = instance_MassSpecPeakSetting 401 402 @property 403 def settings(self): 404 """Return the settings of the mass spectrum.""" 405 return self.parameters.mass_spectrum 406 407 @settings.setter 408 def settings(self, instance_MassSpectrumSetting): 409 self.parameters.mass_spectrum = instance_MassSpectrumSetting 410 411 @property 412 def molecular_search_settings(self): 413 """Return the molecular search settings of the mass spectrum.""" 414 return self.parameters.molecular_search 415 416 @molecular_search_settings.setter 417 def molecular_search_settings(self, instance_MolecularFormulaSearchSettings): 418 self.parameters.molecular_search = instance_MolecularFormulaSearchSettings 419 420 @property 421 def mz_cal_profile(self): 422 """Return the calibrated m/z profile of the mass spectrum.""" 423 return self._mz_cal_profile 424 425 @mz_cal_profile.setter 426 def mz_cal_profile(self, mz_cal_list): 427 if len(mz_cal_list) == len(self._mz_exp): 428 self._mz_cal_profile = mz_cal_list 429 else: 430 raise Exception( 431 
"calibrated array (%i) is not of the same size of the data (%i)" 432 % (len(mz_cal_list), len(self.mz_exp_profile)) 433 ) 434 435 @property 436 def mz_cal(self): 437 """Return the calibrated m/z values of the mass spectrum.""" 438 return array([mspeak.mz_cal for mspeak in self.mspeaks]) 439 440 @mz_cal.setter 441 def mz_cal(self, mz_cal_list): 442 if len(mz_cal_list) == len(self.mspeaks): 443 self.is_calibrated = True 444 for index, mz_cal in enumerate(mz_cal_list): 445 self.mspeaks[index].mz_cal = mz_cal 446 else: 447 raise Exception( 448 "calibrated array (%i) is not of the same size of the data (%i)" 449 % (len(mz_cal_list), len(self._mspeaks)) 450 ) 451 452 @property 453 def mz_exp(self): 454 """Return the experimental m/z values of the mass spectrum.""" 455 self.check_mspeaks() 456 457 if self.is_calibrated: 458 return array([mspeak.mz_cal for mspeak in self.mspeaks]) 459 460 else: 461 return array([mspeak.mz_exp for mspeak in self.mspeaks]) 462 463 @property 464 def freq_exp_profile(self): 465 """Return the experimental frequency profile of the mass spectrum.""" 466 return self._frequency_domain 467 468 @freq_exp_profile.setter 469 def freq_exp_profile(self, new_data): 470 self._frequency_domain = array(new_data) 471 472 @property 473 def freq_exp_pp(self): 474 """Return the experimental frequency values of the mass spectrum that are used for peak picking.""" 475 _, _, freq = self.prepare_peak_picking_data() 476 return freq 477 478 @property 479 def mz_exp_profile(self): 480 """Return the experimental m/z profile of the mass spectrum.""" 481 if self.is_calibrated: 482 return self.mz_cal_profile 483 else: 484 return self._mz_exp 485 486 @mz_exp_profile.setter 487 def mz_exp_profile(self, new_data): 488 self._mz_exp = array(new_data) 489 490 @property 491 def mz_exp_pp(self): 492 """Return the experimental m/z values of the mass spectrum that are used for peak picking.""" 493 mz, _, _ = self.prepare_peak_picking_data() 494 return mz 495 496 @property 497 def 
abundance_profile(self): 498 """Return the abundance profile of the mass spectrum.""" 499 return self._abundance 500 501 @abundance_profile.setter 502 def abundance_profile(self, new_data): 503 self._abundance = array(new_data) 504 505 @property 506 def abundance_profile_pp(self): 507 """Return the abundance profile of the mass spectrum that is used for peak picking.""" 508 _, abundance, _ = self.prepare_peak_picking_data() 509 return abundance 510 511 @property 512 def abundance(self): 513 """Return the abundance values of the mass spectrum.""" 514 self.check_mspeaks() 515 return array([mspeak.abundance for mspeak in self.mspeaks]) 516 517 def freq_exp(self): 518 """Return the experimental frequency values of the mass spectrum.""" 519 self.check_mspeaks() 520 return array([mspeak.freq_exp for mspeak in self.mspeaks]) 521 522 @property 523 def resolving_power(self): 524 """Return the resolving power values of the mass spectrum.""" 525 self.check_mspeaks() 526 return array([mspeak.resolving_power for mspeak in self.mspeaks]) 527 528 @property 529 def signal_to_noise(self): 530 self.check_mspeaks() 531 return array([mspeak.signal_to_noise for mspeak in self.mspeaks]) 532 533 @property 534 def nominal_mz(self): 535 """Return the nominal m/z values of the mass spectrum.""" 536 if self._dict_nominal_masses_indexes: 537 return sorted(list(self._dict_nominal_masses_indexes.keys())) 538 else: 539 raise ValueError("Nominal indexes not yet set") 540 541 def get_mz_and_abundance_peaks_tuples(self): 542 """Return a list of tuples containing the m/z and abundance values of the mass spectrum.""" 543 self.check_mspeaks() 544 return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks] 545 546 @property 547 def kmd(self): 548 """Return the Kendrick mass defect values of the mass spectrum.""" 549 self.check_mspeaks() 550 return array([mspeak.kmd for mspeak in self.mspeaks]) 551 552 @property 553 def kendrick_mass(self): 554 """Return the Kendrick mass values of the mass 
spectrum.""" 555 self.check_mspeaks() 556 return array([mspeak.kendrick_mass for mspeak in self.mspeaks]) 557 558 @property 559 def max_mz_exp(self): 560 """Return the maximum experimental m/z value of the mass spectrum.""" 561 return max([mspeak.mz_exp for mspeak in self.mspeaks]) 562 563 @property 564 def min_mz_exp(self): 565 """Return the minimum experimental m/z value of the mass spectrum.""" 566 return min([mspeak.mz_exp for mspeak in self.mspeaks]) 567 568 @property 569 def max_abundance(self): 570 """Return the maximum abundance value of the mass spectrum.""" 571 return max([mspeak.abundance for mspeak in self.mspeaks]) 572 573 @property 574 def max_signal_to_noise(self): 575 """Return the maximum signal-to-noise ratio of the mass spectrum.""" 576 return max([mspeak.signal_to_noise for mspeak in self.mspeaks]) 577 578 @property 579 def most_abundant_mspeak(self): 580 """Return the most abundant MSpeak object of the mass spectrum.""" 581 return max(self.mspeaks, key=lambda m: m.abundance) 582 583 @property 584 def min_abundance(self): 585 """Return the minimum abundance value of the mass spectrum.""" 586 return min([mspeak.abundance for mspeak in self.mspeaks]) 587 588 # takes too much cpu time 589 @property 590 def dynamic_range(self): 591 """Return the dynamic range of the mass spectrum.""" 592 return self._dynamic_range 593 594 @property 595 def baseline_noise(self): 596 """Return the baseline noise of the mass spectrum.""" 597 if self._baseline_noise: 598 return self._baseline_noise 599 else: 600 return None 601 602 @property 603 def baseline_noise_std(self): 604 """Return the standard deviation of the baseline noise of the mass spectrum.""" 605 if self._baseline_noise_std == 0: 606 return self._baseline_noise_std 607 if self._baseline_noise_std: 608 return self._baseline_noise_std 609 else: 610 return None 611 612 @property 613 def Aterm(self): 614 """Return the A-term calibration coefficient of the mass spectrum.""" 615 return 
self._calibration_terms[0] 616 617 @property 618 def Bterm(self): 619 """Return the B-term calibration coefficient of the mass spectrum.""" 620 return self._calibration_terms[1] 621 622 @property 623 def Cterm(self): 624 """Return the C-term calibration coefficient of the mass spectrum.""" 625 return self._calibration_terms[2] 626 627 @property 628 def filename(self): 629 """Return the filename of the mass spectrum.""" 630 return Path(self._filename) 631 632 @property 633 def dir_location(self): 634 """Return the directory location of the mass spectrum.""" 635 return self._dir_location 636 637 def sort_by_mz(self): 638 """Sort the mass spectrum by m/z values.""" 639 return sorted(self, key=lambda m: m.mz_exp) 640 641 def sort_by_abundance(self, reverse=False): 642 """Sort the mass spectrum by abundance values.""" 643 return sorted(self, key=lambda m: m.abundance, reverse=reverse) 644 645 @property 646 def tic(self): 647 """Return the total ion current of the mass spectrum.""" 648 return trapz(self.abundance_profile, self.mz_exp_profile) 649 650 def check_mspeaks_warning(self): 651 """Check if the mass spectrum has MSpeaks objects. 652 653 Raises 654 ------ 655 Warning 656 If the mass spectrum has no MSpeaks objects. 657 """ 658 import warnings 659 660 if self.mspeaks: 661 pass 662 else: 663 warnings.warn("mspeaks list is empty, continuing without filtering data") 664 665 def check_mspeaks(self): 666 """Check if the mass spectrum has MSpeaks objects. 667 668 Raises 669 ------ 670 Exception 671 If the mass spectrum has no MSpeaks objects. 672 """ 673 if self.mspeaks: 674 pass 675 else: 676 raise Exception( 677 "mspeaks list is empty, please run process_mass_spec() first" 678 ) 679 680 def remove_assignment_by_index(self, indexes): 681 """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes. 
682 683 Parameters 684 ---------- 685 indexes : list of int 686 A list of indexes of the MSpeaks objects to remove the molecular formula assignment from. 687 """ 688 for i in indexes: 689 self.mspeaks[i].clear_molecular_formulas() 690 691 def filter_by_index(self, list_indexes): 692 """Filter the mass spectrum by the specified indexes. 693 694 Parameters 695 ---------- 696 list_indexes : list of int 697 A list of indexes of the MSpeaks objects to drop. 698 699 """ 700 701 self.mspeaks = [ 702 self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes 703 ] 704 705 for i, mspeak in enumerate(self.mspeaks): 706 mspeak.index = i 707 708 self._set_nominal_masses_start_final_indexes() 709 710 def filter_by_mz(self, min_mz, max_mz): 711 """Filter the mass spectrum by the specified m/z range. 712 713 Parameters 714 ---------- 715 min_mz : float 716 The minimum m/z value to keep. 717 max_mz : float 718 The maximum m/z value to keep. 719 720 """ 721 self.check_mspeaks_warning() 722 indexes = [ 723 index 724 for index, mspeak in enumerate(self.mspeaks) 725 if not min_mz <= mspeak.mz_exp <= max_mz 726 ] 727 self.filter_by_index(indexes) 728 729 def filter_by_s2n(self, min_s2n, max_s2n=False): 730 """Filter the mass spectrum by the specified signal-to-noise ratio range. 731 732 Parameters 733 ---------- 734 min_s2n : float 735 The minimum signal-to-noise ratio to keep. 736 max_s2n : float, optional 737 The maximum signal-to-noise ratio to keep. Defaults to False (no maximum). 
738 739 """ 740 self.check_mspeaks_warning() 741 if max_s2n: 742 indexes = [ 743 index 744 for index, mspeak in enumerate(self.mspeaks) 745 if not min_s2n <= mspeak.signal_to_noise <= max_s2n 746 ] 747 else: 748 indexes = [ 749 index 750 for index, mspeak in enumerate(self.mspeaks) 751 if mspeak.signal_to_noise <= min_s2n 752 ] 753 self.filter_by_index(indexes) 754 755 def filter_by_abundance(self, min_abund, max_abund=False): 756 """Filter the mass spectrum by the specified abundance range. 757 758 Parameters 759 ---------- 760 min_abund : float 761 The minimum abundance to keep. 762 max_abund : float, optional 763 The maximum abundance to keep. Defaults to False (no maximum). 764 765 """ 766 self.check_mspeaks_warning() 767 if max_abund: 768 indexes = [ 769 index 770 for index, mspeak in enumerate(self.mspeaks) 771 if not min_abund <= mspeak.abundance <= max_abund 772 ] 773 else: 774 indexes = [ 775 index 776 for index, mspeak in enumerate(self.mspeaks) 777 if mspeak.abundance <= min_abund 778 ] 779 self.filter_by_index(indexes) 780 781 def filter_by_max_resolving_power(self, B, T): 782 """Filter the mass spectrum by the specified maximum resolving power. 783 784 Parameters 785 ---------- 786 B : float 787 T : float 788 789 """ 790 791 rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z) 792 793 self.check_mspeaks_warning() 794 795 indexes_to_remove = [ 796 index 797 for index, mspeak in enumerate(self.mspeaks) 798 if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge) 799 ] 800 self.filter_by_index(indexes_to_remove) 801 802 def filter_by_mean_resolving_power( 803 self, ndeviations=3, plot=False, guess_pars=False 804 ): 805 """Filter the mass spectrum by the specified mean resolving power. 806 807 Parameters 808 ---------- 809 ndeviations : float, optional 810 The number of standard deviations to use for filtering. Defaults to 3. 811 plot : bool, optional 812 Whether to plot the resolving power distribution. Defaults to False. 
813 guess_pars : bool, optional 814 Whether to guess the parameters for the Gaussian model. Defaults to False. 815 816 """ 817 self.check_mspeaks_warning() 818 indexes_to_remove = MeanResolvingPowerFilter( 819 self, ndeviations, plot, guess_pars 820 ).main() 821 self.filter_by_index(indexes_to_remove) 822 823 def filter_by_min_resolving_power(self, B, T, apodization_method: str=None, tolerance: float=0): 824 """Filter the mass spectrum by the calculated minimum theoretical resolving power. 825 826 This is currently designed only for FTICR data, and accounts only for magnitude mode data 827 Accurate results require passing the apodisaion method used to calculate the resolving power. 828 see the ICRMassPeak function `resolving_power_calc` for more details. 829 830 Parameters 831 ---------- 832 B : Magnetic field strength in Tesla, float 833 T : transient length in seconds, float 834 apodization_method : str, optional 835 The apodization method to use for calculating the resolving power. Defaults to None. 836 tolerance : float, optional 837 The tolerance for the threshold. Defaults to 0, i.e. no tolerance 838 839 """ 840 if self.analyzer != "ICR": 841 raise Exception( 842 "This method is only applicable to ICR mass spectra. 
" 843 ) 844 845 self.check_mspeaks_warning() 846 847 indexes_to_remove = [ 848 index 849 for index, mspeak in enumerate(self.mspeaks) 850 if mspeak.resolving_power < (1-tolerance) * mspeak.resolving_power_calc(B, T, apodization_method=apodization_method) 851 ] 852 self.filter_by_index(indexes_to_remove) 853 854 def filter_by_noise_threshold(self): 855 """Filter the mass spectrum by the noise threshold.""" 856 857 threshold = self.get_noise_threshold()[1][0] 858 859 self.check_mspeaks_warning() 860 861 indexes_to_remove = [ 862 index 863 for index, mspeak in enumerate(self.mspeaks) 864 if mspeak.abundance <= threshold 865 ] 866 self.filter_by_index(indexes_to_remove) 867 868 def find_peaks(self): 869 """Find the peaks of the mass spectrum.""" 870 # needs to clear previous results from peak_picking 871 self._mspeaks = list() 872 873 # then do peak picking 874 self.do_peak_picking() 875 # print("A total of %i peaks were found" % len(self._mspeaks)) 876 877 def change_kendrick_base_all_mspeaks(self, kendrick_dict_base): 878 """Change the Kendrick base of all MSpeaks objects. 879 880 Parameters 881 ---------- 882 kendrick_dict_base : dict 883 A dictionary of the Kendrick base to change to. 884 885 Notes 886 ----- 887 Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc 888 """ 889 self.parameters.ms_peak.kendrick_base = kendrick_dict_base 890 891 for mspeak in self.mspeaks: 892 mspeak.change_kendrick_base(kendrick_dict_base) 893 894 def get_nominal_mz_first_last_indexes(self, nominal_mass): 895 """Return the first and last indexes of the MSpeaks objects with the specified nominal mass. 896 897 Parameters 898 ---------- 899 nominal_mass : int 900 The nominal mass to get the indexes for. 901 902 Returns 903 ------- 904 tuple 905 A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass. 
906 """ 907 if self._dict_nominal_masses_indexes: 908 if nominal_mass in self._dict_nominal_masses_indexes.keys(): 909 return ( 910 self._dict_nominal_masses_indexes.get(nominal_mass)[0], 911 self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1, 912 ) 913 914 else: 915 # import warnings 916 # uncomment warn to distribution 917 # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass) 918 return (0, 0) 919 else: 920 raise Exception( 921 "run process_mass_spec() function before trying to access the data" 922 ) 923 924 def get_masses_count_by_nominal_mass(self): 925 """Return a dictionary of the nominal masses and their counts.""" 926 927 dict_nominal_masses_count = {} 928 929 all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks])) 930 931 for nominal_mass in all_nominal_masses: 932 if nominal_mass not in dict_nominal_masses_count: 933 dict_nominal_masses_count[nominal_mass] = len( 934 list(self.get_nominal_mass_indexes(nominal_mass)) 935 ) 936 937 return dict_nominal_masses_count 938 939 def datapoints_count_by_nominal_mz(self, mz_overlay=0.1): 940 """Return a dictionary of the nominal masses and their counts. 941 942 Parameters 943 ---------- 944 mz_overlay : float, optional 945 The m/z overlay to use for counting. Defaults to 0.1. 946 947 Returns 948 ------- 949 dict 950 A dictionary of the nominal masses and their counts. 
951 """ 952 dict_nominal_masses_count = {} 953 954 all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks])) 955 956 for nominal_mass in all_nominal_masses: 957 if nominal_mass not in dict_nominal_masses_count: 958 min_mz = nominal_mass - mz_overlay 959 960 max_mz = nominal_mass + 1 + mz_overlay 961 962 indexes = indexes = where( 963 (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz) 964 ) 965 966 dict_nominal_masses_count[nominal_mass] = indexes[0].size 967 968 return dict_nominal_masses_count 969 970 def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1): 971 """Return the indexes of the MSpeaks objects with the specified nominal mass. 972 973 Parameters 974 ---------- 975 nominal_mass : int 976 The nominal mass to get the indexes for. 977 overlay : float, optional 978 The m/z overlay to use for counting. Defaults to 0.1. 979 980 Returns 981 ------- 982 generator 983 A generator of the indexes of the MSpeaks objects with the specified nominal mass. 984 """ 985 min_mz_to_look = nominal_mass - overlay 986 max_mz_to_look = nominal_mass + 1 + overlay 987 988 return ( 989 i 990 for i in range(len(self.mspeaks)) 991 if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look 992 ) 993 994 # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look) 995 # return indexes 996 997 def _set_nominal_masses_start_final_indexes(self): 998 """Set the start and final indexes of the MSpeaks objects for all nominal masses.""" 999 dict_nominal_masses_indexes = {} 1000 1001 all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks) 1002 1003 for nominal_mass in all_nominal_masses: 1004 # indexes = self.get_nominal_mass_indexes(nominal_mass) 1005 # Convert the iterator to a list to avoid multiple calls 1006 indexes = list(self.get_nominal_mass_indexes(nominal_mass)) 1007 1008 # If the list is not empty, find the first and last; otherwise, set None 1009 if indexes: 1010 first, last = 
def plot_profile_and_noise_threshold(self, ax=None, legend=False):
    """Plot the profile data, the baseline noise and the noise threshold.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        The matplotlib axes to plot on. Defaults to None, in which case the
        current axes (``plt.gca()``) are used.
    legend : bool, optional
        Whether to show the legend. Defaults to False.

    Returns
    -------
    matplotlib.axes.Axes
        The matplotlib axes containing the plot.

    Raises
    ------
    Exception
        If the baseline noise is missing or its standard deviation is unset/zero,
        i.e. the noise threshold has not been calculated yet.
    """
    # The original guard tested baseline_noise_std twice and never looked at
    # baseline_noise. A baseline of exactly 0 is still accepted; only a missing
    # (None) baseline is rejected. The std check keeps its original truthiness test.
    # Validation runs before the matplotlib import so the error is raised
    # regardless of whether a plotting backend is available.
    if self.baseline_noise is None or not self.baseline_noise_std:
        raise Exception("Calculate noise threshold first")

    import matplotlib.pyplot as plt

    baseline = (self.baseline_noise, self.baseline_noise)
    x, y = self.get_noise_threshold()

    if ax is None:
        ax = plt.gca()

    ax.plot(
        self.mz_exp_profile,
        self.abundance_profile,
        color="green",
        label="Spectrum",
    )
    ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
    ax.plot(x, y, color="red", label="Noise Threshold")

    # NOTE(review): "\t" in this non-raw string is a tab escape, probably meant
    # to be a mathtext command - confirm before changing the label text.
    ax.set_xlabel("$\t{m/z}$", fontsize=12)
    ax.set_ylabel("Abundance", fontsize=12)
    ax.tick_params(axis="both", which="major", labelsize=12)

    ax.axes.spines["top"].set_visible(False)
    ax.axes.spines["right"].set_visible(False)

    ax.get_yaxis().set_visible(False)
    ax.spines["left"].set_visible(False)
    if legend:
        ax.legend()

    return ax
def to_excel(self, out_file_path, write_metadata=True):
    """Write the mass spectrum to an Excel file.

    Parameters
    ----------
    out_file_path : str
        Destination path of the Excel file.
    write_metadata : bool, optional
        Forwarded to the exporter; whether metadata is written as well.
        Defaults to True.

    Returns
    -------
    None
    """
    # Imported here rather than at module level, as in the sibling export methods
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    HighResMassSpecExport(out_file_path, self).to_excel(write_metadata=write_metadata)
class MassSpecProfile(MassSpecBase):
    """Mass spectrum built from data that is already in profile format.

    Notes
    -----
    Stores the profile data and instrument settings.
    Iterating the object walks the MSPeak instances held in ``_mspeaks``, which is
    filled by ``process_mass_spec``; iteration yields nothing while ``_mspeaks`` is empty.
    Further attributes and methods are inherited from MassSpecBase().

    Parameters
    ----------
    data_dict : dict
        A dictionary containing the profile data; read with the keys
        ``Labels.mz`` and ``Labels.abundance``.
    d_params : dict{'str': float, int or str}
        contains the instrument settings and processing settings
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    ----------
    * process_mass_spec(). Process the mass spectrum.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        mz_values = data_dict.get(Labels.mz)
        abundance_values = data_dict.get(Labels.abundance)
        super().__init__(mz_values, abundance_values, d_params)

        if not auto_process:
            return

        self.process_mass_spec()
datapoint 1328 d_params : dict{'str': float, int or str} 1329 contains the instrument settings and processing settings 1330 auto_process : bool, optional 1331 Whether to automatically process the mass spectrum. Defaults to True. 1332 keep_profile : bool, optional 1333 Whether to keep the profile data. Defaults to True. 1334 1335 Attributes 1336 ---------- 1337 has_frequency : bool 1338 Whether the mass spectrum has frequency data. 1339 _frequency_domain : list(float) 1340 Frequency domain in Hz 1341 label : str 1342 store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points 1343 _abundance : ndarray 1344 The abundance values of the mass spectrum. 1345 _mz_exp : ndarray 1346 The m/z values of the mass spectrum. 1347 _mspeaks : list 1348 A list of mass peaks. 1349 See Also: all the attributes of MassSpecBase class 1350 1351 Methods 1352 ---------- 1353 * _set_mz_domain(). 1354 calculates the m_z based on the setting of d_params 1355 * process_mass_spec(). Process the mass spectrum. 
1356 1357 see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid() 1358 """ 1359 1360 def __init__( 1361 self, 1362 frequency_domain, 1363 magnitude, 1364 d_params, 1365 auto_process=True, 1366 keep_profile=True, 1367 ): 1368 super().__init__(None, magnitude, d_params) 1369 1370 self._frequency_domain = frequency_domain 1371 self.has_frequency = True 1372 self._set_mz_domain() 1373 self._sort_mz_domain() 1374 1375 self.magnetron_frequency = None 1376 self.magnetron_frequency_sigma = None 1377 1378 # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect 1379 1380 if auto_process: 1381 self.process_mass_spec(keep_profile=keep_profile) 1382 1383 def _sort_mz_domain(self): 1384 """Sort the mass spectrum by m/z values.""" 1385 1386 if self._mz_exp[0] > self._mz_exp[-1]: 1387 self._mz_exp = self._mz_exp[::-1] 1388 self._abundance = self._abundance[::-1] 1389 self._frequency_domain = self._frequency_domain[::-1] 1390 1391 def _set_mz_domain(self): 1392 """Set the m/z domain of the mass spectrum based on the settings of d_params.""" 1393 if self.label == Labels.bruker_frequency: 1394 self._mz_exp = self._f_to_mz_bruker() 1395 1396 else: 1397 self._mz_exp = self._f_to_mz() 1398 1399 @property 1400 def transient_settings(self): 1401 """Return the transient settings of the mass spectrum.""" 1402 return self.parameters.transient 1403 1404 @transient_settings.setter 1405 def transient_settings(self, instance_TransientSetting): 1406 self.parameters.transient = instance_TransientSetting 1407 1408 def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300): 1409 """Calculates the magnetron frequency of the mass spectrum. 1410 1411 Parameters 1412 ---------- 1413 max_magnetron_freq : float, optional 1414 The maximum magnetron frequency. Defaults to 50. 1415 magnetron_freq_bins : int, optional 1416 The number of bins to use for the histogram. Defaults to 300. 
class MassSpecCentroid(MassSpecBase):
    """A mass spectrum class when the entry point is on centroid format

    Notes
    -----
    - Stores the centroid data and instrument settings
    - Simulates stick-like profile data from the centroids for plotting
    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    - _mspeaks is populated under the hood by calling process_mass_spec method
    - iteration is null if _mspeaks is empty

    Parameters
    ----------
    data_dict : dict {string: numpy array float64 )
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict{'str': float, int or str}
        contains the instrument settings and processing settings
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    label : str
        store label (Bruker, Midas Transient, see Labels class)
    _baseline_noise : float
        store baseline noise
    _baseline_noise_std : float
        store baseline noise std
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    ----------
    * process_mass_spec().
        Process the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    * __simulate_profile__data__().
        Turn centroids into three-point sticks so a line plot draws them as sticks; intended for plotting and inspection purposes only.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    """

    # Half width (in m/z) of the zero-abundance shoulders placed around each centroid
    _STICK_HALF_WIDTH = 0.0000001

    def __init__(self, data_dict, d_params, auto_process=True):
        super().__init__([], [], d_params)

        self._set_parameters_objects(d_params)

        if self.label == Labels.thermo_centroid:
            self._baseline_noise = d_params.get("baseline_noise")
            self._baseline_noise_std = d_params.get("baseline_noise_std")

        self.is_centroid = True
        # Kept only until process_mass_spec() has consumed it (it deletes the attribute)
        self.data_dict = data_dict
        self._mz_exp = data_dict[Labels.mz]
        self._abundance = data_dict[Labels.abundance]

        if auto_process:
            self.process_mass_spec()

    @staticmethod
    def _has_values(values):
        """Return True when *values* is non-empty and holds at least one non-None entry."""
        # len() is used instead of truthiness: `not ndarray` raises ValueError
        # for arrays with more than one element.
        if values is None or len(values) == 0:
            return False
        return any(value is not None for value in values)

    @staticmethod
    def _float_or_nan(value):
        """Return *value* as float, or NaN when it is falsy (None, 0, empty)."""
        return float(value) if value else np.nan

    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
        """Expand centroids into three-point sticks for plotting.

        Notes
        -----
        This is a quick fix to trick a line plot be able to plot as sticks for
        plotting and inspection purposes only.

        Parameters
        ----------
        exp_mz_centroid : list(float)
            list of m/z values
        magnitude_centroid : list(float)
            list of abundance values

        Returns
        -------
        x : list(float)
            list of m/z values, three per centroid (just below, at, just above)
        y : list(float)
            list of abundance values, ``0, abundance, 0`` per centroid
        """
        x, y = [], []
        for i, mz in enumerate(exp_mz_centroid):
            x.extend((mz - self._STICK_HALF_WIDTH, mz, mz + self._STICK_HALF_WIDTH))
            y.extend((0, magnitude_centroid[i], 0))
        return x, y

    @property
    def mz_exp_profile(self):
        """Return the m/z profile of the mass spectrum (three points per centroid)."""
        mz_list = []
        for mz in self.mz_exp:
            mz_list.extend(
                (mz - self._STICK_HALF_WIDTH, mz, mz + self._STICK_HALF_WIDTH)
            )
        return mz_list

    @mz_exp_profile.setter
    def mz_exp_profile(self, _mz_exp):
        self._mz_exp = _mz_exp

    @property
    def abundance_profile(self):
        """Return the abundance profile of the mass spectrum (``0, abundance, 0`` per centroid)."""
        ab_list = []
        for ab in self.abundance:
            ab_list.extend((0, ab, 0))
        return ab_list

    @abundance_profile.setter
    def abundance_profile(self, abundance):
        self._abundance = abundance

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum."""
        return sum(self.abundance)

    def process_mass_spec(self):
        """Populate ``_mspeaks`` from the centroid data stored in ``data_dict``.

        Overrides MassSpecBase.process_mass_spec: for centroid data the MSPeak
        objects are created directly here instead of by peak picking.

        Raises
        ------
        Exception
            If the noise threshold method is "signal_noise" but no S/N data is
            available, or if the "log" noise method is selected for a spectrum
            whose label is not ``Labels.thermo_centroid``.

        Notes
        -----
        ``self.data_dict`` is deleted at the end, so the method cannot be run a
        second time on the same object.
        """
        data_dict = self.data_dict
        ion_charge = self.polarity
        ms_settings = self.parameters.mass_spectrum
        method = ms_settings.noise_threshold_method

        # Resolving power / S/N count as present only when at least one value is given
        rp_values = data_dict.get(Labels.rp)
        s2n_values = data_dict.get(Labels.s2n)
        rp_present = self._has_values(rp_values)
        s2n_present = self._has_values(s2n_values)

        if not s2n_present and method == "signal_noise":
            raise Exception("Signal to Noise data is missing for noise thresholding")

        # Pull out abundance data
        abun = array(data_dict.get(Labels.abundance)).astype(float)

        # Threshold used by the minima, relative and absolute abundance methods
        abundance_threshold, factor = self.get_threshold(abun)

        uses_abundance = method in ["minima", "relative_abundance", "absolute_abundance"]
        uses_s2n = method == "signal_noise"

        for index, mz in enumerate(data_dict.get(Labels.mz)):
            rp_i = self._float_or_nan(rp_values[index]) if rp_present else np.nan
            s2n_i = self._float_or_nan(s2n_values[index]) if s2n_present else np.nan

            # centroid peak does not have start and end peak index pos
            massspec_indexes = (index, index, index)

            if uses_abundance:
                keep_peak = abun[index] / factor >= abundance_threshold
            elif uses_s2n:
                # NaN S/N compares False, so peaks without S/N are dropped
                keep_peak = s2n_i >= ms_settings.noise_threshold_min_s2n
            else:
                keep_peak = False

            if keep_peak:
                self.add_mspeak(
                    ion_charge,
                    mz,
                    abun[index],
                    rp_i,
                    s2n_i,
                    massspec_indexes,
                    ms_parent=self,
                )

        self.mspeaks = self._mspeaks
        # Same guard as MassSpecBase.process_mass_spec: no peaks -> dynamic range 0
        if self.mspeaks:
            self._dynamic_range = self.max_abundance / self.min_abundance
        else:
            self._dynamic_range = 0
        self._set_nominal_masses_start_final_indexes()

        if self.label != Labels.thermo_centroid:
            if self.settings.noise_threshold_method == "log":
                raise Exception("log noise Not tested for centroid data")
            self._baseline_noise, self._baseline_noise_std = (
                self.run_noise_threshold_calc()
            )

        del self.data_dict
@property
def mz_abun_tuples(self):
    """Return the m/z and abundance values as a list of integer tuples.

    Returns
    -------
    list of tuple(int, int)
        One ``(m/z, abundance)`` pair per data point, each value rounded to the
        nearest integer with ``round`` before conversion.
    """
    # Iterating the spectrum yields (mz, abundance) pairs. The original lambda
    # had a misplaced parenthesis that passed the abundance as the second
    # argument of int(), which raises TypeError for any input.
    return [(int(round(pair[0], 0)), int(round(pair[1], 0))) for pair in self]
def overrides(interface_class):
    """Return a decorator that checks the decorated method overrides one of *interface_class*.

    Parameters
    ----------
    interface_class : type
        The class expected to already expose an attribute with the method's name.

    Returns
    -------
    callable
        A decorator that returns the method unchanged when the name is found in
        ``dir(interface_class)``.

    Raises
    ------
    AssertionError
        Raised by the decorator when the method name is not present on
        *interface_class*.
    """

    def overrider(method):
        # Raised explicitly instead of using `assert`, which is removed under
        # `python -O` and would silently disable the check.
        if method.__name__ not in dir(interface_class):
            raise AssertionError(
                "%s does not override any attribute of %s"
                % (method.__name__, interface_class.__name__)
            )
        return method

    return overrider
Checks if the method overrides a method from an interface class.
36class MassSpecBase(MassSpecCalc, KendrickGrouping): 37 """A mass spectrum base class, stores the profile data and instrument settings. 38 39 Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. 40 _mspeaks is populated under the hood by calling process_mass_spec method. 41 Iteration is null if _mspeaks is empty. 42 43 Parameters 44 ---------- 45 mz_exp : array_like 46 The m/z values of the mass spectrum. 47 abundance : array_like 48 The abundance values of the mass spectrum. 49 d_params : dict 50 A dictionary of parameters for the mass spectrum. 51 **kwargs 52 Additional keyword arguments. 53 54 Attributes 55 ---------- 56 57 mspeaks : list 58 A list of mass peaks. 59 is_calibrated : bool 60 Whether the mass spectrum is calibrated. 61 is_centroid : bool 62 Whether the mass spectrum is centroided. 63 has_frequency : bool 64 Whether the mass spectrum has a frequency domain. 65 calibration_order : None or int 66 The order of the mass spectrum's calibration. 67 calibration_points : None or ndarray 68 The calibration points of the mass spectrum. 69 calibration_ref_mzs: None or ndarray 70 The reference m/z values of the mass spectrum's calibration. 71 calibration_meas_mzs : None or ndarray 72 The measured m/z values of the mass spectrum's calibration. 73 calibration_RMS : None or float 74 The root mean square of the mass spectrum's calibration. 75 calibration_segment : None or CalibrationSegment 76 The calibration segment of the mass spectrum. 77 _abundance : ndarray 78 The abundance values of the mass spectrum. 79 _mz_exp : ndarray 80 The m/z values of the mass spectrum. 81 _mspeaks : list 82 A list of mass peaks. 83 _dict_nominal_masses_indexes : dict 84 A dictionary of nominal masses and their indexes. 85 _baseline_noise : float 86 The baseline noise of the mass spectrum. 87 _baseline_noise_std : float 88 The standard deviation of the baseline noise of the mass spectrum. 
def __init__(self, mz_exp, abundance, d_params, **kwargs):
    """Store the raw data as float64 arrays and initialise the processing state.

    Parameters
    ----------
    mz_exp : array_like
        The m/z values of the mass spectrum.
    abundance : array_like
        The abundance values of the mass spectrum.
    d_params : dict
        A dictionary of parameters, forwarded to ``_set_parameters_objects``.
    **kwargs
        Accepted but not used inside this method.
    """
    self._abundance = array(abundance, dtype=float64)
    self._mz_exp = array(mz_exp, dtype=float64)

    # objects created after process_mass_spec() function
    self._mspeaks = list()
    self.mspeaks = list()
    self._dict_nominal_masses_indexes = dict()
    # Placeholder noise values; _set_parameters_objects() below assigns both
    # attributes again from d_params.
    self._baseline_noise = 0.001
    self._baseline_noise_std = 0.001
    self._dynamic_range = None
    # set to None: initialization occurs inside subclass MassSpecfromFreq
    self._transient_settings = None
    self._frequency_domain = None
    self._mz_cal_profile = None
    self.is_calibrated = False

    # Order matters: metadata from d_params first, then a fresh MSParameters object
    self._set_parameters_objects(d_params)
    self._init_settings()

    # Subclasses flip these flags after calling this constructor
    self.is_centroid = False
    self.has_frequency = False

    # Calibration results, all unset until a calibration is performed
    self.calibration_order = None
    self.calibration_points = None
    self.calibration_ref_mzs = None
    self.calibration_meas_mzs = None
    self.calibration_RMS = None
    self.calibration_segment = None
    self.calibration_raw_error_median = None
    self.calibration_raw_error_stdev = None
def set_indexes(self, list_indexes):
    """Restrict iteration to the MSPeaks at the given positions.

    Parameters
    ----------
    list_indexes : list of int
        Positions in ``_mspeaks`` of the peaks to keep, in the order they
        should be iterated.

    Notes
    -----
    Each selected peak gets its ``index`` renumbered to its new position and
    the nominal-mass index table is rebuilt afterwards.
    """
    all_peaks = self._mspeaks
    self.mspeaks = [all_peaks[source_position] for source_position in list_indexes]

    new_position = 0
    for peak in self.mspeaks:
        peak.index = new_position
        new_position += 1

    self._set_nominal_masses_start_final_indexes()
def _set_parameters_objects(self, d_params):
    """Copy acquisition metadata from *d_params* onto the mass spectrum.

    Parameters
    ----------
    d_params : dict
        A dictionary containing the parameters to set. Missing keys yield
        ``None``, except "polarity", which is converted with ``int``.

    Notes
    -----
    Attributes assigned: _calibration_terms, label, analyzer, acquisition_time,
    instrument_label, polarity, scan_number, retention_time, mobility_rt,
    mobility_scan, _filename, _dir_location, _baseline_noise,
    _baseline_noise_std and sample_name. The sample name falls back to
    ``self.filename.stem`` when it is "Unknown", missing or empty.
    """
    read = d_params.get

    self._calibration_terms = (read("Aterm"), read("Bterm"), read("Cterm"))

    self.label = read(Labels.label)
    self.analyzer = read("analyzer")
    self.acquisition_time = read("acquisition_time")
    self.instrument_label = read("instrument_label")
    self.polarity = int(read("polarity"))
    self.scan_number = read("scan_number")
    self.retention_time = read("rt")
    self.mobility_rt = read("mobility_rt")
    self.mobility_scan = read("mobility_scan")
    self._filename = read("filename_path")
    self._dir_location = read("dir_location")
    self._baseline_noise = read("baseline_noise")
    self._baseline_noise_std = read("baseline_noise_std")

    given_name = read("sample_name")
    if given_name != "Unknown" and given_name:
        self.sample_name = given_name
    else:
        self.sample_name = self.filename.stem
def clear_molecular_formulas(self):
    """Clear the molecular formulas of every MSPeak in the mass spectrum.

    Returns
    -------
    numpy.ndarray
        The values returned by each peak's ``clear_molecular_formulas()``,
        in peak order.
    """
    self.check_mspeaks()

    cleared = []
    for peak in self.mspeaks:
        cleared.append(peak.clear_molecular_formulas())

    return array(cleared)
def cal_noise_threshold(self):
    """Calculate the baseline noise and its standard deviation.

    Notes
    -----
    - Spectra labelled ``Labels.simulated_profile`` get the fixed values
      ``0.1`` (baseline) and ``1`` (standard deviation).
    - Otherwise the "log" noise threshold method uses
      ``run_log_noise_threshold_calc`` and every other method uses
      ``run_noise_threshold_calc``.
    """
    if self.label == Labels.simulated_profile:
        # Previously followed by an independent `if`/`else`, so these fixed
        # values were always overwritten by one of the calculations below.
        self._baseline_noise, self._baseline_noise_std = 0.1, 1

    elif self.settings.noise_threshold_method == "log":
        self._baseline_noise, self._baseline_noise_std = (
            self.run_log_noise_threshold_calc()
        )

    else:
        self._baseline_noise, self._baseline_noise_std = (
            self.run_noise_threshold_calc()
        )
388 """ 389 load_and_set_parameters_ms(self, parameters_path=parameters_path) 390 391 def set_parameter_from_toml(self, parameters_path): 392 load_and_set_toml_parameters_ms(self, parameters_path=parameters_path) 393 394 @property 395 def mspeaks_settings(self): 396 """Return the MS peak settings of the mass spectrum.""" 397 return self.parameters.ms_peak 398 399 @mspeaks_settings.setter 400 def mspeaks_settings(self, instance_MassSpecPeakSetting): 401 self.parameters.ms_peak = instance_MassSpecPeakSetting 402 403 @property 404 def settings(self): 405 """Return the settings of the mass spectrum.""" 406 return self.parameters.mass_spectrum 407 408 @settings.setter 409 def settings(self, instance_MassSpectrumSetting): 410 self.parameters.mass_spectrum = instance_MassSpectrumSetting 411 412 @property 413 def molecular_search_settings(self): 414 """Return the molecular search settings of the mass spectrum.""" 415 return self.parameters.molecular_search 416 417 @molecular_search_settings.setter 418 def molecular_search_settings(self, instance_MolecularFormulaSearchSettings): 419 self.parameters.molecular_search = instance_MolecularFormulaSearchSettings 420 421 @property 422 def mz_cal_profile(self): 423 """Return the calibrated m/z profile of the mass spectrum.""" 424 return self._mz_cal_profile 425 426 @mz_cal_profile.setter 427 def mz_cal_profile(self, mz_cal_list): 428 if len(mz_cal_list) == len(self._mz_exp): 429 self._mz_cal_profile = mz_cal_list 430 else: 431 raise Exception( 432 "calibrated array (%i) is not of the same size of the data (%i)" 433 % (len(mz_cal_list), len(self.mz_exp_profile)) 434 ) 435 436 @property 437 def mz_cal(self): 438 """Return the calibrated m/z values of the mass spectrum.""" 439 return array([mspeak.mz_cal for mspeak in self.mspeaks]) 440 441 @mz_cal.setter 442 def mz_cal(self, mz_cal_list): 443 if len(mz_cal_list) == len(self.mspeaks): 444 self.is_calibrated = True 445 for index, mz_cal in enumerate(mz_cal_list): 446 
self.mspeaks[index].mz_cal = mz_cal 447 else: 448 raise Exception( 449 "calibrated array (%i) is not of the same size of the data (%i)" 450 % (len(mz_cal_list), len(self._mspeaks)) 451 ) 452 453 @property 454 def mz_exp(self): 455 """Return the experimental m/z values of the mass spectrum.""" 456 self.check_mspeaks() 457 458 if self.is_calibrated: 459 return array([mspeak.mz_cal for mspeak in self.mspeaks]) 460 461 else: 462 return array([mspeak.mz_exp for mspeak in self.mspeaks]) 463 464 @property 465 def freq_exp_profile(self): 466 """Return the experimental frequency profile of the mass spectrum.""" 467 return self._frequency_domain 468 469 @freq_exp_profile.setter 470 def freq_exp_profile(self, new_data): 471 self._frequency_domain = array(new_data) 472 473 @property 474 def freq_exp_pp(self): 475 """Return the experimental frequency values of the mass spectrum that are used for peak picking.""" 476 _, _, freq = self.prepare_peak_picking_data() 477 return freq 478 479 @property 480 def mz_exp_profile(self): 481 """Return the experimental m/z profile of the mass spectrum.""" 482 if self.is_calibrated: 483 return self.mz_cal_profile 484 else: 485 return self._mz_exp 486 487 @mz_exp_profile.setter 488 def mz_exp_profile(self, new_data): 489 self._mz_exp = array(new_data) 490 491 @property 492 def mz_exp_pp(self): 493 """Return the experimental m/z values of the mass spectrum that are used for peak picking.""" 494 mz, _, _ = self.prepare_peak_picking_data() 495 return mz 496 497 @property 498 def abundance_profile(self): 499 """Return the abundance profile of the mass spectrum.""" 500 return self._abundance 501 502 @abundance_profile.setter 503 def abundance_profile(self, new_data): 504 self._abundance = array(new_data) 505 506 @property 507 def abundance_profile_pp(self): 508 """Return the abundance profile of the mass spectrum that is used for peak picking.""" 509 _, abundance, _ = self.prepare_peak_picking_data() 510 return abundance 511 512 @property 513 def 
abundance(self): 514 """Return the abundance values of the mass spectrum.""" 515 self.check_mspeaks() 516 return array([mspeak.abundance for mspeak in self.mspeaks]) 517 518 def freq_exp(self): 519 """Return the experimental frequency values of the mass spectrum.""" 520 self.check_mspeaks() 521 return array([mspeak.freq_exp for mspeak in self.mspeaks]) 522 523 @property 524 def resolving_power(self): 525 """Return the resolving power values of the mass spectrum.""" 526 self.check_mspeaks() 527 return array([mspeak.resolving_power for mspeak in self.mspeaks]) 528 529 @property 530 def signal_to_noise(self): 531 self.check_mspeaks() 532 return array([mspeak.signal_to_noise for mspeak in self.mspeaks]) 533 534 @property 535 def nominal_mz(self): 536 """Return the nominal m/z values of the mass spectrum.""" 537 if self._dict_nominal_masses_indexes: 538 return sorted(list(self._dict_nominal_masses_indexes.keys())) 539 else: 540 raise ValueError("Nominal indexes not yet set") 541 542 def get_mz_and_abundance_peaks_tuples(self): 543 """Return a list of tuples containing the m/z and abundance values of the mass spectrum.""" 544 self.check_mspeaks() 545 return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks] 546 547 @property 548 def kmd(self): 549 """Return the Kendrick mass defect values of the mass spectrum.""" 550 self.check_mspeaks() 551 return array([mspeak.kmd for mspeak in self.mspeaks]) 552 553 @property 554 def kendrick_mass(self): 555 """Return the Kendrick mass values of the mass spectrum.""" 556 self.check_mspeaks() 557 return array([mspeak.kendrick_mass for mspeak in self.mspeaks]) 558 559 @property 560 def max_mz_exp(self): 561 """Return the maximum experimental m/z value of the mass spectrum.""" 562 return max([mspeak.mz_exp for mspeak in self.mspeaks]) 563 564 @property 565 def min_mz_exp(self): 566 """Return the minimum experimental m/z value of the mass spectrum.""" 567 return min([mspeak.mz_exp for mspeak in self.mspeaks]) 568 569 @property 
570 def max_abundance(self): 571 """Return the maximum abundance value of the mass spectrum.""" 572 return max([mspeak.abundance for mspeak in self.mspeaks]) 573 574 @property 575 def max_signal_to_noise(self): 576 """Return the maximum signal-to-noise ratio of the mass spectrum.""" 577 return max([mspeak.signal_to_noise for mspeak in self.mspeaks]) 578 579 @property 580 def most_abundant_mspeak(self): 581 """Return the most abundant MSpeak object of the mass spectrum.""" 582 return max(self.mspeaks, key=lambda m: m.abundance) 583 584 @property 585 def min_abundance(self): 586 """Return the minimum abundance value of the mass spectrum.""" 587 return min([mspeak.abundance for mspeak in self.mspeaks]) 588 589 # takes too much cpu time 590 @property 591 def dynamic_range(self): 592 """Return the dynamic range of the mass spectrum.""" 593 return self._dynamic_range 594 595 @property 596 def baseline_noise(self): 597 """Return the baseline noise of the mass spectrum.""" 598 if self._baseline_noise: 599 return self._baseline_noise 600 else: 601 return None 602 603 @property 604 def baseline_noise_std(self): 605 """Return the standard deviation of the baseline noise of the mass spectrum.""" 606 if self._baseline_noise_std == 0: 607 return self._baseline_noise_std 608 if self._baseline_noise_std: 609 return self._baseline_noise_std 610 else: 611 return None 612 613 @property 614 def Aterm(self): 615 """Return the A-term calibration coefficient of the mass spectrum.""" 616 return self._calibration_terms[0] 617 618 @property 619 def Bterm(self): 620 """Return the B-term calibration coefficient of the mass spectrum.""" 621 return self._calibration_terms[1] 622 623 @property 624 def Cterm(self): 625 """Return the C-term calibration coefficient of the mass spectrum.""" 626 return self._calibration_terms[2] 627 628 @property 629 def filename(self): 630 """Return the filename of the mass spectrum.""" 631 return Path(self._filename) 632 633 @property 634 def dir_location(self): 
635 """Return the directory location of the mass spectrum.""" 636 return self._dir_location 637 638 def sort_by_mz(self): 639 """Sort the mass spectrum by m/z values.""" 640 return sorted(self, key=lambda m: m.mz_exp) 641 642 def sort_by_abundance(self, reverse=False): 643 """Sort the mass spectrum by abundance values.""" 644 return sorted(self, key=lambda m: m.abundance, reverse=reverse) 645 646 @property 647 def tic(self): 648 """Return the total ion current of the mass spectrum.""" 649 return trapz(self.abundance_profile, self.mz_exp_profile) 650 651 def check_mspeaks_warning(self): 652 """Check if the mass spectrum has MSpeaks objects. 653 654 Raises 655 ------ 656 Warning 657 If the mass spectrum has no MSpeaks objects. 658 """ 659 import warnings 660 661 if self.mspeaks: 662 pass 663 else: 664 warnings.warn("mspeaks list is empty, continuing without filtering data") 665 666 def check_mspeaks(self): 667 """Check if the mass spectrum has MSpeaks objects. 668 669 Raises 670 ------ 671 Exception 672 If the mass spectrum has no MSpeaks objects. 673 """ 674 if self.mspeaks: 675 pass 676 else: 677 raise Exception( 678 "mspeaks list is empty, please run process_mass_spec() first" 679 ) 680 681 def remove_assignment_by_index(self, indexes): 682 """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes. 683 684 Parameters 685 ---------- 686 indexes : list of int 687 A list of indexes of the MSpeaks objects to remove the molecular formula assignment from. 688 """ 689 for i in indexes: 690 self.mspeaks[i].clear_molecular_formulas() 691 692 def filter_by_index(self, list_indexes): 693 """Filter the mass spectrum by the specified indexes. 694 695 Parameters 696 ---------- 697 list_indexes : list of int 698 A list of indexes of the MSpeaks objects to drop. 
699 700 """ 701 702 self.mspeaks = [ 703 self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes 704 ] 705 706 for i, mspeak in enumerate(self.mspeaks): 707 mspeak.index = i 708 709 self._set_nominal_masses_start_final_indexes() 710 711 def filter_by_mz(self, min_mz, max_mz): 712 """Filter the mass spectrum by the specified m/z range. 713 714 Parameters 715 ---------- 716 min_mz : float 717 The minimum m/z value to keep. 718 max_mz : float 719 The maximum m/z value to keep. 720 721 """ 722 self.check_mspeaks_warning() 723 indexes = [ 724 index 725 for index, mspeak in enumerate(self.mspeaks) 726 if not min_mz <= mspeak.mz_exp <= max_mz 727 ] 728 self.filter_by_index(indexes) 729 730 def filter_by_s2n(self, min_s2n, max_s2n=False): 731 """Filter the mass spectrum by the specified signal-to-noise ratio range. 732 733 Parameters 734 ---------- 735 min_s2n : float 736 The minimum signal-to-noise ratio to keep. 737 max_s2n : float, optional 738 The maximum signal-to-noise ratio to keep. Defaults to False (no maximum). 739 740 """ 741 self.check_mspeaks_warning() 742 if max_s2n: 743 indexes = [ 744 index 745 for index, mspeak in enumerate(self.mspeaks) 746 if not min_s2n <= mspeak.signal_to_noise <= max_s2n 747 ] 748 else: 749 indexes = [ 750 index 751 for index, mspeak in enumerate(self.mspeaks) 752 if mspeak.signal_to_noise <= min_s2n 753 ] 754 self.filter_by_index(indexes) 755 756 def filter_by_abundance(self, min_abund, max_abund=False): 757 """Filter the mass spectrum by the specified abundance range. 758 759 Parameters 760 ---------- 761 min_abund : float 762 The minimum abundance to keep. 763 max_abund : float, optional 764 The maximum abundance to keep. Defaults to False (no maximum). 
765 766 """ 767 self.check_mspeaks_warning() 768 if max_abund: 769 indexes = [ 770 index 771 for index, mspeak in enumerate(self.mspeaks) 772 if not min_abund <= mspeak.abundance <= max_abund 773 ] 774 else: 775 indexes = [ 776 index 777 for index, mspeak in enumerate(self.mspeaks) 778 if mspeak.abundance <= min_abund 779 ] 780 self.filter_by_index(indexes) 781 782 def filter_by_max_resolving_power(self, B, T): 783 """Filter the mass spectrum by the specified maximum resolving power. 784 785 Parameters 786 ---------- 787 B : float 788 T : float 789 790 """ 791 792 rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z) 793 794 self.check_mspeaks_warning() 795 796 indexes_to_remove = [ 797 index 798 for index, mspeak in enumerate(self.mspeaks) 799 if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge) 800 ] 801 self.filter_by_index(indexes_to_remove) 802 803 def filter_by_mean_resolving_power( 804 self, ndeviations=3, plot=False, guess_pars=False 805 ): 806 """Filter the mass spectrum by the specified mean resolving power. 807 808 Parameters 809 ---------- 810 ndeviations : float, optional 811 The number of standard deviations to use for filtering. Defaults to 3. 812 plot : bool, optional 813 Whether to plot the resolving power distribution. Defaults to False. 814 guess_pars : bool, optional 815 Whether to guess the parameters for the Gaussian model. Defaults to False. 816 817 """ 818 self.check_mspeaks_warning() 819 indexes_to_remove = MeanResolvingPowerFilter( 820 self, ndeviations, plot, guess_pars 821 ).main() 822 self.filter_by_index(indexes_to_remove) 823 824 def filter_by_min_resolving_power(self, B, T, apodization_method: str=None, tolerance: float=0): 825 """Filter the mass spectrum by the calculated minimum theoretical resolving power. 826 827 This is currently designed only for FTICR data, and accounts only for magnitude mode data 828 Accurate results require passing the apodisaion method used to calculate the resolving power. 
829 see the ICRMassPeak function `resolving_power_calc` for more details. 830 831 Parameters 832 ---------- 833 B : Magnetic field strength in Tesla, float 834 T : transient length in seconds, float 835 apodization_method : str, optional 836 The apodization method to use for calculating the resolving power. Defaults to None. 837 tolerance : float, optional 838 The tolerance for the threshold. Defaults to 0, i.e. no tolerance 839 840 """ 841 if self.analyzer != "ICR": 842 raise Exception( 843 "This method is only applicable to ICR mass spectra. " 844 ) 845 846 self.check_mspeaks_warning() 847 848 indexes_to_remove = [ 849 index 850 for index, mspeak in enumerate(self.mspeaks) 851 if mspeak.resolving_power < (1-tolerance) * mspeak.resolving_power_calc(B, T, apodization_method=apodization_method) 852 ] 853 self.filter_by_index(indexes_to_remove) 854 855 def filter_by_noise_threshold(self): 856 """Filter the mass spectrum by the noise threshold.""" 857 858 threshold = self.get_noise_threshold()[1][0] 859 860 self.check_mspeaks_warning() 861 862 indexes_to_remove = [ 863 index 864 for index, mspeak in enumerate(self.mspeaks) 865 if mspeak.abundance <= threshold 866 ] 867 self.filter_by_index(indexes_to_remove) 868 869 def find_peaks(self): 870 """Find the peaks of the mass spectrum.""" 871 # needs to clear previous results from peak_picking 872 self._mspeaks = list() 873 874 # then do peak picking 875 self.do_peak_picking() 876 # print("A total of %i peaks were found" % len(self._mspeaks)) 877 878 def change_kendrick_base_all_mspeaks(self, kendrick_dict_base): 879 """Change the Kendrick base of all MSpeaks objects. 880 881 Parameters 882 ---------- 883 kendrick_dict_base : dict 884 A dictionary of the Kendrick base to change to. 
885 886 Notes 887 ----- 888 Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc 889 """ 890 self.parameters.ms_peak.kendrick_base = kendrick_dict_base 891 892 for mspeak in self.mspeaks: 893 mspeak.change_kendrick_base(kendrick_dict_base) 894 895 def get_nominal_mz_first_last_indexes(self, nominal_mass): 896 """Return the first and last indexes of the MSpeaks objects with the specified nominal mass. 897 898 Parameters 899 ---------- 900 nominal_mass : int 901 The nominal mass to get the indexes for. 902 903 Returns 904 ------- 905 tuple 906 A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass. 907 """ 908 if self._dict_nominal_masses_indexes: 909 if nominal_mass in self._dict_nominal_masses_indexes.keys(): 910 return ( 911 self._dict_nominal_masses_indexes.get(nominal_mass)[0], 912 self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1, 913 ) 914 915 else: 916 # import warnings 917 # uncomment warn to distribution 918 # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass) 919 return (0, 0) 920 else: 921 raise Exception( 922 "run process_mass_spec() function before trying to access the data" 923 ) 924 925 def get_masses_count_by_nominal_mass(self): 926 """Return a dictionary of the nominal masses and their counts.""" 927 928 dict_nominal_masses_count = {} 929 930 all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks])) 931 932 for nominal_mass in all_nominal_masses: 933 if nominal_mass not in dict_nominal_masses_count: 934 dict_nominal_masses_count[nominal_mass] = len( 935 list(self.get_nominal_mass_indexes(nominal_mass)) 936 ) 937 938 return dict_nominal_masses_count 939 940 def datapoints_count_by_nominal_mz(self, mz_overlay=0.1): 941 """Return a dictionary of the nominal masses and their counts. 
942 943 Parameters 944 ---------- 945 mz_overlay : float, optional 946 The m/z overlay to use for counting. Defaults to 0.1. 947 948 Returns 949 ------- 950 dict 951 A dictionary of the nominal masses and their counts. 952 """ 953 dict_nominal_masses_count = {} 954 955 all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks])) 956 957 for nominal_mass in all_nominal_masses: 958 if nominal_mass not in dict_nominal_masses_count: 959 min_mz = nominal_mass - mz_overlay 960 961 max_mz = nominal_mass + 1 + mz_overlay 962 963 indexes = indexes = where( 964 (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz) 965 ) 966 967 dict_nominal_masses_count[nominal_mass] = indexes[0].size 968 969 return dict_nominal_masses_count 970 971 def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1): 972 """Return the indexes of the MSpeaks objects with the specified nominal mass. 973 974 Parameters 975 ---------- 976 nominal_mass : int 977 The nominal mass to get the indexes for. 978 overlay : float, optional 979 The m/z overlay to use for counting. Defaults to 0.1. 980 981 Returns 982 ------- 983 generator 984 A generator of the indexes of the MSpeaks objects with the specified nominal mass. 
985 """ 986 min_mz_to_look = nominal_mass - overlay 987 max_mz_to_look = nominal_mass + 1 + overlay 988 989 return ( 990 i 991 for i in range(len(self.mspeaks)) 992 if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look 993 ) 994 995 # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look) 996 # return indexes 997 998 def _set_nominal_masses_start_final_indexes(self): 999 """Set the start and final indexes of the MSpeaks objects for all nominal masses.""" 1000 dict_nominal_masses_indexes = {} 1001 1002 all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks) 1003 1004 for nominal_mass in all_nominal_masses: 1005 # indexes = self.get_nominal_mass_indexes(nominal_mass) 1006 # Convert the iterator to a list to avoid multiple calls 1007 indexes = list(self.get_nominal_mass_indexes(nominal_mass)) 1008 1009 # If the list is not empty, find the first and last; otherwise, set None 1010 if indexes: 1011 first, last = indexes[0], indexes[-1] 1012 else: 1013 first = last = None 1014 # defaultvalue = None 1015 # first = last = next(indexes, defaultvalue) 1016 # for last in indexes: 1017 # pass 1018 1019 dict_nominal_masses_indexes[nominal_mass] = (first, last) 1020 1021 self._dict_nominal_masses_indexes = dict_nominal_masses_indexes 1022 1023 def plot_centroid(self, ax=None, c="g"): 1024 """Plot the centroid data of the mass spectrum. 1025 1026 Parameters 1027 ---------- 1028 ax : matplotlib.axes.Axes, optional 1029 The matplotlib axes to plot on. Defaults to None. 1030 c : str, optional 1031 The color to use for the plot. Defaults to 'g' (green). 1032 1033 Returns 1034 ------- 1035 matplotlib.axes.Axes 1036 The matplotlib axes containing the plot. 1037 1038 Raises 1039 ------ 1040 Exception 1041 If no centroid data is found. 
1042 """ 1043 1044 import matplotlib.pyplot as plt 1045 1046 if self._mspeaks: 1047 if ax is None: 1048 ax = plt.gca() 1049 1050 markerline_a, stemlines_a, baseline_a = ax.stem( 1051 self.mz_exp, self.abundance, linefmt="-", markerfmt=" " 1052 ) 1053 1054 plt.setp(markerline_a, "color", c, "linewidth", 2) 1055 plt.setp(stemlines_a, "color", c, "linewidth", 2) 1056 plt.setp(baseline_a, "color", c, "linewidth", 2) 1057 1058 ax.set_xlabel("$\t{m/z}$", fontsize=12) 1059 ax.set_ylabel("Abundance", fontsize=12) 1060 ax.tick_params(axis="both", which="major", labelsize=12) 1061 1062 ax.axes.spines["top"].set_visible(False) 1063 ax.axes.spines["right"].set_visible(False) 1064 1065 ax.get_yaxis().set_visible(False) 1066 ax.spines["left"].set_visible(False) 1067 1068 else: 1069 raise Exception("No centroid data found, please run process_mass_spec") 1070 1071 return ax 1072 1073 def plot_profile_and_noise_threshold(self, ax=None, legend=False): 1074 """Plot the profile data and noise threshold of the mass spectrum. 1075 1076 Parameters 1077 ---------- 1078 ax : matplotlib.axes.Axes, optional 1079 The matplotlib axes to plot on. Defaults to None. 1080 legend : bool, optional 1081 Whether to show the legend. Defaults to False. 1082 1083 Returns 1084 ------- 1085 matplotlib.axes.Axes 1086 The matplotlib axes containing the plot. 1087 1088 Raises 1089 ------ 1090 Exception 1091 If no noise threshold is found. 
1092 """ 1093 import matplotlib.pyplot as plt 1094 1095 if self.baseline_noise_std and self.baseline_noise_std: 1096 # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max()) 1097 baseline = (self.baseline_noise, self.baseline_noise) 1098 1099 # std = self.parameters.mass_spectrum.noise_threshold_min_std 1100 # threshold = self.baseline_noise_std + (std * self.baseline_noise_std) 1101 x, y = self.get_noise_threshold() 1102 1103 if ax is None: 1104 ax = plt.gca() 1105 1106 ax.plot( 1107 self.mz_exp_profile, 1108 self.abundance_profile, 1109 color="green", 1110 label="Spectrum", 1111 ) 1112 ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise") 1113 ax.plot(x, y, color="red", label="Noise Threshold") 1114 1115 ax.set_xlabel("$\t{m/z}$", fontsize=12) 1116 ax.set_ylabel("Abundance", fontsize=12) 1117 ax.tick_params(axis="both", which="major", labelsize=12) 1118 1119 ax.axes.spines["top"].set_visible(False) 1120 ax.axes.spines["right"].set_visible(False) 1121 1122 ax.get_yaxis().set_visible(False) 1123 ax.spines["left"].set_visible(False) 1124 if legend: 1125 ax.legend() 1126 1127 else: 1128 raise Exception("Calculate noise threshold first") 1129 1130 return ax 1131 1132 def plot_mz_domain_profile(self, color="green", ax=None): 1133 """Plot the m/z domain profile of the mass spectrum. 1134 1135 Parameters 1136 ---------- 1137 color : str, optional 1138 The color to use for the plot. Defaults to 'green'. 1139 ax : matplotlib.axes.Axes, optional 1140 The matplotlib axes to plot on. Defaults to None. 1141 1142 Returns 1143 ------- 1144 matplotlib.axes.Axes 1145 The matplotlib axes containing the plot. 1146 """ 1147 1148 import matplotlib.pyplot as plt 1149 1150 if ax is None: 1151 ax = plt.gca() 1152 ax.plot(self.mz_exp_profile, self.abundance_profile, color=color) 1153 ax.set(xlabel="m/z", ylabel="abundance") 1154 1155 return ax 1156 1157 def to_excel(self, out_file_path, write_metadata=True): 1158 """Export the mass spectrum to an Excel file. 
1159 1160 Parameters 1161 ---------- 1162 out_file_path : str 1163 The path to the Excel file to export to. 1164 write_metadata : bool, optional 1165 Whether to write the metadata to the Excel file. Defaults to True. 1166 1167 Returns 1168 ------- 1169 None 1170 """ 1171 from corems.mass_spectrum.output.export import HighResMassSpecExport 1172 1173 exportMS = HighResMassSpecExport(out_file_path, self) 1174 exportMS.to_excel(write_metadata=write_metadata) 1175 1176 def to_hdf(self, out_file_path): 1177 """Export the mass spectrum to an HDF file. 1178 1179 Parameters 1180 ---------- 1181 out_file_path : str 1182 The path to the HDF file to export to. 1183 1184 Returns 1185 ------- 1186 None 1187 """ 1188 from corems.mass_spectrum.output.export import HighResMassSpecExport 1189 1190 exportMS = HighResMassSpecExport(out_file_path, self) 1191 exportMS.to_hdf() 1192 1193 def to_csv(self, out_file_path, write_metadata=True): 1194 """Export the mass spectrum to a CSV file. 1195 1196 Parameters 1197 ---------- 1198 out_file_path : str 1199 The path to the CSV file to export to. 1200 write_metadata : bool, optional 1201 Whether to write the metadata to the CSV file. Defaults to True. 1202 1203 """ 1204 from corems.mass_spectrum.output.export import HighResMassSpecExport 1205 1206 exportMS = HighResMassSpecExport(out_file_path, self) 1207 exportMS.to_csv(write_metadata=write_metadata) 1208 1209 def to_pandas(self, out_file_path, write_metadata=True): 1210 """Export the mass spectrum to a Pandas dataframe with pkl extension. 1211 1212 Parameters 1213 ---------- 1214 out_file_path : str 1215 The path to the CSV file to export to. 1216 write_metadata : bool, optional 1217 Whether to write the metadata to the CSV file. Defaults to True. 
1218 1219 """ 1220 from corems.mass_spectrum.output.export import HighResMassSpecExport 1221 1222 exportMS = HighResMassSpecExport(out_file_path, self) 1223 exportMS.to_pandas(write_metadata=write_metadata) 1224 1225 def to_dataframe(self, additional_columns=None): 1226 """Return the mass spectrum as a Pandas dataframe. 1227 1228 Parameters 1229 ---------- 1230 additional_columns : list, optional 1231 A list of additional columns to include in the dataframe. Defaults to None. 1232 Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC" 1233 1234 Returns 1235 ------- 1236 pandas.DataFrame 1237 The mass spectrum as a Pandas dataframe. 1238 """ 1239 from corems.mass_spectrum.output.export import HighResMassSpecExport 1240 1241 exportMS = HighResMassSpecExport(self.filename, self) 1242 return exportMS.get_pandas_df(additional_columns=additional_columns) 1243 1244 def to_json(self): 1245 """Return the mass spectrum as a JSON file.""" 1246 from corems.mass_spectrum.output.export import HighResMassSpecExport 1247 1248 exportMS = HighResMassSpecExport(self.filename, self) 1249 return exportMS.to_json() 1250 1251 def parameters_json(self): 1252 """Return the parameters of the mass spectrum as a JSON string.""" 1253 from corems.mass_spectrum.output.export import HighResMassSpecExport 1254 1255 exportMS = HighResMassSpecExport(self.filename, self) 1256 return exportMS.parameters_to_json() 1257 1258 def parameters_toml(self): 1259 """Return the parameters of the mass spectrum as a TOML string.""" 1260 from corems.mass_spectrum.output.export import HighResMassSpecExport 1261 1262 exportMS = HighResMassSpecExport(self.filename, self) 1263 return exportMS.parameters_to_toml()
A mass spectrum base class, stores the profile data and instrument settings.
Iterating over the object walks the list of MSPeak objects stored in the _mspeaks attribute. _mspeaks is populated under the hood by calling the process_mass_spec method. The iteration is empty if _mspeaks is empty.
Parameters
- mz_exp (array_like): The m/z values of the mass spectrum.
- abundance (array_like): The abundance values of the mass spectrum.
- d_params (dict): A dictionary of parameters for the mass spectrum.
- **kwargs: Additional keyword arguments.
Attributes
- mspeaks (list): A list of mass peaks.
- is_calibrated (bool): Whether the mass spectrum is calibrated.
- is_centroid (bool): Whether the mass spectrum is centroided.
- has_frequency (bool): Whether the mass spectrum has a frequency domain.
- calibration_order (None or int): The order of the mass spectrum's calibration.
- calibration_points (None or ndarray): The calibration points of the mass spectrum.
- calibration_ref_mzs (None or ndarray): The reference m/z values of the mass spectrum's calibration.
- calibration_meas_mzs (None or ndarray): The measured m/z values of the mass spectrum's calibration.
- calibration_RMS (None or float): The root mean square of the mass spectrum's calibration.
- calibration_segment (None or CalibrationSegment): The calibration segment of the mass spectrum.
- _abundance (ndarray): The abundance values of the mass spectrum.
- _mz_exp (ndarray): The m/z values of the mass spectrum.
- _mspeaks (list): A list of mass peaks.
- _dict_nominal_masses_indexes (dict): A dictionary of nominal masses and their indexes.
- _baseline_noise (float): The baseline noise of the mass spectrum.
- _baseline_noise_std (float): The standard deviation of the baseline noise of the mass spectrum.
- _dynamic_range (float or None): The dynamic range of the mass spectrum.
- _transient_settings (None or TransientSettings): The transient settings of the mass spectrum.
- _frequency_domain (None or FrequencyDomain): The frequency domain of the mass spectrum.
- _mz_cal_profile (None or MzCalibrationProfile): The m/z calibration profile of the mass spectrum.
Methods
- process_mass_spec(). Main function to process the mass spectrum, including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
def __init__(self, mz_exp, abundance, d_params, **kwargs):
    """Store the profile data and set up the initial processing state.

    Parameters
    ----------
    mz_exp : array_like
        The m/z values of the mass spectrum; stored as a float64 array.
    abundance : array_like
        The abundance values of the mass spectrum; stored as a float64 array.
    d_params : dict
        Handed unchanged to ``_set_parameters_objects``.
    **kwargs
        Accepted but not read by this method.
    """
    # Profile data is always held as float64 arrays.
    self._abundance = array(abundance, dtype=float64)
    self._mz_exp = array(mz_exp, dtype=float64)

    # Filled in later, once process_mass_spec() has been run.
    self._mspeaks = []
    self.mspeaks = []
    self._dict_nominal_masses_indexes = {}
    self._baseline_noise = self._baseline_noise_std = 0.001
    self._dynamic_range = None

    # Left as None here: initialization occurs inside subclass MassSpecfromFreq.
    self._transient_settings = None
    self._frequency_domain = None
    self._mz_cal_profile = None
    self.is_calibrated = False

    # Parameter objects and settings are wired up before the remaining flags
    # are assigned; keep this ordering.
    self._set_parameters_objects(d_params)
    self._init_settings()

    self.is_centroid = False
    self.has_frequency = False

    # Calibration bookkeeping, empty until a calibration is applied.
    self.calibration_order = None
    self.calibration_points = None
    self.calibration_ref_mzs = None
    self.calibration_meas_mzs = None
    self.calibration_RMS = None
    self.calibration_segment = None
    self.calibration_raw_error_median = None
    self.calibration_raw_error_stdev = None
def set_indexes(self, list_indexes):
    """Restrict the working peak list to selected entries of ``_mspeaks``.

    Parameters
    ----------
    list_indexes : list of int
        Positions in ``_mspeaks`` of the peaks to keep, in the order they
        should appear in ``mspeaks``.
    """
    all_peaks = self._mspeaks
    self.mspeaks = [all_peaks[source_index] for source_index in list_indexes]

    # Renumber so every selected peak's index equals its new position.
    for position, peak in enumerate(self.mspeaks):
        peak.index = position

    # The nominal-mass lookup depends on the peak list, so rebuild it.
    self._set_nominal_masses_start_final_indexes()
Set the mass spectrum to iterate over only the selected MSpeaks indexes.
Parameters
- list_indexes (list of int): A list of integers representing the indexes of the MSpeaks to iterate over.
def reset_indexes(self):
    """Point ``mspeaks`` back at the full ``_mspeaks`` list and renumber it.

    After this call ``mspeaks`` refers to the same list object as
    ``_mspeaks`` (no copy is made), every peak's ``index`` equals its
    position in that list, and the nominal-mass index lookup is rebuilt.
    """
    self.mspeaks = self._mspeaks

    for position, peak in enumerate(self.mspeaks):
        peak.index = position

    self._set_nominal_masses_start_final_indexes()
Reset the mass spectrum to iterate over all MSpeaks objects.
This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects. It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
def add_mspeak(
    self,
    ion_charge,
    mz_exp,
    abundance,
    resolving_power,
    signal_to_noise,
    massspec_indexes,
    exp_freq=None,
    ms_parent=None,
):
    """Build an MSPeak from the given values and append it to ``_mspeaks``.

    Parameters
    ----------
    ion_charge : int
        The ion charge of the MSPeak.
    mz_exp : float
        The experimental m/z value of the MSPeak.
    abundance : float
        The abundance of the MSPeak.
    resolving_power : float
        The resolving power of the MSPeak.
    signal_to_noise : float
        The signal-to-noise ratio of the MSPeak.
    massspec_indexes : list
        A list of indexes of the MSPeak in the MassSpectrum object.
    exp_freq : float, optional
        The experimental frequency of the MSPeak. Defaults to None.
    ms_parent : MSParent, optional
        The MSParent object associated with the MSPeak. Defaults to None.
    """
    # The current length of _mspeaks is passed as the last positional
    # argument (presumably the new peak's index -- confirm against MSPeak).
    next_position = len(self._mspeaks)

    new_peak = MSPeak(
        ion_charge,
        mz_exp,
        abundance,
        resolving_power,
        signal_to_noise,
        massspec_indexes,
        next_position,
        exp_freq=exp_freq,
        ms_parent=ms_parent,
    )
    self._mspeaks.append(new_peak)
Add a new MSPeak object to the MassSpectrum object.
Parameters
- ion_charge (int): The ion charge of the MSPeak.
- mz_exp (float): The experimental m/z value of the MSPeak.
- abundance (float): The abundance of the MSPeak.
- resolving_power (float): The resolving power of the MSPeak.
- signal_to_noise (float): The signal-to-noise ratio of the MSPeak.
- massspec_indexes (list): A list of indexes of the MSPeak in the MassSpectrum object.
- exp_freq (float, optional): The experimental frequency of the MSPeak. Defaults to None.
- ms_parent (MSParent, optional): The MSParent object associated with the MSPeak. Defaults to None.
def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    """Reset the calibration terms, recompute the m/z axis and redo peak picking.

    Parameters
    ----------
    Aterm : float
        The A-term calibration coefficient.
    Bterm : float
        The B-term calibration coefficient.
    C : float
        The C-term calibration coefficient.
    fas : float, optional
        The frequency amplitude scaling factor. Default is 0.
        Accepted for interface compatibility; this method does not use it.

    Notes
    -----
    The abundance data is left untouched; only the m/z axis is recomputed
    (via ``_f_to_mz``) before peaks are picked again and re-indexed.
    """
    # The terms must be stored before _f_to_mz() is called.
    self._calibration_terms = (Aterm, Bterm, C)

    self._mz_exp = self._f_to_mz()
    self.find_peaks()
    self.reset_indexes()
Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
Parameters
- Aterm (float): The A-term calibration coefficient.
- Bterm (float): The B-term calibration coefficient.
- C (float): The C-term calibration coefficient.
- fas (float, optional): The frequency amplitude scaling factor. Default is 0.
def clear_molecular_formulas(self):
    """Clear the molecular formulas of every mspeak in the mass spectrum.

    Returns
    -------
    numpy.ndarray
        An array holding the value returned by each mspeak's
        ``clear_molecular_formulas()`` call, in peak order.

    Raises
    ------
    Exception
        Propagated from ``check_mspeaks()`` when there are no mspeaks.
    """
    self.check_mspeaks()

    cleared = []
    for peak in self.mspeaks:
        cleared.append(peak.clear_molecular_formulas())
    return array(cleared)
Clear the molecular formulas for all mspeaks in the MassSpectrum.
Returns
- numpy.ndarray: An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
def process_mass_spec(self, keep_profile=True):
    """Run the full processing chain on the mass spectrum.

    Parameters
    ----------
    keep_profile : bool, optional
        Whether to keep the profile data after processing. Defaults to True.
        When False, the profile abundance and m/z arrays are zeroed in place.

    Notes
    -----
    The steps are, in order:
        - calculate the noise threshold
        - do peak picking (creates the mspeak objects)
        - reset the mspeak indexes
        - store the dynamic range (max abundance / min abundance, or 0 when
          no peak was found)

    If these steps are run manually, make sure to rerun
    filter_by_noise_threshold afterwards.
    """
    self.cal_noise_threshold()
    self.find_peaks()
    self.reset_indexes()

    self._dynamic_range = (
        self.max_abundance / self.min_abundance if self.mspeaks else 0
    )

    if keep_profile:
        return

    # Multiply in place so the existing profile containers are kept.
    self._abundance *= 0
    self._mz_exp *= 0
Process the mass spectrum.
Parameters
- keep_profile (bool, optional): Whether to keep the profile data after processing. Defaults to True.
Notes
This method does the following:
- calculates the noise threshold
- does peak picking (creates mspeak_objs)
- resets the mspeak_obj indexes
def cal_noise_threshold(self):
    """Calculate the baseline noise and its standard deviation.

    The result is stored in ``_baseline_noise`` and ``_baseline_noise_std``:

    - simulated profiles get the fixed values ``0.1`` and ``1``;
    - otherwise, when ``settings.noise_threshold_method`` is ``"log"``, the
      values come from ``run_log_noise_threshold_calc()``;
    - in every other case they come from ``run_noise_threshold_calc()``.
    """
    # The branches are chained with elif: previously the simulated-profile
    # values were set by a stand-alone `if` and then unconditionally
    # overwritten by the if/else that followed it.
    if self.label == Labels.simulated_profile:
        self._baseline_noise, self._baseline_noise_std = 0.1, 1

    elif self.settings.noise_threshold_method == "log":
        self._baseline_noise, self._baseline_noise_std = (
            self.run_log_noise_threshold_calc()
        )

    else:
        self._baseline_noise, self._baseline_noise_std = (
            self.run_noise_threshold_calc()
        )
Calculate the noise threshold of the mass spectrum.
def set_parameter_from_json(self, parameters_path):
    """Load parameters from a JSON file and apply them to this mass spectrum.

    Parameters
    ----------
    parameters_path : str
        The path to the JSON file containing the parameters.
    """
    # Loading and assignment are both delegated to the shared loader.
    json_location = parameters_path
    load_and_set_parameters_ms(self, parameters_path=json_location)
Set the parameters of the mass spectrum from a JSON file.
Parameters
- parameters_path (str): The path to the JSON file containing the parameters.
Return the experimental frequency values of the mass spectrum that are used for peak picking.
Return the abundance profile of the mass spectrum that is used for peak picking.
def freq_exp(self):
    """Return the experimental frequency of every mspeak as an array.

    Raises an Exception (through ``check_mspeaks()``) when there are no mspeaks.
    """
    self.check_mspeaks()
    frequencies = [peak.freq_exp for peak in self.mspeaks]
    return array(frequencies)
Return the experimental frequency values of the mass spectrum.
def get_mz_and_abundance_peaks_tuples(self):
    """Return a list of ``(m/z, abundance)`` tuples, one per mspeak.

    Raises an Exception (through ``check_mspeaks()``) when there are no mspeaks.
    """
    self.check_mspeaks()

    pairs = []
    for peak in self.mspeaks:
        pairs.append((peak.mz_exp, peak.abundance))
    return pairs
Return a list of tuples containing the m/z and abundance values of the mass spectrum.
def sort_by_mz(self):
    """Return a new list of the iterated mspeaks ordered by ascending m/z.

    The mass spectrum itself is not reordered.
    """
    ordered = list(self)
    ordered.sort(key=lambda peak: peak.mz_exp)
    return ordered
Sort the mass spectrum by m/z values.
def sort_by_abundance(self, reverse=False):
    """Return a new list of the iterated mspeaks ordered by abundance.

    Parameters
    ----------
    reverse : bool, optional
        When True, sort from highest to lowest abundance. Defaults to False.
    """
    ordered = list(self)
    ordered.sort(key=lambda peak: peak.abundance, reverse=reverse)
    return ordered
Sort the mass spectrum by abundance values.
def check_mspeaks_warning(self):
    """Emit a warning when the mass spectrum has no MSpeaks objects.

    Nothing is raised; processing continues after the warning is issued.
    """
    import warnings

    if not self.mspeaks:
        warnings.warn("mspeaks list is empty, continuing without filtering data")
Check if the mass spectrum has MSpeaks objects.
Raises
- Warning: If the mass spectrum has no MSpeaks objects.
def check_mspeaks(self):
    """Ensure the mass spectrum has MSpeaks objects.

    Raises
    ------
    Exception
        If the mass spectrum has no MSpeaks objects.
    """
    if not self.mspeaks:
        raise Exception(
            "mspeaks list is empty, please run process_mass_spec() first"
        )
Check if the mass spectrum has MSpeaks objects.
Raises
- Exception: If the mass spectrum has no MSpeaks objects.
def remove_assignment_by_index(self, indexes):
    """Clear the molecular formula assignments of the selected MSpeaks objects.

    Parameters
    ----------
    indexes : list of int
        Positions, within ``mspeaks``, of the peaks whose molecular formulas
        are cleared.
    """
    for position in indexes:
        target_peak = self.mspeaks[position]
        target_peak.clear_molecular_formulas()
Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
Parameters
- indexes (list of int): A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
def filter_by_index(self, list_indexes):
    """Drop the MSpeaks objects at the given positions.

    Parameters
    ----------
    list_indexes : iterable of int
        Positions, within the current ``mspeaks`` list, of the peaks to drop.
        Duplicates and positions past the end of the list are ignored.

    Notes
    -----
    After filtering, the remaining peaks are renumbered so that each peak's
    ``index`` matches its new position, and the nominal-mass start/final
    index lookup is rebuilt.
    """
    # Build the lookup once: testing membership against a list for every peak
    # is quadratic, and a one-shot iterable would be consumed by the first
    # few tests.
    positions_to_drop = set(list_indexes)

    self.mspeaks = [
        peak
        for position, peak in enumerate(self.mspeaks)
        if position not in positions_to_drop
    ]

    for position, peak in enumerate(self.mspeaks):
        peak.index = position

    self._set_nominal_masses_start_final_indexes()
Filter the mass spectrum by the specified indexes.
Parameters
- list_indexes (list of int): A list of indexes of the MSpeaks objects to drop.
def filter_by_mz(self, min_mz, max_mz):
    """Keep only the mspeaks whose m/z lies inside ``[min_mz, max_mz]``.

    Parameters
    ----------
    min_mz : float
        The minimum m/z value to keep (inclusive).
    max_mz : float
        The maximum m/z value to keep (inclusive).
    """
    self.check_mspeaks_warning()

    outside_range = []
    for position, peak in enumerate(self.mspeaks):
        if min_mz <= peak.mz_exp <= max_mz:
            continue
        outside_range.append(position)

    self.filter_by_index(outside_range)
Filter the mass spectrum by the specified m/z range.
Parameters
- min_mz (float): The minimum m/z value to keep.
- max_mz (float): The maximum m/z value to keep.
def filter_by_s2n(self, min_s2n, max_s2n=False):
    """Filter the mspeaks by signal-to-noise ratio.

    Parameters
    ----------
    min_s2n : float
        The minimum signal-to-noise ratio.
    max_s2n : float, optional
        The maximum signal-to-noise ratio to keep. Defaults to False
        (no maximum); any falsy value is treated as "no maximum".

    Notes
    -----
    With a maximum, peaks inside ``[min_s2n, max_s2n]`` (both inclusive) are
    kept. Without a maximum, peaks with a signal-to-noise ratio less than or
    equal to ``min_s2n`` are dropped.
    """
    self.check_mspeaks_warning()

    if max_s2n:

        def is_rejected(value):
            return not min_s2n <= value <= max_s2n

    else:

        def is_rejected(value):
            return value <= min_s2n

    rejected = [
        position
        for position, peak in enumerate(self.mspeaks)
        if is_rejected(peak.signal_to_noise)
    ]
    self.filter_by_index(rejected)
Filter the mass spectrum by the specified signal-to-noise ratio range.
Parameters
- min_s2n (float): The minimum signal-to-noise ratio to keep.
- max_s2n (float, optional): The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
def filter_by_abundance(self, min_abund, max_abund=False):
    """Filter the mspeaks by abundance.

    Parameters
    ----------
    min_abund : float
        The minimum abundance.
    max_abund : float, optional
        The maximum abundance to keep. Defaults to False (no maximum); any
        falsy value is treated as "no maximum".

    Notes
    -----
    With a maximum, peaks inside ``[min_abund, max_abund]`` (both inclusive)
    are kept. Without a maximum, peaks with an abundance less than or equal
    to ``min_abund`` are dropped.
    """
    self.check_mspeaks_warning()

    rejected = []
    for position, peak in enumerate(self.mspeaks):
        if max_abund:
            drop = not min_abund <= peak.abundance <= max_abund
        else:
            drop = peak.abundance <= min_abund
        if drop:
            rejected.append(position)

    self.filter_by_index(rejected)
Filter the mass spectrum by the specified abundance range.
Parameters
- min_abund (float): The minimum abundance to keep.
- max_abund (float, optional): The maximum abundance to keep. Defaults to False (no maximum).
def filter_by_max_resolving_power(self, B, T):
    """Drop the mspeaks whose resolving power reaches a computed upper limit.

    The limit for a peak with m/z ``m`` and charge ``z`` is
    ``(1.274e7 * z * B * T) / (m * z)``; peaks with a resolving power greater
    than or equal to it are removed.

    Parameters
    ----------
    B : float
        NOTE(review): presumably the magnetic field strength in tesla, as
        documented for filter_by_min_resolving_power - confirm.
    T : float
        NOTE(review): presumably the transient length in seconds, as
        documented for filter_by_min_resolving_power - confirm.
    """

    def resolving_power_limit(m, z):
        return (1.274e7 * z * B * T) / (m * z)

    self.check_mspeaks_warning()

    above_limit = []
    for position, peak in enumerate(self.mspeaks):
        limit = resolving_power_limit(peak.mz_exp, peak.ion_charge)
        if peak.resolving_power >= limit:
            above_limit.append(position)

    self.filter_by_index(above_limit)
Filter the mass spectrum by the specified maximum resolving power.
Parameters
B (float):
T (float):
def filter_by_mean_resolving_power(
    self, ndeviations=3, plot=False, guess_pars=False
):
    """Filter the mspeaks using the mean resolving power filter.

    The positions to drop are computed by ``MeanResolvingPowerFilter`` and
    then removed with ``filter_by_index``.

    Parameters
    ----------
    ndeviations : float, optional
        The number of standard deviations to use for filtering. Defaults to 3.
    plot : bool, optional
        Whether to plot the resolving power distribution. Defaults to False.
    guess_pars : bool, optional
        Whether to guess the parameters for the Gaussian model. Defaults to False.
    """
    self.check_mspeaks_warning()

    rp_filter = MeanResolvingPowerFilter(self, ndeviations, plot, guess_pars)
    rejected = rp_filter.main()
    self.filter_by_index(rejected)
Filter the mass spectrum by the specified mean resolving power.
Parameters
- ndeviations (float, optional): The number of standard deviations to use for filtering. Defaults to 3.
- plot (bool, optional): Whether to plot the resolving power distribution. Defaults to False.
- guess_pars (bool, optional): Whether to guess the parameters for the Gaussian model. Defaults to False.
def filter_by_min_resolving_power(self, B, T, apodization_method: str = None, tolerance: float = 0):
    """Drop the mspeaks that fall below the calculated theoretical resolving power.

    This is currently designed only for FTICR data, and accounts only for
    magnitude-mode data. Accurate results require passing the apodization
    method used to calculate the resolving power; see the ICRMassPeak
    function `resolving_power_calc` for more details.

    Parameters
    ----------
    B : float
        Magnetic field strength in tesla.
    T : float
        Transient length in seconds.
    apodization_method : str, optional
        The apodization method to use for calculating the resolving power.
        Defaults to None.
    tolerance : float, optional
        The tolerance for the threshold. Defaults to 0, i.e. no tolerance.
        A peak is dropped when its resolving power is below
        ``(1 - tolerance)`` times the calculated value.

    Raises
    ------
    Exception
        If the analyzer of the mass spectrum is not "ICR".
    """
    if self.analyzer != "ICR":
        raise Exception(
            "This method is only applicable to ICR mass spectra. "
        )

    self.check_mspeaks_warning()

    below_minimum = []
    for position, peak in enumerate(self.mspeaks):
        measured = peak.resolving_power
        theoretical = peak.resolving_power_calc(
            B, T, apodization_method=apodization_method
        )
        if measured < (1 - tolerance) * theoretical:
            below_minimum.append(position)

    self.filter_by_index(below_minimum)
Filter the mass spectrum by the calculated minimum theoretical resolving power.
This is currently designed only for FTICR data, and accounts only for magnitude-mode data.
Accurate results require passing the apodization method used to calculate the resolving power.
See the ICRMassPeak function resolving_power_calc for more details.
Parameters
- B (float): Magnetic field strength in Tesla.
- T (float): Transient length in seconds.
- apodization_method (str, optional): The apodization method to use for calculating the resolving power. Defaults to None.
- tolerance (float, optional): The tolerance for the threshold. Defaults to 0, i.e. no tolerance
def filter_by_noise_threshold(self):
    """Drop the mspeaks whose abundance does not exceed the noise threshold.

    The threshold is the first value of the second element returned by
    ``get_noise_threshold()``; peaks with an abundance less than or equal to
    it are removed.
    """
    noise_line = self.get_noise_threshold()
    threshold = noise_line[1][0]

    self.check_mspeaks_warning()

    at_or_below = []
    for position, peak in enumerate(self.mspeaks):
        if peak.abundance <= threshold:
            at_or_below.append(position)

    self.filter_by_index(at_or_below)
Filter the mass spectrum by the noise threshold.
def find_peaks(self):
    """Discard any previously picked peaks and run peak picking again."""
    # Start from an empty list so results of an earlier run are not kept.
    self._mspeaks = []

    self.do_peak_picking()
Find the peaks of the mass spectrum.
def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    """Set a new Kendrick base in the parameters and on every MSpeaks object.

    Parameters
    ----------
    kendrick_dict_base : dict
        A dictionary of the Kendrick base to change to.

    Notes
    -----
    Example of kendrick_dict_base parameter:
    kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    """
    peak_settings = self.parameters.ms_peak
    peak_settings.kendrick_base = kendrick_dict_base

    for peak in self.mspeaks:
        peak.change_kendrick_base(kendrick_dict_base)
Change the Kendrick base of all MSpeaks objects.
Parameters
- kendrick_dict_base (dict): A dictionary of the Kendrick base to change to.
Notes
Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
def get_nominal_mz_first_last_indexes(self, nominal_mass):
    """Return the slice bounds of the MSpeaks objects with the given nominal mass.

    Parameters
    ----------
    nominal_mass : int
        The nominal mass to get the indexes for.

    Returns
    -------
    tuple
        ``(first, last + 1)`` taken from the stored nominal-mass lookup, or
        ``(0, 0)`` when the nominal mass is not present in it.

    Raises
    ------
    Exception
        If the nominal-mass lookup is empty (the spectrum was not processed).
    """
    bounds_by_mass = self._dict_nominal_masses_indexes

    if not bounds_by_mass:
        raise Exception(
            "run process_mass_spec() function before trying to access the data"
        )

    if nominal_mass not in bounds_by_mass:
        # Unknown nominal masses yield an empty range rather than an error.
        return (0, 0)

    bounds = bounds_by_mass.get(nominal_mass)
    return (bounds[0], bounds[1] + 1)
Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
Parameters
- nominal_mass (int): The nominal mass to get the indexes for.
Returns
- tuple: A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
def get_masses_count_by_nominal_mass(self):
    """Return a dictionary mapping each nominal mass to its number of mspeaks.

    The count for a nominal mass is the number of indexes yielded by
    ``get_nominal_mass_indexes`` for it.
    """
    distinct_nominal_masses = set([peak.nominal_mz_exp for peak in self.mspeaks])

    counts = {}
    for nominal in distinct_nominal_masses:
        matching_indexes = list(self.get_nominal_mass_indexes(nominal))
        counts[nominal] = len(matching_indexes)
    return counts
Return a dictionary of the nominal masses and their counts.
def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    """Count the profile datapoints that fall in each nominal-mass window.

    Parameters
    ----------
    mz_overlay : float, optional
        The m/z overlay added on both sides of the nominal-mass window.
        Defaults to 0.1.

    Returns
    -------
    dict
        Maps each nominal mass found among the mspeaks to the number of
        profile m/z values strictly between ``nominal - mz_overlay`` and
        ``nominal + 1 + mz_overlay``.
    """
    counts = {}

    for nominal in set([peak.nominal_mz_exp for peak in self.mspeaks]):
        lower = nominal - mz_overlay
        upper = nominal + 1 + mz_overlay

        in_window = where(
            (self.mz_exp_profile > lower) & (self.mz_exp_profile < upper)
        )
        counts[nominal] = in_window[0].size

    return counts
Return a dictionary of the nominal masses and their counts.
Parameters
- mz_overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
- dict: A dictionary of the nominal masses and their counts.
def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    """Return the positions of the MSpeaks objects around a nominal mass.

    Parameters
    ----------
    nominal_mass : int
        The nominal mass to get the indexes for.
    overlay : float, optional
        The m/z overlay added on both sides of the window. Defaults to 0.1.

    Returns
    -------
    generator
        Yields the position of every mspeak whose m/z lies within
        ``[nominal_mass - overlay, nominal_mass + 1 + overlay]`` (inclusive).
    """
    lower_bound = nominal_mass - overlay
    upper_bound = nominal_mass + 1 + overlay

    return (
        position
        for position, peak in enumerate(self.mspeaks)
        if lower_bound <= peak.mz_exp <= upper_bound
    )
Return the indexes of the MSpeaks objects with the specified nominal mass.
Parameters
- nominal_mass (int): The nominal mass to get the indexes for.
- overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
- generator: A generator of the indexes of the MSpeaks objects with the specified nominal mass.
def plot_centroid(self, ax=None, c="g"):
    """Plot the centroid data of the mass spectrum as a stem plot.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        The matplotlib axes to plot on. Defaults to None, in which case the
        current axes (``plt.gca()``) are used.
    c : str, optional
        The color to use for the plot. Defaults to 'g' (green).

    Returns
    -------
    matplotlib.axes.Axes
        The matplotlib axes containing the plot.

    Raises
    ------
    Exception
        If no centroid data is found.
    """

    import matplotlib.pyplot as plt

    if not self._mspeaks:
        raise Exception("No centroid data found, please run process_mass_spec")

    if ax is None:
        ax = plt.gca()

    markerline_a, stemlines_a, baseline_a = ax.stem(
        self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
    )

    plt.setp(markerline_a, "color", c, "linewidth", 2)
    plt.setp(stemlines_a, "color", c, "linewidth", 2)
    plt.setp(baseline_a, "color", c, "linewidth", 2)

    # Raw string with an explicit mathtext command: the previous non-raw
    # "$\t{m/z}$" contained a literal TAB character ("\t" escape).
    ax.set_xlabel(r"$\mathit{m/z}$", fontsize=12)
    ax.set_ylabel("Abundance", fontsize=12)
    ax.tick_params(axis="both", which="major", labelsize=12)

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # The y axis (including its label) and the left spine are hidden.
    ax.get_yaxis().set_visible(False)
    ax.spines["left"].set_visible(False)

    return ax
Plot the centroid data of the mass spectrum.
Parameters
- ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
- c (str, optional): The color to use for the plot. Defaults to 'g' (green).
Returns
- matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
- Exception: If no centroid data is found.
def plot_profile_and_noise_threshold(self, ax=None, legend=False):
    """Plot the profile data, the baseline noise and the noise threshold.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        The matplotlib axes to plot on. Defaults to None, in which case the
        current axes (``plt.gca()``) are used.
    legend : bool, optional
        Whether to show the legend. Defaults to False.

    Returns
    -------
    matplotlib.axes.Axes
        The matplotlib axes containing the plot.

    Raises
    ------
    Exception
        If no noise threshold is found (``baseline_noise_std`` is falsy).
    """
    import matplotlib.pyplot as plt

    # NOTE(review): the original condition tested baseline_noise_std twice;
    # the first operand was probably meant to be baseline_noise. Only the
    # standard deviation is tested here so the accepted inputs are unchanged.
    if not self.baseline_noise_std:
        raise Exception("Calculate noise threshold first")

    baseline = (self.baseline_noise, self.baseline_noise)
    x, y = self.get_noise_threshold()

    if ax is None:
        ax = plt.gca()

    ax.plot(
        self.mz_exp_profile,
        self.abundance_profile,
        color="green",
        label="Spectrum",
    )
    # One y value per x value: passing (baseline, baseline) made y
    # two-dimensional, which drew the same line twice and duplicated the
    # "Baseline Noise" legend entry.
    ax.plot(x, baseline, color="yellow", label="Baseline Noise")
    ax.plot(x, y, color="red", label="Noise Threshold")

    # Raw string with an explicit mathtext command: the previous non-raw
    # "$\t{m/z}$" contained a literal TAB character ("\t" escape).
    ax.set_xlabel(r"$\mathit{m/z}$", fontsize=12)
    ax.set_ylabel("Abundance", fontsize=12)
    ax.tick_params(axis="both", which="major", labelsize=12)

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # The y axis (including its label) and the left spine are hidden.
    ax.get_yaxis().set_visible(False)
    ax.spines["left"].set_visible(False)
    if legend:
        ax.legend()

    return ax
Plot the profile data and noise threshold of the mass spectrum.
Parameters
- ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
- legend (bool, optional): Whether to show the legend. Defaults to False.
Returns
- matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
- Exception: If no noise threshold is found.
def plot_mz_domain_profile(self, color="green", ax=None):
    """Plot the profile abundance against the profile m/z values.

    Parameters
    ----------
    color : str, optional
        The color to use for the plot. Defaults to 'green'.
    ax : matplotlib.axes.Axes, optional
        The matplotlib axes to plot on. Defaults to None, in which case the
        current axes (``plt.gca()``) are used.

    Returns
    -------
    matplotlib.axes.Axes
        The matplotlib axes containing the plot.
    """

    import matplotlib.pyplot as plt

    axes = plt.gca() if ax is None else ax

    axes.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    axes.set(xlabel="m/z", ylabel="abundance")

    return axes
Plot the m/z domain profile of the mass spectrum.
Parameters
- color (str, optional): The color to use for the plot. Defaults to 'green'.
- ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
Returns
- matplotlib.axes.Axes: The matplotlib axes containing the plot.
def to_excel(self, out_file_path, write_metadata=True):
    """Export the mass spectrum to an Excel file.

    Parameters
    ----------
    out_file_path : str
        The path to the Excel file to export to.
    write_metadata : bool, optional
        Whether to write the metadata to the Excel file. Defaults to True.

    Returns
    -------
    None
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    HighResMassSpecExport(out_file_path, self).to_excel(
        write_metadata=write_metadata
    )
Export the mass spectrum to an Excel file.
Parameters
- out_file_path (str): The path to the Excel file to export to.
- write_metadata (bool, optional): Whether to write the metadata to the Excel file. Defaults to True.
Returns
- None
def to_hdf(self, out_file_path):
    """Export the mass spectrum to an HDF file.

    Parameters
    ----------
    out_file_path : str
        The path to the HDF file to export to.

    Returns
    -------
    None
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    HighResMassSpecExport(out_file_path, self).to_hdf()
Export the mass spectrum to an HDF file.
Parameters
- out_file_path (str): The path to the HDF file to export to.
Returns
- None
def to_csv(self, out_file_path, write_metadata=True):
    """Export the mass spectrum to a CSV file.

    Parameters
    ----------
    out_file_path : str
        The path to the CSV file to export to.
    write_metadata : bool, optional
        Whether to write the metadata to the CSV file. Defaults to True.
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    HighResMassSpecExport(out_file_path, self).to_csv(
        write_metadata=write_metadata
    )
Export the mass spectrum to a CSV file.
Parameters
- out_file_path (str): The path to the CSV file to export to.
- write_metadata (bool, optional): Whether to write the metadata to the CSV file. Defaults to True.
def to_pandas(self, out_file_path, write_metadata=True):
    """Export the mass spectrum to a Pandas dataframe with pkl extension.

    Parameters
    ----------
    out_file_path : str
        The path to the file to export to.
    write_metadata : bool, optional
        Whether to write the metadata as part of the export. Defaults to True.
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    HighResMassSpecExport(out_file_path, self).to_pandas(
        write_metadata=write_metadata
    )
Export the mass spectrum to a Pandas dataframe with pkl extension.
Parameters
- out_file_path (str): The path to the pickle (.pkl) file to export to.
- write_metadata (bool, optional): Whether to write the metadata along with the exported dataframe. Defaults to True.
def to_dataframe(self, additional_columns=None):
    """Return the mass spectrum as a Pandas dataframe.

    Parameters
    ----------
    additional_columns : list, optional
        A list of additional columns to include in the dataframe. Defaults to None.
        Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"

    Returns
    -------
    pandas.DataFrame
        The mass spectrum as a Pandas dataframe.
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    exporter = HighResMassSpecExport(self.filename, self)
    dataframe = exporter.get_pandas_df(additional_columns=additional_columns)
    return dataframe
Return the mass spectrum as a Pandas dataframe.
Parameters
- additional_columns (list, optional): A list of additional columns to include in the dataframe. Defaults to None. Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
Returns
- pandas.DataFrame: The mass spectrum as a Pandas dataframe.
def to_json(self):
    """Return the JSON export of the mass spectrum.

    The value is whatever ``HighResMassSpecExport.to_json()`` returns for
    this spectrum.
    """
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    exporter = HighResMassSpecExport(self.filename, self)
    json_output = exporter.to_json()
    return json_output
Return the mass spectrum as a JSON file.
def parameters_json(self):
    """Return the parameters of the mass spectrum as a JSON string."""
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    exporter = HighResMassSpecExport(self.filename, self)
    json_parameters = exporter.parameters_to_json()
    return json_parameters
Return the parameters of the mass spectrum as a JSON string.
def parameters_toml(self):
    """Return the parameters of the mass spectrum as a TOML string."""
    # Imported here rather than at module level, as in the original code.
    from corems.mass_spectrum.output.export import HighResMassSpecExport

    exporter = HighResMassSpecExport(self.filename, self)
    toml_parameters = exporter.parameters_to_toml()
    return toml_parameters
Return the parameters of the mass spectrum as a TOML string.
Inherited Members
- corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
- percentile_assigned
- resolving_power_calc
- number_average_molecular_weight
- weight_average_molecular_weight
- corems.mass_spectrum.calc.PeakPicking.PeakPicking
- prepare_peak_picking_data
- cut_mz_domain_peak_picking
- legacy_cut_mz_domain_peak_picking
- extrapolate_axis
- extrapolate_axes_for_pp
- do_peak_picking
- find_minima
- linear_fit_calc
- calculate_resolving_power
- cal_minima
- calc_centroid
- get_threshold
- algebraic_quadratic
- find_apex_fit_quadratic
- check_prominence
- use_the_max
- calc_centroid_legacy
class MassSpecProfile(MassSpecBase):
    """A mass spectrum class for data whose entry point is the profile format.

    Notes
    -----
    Stores the profile data and instrument settings.
    Iteration is over the list of MSPeaks objects stored in the _mspeaks attribute.
    _mspeaks is populated under the hood by calling the process_mass_spec method.
    Iteration is null if _mspeaks is empty. Many more attributes and methods
    are inherited from MassSpecBase().

    Parameters
    ----------
    data_dict : dict
        A dictionary containing the profile data; the m/z and abundance
        values are read from the ``Labels.mz`` and ``Labels.abundance`` keys.
    d_params : dict{'str': float, int or str}
        Contains the instrument settings and processing settings.
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    -------
    * process_mass_spec(). Process the mass spectrum.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        mz_values = data_dict.get(Labels.mz)
        abundance_values = data_dict.get(Labels.abundance)

        super().__init__(mz_values, abundance_values, d_params)

        if auto_process:
            self.process_mass_spec()
A mass spectrum class when the entry point is on profile format
Notes
Stores the profile data and instrument settings. Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. _mspeaks is populated under the hood by calling process_mass_spec method. Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
Parameters
- data_dict (dict): A dictionary containing the profile data.
- d_params (dict{str: float, int or str}): Contains the instrument settings and processing settings.
- auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
- _abundance (ndarray): The abundance values of the mass spectrum.
- _mz_exp (ndarray): The m/z values of the mass spectrum.
- _mspeaks (list): A list of mass peaks.
Methods
- process_mass_spec(). Process the mass spectrum.
see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
Inherited Members
- MassSpecBase
- mspeaks
- is_calibrated
- is_centroid
- has_frequency
- calibration_order
- calibration_points
- calibration_ref_mzs
- calibration_meas_mzs
- calibration_RMS
- calibration_segment
- calibration_raw_error_median
- calibration_raw_error_stdev
- set_indexes
- reset_indexes
- add_mspeak
- reset_cal_therms
- clear_molecular_formulas
- process_mass_spec
- cal_noise_threshold
- parameters
- set_parameter_from_json
- set_parameter_from_toml
- mspeaks_settings
- settings
- molecular_search_settings
- mz_cal_profile
- mz_cal
- mz_exp
- freq_exp_profile
- freq_exp_pp
- mz_exp_profile
- mz_exp_pp
- abundance_profile
- abundance_profile_pp
- abundance
- freq_exp
- resolving_power
- signal_to_noise
- nominal_mz
- get_mz_and_abundance_peaks_tuples
- kmd
- kendrick_mass
- max_mz_exp
- min_mz_exp
- max_abundance
- max_signal_to_noise
- most_abundant_mspeak
- min_abundance
- dynamic_range
- baseline_noise
- baseline_noise_std
- Aterm
- Bterm
- Cterm
- filename
- dir_location
- sort_by_mz
- sort_by_abundance
- tic
- check_mspeaks_warning
- check_mspeaks
- remove_assignment_by_index
- filter_by_index
- filter_by_mz
- filter_by_s2n
- filter_by_abundance
- filter_by_max_resolving_power
- filter_by_mean_resolving_power
- filter_by_min_resolving_power
- filter_by_noise_threshold
- find_peaks
- change_kendrick_base_all_mspeaks
- get_nominal_mz_first_last_indexes
- get_masses_count_by_nominal_mass
- datapoints_count_by_nominal_mz
- get_nominal_mass_indexes
- plot_centroid
- plot_profile_and_noise_threshold
- plot_mz_domain_profile
- to_excel
- to_hdf
- to_csv
- to_pandas
- to_dataframe
- to_json
- parameters_json
- parameters_toml
- corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
- percentile_assigned
- resolving_power_calc
- number_average_molecular_weight
- weight_average_molecular_weight
- corems.mass_spectrum.calc.PeakPicking.PeakPicking
- prepare_peak_picking_data
- cut_mz_domain_peak_picking
- legacy_cut_mz_domain_peak_picking
- extrapolate_axis
- extrapolate_axes_for_pp
- do_peak_picking
- find_minima
- linear_fit_calc
- calculate_resolving_power
- cal_minima
- calc_centroid
- get_threshold
- algebraic_quadratic
- find_apex_fit_quadratic
- check_prominence
- use_the_max
- calc_centroid_legacy
class MassSpecfromFreq(MassSpecBase):
    """Mass spectrum whose entry point is frequency-domain data.

    Notes
    -----
    - Transforms the frequency axis to m/z based on the settings stored in d_params
    - Stores the profile data and instrument settings
    - Iteration walks the MSPeak objects stored in the _mspeaks attribute
    - _mspeaks is populated under the hood by calling process_mass_spec
    - Iteration yields nothing if _mspeaks is empty

    Parameters
    ----------
    frequency_domain : list(float)
        All datapoints of the frequency domain, in Hz.
    magnitude : list(float)
        Magnitude of each frequency datapoint.
    d_params : dict{str: float, int or str}
        Instrument and processing settings.
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.
    keep_profile : bool, optional
        Whether to keep the profile data. Defaults to True.

    Attributes
    ----------
    has_frequency : bool
        Whether the mass spectrum has frequency data (set to True here).
    _frequency_domain : list(float)
        Frequency domain in Hz.
    label : str
        Data-source label (see the Labels class); compared against
        ``Labels.bruker_frequency`` to pick the frequency-to-m/z conversion.
    magnetron_frequency : float or None
        Centre of the Gaussian fitted by calc_magnetron_freq; None until
        that method has run.
    magnetron_frequency_sigma : float or None
        Sigma of the same fit; None until calc_magnetron_freq has run.
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    See also: all the attributes of the MassSpecBase class.

    Methods
    -------
    * _set_mz_domain().
        Calculates the m/z axis based on the settings of d_params.
    * process_mass_spec(). Process the mass spectrum.

    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    """

    def __init__(
        self,
        frequency_domain,
        magnitude,
        d_params,
        auto_process=True,
        keep_profile=True,
    ):
        super().__init__(None, magnitude, d_params)

        self._frequency_domain = frequency_domain
        self.has_frequency = True
        # The m/z axis does not exist yet: derive it from the frequencies,
        # then make sure it runs from low to high m/z.
        self._set_mz_domain()
        self._sort_mz_domain()

        self.magnetron_frequency = None
        self.magnetron_frequency_sigma = None

        # Processing happens as the object is created, so settings must be
        # changed before instantiating the class for them to take effect.
        if not auto_process:
            return
        self.process_mass_spec(keep_profile=keep_profile)

    def _sort_mz_domain(self):
        """Reverse the m/z, abundance and frequency axes if m/z is descending."""
        is_descending = self._mz_exp[0] > self._mz_exp[-1]
        if not is_descending:
            return

        self._mz_exp = self._mz_exp[::-1]
        self._abundance = self._abundance[::-1]
        self._frequency_domain = self._frequency_domain[::-1]

    def _set_mz_domain(self):
        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
        # Bruker frequency data has its own conversion routine.
        converter = (
            self._f_to_mz_bruker
            if self.label == Labels.bruker_frequency
            else self._f_to_mz
        )
        self._mz_exp = converter()

    @property
    def transient_settings(self):
        """Return the transient settings of the mass spectrum."""
        return self.parameters.transient

    @transient_settings.setter
    def transient_settings(self, instance_TransientSetting):
        self.parameters.transient = instance_TransientSetting

    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
        """Calculates the magnetron frequency of the mass spectrum.

        Parameters
        ----------
        max_magnetron_freq : float, optional
            The maximum magnetron frequency. Defaults to 50.
        magnetron_freq_bins : int, optional
            The number of bins to use for the histogram. Defaults to 300.

        Returns
        -------
        None
            The results are stored on ``magnetron_frequency`` and
            ``magnetron_frequency_sigma``.

        Notes
        -----
        Takes the frequencies returned by ``freq_exp()`` and the differences
        between consecutive values. A histogram of the differences below
        'max_magnetron_freq', with 'magnetron_freq_bins' bins, is calculated.
        A Gaussian model is fit to this histogram; its center is
        (statistically, probably) the magnetron frequency.
        This appears to work well for nOmega datasets, but may not work well
        for 1x datasets or those with very low magnetron peaks.
        """
        peak_freqs = DataFrame(self.freq_exp(), columns=["Freq"])
        peak_freqs["FreqDelta"] = peak_freqs["Freq"].diff()

        below_cutoff = peak_freqs["FreqDelta"] < max_magnetron_freq
        counts, bin_edges = histogram(
            peak_freqs[below_cutoff]["FreqDelta"],
            bins=magnetron_freq_bins,
        )
        # The fit's x axis is the left edge of every bin (last edge dropped).
        left_edges = bin_edges[:-1]

        gaussian = GaussianModel()
        initial_guess = gaussian.guess(counts, x=left_edges)
        fit = gaussian.fit(counts, initial_guess, x=left_edges)
        self.magnetron_frequency = fit.best_values["center"]
        self.magnetron_frequency_sigma = fit.best_values["sigma"]
A mass spectrum class when data entry is on frequency domain
Notes
- Transform to m/z based on the settings stored at d_params
- Stores the profile data and instrument settings
- Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
- _mspeaks is populated under the hood by calling process_mass_spec method
- iteration is null if _mspeaks is empty
Parameters
- frequency_domain (list(float)): all datapoints in frequency domain in Hz
- magnitude (list(float)): the magnitude of each frequency datapoint
- d_params (dict{str: float, int or str}): contains the instrument settings and processing settings
- auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
- keep_profile (bool, optional): Whether to keep the profile data. Defaults to True.
Attributes
- has_frequency (bool): Whether the mass spectrum has frequency data.
- _frequency_domain (list(float)): Frequency domain in Hz
- label (str): stores the label (Bruker, Midas Transient, see Labels class). It is carried across distinct processing points
- _abundance (ndarray): The abundance values of the mass spectrum.
- _mz_exp (ndarray): The m/z values of the mass spectrum.
- _mspeaks (list): A list of mass peaks.
- See also: all the attributes of the MassSpecBase class
Methods
- _set_mz_domain(). calculates the m_z based on the setting of d_params
- process_mass_spec(). Process the mass spectrum.
see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
def __init__(
    self,
    frequency_domain,
    magnitude,
    d_params,
    auto_process=True,
    keep_profile=True,
):
    """Build the spectrum from frequency-domain data.

    Parameters
    ----------
    frequency_domain : list(float)
        All datapoints of the frequency domain, in Hz.
    magnitude : list(float)
        Magnitude of each frequency datapoint.
    d_params : dict
        Instrument and processing settings.
    auto_process : bool, optional
        Whether to call process_mass_spec at construction. Defaults to True.
    keep_profile : bool, optional
        Forwarded to process_mass_spec. Defaults to True.
    """
    super().__init__(None, magnitude, d_params)

    self._frequency_domain = frequency_domain
    self.has_frequency = True
    # The m/z axis does not exist yet: derive it from the frequencies,
    # then make sure it runs from low to high m/z.
    self._set_mz_domain()
    self._sort_mz_domain()

    self.magnetron_frequency = None
    self.magnetron_frequency_sigma = None

    # Processing happens as the object is created, so settings must be
    # changed before instantiating the class for them to take effect.
    if not auto_process:
        return
    self.process_mass_spec(keep_profile=keep_profile)
def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
    """Calculates the magnetron frequency of the mass spectrum.

    Parameters
    ----------
    max_magnetron_freq : float, optional
        The maximum magnetron frequency. Defaults to 50.
    magnetron_freq_bins : int, optional
        The number of bins to use for the histogram. Defaults to 300.

    Returns
    -------
    None
        The results are stored on ``magnetron_frequency`` and
        ``magnetron_frequency_sigma``.

    Notes
    -----
    Takes the frequencies returned by ``freq_exp()`` and the differences
    between consecutive values. A histogram of the differences below
    'max_magnetron_freq', with 'magnetron_freq_bins' bins, is calculated.
    A Gaussian model is fit to this histogram; its center is
    (statistically, probably) the magnetron frequency.
    This appears to work well for nOmega datasets, but may not work well
    for 1x datasets or those with very low magnetron peaks.
    """
    peak_freqs = DataFrame(self.freq_exp(), columns=["Freq"])
    peak_freqs["FreqDelta"] = peak_freqs["Freq"].diff()

    below_cutoff = peak_freqs["FreqDelta"] < max_magnetron_freq
    counts, bin_edges = histogram(
        peak_freqs[below_cutoff]["FreqDelta"],
        bins=magnetron_freq_bins,
    )
    # The fit's x axis is the left edge of every bin (last edge dropped).
    left_edges = bin_edges[:-1]

    gaussian = GaussianModel()
    initial_guess = gaussian.guess(counts, x=left_edges)
    fit = gaussian.fit(counts, initial_guess, x=left_edges)
    self.magnetron_frequency = fit.best_values["center"]
    self.magnetron_frequency_sigma = fit.best_values["sigma"]
Calculates the magnetron frequency of the mass spectrum.
Parameters
- max_magnetron_freq (float, optional): The maximum magnetron frequency. Defaults to 50.
- magnetron_freq_bins (int, optional): The number of bins to use for the histogram. Defaults to 300.
Returns
- None
Notes
Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain. A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated. A Gaussian model is fit to this histogram - the center value of this is (statistically, probably) the magnetron frequency. This appears to work well for nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
Inherited Members
- MassSpecBase
- mspeaks
- is_calibrated
- is_centroid
- calibration_order
- calibration_points
- calibration_ref_mzs
- calibration_meas_mzs
- calibration_RMS
- calibration_segment
- calibration_raw_error_median
- calibration_raw_error_stdev
- set_indexes
- reset_indexes
- add_mspeak
- reset_cal_therms
- clear_molecular_formulas
- process_mass_spec
- cal_noise_threshold
- parameters
- set_parameter_from_json
- set_parameter_from_toml
- mspeaks_settings
- settings
- molecular_search_settings
- mz_cal_profile
- mz_cal
- mz_exp
- freq_exp_profile
- freq_exp_pp
- mz_exp_profile
- mz_exp_pp
- abundance_profile
- abundance_profile_pp
- abundance
- freq_exp
- resolving_power
- signal_to_noise
- nominal_mz
- get_mz_and_abundance_peaks_tuples
- kmd
- kendrick_mass
- max_mz_exp
- min_mz_exp
- max_abundance
- max_signal_to_noise
- most_abundant_mspeak
- min_abundance
- dynamic_range
- baseline_noise
- baseline_noise_std
- Aterm
- Bterm
- Cterm
- filename
- dir_location
- sort_by_mz
- sort_by_abundance
- tic
- check_mspeaks_warning
- check_mspeaks
- remove_assignment_by_index
- filter_by_index
- filter_by_mz
- filter_by_s2n
- filter_by_abundance
- filter_by_max_resolving_power
- filter_by_mean_resolving_power
- filter_by_min_resolving_power
- filter_by_noise_threshold
- find_peaks
- change_kendrick_base_all_mspeaks
- get_nominal_mz_first_last_indexes
- get_masses_count_by_nominal_mass
- datapoints_count_by_nominal_mz
- get_nominal_mass_indexes
- plot_centroid
- plot_profile_and_noise_threshold
- plot_mz_domain_profile
- to_excel
- to_hdf
- to_csv
- to_pandas
- to_dataframe
- to_json
- parameters_json
- parameters_toml
- corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
- percentile_assigned
- resolving_power_calc
- number_average_molecular_weight
- weight_average_molecular_weight
- corems.mass_spectrum.calc.PeakPicking.PeakPicking
- prepare_peak_picking_data
- cut_mz_domain_peak_picking
- legacy_cut_mz_domain_peak_picking
- extrapolate_axis
- extrapolate_axes_for_pp
- do_peak_picking
- find_minima
- linear_fit_calc
- calculate_resolving_power
- cal_minima
- calc_centroid
- get_threshold
- algebraic_quadratic
- find_apex_fit_quadratic
- check_prominence
- use_the_max
- calc_centroid_legacy
class MassSpecCentroid(MassSpecBase):
    """A mass spectrum class when the entry point is on centroid format

    Notes
    -----
    - Stores the centroid data and instrument settings
    - Exposes stick-shaped pseudo-profile data (zero, apex, zero per peak)
      for plotting and inspection
    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    - _mspeaks is populated under the hood by calling process_mass_spec method
    - iteration is null if _mspeaks is empty

    Parameters
    ----------
    data_dict : dict{str: array_like}
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict{str: float, int or str}
        contains the instrument settings and processing settings
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    label : str
        store label (Bruker, Midas Transient, see Labels class)
    _baseline_noise : float
        store baseline noise
    _baseline_noise_std : float
        store baseline noise std
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    -------
    * process_mass_spec().
        Process the mass spectrum. Overridden from MassSpecBase. Populates the
        _mspeaks list with MSPeak objects using the centroid data.
    * __simulate_profile__data__().
        Expand centroids into three-point sticks, intended for plotting and
        inspection purposes only.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        super().__init__([], [], d_params)

        self._set_parameters_objects(d_params)

        # For Thermo centroid data the baseline noise comes from d_params
        # and is not recomputed in process_mass_spec.
        if self.label == Labels.thermo_centroid:
            self._baseline_noise = d_params.get("baseline_noise")
            self._baseline_noise_std = d_params.get("baseline_noise_std")

        self.is_centroid = True
        self.data_dict = data_dict
        self._mz_exp = data_dict[Labels.mz]
        self._abundance = data_dict[Labels.abundance]

        if auto_process:
            self.process_mass_spec()

    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
        """Expand centroid data into stick-shaped pseudo-profile data.

        Notes
        -----
        No peak shape is modelled here: every centroid becomes three points
        (zero, apex, zero) spaced 1e-7 apart in m/z. This is a quick fix to
        let a line plot draw sticks, for plotting and inspection purposes only.

        Parameters
        ----------
        exp_mz_centroid : list(float)
            list of m/z values
        magnitude_centroid : list(float)
            list of abundance values

        Returns
        -------
        x : list(float)
            list of m/z values, three per centroid
        y : list(float)
            list of abundance values, three per centroid
        """
        x, y = [], []
        for i, mz in enumerate(exp_mz_centroid):
            x.extend((mz - 0.0000001, mz, mz + 0.0000001))
            y.extend((0, magnitude_centroid[i], 0))
        return x, y

    @property
    def mz_exp_profile(self):
        """Return the m/z profile of the mass spectrum (three points per peak)."""
        mz_list = []
        for mz in self.mz_exp:
            mz_list.extend((mz - 0.0000001, mz, mz + 0.0000001))
        return mz_list

    @mz_exp_profile.setter
    def mz_exp_profile(self, _mz_exp):
        self._mz_exp = _mz_exp

    @property
    def abundance_profile(self):
        """Return the abundance profile of the mass spectrum (three points per peak)."""
        ab_list = []
        for ab in self.abundance:
            ab_list.extend((0, ab, 0))
        return ab_list

    @abundance_profile.setter
    def abundance_profile(self, abundance):
        self._abundance = abundance

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum."""
        return sum(self.abundance)

    def process_mass_spec(self):
        """Populate the MSPeak list from the stored centroid data.

        Overrides the base implementation: for profile and frequency based
        data the MSPeak objects are usually added inside the PeakPicking
        class, whereas here one peak is added per centroid that passes the
        configured noise threshold.

        Raises
        ------
        Exception
            If the noise threshold method is "signal_noise" but no
            signal-to-noise data is available, or (after the peaks have been
            added) if the label is not ``Labels.thermo_centroid`` and the
            noise threshold method is "log".

        Notes
        -----
        ``self.data_dict`` is deleted at the end, so the method cannot be run
        again on the same instance unless ``data_dict`` is assigned again.
        """

        def _has_values(values):
            # `not values` raises ValueError for numpy arrays holding more
            # than one element, so test None / zero length explicitly.
            if values is None or len(values) == 0:
                return False
            return any(value is not None for value in values)

        data_dict = self.data_dict
        ion_charge = self.polarity

        mz_values = data_dict.get(Labels.mz)
        rp_values = data_dict.get(Labels.rp)
        s2n_values = data_dict.get(Labels.s2n)

        # A column counts as present only if it holds at least one non-None entry
        rp_present = _has_values(rp_values)
        s2n_present = _has_values(s2n_values)

        noise_method = self.parameters.mass_spectrum.noise_threshold_method

        # Signal-to-noise thresholding is impossible without s2n data
        if not s2n_present and noise_method == "signal_noise":
            raise Exception("Signal to Noise data is missing for noise thresholding")

        # Pull out abundance data
        abun = array(data_dict.get(Labels.abundance)).astype(float)

        # Threshold and scaling factor used by the abundance based methods
        abundance_threshold, factor = self.get_threshold(abun)

        abundance_methods = ("minima", "relative_abundance", "absolute_abundance")

        # rp_i and s2n_i stay NaN unless the corresponding data is present
        rp_i, s2n_i = np.nan, np.nan
        for index, mz in enumerate(mz_values):
            if rp_present:
                # Falsy entries (None, 0) are recorded as NaN
                rp_i = float(rp_values[index]) if rp_values[index] else np.nan
            if s2n_present:
                s2n_i = float(s2n_values[index]) if s2n_values[index] else np.nan

            # centroid peak does not have start and end peak index pos
            massspec_indexes = (index, index, index)

            # Decide whether this centroid passes the configured threshold
            if noise_method in abundance_methods:
                keep_peak = abun[index] / factor >= abundance_threshold
            elif noise_method == "signal_noise":
                # A NaN s2n compares False, so such peaks are dropped
                keep_peak = (
                    s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
                )
            else:
                keep_peak = False

            if keep_peak:
                self.add_mspeak(
                    ion_charge,
                    mz,
                    abun[index],
                    rp_i,
                    s2n_i,
                    massspec_indexes,
                    ms_parent=self,
                )

        self.mspeaks = self._mspeaks
        self._dynamic_range = self.max_abundance / self.min_abundance
        self._set_nominal_masses_start_final_indexes()

        # Thermo centroid data already carries its baseline noise (see __init__)
        if self.label != Labels.thermo_centroid:
            if self.settings.noise_threshold_method == "log":
                raise Exception("log noise Not tested for centroid data")
            self._baseline_noise, self._baseline_noise_std = (
                self.run_noise_threshold_calc()
            )

        del self.data_dict
A mass spectrum class when the entry point is on centroid format
Notes
- Stores the centroid data and instrument settings
- Simulate profile data based on Gaussian or Lorentzian peak shape
- Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
- _mspeaks is populated under the hood by calling process_mass_spec method
- iteration is null if _mspeaks is empty
Parameters
- data_dict (dict{str: numpy array float64}): contains keys [m/z, Abundance, Resolving Power, S/N]
- d_params (dict{str: float, int or str}): contains the instrument settings and processing settings
- auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
- label (str): store label (Bruker, Midas Transient, see Labels class)
- _baseline_noise (float): store baseline noise
- _baseline_noise_std (float): store baseline noise std
- _abundance (ndarray): The abundance values of the mass spectrum.
- _mz_exp (ndarray): The m/z values of the mass spectrum.
- _mspeaks (list): A list of mass peaks.
Methods
- process_mass_spec(). Process the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSPeak objects using the centroid data.
- __simulate_profile__data__(). Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
def __init__(self, data_dict, d_params, auto_process=True):
    """Store the centroid data and optionally process it.

    Parameters
    ----------
    data_dict : dict
        Centroid data; must contain the ``Labels.mz`` and
        ``Labels.abundance`` keys.
    d_params : dict
        Instrument and processing settings.
    auto_process : bool, optional
        Whether to call process_mass_spec at construction. Defaults to True.
    """
    super().__init__([], [], d_params)
    self._set_parameters_objects(d_params)

    # For Thermo centroid data the baseline noise is taken from d_params.
    is_thermo_centroid = self.label == Labels.thermo_centroid
    if is_thermo_centroid:
        self._baseline_noise, self._baseline_noise_std = (
            d_params.get("baseline_noise"),
            d_params.get("baseline_noise_std"),
        )

    self.is_centroid = True
    self.data_dict = data_dict
    self._mz_exp = data_dict[Labels.mz]
    self._abundance = data_dict[Labels.abundance]

    if not auto_process:
        return
    self.process_mass_spec()
def process_mass_spec(self):
    """Populate the MSPeak list from the stored centroid data.

    Overrides the base implementation: for profile and frequency based
    data the MSPeak objects are usually added inside the PeakPicking
    class, whereas here one peak is added per centroid that passes the
    configured noise threshold.

    Raises
    ------
    Exception
        If the noise threshold method is "signal_noise" but no
        signal-to-noise data is available, or (after the peaks have been
        added) if the label is not ``Labels.thermo_centroid`` and the
        noise threshold method is "log".

    Notes
    -----
    ``self.data_dict`` is deleted at the end, so the method cannot be run
    again on the same instance unless ``data_dict`` is assigned again.
    """

    def _has_values(values):
        # `not values` raises ValueError for numpy arrays holding more
        # than one element, so test None / zero length explicitly.
        if values is None or len(values) == 0:
            return False
        return any(value is not None for value in values)

    data_dict = self.data_dict
    ion_charge = self.polarity

    mz_values = data_dict.get(Labels.mz)
    rp_values = data_dict.get(Labels.rp)
    s2n_values = data_dict.get(Labels.s2n)

    # A column counts as present only if it holds at least one non-None entry
    rp_present = _has_values(rp_values)
    s2n_present = _has_values(s2n_values)

    noise_method = self.parameters.mass_spectrum.noise_threshold_method

    # Signal-to-noise thresholding is impossible without s2n data
    if not s2n_present and noise_method == "signal_noise":
        raise Exception("Signal to Noise data is missing for noise thresholding")

    # Pull out abundance data
    abun = array(data_dict.get(Labels.abundance)).astype(float)

    # Threshold and scaling factor used by the abundance based methods
    abundance_threshold, factor = self.get_threshold(abun)

    abundance_methods = ("minima", "relative_abundance", "absolute_abundance")

    # rp_i and s2n_i stay NaN unless the corresponding data is present
    rp_i, s2n_i = np.nan, np.nan
    for index, mz in enumerate(mz_values):
        if rp_present:
            # Falsy entries (None, 0) are recorded as NaN
            rp_i = float(rp_values[index]) if rp_values[index] else np.nan
        if s2n_present:
            s2n_i = float(s2n_values[index]) if s2n_values[index] else np.nan

        # centroid peak does not have start and end peak index pos
        massspec_indexes = (index, index, index)

        # Decide whether this centroid passes the configured threshold
        if noise_method in abundance_methods:
            keep_peak = abun[index] / factor >= abundance_threshold
        elif noise_method == "signal_noise":
            # A NaN s2n compares False, so such peaks are dropped
            keep_peak = (
                s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
            )
        else:
            keep_peak = False

        if keep_peak:
            self.add_mspeak(
                ion_charge,
                mz,
                abun[index],
                rp_i,
                s2n_i,
                massspec_indexes,
                ms_parent=self,
            )

    self.mspeaks = self._mspeaks
    self._dynamic_range = self.max_abundance / self.min_abundance
    self._set_nominal_masses_start_final_indexes()

    # Thermo centroid data already carries its baseline noise from d_params
    if self.label != Labels.thermo_centroid:
        if self.settings.noise_threshold_method == "log":
            raise Exception("log noise Not tested for centroid data")
        self._baseline_noise, self._baseline_noise_std = (
            self.run_noise_threshold_calc()
        )

    del self.data_dict
Process the mass spectrum.
Inherited Members
- MassSpecBase
- mspeaks
- is_calibrated
- has_frequency
- calibration_order
- calibration_points
- calibration_ref_mzs
- calibration_meas_mzs
- calibration_RMS
- calibration_segment
- calibration_raw_error_median
- calibration_raw_error_stdev
- set_indexes
- reset_indexes
- add_mspeak
- reset_cal_therms
- clear_molecular_formulas
- cal_noise_threshold
- parameters
- set_parameter_from_json
- set_parameter_from_toml
- mspeaks_settings
- settings
- molecular_search_settings
- mz_cal_profile
- mz_cal
- mz_exp
- freq_exp_profile
- freq_exp_pp
- mz_exp_pp
- abundance_profile_pp
- abundance
- freq_exp
- resolving_power
- signal_to_noise
- nominal_mz
- get_mz_and_abundance_peaks_tuples
- kmd
- kendrick_mass
- max_mz_exp
- min_mz_exp
- max_abundance
- max_signal_to_noise
- most_abundant_mspeak
- min_abundance
- dynamic_range
- baseline_noise
- baseline_noise_std
- Aterm
- Bterm
- Cterm
- filename
- dir_location
- sort_by_mz
- sort_by_abundance
- check_mspeaks_warning
- check_mspeaks
- remove_assignment_by_index
- filter_by_index
- filter_by_mz
- filter_by_s2n
- filter_by_abundance
- filter_by_max_resolving_power
- filter_by_mean_resolving_power
- filter_by_min_resolving_power
- filter_by_noise_threshold
- find_peaks
- change_kendrick_base_all_mspeaks
- get_nominal_mz_first_last_indexes
- get_masses_count_by_nominal_mass
- datapoints_count_by_nominal_mz
- get_nominal_mass_indexes
- plot_centroid
- plot_profile_and_noise_threshold
- plot_mz_domain_profile
- to_excel
- to_hdf
- to_csv
- to_pandas
- to_dataframe
- to_json
- parameters_json
- parameters_toml
- corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
- percentile_assigned
- resolving_power_calc
- number_average_molecular_weight
- weight_average_molecular_weight
- corems.mass_spectrum.calc.PeakPicking.PeakPicking
- prepare_peak_picking_data
- cut_mz_domain_peak_picking
- legacy_cut_mz_domain_peak_picking
- extrapolate_axis
- extrapolate_axes_for_pp
- do_peak_picking
- find_minima
- linear_fit_calc
- calculate_resolving_power
- cal_minima
- calc_centroid
- get_threshold
- algebraic_quadratic
- find_apex_fit_quadratic
- check_prominence
- use_the_max
- calc_centroid_legacy
class MassSpecCentroidLowRes(MassSpecCentroid):
    """A mass spectrum class when the entry point is on low resolution centroid format

    Notes
    -----
    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead

    Parameters
    ----------
    data_dict : dict{str: array_like}
        contains keys [m/z, Abundance, Resolving Power, S/N]; only the m/z
        and abundance entries are read here
    d_params : dict{str: float, int or str}
        contains the instrument settings and processing settings

    Attributes
    ----------
    _processed_tic : float
        store processed total ion current (None until set externally)
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    """

    def __init__(self, data_dict, d_params):
        # NOTE(review): the parent initializers are not called; presumably
        # intentional because no MSPeak objects are stored - confirm.
        self._set_parameters_objects(d_params)
        self._mz_exp = array(data_dict.get(Labels.mz))
        self._abundance = array(data_dict.get(Labels.abundance))
        self._processed_tic = None

    def __len__(self):
        return len(self.mz_exp)

    def __getitem__(self, position):
        return (self.mz_exp[position], self.abundance[position])

    @property
    def mz_exp(self):
        """Return the m/z values of the mass spectrum."""
        return self._mz_exp

    @property
    def abundance(self):
        """Return the abundance values of the mass spectrum."""
        return self._abundance

    @property
    def processed_tic(self):
        """Return the processed total ion current of the mass spectrum."""
        # NOTE(review): this sums _processed_tic while `tic` returns it
        # unchanged; the two disagree on whether it is a scalar or a
        # sequence, and it is None until assigned - confirm intended type.
        return sum(self._processed_tic)

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum.

        The stored processed value is returned when it is truthy; otherwise
        the abundances are summed.
        """
        if self._processed_tic:
            return self._processed_tic
        else:
            return sum(self.abundance)

    @property
    def mz_abun_tuples(self):
        """Return the m/z and abundance values of the mass spectrum as a list of tuples.

        Both members of every pair are rounded to the nearest integer.
        """

        def rounded_pair(pair):
            # Each value gets its own int(); the earlier form passed the
            # second value as int()'s `base` argument and raised TypeError.
            return (int(round(pair[0], 0)), int(round(pair[1], 0)))

        return [rounded_pair(pair) for pair in self]

    @property
    def mz_abun_dict(self):
        """Return the m/z and abundance values of the mass spectrum as a dictionary.

        Keys are rounded m/z values and values are rounded abundances; when
        several m/z values round to the same integer the last one wins.
        """

        def to_int(value):
            return int(round(value, 0))

        return {to_int(pair[0]): to_int(pair[1]) for pair in self}
A mass spectrum class when the entry point is on low resolution centroid format
Notes
Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
Parameters
- data_dict (dict{str: numpy array float64}): contains keys [m/z, Abundance, Resolving Power, S/N]
- d_params (dict{str: float, int or str}): contains the instrument settings and processing settings
Attributes
- _processed_tic (float): store processed total ion current
- _abundance (ndarray): The abundance values of the mass spectrum.
- _mz_exp (ndarray): The m/z values of the mass spectrum.
Inherited Members
- MassSpecBase
- mspeaks
- is_calibrated
- has_frequency
- calibration_order
- calibration_points
- calibration_ref_mzs
- calibration_meas_mzs
- calibration_RMS
- calibration_segment
- calibration_raw_error_median
- calibration_raw_error_stdev
- set_indexes
- reset_indexes
- add_mspeak
- reset_cal_therms
- clear_molecular_formulas
- cal_noise_threshold
- parameters
- set_parameter_from_json
- set_parameter_from_toml
- mspeaks_settings
- settings
- molecular_search_settings
- mz_cal_profile
- mz_cal
- freq_exp_profile
- freq_exp_pp
- mz_exp_pp
- abundance_profile_pp
- freq_exp
- resolving_power
- signal_to_noise
- nominal_mz
- get_mz_and_abundance_peaks_tuples
- kmd
- kendrick_mass
- max_mz_exp
- min_mz_exp
- max_abundance
- max_signal_to_noise
- most_abundant_mspeak
- min_abundance
- dynamic_range
- baseline_noise
- baseline_noise_std
- Aterm
- Bterm
- Cterm
- filename
- dir_location
- sort_by_mz
- sort_by_abundance
- check_mspeaks_warning
- check_mspeaks
- remove_assignment_by_index
- filter_by_index
- filter_by_mz
- filter_by_s2n
- filter_by_abundance
- filter_by_max_resolving_power
- filter_by_mean_resolving_power
- filter_by_min_resolving_power
- filter_by_noise_threshold
- find_peaks
- change_kendrick_base_all_mspeaks
- get_nominal_mz_first_last_indexes
- get_masses_count_by_nominal_mass
- datapoints_count_by_nominal_mz
- get_nominal_mass_indexes
- plot_centroid
- plot_profile_and_noise_threshold
- plot_mz_domain_profile
- to_excel
- to_hdf
- to_csv
- to_pandas
- to_dataframe
- to_json
- parameters_json
- parameters_toml
- corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
- percentile_assigned
- resolving_power_calc
- number_average_molecular_weight
- weight_average_molecular_weight
- corems.mass_spectrum.calc.PeakPicking.PeakPicking
- prepare_peak_picking_data
- cut_mz_domain_peak_picking
- legacy_cut_mz_domain_peak_picking
- extrapolate_axis
- extrapolate_axes_for_pp
- do_peak_picking
- find_minima
- linear_fit_calc
- calculate_resolving_power
- cal_minima
- calc_centroid
- get_threshold
- algebraic_quadratic
- find_apex_fit_quadratic
- check_prominence
- use_the_max
- calc_centroid_legacy