corems.mass_spectrum.factory.MassSpectrumClasses

   1from pathlib import Path
   2
   3import numpy as np
   4from lmfit.models import GaussianModel
   5
   6# from matplotlib import rcParamsDefault, rcParams
   7from numpy import array, float64, histogram, trapz, where
   8from pandas import DataFrame
   9
  10from corems.encapsulation.constant import Labels
  11from corems.encapsulation.factory.parameters import MSParameters
  12from corems.encapsulation.input.parameter_from_json import (
  13    load_and_set_parameters_ms,
  14    load_and_set_toml_parameters_ms,
  15)
  16from corems.mass_spectrum.calc.KendrickGroup import KendrickGrouping
  17from corems.mass_spectrum.calc.MassSpectrumCalc import MassSpecCalc
  18from corems.mass_spectrum.calc.MeanResolvingPowerFilter import MeanResolvingPowerFilter
  19from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak as MSPeak
  20
  21__author__ = "Yuri E. Corilo"
  22__date__ = "Jun 12, 2019"
  23
  24
  25def overrides(interface_class):
  26    """Checks if the method overrides a method from an interface class."""
  27
  28    def overrider(method):
  29        assert method.__name__ in dir(interface_class)
  30        return method
  31
  32    return overrider
  33
  34
  35class MassSpecBase(MassSpecCalc, KendrickGrouping):
  36    """A mass spectrum base class, stores the profile data and instrument settings.
  37
  38    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
  39    _mspeaks is populated under the hood by calling process_mass_spec method.
  40    Iteration is null if _mspeaks is empty.
  41
  42    Parameters
  43    ----------
  44    mz_exp : array_like
  45        The m/z values of the mass spectrum.
  46    abundance : array_like
  47        The abundance values of the mass spectrum.
  48    d_params : dict
  49        A dictionary of parameters for the mass spectrum.
  50    **kwargs
  51        Additional keyword arguments.
  52
  53    Attributes
  54    ----------
  55
  56    mspeaks : list
  57        A list of mass peaks.
  58    is_calibrated : bool
  59        Whether the mass spectrum is calibrated.
  60    is_centroid : bool
  61        Whether the mass spectrum is centroided.
  62    has_frequency : bool
  63        Whether the mass spectrum has a frequency domain.
  64    calibration_order : None or int
  65        The order of the mass spectrum's calibration.
  66    calibration_points : None or ndarray
  67        The calibration points of the mass spectrum.
  68    calibration_ref_mzs: None or ndarray
  69        The reference m/z values of the mass spectrum's calibration.
  70    calibration_meas_mzs : None or ndarray
  71        The measured m/z values of the mass spectrum's calibration.
  72    calibration_RMS : None or float
  73        The root mean square of the mass spectrum's calibration.
  74    calibration_segment : None or CalibrationSegment
  75        The calibration segment of the mass spectrum.
  76    _abundance : ndarray
  77        The abundance values of the mass spectrum.
  78    _mz_exp : ndarray
  79        The m/z values of the mass spectrum.
  80    _mspeaks : list
  81        A list of mass peaks.
  82    _dict_nominal_masses_indexes : dict
  83        A dictionary of nominal masses and their indexes.
  84    _baseline_noise : float
  85        The baseline noise of the mass spectrum.
  86    _baseline_noise_std : float
  87        The standard deviation of the baseline noise of the mass spectrum.
  88    _dynamic_range : float or None
  89        The dynamic range of the mass spectrum.
  90    _transient_settings : None or TransientSettings
  91        The transient settings of the mass spectrum.
  92    _frequency_domain : None or FrequencyDomain
  93        The frequency domain of the mass spectrum.
  94    _mz_cal_profile : None or MzCalibrationProfile
  95        The m/z calibration profile of the mass spectrum.
  96
  97    Methods
  98    -------
  99    * process_mass_spec(). Main function to process the mass spectrum,
 100    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
 101
 102    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
 103    """
 104
 105    def __init__(self, mz_exp, abundance, d_params, **kwargs):
 106        self._abundance = array(abundance, dtype=float64)
 107        self._mz_exp = array(mz_exp, dtype=float64)
 108
 109        # objects created after process_mass_spec() function
 110        self._mspeaks = list()
 111        self.mspeaks = list()
 112        self._dict_nominal_masses_indexes = dict()
 113        self._baseline_noise = 0.001
 114        self._baseline_noise_std = 0.001
 115        self._dynamic_range = None
 116        # set to None: initialization occurs inside subclass MassSpecfromFreq
 117        self._transient_settings = None
 118        self._frequency_domain = None
 119        self._mz_cal_profile = None
 120        self.is_calibrated = False
 121
 122        self._set_parameters_objects(d_params)
 123        self._init_settings()
 124
 125        self.is_centroid = False
 126        self.has_frequency = False
 127
 128        self.calibration_order = None
 129        self.calibration_points = None
 130        self.calibration_ref_mzs = None
 131        self.calibration_meas_mzs = None
 132        self.calibration_RMS = None
 133        self.calibration_segment = None
 134        self.calibration_raw_error_median = None
 135        self.calibration_raw_error_stdev = None
 136
 137    def _init_settings(self):
 138        """Initializes the settings for the mass spectrum."""
 139        self._parameters = MSParameters()
 140
 141    def __len__(self):
 142        return len(self.mspeaks)
 143
 144    def __getitem__(self, position) -> MSPeak:
 145        return self.mspeaks[position]
 146
 147    def set_indexes(self, list_indexes):
 148        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
 149
 150        Parameters
 151        ----------
 152        list_indexes : list of int
 153            A list of integers representing the indexes of the MSpeaks to iterate over.
 154
 155        """
 156        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
 157
 158        for i, mspeak in enumerate(self.mspeaks):
 159            mspeak.index = i
 160
 161        self._set_nominal_masses_start_final_indexes()
 162
 163    def reset_indexes(self):
 164        """Reset the mass spectrum to iterate over all MSpeaks objects.
 165
 166        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
 167        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
 168
 169        """
 170        self.mspeaks = self._mspeaks
 171
 172        for i, mspeak in enumerate(self.mspeaks):
 173            mspeak.index = i
 174
 175        self._set_nominal_masses_start_final_indexes()
 176
 177    def add_mspeak(
 178        self,
 179        ion_charge,
 180        mz_exp,
 181        abundance,
 182        resolving_power,
 183        signal_to_noise,
 184        massspec_indexes,
 185        exp_freq=None,
 186        ms_parent=None,
 187    ):
 188        """Add a new MSPeak object to the MassSpectrum object.
 189
 190        Parameters
 191        ----------
 192        ion_charge : int
 193            The ion charge of the MSPeak.
 194        mz_exp : float
 195            The experimental m/z value of the MSPeak.
 196        abundance : float
 197            The abundance of the MSPeak.
 198        resolving_power : float
 199            The resolving power of the MSPeak.
 200        signal_to_noise : float
 201            The signal-to-noise ratio of the MSPeak.
 202        massspec_indexes : list
 203            A list of indexes of the MSPeak in the MassSpectrum object.
 204        exp_freq : float, optional
 205            The experimental frequency of the MSPeak. Defaults to None.
 206        ms_parent : MSParent, optional
 207            The MSParent object associated with the MSPeak. Defaults to None.
 208        """
 209        mspeak = MSPeak(
 210            ion_charge,
 211            mz_exp,
 212            abundance,
 213            resolving_power,
 214            signal_to_noise,
 215            massspec_indexes,
 216            len(self._mspeaks),
 217            exp_freq=exp_freq,
 218            ms_parent=ms_parent,
 219        )
 220
 221        self._mspeaks.append(mspeak)
 222
 223    def _set_parameters_objects(self, d_params):
 224        """Set the parameters of the MassSpectrum object.
 225
 226        Parameters
 227        ----------
 228        d_params : dict
 229            A dictionary containing the parameters to set.
 230
 231        Notes
 232        -----
 233        This method sets the following parameters of the MassSpectrum object:
 234        - _calibration_terms
 235        - label
 236        - analyzer
 237        - acquisition_time
 238        - instrument_label
 239        - polarity
 240        - scan_number
 241        - retention_time
 242        - mobility_rt
 243        - mobility_scan
 244        - _filename
 245        - _dir_location
 246        - _baseline_noise
 247        - _baseline_noise_std
 248        - sample_name
 249        """
 250        self._calibration_terms = (
 251            d_params.get("Aterm"),
 252            d_params.get("Bterm"),
 253            d_params.get("Cterm"),
 254        )
 255
 256        self.label = d_params.get(Labels.label)
 257
 258        self.analyzer = d_params.get("analyzer")
 259
 260        self.acquisition_time = d_params.get("acquisition_time")
 261
 262        self.instrument_label = d_params.get("instrument_label")
 263
 264        self.polarity = int(d_params.get("polarity"))
 265
 266        self.scan_number = d_params.get("scan_number")
 267
 268        self.retention_time = d_params.get("rt")
 269
 270        self.mobility_rt = d_params.get("mobility_rt")
 271
 272        self.mobility_scan = d_params.get("mobility_scan")
 273
 274        self._filename = d_params.get("filename_path")
 275
 276        self._dir_location = d_params.get("dir_location")
 277
 278        self._baseline_noise = d_params.get("baseline_noise")
 279
 280        self._baseline_noise_std = d_params.get("baseline_noise_std")
 281
 282        if d_params.get("sample_name") != "Unknown":
 283            self.sample_name = d_params.get("sample_name")
 284            if not self.sample_name:
 285                self.sample_name = self.filename.stem
 286        else:
 287            self.sample_name = self.filename.stem
 288
 289    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
 290        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
 291
 292        Parameters
 293        ----------
 294        Aterm : float
 295            The A-term calibration coefficient.
 296        Bterm : float
 297            The B-term calibration coefficient.
 298        C : float
 299            The C-term calibration coefficient.
 300        fas : float, optional
 301            The frequency amplitude scaling factor. Default is 0.
 302        """
 303        self._calibration_terms = (Aterm, Bterm, C)
 304
 305        self._mz_exp = self._f_to_mz()
 306        self._abundance = self._abundance
 307        self.find_peaks()
 308        self.reset_indexes()
 309
 310    def clear_molecular_formulas(self):
 311        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
 312
 313        Returns
 314        -------
 315        numpy.ndarray
 316            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
 317        """
 318        self.check_mspeaks()
 319        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
 320
 321    def process_mass_spec(self, keep_profile=True):
 322        """Process the mass spectrum.
 323
 324        Parameters
 325        ----------
 326        keep_profile : bool, optional
 327            Whether to keep the profile data after processing. Defaults to True.
 328
 329        Notes
 330        -----
 331        This method does the following:
 332        - calculates the noise threshold
 333        - does peak picking (creates mspeak_objs)
 334        - resets the mspeak_obj indexes
 335        """
 336
 337        # if runned mannually make sure to rerun filter_by_noise_threshold
 338        # calculates noise threshold
 339        # do peak picking( create mspeak_objs)
 340        # reset mspeak_obj the indexes
 341
 342        self.cal_noise_threshold()
 343
 344        self.find_peaks()
 345        self.reset_indexes()
 346
 347        if self.mspeaks:
 348            self._dynamic_range = self.max_abundance / self.min_abundance
 349        else:
 350            self._dynamic_range = 0
 351        if not keep_profile:
 352            self._abundance *= 0
 353            self._mz_exp *= 0
 354
 355    def cal_noise_threshold(self):
 356        """Calculate the noise threshold of the mass spectrum."""
 357
 358        if self.label == Labels.simulated_profile:
 359            self._baseline_noise, self._baseline_noise_std = 0.1, 1
 360
 361        if self.settings.noise_threshold_method == "log":
 362            self._baseline_noise, self._baseline_noise_std = (
 363                self.run_log_noise_threshold_calc()
 364            )
 365
 366        else:
 367            self._baseline_noise, self._baseline_noise_std = (
 368                self.run_noise_threshold_calc()
 369            )
 370
 371    @property
 372    def parameters(self):
 373        """Return the parameters of the mass spectrum."""
 374        return self._parameters
 375
 376    @parameters.setter
 377    def parameters(self, instance_MSParameters):
 378        self._parameters = instance_MSParameters
 379
 380    def set_parameter_from_json(self, parameters_path):
 381        """Set the parameters of the mass spectrum from a JSON file.
 382
 383        Parameters
 384        ----------
 385        parameters_path : str
 386            The path to the JSON file containing the parameters.
 387        """
 388        load_and_set_parameters_ms(self, parameters_path=parameters_path)
 389
 390    def set_parameter_from_toml(self, parameters_path):
 391        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
 392
 393    @property
 394    def mspeaks_settings(self):
 395        """Return the MS peak settings of the mass spectrum."""
 396        return self.parameters.ms_peak
 397
 398    @mspeaks_settings.setter
 399    def mspeaks_settings(self, instance_MassSpecPeakSetting):
 400        self.parameters.ms_peak = instance_MassSpecPeakSetting
 401
 402    @property
 403    def settings(self):
 404        """Return the settings of the mass spectrum."""
 405        return self.parameters.mass_spectrum
 406
 407    @settings.setter
 408    def settings(self, instance_MassSpectrumSetting):
 409        self.parameters.mass_spectrum = instance_MassSpectrumSetting
 410
 411    @property
 412    def molecular_search_settings(self):
 413        """Return the molecular search settings of the mass spectrum."""
 414        return self.parameters.molecular_search
 415
 416    @molecular_search_settings.setter
 417    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
 418        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
 419
 420    @property
 421    def mz_cal_profile(self):
 422        """Return the calibrated m/z profile of the mass spectrum."""
 423        return self._mz_cal_profile
 424
 425    @mz_cal_profile.setter
 426    def mz_cal_profile(self, mz_cal_list):
 427        if len(mz_cal_list) == len(self._mz_exp):
 428            self._mz_cal_profile = mz_cal_list
 429        else:
 430            raise Exception(
 431                "calibrated array (%i) is not of the same size of the data (%i)"
 432                % (len(mz_cal_list), len(self.mz_exp_profile))
 433            )
 434
 435    @property
 436    def mz_cal(self):
 437        """Return the calibrated m/z values of the mass spectrum."""
 438        return array([mspeak.mz_cal for mspeak in self.mspeaks])
 439
 440    @mz_cal.setter
 441    def mz_cal(self, mz_cal_list):
 442        if len(mz_cal_list) == len(self.mspeaks):
 443            self.is_calibrated = True
 444            for index, mz_cal in enumerate(mz_cal_list):
 445                self.mspeaks[index].mz_cal = mz_cal
 446        else:
 447            raise Exception(
 448                "calibrated array (%i) is not of the same size of the data (%i)"
 449                % (len(mz_cal_list), len(self._mspeaks))
 450            )
 451
 452    @property
 453    def mz_exp(self):
 454        """Return the experimental m/z values of the mass spectrum."""
 455        self.check_mspeaks()
 456
 457        if self.is_calibrated:
 458            return array([mspeak.mz_cal for mspeak in self.mspeaks])
 459
 460        else:
 461            return array([mspeak.mz_exp for mspeak in self.mspeaks])
 462
 463    @property
 464    def freq_exp_profile(self):
 465        """Return the experimental frequency profile of the mass spectrum."""
 466        return self._frequency_domain
 467
 468    @freq_exp_profile.setter
 469    def freq_exp_profile(self, new_data):
 470        self._frequency_domain = array(new_data)
 471
 472    @property
 473    def freq_exp_pp(self):
 474        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
 475        _, _, freq = self.prepare_peak_picking_data()
 476        return freq
 477
 478    @property
 479    def mz_exp_profile(self):
 480        """Return the experimental m/z profile of the mass spectrum."""
 481        if self.is_calibrated:
 482            return self.mz_cal_profile
 483        else:
 484            return self._mz_exp
 485
 486    @mz_exp_profile.setter
 487    def mz_exp_profile(self, new_data):
 488        self._mz_exp = array(new_data)
 489
 490    @property
 491    def mz_exp_pp(self):
 492        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
 493        mz, _, _ = self.prepare_peak_picking_data()
 494        return mz
 495
 496    @property
 497    def abundance_profile(self):
 498        """Return the abundance profile of the mass spectrum."""
 499        return self._abundance
 500
 501    @abundance_profile.setter
 502    def abundance_profile(self, new_data):
 503        self._abundance = array(new_data)
 504
 505    @property
 506    def abundance_profile_pp(self):
 507        """Return the abundance profile of the mass spectrum that is used for peak picking."""
 508        _, abundance, _ = self.prepare_peak_picking_data()
 509        return abundance
 510
 511    @property
 512    def abundance(self):
 513        """Return the abundance values of the mass spectrum."""
 514        self.check_mspeaks()
 515        return array([mspeak.abundance for mspeak in self.mspeaks])
 516
 517    def freq_exp(self):
 518        """Return the experimental frequency values of the mass spectrum."""
 519        self.check_mspeaks()
 520        return array([mspeak.freq_exp for mspeak in self.mspeaks])
 521
 522    @property
 523    def resolving_power(self):
 524        """Return the resolving power values of the mass spectrum."""
 525        self.check_mspeaks()
 526        return array([mspeak.resolving_power for mspeak in self.mspeaks])
 527
 528    @property
 529    def signal_to_noise(self):
 530        self.check_mspeaks()
 531        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
 532
 533    @property
 534    def nominal_mz(self):
 535        """Return the nominal m/z values of the mass spectrum."""
 536        if self._dict_nominal_masses_indexes:
 537            return sorted(list(self._dict_nominal_masses_indexes.keys()))
 538        else:
 539            raise ValueError("Nominal indexes not yet set")
 540
 541    def get_mz_and_abundance_peaks_tuples(self):
 542        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
 543        self.check_mspeaks()
 544        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
 545
 546    @property
 547    def kmd(self):
 548        """Return the Kendrick mass defect values of the mass spectrum."""
 549        self.check_mspeaks()
 550        return array([mspeak.kmd for mspeak in self.mspeaks])
 551
 552    @property
 553    def kendrick_mass(self):
 554        """Return the Kendrick mass values of the mass spectrum."""
 555        self.check_mspeaks()
 556        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
 557
 558    @property
 559    def max_mz_exp(self):
 560        """Return the maximum experimental m/z value of the mass spectrum."""
 561        return max([mspeak.mz_exp for mspeak in self.mspeaks])
 562
 563    @property
 564    def min_mz_exp(self):
 565        """Return the minimum experimental m/z value of the mass spectrum."""
 566        return min([mspeak.mz_exp for mspeak in self.mspeaks])
 567
 568    @property
 569    def max_abundance(self):
 570        """Return the maximum abundance value of the mass spectrum."""
 571        return max([mspeak.abundance for mspeak in self.mspeaks])
 572
 573    @property
 574    def max_signal_to_noise(self):
 575        """Return the maximum signal-to-noise ratio of the mass spectrum."""
 576        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
 577
 578    @property
 579    def most_abundant_mspeak(self):
 580        """Return the most abundant MSpeak object of the mass spectrum."""
 581        return max(self.mspeaks, key=lambda m: m.abundance)
 582
 583    @property
 584    def min_abundance(self):
 585        """Return the minimum abundance value of the mass spectrum."""
 586        return min([mspeak.abundance for mspeak in self.mspeaks])
 587
 588    # takes too much cpu time
 589    @property
 590    def dynamic_range(self):
 591        """Return the dynamic range of the mass spectrum."""
 592        return self._dynamic_range
 593
 594    @property
 595    def baseline_noise(self):
 596        """Return the baseline noise of the mass spectrum."""
 597        if self._baseline_noise:
 598            return self._baseline_noise
 599        else:
 600            return None
 601
 602    @property
 603    def baseline_noise_std(self):
 604        """Return the standard deviation of the baseline noise of the mass spectrum."""
 605        if self._baseline_noise_std == 0:
 606            return self._baseline_noise_std
 607        if self._baseline_noise_std:
 608            return self._baseline_noise_std
 609        else:
 610            return None
 611
 612    @property
 613    def Aterm(self):
 614        """Return the A-term calibration coefficient of the mass spectrum."""
 615        return self._calibration_terms[0]
 616
 617    @property
 618    def Bterm(self):
 619        """Return the B-term calibration coefficient of the mass spectrum."""
 620        return self._calibration_terms[1]
 621
 622    @property
 623    def Cterm(self):
 624        """Return the C-term calibration coefficient of the mass spectrum."""
 625        return self._calibration_terms[2]
 626
 627    @property
 628    def filename(self):
 629        """Return the filename of the mass spectrum."""
 630        return Path(self._filename)
 631
 632    @property
 633    def dir_location(self):
 634        """Return the directory location of the mass spectrum."""
 635        return self._dir_location
 636
 637    def sort_by_mz(self):
 638        """Sort the mass spectrum by m/z values."""
 639        return sorted(self, key=lambda m: m.mz_exp)
 640
 641    def sort_by_abundance(self, reverse=False):
 642        """Sort the mass spectrum by abundance values."""
 643        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
 644
 645    @property
 646    def tic(self):
 647        """Return the total ion current of the mass spectrum."""
 648        return trapz(self.abundance_profile, self.mz_exp_profile)
 649
 650    def check_mspeaks_warning(self):
 651        """Check if the mass spectrum has MSpeaks objects.
 652
 653        Raises
 654        ------
 655        Warning
 656            If the mass spectrum has no MSpeaks objects.
 657        """
 658        import warnings
 659
 660        if self.mspeaks:
 661            pass
 662        else:
 663            warnings.warn("mspeaks list is empty, continuing without filtering data")
 664
 665    def check_mspeaks(self):
 666        """Check if the mass spectrum has MSpeaks objects.
 667
 668        Raises
 669        ------
 670        Exception
 671            If the mass spectrum has no MSpeaks objects.
 672        """
 673        if self.mspeaks:
 674            pass
 675        else:
 676            raise Exception(
 677                "mspeaks list is empty, please run process_mass_spec() first"
 678            )
 679
 680    def remove_assignment_by_index(self, indexes):
 681        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
 682
 683        Parameters
 684        ----------
 685        indexes : list of int
 686            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
 687        """
 688        for i in indexes:
 689            self.mspeaks[i].clear_molecular_formulas()
 690
 691    def filter_by_index(self, list_indexes):
 692        """Filter the mass spectrum by the specified indexes.
 693
 694        Parameters
 695        ----------
 696        list_indexes : list of int
 697            A list of indexes of the MSpeaks objects to drop.
 698
 699        """
 700
 701        self.mspeaks = [
 702            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
 703        ]
 704
 705        for i, mspeak in enumerate(self.mspeaks):
 706            mspeak.index = i
 707
 708        self._set_nominal_masses_start_final_indexes()
 709
 710    def filter_by_mz(self, min_mz, max_mz):
 711        """Filter the mass spectrum by the specified m/z range.
 712
 713        Parameters
 714        ----------
 715        min_mz : float
 716            The minimum m/z value to keep.
 717        max_mz : float
 718            The maximum m/z value to keep.
 719
 720        """
 721        self.check_mspeaks_warning()
 722        indexes = [
 723            index
 724            for index, mspeak in enumerate(self.mspeaks)
 725            if not min_mz <= mspeak.mz_exp <= max_mz
 726        ]
 727        self.filter_by_index(indexes)
 728
 729    def filter_by_s2n(self, min_s2n, max_s2n=False):
 730        """Filter the mass spectrum by the specified signal-to-noise ratio range.
 731
 732        Parameters
 733        ----------
 734        min_s2n : float
 735            The minimum signal-to-noise ratio to keep.
 736        max_s2n : float, optional
 737            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
 738
 739        """
 740        self.check_mspeaks_warning()
 741        if max_s2n:
 742            indexes = [
 743                index
 744                for index, mspeak in enumerate(self.mspeaks)
 745                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
 746            ]
 747        else:
 748            indexes = [
 749                index
 750                for index, mspeak in enumerate(self.mspeaks)
 751                if mspeak.signal_to_noise <= min_s2n
 752            ]
 753        self.filter_by_index(indexes)
 754
 755    def filter_by_abundance(self, min_abund, max_abund=False):
 756        """Filter the mass spectrum by the specified abundance range.
 757
 758        Parameters
 759        ----------
 760        min_abund : float
 761            The minimum abundance to keep.
 762        max_abund : float, optional
 763            The maximum abundance to keep. Defaults to False (no maximum).
 764
 765        """
 766        self.check_mspeaks_warning()
 767        if max_abund:
 768            indexes = [
 769                index
 770                for index, mspeak in enumerate(self.mspeaks)
 771                if not min_abund <= mspeak.abundance <= max_abund
 772            ]
 773        else:
 774            indexes = [
 775                index
 776                for index, mspeak in enumerate(self.mspeaks)
 777                if mspeak.abundance <= min_abund
 778            ]
 779        self.filter_by_index(indexes)
 780
 781    def filter_by_max_resolving_power(self, B, T):
 782        """Filter the mass spectrum by the specified maximum resolving power.
 783
 784        Parameters
 785        ----------
 786        B : float
 787        T : float
 788
 789        """
 790
 791        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 792
 793        self.check_mspeaks_warning()
 794
 795        indexes_to_remove = [
 796            index
 797            for index, mspeak in enumerate(self.mspeaks)
 798            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
 799        ]
 800        self.filter_by_index(indexes_to_remove)
 801
 802    def filter_by_mean_resolving_power(
 803        self, ndeviations=3, plot=False, guess_pars=False
 804    ):
 805        """Filter the mass spectrum by the specified mean resolving power.
 806
 807        Parameters
 808        ----------
 809        ndeviations : float, optional
 810            The number of standard deviations to use for filtering. Defaults to 3.
 811        plot : bool, optional
 812            Whether to plot the resolving power distribution. Defaults to False.
 813        guess_pars : bool, optional
 814            Whether to guess the parameters for the Gaussian model. Defaults to False.
 815
 816        """
 817        self.check_mspeaks_warning()
 818        indexes_to_remove = MeanResolvingPowerFilter(
 819            self, ndeviations, plot, guess_pars
 820        ).main()
 821        self.filter_by_index(indexes_to_remove)
 822
 823    def filter_by_min_resolving_power(self, B, T):
 824        """Filter the mass spectrum by the specified minimum resolving power.
 825
 826        Parameters
 827        ----------
 828        B : float
 829        T : float
 830
 831        """
 832        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 833
 834        self.check_mspeaks_warning()
 835
 836        indexes_to_remove = [
 837            index
 838            for index, mspeak in enumerate(self.mspeaks)
 839            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
 840        ]
 841        self.filter_by_index(indexes_to_remove)
 842
 843    def filter_by_noise_threshold(self):
 844        """Filter the mass spectrum by the noise threshold."""
 845
 846        threshold = self.get_noise_threshold()[1][0]
 847
 848        self.check_mspeaks_warning()
 849
 850        indexes_to_remove = [
 851            index
 852            for index, mspeak in enumerate(self.mspeaks)
 853            if mspeak.abundance <= threshold
 854        ]
 855        self.filter_by_index(indexes_to_remove)
 856
 857    def find_peaks(self):
 858        """Find the peaks of the mass spectrum."""
 859        # needs to clear previous results from peak_picking
 860        self._mspeaks = list()
 861
 862        # then do peak picking
 863        self.do_peak_picking()
 864        # print("A total of %i peaks were found" % len(self._mspeaks))
 865
 866    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
 867        """Change the Kendrick base of all MSpeaks objects.
 868
 869        Parameters
 870        ----------
 871        kendrick_dict_base : dict
 872            A dictionary of the Kendrick base to change to.
 873
 874        Notes
 875        -----
 876        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
 877        """
 878        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
 879
 880        for mspeak in self.mspeaks:
 881            mspeak.change_kendrick_base(kendrick_dict_base)
 882
 883    def get_nominal_mz_first_last_indexes(self, nominal_mass):
 884        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
 885
 886        Parameters
 887        ----------
 888        nominal_mass : int
 889            The nominal mass to get the indexes for.
 890
 891        Returns
 892        -------
 893        tuple
 894            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
 895        """
 896        if self._dict_nominal_masses_indexes:
 897            if nominal_mass in self._dict_nominal_masses_indexes.keys():
 898                return (
 899                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
 900                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
 901                )
 902
 903            else:
 904                # import warnings
 905                # uncomment warn to distribution
 906                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
 907                return (0, 0)
 908        else:
 909            raise Exception(
 910                "run process_mass_spec() function before trying to access the data"
 911            )
 912
 913    def get_masses_count_by_nominal_mass(self):
 914        """Return a dictionary of the nominal masses and their counts."""
 915
 916        dict_nominal_masses_count = {}
 917
 918        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 919
 920        for nominal_mass in all_nominal_masses:
 921            if nominal_mass not in dict_nominal_masses_count:
 922                dict_nominal_masses_count[nominal_mass] = len(
 923                    list(self.get_nominal_mass_indexes(nominal_mass))
 924                )
 925
 926        return dict_nominal_masses_count
 927
 928    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
 929        """Return a dictionary of the nominal masses and their counts.
 930
 931        Parameters
 932        ----------
 933        mz_overlay : float, optional
 934            The m/z overlay to use for counting. Defaults to 0.1.
 935
 936        Returns
 937        -------
 938        dict
 939            A dictionary of the nominal masses and their counts.
 940        """
 941        dict_nominal_masses_count = {}
 942
 943        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 944
 945        for nominal_mass in all_nominal_masses:
 946            if nominal_mass not in dict_nominal_masses_count:
 947                min_mz = nominal_mass - mz_overlay
 948
 949                max_mz = nominal_mass + 1 + mz_overlay
 950
 951                indexes = indexes = where(
 952                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
 953                )
 954
 955                dict_nominal_masses_count[nominal_mass] = indexes[0].size
 956
 957        return dict_nominal_masses_count
 958
 959    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
 960        """Return the indexes of the MSpeaks objects with the specified nominal mass.
 961
 962        Parameters
 963        ----------
 964        nominal_mass : int
 965            The nominal mass to get the indexes for.
 966        overlay : float, optional
 967            The m/z overlay to use for counting. Defaults to 0.1.
 968
 969        Returns
 970        -------
 971        generator
 972            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
 973        """
 974        min_mz_to_look = nominal_mass - overlay
 975        max_mz_to_look = nominal_mass + 1 + overlay
 976
 977        return (
 978            i
 979            for i in range(len(self.mspeaks))
 980            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
 981        )
 982
 983        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
 984        # return indexes
 985
 986    def _set_nominal_masses_start_final_indexes(self):
 987        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
 988        dict_nominal_masses_indexes = {}
 989
 990        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
 991
 992        for nominal_mass in all_nominal_masses:
 993            # indexes = self.get_nominal_mass_indexes(nominal_mass)
 994            # Convert the iterator to a list to avoid multiple calls
 995            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
 996
 997            # If the list is not empty, find the first and last; otherwise, set None
 998            if indexes:
 999                first, last = indexes[0], indexes[-1]
1000            else:
1001                first = last = None
1002            # defaultvalue = None
1003            # first = last = next(indexes, defaultvalue)
1004            # for last in indexes:
1005            #    pass
1006
1007            dict_nominal_masses_indexes[nominal_mass] = (first, last)
1008
1009        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
1010
1011    def plot_centroid(self, ax=None, c="g"):
1012        """Plot the centroid data of the mass spectrum.
1013
1014        Parameters
1015        ----------
1016        ax : matplotlib.axes.Axes, optional
1017            The matplotlib axes to plot on. Defaults to None.
1018        c : str, optional
1019            The color to use for the plot. Defaults to 'g' (green).
1020
1021        Returns
1022        -------
1023        matplotlib.axes.Axes
1024            The matplotlib axes containing the plot.
1025
1026        Raises
1027        ------
1028        Exception
1029            If no centroid data is found.
1030        """
1031
1032        import matplotlib.pyplot as plt
1033
1034        if self._mspeaks:
1035            if ax is None:
1036                ax = plt.gca()
1037
1038            markerline_a, stemlines_a, baseline_a = ax.stem(
1039                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1040            )
1041
1042            plt.setp(markerline_a, "color", c, "linewidth", 2)
1043            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1044            plt.setp(baseline_a, "color", c, "linewidth", 2)
1045
1046            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1047            ax.set_ylabel("Abundance", fontsize=12)
1048            ax.tick_params(axis="both", which="major", labelsize=12)
1049
1050            ax.axes.spines["top"].set_visible(False)
1051            ax.axes.spines["right"].set_visible(False)
1052
1053            ax.get_yaxis().set_visible(False)
1054            ax.spines["left"].set_visible(False)
1055
1056        else:
1057            raise Exception("No centroid data found, please run process_mass_spec")
1058
1059        return ax
1060
1061    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1062        """Plot the profile data and noise threshold of the mass spectrum.
1063
1064        Parameters
1065        ----------
1066        ax : matplotlib.axes.Axes, optional
1067            The matplotlib axes to plot on. Defaults to None.
1068        legend : bool, optional
1069            Whether to show the legend. Defaults to False.
1070
1071        Returns
1072        -------
1073        matplotlib.axes.Axes
1074            The matplotlib axes containing the plot.
1075
1076        Raises
1077        ------
1078        Exception
1079            If no noise threshold is found.
1080        """
1081        import matplotlib.pyplot as plt
1082
1083        if self.baseline_noise_std and self.baseline_noise_std:
1084            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1085            baseline = (self.baseline_noise, self.baseline_noise)
1086
1087            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1088            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1089            x, y = self.get_noise_threshold()
1090
1091            if ax is None:
1092                ax = plt.gca()
1093
1094            ax.plot(
1095                self.mz_exp_profile,
1096                self.abundance_profile,
1097                color="green",
1098                label="Spectrum",
1099            )
1100            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1101            ax.plot(x, y, color="red", label="Noise Threshold")
1102
1103            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1104            ax.set_ylabel("Abundance", fontsize=12)
1105            ax.tick_params(axis="both", which="major", labelsize=12)
1106
1107            ax.axes.spines["top"].set_visible(False)
1108            ax.axes.spines["right"].set_visible(False)
1109
1110            ax.get_yaxis().set_visible(False)
1111            ax.spines["left"].set_visible(False)
1112            if legend:
1113                ax.legend()
1114
1115        else:
1116            raise Exception("Calculate noise threshold first")
1117
1118        return ax
1119
1120    def plot_mz_domain_profile(self, color="green", ax=None):
1121        """Plot the m/z domain profile of the mass spectrum.
1122
1123        Parameters
1124        ----------
1125        color : str, optional
1126            The color to use for the plot. Defaults to 'green'.
1127        ax : matplotlib.axes.Axes, optional
1128            The matplotlib axes to plot on. Defaults to None.
1129
1130        Returns
1131        -------
1132        matplotlib.axes.Axes
1133            The matplotlib axes containing the plot.
1134        """
1135
1136        import matplotlib.pyplot as plt
1137
1138        if ax is None:
1139            ax = plt.gca()
1140        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1141        ax.set(xlabel="m/z", ylabel="abundance")
1142
1143        return ax
1144
1145    def to_excel(self, out_file_path, write_metadata=True):
1146        """Export the mass spectrum to an Excel file.
1147
1148        Parameters
1149        ----------
1150        out_file_path : str
1151            The path to the Excel file to export to.
1152        write_metadata : bool, optional
1153            Whether to write the metadata to the Excel file. Defaults to True.
1154
1155        Returns
1156        -------
1157        None
1158        """
1159        from corems.mass_spectrum.output.export import HighResMassSpecExport
1160
1161        exportMS = HighResMassSpecExport(out_file_path, self)
1162        exportMS.to_excel(write_metadata=write_metadata)
1163
1164    def to_hdf(self, out_file_path):
1165        """Export the mass spectrum to an HDF file.
1166
1167        Parameters
1168        ----------
1169        out_file_path : str
1170            The path to the HDF file to export to.
1171
1172        Returns
1173        -------
1174        None
1175        """
1176        from corems.mass_spectrum.output.export import HighResMassSpecExport
1177
1178        exportMS = HighResMassSpecExport(out_file_path, self)
1179        exportMS.to_hdf()
1180
1181    def to_csv(self, out_file_path, write_metadata=True):
1182        """Export the mass spectrum to a CSV file.
1183
1184        Parameters
1185        ----------
1186        out_file_path : str
1187            The path to the CSV file to export to.
1188        write_metadata : bool, optional
1189            Whether to write the metadata to the CSV file. Defaults to True.
1190
1191        """
1192        from corems.mass_spectrum.output.export import HighResMassSpecExport
1193
1194        exportMS = HighResMassSpecExport(out_file_path, self)
1195        exportMS.to_csv(write_metadata=write_metadata)
1196
1197    def to_pandas(self, out_file_path, write_metadata=True):
1198        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1199
1200        Parameters
1201        ----------
1202        out_file_path : str
1203            The path to the CSV file to export to.
1204        write_metadata : bool, optional
1205            Whether to write the metadata to the CSV file. Defaults to True.
1206
1207        """
1208        from corems.mass_spectrum.output.export import HighResMassSpecExport
1209
1210        exportMS = HighResMassSpecExport(out_file_path, self)
1211        exportMS.to_pandas(write_metadata=write_metadata)
1212
1213    def to_dataframe(self, additional_columns=None):
1214        """Return the mass spectrum as a Pandas dataframe.
1215
1216        Parameters
1217        ----------
1218        additional_columns : list, optional
1219            A list of additional columns to include in the dataframe. Defaults to None.
1220            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1221
1222        Returns
1223        -------
1224        pandas.DataFrame
1225            The mass spectrum as a Pandas dataframe.
1226        """
1227        from corems.mass_spectrum.output.export import HighResMassSpecExport
1228
1229        exportMS = HighResMassSpecExport(self.filename, self)
1230        return exportMS.get_pandas_df(additional_columns=additional_columns)
1231
1232    def to_json(self):
1233        """Return the mass spectrum as a JSON file."""
1234        from corems.mass_spectrum.output.export import HighResMassSpecExport
1235
1236        exportMS = HighResMassSpecExport(self.filename, self)
1237        return exportMS.to_json()
1238
1239    def parameters_json(self):
1240        """Return the parameters of the mass spectrum as a JSON string."""
1241        from corems.mass_spectrum.output.export import HighResMassSpecExport
1242
1243        exportMS = HighResMassSpecExport(self.filename, self)
1244        return exportMS.parameters_to_json()
1245
1246    def parameters_toml(self):
1247        """Return the parameters of the mass spectrum as a TOML string."""
1248        from corems.mass_spectrum.output.export import HighResMassSpecExport
1249
1250        exportMS = HighResMassSpecExport(self.filename, self)
1251        return exportMS.parameters_to_toml()
1252
1253
1254class MassSpecProfile(MassSpecBase):
1255    """A mass spectrum class when the entry point is on profile format
1256
1257    Notes
1258    -----
1259    Stores the profile data and instrument settings.
1260    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
1261    _mspeaks is populated under the hood by calling process_mass_spec method.
1262    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
1263
1264    Parameters
1265    ----------
1266    data_dict : dict
1267        A dictionary containing the profile data.
1268    d_params : dict{'str': float, int or str}
1269        contains the instrument settings and processing settings
1270    auto_process : bool, optional
1271        Whether to automatically process the mass spectrum. Defaults to True.
1272
1273
1274    Attributes
1275    ----------
1276    _abundance : ndarray
1277        The abundance values of the mass spectrum.
1278    _mz_exp : ndarray
1279        The m/z values of the mass spectrum.
1280    _mspeaks : list
1281        A list of mass peaks.
1282
1283    Methods
1284    ----------
1285    * process_mass_spec(). Process the mass spectrum.
1286
1287    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
1288    """
1289
1290    def __init__(self, data_dict, d_params, auto_process=True):
1291        # print(data_dict.keys())
1292        super().__init__(
1293            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
1294        )
1295
1296        if auto_process:
1297            self.process_mass_spec()
1298
1299
1300class MassSpecfromFreq(MassSpecBase):
1301    """A mass spectrum class when data entry is on frequency domain
1302
1303    Notes
1304    -----
1305    - Transform to m/z based on the settings stored at d_params
1306    - Stores the profile data and instrument settings
1307    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
1308    - _mspeaks is populated under the hood by calling process_mass_spec method
1309    - iteration is null if _mspeaks is empty
1310
1311    Parameters
1312    ----------
1313    frequency_domain : list(float)
1314        all datapoints in frequency domain in Hz
1315    magnitude :  frequency_domain : list(float)
1316        all datapoints in for magnitude of each frequency datapoint
1317    d_params : dict{'str': float, int or str}
1318        contains the instrument settings and processing settings
1319    auto_process : bool, optional
1320        Whether to automatically process the mass spectrum. Defaults to True.
1321    keep_profile : bool, optional
1322        Whether to keep the profile data. Defaults to True.
1323
1324    Attributes
1325    ----------
1326    has_frequency : bool
1327        Whether the mass spectrum has frequency data.
1328    _frequency_domain : list(float)
1329        Frequency domain in Hz
1330    label : str
1331        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
1332    _abundance : ndarray
1333        The abundance values of the mass spectrum.
1334    _mz_exp : ndarray
1335        The m/z values of the mass spectrum.
1336    _mspeaks : list
1337        A list of mass peaks.
1338    See Also: all the attributes of MassSpecBase class
1339
1340    Methods
1341    ----------
1342    * _set_mz_domain().
1343        calculates the m_z based on the setting of d_params
1344    * process_mass_spec().  Process the mass spectrum.
1345
1346    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
1347    """
1348
1349    def __init__(
1350        self,
1351        frequency_domain,
1352        magnitude,
1353        d_params,
1354        auto_process=True,
1355        keep_profile=True,
1356    ):
1357        super().__init__(None, magnitude, d_params)
1358
1359        self._frequency_domain = frequency_domain
1360        self.has_frequency = True
1361        self._set_mz_domain()
1362        self._sort_mz_domain()
1363
1364        self.magnetron_frequency = None
1365        self.magnetron_frequency_sigma = None
1366
1367        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
1368
1369        if auto_process:
1370            self.process_mass_spec(keep_profile=keep_profile)
1371
1372    def _sort_mz_domain(self):
1373        """Sort the mass spectrum by m/z values."""
1374
1375        if self._mz_exp[0] > self._mz_exp[-1]:
1376            self._mz_exp = self._mz_exp[::-1]
1377            self._abundance = self._abundance[::-1]
1378            self._frequency_domain = self._frequency_domain[::-1]
1379
1380    def _set_mz_domain(self):
1381        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
1382        if self.label == Labels.bruker_frequency:
1383            self._mz_exp = self._f_to_mz_bruker()
1384
1385        else:
1386            self._mz_exp = self._f_to_mz()
1387
1388    @property
1389    def transient_settings(self):
1390        """Return the transient settings of the mass spectrum."""
1391        return self.parameters.transient
1392
1393    @transient_settings.setter
1394    def transient_settings(self, instance_TransientSetting):
1395        self.parameters.transient = instance_TransientSetting
1396
1397    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1398        """Calculates the magnetron frequency of the mass spectrum.
1399
1400        Parameters
1401        ----------
1402        max_magnetron_freq : float, optional
1403            The maximum magnetron frequency. Defaults to 50.
1404        magnetron_freq_bins : int, optional
1405            The number of bins to use for the histogram. Defaults to 300.
1406
1407        Returns
1408        -------
1409        None
1410
1411        Notes
1412        -----
1413        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
1414        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
1415        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
1416        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
1417        """
1418        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
1419        ms_df["FreqDelta"] = ms_df["Freq"].diff()
1420
1421        freq_hist = histogram(
1422            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
1423            bins=magnetron_freq_bins,
1424        )
1425
1426        mod = GaussianModel()
1427        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
1428        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
1429        self.magnetron_frequency = out.best_values["center"]
1430        self.magnetron_frequency_sigma = out.best_values["sigma"]
1431
1432
1433class MassSpecCentroid(MassSpecBase):
1434    """A mass spectrum class when the entry point is on centroid format
1435
1436    Notes
1437    -----
1438    - Stores the centroid data and instrument settings
1439    - Simulate profile data based on Gaussian or Lorentzian peak shape
1440    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
1441    - _mspeaks is populated under the hood by calling process_mass_spec method
1442    - iteration is null if _mspeaks is empty
1443
1444    Parameters
1445    ----------
1446    data_dict : dict {string: numpy array float64 )
1447        contains keys [m/z, Abundance, Resolving Power, S/N]
1448    d_params : dict{'str': float, int or str}
1449        contains the instrument settings and processing settings
1450    auto_process : bool, optional
1451        Whether to automatically process the mass spectrum. Defaults to True.
1452
1453    Attributes
1454    ----------
1455    label : str
1456        store label (Bruker, Midas Transient, see Labels class)
1457    _baseline_noise : float
1458        store baseline noise
1459    _baseline_noise_std : float
1460        store baseline noise std
1461    _abundance : ndarray
1462        The abundance values of the mass spectrum.
1463    _mz_exp : ndarray
1464        The m/z values of the mass spectrum.
1465    _mspeaks : list
1466        A list of mass peaks.
1467
1468
1469    Methods
1470    ----------
1471    * process_mass_spec().
1472        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
1473    * __simulate_profile__data__().
1474        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
1475
1476    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
1477    """
1478
1479    def __init__(self, data_dict, d_params, auto_process=True):
1480        super().__init__([], [], d_params)
1481
1482        self._set_parameters_objects(d_params)
1483
1484        if self.label == Labels.thermo_centroid:
1485            self._baseline_noise = d_params.get("baseline_noise")
1486            self._baseline_noise_std = d_params.get("baseline_noise_std")
1487
1488        self.is_centroid = True
1489        self.data_dict = data_dict
1490        self._mz_exp = data_dict[Labels.mz]
1491        self._abundance = data_dict[Labels.abundance]
1492
1493        if auto_process:
1494            self.process_mass_spec()
1495
1496    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
1497        """Simulate profile data based on Gaussian or Lorentzian peak shape
1498
1499        Notes
1500        -----
1501        Needs theoretical resolving power calculation and define peak shape.
1502        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.
1503
1504        Parameters
1505        ----------
1506        exp_mz_centroid : list(float)
1507            list of m/z values
1508        magnitude_centroid : list(float)
1509            list of abundance values
1510
1511
1512        Returns
1513        -------
1514        x : list(float)
1515            list of m/z values
1516        y : list(float)
1517            list of abundance values
1518        """
1519
1520        x, y = [], []
1521        for i in range(len(exp_mz_centroid)):
1522            x.append(exp_mz_centroid[i] - 0.0000001)
1523            x.append(exp_mz_centroid[i])
1524            x.append(exp_mz_centroid[i] + 0.0000001)
1525            y.append(0)
1526            y.append(magnitude_centroid[i])
1527            y.append(0)
1528        return x, y
1529
1530    @property
1531    def mz_exp_profile(self):
1532        """Return the m/z profile of the mass spectrum."""
1533        mz_list = []
1534        for mz in self.mz_exp:
1535            mz_list.append(mz - 0.0000001)
1536            mz_list.append(mz)
1537            mz_list.append(mz + 0.0000001)
1538        return mz_list
1539
1540    @mz_exp_profile.setter
1541    def mz_exp_profile(self, _mz_exp):
1542        self._mz_exp = _mz_exp
1543
1544    @property
1545    def abundance_profile(self):
1546        """Return the abundance profile of the mass spectrum."""
1547        ab_list = []
1548        for ab in self.abundance:
1549            ab_list.append(0)
1550            ab_list.append(ab)
1551            ab_list.append(0)
1552        return ab_list
1553
1554    @abundance_profile.setter
1555    def abundance_profile(self, abundance):
1556        self._abundance = abundance
1557
1558    @property
1559    def tic(self):
1560        """Return the total ion current of the mass spectrum."""
1561        return sum(self.abundance)
1562
1563    def process_mass_spec(self):
1564        """Process the mass spectrum."""
1565        import tqdm
1566
1567        # overwrite process_mass_spec
1568        # mspeak objs are usually added inside the PeaKPicking class
1569        # for profile and freq based data
1570        data_dict = self.data_dict
1571        ion_charge = self.polarity
1572
1573        # Check if resolving power is present
1574        rp_present = True
1575        if not data_dict.get(Labels.rp):
1576            rp_present = False
1577        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
1578            data_dict.get(Labels.rp)
1579        ):
1580            rp_present = False
1581
1582        # Check if s2n is present
1583        s2n_present = True
1584        if not data_dict.get(Labels.s2n):
1585            s2n_present = False
1586        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
1587            data_dict.get(Labels.s2n)
1588        ):
1589            s2n_present = False
1590
1591        # Warning if no s2n data but noise thresholding is set to signal_noise
1592        if (
1593            not s2n_present
1594            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1595        ):
1596            raise Exception("Signal to Noise data is missing for noise thresholding")
1597
1598        # Pull out abundance data
1599        abun = array(data_dict.get(Labels.abundance)).astype(float)
1600
1601        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
1602        abundance_threshold, factor = self.get_threshold(abun)
1603
1604        # Set rp_i and s2n_i to None which will be overwritten if present
1605        rp_i, s2n_i = np.nan, np.nan
1606        for index, mz in enumerate(data_dict.get(Labels.mz)):
1607            if rp_present:
1608                if not data_dict.get(Labels.rp)[index]:
1609                    rp_i = np.nan
1610                else:
1611                    rp_i = float(data_dict.get(Labels.rp)[index])
1612            if s2n_present:
1613                if not data_dict.get(Labels.s2n)[index]:
1614                    s2n_i = np.nan
1615                else:
1616                    s2n_i = float(data_dict.get(Labels.s2n)[index])
1617
1618            # centroid peak does not have start and end peak index pos
1619            massspec_indexes = (index, index, index)
1620
1621            # Add peaks based on the noise thresholding method
1622            if (
1623                self.parameters.mass_spectrum.noise_threshold_method
1624                in ["minima", "relative_abundance", "absolute_abundance"]
1625                and abun[index] / factor >= abundance_threshold
1626            ):
1627                self.add_mspeak(
1628                    ion_charge,
1629                    mz,
1630                    abun[index],
1631                    rp_i,
1632                    s2n_i,
1633                    massspec_indexes,
1634                    ms_parent=self,
1635                )
1636            if (
1637                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1638                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
1639            ):
1640                self.add_mspeak(
1641                    ion_charge,
1642                    mz,
1643                    abun[index],
1644                    rp_i,
1645                    s2n_i,
1646                    massspec_indexes,
1647                    ms_parent=self,
1648                )
1649
1650        self.mspeaks = self._mspeaks
1651        self._dynamic_range = self.max_abundance / self.min_abundance
1652        self._set_nominal_masses_start_final_indexes()
1653
1654        if self.label != Labels.thermo_centroid:
1655            if self.settings.noise_threshold_method == "log":
1656                raise Exception("log noise Not tested for centroid data")
1657                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1658
1659            else:
1660                self._baseline_noise, self._baseline_noise_std = (
1661                    self.run_noise_threshold_calc()
1662                )
1663
1664        del self.data_dict
1665
1666
1667class MassSpecCentroidLowRes(MassSpecCentroid):
1668    """A mass spectrum class when the entry point is on low resolution centroid format
1669
1670    Notes
1671    -----
1672    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
1673
1674    Parameters
1675    ----------
1676    data_dict : dict {string: numpy array float64 )
1677        contains keys [m/z, Abundance, Resolving Power, S/N]
1678    d_params : dict{'str': float, int or str}
1679        contains the instrument settings and processing settings
1680
1681    Attributes
1682    ----------
1683    _processed_tic : float
1684        store processed total ion current
1685    _abundance : ndarray
1686        The abundance values of the mass spectrum.
1687    _mz_exp : ndarray
1688        The m/z values of the mass spectrum.
1689    """
1690
1691    def __init__(self, data_dict, d_params):
1692        self._set_parameters_objects(d_params)
1693        self._mz_exp = array(data_dict.get(Labels.mz))
1694        self._abundance = array(data_dict.get(Labels.abundance))
1695        self._processed_tic = None
1696
1697    def __len__(self):
1698        return len(self.mz_exp)
1699
1700    def __getitem__(self, position):
1701        return (self.mz_exp[position], self.abundance[position])
1702
1703    @property
1704    def mz_exp(self):
1705        """Return the m/z values of the mass spectrum."""
1706        return self._mz_exp
1707
1708    @property
1709    def abundance(self):
1710        """Return the abundance values of the mass spectrum."""
1711        return self._abundance
1712
1713    @property
1714    def processed_tic(self):
1715        """Return the processed total ion current of the mass spectrum."""
1716        return sum(self._processed_tic)
1717
1718    @property
1719    def tic(self):
1720        """Return the total ion current of the mass spectrum."""
1721        if self._processed_tic:
1722            return self._processed_tic
1723        else:
1724            return sum(self.abundance)
1725
1726    @property
1727    def mz_abun_tuples(self):
1728        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
1729        r = lambda x: (int(round(x[0], 0), int(round(x[1], 0))))
1730
1731        return [r(i) for i in self]
1732
1733    @property
1734    def mz_abun_dict(self):
1735        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
1736        r = lambda x: int(round(x, 0))
1737
1738        return {r(i[0]): r(i[1]) for i in self}
def overrides(interface_class):
26def overrides(interface_class):
27    """Checks if the method overrides a method from an interface class."""
28
29    def overrider(method):
30        assert method.__name__ in dir(interface_class)
31        return method
32
33    return overrider

Checks if the method overrides a method from an interface class.

  36class MassSpecBase(MassSpecCalc, KendrickGrouping):
  37    """A mass spectrum base class, stores the profile data and instrument settings.
  38
  39    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
  40    _mspeaks is populated under the hood by calling process_mass_spec method.
  41    Iteration is null if _mspeaks is empty.
  42
  43    Parameters
  44    ----------
  45    mz_exp : array_like
  46        The m/z values of the mass spectrum.
  47    abundance : array_like
  48        The abundance values of the mass spectrum.
  49    d_params : dict
  50        A dictionary of parameters for the mass spectrum.
  51    **kwargs
  52        Additional keyword arguments.
  53
  54    Attributes
  55    ----------
  56
  57    mspeaks : list
  58        A list of mass peaks.
  59    is_calibrated : bool
  60        Whether the mass spectrum is calibrated.
  61    is_centroid : bool
  62        Whether the mass spectrum is centroided.
  63    has_frequency : bool
  64        Whether the mass spectrum has a frequency domain.
  65    calibration_order : None or int
  66        The order of the mass spectrum's calibration.
  67    calibration_points : None or ndarray
  68        The calibration points of the mass spectrum.
  69    calibration_ref_mzs: None or ndarray
  70        The reference m/z values of the mass spectrum's calibration.
  71    calibration_meas_mzs : None or ndarray
  72        The measured m/z values of the mass spectrum's calibration.
  73    calibration_RMS : None or float
  74        The root mean square of the mass spectrum's calibration.
  75    calibration_segment : None or CalibrationSegment
  76        The calibration segment of the mass spectrum.
  77    _abundance : ndarray
  78        The abundance values of the mass spectrum.
  79    _mz_exp : ndarray
  80        The m/z values of the mass spectrum.
  81    _mspeaks : list
  82        A list of mass peaks.
  83    _dict_nominal_masses_indexes : dict
  84        A dictionary of nominal masses and their indexes.
  85    _baseline_noise : float
  86        The baseline noise of the mass spectrum.
  87    _baseline_noise_std : float
  88        The standard deviation of the baseline noise of the mass spectrum.
  89    _dynamic_range : float or None
  90        The dynamic range of the mass spectrum.
  91    _transient_settings : None or TransientSettings
  92        The transient settings of the mass spectrum.
  93    _frequency_domain : None or FrequencyDomain
  94        The frequency domain of the mass spectrum.
  95    _mz_cal_profile : None or MzCalibrationProfile
  96        The m/z calibration profile of the mass spectrum.
  97
  98    Methods
  99    -------
 100    * process_mass_spec(). Main function to process the mass spectrum,
 101    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
 102
 103    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
 104    """
 105
 106    def __init__(self, mz_exp, abundance, d_params, **kwargs):
 107        self._abundance = array(abundance, dtype=float64)
 108        self._mz_exp = array(mz_exp, dtype=float64)
 109
 110        # objects created after process_mass_spec() function
 111        self._mspeaks = list()
 112        self.mspeaks = list()
 113        self._dict_nominal_masses_indexes = dict()
 114        self._baseline_noise = 0.001
 115        self._baseline_noise_std = 0.001
 116        self._dynamic_range = None
 117        # set to None: initialization occurs inside subclass MassSpecfromFreq
 118        self._transient_settings = None
 119        self._frequency_domain = None
 120        self._mz_cal_profile = None
 121        self.is_calibrated = False
 122
 123        self._set_parameters_objects(d_params)
 124        self._init_settings()
 125
 126        self.is_centroid = False
 127        self.has_frequency = False
 128
 129        self.calibration_order = None
 130        self.calibration_points = None
 131        self.calibration_ref_mzs = None
 132        self.calibration_meas_mzs = None
 133        self.calibration_RMS = None
 134        self.calibration_segment = None
 135        self.calibration_raw_error_median = None
 136        self.calibration_raw_error_stdev = None
 137
 138    def _init_settings(self):
 139        """Initializes the settings for the mass spectrum."""
 140        self._parameters = MSParameters()
 141
 142    def __len__(self):
 143        return len(self.mspeaks)
 144
 145    def __getitem__(self, position) -> MSPeak:
 146        return self.mspeaks[position]
 147
 148    def set_indexes(self, list_indexes):
 149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
 150
 151        Parameters
 152        ----------
 153        list_indexes : list of int
 154            A list of integers representing the indexes of the MSpeaks to iterate over.
 155
 156        """
 157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
 158
 159        for i, mspeak in enumerate(self.mspeaks):
 160            mspeak.index = i
 161
 162        self._set_nominal_masses_start_final_indexes()
 163
 164    def reset_indexes(self):
 165        """Reset the mass spectrum to iterate over all MSpeaks objects.
 166
 167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
 168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
 169
 170        """
 171        self.mspeaks = self._mspeaks
 172
 173        for i, mspeak in enumerate(self.mspeaks):
 174            mspeak.index = i
 175
 176        self._set_nominal_masses_start_final_indexes()
 177
 178    def add_mspeak(
 179        self,
 180        ion_charge,
 181        mz_exp,
 182        abundance,
 183        resolving_power,
 184        signal_to_noise,
 185        massspec_indexes,
 186        exp_freq=None,
 187        ms_parent=None,
 188    ):
 189        """Add a new MSPeak object to the MassSpectrum object.
 190
 191        Parameters
 192        ----------
 193        ion_charge : int
 194            The ion charge of the MSPeak.
 195        mz_exp : float
 196            The experimental m/z value of the MSPeak.
 197        abundance : float
 198            The abundance of the MSPeak.
 199        resolving_power : float
 200            The resolving power of the MSPeak.
 201        signal_to_noise : float
 202            The signal-to-noise ratio of the MSPeak.
 203        massspec_indexes : list
 204            A list of indexes of the MSPeak in the MassSpectrum object.
 205        exp_freq : float, optional
 206            The experimental frequency of the MSPeak. Defaults to None.
 207        ms_parent : MSParent, optional
 208            The MSParent object associated with the MSPeak. Defaults to None.
 209        """
 210        mspeak = MSPeak(
 211            ion_charge,
 212            mz_exp,
 213            abundance,
 214            resolving_power,
 215            signal_to_noise,
 216            massspec_indexes,
 217            len(self._mspeaks),
 218            exp_freq=exp_freq,
 219            ms_parent=ms_parent,
 220        )
 221
 222        self._mspeaks.append(mspeak)
 223
 224    def _set_parameters_objects(self, d_params):
 225        """Set the parameters of the MassSpectrum object.
 226
 227        Parameters
 228        ----------
 229        d_params : dict
 230            A dictionary containing the parameters to set.
 231
 232        Notes
 233        -----
 234        This method sets the following parameters of the MassSpectrum object:
 235        - _calibration_terms
 236        - label
 237        - analyzer
 238        - acquisition_time
 239        - instrument_label
 240        - polarity
 241        - scan_number
 242        - retention_time
 243        - mobility_rt
 244        - mobility_scan
 245        - _filename
 246        - _dir_location
 247        - _baseline_noise
 248        - _baseline_noise_std
 249        - sample_name
 250        """
 251        self._calibration_terms = (
 252            d_params.get("Aterm"),
 253            d_params.get("Bterm"),
 254            d_params.get("Cterm"),
 255        )
 256
 257        self.label = d_params.get(Labels.label)
 258
 259        self.analyzer = d_params.get("analyzer")
 260
 261        self.acquisition_time = d_params.get("acquisition_time")
 262
 263        self.instrument_label = d_params.get("instrument_label")
 264
 265        self.polarity = int(d_params.get("polarity"))
 266
 267        self.scan_number = d_params.get("scan_number")
 268
 269        self.retention_time = d_params.get("rt")
 270
 271        self.mobility_rt = d_params.get("mobility_rt")
 272
 273        self.mobility_scan = d_params.get("mobility_scan")
 274
 275        self._filename = d_params.get("filename_path")
 276
 277        self._dir_location = d_params.get("dir_location")
 278
 279        self._baseline_noise = d_params.get("baseline_noise")
 280
 281        self._baseline_noise_std = d_params.get("baseline_noise_std")
 282
 283        if d_params.get("sample_name") != "Unknown":
 284            self.sample_name = d_params.get("sample_name")
 285            if not self.sample_name:
 286                self.sample_name = self.filename.stem
 287        else:
 288            self.sample_name = self.filename.stem
 289
 290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
 291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
 292
 293        Parameters
 294        ----------
 295        Aterm : float
 296            The A-term calibration coefficient.
 297        Bterm : float
 298            The B-term calibration coefficient.
 299        C : float
 300            The C-term calibration coefficient.
 301        fas : float, optional
 302            The frequency amplitude scaling factor. Default is 0.
 303        """
 304        self._calibration_terms = (Aterm, Bterm, C)
 305
 306        self._mz_exp = self._f_to_mz()
 307        self._abundance = self._abundance
 308        self.find_peaks()
 309        self.reset_indexes()
 310
 311    def clear_molecular_formulas(self):
 312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
 313
 314        Returns
 315        -------
 316        numpy.ndarray
 317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
 318        """
 319        self.check_mspeaks()
 320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
 321
 322    def process_mass_spec(self, keep_profile=True):
 323        """Process the mass spectrum.
 324
 325        Parameters
 326        ----------
 327        keep_profile : bool, optional
 328            Whether to keep the profile data after processing. Defaults to True.
 329
 330        Notes
 331        -----
 332        This method does the following:
 333        - calculates the noise threshold
 334        - does peak picking (creates mspeak_objs)
 335        - resets the mspeak_obj indexes
 336        """
 337
 338        # if runned mannually make sure to rerun filter_by_noise_threshold
 339        # calculates noise threshold
 340        # do peak picking( create mspeak_objs)
 341        # reset mspeak_obj the indexes
 342
 343        self.cal_noise_threshold()
 344
 345        self.find_peaks()
 346        self.reset_indexes()
 347
 348        if self.mspeaks:
 349            self._dynamic_range = self.max_abundance / self.min_abundance
 350        else:
 351            self._dynamic_range = 0
 352        if not keep_profile:
 353            self._abundance *= 0
 354            self._mz_exp *= 0
 355
 356    def cal_noise_threshold(self):
 357        """Calculate the noise threshold of the mass spectrum."""
 358
 359        if self.label == Labels.simulated_profile:
 360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
 361
 362        if self.settings.noise_threshold_method == "log":
 363            self._baseline_noise, self._baseline_noise_std = (
 364                self.run_log_noise_threshold_calc()
 365            )
 366
 367        else:
 368            self._baseline_noise, self._baseline_noise_std = (
 369                self.run_noise_threshold_calc()
 370            )
 371
 372    @property
 373    def parameters(self):
 374        """Return the parameters of the mass spectrum."""
 375        return self._parameters
 376
 377    @parameters.setter
 378    def parameters(self, instance_MSParameters):
 379        self._parameters = instance_MSParameters
 380
 381    def set_parameter_from_json(self, parameters_path):
 382        """Set the parameters of the mass spectrum from a JSON file.
 383
 384        Parameters
 385        ----------
 386        parameters_path : str
 387            The path to the JSON file containing the parameters.
 388        """
 389        load_and_set_parameters_ms(self, parameters_path=parameters_path)
 390
 391    def set_parameter_from_toml(self, parameters_path):
 392        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
 393
 394    @property
 395    def mspeaks_settings(self):
 396        """Return the MS peak settings of the mass spectrum."""
 397        return self.parameters.ms_peak
 398
 399    @mspeaks_settings.setter
 400    def mspeaks_settings(self, instance_MassSpecPeakSetting):
 401        self.parameters.ms_peak = instance_MassSpecPeakSetting
 402
 403    @property
 404    def settings(self):
 405        """Return the settings of the mass spectrum."""
 406        return self.parameters.mass_spectrum
 407
 408    @settings.setter
 409    def settings(self, instance_MassSpectrumSetting):
 410        self.parameters.mass_spectrum = instance_MassSpectrumSetting
 411
 412    @property
 413    def molecular_search_settings(self):
 414        """Return the molecular search settings of the mass spectrum."""
 415        return self.parameters.molecular_search
 416
 417    @molecular_search_settings.setter
 418    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
 419        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
 420
 421    @property
 422    def mz_cal_profile(self):
 423        """Return the calibrated m/z profile of the mass spectrum."""
 424        return self._mz_cal_profile
 425
 426    @mz_cal_profile.setter
 427    def mz_cal_profile(self, mz_cal_list):
 428        if len(mz_cal_list) == len(self._mz_exp):
 429            self._mz_cal_profile = mz_cal_list
 430        else:
 431            raise Exception(
 432                "calibrated array (%i) is not of the same size of the data (%i)"
 433                % (len(mz_cal_list), len(self.mz_exp_profile))
 434            )
 435
 436    @property
 437    def mz_cal(self):
 438        """Return the calibrated m/z values of the mass spectrum."""
 439        return array([mspeak.mz_cal for mspeak in self.mspeaks])
 440
 441    @mz_cal.setter
 442    def mz_cal(self, mz_cal_list):
 443        if len(mz_cal_list) == len(self.mspeaks):
 444            self.is_calibrated = True
 445            for index, mz_cal in enumerate(mz_cal_list):
 446                self.mspeaks[index].mz_cal = mz_cal
 447        else:
 448            raise Exception(
 449                "calibrated array (%i) is not of the same size of the data (%i)"
 450                % (len(mz_cal_list), len(self._mspeaks))
 451            )
 452
 453    @property
 454    def mz_exp(self):
 455        """Return the experimental m/z values of the mass spectrum."""
 456        self.check_mspeaks()
 457
 458        if self.is_calibrated:
 459            return array([mspeak.mz_cal for mspeak in self.mspeaks])
 460
 461        else:
 462            return array([mspeak.mz_exp for mspeak in self.mspeaks])
 463
 464    @property
 465    def freq_exp_profile(self):
 466        """Return the experimental frequency profile of the mass spectrum."""
 467        return self._frequency_domain
 468
 469    @freq_exp_profile.setter
 470    def freq_exp_profile(self, new_data):
 471        self._frequency_domain = array(new_data)
 472
 473    @property
 474    def freq_exp_pp(self):
 475        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
 476        _, _, freq = self.prepare_peak_picking_data()
 477        return freq
 478
 479    @property
 480    def mz_exp_profile(self):
 481        """Return the experimental m/z profile of the mass spectrum."""
 482        if self.is_calibrated:
 483            return self.mz_cal_profile
 484        else:
 485            return self._mz_exp
 486
 487    @mz_exp_profile.setter
 488    def mz_exp_profile(self, new_data):
 489        self._mz_exp = array(new_data)
 490
 491    @property
 492    def mz_exp_pp(self):
 493        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
 494        mz, _, _ = self.prepare_peak_picking_data()
 495        return mz
 496
 497    @property
 498    def abundance_profile(self):
 499        """Return the abundance profile of the mass spectrum."""
 500        return self._abundance
 501
 502    @abundance_profile.setter
 503    def abundance_profile(self, new_data):
 504        self._abundance = array(new_data)
 505
 506    @property
 507    def abundance_profile_pp(self):
 508        """Return the abundance profile of the mass spectrum that is used for peak picking."""
 509        _, abundance, _ = self.prepare_peak_picking_data()
 510        return abundance
 511
 512    @property
 513    def abundance(self):
 514        """Return the abundance values of the mass spectrum."""
 515        self.check_mspeaks()
 516        return array([mspeak.abundance for mspeak in self.mspeaks])
 517
 518    def freq_exp(self):
 519        """Return the experimental frequency values of the mass spectrum."""
 520        self.check_mspeaks()
 521        return array([mspeak.freq_exp for mspeak in self.mspeaks])
 522
 523    @property
 524    def resolving_power(self):
 525        """Return the resolving power values of the mass spectrum."""
 526        self.check_mspeaks()
 527        return array([mspeak.resolving_power for mspeak in self.mspeaks])
 528
 529    @property
 530    def signal_to_noise(self):
 531        self.check_mspeaks()
 532        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
 533
 534    @property
 535    def nominal_mz(self):
 536        """Return the nominal m/z values of the mass spectrum."""
 537        if self._dict_nominal_masses_indexes:
 538            return sorted(list(self._dict_nominal_masses_indexes.keys()))
 539        else:
 540            raise ValueError("Nominal indexes not yet set")
 541
 542    def get_mz_and_abundance_peaks_tuples(self):
 543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
 544        self.check_mspeaks()
 545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
 546
 547    @property
 548    def kmd(self):
 549        """Return the Kendrick mass defect values of the mass spectrum."""
 550        self.check_mspeaks()
 551        return array([mspeak.kmd for mspeak in self.mspeaks])
 552
 553    @property
 554    def kendrick_mass(self):
 555        """Return the Kendrick mass values of the mass spectrum."""
 556        self.check_mspeaks()
 557        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
 558
 559    @property
 560    def max_mz_exp(self):
 561        """Return the maximum experimental m/z value of the mass spectrum."""
 562        return max([mspeak.mz_exp for mspeak in self.mspeaks])
 563
 564    @property
 565    def min_mz_exp(self):
 566        """Return the minimum experimental m/z value of the mass spectrum."""
 567        return min([mspeak.mz_exp for mspeak in self.mspeaks])
 568
 569    @property
 570    def max_abundance(self):
 571        """Return the maximum abundance value of the mass spectrum."""
 572        return max([mspeak.abundance for mspeak in self.mspeaks])
 573
 574    @property
 575    def max_signal_to_noise(self):
 576        """Return the maximum signal-to-noise ratio of the mass spectrum."""
 577        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
 578
 579    @property
 580    def most_abundant_mspeak(self):
 581        """Return the most abundant MSpeak object of the mass spectrum."""
 582        return max(self.mspeaks, key=lambda m: m.abundance)
 583
 584    @property
 585    def min_abundance(self):
 586        """Return the minimum abundance value of the mass spectrum."""
 587        return min([mspeak.abundance for mspeak in self.mspeaks])
 588
 589    # takes too much cpu time
 590    @property
 591    def dynamic_range(self):
 592        """Return the dynamic range of the mass spectrum."""
 593        return self._dynamic_range
 594
 595    @property
 596    def baseline_noise(self):
 597        """Return the baseline noise of the mass spectrum."""
 598        if self._baseline_noise:
 599            return self._baseline_noise
 600        else:
 601            return None
 602
 603    @property
 604    def baseline_noise_std(self):
 605        """Return the standard deviation of the baseline noise of the mass spectrum."""
 606        if self._baseline_noise_std == 0:
 607            return self._baseline_noise_std
 608        if self._baseline_noise_std:
 609            return self._baseline_noise_std
 610        else:
 611            return None
 612
 613    @property
 614    def Aterm(self):
 615        """Return the A-term calibration coefficient of the mass spectrum."""
 616        return self._calibration_terms[0]
 617
 618    @property
 619    def Bterm(self):
 620        """Return the B-term calibration coefficient of the mass spectrum."""
 621        return self._calibration_terms[1]
 622
 623    @property
 624    def Cterm(self):
 625        """Return the C-term calibration coefficient of the mass spectrum."""
 626        return self._calibration_terms[2]
 627
 628    @property
 629    def filename(self):
 630        """Return the filename of the mass spectrum."""
 631        return Path(self._filename)
 632
 633    @property
 634    def dir_location(self):
 635        """Return the directory location of the mass spectrum."""
 636        return self._dir_location
 637
 638    def sort_by_mz(self):
 639        """Sort the mass spectrum by m/z values."""
 640        return sorted(self, key=lambda m: m.mz_exp)
 641
 642    def sort_by_abundance(self, reverse=False):
 643        """Sort the mass spectrum by abundance values."""
 644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
 645
 646    @property
 647    def tic(self):
 648        """Return the total ion current of the mass spectrum."""
 649        return trapz(self.abundance_profile, self.mz_exp_profile)
 650
 651    def check_mspeaks_warning(self):
 652        """Check if the mass spectrum has MSpeaks objects.
 653
 654        Raises
 655        ------
 656        Warning
 657            If the mass spectrum has no MSpeaks objects.
 658        """
 659        import warnings
 660
 661        if self.mspeaks:
 662            pass
 663        else:
 664            warnings.warn("mspeaks list is empty, continuing without filtering data")
 665
 666    def check_mspeaks(self):
 667        """Check if the mass spectrum has MSpeaks objects.
 668
 669        Raises
 670        ------
 671        Exception
 672            If the mass spectrum has no MSpeaks objects.
 673        """
 674        if self.mspeaks:
 675            pass
 676        else:
 677            raise Exception(
 678                "mspeaks list is empty, please run process_mass_spec() first"
 679            )
 680
 681    def remove_assignment_by_index(self, indexes):
 682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
 683
 684        Parameters
 685        ----------
 686        indexes : list of int
 687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
 688        """
 689        for i in indexes:
 690            self.mspeaks[i].clear_molecular_formulas()
 691
 692    def filter_by_index(self, list_indexes):
 693        """Filter the mass spectrum by the specified indexes.
 694
 695        Parameters
 696        ----------
 697        list_indexes : list of int
 698            A list of indexes of the MSpeaks objects to drop.
 699
 700        """
 701
 702        self.mspeaks = [
 703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
 704        ]
 705
 706        for i, mspeak in enumerate(self.mspeaks):
 707            mspeak.index = i
 708
 709        self._set_nominal_masses_start_final_indexes()
 710
 711    def filter_by_mz(self, min_mz, max_mz):
 712        """Filter the mass spectrum by the specified m/z range.
 713
 714        Parameters
 715        ----------
 716        min_mz : float
 717            The minimum m/z value to keep.
 718        max_mz : float
 719            The maximum m/z value to keep.
 720
 721        """
 722        self.check_mspeaks_warning()
 723        indexes = [
 724            index
 725            for index, mspeak in enumerate(self.mspeaks)
 726            if not min_mz <= mspeak.mz_exp <= max_mz
 727        ]
 728        self.filter_by_index(indexes)
 729
 730    def filter_by_s2n(self, min_s2n, max_s2n=False):
 731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
 732
 733        Parameters
 734        ----------
 735        min_s2n : float
 736            The minimum signal-to-noise ratio to keep.
 737        max_s2n : float, optional
 738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
 739
 740        """
 741        self.check_mspeaks_warning()
 742        if max_s2n:
 743            indexes = [
 744                index
 745                for index, mspeak in enumerate(self.mspeaks)
 746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
 747            ]
 748        else:
 749            indexes = [
 750                index
 751                for index, mspeak in enumerate(self.mspeaks)
 752                if mspeak.signal_to_noise <= min_s2n
 753            ]
 754        self.filter_by_index(indexes)
 755
 756    def filter_by_abundance(self, min_abund, max_abund=False):
 757        """Filter the mass spectrum by the specified abundance range.
 758
 759        Parameters
 760        ----------
 761        min_abund : float
 762            The minimum abundance to keep.
 763        max_abund : float, optional
 764            The maximum abundance to keep. Defaults to False (no maximum).
 765
 766        """
 767        self.check_mspeaks_warning()
 768        if max_abund:
 769            indexes = [
 770                index
 771                for index, mspeak in enumerate(self.mspeaks)
 772                if not min_abund <= mspeak.abundance <= max_abund
 773            ]
 774        else:
 775            indexes = [
 776                index
 777                for index, mspeak in enumerate(self.mspeaks)
 778                if mspeak.abundance <= min_abund
 779            ]
 780        self.filter_by_index(indexes)
 781
 782    def filter_by_max_resolving_power(self, B, T):
 783        """Filter the mass spectrum by the specified maximum resolving power.
 784
 785        Parameters
 786        ----------
 787        B : float
 788        T : float
 789
 790        """
 791
 792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 793
 794        self.check_mspeaks_warning()
 795
 796        indexes_to_remove = [
 797            index
 798            for index, mspeak in enumerate(self.mspeaks)
 799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
 800        ]
 801        self.filter_by_index(indexes_to_remove)
 802
 803    def filter_by_mean_resolving_power(
 804        self, ndeviations=3, plot=False, guess_pars=False
 805    ):
 806        """Filter the mass spectrum by the specified mean resolving power.
 807
 808        Parameters
 809        ----------
 810        ndeviations : float, optional
 811            The number of standard deviations to use for filtering. Defaults to 3.
 812        plot : bool, optional
 813            Whether to plot the resolving power distribution. Defaults to False.
 814        guess_pars : bool, optional
 815            Whether to guess the parameters for the Gaussian model. Defaults to False.
 816
 817        """
 818        self.check_mspeaks_warning()
 819        indexes_to_remove = MeanResolvingPowerFilter(
 820            self, ndeviations, plot, guess_pars
 821        ).main()
 822        self.filter_by_index(indexes_to_remove)
 823
 824    def filter_by_min_resolving_power(self, B, T):
 825        """Filter the mass spectrum by the specified minimum resolving power.
 826
 827        Parameters
 828        ----------
 829        B : float
 830        T : float
 831
 832        """
 833        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 834
 835        self.check_mspeaks_warning()
 836
 837        indexes_to_remove = [
 838            index
 839            for index, mspeak in enumerate(self.mspeaks)
 840            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
 841        ]
 842        self.filter_by_index(indexes_to_remove)
 843
 844    def filter_by_noise_threshold(self):
 845        """Filter the mass spectrum by the noise threshold."""
 846
 847        threshold = self.get_noise_threshold()[1][0]
 848
 849        self.check_mspeaks_warning()
 850
 851        indexes_to_remove = [
 852            index
 853            for index, mspeak in enumerate(self.mspeaks)
 854            if mspeak.abundance <= threshold
 855        ]
 856        self.filter_by_index(indexes_to_remove)
 857
 858    def find_peaks(self):
 859        """Find the peaks of the mass spectrum."""
 860        # needs to clear previous results from peak_picking
 861        self._mspeaks = list()
 862
 863        # then do peak picking
 864        self.do_peak_picking()
 865        # print("A total of %i peaks were found" % len(self._mspeaks))
 866
 867    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
 868        """Change the Kendrick base of all MSpeaks objects.
 869
 870        Parameters
 871        ----------
 872        kendrick_dict_base : dict
 873            A dictionary of the Kendrick base to change to.
 874
 875        Notes
 876        -----
 877        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
 878        """
 879        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
 880
 881        for mspeak in self.mspeaks:
 882            mspeak.change_kendrick_base(kendrick_dict_base)
 883
 884    def get_nominal_mz_first_last_indexes(self, nominal_mass):
 885        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
 886
 887        Parameters
 888        ----------
 889        nominal_mass : int
 890            The nominal mass to get the indexes for.
 891
 892        Returns
 893        -------
 894        tuple
 895            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
 896        """
 897        if self._dict_nominal_masses_indexes:
 898            if nominal_mass in self._dict_nominal_masses_indexes.keys():
 899                return (
 900                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
 901                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
 902                )
 903
 904            else:
 905                # import warnings
 906                # uncomment warn to distribution
 907                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
 908                return (0, 0)
 909        else:
 910            raise Exception(
 911                "run process_mass_spec() function before trying to access the data"
 912            )
 913
 914    def get_masses_count_by_nominal_mass(self):
 915        """Return a dictionary of the nominal masses and their counts."""
 916
 917        dict_nominal_masses_count = {}
 918
 919        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 920
 921        for nominal_mass in all_nominal_masses:
 922            if nominal_mass not in dict_nominal_masses_count:
 923                dict_nominal_masses_count[nominal_mass] = len(
 924                    list(self.get_nominal_mass_indexes(nominal_mass))
 925                )
 926
 927        return dict_nominal_masses_count
 928
 929    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
 930        """Return a dictionary of the nominal masses and their counts.
 931
 932        Parameters
 933        ----------
 934        mz_overlay : float, optional
 935            The m/z overlay to use for counting. Defaults to 0.1.
 936
 937        Returns
 938        -------
 939        dict
 940            A dictionary of the nominal masses and their counts.
 941        """
 942        dict_nominal_masses_count = {}
 943
 944        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 945
 946        for nominal_mass in all_nominal_masses:
 947            if nominal_mass not in dict_nominal_masses_count:
 948                min_mz = nominal_mass - mz_overlay
 949
 950                max_mz = nominal_mass + 1 + mz_overlay
 951
 952                indexes = indexes = where(
 953                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
 954                )
 955
 956                dict_nominal_masses_count[nominal_mass] = indexes[0].size
 957
 958        return dict_nominal_masses_count
 959
 960    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
 961        """Return the indexes of the MSpeaks objects with the specified nominal mass.
 962
 963        Parameters
 964        ----------
 965        nominal_mass : int
 966            The nominal mass to get the indexes for.
 967        overlay : float, optional
 968            The m/z overlay to use for counting. Defaults to 0.1.
 969
 970        Returns
 971        -------
 972        generator
 973            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
 974        """
 975        min_mz_to_look = nominal_mass - overlay
 976        max_mz_to_look = nominal_mass + 1 + overlay
 977
 978        return (
 979            i
 980            for i in range(len(self.mspeaks))
 981            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
 982        )
 983
 984        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
 985        # return indexes
 986
 987    def _set_nominal_masses_start_final_indexes(self):
 988        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
 989        dict_nominal_masses_indexes = {}
 990
 991        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
 992
 993        for nominal_mass in all_nominal_masses:
 994            # indexes = self.get_nominal_mass_indexes(nominal_mass)
 995            # Convert the iterator to a list to avoid multiple calls
 996            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
 997
 998            # If the list is not empty, find the first and last; otherwise, set None
 999            if indexes:
1000                first, last = indexes[0], indexes[-1]
1001            else:
1002                first = last = None
1003            # defaultvalue = None
1004            # first = last = next(indexes, defaultvalue)
1005            # for last in indexes:
1006            #    pass
1007
1008            dict_nominal_masses_indexes[nominal_mass] = (first, last)
1009
1010        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
1011
1012    def plot_centroid(self, ax=None, c="g"):
1013        """Plot the centroid data of the mass spectrum.
1014
1015        Parameters
1016        ----------
1017        ax : matplotlib.axes.Axes, optional
1018            The matplotlib axes to plot on. Defaults to None.
1019        c : str, optional
1020            The color to use for the plot. Defaults to 'g' (green).
1021
1022        Returns
1023        -------
1024        matplotlib.axes.Axes
1025            The matplotlib axes containing the plot.
1026
1027        Raises
1028        ------
1029        Exception
1030            If no centroid data is found.
1031        """
1032
1033        import matplotlib.pyplot as plt
1034
1035        if self._mspeaks:
1036            if ax is None:
1037                ax = plt.gca()
1038
1039            markerline_a, stemlines_a, baseline_a = ax.stem(
1040                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1041            )
1042
1043            plt.setp(markerline_a, "color", c, "linewidth", 2)
1044            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1045            plt.setp(baseline_a, "color", c, "linewidth", 2)
1046
1047            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1048            ax.set_ylabel("Abundance", fontsize=12)
1049            ax.tick_params(axis="both", which="major", labelsize=12)
1050
1051            ax.axes.spines["top"].set_visible(False)
1052            ax.axes.spines["right"].set_visible(False)
1053
1054            ax.get_yaxis().set_visible(False)
1055            ax.spines["left"].set_visible(False)
1056
1057        else:
1058            raise Exception("No centroid data found, please run process_mass_spec")
1059
1060        return ax
1061
1062    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1063        """Plot the profile data and noise threshold of the mass spectrum.
1064
1065        Parameters
1066        ----------
1067        ax : matplotlib.axes.Axes, optional
1068            The matplotlib axes to plot on. Defaults to None.
1069        legend : bool, optional
1070            Whether to show the legend. Defaults to False.
1071
1072        Returns
1073        -------
1074        matplotlib.axes.Axes
1075            The matplotlib axes containing the plot.
1076
1077        Raises
1078        ------
1079        Exception
1080            If no noise threshold is found.
1081        """
1082        import matplotlib.pyplot as plt
1083
1084        if self.baseline_noise_std and self.baseline_noise_std:
1085            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1086            baseline = (self.baseline_noise, self.baseline_noise)
1087
1088            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1089            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1090            x, y = self.get_noise_threshold()
1091
1092            if ax is None:
1093                ax = plt.gca()
1094
1095            ax.plot(
1096                self.mz_exp_profile,
1097                self.abundance_profile,
1098                color="green",
1099                label="Spectrum",
1100            )
1101            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1102            ax.plot(x, y, color="red", label="Noise Threshold")
1103
1104            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1105            ax.set_ylabel("Abundance", fontsize=12)
1106            ax.tick_params(axis="both", which="major", labelsize=12)
1107
1108            ax.axes.spines["top"].set_visible(False)
1109            ax.axes.spines["right"].set_visible(False)
1110
1111            ax.get_yaxis().set_visible(False)
1112            ax.spines["left"].set_visible(False)
1113            if legend:
1114                ax.legend()
1115
1116        else:
1117            raise Exception("Calculate noise threshold first")
1118
1119        return ax
1120
1121    def plot_mz_domain_profile(self, color="green", ax=None):
1122        """Plot the m/z domain profile of the mass spectrum.
1123
1124        Parameters
1125        ----------
1126        color : str, optional
1127            The color to use for the plot. Defaults to 'green'.
1128        ax : matplotlib.axes.Axes, optional
1129            The matplotlib axes to plot on. Defaults to None.
1130
1131        Returns
1132        -------
1133        matplotlib.axes.Axes
1134            The matplotlib axes containing the plot.
1135        """
1136
1137        import matplotlib.pyplot as plt
1138
1139        if ax is None:
1140            ax = plt.gca()
1141        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1142        ax.set(xlabel="m/z", ylabel="abundance")
1143
1144        return ax
1145
1146    def to_excel(self, out_file_path, write_metadata=True):
1147        """Export the mass spectrum to an Excel file.
1148
1149        Parameters
1150        ----------
1151        out_file_path : str
1152            The path to the Excel file to export to.
1153        write_metadata : bool, optional
1154            Whether to write the metadata to the Excel file. Defaults to True.
1155
1156        Returns
1157        -------
1158        None
1159        """
1160        from corems.mass_spectrum.output.export import HighResMassSpecExport
1161
1162        exportMS = HighResMassSpecExport(out_file_path, self)
1163        exportMS.to_excel(write_metadata=write_metadata)
1164
1165    def to_hdf(self, out_file_path):
1166        """Export the mass spectrum to an HDF file.
1167
1168        Parameters
1169        ----------
1170        out_file_path : str
1171            The path to the HDF file to export to.
1172
1173        Returns
1174        -------
1175        None
1176        """
1177        from corems.mass_spectrum.output.export import HighResMassSpecExport
1178
1179        exportMS = HighResMassSpecExport(out_file_path, self)
1180        exportMS.to_hdf()
1181
1182    def to_csv(self, out_file_path, write_metadata=True):
1183        """Export the mass spectrum to a CSV file.
1184
1185        Parameters
1186        ----------
1187        out_file_path : str
1188            The path to the CSV file to export to.
1189        write_metadata : bool, optional
1190            Whether to write the metadata to the CSV file. Defaults to True.
1191
1192        """
1193        from corems.mass_spectrum.output.export import HighResMassSpecExport
1194
1195        exportMS = HighResMassSpecExport(out_file_path, self)
1196        exportMS.to_csv(write_metadata=write_metadata)
1197
1198    def to_pandas(self, out_file_path, write_metadata=True):
1199        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1200
1201        Parameters
1202        ----------
1203        out_file_path : str
1204            The path to the CSV file to export to.
1205        write_metadata : bool, optional
1206            Whether to write the metadata to the CSV file. Defaults to True.
1207
1208        """
1209        from corems.mass_spectrum.output.export import HighResMassSpecExport
1210
1211        exportMS = HighResMassSpecExport(out_file_path, self)
1212        exportMS.to_pandas(write_metadata=write_metadata)
1213
1214    def to_dataframe(self, additional_columns=None):
1215        """Return the mass spectrum as a Pandas dataframe.
1216
1217        Parameters
1218        ----------
1219        additional_columns : list, optional
1220            A list of additional columns to include in the dataframe. Defaults to None.
1221            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1222
1223        Returns
1224        -------
1225        pandas.DataFrame
1226            The mass spectrum as a Pandas dataframe.
1227        """
1228        from corems.mass_spectrum.output.export import HighResMassSpecExport
1229
1230        exportMS = HighResMassSpecExport(self.filename, self)
1231        return exportMS.get_pandas_df(additional_columns=additional_columns)
1232
1233    def to_json(self):
1234        """Return the mass spectrum as a JSON file."""
1235        from corems.mass_spectrum.output.export import HighResMassSpecExport
1236
1237        exportMS = HighResMassSpecExport(self.filename, self)
1238        return exportMS.to_json()
1239
1240    def parameters_json(self):
1241        """Return the parameters of the mass spectrum as a JSON string."""
1242        from corems.mass_spectrum.output.export import HighResMassSpecExport
1243
1244        exportMS = HighResMassSpecExport(self.filename, self)
1245        return exportMS.parameters_to_json()
1246
1247    def parameters_toml(self):
1248        """Return the parameters of the mass spectrum as a TOML string."""
1249        from corems.mass_spectrum.output.export import HighResMassSpecExport
1250
1251        exportMS = HighResMassSpecExport(self.filename, self)
1252        return exportMS.parameters_to_toml()

A mass spectrum base class, stores the profile data and instrument settings.

Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. _mspeaks is populated under the hood by calling process_mass_spec method. Iteration is null if _mspeaks is empty.

Parameters
  • mz_exp (array_like): The m/z values of the mass spectrum.
  • abundance (array_like): The abundance values of the mass spectrum.
  • d_params (dict): A dictionary of parameters for the mass spectrum.
  • **kwargs: Additional keyword arguments.
Attributes
  • mspeaks (list): A list of mass peaks.
  • is_calibrated (bool): Whether the mass spectrum is calibrated.
  • is_centroid (bool): Whether the mass spectrum is centroided.
  • has_frequency (bool): Whether the mass spectrum has a frequency domain.
  • calibration_order (None or int): The order of the mass spectrum's calibration.
  • calibration_points (None or ndarray): The calibration points of the mass spectrum.
  • calibration_ref_mzs (None or ndarray): The reference m/z values of the mass spectrum's calibration.
  • calibration_meas_mzs (None or ndarray): The measured m/z values of the mass spectrum's calibration.
  • calibration_RMS (None or float): The root mean square of the mass spectrum's calibration.
  • calibration_segment (None or CalibrationSegment): The calibration segment of the mass spectrum.
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
  • _dict_nominal_masses_indexes (dict): A dictionary of nominal masses and their indexes.
  • _baseline_noise (float): The baseline noise of the mass spectrum.
  • _baseline_noise_std (float): The standard deviation of the baseline noise of the mass spectrum.
  • _dynamic_range (float or None): The dynamic range of the mass spectrum.
  • _transient_settings (None or TransientSettings): The transient settings of the mass spectrum.
  • _frequency_domain (None or FrequencyDomain): The frequency domain of the mass spectrum.
  • _mz_cal_profile (None or MzCalibrationProfile): The m/z calibration profile of the mass spectrum.
Methods
  • process_mass_spec(). Main function to process the mass spectrum, including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.

See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()

MassSpecBase(mz_exp, abundance, d_params, **kwargs)
106    def __init__(self, mz_exp, abundance, d_params, **kwargs):
107        self._abundance = array(abundance, dtype=float64)
108        self._mz_exp = array(mz_exp, dtype=float64)
109
110        # objects created after process_mass_spec() function
111        self._mspeaks = list()
112        self.mspeaks = list()
113        self._dict_nominal_masses_indexes = dict()
114        self._baseline_noise = 0.001
115        self._baseline_noise_std = 0.001
116        self._dynamic_range = None
117        # set to None: initialization occurs inside subclass MassSpecfromFreq
118        self._transient_settings = None
119        self._frequency_domain = None
120        self._mz_cal_profile = None
121        self.is_calibrated = False
122
123        self._set_parameters_objects(d_params)
124        self._init_settings()
125
126        self.is_centroid = False
127        self.has_frequency = False
128
129        self.calibration_order = None
130        self.calibration_points = None
131        self.calibration_ref_mzs = None
132        self.calibration_meas_mzs = None
133        self.calibration_RMS = None
134        self.calibration_segment = None
135        self.calibration_raw_error_median = None
136        self.calibration_raw_error_stdev = None
mspeaks
is_calibrated
is_centroid
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
def set_indexes(self, list_indexes):
148    def set_indexes(self, list_indexes):
149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
150
151        Parameters
152        ----------
153        list_indexes : list of int
154            A list of integers representing the indexes of the MSpeaks to iterate over.
155
156        """
157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
158
159        for i, mspeak in enumerate(self.mspeaks):
160            mspeak.index = i
161
162        self._set_nominal_masses_start_final_indexes()

Set the mass spectrum to iterate over only the selected MSpeaks indexes.

Parameters
  • list_indexes (list of int): A list of integers representing the indexes of the MSpeaks to iterate over.
def reset_indexes(self):
164    def reset_indexes(self):
165        """Reset the mass spectrum to iterate over all MSpeaks objects.
166
167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
169
170        """
171        self.mspeaks = self._mspeaks
172
173        for i, mspeak in enumerate(self.mspeaks):
174            mspeak.index = i
175
176        self._set_nominal_masses_start_final_indexes()

Reset the mass spectrum to iterate over all MSpeaks objects.

This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects. It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.

def add_mspeak( self, ion_charge, mz_exp, abundance, resolving_power, signal_to_noise, massspec_indexes, exp_freq=None, ms_parent=None):
178    def add_mspeak(
179        self,
180        ion_charge,
181        mz_exp,
182        abundance,
183        resolving_power,
184        signal_to_noise,
185        massspec_indexes,
186        exp_freq=None,
187        ms_parent=None,
188    ):
189        """Add a new MSPeak object to the MassSpectrum object.
190
191        Parameters
192        ----------
193        ion_charge : int
194            The ion charge of the MSPeak.
195        mz_exp : float
196            The experimental m/z value of the MSPeak.
197        abundance : float
198            The abundance of the MSPeak.
199        resolving_power : float
200            The resolving power of the MSPeak.
201        signal_to_noise : float
202            The signal-to-noise ratio of the MSPeak.
203        massspec_indexes : list
204            A list of indexes of the MSPeak in the MassSpectrum object.
205        exp_freq : float, optional
206            The experimental frequency of the MSPeak. Defaults to None.
207        ms_parent : MSParent, optional
208            The MSParent object associated with the MSPeak. Defaults to None.
209        """
210        mspeak = MSPeak(
211            ion_charge,
212            mz_exp,
213            abundance,
214            resolving_power,
215            signal_to_noise,
216            massspec_indexes,
217            len(self._mspeaks),
218            exp_freq=exp_freq,
219            ms_parent=ms_parent,
220        )
221
222        self._mspeaks.append(mspeak)

Add a new MSPeak object to the MassSpectrum object.

Parameters
  • ion_charge (int): The ion charge of the MSPeak.
  • mz_exp (float): The experimental m/z value of the MSPeak.
  • abundance (float): The abundance of the MSPeak.
  • resolving_power (float): The resolving power of the MSPeak.
  • signal_to_noise (float): The signal-to-noise ratio of the MSPeak.
  • massspec_indexes (list): A list of indexes of the MSPeak in the MassSpectrum object.
  • exp_freq (float, optional): The experimental frequency of the MSPeak. Defaults to None.
  • ms_parent (MSParent, optional): The MSParent object associated with the MSPeak. Defaults to None.
def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
292
293        Parameters
294        ----------
295        Aterm : float
296            The A-term calibration coefficient.
297        Bterm : float
298            The B-term calibration coefficient.
299        C : float
300            The C-term calibration coefficient.
301        fas : float, optional
302            The frequency amplitude scaling factor. Default is 0.
303        """
304        self._calibration_terms = (Aterm, Bterm, C)
305
306        self._mz_exp = self._f_to_mz()
307        self._abundance = self._abundance
308        self.find_peaks()
309        self.reset_indexes()

Reset calibration terms and recalculate the mass-to-charge ratio and abundance.

Parameters
  • Aterm (float): The A-term calibration coefficient.
  • Bterm (float): The B-term calibration coefficient.
  • C (float): The C-term calibration coefficient.
  • fas (float, optional): The frequency amplitude scaling factor. Default is 0.
def clear_molecular_formulas(self):
311    def clear_molecular_formulas(self):
312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
313
314        Returns
315        -------
316        numpy.ndarray
317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
318        """
319        self.check_mspeaks()
320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])

Clear the molecular formulas for all mspeaks in the MassSpectrum.

Returns
  • numpy.ndarray: An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
def process_mass_spec(self, keep_profile=True):
322    def process_mass_spec(self, keep_profile=True):
323        """Process the mass spectrum.
324
325        Parameters
326        ----------
327        keep_profile : bool, optional
328            Whether to keep the profile data after processing. Defaults to True.
329
330        Notes
331        -----
332        This method does the following:
333        - calculates the noise threshold
334        - does peak picking (creates mspeak_objs)
335        - resets the mspeak_obj indexes
336        """
337
338        # if runned mannually make sure to rerun filter_by_noise_threshold
339        # calculates noise threshold
340        # do peak picking( create mspeak_objs)
341        # reset mspeak_obj the indexes
342
343        self.cal_noise_threshold()
344
345        self.find_peaks()
346        self.reset_indexes()
347
348        if self.mspeaks:
349            self._dynamic_range = self.max_abundance / self.min_abundance
350        else:
351            self._dynamic_range = 0
352        if not keep_profile:
353            self._abundance *= 0
354            self._mz_exp *= 0

Process the mass spectrum.

Parameters
  • keep_profile (bool, optional): Whether to keep the profile data after processing. Defaults to True.
Notes

This method does the following:

  • calculates the noise threshold
  • does peak picking (creates mspeak_objs)
  • resets the mspeak_obj indexes
def cal_noise_threshold(self):
356    def cal_noise_threshold(self):
357        """Calculate the noise threshold of the mass spectrum."""
358
359        if self.label == Labels.simulated_profile:
360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
361
362        if self.settings.noise_threshold_method == "log":
363            self._baseline_noise, self._baseline_noise_std = (
364                self.run_log_noise_threshold_calc()
365            )
366
367        else:
368            self._baseline_noise, self._baseline_noise_std = (
369                self.run_noise_threshold_calc()
370            )

Calculate the noise threshold of the mass spectrum.

parameters

Return the parameters of the mass spectrum.

def set_parameter_from_json(self, parameters_path):
381    def set_parameter_from_json(self, parameters_path):
382        """Set the parameters of the mass spectrum from a JSON file.
383
384        Parameters
385        ----------
386        parameters_path : str
387            The path to the JSON file containing the parameters.
388        """
389        load_and_set_parameters_ms(self, parameters_path=parameters_path)

Set the parameters of the mass spectrum from a JSON file.

Parameters
  • parameters_path (str): The path to the JSON file containing the parameters.
def set_parameter_from_toml(self, parameters_path):
391    def set_parameter_from_toml(self, parameters_path):
392        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
mspeaks_settings

Return the MS peak settings of the mass spectrum.

settings

Return the settings of the mass spectrum.

molecular_search_settings

Return the molecular search settings of the mass spectrum.

mz_cal_profile

Return the calibrated m/z profile of the mass spectrum.

mz_cal

Return the calibrated m/z values of the mass spectrum.

mz_exp

Return the experimental m/z values of the mass spectrum.

freq_exp_profile

Return the experimental frequency profile of the mass spectrum.

freq_exp_pp

Return the experimental frequency values of the mass spectrum that are used for peak picking.

mz_exp_profile

Return the experimental m/z profile of the mass spectrum.

mz_exp_pp

Return the experimental m/z values of the mass spectrum that are used for peak picking.

abundance_profile

Return the abundance profile of the mass spectrum.

abundance_profile_pp

Return the abundance profile of the mass spectrum that is used for peak picking.

abundance

Return the abundance values of the mass spectrum.

def freq_exp(self):
518    def freq_exp(self):
519        """Return the experimental frequency values of the mass spectrum."""
520        self.check_mspeaks()
521        return array([mspeak.freq_exp for mspeak in self.mspeaks])

Return the experimental frequency values of the mass spectrum.

resolving_power

Return the resolving power values of the mass spectrum.

signal_to_noise
nominal_mz

Return the nominal m/z values of the mass spectrum.

def get_mz_and_abundance_peaks_tuples(self):
542    def get_mz_and_abundance_peaks_tuples(self):
543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
544        self.check_mspeaks()
545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]

Return a list of tuples containing the m/z and abundance values of the mass spectrum.

kmd

Return the Kendrick mass defect values of the mass spectrum.

kendrick_mass

Return the Kendrick mass values of the mass spectrum.

max_mz_exp

Return the maximum experimental m/z value of the mass spectrum.

min_mz_exp

Return the minimum experimental m/z value of the mass spectrum.

max_abundance

Return the maximum abundance value of the mass spectrum.

max_signal_to_noise

Return the maximum signal-to-noise ratio of the mass spectrum.

most_abundant_mspeak

Return the most abundant MSpeak object of the mass spectrum.

min_abundance

Return the minimum abundance value of the mass spectrum.

dynamic_range

Return the dynamic range of the mass spectrum.

baseline_noise

Return the baseline noise of the mass spectrum.

baseline_noise_std

Return the standard deviation of the baseline noise of the mass spectrum.

Aterm

Return the A-term calibration coefficient of the mass spectrum.

Bterm

Return the B-term calibration coefficient of the mass spectrum.

Cterm

Return the C-term calibration coefficient of the mass spectrum.

filename

Return the filename of the mass spectrum.

dir_location

Return the directory location of the mass spectrum.

def sort_by_mz(self):
638    def sort_by_mz(self):
639        """Sort the mass spectrum by m/z values."""
640        return sorted(self, key=lambda m: m.mz_exp)

Sort the mass spectrum by m/z values.

def sort_by_abundance(self, reverse=False):
642    def sort_by_abundance(self, reverse=False):
643        """Sort the mass spectrum by abundance values."""
644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)

Sort the mass spectrum by abundance values.

tic

Return the total ion current of the mass spectrum.

def check_mspeaks_warning(self):
651    def check_mspeaks_warning(self):
652        """Check if the mass spectrum has MSpeaks objects.
653
654        Raises
655        ------
656        Warning
657            If the mass spectrum has no MSpeaks objects.
658        """
659        import warnings
660
661        if self.mspeaks:
662            pass
663        else:
664            warnings.warn("mspeaks list is empty, continuing without filtering data")

Check if the mass spectrum has MSpeaks objects.

Raises
  • Warning: If the mass spectrum has no MSpeaks objects.
def check_mspeaks(self):
666    def check_mspeaks(self):
667        """Check if the mass spectrum has MSpeaks objects.
668
669        Raises
670        ------
671        Exception
672            If the mass spectrum has no MSpeaks objects.
673        """
674        if self.mspeaks:
675            pass
676        else:
677            raise Exception(
678                "mspeaks list is empty, please run process_mass_spec() first"
679            )

Check if the mass spectrum has MSpeaks objects.

Raises
  • Exception: If the mass spectrum has no MSpeaks objects.
def remove_assignment_by_index(self, indexes):
681    def remove_assignment_by_index(self, indexes):
682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
683
684        Parameters
685        ----------
686        indexes : list of int
687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
688        """
689        for i in indexes:
690            self.mspeaks[i].clear_molecular_formulas()

Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.

Parameters
  • indexes (list of int): A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
def filter_by_index(self, list_indexes):
692    def filter_by_index(self, list_indexes):
693        """Filter the mass spectrum by the specified indexes.
694
695        Parameters
696        ----------
697        list_indexes : list of int
698            A list of indexes of the MSpeaks objects to drop.
699
700        """
701
702        self.mspeaks = [
703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
704        ]
705
706        for i, mspeak in enumerate(self.mspeaks):
707            mspeak.index = i
708
709        self._set_nominal_masses_start_final_indexes()

Filter the mass spectrum by the specified indexes.

Parameters
  • list_indexes (list of int): A list of indexes of the MSpeaks objects to drop.
def filter_by_mz(self, min_mz, max_mz):
711    def filter_by_mz(self, min_mz, max_mz):
712        """Filter the mass spectrum by the specified m/z range.
713
714        Parameters
715        ----------
716        min_mz : float
717            The minimum m/z value to keep.
718        max_mz : float
719            The maximum m/z value to keep.
720
721        """
722        self.check_mspeaks_warning()
723        indexes = [
724            index
725            for index, mspeak in enumerate(self.mspeaks)
726            if not min_mz <= mspeak.mz_exp <= max_mz
727        ]
728        self.filter_by_index(indexes)

Filter the mass spectrum by the specified m/z range.

Parameters
  • min_mz (float): The minimum m/z value to keep.
  • max_mz (float): The maximum m/z value to keep.
def filter_by_s2n(self, min_s2n, max_s2n=False):
730    def filter_by_s2n(self, min_s2n, max_s2n=False):
731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
732
733        Parameters
734        ----------
735        min_s2n : float
736            The minimum signal-to-noise ratio to keep.
737        max_s2n : float, optional
738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
739
740        """
741        self.check_mspeaks_warning()
742        if max_s2n:
743            indexes = [
744                index
745                for index, mspeak in enumerate(self.mspeaks)
746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
747            ]
748        else:
749            indexes = [
750                index
751                for index, mspeak in enumerate(self.mspeaks)
752                if mspeak.signal_to_noise <= min_s2n
753            ]
754        self.filter_by_index(indexes)

Filter the mass spectrum by the specified signal-to-noise ratio range.

Parameters
  • min_s2n (float): The minimum signal-to-noise ratio to keep.
  • max_s2n (float, optional): The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
def filter_by_abundance(self, min_abund, max_abund=False):
756    def filter_by_abundance(self, min_abund, max_abund=False):
757        """Filter the mass spectrum by the specified abundance range.
758
759        Parameters
760        ----------
761        min_abund : float
762            The minimum abundance to keep.
763        max_abund : float, optional
764            The maximum abundance to keep. Defaults to False (no maximum).
765
766        """
767        self.check_mspeaks_warning()
768        if max_abund:
769            indexes = [
770                index
771                for index, mspeak in enumerate(self.mspeaks)
772                if not min_abund <= mspeak.abundance <= max_abund
773            ]
774        else:
775            indexes = [
776                index
777                for index, mspeak in enumerate(self.mspeaks)
778                if mspeak.abundance <= min_abund
779            ]
780        self.filter_by_index(indexes)

Filter the mass spectrum by the specified abundance range.

Parameters
  • min_abund (float): The minimum abundance to keep.
  • max_abund (float, optional): The maximum abundance to keep. Defaults to False (no maximum).
def filter_by_max_resolving_power(self, B, T):
782    def filter_by_max_resolving_power(self, B, T):
783        """Filter the mass spectrum by the specified maximum resolving power.
784
785        Parameters
786        ----------
787        B : float
788        T : float
789
790        """
791
792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
793
794        self.check_mspeaks_warning()
795
796        indexes_to_remove = [
797            index
798            for index, mspeak in enumerate(self.mspeaks)
799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
800        ]
801        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the specified maximum resolving power.

Parameters
  • B (float):

  • T (float):

def filter_by_mean_resolving_power(self, ndeviations=3, plot=False, guess_pars=False):
803    def filter_by_mean_resolving_power(
804        self, ndeviations=3, plot=False, guess_pars=False
805    ):
806        """Filter the mass spectrum by the specified mean resolving power.
807
808        Parameters
809        ----------
810        ndeviations : float, optional
811            The number of standard deviations to use for filtering. Defaults to 3.
812        plot : bool, optional
813            Whether to plot the resolving power distribution. Defaults to False.
814        guess_pars : bool, optional
815            Whether to guess the parameters for the Gaussian model. Defaults to False.
816
817        """
818        self.check_mspeaks_warning()
819        indexes_to_remove = MeanResolvingPowerFilter(
820            self, ndeviations, plot, guess_pars
821        ).main()
822        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the specified mean resolving power.

Parameters
  • ndeviations (float, optional): The number of standard deviations to use for filtering. Defaults to 3.
  • plot (bool, optional): Whether to plot the resolving power distribution. Defaults to False.
  • guess_pars (bool, optional): Whether to guess the parameters for the Gaussian model. Defaults to False.
def filter_by_min_resolving_power(self, B, T):
824    def filter_by_min_resolving_power(self, B, T):
825        """Filter the mass spectrum by the specified minimum resolving power.
826
827        Parameters
828        ----------
829        B : float
830        T : float
831
832        """
833        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
834
835        self.check_mspeaks_warning()
836
837        indexes_to_remove = [
838            index
839            for index, mspeak in enumerate(self.mspeaks)
840            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
841        ]
842        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the specified minimum resolving power.

Parameters
  • B (float):

  • T (float):

def filter_by_noise_threshold(self):
844    def filter_by_noise_threshold(self):
845        """Filter the mass spectrum by the noise threshold."""
846
847        threshold = self.get_noise_threshold()[1][0]
848
849        self.check_mspeaks_warning()
850
851        indexes_to_remove = [
852            index
853            for index, mspeak in enumerate(self.mspeaks)
854            if mspeak.abundance <= threshold
855        ]
856        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the noise threshold.

def find_peaks(self):
858    def find_peaks(self):
859        """Find the peaks of the mass spectrum."""
860        # needs to clear previous results from peak_picking
861        self._mspeaks = list()
862
863        # then do peak picking
864        self.do_peak_picking()
865        # print("A total of %i peaks were found" % len(self._mspeaks))

Find the peaks of the mass spectrum.

def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
867    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
868        """Change the Kendrick base of all MSpeaks objects.
869
870        Parameters
871        ----------
872        kendrick_dict_base : dict
873            A dictionary of the Kendrick base to change to.
874
875        Notes
876        -----
877        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
878        """
879        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
880
881        for mspeak in self.mspeaks:
882            mspeak.change_kendrick_base(kendrick_dict_base)

Change the Kendrick base of all MSpeaks objects.

Parameters
  • kendrick_dict_base (dict): A dictionary of the Kendrick base to change to.
Notes

Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc

def get_nominal_mz_first_last_indexes(self, nominal_mass):
884    def get_nominal_mz_first_last_indexes(self, nominal_mass):
885        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
886
887        Parameters
888        ----------
889        nominal_mass : int
890            The nominal mass to get the indexes for.
891
892        Returns
893        -------
894        tuple
895            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
896        """
897        if self._dict_nominal_masses_indexes:
898            if nominal_mass in self._dict_nominal_masses_indexes.keys():
899                return (
900                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
901                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
902                )
903
904            else:
905                # import warnings
906                # uncomment warn to distribution
907                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
908                return (0, 0)
909        else:
910            raise Exception(
911                "run process_mass_spec() function before trying to access the data"
912            )

Return the first and last indexes of the MSpeaks objects with the specified nominal mass.

Parameters
  • nominal_mass (int): The nominal mass to get the indexes for.
Returns
  • tuple: A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
def get_masses_count_by_nominal_mass(self):
914    def get_masses_count_by_nominal_mass(self):
915        """Return a dictionary of the nominal masses and their counts."""
916
917        dict_nominal_masses_count = {}
918
919        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
920
921        for nominal_mass in all_nominal_masses:
922            if nominal_mass not in dict_nominal_masses_count:
923                dict_nominal_masses_count[nominal_mass] = len(
924                    list(self.get_nominal_mass_indexes(nominal_mass))
925                )
926
927        return dict_nominal_masses_count

Return a dictionary of the nominal masses and their counts.

def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
929    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
930        """Return a dictionary of the nominal masses and their counts.
931
932        Parameters
933        ----------
934        mz_overlay : float, optional
935            The m/z overlay to use for counting. Defaults to 0.1.
936
937        Returns
938        -------
939        dict
940            A dictionary of the nominal masses and their counts.
941        """
942        dict_nominal_masses_count = {}
943
944        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
945
946        for nominal_mass in all_nominal_masses:
947            if nominal_mass not in dict_nominal_masses_count:
948                min_mz = nominal_mass - mz_overlay
949
950                max_mz = nominal_mass + 1 + mz_overlay
951
952                indexes = indexes = where(
953                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
954                )
955
956                dict_nominal_masses_count[nominal_mass] = indexes[0].size
957
958        return dict_nominal_masses_count

Return a dictionary of the nominal masses and their counts.

Parameters
  • mz_overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
  • dict: A dictionary of the nominal masses and their counts.
def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
960    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
961        """Return the indexes of the MSpeaks objects with the specified nominal mass.
962
963        Parameters
964        ----------
965        nominal_mass : int
966            The nominal mass to get the indexes for.
967        overlay : float, optional
968            The m/z overlay to use for counting. Defaults to 0.1.
969
970        Returns
971        -------
972        generator
973            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
974        """
975        min_mz_to_look = nominal_mass - overlay
976        max_mz_to_look = nominal_mass + 1 + overlay
977
978        return (
979            i
980            for i in range(len(self.mspeaks))
981            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
982        )
983
984        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
985        # return indexes

Return the indexes of the MSpeaks objects with the specified nominal mass.

Parameters
  • nominal_mass (int): The nominal mass to get the indexes for.
  • overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
  • generator: A generator of the indexes of the MSpeaks objects with the specified nominal mass.
def plot_centroid(self, ax=None, c='g'):
1012    def plot_centroid(self, ax=None, c="g"):
1013        """Plot the centroid data of the mass spectrum.
1014
1015        Parameters
1016        ----------
1017        ax : matplotlib.axes.Axes, optional
1018            The matplotlib axes to plot on. Defaults to None.
1019        c : str, optional
1020            The color to use for the plot. Defaults to 'g' (green).
1021
1022        Returns
1023        -------
1024        matplotlib.axes.Axes
1025            The matplotlib axes containing the plot.
1026
1027        Raises
1028        ------
1029        Exception
1030            If no centroid data is found.
1031        """
1032
1033        import matplotlib.pyplot as plt
1034
1035        if self._mspeaks:
1036            if ax is None:
1037                ax = plt.gca()
1038
1039            markerline_a, stemlines_a, baseline_a = ax.stem(
1040                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1041            )
1042
1043            plt.setp(markerline_a, "color", c, "linewidth", 2)
1044            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1045            plt.setp(baseline_a, "color", c, "linewidth", 2)
1046
1047            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1048            ax.set_ylabel("Abundance", fontsize=12)
1049            ax.tick_params(axis="both", which="major", labelsize=12)
1050
1051            ax.axes.spines["top"].set_visible(False)
1052            ax.axes.spines["right"].set_visible(False)
1053
1054            ax.get_yaxis().set_visible(False)
1055            ax.spines["left"].set_visible(False)
1056
1057        else:
1058            raise Exception("No centroid data found, please run process_mass_spec")
1059
1060        return ax

Plot the centroid data of the mass spectrum.

Parameters
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
  • c (str, optional): The color to use for the plot. Defaults to 'g' (green).
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
  • Exception: If no centroid data is found.
def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1062    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1063        """Plot the profile data and noise threshold of the mass spectrum.
1064
1065        Parameters
1066        ----------
1067        ax : matplotlib.axes.Axes, optional
1068            The matplotlib axes to plot on. Defaults to None.
1069        legend : bool, optional
1070            Whether to show the legend. Defaults to False.
1071
1072        Returns
1073        -------
1074        matplotlib.axes.Axes
1075            The matplotlib axes containing the plot.
1076
1077        Raises
1078        ------
1079        Exception
1080            If no noise threshold is found.
1081        """
1082        import matplotlib.pyplot as plt
1083
1084        if self.baseline_noise_std and self.baseline_noise_std:
1085            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1086            baseline = (self.baseline_noise, self.baseline_noise)
1087
1088            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1089            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1090            x, y = self.get_noise_threshold()
1091
1092            if ax is None:
1093                ax = plt.gca()
1094
1095            ax.plot(
1096                self.mz_exp_profile,
1097                self.abundance_profile,
1098                color="green",
1099                label="Spectrum",
1100            )
1101            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1102            ax.plot(x, y, color="red", label="Noise Threshold")
1103
1104            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1105            ax.set_ylabel("Abundance", fontsize=12)
1106            ax.tick_params(axis="both", which="major", labelsize=12)
1107
1108            ax.axes.spines["top"].set_visible(False)
1109            ax.axes.spines["right"].set_visible(False)
1110
1111            ax.get_yaxis().set_visible(False)
1112            ax.spines["left"].set_visible(False)
1113            if legend:
1114                ax.legend()
1115
1116        else:
1117            raise Exception("Calculate noise threshold first")
1118
1119        return ax

Plot the profile data and noise threshold of the mass spectrum.

Parameters
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
  • legend (bool, optional): Whether to show the legend. Defaults to False.
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
  • Exception: If no noise threshold is found.
def plot_mz_domain_profile(self, color='green', ax=None):
1121    def plot_mz_domain_profile(self, color="green", ax=None):
1122        """Plot the m/z domain profile of the mass spectrum.
1123
1124        Parameters
1125        ----------
1126        color : str, optional
1127            The color to use for the plot. Defaults to 'green'.
1128        ax : matplotlib.axes.Axes, optional
1129            The matplotlib axes to plot on. Defaults to None.
1130
1131        Returns
1132        -------
1133        matplotlib.axes.Axes
1134            The matplotlib axes containing the plot.
1135        """
1136
1137        import matplotlib.pyplot as plt
1138
1139        if ax is None:
1140            ax = plt.gca()
1141        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1142        ax.set(xlabel="m/z", ylabel="abundance")
1143
1144        return ax

Plot the m/z domain profile of the mass spectrum.

Parameters
  • color (str, optional): The color to use for the plot. Defaults to 'green'.
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
def to_excel(self, out_file_path, write_metadata=True):
1146    def to_excel(self, out_file_path, write_metadata=True):
1147        """Export the mass spectrum to an Excel file.
1148
1149        Parameters
1150        ----------
1151        out_file_path : str
1152            The path to the Excel file to export to.
1153        write_metadata : bool, optional
1154            Whether to write the metadata to the Excel file. Defaults to True.
1155
1156        Returns
1157        -------
1158        None
1159        """
1160        from corems.mass_spectrum.output.export import HighResMassSpecExport
1161
1162        exportMS = HighResMassSpecExport(out_file_path, self)
1163        exportMS.to_excel(write_metadata=write_metadata)

Export the mass spectrum to an Excel file.

Parameters
  • out_file_path (str): The path to the Excel file to export to.
  • write_metadata (bool, optional): Whether to write the metadata to the Excel file. Defaults to True.
Returns
  • None
def to_hdf(self, out_file_path):
1165    def to_hdf(self, out_file_path):
1166        """Export the mass spectrum to an HDF file.
1167
1168        Parameters
1169        ----------
1170        out_file_path : str
1171            The path to the HDF file to export to.
1172
1173        Returns
1174        -------
1175        None
1176        """
1177        from corems.mass_spectrum.output.export import HighResMassSpecExport
1178
1179        exportMS = HighResMassSpecExport(out_file_path, self)
1180        exportMS.to_hdf()

Export the mass spectrum to an HDF file.

Parameters
  • out_file_path (str): The path to the HDF file to export to.
Returns
  • None
def to_csv(self, out_file_path, write_metadata=True):
1182    def to_csv(self, out_file_path, write_metadata=True):
1183        """Export the mass spectrum to a CSV file.
1184
1185        Parameters
1186        ----------
1187        out_file_path : str
1188            The path to the CSV file to export to.
1189        write_metadata : bool, optional
1190            Whether to write the metadata to the CSV file. Defaults to True.
1191
1192        """
1193        from corems.mass_spectrum.output.export import HighResMassSpecExport
1194
1195        exportMS = HighResMassSpecExport(out_file_path, self)
1196        exportMS.to_csv(write_metadata=write_metadata)

Export the mass spectrum to a CSV file.

Parameters
  • out_file_path (str): The path to the CSV file to export to.
  • write_metadata (bool, optional): Whether to write the metadata to the CSV file. Defaults to True.
def to_pandas(self, out_file_path, write_metadata=True):
1198    def to_pandas(self, out_file_path, write_metadata=True):
1199        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1200
1201        Parameters
1202        ----------
1203        out_file_path : str
1204            The path to the CSV file to export to.
1205        write_metadata : bool, optional
1206            Whether to write the metadata to the CSV file. Defaults to True.
1207
1208        """
1209        from corems.mass_spectrum.output.export import HighResMassSpecExport
1210
1211        exportMS = HighResMassSpecExport(out_file_path, self)
1212        exportMS.to_pandas(write_metadata=write_metadata)

Export the mass spectrum to a Pandas dataframe with pkl extension.

Parameters
  • out_file_path (str): The path to the CSV file to export to.
  • write_metadata (bool, optional): Whether to write the metadata to the CSV file. Defaults to True.
def to_dataframe(self, additional_columns=None):
1214    def to_dataframe(self, additional_columns=None):
1215        """Return the mass spectrum as a Pandas dataframe.
1216
1217        Parameters
1218        ----------
1219        additional_columns : list, optional
1220            A list of additional columns to include in the dataframe. Defaults to None.
1221            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1222
1223        Returns
1224        -------
1225        pandas.DataFrame
1226            The mass spectrum as a Pandas dataframe.
1227        """
1228        from corems.mass_spectrum.output.export import HighResMassSpecExport
1229
1230        exportMS = HighResMassSpecExport(self.filename, self)
1231        return exportMS.get_pandas_df(additional_columns=additional_columns)

Return the mass spectrum as a Pandas dataframe.

Parameters
  • additional_columns (list, optional): A list of additional columns to include in the dataframe. Defaults to None. Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
Returns
  • pandas.DataFrame: The mass spectrum as a Pandas dataframe.
def to_json(self):
1233    def to_json(self):
1234        """Return the mass spectrum as a JSON file."""
1235        from corems.mass_spectrum.output.export import HighResMassSpecExport
1236
1237        exportMS = HighResMassSpecExport(self.filename, self)
1238        return exportMS.to_json()

Return the mass spectrum as a JSON file.

def parameters_json(self):
1240    def parameters_json(self):
1241        """Return the parameters of the mass spectrum as a JSON string."""
1242        from corems.mass_spectrum.output.export import HighResMassSpecExport
1243
1244        exportMS = HighResMassSpecExport(self.filename, self)
1245        return exportMS.parameters_to_json()

Return the parameters of the mass spectrum as a JSON string.

def parameters_toml(self):
1247    def parameters_toml(self):
1248        """Return the parameters of the mass spectrum as a TOML string."""
1249        from corems.mass_spectrum.output.export import HighResMassSpecExport
1250
1251        exportMS = HighResMassSpecExport(self.filename, self)
1252        return exportMS.parameters_to_toml()

Return the parameters of the mass spectrum as a TOML string.

class MassSpecProfile(MassSpecBase):
1255class MassSpecProfile(MassSpecBase):
1256    """A mass spectrum class when the entry point is on profile format
1257
1258    Notes
1259    -----
1260    Stores the profile data and instrument settings.
1261    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
1262    _mspeaks is populated under the hood by calling process_mass_spec method.
1263    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
1264
1265    Parameters
1266    ----------
1267    data_dict : dict
1268        A dictionary containing the profile data.
1269    d_params : dict{'str': float, int or str}
1270        contains the instrument settings and processing settings
1271    auto_process : bool, optional
1272        Whether to automatically process the mass spectrum. Defaults to True.
1273
1274
1275    Attributes
1276    ----------
1277    _abundance : ndarray
1278        The abundance values of the mass spectrum.
1279    _mz_exp : ndarray
1280        The m/z values of the mass spectrum.
1281    _mspeaks : list
1282        A list of mass peaks.
1283
1284    Methods
1285    ----------
1286    * process_mass_spec(). Process the mass spectrum.
1287
1288    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
1289    """
1290
1291    def __init__(self, data_dict, d_params, auto_process=True):
1292        # print(data_dict.keys())
1293        super().__init__(
1294            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
1295        )
1296
1297        if auto_process:
1298            self.process_mass_spec()

A mass spectrum class when the entry point is on profile format

Notes

Stores the profile data and instrument settings. Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. _mspeaks is populated under the hood by calling process_mass_spec method. Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().

Parameters
  • data_dict (dict): A dictionary containing the profile data.
  • d_params : dict{'str' (float, int or str}): contains the instrument settings and processing settings
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
Methods
  • process_mass_spec(). Process the mass spectrum.

see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()

MassSpecProfile(data_dict, d_params, auto_process=True)
1291    def __init__(self, data_dict, d_params, auto_process=True):
1292        # print(data_dict.keys())
1293        super().__init__(
1294            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
1295        )
1296
1297        if auto_process:
1298            self.process_mass_spec()
Inherited Members
MassSpecBase
mspeaks
is_calibrated
is_centroid
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
process_mass_spec
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_profile
mz_exp_pp
abundance_profile
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
tic
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecfromFreq(MassSpecBase):
1301class MassSpecfromFreq(MassSpecBase):
1302    """A mass spectrum class when data entry is on frequency domain
1303
1304    Notes
1305    -----
1306    - Transform to m/z based on the settings stored at d_params
1307    - Stores the profile data and instrument settings
1308    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
1309    - _mspeaks is populated under the hood by calling process_mass_spec method
1310    - iteration is null if _mspeaks is empty
1311
1312    Parameters
1313    ----------
1314    frequency_domain : list(float)
1315        all datapoints in frequency domain in Hz
1316    magnitude :  frequency_domain : list(float)
1317        all datapoints in for magnitude of each frequency datapoint
1318    d_params : dict{'str': float, int or str}
1319        contains the instrument settings and processing settings
1320    auto_process : bool, optional
1321        Whether to automatically process the mass spectrum. Defaults to True.
1322    keep_profile : bool, optional
1323        Whether to keep the profile data. Defaults to True.
1324
1325    Attributes
1326    ----------
1327    has_frequency : bool
1328        Whether the mass spectrum has frequency data.
1329    _frequency_domain : list(float)
1330        Frequency domain in Hz
1331    label : str
1332        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
1333    _abundance : ndarray
1334        The abundance values of the mass spectrum.
1335    _mz_exp : ndarray
1336        The m/z values of the mass spectrum.
1337    _mspeaks : list
1338        A list of mass peaks.
1339    See Also: all the attributes of MassSpecBase class
1340
1341    Methods
1342    ----------
1343    * _set_mz_domain().
1344        calculates the m_z based on the setting of d_params
1345    * process_mass_spec().  Process the mass spectrum.
1346
1347    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
1348    """
1349
1350    def __init__(
1351        self,
1352        frequency_domain,
1353        magnitude,
1354        d_params,
1355        auto_process=True,
1356        keep_profile=True,
1357    ):
1358        super().__init__(None, magnitude, d_params)
1359
1360        self._frequency_domain = frequency_domain
1361        self.has_frequency = True
1362        self._set_mz_domain()
1363        self._sort_mz_domain()
1364
1365        self.magnetron_frequency = None
1366        self.magnetron_frequency_sigma = None
1367
1368        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
1369
1370        if auto_process:
1371            self.process_mass_spec(keep_profile=keep_profile)
1372
1373    def _sort_mz_domain(self):
1374        """Sort the mass spectrum by m/z values."""
1375
1376        if self._mz_exp[0] > self._mz_exp[-1]:
1377            self._mz_exp = self._mz_exp[::-1]
1378            self._abundance = self._abundance[::-1]
1379            self._frequency_domain = self._frequency_domain[::-1]
1380
1381    def _set_mz_domain(self):
1382        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
1383        if self.label == Labels.bruker_frequency:
1384            self._mz_exp = self._f_to_mz_bruker()
1385
1386        else:
1387            self._mz_exp = self._f_to_mz()
1388
1389    @property
1390    def transient_settings(self):
1391        """Return the transient settings of the mass spectrum."""
1392        return self.parameters.transient
1393
1394    @transient_settings.setter
1395    def transient_settings(self, instance_TransientSetting):
1396        self.parameters.transient = instance_TransientSetting
1397
1398    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1399        """Calculates the magnetron frequency of the mass spectrum.
1400
1401        Parameters
1402        ----------
1403        max_magnetron_freq : float, optional
1404            The maximum magnetron frequency. Defaults to 50.
1405        magnetron_freq_bins : int, optional
1406            The number of bins to use for the histogram. Defaults to 300.
1407
1408        Returns
1409        -------
1410        None
1411
1412        Notes
1413        -----
1414        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
1415        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
1416        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
1417        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
1418        """
1419        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
1420        ms_df["FreqDelta"] = ms_df["Freq"].diff()
1421
1422        freq_hist = histogram(
1423            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
1424            bins=magnetron_freq_bins,
1425        )
1426
1427        mod = GaussianModel()
1428        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
1429        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
1430        self.magnetron_frequency = out.best_values["center"]
1431        self.magnetron_frequency_sigma = out.best_values["sigma"]

A mass spectrum class when data entry is on frequency domain

Notes
  • Transform to m/z based on the settings stored at d_params
  • Stores the profile data and instrument settings
  • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
  • _mspeaks is populated under the hood by calling process_mass_spec method
  • iteration is null if _mspeaks is empty
Parameters
  • frequency_domain (list(float)): all datapoints in frequency domain in Hz
  • magnitude : frequency_domain (list(float)): all datapoints in for magnitude of each frequency datapoint
  • d_params : dict{'str' (float, int or str}): contains the instrument settings and processing settings
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
  • keep_profile (bool, optional): Whether to keep the profile data. Defaults to True.
Attributes
  • has_frequency (bool): Whether the mass spectrum has frequency data.
  • _frequency_domain (list(float)): Frequency domain in Hz
  • label (str): store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
  • See Also (all the attributes of MassSpecBase class):
Methods
  • _set_mz_domain(). calculates the m_z based on the setting of d_params
  • process_mass_spec(). Process the mass spectrum.

see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()

MassSpecfromFreq( frequency_domain, magnitude, d_params, auto_process=True, keep_profile=True)
1350    def __init__(
1351        self,
1352        frequency_domain,
1353        magnitude,
1354        d_params,
1355        auto_process=True,
1356        keep_profile=True,
1357    ):
1358        super().__init__(None, magnitude, d_params)
1359
1360        self._frequency_domain = frequency_domain
1361        self.has_frequency = True
1362        self._set_mz_domain()
1363        self._sort_mz_domain()
1364
1365        self.magnetron_frequency = None
1366        self.magnetron_frequency_sigma = None
1367
1368        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
1369
1370        if auto_process:
1371            self.process_mass_spec(keep_profile=keep_profile)
has_frequency
magnetron_frequency
magnetron_frequency_sigma
transient_settings

Return the transient settings of the mass spectrum.

def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1398    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1399        """Calculates the magnetron frequency of the mass spectrum.
1400
1401        Parameters
1402        ----------
1403        max_magnetron_freq : float, optional
1404            The maximum magnetron frequency. Defaults to 50.
1405        magnetron_freq_bins : int, optional
1406            The number of bins to use for the histogram. Defaults to 300.
1407
1408        Returns
1409        -------
1410        None
1411
1412        Notes
1413        -----
1414        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
1415        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
1416        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
1417        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
1418        """
1419        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
1420        ms_df["FreqDelta"] = ms_df["Freq"].diff()
1421
1422        freq_hist = histogram(
1423            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
1424            bins=magnetron_freq_bins,
1425        )
1426
1427        mod = GaussianModel()
1428        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
1429        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
1430        self.magnetron_frequency = out.best_values["center"]
1431        self.magnetron_frequency_sigma = out.best_values["sigma"]

Calculates the magnetron frequency of the mass spectrum.

Parameters
  • max_magnetron_freq (float, optional): The maximum magnetron frequency. Defaults to 50.
  • magnetron_freq_bins (int, optional): The number of bins to use for the histogram. Defaults to 300.
Returns
  • None
Notes

Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain. A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated. A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency. This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.

Inherited Members
MassSpecBase
mspeaks
is_calibrated
is_centroid
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
process_mass_spec
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_profile
mz_exp_pp
abundance_profile
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
tic
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecCentroid(MassSpecBase):
1434class MassSpecCentroid(MassSpecBase):
1435    """A mass spectrum class when the entry point is on centroid format
1436
1437    Notes
1438    -----
1439    - Stores the centroid data and instrument settings
1440    - Simulate profile data based on Gaussian or Lorentzian peak shape
1441    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
1442    - _mspeaks is populated under the hood by calling process_mass_spec method
1443    - iteration is null if _mspeaks is empty
1444
1445    Parameters
1446    ----------
1447    data_dict : dict {string: numpy array float64 )
1448        contains keys [m/z, Abundance, Resolving Power, S/N]
1449    d_params : dict{'str': float, int or str}
1450        contains the instrument settings and processing settings
1451    auto_process : bool, optional
1452        Whether to automatically process the mass spectrum. Defaults to True.
1453
1454    Attributes
1455    ----------
1456    label : str
1457        store label (Bruker, Midas Transient, see Labels class)
1458    _baseline_noise : float
1459        store baseline noise
1460    _baseline_noise_std : float
1461        store baseline noise std
1462    _abundance : ndarray
1463        The abundance values of the mass spectrum.
1464    _mz_exp : ndarray
1465        The m/z values of the mass spectrum.
1466    _mspeaks : list
1467        A list of mass peaks.
1468
1469
1470    Methods
1471    ----------
1472    * process_mass_spec().
1473        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
1474    * __simulate_profile__data__().
1475        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
1476
1477    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
1478    """
1479
1480    def __init__(self, data_dict, d_params, auto_process=True):
1481        super().__init__([], [], d_params)
1482
1483        self._set_parameters_objects(d_params)
1484
1485        if self.label == Labels.thermo_centroid:
1486            self._baseline_noise = d_params.get("baseline_noise")
1487            self._baseline_noise_std = d_params.get("baseline_noise_std")
1488
1489        self.is_centroid = True
1490        self.data_dict = data_dict
1491        self._mz_exp = data_dict[Labels.mz]
1492        self._abundance = data_dict[Labels.abundance]
1493
1494        if auto_process:
1495            self.process_mass_spec()
1496
1497    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
1498        """Simulate profile data based on Gaussian or Lorentzian peak shape
1499
1500        Notes
1501        -----
1502        Needs theoretical resolving power calculation and define peak shape.
1503        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.
1504
1505        Parameters
1506        ----------
1507        exp_mz_centroid : list(float)
1508            list of m/z values
1509        magnitude_centroid : list(float)
1510            list of abundance values
1511
1512
1513        Returns
1514        -------
1515        x : list(float)
1516            list of m/z values
1517        y : list(float)
1518            list of abundance values
1519        """
1520
1521        x, y = [], []
1522        for i in range(len(exp_mz_centroid)):
1523            x.append(exp_mz_centroid[i] - 0.0000001)
1524            x.append(exp_mz_centroid[i])
1525            x.append(exp_mz_centroid[i] + 0.0000001)
1526            y.append(0)
1527            y.append(magnitude_centroid[i])
1528            y.append(0)
1529        return x, y
1530
1531    @property
1532    def mz_exp_profile(self):
1533        """Return the m/z profile of the mass spectrum."""
1534        mz_list = []
1535        for mz in self.mz_exp:
1536            mz_list.append(mz - 0.0000001)
1537            mz_list.append(mz)
1538            mz_list.append(mz + 0.0000001)
1539        return mz_list
1540
1541    @mz_exp_profile.setter
1542    def mz_exp_profile(self, _mz_exp):
1543        self._mz_exp = _mz_exp
1544
1545    @property
1546    def abundance_profile(self):
1547        """Return the abundance profile of the mass spectrum."""
1548        ab_list = []
1549        for ab in self.abundance:
1550            ab_list.append(0)
1551            ab_list.append(ab)
1552            ab_list.append(0)
1553        return ab_list
1554
1555    @abundance_profile.setter
1556    def abundance_profile(self, abundance):
1557        self._abundance = abundance
1558
1559    @property
1560    def tic(self):
1561        """Return the total ion current of the mass spectrum."""
1562        return sum(self.abundance)
1563
1564    def process_mass_spec(self):
1565        """Process the mass spectrum."""
1566        import tqdm
1567
1568        # overwrite process_mass_spec
1569        # mspeak objs are usually added inside the PeaKPicking class
1570        # for profile and freq based data
1571        data_dict = self.data_dict
1572        ion_charge = self.polarity
1573
1574        # Check if resolving power is present
1575        rp_present = True
1576        if not data_dict.get(Labels.rp):
1577            rp_present = False
1578        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
1579            data_dict.get(Labels.rp)
1580        ):
1581            rp_present = False
1582
1583        # Check if s2n is present
1584        s2n_present = True
1585        if not data_dict.get(Labels.s2n):
1586            s2n_present = False
1587        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
1588            data_dict.get(Labels.s2n)
1589        ):
1590            s2n_present = False
1591
1592        # Warning if no s2n data but noise thresholding is set to signal_noise
1593        if (
1594            not s2n_present
1595            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1596        ):
1597            raise Exception("Signal to Noise data is missing for noise thresholding")
1598
1599        # Pull out abundance data
1600        abun = array(data_dict.get(Labels.abundance)).astype(float)
1601
1602        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
1603        abundance_threshold, factor = self.get_threshold(abun)
1604
1605        # Set rp_i and s2n_i to None which will be overwritten if present
1606        rp_i, s2n_i = np.nan, np.nan
1607        for index, mz in enumerate(data_dict.get(Labels.mz)):
1608            if rp_present:
1609                if not data_dict.get(Labels.rp)[index]:
1610                    rp_i = np.nan
1611                else:
1612                    rp_i = float(data_dict.get(Labels.rp)[index])
1613            if s2n_present:
1614                if not data_dict.get(Labels.s2n)[index]:
1615                    s2n_i = np.nan
1616                else:
1617                    s2n_i = float(data_dict.get(Labels.s2n)[index])
1618
1619            # centroid peak does not have start and end peak index pos
1620            massspec_indexes = (index, index, index)
1621
1622            # Add peaks based on the noise thresholding method
1623            if (
1624                self.parameters.mass_spectrum.noise_threshold_method
1625                in ["minima", "relative_abundance", "absolute_abundance"]
1626                and abun[index] / factor >= abundance_threshold
1627            ):
1628                self.add_mspeak(
1629                    ion_charge,
1630                    mz,
1631                    abun[index],
1632                    rp_i,
1633                    s2n_i,
1634                    massspec_indexes,
1635                    ms_parent=self,
1636                )
1637            if (
1638                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1639                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
1640            ):
1641                self.add_mspeak(
1642                    ion_charge,
1643                    mz,
1644                    abun[index],
1645                    rp_i,
1646                    s2n_i,
1647                    massspec_indexes,
1648                    ms_parent=self,
1649                )
1650
1651        self.mspeaks = self._mspeaks
1652        self._dynamic_range = self.max_abundance / self.min_abundance
1653        self._set_nominal_masses_start_final_indexes()
1654
1655        if self.label != Labels.thermo_centroid:
1656            if self.settings.noise_threshold_method == "log":
1657                raise Exception("log noise Not tested for centroid data")
1658                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1659
1660            else:
1661                self._baseline_noise, self._baseline_noise_std = (
1662                    self.run_noise_threshold_calc()
1663                )
1664
1665        del self.data_dict

A mass spectrum class when the entry point is on centroid format

Notes
  • Stores the centroid data and instrument settings
  • Simulate profile data based on Gaussian or Lorentzian peak shape
  • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
  • _mspeaks is populated under the hood by calling process_mass_spec method
  • iteration is null if _mspeaks is empty
Parameters
  • data_dict : dict {string (numpy array float64 )): contains keys [m/z, Abundance, Resolving Power, S/N]
  • d_params : dict{'str' (float, int or str}): contains the instrument settings and processing settings
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
  • label (str): store label (Bruker, Midas Transient, see Labels class)
  • _baseline_noise (float): store baseline noise
  • _baseline_noise_std (float): store baseline noise std
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
Methods
  • process_mass_spec(). Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
  • __simulate_profile__data__(). Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.

see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()

MassSpecCentroid(data_dict, d_params, auto_process=True)
1480    def __init__(self, data_dict, d_params, auto_process=True):
1481        super().__init__([], [], d_params)
1482
1483        self._set_parameters_objects(d_params)
1484
1485        if self.label == Labels.thermo_centroid:
1486            self._baseline_noise = d_params.get("baseline_noise")
1487            self._baseline_noise_std = d_params.get("baseline_noise_std")
1488
1489        self.is_centroid = True
1490        self.data_dict = data_dict
1491        self._mz_exp = data_dict[Labels.mz]
1492        self._abundance = data_dict[Labels.abundance]
1493
1494        if auto_process:
1495            self.process_mass_spec()
is_centroid
data_dict
mz_exp_profile

Return the m/z profile of the mass spectrum.

abundance_profile

Return the abundance profile of the mass spectrum.

tic

Return the total ion current of the mass spectrum.

def process_mass_spec(self):
1564    def process_mass_spec(self):
1565        """Process the mass spectrum."""
1566        import tqdm
1567
1568        # overwrite process_mass_spec
1569        # mspeak objs are usually added inside the PeaKPicking class
1570        # for profile and freq based data
1571        data_dict = self.data_dict
1572        ion_charge = self.polarity
1573
1574        # Check if resolving power is present
1575        rp_present = True
1576        if not data_dict.get(Labels.rp):
1577            rp_present = False
1578        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
1579            data_dict.get(Labels.rp)
1580        ):
1581            rp_present = False
1582
1583        # Check if s2n is present
1584        s2n_present = True
1585        if not data_dict.get(Labels.s2n):
1586            s2n_present = False
1587        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
1588            data_dict.get(Labels.s2n)
1589        ):
1590            s2n_present = False
1591
1592        # Warning if no s2n data but noise thresholding is set to signal_noise
1593        if (
1594            not s2n_present
1595            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1596        ):
1597            raise Exception("Signal to Noise data is missing for noise thresholding")
1598
1599        # Pull out abundance data
1600        abun = array(data_dict.get(Labels.abundance)).astype(float)
1601
1602        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
1603        abundance_threshold, factor = self.get_threshold(abun)
1604
1605        # Set rp_i and s2n_i to None which will be overwritten if present
1606        rp_i, s2n_i = np.nan, np.nan
1607        for index, mz in enumerate(data_dict.get(Labels.mz)):
1608            if rp_present:
1609                if not data_dict.get(Labels.rp)[index]:
1610                    rp_i = np.nan
1611                else:
1612                    rp_i = float(data_dict.get(Labels.rp)[index])
1613            if s2n_present:
1614                if not data_dict.get(Labels.s2n)[index]:
1615                    s2n_i = np.nan
1616                else:
1617                    s2n_i = float(data_dict.get(Labels.s2n)[index])
1618
1619            # centroid peak does not have start and end peak index pos
1620            massspec_indexes = (index, index, index)
1621
1622            # Add peaks based on the noise thresholding method
1623            if (
1624                self.parameters.mass_spectrum.noise_threshold_method
1625                in ["minima", "relative_abundance", "absolute_abundance"]
1626                and abun[index] / factor >= abundance_threshold
1627            ):
1628                self.add_mspeak(
1629                    ion_charge,
1630                    mz,
1631                    abun[index],
1632                    rp_i,
1633                    s2n_i,
1634                    massspec_indexes,
1635                    ms_parent=self,
1636                )
1637            if (
1638                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1639                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
1640            ):
1641                self.add_mspeak(
1642                    ion_charge,
1643                    mz,
1644                    abun[index],
1645                    rp_i,
1646                    s2n_i,
1647                    massspec_indexes,
1648                    ms_parent=self,
1649                )
1650
1651        self.mspeaks = self._mspeaks
1652        self._dynamic_range = self.max_abundance / self.min_abundance
1653        self._set_nominal_masses_start_final_indexes()
1654
1655        if self.label != Labels.thermo_centroid:
1656            if self.settings.noise_threshold_method == "log":
1657                raise Exception("log noise Not tested for centroid data")
1658                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1659
1660            else:
1661                self._baseline_noise, self._baseline_noise_std = (
1662                    self.run_noise_threshold_calc()
1663                )
1664
1665        del self.data_dict

Process the mass spectrum.

Inherited Members
MassSpecBase
mspeaks
is_calibrated
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_pp
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecCentroidLowRes(MassSpecCentroid):
1668class MassSpecCentroidLowRes(MassSpecCentroid):
1669    """A mass spectrum class when the entry point is on low resolution centroid format
1670
1671    Notes
1672    -----
1673    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
1674
1675    Parameters
1676    ----------
1677    data_dict : dict {string: numpy array float64 )
1678        contains keys [m/z, Abundance, Resolving Power, S/N]
1679    d_params : dict{'str': float, int or str}
1680        contains the instrument settings and processing settings
1681
1682    Attributes
1683    ----------
1684    _processed_tic : float
1685        store processed total ion current
1686    _abundance : ndarray
1687        The abundance values of the mass spectrum.
1688    _mz_exp : ndarray
1689        The m/z values of the mass spectrum.
1690    """
1691
1692    def __init__(self, data_dict, d_params):
1693        self._set_parameters_objects(d_params)
1694        self._mz_exp = array(data_dict.get(Labels.mz))
1695        self._abundance = array(data_dict.get(Labels.abundance))
1696        self._processed_tic = None
1697
1698    def __len__(self):
1699        return len(self.mz_exp)
1700
1701    def __getitem__(self, position):
1702        return (self.mz_exp[position], self.abundance[position])
1703
1704    @property
1705    def mz_exp(self):
1706        """Return the m/z values of the mass spectrum."""
1707        return self._mz_exp
1708
1709    @property
1710    def abundance(self):
1711        """Return the abundance values of the mass spectrum."""
1712        return self._abundance
1713
1714    @property
1715    def processed_tic(self):
1716        """Return the processed total ion current of the mass spectrum."""
1717        return sum(self._processed_tic)
1718
1719    @property
1720    def tic(self):
1721        """Return the total ion current of the mass spectrum."""
1722        if self._processed_tic:
1723            return self._processed_tic
1724        else:
1725            return sum(self.abundance)
1726
1727    @property
1728    def mz_abun_tuples(self):
1729        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
1730        r = lambda x: (int(round(x[0], 0), int(round(x[1], 0))))
1731
1732        return [r(i) for i in self]
1733
1734    @property
1735    def mz_abun_dict(self):
1736        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
1737        r = lambda x: int(round(x, 0))
1738
1739        return {r(i[0]): r(i[1]) for i in self}

A mass spectrum class when the entry point is on low resolution centroid format

Notes

Does not store MSPeak Objs, will iterate over mz, abundance pairs instead

Parameters
  • data_dict : dict {string (numpy array float64 )): contains keys [m/z, Abundance, Resolving Power, S/N]
  • d_params : dict{'str' (float, int or str}): contains the instrument settings and processing settings
Attributes
  • _processed_tic (float): store processed total ion current
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
MassSpecCentroidLowRes(data_dict, d_params)
1692    def __init__(self, data_dict, d_params):
1693        self._set_parameters_objects(d_params)
1694        self._mz_exp = array(data_dict.get(Labels.mz))
1695        self._abundance = array(data_dict.get(Labels.abundance))
1696        self._processed_tic = None
mz_exp

Return the m/z values of the mass spectrum.

abundance

Return the abundance values of the mass spectrum.

processed_tic

Return the processed total ion current of the mass spectrum.

tic

Return the total ion current of the mass spectrum.

mz_abun_tuples

Return the m/z and abundance values of the mass spectrum as a list of tuples.

mz_abun_dict

Return the m/z and abundance values of the mass spectrum as a dictionary.

Inherited Members
MassSpecCentroid
is_centroid
data_dict
mz_exp_profile
abundance_profile
process_mass_spec
MassSpecBase
mspeaks
is_calibrated
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
freq_exp_profile
freq_exp_pp
mz_exp_pp
abundance_profile_pp
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes