corems.mass_spectrum.factory.MassSpectrumClasses

   1from pathlib import Path
   2
   3import numpy as np
   4from lmfit.models import GaussianModel
   5
   6# from matplotlib import rcParamsDefault, rcParams
   7from numpy import array, float64, histogram, trapz, where
   8from pandas import DataFrame
   9
  10from corems.encapsulation.constant import Labels
  11from corems.encapsulation.factory.parameters import MSParameters
  12from corems.encapsulation.input.parameter_from_json import (
  13    load_and_set_parameters_ms,
  14    load_and_set_toml_parameters_ms,
  15)
  16from corems.mass_spectrum.calc.KendrickGroup import KendrickGrouping
  17from corems.mass_spectrum.calc.MassSpectrumCalc import MassSpecCalc
  18from corems.mass_spectrum.calc.MeanResolvingPowerFilter import MeanResolvingPowerFilter
  19from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak as MSPeak
  20
  21__author__ = "Yuri E. Corilo"
  22__date__ = "Jun 12, 2019"
  23
  24
  25def overrides(interface_class):
  26    """Checks if the method overrides a method from an interface class."""
  27
  28    def overrider(method):
  29        assert method.__name__ in dir(interface_class)
  30        return method
  31
  32    return overrider
  33
  34
  35class MassSpecBase(MassSpecCalc, KendrickGrouping):
  36    """A mass spectrum base class, stores the profile data and instrument settings.
  37
  38    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
  39    _mspeaks is populated under the hood by calling process_mass_spec method.
  40    Iteration is null if _mspeaks is empty.
  41
  42    Parameters
  43    ----------
  44    mz_exp : array_like
  45        The m/z values of the mass spectrum.
  46    abundance : array_like
  47        The abundance values of the mass spectrum.
  48    d_params : dict
  49        A dictionary of parameters for the mass spectrum.
  50    **kwargs
  51        Additional keyword arguments.
  52
  53    Attributes
  54    ----------
  55
  56    mspeaks : list
  57        A list of mass peaks.
  58    is_calibrated : bool
  59        Whether the mass spectrum is calibrated.
  60    is_centroid : bool
  61        Whether the mass spectrum is centroided.
  62    has_frequency : bool
  63        Whether the mass spectrum has a frequency domain.
  64    calibration_order : None or int
  65        The order of the mass spectrum's calibration.
  66    calibration_points : None or ndarray
  67        The calibration points of the mass spectrum.
  68    calibration_ref_mzs: None or ndarray
  69        The reference m/z values of the mass spectrum's calibration.
  70    calibration_meas_mzs : None or ndarray
  71        The measured m/z values of the mass spectrum's calibration.
  72    calibration_RMS : None or float
  73        The root mean square of the mass spectrum's calibration.
  74    calibration_segment : None or CalibrationSegment
  75        The calibration segment of the mass spectrum.
  76    _abundance : ndarray
  77        The abundance values of the mass spectrum.
  78    _mz_exp : ndarray
  79        The m/z values of the mass spectrum.
  80    _mspeaks : list
  81        A list of mass peaks.
  82    _dict_nominal_masses_indexes : dict
  83        A dictionary of nominal masses and their indexes.
  84    _baseline_noise : float
  85        The baseline noise of the mass spectrum.
  86    _baseline_noise_std : float
  87        The standard deviation of the baseline noise of the mass spectrum.
  88    _dynamic_range : float or None
  89        The dynamic range of the mass spectrum.
  90    _transient_settings : None or TransientSettings
  91        The transient settings of the mass spectrum.
  92    _frequency_domain : None or FrequencyDomain
  93        The frequency domain of the mass spectrum.
  94    _mz_cal_profile : None or MzCalibrationProfile
  95        The m/z calibration profile of the mass spectrum.
  96
  97    Methods
  98    -------
  99    * process_mass_spec(). Main function to process the mass spectrum,
 100    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
 101
 102    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
 103    """
 104
 105    def __init__(self, mz_exp, abundance, d_params, **kwargs):
 106        self._abundance = array(abundance, dtype=float64)
 107        self._mz_exp = array(mz_exp, dtype=float64)
 108
 109        # objects created after process_mass_spec() function
 110        self._mspeaks = list()
 111        self.mspeaks = list()
 112        self._dict_nominal_masses_indexes = dict()
 113        self._baseline_noise = 0.001
 114        self._baseline_noise_std = 0.001
 115        self._dynamic_range = None
 116        # set to None: initialization occurs inside subclass MassSpecfromFreq
 117        self._transient_settings = None
 118        self._frequency_domain = None
 119        self._mz_cal_profile = None
 120        self.is_calibrated = False
 121
 122        self._set_parameters_objects(d_params)
 123        self._init_settings()
 124
 125        self.is_centroid = False
 126        self.has_frequency = False
 127
 128        self.calibration_order = None
 129        self.calibration_points = None
 130        self.calibration_ref_mzs = None
 131        self.calibration_meas_mzs = None
 132        self.calibration_RMS = None
 133        self.calibration_segment = None
 134        self.calibration_raw_error_median = None
 135        self.calibration_raw_error_stdev = None
 136
 137    def _init_settings(self):
 138        """Initializes the settings for the mass spectrum."""
 139        self._parameters = MSParameters()
 140
 141    def __len__(self):
 142        return len(self.mspeaks)
 143
 144    def __getitem__(self, position) -> MSPeak:
 145        return self.mspeaks[position]
 146
 147    def set_indexes(self, list_indexes):
 148        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
 149
 150        Parameters
 151        ----------
 152        list_indexes : list of int
 153            A list of integers representing the indexes of the MSpeaks to iterate over.
 154
 155        """
 156        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
 157
 158        for i, mspeak in enumerate(self.mspeaks):
 159            mspeak.index = i
 160
 161        self._set_nominal_masses_start_final_indexes()
 162
 163    def reset_indexes(self):
 164        """Reset the mass spectrum to iterate over all MSpeaks objects.
 165
 166        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
 167        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
 168
 169        """
 170        self.mspeaks = self._mspeaks
 171
 172        for i, mspeak in enumerate(self.mspeaks):
 173            mspeak.index = i
 174
 175        self._set_nominal_masses_start_final_indexes()
 176
 177    def add_mspeak(
 178        self,
 179        ion_charge,
 180        mz_exp,
 181        abundance,
 182        resolving_power,
 183        signal_to_noise,
 184        massspec_indexes,
 185        exp_freq=None,
 186        ms_parent=None,
 187    ):
 188        """Add a new MSPeak object to the MassSpectrum object.
 189
 190        Parameters
 191        ----------
 192        ion_charge : int
 193            The ion charge of the MSPeak.
 194        mz_exp : float
 195            The experimental m/z value of the MSPeak.
 196        abundance : float
 197            The abundance of the MSPeak.
 198        resolving_power : float
 199            The resolving power of the MSPeak.
 200        signal_to_noise : float
 201            The signal-to-noise ratio of the MSPeak.
 202        massspec_indexes : list
 203            A list of indexes of the MSPeak in the MassSpectrum object.
 204        exp_freq : float, optional
 205            The experimental frequency of the MSPeak. Defaults to None.
 206        ms_parent : MSParent, optional
 207            The MSParent object associated with the MSPeak. Defaults to None.
 208        """
 209        mspeak = MSPeak(
 210            ion_charge,
 211            mz_exp,
 212            abundance,
 213            resolving_power,
 214            signal_to_noise,
 215            massspec_indexes,
 216            len(self._mspeaks),
 217            exp_freq=exp_freq,
 218            ms_parent=ms_parent,
 219        )
 220
 221        self._mspeaks.append(mspeak)
 222
 223    def _set_parameters_objects(self, d_params):
 224        """Set the parameters of the MassSpectrum object.
 225
 226        Parameters
 227        ----------
 228        d_params : dict
 229            A dictionary containing the parameters to set.
 230
 231        Notes
 232        -----
 233        This method sets the following parameters of the MassSpectrum object:
 234        - _calibration_terms
 235        - label
 236        - analyzer
 237        - acquisition_time
 238        - instrument_label
 239        - polarity
 240        - scan_number
 241        - retention_time
 242        - mobility_rt
 243        - mobility_scan
 244        - _filename
 245        - _dir_location
 246        - _baseline_noise
 247        - _baseline_noise_std
 248        - sample_name
 249        """
 250        self._calibration_terms = (
 251            d_params.get("Aterm"),
 252            d_params.get("Bterm"),
 253            d_params.get("Cterm"),
 254        )
 255
 256        self.label = d_params.get(Labels.label)
 257
 258        self.analyzer = d_params.get("analyzer")
 259
 260        self.acquisition_time = d_params.get("acquisition_time")
 261
 262        self.instrument_label = d_params.get("instrument_label")
 263
 264        self.polarity = int(d_params.get("polarity"))
 265
 266        self.scan_number = d_params.get("scan_number")
 267
 268        self.retention_time = d_params.get("rt")
 269
 270        self.mobility_rt = d_params.get("mobility_rt")
 271
 272        self.mobility_scan = d_params.get("mobility_scan")
 273
 274        self._filename = d_params.get("filename_path")
 275
 276        self._dir_location = d_params.get("dir_location")
 277
 278        self._baseline_noise = d_params.get("baseline_noise")
 279
 280        self._baseline_noise_std = d_params.get("baseline_noise_std")
 281
 282        if d_params.get("sample_name") != "Unknown":
 283            self.sample_name = d_params.get("sample_name")
 284            if not self.sample_name:
 285                self.sample_name = self.filename.stem
 286        else:
 287            self.sample_name = self.filename.stem
 288
 289    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
 290        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
 291
 292        Parameters
 293        ----------
 294        Aterm : float
 295            The A-term calibration coefficient.
 296        Bterm : float
 297            The B-term calibration coefficient.
 298        C : float
 299            The C-term calibration coefficient.
 300        fas : float, optional
 301            The frequency amplitude scaling factor. Default is 0.
 302        """
 303        self._calibration_terms = (Aterm, Bterm, C)
 304
 305        self._mz_exp = self._f_to_mz()
 306        self._abundance = self._abundance
 307        self.find_peaks()
 308        self.reset_indexes()
 309
 310    def clear_molecular_formulas(self):
 311        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
 312
 313        Returns
 314        -------
 315        numpy.ndarray
 316            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
 317        """
 318        self.check_mspeaks()
 319        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
 320
 321    def process_mass_spec(self, keep_profile=True):
 322        """Process the mass spectrum.
 323
 324        Parameters
 325        ----------
 326        keep_profile : bool, optional
 327            Whether to keep the profile data after processing. Defaults to True.
 328
 329        Notes
 330        -----
 331        This method does the following:
 332        - calculates the noise threshold
 333        - does peak picking (creates mspeak_objs)
 334        - resets the mspeak_obj indexes
 335        """
 336
 337        # if runned mannually make sure to rerun filter_by_noise_threshold
 338        # calculates noise threshold
 339        # do peak picking( create mspeak_objs)
 340        # reset mspeak_obj the indexes
 341
 342        self.cal_noise_threshold()
 343
 344        self.find_peaks()
 345        self.reset_indexes()
 346
 347        if self.mspeaks:
 348            self._dynamic_range = self.max_abundance / self.min_abundance
 349        else:
 350            self._dynamic_range = 0
 351        if not keep_profile:
 352            self._abundance *= 0
 353            self._mz_exp *= 0
 354
 355    def cal_noise_threshold(self):
 356        """Calculate the noise threshold of the mass spectrum."""
 357
 358        if self.label == Labels.simulated_profile:
 359            self._baseline_noise, self._baseline_noise_std = 0.1, 1
 360
 361        if self.settings.noise_threshold_method == "log":
 362            self._baseline_noise, self._baseline_noise_std = (
 363                self.run_log_noise_threshold_calc()
 364            )
 365
 366        else:
 367            self._baseline_noise, self._baseline_noise_std = (
 368                self.run_noise_threshold_calc()
 369            )
 370
 371    @property
 372    def parameters(self):
 373        """Return the parameters of the mass spectrum."""
 374        return self._parameters
 375
 376    @parameters.setter
 377    def parameters(self, instance_MSParameters):
 378        self._parameters = instance_MSParameters
 379
 380    def set_parameter_from_json(self, parameters_path):
 381        """Set the parameters of the mass spectrum from a JSON file.
 382
 383        Parameters
 384        ----------
 385        parameters_path : str
 386            The path to the JSON file containing the parameters.
 387        """
 388        load_and_set_parameters_ms(self, parameters_path=parameters_path)
 389
 390    def set_parameter_from_toml(self, parameters_path):
 391        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
 392
 393    @property
 394    def mspeaks_settings(self):
 395        """Return the MS peak settings of the mass spectrum."""
 396        return self.parameters.ms_peak
 397
 398    @mspeaks_settings.setter
 399    def mspeaks_settings(self, instance_MassSpecPeakSetting):
 400        self.parameters.ms_peak = instance_MassSpecPeakSetting
 401
 402    @property
 403    def settings(self):
 404        """Return the settings of the mass spectrum."""
 405        return self.parameters.mass_spectrum
 406
 407    @settings.setter
 408    def settings(self, instance_MassSpectrumSetting):
 409        self.parameters.mass_spectrum = instance_MassSpectrumSetting
 410
 411    @property
 412    def molecular_search_settings(self):
 413        """Return the molecular search settings of the mass spectrum."""
 414        return self.parameters.molecular_search
 415
 416    @molecular_search_settings.setter
 417    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
 418        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
 419
 420    @property
 421    def mz_cal_profile(self):
 422        """Return the calibrated m/z profile of the mass spectrum."""
 423        return self._mz_cal_profile
 424
 425    @mz_cal_profile.setter
 426    def mz_cal_profile(self, mz_cal_list):
 427        if len(mz_cal_list) == len(self._mz_exp):
 428            self._mz_cal_profile = mz_cal_list
 429        else:
 430            raise Exception(
 431                "calibrated array (%i) is not of the same size of the data (%i)"
 432                % (len(mz_cal_list), len(self.mz_exp_profile))
 433            )
 434
 435    @property
 436    def mz_cal(self):
 437        """Return the calibrated m/z values of the mass spectrum."""
 438        return array([mspeak.mz_cal for mspeak in self.mspeaks])
 439
 440    @mz_cal.setter
 441    def mz_cal(self, mz_cal_list):
 442        if len(mz_cal_list) == len(self.mspeaks):
 443            self.is_calibrated = True
 444            for index, mz_cal in enumerate(mz_cal_list):
 445                self.mspeaks[index].mz_cal = mz_cal
 446        else:
 447            raise Exception(
 448                "calibrated array (%i) is not of the same size of the data (%i)"
 449                % (len(mz_cal_list), len(self._mspeaks))
 450            )
 451
 452    @property
 453    def mz_exp(self):
 454        """Return the experimental m/z values of the mass spectrum."""
 455        self.check_mspeaks()
 456
 457        if self.is_calibrated:
 458            return array([mspeak.mz_cal for mspeak in self.mspeaks])
 459
 460        else:
 461            return array([mspeak.mz_exp for mspeak in self.mspeaks])
 462
 463    @property
 464    def freq_exp_profile(self):
 465        """Return the experimental frequency profile of the mass spectrum."""
 466        return self._frequency_domain
 467
 468    @freq_exp_profile.setter
 469    def freq_exp_profile(self, new_data):
 470        self._frequency_domain = array(new_data)
 471
 472    @property
 473    def freq_exp_pp(self):
 474        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
 475        _, _, freq = self.prepare_peak_picking_data()
 476        return freq
 477
 478    @property
 479    def mz_exp_profile(self):
 480        """Return the experimental m/z profile of the mass spectrum."""
 481        if self.is_calibrated:
 482            return self.mz_cal_profile
 483        else:
 484            return self._mz_exp
 485
 486    @mz_exp_profile.setter
 487    def mz_exp_profile(self, new_data):
 488        self._mz_exp = array(new_data)
 489
 490    @property
 491    def mz_exp_pp(self):
 492        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
 493        mz, _, _ = self.prepare_peak_picking_data()
 494        return mz
 495
 496    @property
 497    def abundance_profile(self):
 498        """Return the abundance profile of the mass spectrum."""
 499        return self._abundance
 500
 501    @abundance_profile.setter
 502    def abundance_profile(self, new_data):
 503        self._abundance = array(new_data)
 504
 505    @property
 506    def abundance_profile_pp(self):
 507        """Return the abundance profile of the mass spectrum that is used for peak picking."""
 508        _, abundance, _ = self.prepare_peak_picking_data()
 509        return abundance
 510
 511    @property
 512    def abundance(self):
 513        """Return the abundance values of the mass spectrum."""
 514        self.check_mspeaks()
 515        return array([mspeak.abundance for mspeak in self.mspeaks])
 516
 517    def freq_exp(self):
 518        """Return the experimental frequency values of the mass spectrum."""
 519        self.check_mspeaks()
 520        return array([mspeak.freq_exp for mspeak in self.mspeaks])
 521
 522    @property
 523    def resolving_power(self):
 524        """Return the resolving power values of the mass spectrum."""
 525        self.check_mspeaks()
 526        return array([mspeak.resolving_power for mspeak in self.mspeaks])
 527
 528    @property
 529    def signal_to_noise(self):
 530        self.check_mspeaks()
 531        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
 532
 533    @property
 534    def nominal_mz(self):
 535        """Return the nominal m/z values of the mass spectrum."""
 536        if self._dict_nominal_masses_indexes:
 537            return sorted(list(self._dict_nominal_masses_indexes.keys()))
 538        else:
 539            raise ValueError("Nominal indexes not yet set")
 540
 541    def get_mz_and_abundance_peaks_tuples(self):
 542        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
 543        self.check_mspeaks()
 544        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
 545
 546    @property
 547    def kmd(self):
 548        """Return the Kendrick mass defect values of the mass spectrum."""
 549        self.check_mspeaks()
 550        return array([mspeak.kmd for mspeak in self.mspeaks])
 551
 552    @property
 553    def kendrick_mass(self):
 554        """Return the Kendrick mass values of the mass spectrum."""
 555        self.check_mspeaks()
 556        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
 557
 558    @property
 559    def max_mz_exp(self):
 560        """Return the maximum experimental m/z value of the mass spectrum."""
 561        return max([mspeak.mz_exp for mspeak in self.mspeaks])
 562
 563    @property
 564    def min_mz_exp(self):
 565        """Return the minimum experimental m/z value of the mass spectrum."""
 566        return min([mspeak.mz_exp for mspeak in self.mspeaks])
 567
 568    @property
 569    def max_abundance(self):
 570        """Return the maximum abundance value of the mass spectrum."""
 571        return max([mspeak.abundance for mspeak in self.mspeaks])
 572
 573    @property
 574    def max_signal_to_noise(self):
 575        """Return the maximum signal-to-noise ratio of the mass spectrum."""
 576        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
 577
 578    @property
 579    def most_abundant_mspeak(self):
 580        """Return the most abundant MSpeak object of the mass spectrum."""
 581        return max(self.mspeaks, key=lambda m: m.abundance)
 582
 583    @property
 584    def min_abundance(self):
 585        """Return the minimum abundance value of the mass spectrum."""
 586        return min([mspeak.abundance for mspeak in self.mspeaks])
 587
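    # the value is cached by process_mass_spec(); presumably because computing max/min abundance on every access takes too much CPU time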
 589    @property
 590    def dynamic_range(self):
 591        """Return the dynamic range of the mass spectrum."""
 592        return self._dynamic_range
 593
 594    @property
 595    def baseline_noise(self):
 596        """Return the baseline noise of the mass spectrum."""
 597        if self._baseline_noise:
 598            return self._baseline_noise
 599        else:
 600            return None
 601
 602    @property
 603    def baseline_noise_std(self):
 604        """Return the standard deviation of the baseline noise of the mass spectrum."""
 605        if self._baseline_noise_std == 0:
 606            return self._baseline_noise_std
 607        if self._baseline_noise_std:
 608            return self._baseline_noise_std
 609        else:
 610            return None
 611
 612    @property
 613    def Aterm(self):
 614        """Return the A-term calibration coefficient of the mass spectrum."""
 615        return self._calibration_terms[0]
 616
 617    @property
 618    def Bterm(self):
 619        """Return the B-term calibration coefficient of the mass spectrum."""
 620        return self._calibration_terms[1]
 621
 622    @property
 623    def Cterm(self):
 624        """Return the C-term calibration coefficient of the mass spectrum."""
 625        return self._calibration_terms[2]
 626
 627    @property
 628    def filename(self):
 629        """Return the filename of the mass spectrum."""
 630        return Path(self._filename)
 631
 632    @property
 633    def dir_location(self):
 634        """Return the directory location of the mass spectrum."""
 635        return self._dir_location
 636
 637    def sort_by_mz(self):
 638        """Sort the mass spectrum by m/z values."""
 639        return sorted(self, key=lambda m: m.mz_exp)
 640
 641    def sort_by_abundance(self, reverse=False):
 642        """Sort the mass spectrum by abundance values."""
 643        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
 644
 645    @property
 646    def tic(self):
 647        """Return the total ion current of the mass spectrum."""
 648        return trapz(self.abundance_profile, self.mz_exp_profile)
 649
 650    def check_mspeaks_warning(self):
 651        """Check if the mass spectrum has MSpeaks objects.
 652
 653        Raises
 654        ------
 655        Warning
 656            If the mass spectrum has no MSpeaks objects.
 657        """
 658        import warnings
 659
 660        if self.mspeaks:
 661            pass
 662        else:
 663            warnings.warn("mspeaks list is empty, continuing without filtering data")
 664
 665    def check_mspeaks(self):
 666        """Check if the mass spectrum has MSpeaks objects.
 667
 668        Raises
 669        ------
 670        Exception
 671            If the mass spectrum has no MSpeaks objects.
 672        """
 673        if self.mspeaks:
 674            pass
 675        else:
 676            raise Exception(
 677                "mspeaks list is empty, please run process_mass_spec() first"
 678            )
 679
 680    def remove_assignment_by_index(self, indexes):
 681        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
 682
 683        Parameters
 684        ----------
 685        indexes : list of int
 686            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
 687        """
 688        for i in indexes:
 689            self.mspeaks[i].clear_molecular_formulas()
 690
 691    def filter_by_index(self, list_indexes):
 692        """Filter the mass spectrum by the specified indexes.
 693
 694        Parameters
 695        ----------
 696        list_indexes : list of int
 697            A list of indexes of the MSpeaks objects to drop.
 698
 699        """
 700
 701        self.mspeaks = [
 702            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
 703        ]
 704
 705        for i, mspeak in enumerate(self.mspeaks):
 706            mspeak.index = i
 707
 708        self._set_nominal_masses_start_final_indexes()
 709
 710    def filter_by_mz(self, min_mz, max_mz):
 711        """Filter the mass spectrum by the specified m/z range.
 712
 713        Parameters
 714        ----------
 715        min_mz : float
 716            The minimum m/z value to keep.
 717        max_mz : float
 718            The maximum m/z value to keep.
 719
 720        """
 721        self.check_mspeaks_warning()
 722        indexes = [
 723            index
 724            for index, mspeak in enumerate(self.mspeaks)
 725            if not min_mz <= mspeak.mz_exp <= max_mz
 726        ]
 727        self.filter_by_index(indexes)
 728
 729    def filter_by_s2n(self, min_s2n, max_s2n=False):
 730        """Filter the mass spectrum by the specified signal-to-noise ratio range.
 731
 732        Parameters
 733        ----------
 734        min_s2n : float
 735            The minimum signal-to-noise ratio to keep.
 736        max_s2n : float, optional
 737            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
 738
 739        """
 740        self.check_mspeaks_warning()
 741        if max_s2n:
 742            indexes = [
 743                index
 744                for index, mspeak in enumerate(self.mspeaks)
 745                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
 746            ]
 747        else:
 748            indexes = [
 749                index
 750                for index, mspeak in enumerate(self.mspeaks)
 751                if mspeak.signal_to_noise <= min_s2n
 752            ]
 753        self.filter_by_index(indexes)
 754
 755    def filter_by_abundance(self, min_abund, max_abund=False):
 756        """Filter the mass spectrum by the specified abundance range.
 757
 758        Parameters
 759        ----------
 760        min_abund : float
 761            The minimum abundance to keep.
 762        max_abund : float, optional
 763            The maximum abundance to keep. Defaults to False (no maximum).
 764
 765        """
 766        self.check_mspeaks_warning()
 767        if max_abund:
 768            indexes = [
 769                index
 770                for index, mspeak in enumerate(self.mspeaks)
 771                if not min_abund <= mspeak.abundance <= max_abund
 772            ]
 773        else:
 774            indexes = [
 775                index
 776                for index, mspeak in enumerate(self.mspeaks)
 777                if mspeak.abundance <= min_abund
 778            ]
 779        self.filter_by_index(indexes)
 780
 781    def filter_by_max_resolving_power(self, B, T):
 782        """Filter the mass spectrum by the specified maximum resolving power.
 783
 784        Parameters
 785        ----------
 786        B : float
 787        T : float
 788
 789        """
 790
 791        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 792
 793        self.check_mspeaks_warning()
 794
 795        indexes_to_remove = [
 796            index
 797            for index, mspeak in enumerate(self.mspeaks)
 798            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
 799        ]
 800        self.filter_by_index(indexes_to_remove)
 801
 802    def filter_by_mean_resolving_power(
 803        self, ndeviations=3, plot=False, guess_pars=False
 804    ):
 805        """Filter the mass spectrum by the specified mean resolving power.
 806
 807        Parameters
 808        ----------
 809        ndeviations : float, optional
 810            The number of standard deviations to use for filtering. Defaults to 3.
 811        plot : bool, optional
 812            Whether to plot the resolving power distribution. Defaults to False.
 813        guess_pars : bool, optional
 814            Whether to guess the parameters for the Gaussian model. Defaults to False.
 815
 816        """
 817        self.check_mspeaks_warning()
 818        indexes_to_remove = MeanResolvingPowerFilter(
 819            self, ndeviations, plot, guess_pars
 820        ).main()
 821        self.filter_by_index(indexes_to_remove)
 822
 823    def filter_by_min_resolving_power(self, B, T, apodization_method: str=None, tolerance: float=0):
 824        """Filter the mass spectrum by the calculated minimum theoretical resolving power.
 825
 826        This is currently designed only for FTICR data, and accounts only for magnitude mode data
 827        Accurate results require passing the apodisaion method used to calculate the resolving power.
 828        see the ICRMassPeak function `resolving_power_calc` for more details.
 829
 830        Parameters
 831        ----------
 832        B : Magnetic field strength in Tesla, float
 833        T : transient length in seconds, float
 834        apodization_method : str, optional
 835            The apodization method to use for calculating the resolving power. Defaults to None.
 836        tolerance : float, optional
 837            The tolerance for the threshold. Defaults to 0, i.e. no tolerance
 838
 839        """
 840        if self.analyzer != "ICR":
 841            raise Exception(
 842                "This method is only applicable to ICR mass spectra. "
 843            )
 844
 845        self.check_mspeaks_warning()
 846
 847        indexes_to_remove = [
 848            index
 849            for index, mspeak in enumerate(self.mspeaks)
 850            if mspeak.resolving_power < (1-tolerance) * mspeak.resolving_power_calc(B, T, apodization_method=apodization_method)
 851        ]
 852        self.filter_by_index(indexes_to_remove)
 853
 854    def filter_by_noise_threshold(self):
 855        """Filter the mass spectrum by the noise threshold."""
 856
 857        threshold = self.get_noise_threshold()[1][0]
 858
 859        self.check_mspeaks_warning()
 860
 861        indexes_to_remove = [
 862            index
 863            for index, mspeak in enumerate(self.mspeaks)
 864            if mspeak.abundance <= threshold
 865        ]
 866        self.filter_by_index(indexes_to_remove)
 867
 868    def find_peaks(self):
 869        """Find the peaks of the mass spectrum."""
 870        # needs to clear previous results from peak_picking
 871        self._mspeaks = list()
 872
 873        # then do peak picking
 874        self.do_peak_picking()
 875        # print("A total of %i peaks were found" % len(self._mspeaks))
 876
 877    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
 878        """Change the Kendrick base of all MSpeaks objects.
 879
 880        Parameters
 881        ----------
 882        kendrick_dict_base : dict
 883            A dictionary of the Kendrick base to change to.
 884
 885        Notes
 886        -----
 887        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
 888        """
 889        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
 890
 891        for mspeak in self.mspeaks:
 892            mspeak.change_kendrick_base(kendrick_dict_base)
 893
 894    def get_nominal_mz_first_last_indexes(self, nominal_mass):
 895        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
 896
 897        Parameters
 898        ----------
 899        nominal_mass : int
 900            The nominal mass to get the indexes for.
 901
 902        Returns
 903        -------
 904        tuple
 905            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
 906        """
 907        if self._dict_nominal_masses_indexes:
 908            if nominal_mass in self._dict_nominal_masses_indexes.keys():
 909                return (
 910                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
 911                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
 912                )
 913
 914            else:
 915                # import warnings
 916                # uncomment warn to distribution
 917                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
 918                return (0, 0)
 919        else:
 920            raise Exception(
 921                "run process_mass_spec() function before trying to access the data"
 922            )
 923
 924    def get_masses_count_by_nominal_mass(self):
 925        """Return a dictionary of the nominal masses and their counts."""
 926
 927        dict_nominal_masses_count = {}
 928
 929        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 930
 931        for nominal_mass in all_nominal_masses:
 932            if nominal_mass not in dict_nominal_masses_count:
 933                dict_nominal_masses_count[nominal_mass] = len(
 934                    list(self.get_nominal_mass_indexes(nominal_mass))
 935                )
 936
 937        return dict_nominal_masses_count
 938
 939    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
 940        """Return a dictionary of the nominal masses and their counts.
 941
 942        Parameters
 943        ----------
 944        mz_overlay : float, optional
 945            The m/z overlay to use for counting. Defaults to 0.1.
 946
 947        Returns
 948        -------
 949        dict
 950            A dictionary of the nominal masses and their counts.
 951        """
 952        dict_nominal_masses_count = {}
 953
 954        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 955
 956        for nominal_mass in all_nominal_masses:
 957            if nominal_mass not in dict_nominal_masses_count:
 958                min_mz = nominal_mass - mz_overlay
 959
 960                max_mz = nominal_mass + 1 + mz_overlay
 961
 962                indexes = indexes = where(
 963                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
 964                )
 965
 966                dict_nominal_masses_count[nominal_mass] = indexes[0].size
 967
 968        return dict_nominal_masses_count
 969
 970    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
 971        """Return the indexes of the MSpeaks objects with the specified nominal mass.
 972
 973        Parameters
 974        ----------
 975        nominal_mass : int
 976            The nominal mass to get the indexes for.
 977        overlay : float, optional
 978            The m/z overlay to use for counting. Defaults to 0.1.
 979
 980        Returns
 981        -------
 982        generator
 983            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
 984        """
 985        min_mz_to_look = nominal_mass - overlay
 986        max_mz_to_look = nominal_mass + 1 + overlay
 987
 988        return (
 989            i
 990            for i in range(len(self.mspeaks))
 991            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
 992        )
 993
 994        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
 995        # return indexes
 996
 997    def _set_nominal_masses_start_final_indexes(self):
 998        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
 999        dict_nominal_masses_indexes = {}
1000
1001        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
1002
1003        for nominal_mass in all_nominal_masses:
1004            # indexes = self.get_nominal_mass_indexes(nominal_mass)
1005            # Convert the iterator to a list to avoid multiple calls
1006            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
1007
1008            # If the list is not empty, find the first and last; otherwise, set None
1009            if indexes:
1010                first, last = indexes[0], indexes[-1]
1011            else:
1012                first = last = None
1013            # defaultvalue = None
1014            # first = last = next(indexes, defaultvalue)
1015            # for last in indexes:
1016            #    pass
1017
1018            dict_nominal_masses_indexes[nominal_mass] = (first, last)
1019
1020        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
1021
1022    def plot_centroid(self, ax=None, c="g"):
1023        """Plot the centroid data of the mass spectrum.
1024
1025        Parameters
1026        ----------
1027        ax : matplotlib.axes.Axes, optional
1028            The matplotlib axes to plot on. Defaults to None.
1029        c : str, optional
1030            The color to use for the plot. Defaults to 'g' (green).
1031
1032        Returns
1033        -------
1034        matplotlib.axes.Axes
1035            The matplotlib axes containing the plot.
1036
1037        Raises
1038        ------
1039        Exception
1040            If no centroid data is found.
1041        """
1042
1043        import matplotlib.pyplot as plt
1044
1045        if self._mspeaks:
1046            if ax is None:
1047                ax = plt.gca()
1048
1049            markerline_a, stemlines_a, baseline_a = ax.stem(
1050                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1051            )
1052
1053            plt.setp(markerline_a, "color", c, "linewidth", 2)
1054            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1055            plt.setp(baseline_a, "color", c, "linewidth", 2)
1056
1057            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1058            ax.set_ylabel("Abundance", fontsize=12)
1059            ax.tick_params(axis="both", which="major", labelsize=12)
1060
1061            ax.axes.spines["top"].set_visible(False)
1062            ax.axes.spines["right"].set_visible(False)
1063
1064            ax.get_yaxis().set_visible(False)
1065            ax.spines["left"].set_visible(False)
1066
1067        else:
1068            raise Exception("No centroid data found, please run process_mass_spec")
1069
1070        return ax
1071
1072    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1073        """Plot the profile data and noise threshold of the mass spectrum.
1074
1075        Parameters
1076        ----------
1077        ax : matplotlib.axes.Axes, optional
1078            The matplotlib axes to plot on. Defaults to None.
1079        legend : bool, optional
1080            Whether to show the legend. Defaults to False.
1081
1082        Returns
1083        -------
1084        matplotlib.axes.Axes
1085            The matplotlib axes containing the plot.
1086
1087        Raises
1088        ------
1089        Exception
1090            If no noise threshold is found.
1091        """
1092        import matplotlib.pyplot as plt
1093
1094        if self.baseline_noise_std and self.baseline_noise_std:
1095            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1096            baseline = (self.baseline_noise, self.baseline_noise)
1097
1098            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1099            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1100            x, y = self.get_noise_threshold()
1101
1102            if ax is None:
1103                ax = plt.gca()
1104
1105            ax.plot(
1106                self.mz_exp_profile,
1107                self.abundance_profile,
1108                color="green",
1109                label="Spectrum",
1110            )
1111            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1112            ax.plot(x, y, color="red", label="Noise Threshold")
1113
1114            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1115            ax.set_ylabel("Abundance", fontsize=12)
1116            ax.tick_params(axis="both", which="major", labelsize=12)
1117
1118            ax.axes.spines["top"].set_visible(False)
1119            ax.axes.spines["right"].set_visible(False)
1120
1121            ax.get_yaxis().set_visible(False)
1122            ax.spines["left"].set_visible(False)
1123            if legend:
1124                ax.legend()
1125
1126        else:
1127            raise Exception("Calculate noise threshold first")
1128
1129        return ax
1130
1131    def plot_mz_domain_profile(self, color="green", ax=None):
1132        """Plot the m/z domain profile of the mass spectrum.
1133
1134        Parameters
1135        ----------
1136        color : str, optional
1137            The color to use for the plot. Defaults to 'green'.
1138        ax : matplotlib.axes.Axes, optional
1139            The matplotlib axes to plot on. Defaults to None.
1140
1141        Returns
1142        -------
1143        matplotlib.axes.Axes
1144            The matplotlib axes containing the plot.
1145        """
1146
1147        import matplotlib.pyplot as plt
1148
1149        if ax is None:
1150            ax = plt.gca()
1151        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1152        ax.set(xlabel="m/z", ylabel="abundance")
1153
1154        return ax
1155
1156    def to_excel(self, out_file_path, write_metadata=True):
1157        """Export the mass spectrum to an Excel file.
1158
1159        Parameters
1160        ----------
1161        out_file_path : str
1162            The path to the Excel file to export to.
1163        write_metadata : bool, optional
1164            Whether to write the metadata to the Excel file. Defaults to True.
1165
1166        Returns
1167        -------
1168        None
1169        """
1170        from corems.mass_spectrum.output.export import HighResMassSpecExport
1171
1172        exportMS = HighResMassSpecExport(out_file_path, self)
1173        exportMS.to_excel(write_metadata=write_metadata)
1174
1175    def to_hdf(self, out_file_path):
1176        """Export the mass spectrum to an HDF file.
1177
1178        Parameters
1179        ----------
1180        out_file_path : str
1181            The path to the HDF file to export to.
1182
1183        Returns
1184        -------
1185        None
1186        """
1187        from corems.mass_spectrum.output.export import HighResMassSpecExport
1188
1189        exportMS = HighResMassSpecExport(out_file_path, self)
1190        exportMS.to_hdf()
1191
1192    def to_csv(self, out_file_path, write_metadata=True):
1193        """Export the mass spectrum to a CSV file.
1194
1195        Parameters
1196        ----------
1197        out_file_path : str
1198            The path to the CSV file to export to.
1199        write_metadata : bool, optional
1200            Whether to write the metadata to the CSV file. Defaults to True.
1201
1202        """
1203        from corems.mass_spectrum.output.export import HighResMassSpecExport
1204
1205        exportMS = HighResMassSpecExport(out_file_path, self)
1206        exportMS.to_csv(write_metadata=write_metadata)
1207
1208    def to_pandas(self, out_file_path, write_metadata=True):
1209        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1210
1211        Parameters
1212        ----------
1213        out_file_path : str
1214            The path to the CSV file to export to.
1215        write_metadata : bool, optional
1216            Whether to write the metadata to the CSV file. Defaults to True.
1217
1218        """
1219        from corems.mass_spectrum.output.export import HighResMassSpecExport
1220
1221        exportMS = HighResMassSpecExport(out_file_path, self)
1222        exportMS.to_pandas(write_metadata=write_metadata)
1223
1224    def to_dataframe(self, additional_columns=None):
1225        """Return the mass spectrum as a Pandas dataframe.
1226
1227        Parameters
1228        ----------
1229        additional_columns : list, optional
1230            A list of additional columns to include in the dataframe. Defaults to None.
1231            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1232
1233        Returns
1234        -------
1235        pandas.DataFrame
1236            The mass spectrum as a Pandas dataframe.
1237        """
1238        from corems.mass_spectrum.output.export import HighResMassSpecExport
1239
1240        exportMS = HighResMassSpecExport(self.filename, self)
1241        return exportMS.get_pandas_df(additional_columns=additional_columns)
1242
1243    def to_json(self):
1244        """Return the mass spectrum as a JSON file."""
1245        from corems.mass_spectrum.output.export import HighResMassSpecExport
1246
1247        exportMS = HighResMassSpecExport(self.filename, self)
1248        return exportMS.to_json()
1249
1250    def parameters_json(self):
1251        """Return the parameters of the mass spectrum as a JSON string."""
1252        from corems.mass_spectrum.output.export import HighResMassSpecExport
1253
1254        exportMS = HighResMassSpecExport(self.filename, self)
1255        return exportMS.parameters_to_json()
1256
1257    def parameters_toml(self):
1258        """Return the parameters of the mass spectrum as a TOML string."""
1259        from corems.mass_spectrum.output.export import HighResMassSpecExport
1260
1261        exportMS = HighResMassSpecExport(self.filename, self)
1262        return exportMS.parameters_to_toml()
1263
1264
class MassSpecProfile(MassSpecBase):
    """Mass spectrum whose entry point is profile-mode data.

    Notes
    -----
    Holds the profile data and the instrument settings.
    Iterating yields the MSPeak objects kept in the _mspeaks attribute, which
    process_mass_spec fills under the hood; iteration is empty while _mspeaks
    is empty. Many more attributes and methods are inherited from MassSpecBase().

    Parameters
    ----------
    data_dict : dict
        The profile data; the m/z and abundance values are read from the
        `Labels.mz` and `Labels.abundance` keys.
    d_params : dict{'str': float, int or str}
        The instrument settings and processing settings.
    auto_process : bool, optional
        Whether to call process_mass_spec right after construction. Defaults to True.


    Attributes
    ----------
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    ----------
    * process_mass_spec(). Process the mass spectrum.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        mz_values = data_dict.get(Labels.mz)
        abundance_values = data_dict.get(Labels.abundance)

        super().__init__(mz_values, abundance_values, d_params)

        if auto_process:
            self.process_mass_spec()
1309
1310
class MassSpecfromFreq(MassSpecBase):
    """Mass spectrum whose entry point is frequency-domain data.

    Notes
    -----
    - Converts the frequency domain to m/z based on the settings stored in d_params
    - Stores the profile data and instrument settings
    - Iterating yields the MSPeak objects kept in the _mspeaks attribute
    - _mspeaks is populated under the hood by calling the process_mass_spec method
    - Iteration is empty while _mspeaks is empty

    Parameters
    ----------
    frequency_domain : list(float)
        All data points of the frequency domain, in Hz.
    magnitude : list(float)
        The magnitude of each frequency data point.
    d_params : dict{'str': float, int or str}
        The instrument settings and processing settings.
    auto_process : bool, optional
        Whether to call process_mass_spec right after construction. Defaults to True.
    keep_profile : bool, optional
        Forwarded to process_mass_spec when auto_process is set; whether to keep
        the profile data. Defaults to True.

    Attributes
    ----------
    has_frequency : bool
        Whether the mass spectrum has frequency data; set to True here.
    _frequency_domain : list(float)
        Frequency domain in Hz.
    label : str
        The data label (Bruker, Midas Transient, see Labels class); it decides
        which frequency to m/z conversion is used.
    magnetron_frequency : None or float
        Center of the Gaussian fitted by calc_magnetron_freq.
    magnetron_frequency_sigma : None or float
        Sigma of the Gaussian fitted by calc_magnetron_freq.
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.
    See Also: all the attributes of MassSpecBase class

    Methods
    ----------
    * _set_mz_domain().
        Calculates the m/z values based on the settings of d_params.
    * process_mass_spec().  Process the mass spectrum.

    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    """

    def __init__(
        self,
        frequency_domain,
        magnitude,
        d_params,
        auto_process=True,
        keep_profile=True,
    ):
        # the m/z axis is not known yet; it is derived from the frequencies below
        super().__init__(None, magnitude, d_params)

        self._frequency_domain = frequency_domain
        self.has_frequency = True
        self._set_mz_domain()
        self._sort_mz_domain()

        self.magnetron_frequency = None
        self.magnetron_frequency_sigma = None

        # Processing happens as the object is created, so settings need to be
        # changed before instantiating the class to take effect.
        if auto_process:
            self.process_mass_spec(keep_profile=keep_profile)

    def _sort_mz_domain(self):
        """Put m/z, abundance and frequency in ascending m/z order when they are reversed."""
        starts_high = self._mz_exp[0] > self._mz_exp[-1]
        if not starts_high:
            return

        # only the two end points are compared; the three arrays are flipped together
        self._mz_exp = self._mz_exp[::-1]
        self._abundance = self._abundance[::-1]
        self._frequency_domain = self._frequency_domain[::-1]

    def _set_mz_domain(self):
        """Compute the m/z domain with the conversion matching the data label."""
        self._mz_exp = (
            self._f_to_mz_bruker()
            if self.label == Labels.bruker_frequency
            else self._f_to_mz()
        )

    @property
    def transient_settings(self):
        """Return the transient settings of the mass spectrum."""
        return self.parameters.transient

    @transient_settings.setter
    def transient_settings(self, instance_TransientSetting):
        self.parameters.transient = instance_TransientSetting

    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
        """Estimate the magnetron frequency of the mass spectrum.

        Parameters
        ----------
        max_magnetron_freq : float, optional
            Only frequency differences below this value enter the histogram.
            Defaults to 50.
        magnetron_freq_bins : int, optional
            The number of bins to use for the histogram. Defaults to 300.

        Returns
        -------
        None
            The results are stored in `magnetron_frequency` and
            `magnetron_frequency_sigma`.

        Notes
        -----
        The differences between consecutive values of `freq_exp()` are computed,
        and those below 'max_magnetron_freq' are histogrammed into
        'magnetron_freq_bins' bins. A Gaussian model is fitted to the histogram
        counts against the left bin edges; its center is (statistically, probably)
        the magnetron frequency.
        This appears to work well for nOmega datasets, but may not work well for
        1x datasets or those with very low magnetron peaks.
        """
        freq_table = DataFrame(self.freq_exp(), columns=["Freq"])
        freq_table["FreqDelta"] = freq_table["Freq"].diff()

        below_cutoff = freq_table["FreqDelta"] < max_magnetron_freq
        counts, bin_edges = histogram(
            freq_table[below_cutoff]["FreqDelta"],
            bins=magnetron_freq_bins,
        )
        # there is one more edge than bins; the left edges serve as x values
        left_edges = bin_edges[:-1]

        gaussian = GaussianModel()
        start_params = gaussian.guess(counts, x=left_edges)
        fit_result = gaussian.fit(counts, start_params, x=left_edges)

        best_values = fit_result.best_values
        self.magnetron_frequency = best_values["center"]
        self.magnetron_frequency_sigma = best_values["sigma"]
1442
1443
class MassSpecCentroid(MassSpecBase):
    """A mass spectrum class when the entry point is on centroid format

    Notes
    -----
    - Stores the centroid data and instrument settings
    - Exposes stick-like pseudo profile data (every centroid flanked by two
      zero-abundance points) for plotting and inspection purposes
    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    - _mspeaks is populated under the hood by calling process_mass_spec method
    - iteration is null if _mspeaks is empty

    Parameters
    ----------
    data_dict : dict {string: numpy array float64 )
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict{'str': float, int or str}
        contains the instrument settings and processing settings
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    label : str
        store label (Bruker, Midas Transient, see Labels class)
    _baseline_noise : float
        store baseline noise
    _baseline_noise_std : float
        store baseline noise std
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.


    Methods
    ----------
    * process_mass_spec().
        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    * __simulate_profile__data__().
        Build stick-like profile data from centroids, intended for plotting and inspection purposes only.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        super().__init__([], [], d_params)

        self._set_parameters_objects(d_params)

        # for Thermo centroid data the noise statistics are supplied with the
        # parameters instead of being calculated in process_mass_spec
        if self.label == Labels.thermo_centroid:
            self._baseline_noise = d_params.get("baseline_noise")
            self._baseline_noise_std = d_params.get("baseline_noise_std")

        self.is_centroid = True
        self.data_dict = data_dict
        self._mz_exp = data_dict[Labels.mz]
        self._abundance = data_dict[Labels.abundance]

        if auto_process:
            self.process_mass_spec()

    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
        """Build stick-like profile data from centroid values.

        Notes
        -----
        Every centroid becomes three points: zero abundance just below the m/z,
        the centroid itself, and zero abundance just above it.
        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.

        Parameters
        ----------
        exp_mz_centroid : list(float)
            list of m/z values
        magnitude_centroid : list(float)
            list of abundance values


        Returns
        -------
        x : list(float)
            list of m/z values
        y : list(float)
            list of abundance values
        """

        x, y = [], []
        for index, mz in enumerate(exp_mz_centroid):
            x.extend((mz - 0.0000001, mz, mz + 0.0000001))
            y.extend((0, magnitude_centroid[index], 0))
        return x, y

    @property
    def mz_exp_profile(self):
        """Return the m/z profile of the mass spectrum.

        Each m/z value is expanded to three points (slightly below, at, and
        slightly above the centroid), returned as a plain list.
        """
        mz_list = []
        for mz in self.mz_exp:
            mz_list.extend((mz - 0.0000001, mz, mz + 0.0000001))
        return mz_list

    @mz_exp_profile.setter
    def mz_exp_profile(self, _mz_exp):
        self._mz_exp = _mz_exp

    @property
    def abundance_profile(self):
        """Return the abundance profile of the mass spectrum.

        Each abundance is flanked by two zeros, matching the three points per
        centroid produced by `mz_exp_profile`; returned as a plain list.
        """
        ab_list = []
        for ab in self.abundance:
            ab_list.extend((0, ab, 0))
        return ab_list

    @abundance_profile.setter
    def abundance_profile(self, abundance):
        self._abundance = abundance

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum."""
        return sum(self.abundance)

    @staticmethod
    def _has_centroid_values(values):
        """Return True when *values* holds at least one entry that is not None.

        Works for lists and numpy arrays alike; a missing (None) or empty
        sequence counts as absent.
        """
        if values is None:
            return False
        return any(value is not None for value in values)

    def process_mass_spec(self):
        """Process the mass spectrum.

        Builds the MSPeak objects directly from the centroid data in
        `self.data_dict` (for profile and frequency based data this is done by
        the peak picking instead), keeping only the centroids that pass the
        configured noise thresholding method. Afterwards the dynamic range and
        the nominal mass index lookup are set, the baseline noise is calculated
        unless the label is `Labels.thermo_centroid`, and `self.data_dict` is
        deleted.

        Raises
        ------
        Exception
            If the noise thresholding method is "signal_noise" but no S/N data
            is present, or if the "log" noise method is selected for data that
            is not labelled as Thermo centroid.
        """
        data_dict = self.data_dict
        ion_charge = self.polarity
        ms_settings = self.parameters.mass_spectrum

        # Optional columns: treated as absent when missing, empty or all None.
        # The check must not rely on the truth value of the container, which is
        # ambiguous (raises ValueError) for multi-element numpy arrays.
        rp_values = data_dict.get(Labels.rp)
        s2n_values = data_dict.get(Labels.s2n)
        rp_present = self._has_centroid_values(rp_values)
        s2n_present = self._has_centroid_values(s2n_values)

        # S/N thresholding is impossible without S/N data
        if not s2n_present and ms_settings.noise_threshold_method == "signal_noise":
            raise Exception("Signal to Noise data is missing for noise thresholding")

        # Pull out abundance data
        abun = array(data_dict.get(Labels.abundance)).astype(float)

        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
        abundance_threshold, factor = self.get_threshold(abun)

        # the thresholding method does not change while looping over the peaks
        noise_method = ms_settings.noise_threshold_method
        abundance_based = noise_method in (
            "minima",
            "relative_abundance",
            "absolute_abundance",
        )
        s2n_based = noise_method == "signal_noise"

        # rp_i and s2n_i stay NaN unless the corresponding data is present
        rp_i, s2n_i = np.nan, np.nan
        for index, mz in enumerate(data_dict.get(Labels.mz)):
            if rp_present:
                # falsy entries (None, 0) are stored as NaN
                rp_i = float(rp_values[index]) if rp_values[index] else np.nan
            if s2n_present:
                s2n_i = float(s2n_values[index]) if s2n_values[index] else np.nan

            # Decide whether the peak passes the configured noise thresholding
            if abundance_based:
                keep_peak = abun[index] / factor >= abundance_threshold
            elif s2n_based:
                # comparisons with NaN are False, so peaks without S/N are dropped
                keep_peak = s2n_i >= ms_settings.noise_threshold_min_s2n
            else:
                keep_peak = False

            if keep_peak:
                self.add_mspeak(
                    ion_charge,
                    mz,
                    abun[index],
                    rp_i,
                    s2n_i,
                    # centroid peak does not have start and end peak index pos
                    (index, index, index),
                    ms_parent=self,
                )

        self.mspeaks = self._mspeaks
        self._dynamic_range = self.max_abundance / self.min_abundance
        self._set_nominal_masses_start_final_indexes()

        if self.label != Labels.thermo_centroid:
            if self.settings.noise_threshold_method == "log":
                raise Exception("log noise Not tested for centroid data")

            self._baseline_noise, self._baseline_noise_std = (
                self.run_noise_threshold_calc()
            )

        # the raw input is no longer needed once the peaks are built
        del self.data_dict
1676
1677
class MassSpecCentroidLowRes(MassSpecCentroid):
    """A mass spectrum class when the entry point is on low resolution centroid format

    Notes
    -----
    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead

    Parameters
    ----------
    data_dict : dict {string: numpy array float64 )
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict{'str': float, int or str}
        contains the instrument settings and processing settings

    Attributes
    ----------
    _processed_tic : float
        store processed total ion current
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    """

    def __init__(self, data_dict, d_params):
        # the parent initializers are not called: only the parameters and the
        # two data arrays are set up, no MSPeak objects are created
        self._set_parameters_objects(d_params)
        self._mz_exp = array(data_dict.get(Labels.mz))
        self._abundance = array(data_dict.get(Labels.abundance))
        self._processed_tic = None

    def __len__(self):
        return len(self.mz_exp)

    def __getitem__(self, position):
        return (self.mz_exp[position], self.abundance[position])

    @property
    def mz_exp(self):
        """Return the m/z values of the mass spectrum."""
        return self._mz_exp

    @property
    def abundance(self):
        """Return the abundance values of the mass spectrum."""
        return self._abundance

    @property
    def processed_tic(self):
        """Return the processed total ion current of the mass spectrum."""
        # NOTE(review): this sums `_processed_tic` while `tic` returns it as is;
        # confirm whether `_processed_tic` is meant to be an iterable or a number
        return sum(self._processed_tic)

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum.

        The processed value is returned when it is set (truthy); otherwise the
        sum of the abundance values.
        """
        if self._processed_tic:
            return self._processed_tic
        else:
            return sum(self.abundance)

    @property
    def mz_abun_tuples(self):
        """Return the m/z and abundance values of the mass spectrum as a list of tuples.

        Both values of every pair are rounded to the nearest integer.
        """
        # each value is converted on its own; nesting the second conversion
        # inside the first passes it to int() as a base and raises TypeError
        return [
            (int(round(pair[0], 0)), int(round(pair[1], 0)))
            for pair in self
        ]

    @property
    def mz_abun_dict(self):
        """Return the m/z and abundance values of the mass spectrum as a dictionary.

        Keys are the m/z values and values the abundances, both rounded to the
        nearest integer; pairs rounding to the same m/z overwrite each other.
        """
        return {
            int(round(pair[0], 0)): int(round(pair[1], 0))
            for pair in self
        }
def overrides(interface_class):
def overrides(interface_class):
    """Return a decorator that checks a method overrides one from an interface class.

    Parameters
    ----------
    interface_class : type
        Class whose attribute names, as listed by ``dir``, must include the name
        of the decorated method.

    Returns
    -------
    callable
        Decorator that returns the decorated method unchanged when the check passes.

    Raises
    ------
    AssertionError
        Raised by the decorator when the method name is not found on ``interface_class``.
    """

    def overrider(method):
        # Raise explicitly instead of using ``assert`` so the check still runs under
        # ``python -O``; AssertionError is kept so existing callers are unaffected.
        if method.__name__ not in dir(interface_class):
            raise AssertionError(
                "%s does not override an attribute of %r"
                % (method.__name__, interface_class)
            )
        return method

    return overrider

Checks if the method overrides a method from an interface class.

  36class MassSpecBase(MassSpecCalc, KendrickGrouping):
  37    """A mass spectrum base class, stores the profile data and instrument settings.
  38
  39    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
  40    _mspeaks is populated under the hood by calling process_mass_spec method.
  41    Iteration is null if _mspeaks is empty.
  42
  43    Parameters
  44    ----------
  45    mz_exp : array_like
  46        The m/z values of the mass spectrum.
  47    abundance : array_like
  48        The abundance values of the mass spectrum.
  49    d_params : dict
  50        A dictionary of parameters for the mass spectrum.
  51    **kwargs
  52        Additional keyword arguments.
  53
  54    Attributes
  55    ----------
  56
  57    mspeaks : list
  58        A list of mass peaks.
  59    is_calibrated : bool
  60        Whether the mass spectrum is calibrated.
  61    is_centroid : bool
  62        Whether the mass spectrum is centroided.
  63    has_frequency : bool
  64        Whether the mass spectrum has a frequency domain.
  65    calibration_order : None or int
  66        The order of the mass spectrum's calibration.
  67    calibration_points : None or ndarray
  68        The calibration points of the mass spectrum.
  69    calibration_ref_mzs: None or ndarray
  70        The reference m/z values of the mass spectrum's calibration.
  71    calibration_meas_mzs : None or ndarray
  72        The measured m/z values of the mass spectrum's calibration.
  73    calibration_RMS : None or float
  74        The root mean square of the mass spectrum's calibration.
  75    calibration_segment : None or CalibrationSegment
  76        The calibration segment of the mass spectrum.
  77    _abundance : ndarray
  78        The abundance values of the mass spectrum.
  79    _mz_exp : ndarray
  80        The m/z values of the mass spectrum.
  81    _mspeaks : list
  82        A list of mass peaks.
  83    _dict_nominal_masses_indexes : dict
  84        A dictionary of nominal masses and their indexes.
  85    _baseline_noise : float
  86        The baseline noise of the mass spectrum.
  87    _baseline_noise_std : float
  88        The standard deviation of the baseline noise of the mass spectrum.
  89    _dynamic_range : float or None
  90        The dynamic range of the mass spectrum.
  91    _transient_settings : None or TransientSettings
  92        The transient settings of the mass spectrum.
  93    _frequency_domain : None or FrequencyDomain
  94        The frequency domain of the mass spectrum.
  95    _mz_cal_profile : None or MzCalibrationProfile
  96        The m/z calibration profile of the mass spectrum.
  97
  98    Methods
  99    -------
 100    * process_mass_spec(). Main function to process the mass spectrum,
 101    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
 102
 103    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
 104    """
 105
 106    def __init__(self, mz_exp, abundance, d_params, **kwargs):
 107        self._abundance = array(abundance, dtype=float64)
 108        self._mz_exp = array(mz_exp, dtype=float64)
 109
 110        # objects created after process_mass_spec() function
 111        self._mspeaks = list()
 112        self.mspeaks = list()
 113        self._dict_nominal_masses_indexes = dict()
 114        self._baseline_noise = 0.001
 115        self._baseline_noise_std = 0.001
 116        self._dynamic_range = None
 117        # set to None: initialization occurs inside subclass MassSpecfromFreq
 118        self._transient_settings = None
 119        self._frequency_domain = None
 120        self._mz_cal_profile = None
 121        self.is_calibrated = False
 122
 123        self._set_parameters_objects(d_params)
 124        self._init_settings()
 125
 126        self.is_centroid = False
 127        self.has_frequency = False
 128
 129        self.calibration_order = None
 130        self.calibration_points = None
 131        self.calibration_ref_mzs = None
 132        self.calibration_meas_mzs = None
 133        self.calibration_RMS = None
 134        self.calibration_segment = None
 135        self.calibration_raw_error_median = None
 136        self.calibration_raw_error_stdev = None
 137
 138    def _init_settings(self):
 139        """Initializes the settings for the mass spectrum."""
 140        self._parameters = MSParameters()
 141
 142    def __len__(self):
 143        return len(self.mspeaks)
 144
 145    def __getitem__(self, position) -> MSPeak:
 146        return self.mspeaks[position]
 147
 148    def set_indexes(self, list_indexes):
 149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
 150
 151        Parameters
 152        ----------
 153        list_indexes : list of int
 154            A list of integers representing the indexes of the MSpeaks to iterate over.
 155
 156        """
 157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
 158
 159        for i, mspeak in enumerate(self.mspeaks):
 160            mspeak.index = i
 161
 162        self._set_nominal_masses_start_final_indexes()
 163
 164    def reset_indexes(self):
 165        """Reset the mass spectrum to iterate over all MSpeaks objects.
 166
 167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
 168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
 169
 170        """
 171        self.mspeaks = self._mspeaks
 172
 173        for i, mspeak in enumerate(self.mspeaks):
 174            mspeak.index = i
 175
 176        self._set_nominal_masses_start_final_indexes()
 177
 178    def add_mspeak(
 179        self,
 180        ion_charge,
 181        mz_exp,
 182        abundance,
 183        resolving_power,
 184        signal_to_noise,
 185        massspec_indexes,
 186        exp_freq=None,
 187        ms_parent=None,
 188    ):
 189        """Add a new MSPeak object to the MassSpectrum object.
 190
 191        Parameters
 192        ----------
 193        ion_charge : int
 194            The ion charge of the MSPeak.
 195        mz_exp : float
 196            The experimental m/z value of the MSPeak.
 197        abundance : float
 198            The abundance of the MSPeak.
 199        resolving_power : float
 200            The resolving power of the MSPeak.
 201        signal_to_noise : float
 202            The signal-to-noise ratio of the MSPeak.
 203        massspec_indexes : list
 204            A list of indexes of the MSPeak in the MassSpectrum object.
 205        exp_freq : float, optional
 206            The experimental frequency of the MSPeak. Defaults to None.
 207        ms_parent : MSParent, optional
 208            The MSParent object associated with the MSPeak. Defaults to None.
 209        """
 210        mspeak = MSPeak(
 211            ion_charge,
 212            mz_exp,
 213            abundance,
 214            resolving_power,
 215            signal_to_noise,
 216            massspec_indexes,
 217            len(self._mspeaks),
 218            exp_freq=exp_freq,
 219            ms_parent=ms_parent,
 220        )
 221
 222        self._mspeaks.append(mspeak)
 223
 224    def _set_parameters_objects(self, d_params):
 225        """Set the parameters of the MassSpectrum object.
 226
 227        Parameters
 228        ----------
 229        d_params : dict
 230            A dictionary containing the parameters to set.
 231
 232        Notes
 233        -----
 234        This method sets the following parameters of the MassSpectrum object:
 235        - _calibration_terms
 236        - label
 237        - analyzer
 238        - acquisition_time
 239        - instrument_label
 240        - polarity
 241        - scan_number
 242        - retention_time
 243        - mobility_rt
 244        - mobility_scan
 245        - _filename
 246        - _dir_location
 247        - _baseline_noise
 248        - _baseline_noise_std
 249        - sample_name
 250        """
 251        self._calibration_terms = (
 252            d_params.get("Aterm"),
 253            d_params.get("Bterm"),
 254            d_params.get("Cterm"),
 255        )
 256
 257        self.label = d_params.get(Labels.label)
 258
 259        self.analyzer = d_params.get("analyzer")
 260
 261        self.acquisition_time = d_params.get("acquisition_time")
 262
 263        self.instrument_label = d_params.get("instrument_label")
 264
 265        self.polarity = int(d_params.get("polarity"))
 266
 267        self.scan_number = d_params.get("scan_number")
 268
 269        self.retention_time = d_params.get("rt")
 270
 271        self.mobility_rt = d_params.get("mobility_rt")
 272
 273        self.mobility_scan = d_params.get("mobility_scan")
 274
 275        self._filename = d_params.get("filename_path")
 276
 277        self._dir_location = d_params.get("dir_location")
 278
 279        self._baseline_noise = d_params.get("baseline_noise")
 280
 281        self._baseline_noise_std = d_params.get("baseline_noise_std")
 282
 283        if d_params.get("sample_name") != "Unknown":
 284            self.sample_name = d_params.get("sample_name")
 285            if not self.sample_name:
 286                self.sample_name = self.filename.stem
 287        else:
 288            self.sample_name = self.filename.stem
 289
 290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
 291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
 292
 293        Parameters
 294        ----------
 295        Aterm : float
 296            The A-term calibration coefficient.
 297        Bterm : float
 298            The B-term calibration coefficient.
 299        C : float
 300            The C-term calibration coefficient.
 301        fas : float, optional
 302            The frequency amplitude scaling factor. Default is 0.
 303        """
 304        self._calibration_terms = (Aterm, Bterm, C)
 305
 306        self._mz_exp = self._f_to_mz()
 307        self._abundance = self._abundance
 308        self.find_peaks()
 309        self.reset_indexes()
 310
 311    def clear_molecular_formulas(self):
 312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
 313
 314        Returns
 315        -------
 316        numpy.ndarray
 317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
 318        """
 319        self.check_mspeaks()
 320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
 321
 322    def process_mass_spec(self, keep_profile=True):
 323        """Process the mass spectrum.
 324
 325        Parameters
 326        ----------
 327        keep_profile : bool, optional
 328            Whether to keep the profile data after processing. Defaults to True.
 329
 330        Notes
 331        -----
 332        This method does the following:
 333        - calculates the noise threshold
 334        - does peak picking (creates mspeak_objs)
 335        - resets the mspeak_obj indexes
 336        """
 337
 338        # if runned mannually make sure to rerun filter_by_noise_threshold
 339        # calculates noise threshold
 340        # do peak picking( create mspeak_objs)
 341        # reset mspeak_obj the indexes
 342
 343        self.cal_noise_threshold()
 344
 345        self.find_peaks()
 346        self.reset_indexes()
 347
 348        if self.mspeaks:
 349            self._dynamic_range = self.max_abundance / self.min_abundance
 350        else:
 351            self._dynamic_range = 0
 352        if not keep_profile:
 353            self._abundance *= 0
 354            self._mz_exp *= 0
 355
 356    def cal_noise_threshold(self):
 357        """Calculate the noise threshold of the mass spectrum."""
 358
 359        if self.label == Labels.simulated_profile:
 360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
 361
 362        if self.settings.noise_threshold_method == "log":
 363            self._baseline_noise, self._baseline_noise_std = (
 364                self.run_log_noise_threshold_calc()
 365            )
 366
 367        else:
 368            self._baseline_noise, self._baseline_noise_std = (
 369                self.run_noise_threshold_calc()
 370            )
 371
 372    @property
 373    def parameters(self):
 374        """Return the parameters of the mass spectrum."""
 375        return self._parameters
 376
 377    @parameters.setter
 378    def parameters(self, instance_MSParameters):
 379        self._parameters = instance_MSParameters
 380
 381    def set_parameter_from_json(self, parameters_path):
 382        """Set the parameters of the mass spectrum from a JSON file.
 383
 384        Parameters
 385        ----------
 386        parameters_path : str
 387            The path to the JSON file containing the parameters.
 388        """
 389        load_and_set_parameters_ms(self, parameters_path=parameters_path)
 390
 391    def set_parameter_from_toml(self, parameters_path):
 392        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
 393
 394    @property
 395    def mspeaks_settings(self):
 396        """Return the MS peak settings of the mass spectrum."""
 397        return self.parameters.ms_peak
 398
 399    @mspeaks_settings.setter
 400    def mspeaks_settings(self, instance_MassSpecPeakSetting):
 401        self.parameters.ms_peak = instance_MassSpecPeakSetting
 402
 403    @property
 404    def settings(self):
 405        """Return the settings of the mass spectrum."""
 406        return self.parameters.mass_spectrum
 407
 408    @settings.setter
 409    def settings(self, instance_MassSpectrumSetting):
 410        self.parameters.mass_spectrum = instance_MassSpectrumSetting
 411
 412    @property
 413    def molecular_search_settings(self):
 414        """Return the molecular search settings of the mass spectrum."""
 415        return self.parameters.molecular_search
 416
 417    @molecular_search_settings.setter
 418    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
 419        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
 420
 421    @property
 422    def mz_cal_profile(self):
 423        """Return the calibrated m/z profile of the mass spectrum."""
 424        return self._mz_cal_profile
 425
 426    @mz_cal_profile.setter
 427    def mz_cal_profile(self, mz_cal_list):
 428        if len(mz_cal_list) == len(self._mz_exp):
 429            self._mz_cal_profile = mz_cal_list
 430        else:
 431            raise Exception(
 432                "calibrated array (%i) is not of the same size of the data (%i)"
 433                % (len(mz_cal_list), len(self.mz_exp_profile))
 434            )
 435
 436    @property
 437    def mz_cal(self):
 438        """Return the calibrated m/z values of the mass spectrum."""
 439        return array([mspeak.mz_cal for mspeak in self.mspeaks])
 440
 441    @mz_cal.setter
 442    def mz_cal(self, mz_cal_list):
 443        if len(mz_cal_list) == len(self.mspeaks):
 444            self.is_calibrated = True
 445            for index, mz_cal in enumerate(mz_cal_list):
 446                self.mspeaks[index].mz_cal = mz_cal
 447        else:
 448            raise Exception(
 449                "calibrated array (%i) is not of the same size of the data (%i)"
 450                % (len(mz_cal_list), len(self._mspeaks))
 451            )
 452
 453    @property
 454    def mz_exp(self):
 455        """Return the experimental m/z values of the mass spectrum."""
 456        self.check_mspeaks()
 457
 458        if self.is_calibrated:
 459            return array([mspeak.mz_cal for mspeak in self.mspeaks])
 460
 461        else:
 462            return array([mspeak.mz_exp for mspeak in self.mspeaks])
 463
 464    @property
 465    def freq_exp_profile(self):
 466        """Return the experimental frequency profile of the mass spectrum."""
 467        return self._frequency_domain
 468
 469    @freq_exp_profile.setter
 470    def freq_exp_profile(self, new_data):
 471        self._frequency_domain = array(new_data)
 472
 473    @property
 474    def freq_exp_pp(self):
 475        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
 476        _, _, freq = self.prepare_peak_picking_data()
 477        return freq
 478
 479    @property
 480    def mz_exp_profile(self):
 481        """Return the experimental m/z profile of the mass spectrum."""
 482        if self.is_calibrated:
 483            return self.mz_cal_profile
 484        else:
 485            return self._mz_exp
 486
 487    @mz_exp_profile.setter
 488    def mz_exp_profile(self, new_data):
 489        self._mz_exp = array(new_data)
 490
 491    @property
 492    def mz_exp_pp(self):
 493        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
 494        mz, _, _ = self.prepare_peak_picking_data()
 495        return mz
 496
 497    @property
 498    def abundance_profile(self):
 499        """Return the abundance profile of the mass spectrum."""
 500        return self._abundance
 501
 502    @abundance_profile.setter
 503    def abundance_profile(self, new_data):
 504        self._abundance = array(new_data)
 505
 506    @property
 507    def abundance_profile_pp(self):
 508        """Return the abundance profile of the mass spectrum that is used for peak picking."""
 509        _, abundance, _ = self.prepare_peak_picking_data()
 510        return abundance
 511
 512    @property
 513    def abundance(self):
 514        """Return the abundance values of the mass spectrum."""
 515        self.check_mspeaks()
 516        return array([mspeak.abundance for mspeak in self.mspeaks])
 517
 518    def freq_exp(self):
 519        """Return the experimental frequency values of the mass spectrum."""
 520        self.check_mspeaks()
 521        return array([mspeak.freq_exp for mspeak in self.mspeaks])
 522
 523    @property
 524    def resolving_power(self):
 525        """Return the resolving power values of the mass spectrum."""
 526        self.check_mspeaks()
 527        return array([mspeak.resolving_power for mspeak in self.mspeaks])
 528
 529    @property
 530    def signal_to_noise(self):
 531        self.check_mspeaks()
 532        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
 533
 534    @property
 535    def nominal_mz(self):
 536        """Return the nominal m/z values of the mass spectrum."""
 537        if self._dict_nominal_masses_indexes:
 538            return sorted(list(self._dict_nominal_masses_indexes.keys()))
 539        else:
 540            raise ValueError("Nominal indexes not yet set")
 541
 542    def get_mz_and_abundance_peaks_tuples(self):
 543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
 544        self.check_mspeaks()
 545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
 546
 547    @property
 548    def kmd(self):
 549        """Return the Kendrick mass defect values of the mass spectrum."""
 550        self.check_mspeaks()
 551        return array([mspeak.kmd for mspeak in self.mspeaks])
 552
 553    @property
 554    def kendrick_mass(self):
 555        """Return the Kendrick mass values of the mass spectrum."""
 556        self.check_mspeaks()
 557        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
 558
 559    @property
 560    def max_mz_exp(self):
 561        """Return the maximum experimental m/z value of the mass spectrum."""
 562        return max([mspeak.mz_exp for mspeak in self.mspeaks])
 563
 564    @property
 565    def min_mz_exp(self):
 566        """Return the minimum experimental m/z value of the mass spectrum."""
 567        return min([mspeak.mz_exp for mspeak in self.mspeaks])
 568
 569    @property
 570    def max_abundance(self):
 571        """Return the maximum abundance value of the mass spectrum."""
 572        return max([mspeak.abundance for mspeak in self.mspeaks])
 573
 574    @property
 575    def max_signal_to_noise(self):
 576        """Return the maximum signal-to-noise ratio of the mass spectrum."""
 577        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
 578
 579    @property
 580    def most_abundant_mspeak(self):
 581        """Return the most abundant MSpeak object of the mass spectrum."""
 582        return max(self.mspeaks, key=lambda m: m.abundance)
 583
 584    @property
 585    def min_abundance(self):
 586        """Return the minimum abundance value of the mass spectrum."""
 587        return min([mspeak.abundance for mspeak in self.mspeaks])
 588
 589    # takes too much cpu time
 590    @property
 591    def dynamic_range(self):
 592        """Return the dynamic range of the mass spectrum."""
 593        return self._dynamic_range
 594
 595    @property
 596    def baseline_noise(self):
 597        """Return the baseline noise of the mass spectrum."""
 598        if self._baseline_noise:
 599            return self._baseline_noise
 600        else:
 601            return None
 602
 603    @property
 604    def baseline_noise_std(self):
 605        """Return the standard deviation of the baseline noise of the mass spectrum."""
 606        if self._baseline_noise_std == 0:
 607            return self._baseline_noise_std
 608        if self._baseline_noise_std:
 609            return self._baseline_noise_std
 610        else:
 611            return None
 612
 613    @property
 614    def Aterm(self):
 615        """Return the A-term calibration coefficient of the mass spectrum."""
 616        return self._calibration_terms[0]
 617
 618    @property
 619    def Bterm(self):
 620        """Return the B-term calibration coefficient of the mass spectrum."""
 621        return self._calibration_terms[1]
 622
 623    @property
 624    def Cterm(self):
 625        """Return the C-term calibration coefficient of the mass spectrum."""
 626        return self._calibration_terms[2]
 627
 628    @property
 629    def filename(self):
 630        """Return the filename of the mass spectrum."""
 631        return Path(self._filename)
 632
 633    @property
 634    def dir_location(self):
 635        """Return the directory location of the mass spectrum."""
 636        return self._dir_location
 637
 638    def sort_by_mz(self):
 639        """Sort the mass spectrum by m/z values."""
 640        return sorted(self, key=lambda m: m.mz_exp)
 641
 642    def sort_by_abundance(self, reverse=False):
 643        """Sort the mass spectrum by abundance values."""
 644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
 645
 646    @property
 647    def tic(self):
 648        """Return the total ion current of the mass spectrum."""
 649        return trapz(self.abundance_profile, self.mz_exp_profile)
 650
 651    def check_mspeaks_warning(self):
 652        """Check if the mass spectrum has MSpeaks objects.
 653
 654        Raises
 655        ------
 656        Warning
 657            If the mass spectrum has no MSpeaks objects.
 658        """
 659        import warnings
 660
 661        if self.mspeaks:
 662            pass
 663        else:
 664            warnings.warn("mspeaks list is empty, continuing without filtering data")
 665
 666    def check_mspeaks(self):
 667        """Check if the mass spectrum has MSpeaks objects.
 668
 669        Raises
 670        ------
 671        Exception
 672            If the mass spectrum has no MSpeaks objects.
 673        """
 674        if self.mspeaks:
 675            pass
 676        else:
 677            raise Exception(
 678                "mspeaks list is empty, please run process_mass_spec() first"
 679            )
 680
 681    def remove_assignment_by_index(self, indexes):
 682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
 683
 684        Parameters
 685        ----------
 686        indexes : list of int
 687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
 688        """
 689        for i in indexes:
 690            self.mspeaks[i].clear_molecular_formulas()
 691
 692    def filter_by_index(self, list_indexes):
 693        """Filter the mass spectrum by the specified indexes.
 694
 695        Parameters
 696        ----------
 697        list_indexes : list of int
 698            A list of indexes of the MSpeaks objects to drop.
 699
 700        """
 701
 702        self.mspeaks = [
 703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
 704        ]
 705
 706        for i, mspeak in enumerate(self.mspeaks):
 707            mspeak.index = i
 708
 709        self._set_nominal_masses_start_final_indexes()
 710
 711    def filter_by_mz(self, min_mz, max_mz):
 712        """Filter the mass spectrum by the specified m/z range.
 713
 714        Parameters
 715        ----------
 716        min_mz : float
 717            The minimum m/z value to keep.
 718        max_mz : float
 719            The maximum m/z value to keep.
 720
 721        """
 722        self.check_mspeaks_warning()
 723        indexes = [
 724            index
 725            for index, mspeak in enumerate(self.mspeaks)
 726            if not min_mz <= mspeak.mz_exp <= max_mz
 727        ]
 728        self.filter_by_index(indexes)
 729
 730    def filter_by_s2n(self, min_s2n, max_s2n=False):
 731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
 732
 733        Parameters
 734        ----------
 735        min_s2n : float
 736            The minimum signal-to-noise ratio to keep.
 737        max_s2n : float, optional
 738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
 739
 740        """
 741        self.check_mspeaks_warning()
 742        if max_s2n:
 743            indexes = [
 744                index
 745                for index, mspeak in enumerate(self.mspeaks)
 746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
 747            ]
 748        else:
 749            indexes = [
 750                index
 751                for index, mspeak in enumerate(self.mspeaks)
 752                if mspeak.signal_to_noise <= min_s2n
 753            ]
 754        self.filter_by_index(indexes)
 755
 756    def filter_by_abundance(self, min_abund, max_abund=False):
 757        """Filter the mass spectrum by the specified abundance range.
 758
 759        Parameters
 760        ----------
 761        min_abund : float
 762            The minimum abundance to keep.
 763        max_abund : float, optional
 764            The maximum abundance to keep. Defaults to False (no maximum).
 765
 766        """
 767        self.check_mspeaks_warning()
 768        if max_abund:
 769            indexes = [
 770                index
 771                for index, mspeak in enumerate(self.mspeaks)
 772                if not min_abund <= mspeak.abundance <= max_abund
 773            ]
 774        else:
 775            indexes = [
 776                index
 777                for index, mspeak in enumerate(self.mspeaks)
 778                if mspeak.abundance <= min_abund
 779            ]
 780        self.filter_by_index(indexes)
 781
 782    def filter_by_max_resolving_power(self, B, T):
 783        """Filter the mass spectrum by the specified maximum resolving power.
 784
 785        Parameters
 786        ----------
 787        B : float
 788        T : float
 789
 790        """
 791
 792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
 793
 794        self.check_mspeaks_warning()
 795
 796        indexes_to_remove = [
 797            index
 798            for index, mspeak in enumerate(self.mspeaks)
 799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
 800        ]
 801        self.filter_by_index(indexes_to_remove)
 802
 803    def filter_by_mean_resolving_power(
 804        self, ndeviations=3, plot=False, guess_pars=False
 805    ):
 806        """Filter the mass spectrum by the specified mean resolving power.
 807
 808        Parameters
 809        ----------
 810        ndeviations : float, optional
 811            The number of standard deviations to use for filtering. Defaults to 3.
 812        plot : bool, optional
 813            Whether to plot the resolving power distribution. Defaults to False.
 814        guess_pars : bool, optional
 815            Whether to guess the parameters for the Gaussian model. Defaults to False.
 816
 817        """
 818        self.check_mspeaks_warning()
 819        indexes_to_remove = MeanResolvingPowerFilter(
 820            self, ndeviations, plot, guess_pars
 821        ).main()
 822        self.filter_by_index(indexes_to_remove)
 823
 824    def filter_by_min_resolving_power(self, B, T, apodization_method: str=None, tolerance: float=0):
 825        """Filter the mass spectrum by the calculated minimum theoretical resolving power.
 826
 827        This is currently designed only for FTICR data, and accounts only for magnitude mode data
 828        Accurate results require passing the apodisaion method used to calculate the resolving power.
 829        see the ICRMassPeak function `resolving_power_calc` for more details.
 830
 831        Parameters
 832        ----------
 833        B : Magnetic field strength in Tesla, float
 834        T : transient length in seconds, float
 835        apodization_method : str, optional
 836            The apodization method to use for calculating the resolving power. Defaults to None.
 837        tolerance : float, optional
 838            The tolerance for the threshold. Defaults to 0, i.e. no tolerance
 839
 840        """
 841        if self.analyzer != "ICR":
 842            raise Exception(
 843                "This method is only applicable to ICR mass spectra. "
 844            )
 845
 846        self.check_mspeaks_warning()
 847
 848        indexes_to_remove = [
 849            index
 850            for index, mspeak in enumerate(self.mspeaks)
 851            if mspeak.resolving_power < (1-tolerance) * mspeak.resolving_power_calc(B, T, apodization_method=apodization_method)
 852        ]
 853        self.filter_by_index(indexes_to_remove)
 854
 855    def filter_by_noise_threshold(self):
 856        """Filter the mass spectrum by the noise threshold."""
 857
 858        threshold = self.get_noise_threshold()[1][0]
 859
 860        self.check_mspeaks_warning()
 861
 862        indexes_to_remove = [
 863            index
 864            for index, mspeak in enumerate(self.mspeaks)
 865            if mspeak.abundance <= threshold
 866        ]
 867        self.filter_by_index(indexes_to_remove)
 868
 869    def find_peaks(self):
 870        """Find the peaks of the mass spectrum."""
 871        # needs to clear previous results from peak_picking
 872        self._mspeaks = list()
 873
 874        # then do peak picking
 875        self.do_peak_picking()
 876        # print("A total of %i peaks were found" % len(self._mspeaks))
 877
 878    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
 879        """Change the Kendrick base of all MSpeaks objects.
 880
 881        Parameters
 882        ----------
 883        kendrick_dict_base : dict
 884            A dictionary of the Kendrick base to change to.
 885
 886        Notes
 887        -----
 888        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
 889        """
 890        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
 891
 892        for mspeak in self.mspeaks:
 893            mspeak.change_kendrick_base(kendrick_dict_base)
 894
 895    def get_nominal_mz_first_last_indexes(self, nominal_mass):
 896        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
 897
 898        Parameters
 899        ----------
 900        nominal_mass : int
 901            The nominal mass to get the indexes for.
 902
 903        Returns
 904        -------
 905        tuple
 906            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
 907        """
 908        if self._dict_nominal_masses_indexes:
 909            if nominal_mass in self._dict_nominal_masses_indexes.keys():
 910                return (
 911                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
 912                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
 913                )
 914
 915            else:
 916                # import warnings
 917                # uncomment warn to distribution
 918                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
 919                return (0, 0)
 920        else:
 921            raise Exception(
 922                "run process_mass_spec() function before trying to access the data"
 923            )
 924
 925    def get_masses_count_by_nominal_mass(self):
 926        """Return a dictionary of the nominal masses and their counts."""
 927
 928        dict_nominal_masses_count = {}
 929
 930        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 931
 932        for nominal_mass in all_nominal_masses:
 933            if nominal_mass not in dict_nominal_masses_count:
 934                dict_nominal_masses_count[nominal_mass] = len(
 935                    list(self.get_nominal_mass_indexes(nominal_mass))
 936                )
 937
 938        return dict_nominal_masses_count
 939
 940    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
 941        """Return a dictionary of the nominal masses and their counts.
 942
 943        Parameters
 944        ----------
 945        mz_overlay : float, optional
 946            The m/z overlay to use for counting. Defaults to 0.1.
 947
 948        Returns
 949        -------
 950        dict
 951            A dictionary of the nominal masses and their counts.
 952        """
 953        dict_nominal_masses_count = {}
 954
 955        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
 956
 957        for nominal_mass in all_nominal_masses:
 958            if nominal_mass not in dict_nominal_masses_count:
 959                min_mz = nominal_mass - mz_overlay
 960
 961                max_mz = nominal_mass + 1 + mz_overlay
 962
 963                indexes = indexes = where(
 964                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
 965                )
 966
 967                dict_nominal_masses_count[nominal_mass] = indexes[0].size
 968
 969        return dict_nominal_masses_count
 970
 971    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
 972        """Return the indexes of the MSpeaks objects with the specified nominal mass.
 973
 974        Parameters
 975        ----------
 976        nominal_mass : int
 977            The nominal mass to get the indexes for.
 978        overlay : float, optional
 979            The m/z overlay to use for counting. Defaults to 0.1.
 980
 981        Returns
 982        -------
 983        generator
 984            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
 985        """
 986        min_mz_to_look = nominal_mass - overlay
 987        max_mz_to_look = nominal_mass + 1 + overlay
 988
 989        return (
 990            i
 991            for i in range(len(self.mspeaks))
 992            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
 993        )
 994
 995        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
 996        # return indexes
 997
 998    def _set_nominal_masses_start_final_indexes(self):
 999        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
1000        dict_nominal_masses_indexes = {}
1001
1002        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
1003
1004        for nominal_mass in all_nominal_masses:
1005            # indexes = self.get_nominal_mass_indexes(nominal_mass)
1006            # Convert the iterator to a list to avoid multiple calls
1007            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
1008
1009            # If the list is not empty, find the first and last; otherwise, set None
1010            if indexes:
1011                first, last = indexes[0], indexes[-1]
1012            else:
1013                first = last = None
1014            # defaultvalue = None
1015            # first = last = next(indexes, defaultvalue)
1016            # for last in indexes:
1017            #    pass
1018
1019            dict_nominal_masses_indexes[nominal_mass] = (first, last)
1020
1021        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
1022
1023    def plot_centroid(self, ax=None, c="g"):
1024        """Plot the centroid data of the mass spectrum.
1025
1026        Parameters
1027        ----------
1028        ax : matplotlib.axes.Axes, optional
1029            The matplotlib axes to plot on. Defaults to None.
1030        c : str, optional
1031            The color to use for the plot. Defaults to 'g' (green).
1032
1033        Returns
1034        -------
1035        matplotlib.axes.Axes
1036            The matplotlib axes containing the plot.
1037
1038        Raises
1039        ------
1040        Exception
1041            If no centroid data is found.
1042        """
1043
1044        import matplotlib.pyplot as plt
1045
1046        if self._mspeaks:
1047            if ax is None:
1048                ax = plt.gca()
1049
1050            markerline_a, stemlines_a, baseline_a = ax.stem(
1051                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1052            )
1053
1054            plt.setp(markerline_a, "color", c, "linewidth", 2)
1055            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1056            plt.setp(baseline_a, "color", c, "linewidth", 2)
1057
1058            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1059            ax.set_ylabel("Abundance", fontsize=12)
1060            ax.tick_params(axis="both", which="major", labelsize=12)
1061
1062            ax.axes.spines["top"].set_visible(False)
1063            ax.axes.spines["right"].set_visible(False)
1064
1065            ax.get_yaxis().set_visible(False)
1066            ax.spines["left"].set_visible(False)
1067
1068        else:
1069            raise Exception("No centroid data found, please run process_mass_spec")
1070
1071        return ax
1072
1073    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1074        """Plot the profile data and noise threshold of the mass spectrum.
1075
1076        Parameters
1077        ----------
1078        ax : matplotlib.axes.Axes, optional
1079            The matplotlib axes to plot on. Defaults to None.
1080        legend : bool, optional
1081            Whether to show the legend. Defaults to False.
1082
1083        Returns
1084        -------
1085        matplotlib.axes.Axes
1086            The matplotlib axes containing the plot.
1087
1088        Raises
1089        ------
1090        Exception
1091            If no noise threshold is found.
1092        """
1093        import matplotlib.pyplot as plt
1094
1095        if self.baseline_noise_std and self.baseline_noise_std:
1096            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1097            baseline = (self.baseline_noise, self.baseline_noise)
1098
1099            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1100            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1101            x, y = self.get_noise_threshold()
1102
1103            if ax is None:
1104                ax = plt.gca()
1105
1106            ax.plot(
1107                self.mz_exp_profile,
1108                self.abundance_profile,
1109                color="green",
1110                label="Spectrum",
1111            )
1112            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1113            ax.plot(x, y, color="red", label="Noise Threshold")
1114
1115            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1116            ax.set_ylabel("Abundance", fontsize=12)
1117            ax.tick_params(axis="both", which="major", labelsize=12)
1118
1119            ax.axes.spines["top"].set_visible(False)
1120            ax.axes.spines["right"].set_visible(False)
1121
1122            ax.get_yaxis().set_visible(False)
1123            ax.spines["left"].set_visible(False)
1124            if legend:
1125                ax.legend()
1126
1127        else:
1128            raise Exception("Calculate noise threshold first")
1129
1130        return ax
1131
1132    def plot_mz_domain_profile(self, color="green", ax=None):
1133        """Plot the m/z domain profile of the mass spectrum.
1134
1135        Parameters
1136        ----------
1137        color : str, optional
1138            The color to use for the plot. Defaults to 'green'.
1139        ax : matplotlib.axes.Axes, optional
1140            The matplotlib axes to plot on. Defaults to None.
1141
1142        Returns
1143        -------
1144        matplotlib.axes.Axes
1145            The matplotlib axes containing the plot.
1146        """
1147
1148        import matplotlib.pyplot as plt
1149
1150        if ax is None:
1151            ax = plt.gca()
1152        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1153        ax.set(xlabel="m/z", ylabel="abundance")
1154
1155        return ax
1156
1157    def to_excel(self, out_file_path, write_metadata=True):
1158        """Export the mass spectrum to an Excel file.
1159
1160        Parameters
1161        ----------
1162        out_file_path : str
1163            The path to the Excel file to export to.
1164        write_metadata : bool, optional
1165            Whether to write the metadata to the Excel file. Defaults to True.
1166
1167        Returns
1168        -------
1169        None
1170        """
1171        from corems.mass_spectrum.output.export import HighResMassSpecExport
1172
1173        exportMS = HighResMassSpecExport(out_file_path, self)
1174        exportMS.to_excel(write_metadata=write_metadata)
1175
1176    def to_hdf(self, out_file_path):
1177        """Export the mass spectrum to an HDF file.
1178
1179        Parameters
1180        ----------
1181        out_file_path : str
1182            The path to the HDF file to export to.
1183
1184        Returns
1185        -------
1186        None
1187        """
1188        from corems.mass_spectrum.output.export import HighResMassSpecExport
1189
1190        exportMS = HighResMassSpecExport(out_file_path, self)
1191        exportMS.to_hdf()
1192
1193    def to_csv(self, out_file_path, write_metadata=True):
1194        """Export the mass spectrum to a CSV file.
1195
1196        Parameters
1197        ----------
1198        out_file_path : str
1199            The path to the CSV file to export to.
1200        write_metadata : bool, optional
1201            Whether to write the metadata to the CSV file. Defaults to True.
1202
1203        """
1204        from corems.mass_spectrum.output.export import HighResMassSpecExport
1205
1206        exportMS = HighResMassSpecExport(out_file_path, self)
1207        exportMS.to_csv(write_metadata=write_metadata)
1208
1209    def to_pandas(self, out_file_path, write_metadata=True):
1210        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1211
1212        Parameters
1213        ----------
1214        out_file_path : str
1215            The path to the CSV file to export to.
1216        write_metadata : bool, optional
1217            Whether to write the metadata to the CSV file. Defaults to True.
1218
1219        """
1220        from corems.mass_spectrum.output.export import HighResMassSpecExport
1221
1222        exportMS = HighResMassSpecExport(out_file_path, self)
1223        exportMS.to_pandas(write_metadata=write_metadata)
1224
1225    def to_dataframe(self, additional_columns=None):
1226        """Return the mass spectrum as a Pandas dataframe.
1227
1228        Parameters
1229        ----------
1230        additional_columns : list, optional
1231            A list of additional columns to include in the dataframe. Defaults to None.
1232            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1233
1234        Returns
1235        -------
1236        pandas.DataFrame
1237            The mass spectrum as a Pandas dataframe.
1238        """
1239        from corems.mass_spectrum.output.export import HighResMassSpecExport
1240
1241        exportMS = HighResMassSpecExport(self.filename, self)
1242        return exportMS.get_pandas_df(additional_columns=additional_columns)
1243
1244    def to_json(self):
1245        """Return the mass spectrum as a JSON file."""
1246        from corems.mass_spectrum.output.export import HighResMassSpecExport
1247
1248        exportMS = HighResMassSpecExport(self.filename, self)
1249        return exportMS.to_json()
1250
1251    def parameters_json(self):
1252        """Return the parameters of the mass spectrum as a JSON string."""
1253        from corems.mass_spectrum.output.export import HighResMassSpecExport
1254
1255        exportMS = HighResMassSpecExport(self.filename, self)
1256        return exportMS.parameters_to_json()
1257
1258    def parameters_toml(self):
1259        """Return the parameters of the mass spectrum as a TOML string."""
1260        from corems.mass_spectrum.output.export import HighResMassSpecExport
1261
1262        exportMS = HighResMassSpecExport(self.filename, self)
1263        return exportMS.parameters_to_toml()

A mass spectrum base class, stores the profile data and instrument settings.

Iterating over an instance walks the list of MSPeak objects stored in the _mspeaks attribute. _mspeaks is populated under the hood by calling the process_mass_spec method. Iteration yields nothing if _mspeaks is empty.

Parameters
  • mz_exp (array_like): The m/z values of the mass spectrum.
  • abundance (array_like): The abundance values of the mass spectrum.
  • d_params (dict): A dictionary of parameters for the mass spectrum.
  • **kwargs: Additional keyword arguments.
Attributes
  • mspeaks (list): A list of mass peaks.
  • is_calibrated (bool): Whether the mass spectrum is calibrated.
  • is_centroid (bool): Whether the mass spectrum is centroided.
  • has_frequency (bool): Whether the mass spectrum has a frequency domain.
  • calibration_order (None or int): The order of the mass spectrum's calibration.
  • calibration_points (None or ndarray): The calibration points of the mass spectrum.
  • calibration_ref_mzs (None or ndarray): The reference m/z values of the mass spectrum's calibration.
  • calibration_meas_mzs (None or ndarray): The measured m/z values of the mass spectrum's calibration.
  • calibration_RMS (None or float): The root mean square of the mass spectrum's calibration.
  • calibration_segment (None or CalibrationSegment): The calibration segment of the mass spectrum.
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
  • _dict_nominal_masses_indexes (dict): A dictionary of nominal masses and their indexes.
  • _baseline_noise (float): The baseline noise of the mass spectrum.
  • _baseline_noise_std (float): The standard deviation of the baseline noise of the mass spectrum.
  • _dynamic_range (float or None): The dynamic range of the mass spectrum.
  • _transient_settings (None or TransientSettings): The transient settings of the mass spectrum.
  • _frequency_domain (None or FrequencyDomain): The frequency domain of the mass spectrum.
  • _mz_cal_profile (None or MzCalibrationProfile): The m/z calibration profile of the mass spectrum.
Methods
  • process_mass_spec(). Main function to process the mass spectrum, including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.

See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()

MassSpecBase(mz_exp, abundance, d_params, **kwargs)
    def __init__(self, mz_exp, abundance, d_params, **kwargs):
        """Store the raw data and set every processing attribute to its default.

        Parameters
        ----------
        mz_exp : array_like
            The m/z values; stored as a float64 array in ``_mz_exp``.
        abundance : array_like
            The abundance values; stored as a float64 array in ``_abundance``.
        d_params : dict
            Passed to ``_set_parameters_objects``.
        **kwargs
            Accepted but not used in this method.
        """
        # Copies of the inputs, coerced to float64.
        self._abundance = array(abundance, dtype=float64)
        self._mz_exp = array(mz_exp, dtype=float64)

        # objects created after process_mass_spec() function
        self._mspeaks = list()
        self.mspeaks = list()
        self._dict_nominal_masses_indexes = dict()
        # Placeholder noise values used until a noise threshold is calculated.
        self._baseline_noise = 0.001
        self._baseline_noise_std = 0.001
        self._dynamic_range = None
        # set to None: initialization occurs inside subclass MassSpecfromFreq
        self._transient_settings = None
        self._frequency_domain = None
        self._mz_cal_profile = None
        self.is_calibrated = False

        # NOTE(review): these two helpers are defined outside this view; the
        # flags below are assigned after them, so their order is kept as is.
        self._set_parameters_objects(d_params)
        self._init_settings()

        self.is_centroid = False
        self.has_frequency = False

        # Calibration bookkeeping; all unset until a calibration is performed.
        self.calibration_order = None
        self.calibration_points = None
        self.calibration_ref_mzs = None
        self.calibration_meas_mzs = None
        self.calibration_RMS = None
        self.calibration_segment = None
        self.calibration_raw_error_median = None
        self.calibration_raw_error_stdev = None
mspeaks
is_calibrated
is_centroid
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
def set_indexes(self, list_indexes):
148    def set_indexes(self, list_indexes):
149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
150
151        Parameters
152        ----------
153        list_indexes : list of int
154            A list of integers representing the indexes of the MSpeaks to iterate over.
155
156        """
157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
158
159        for i, mspeak in enumerate(self.mspeaks):
160            mspeak.index = i
161
162        self._set_nominal_masses_start_final_indexes()

Set the mass spectrum to iterate over only the selected MSpeaks indexes.

Parameters
  • list_indexes (list of int): A list of integers representing the indexes of the MSpeaks to iterate over.
def reset_indexes(self):
164    def reset_indexes(self):
165        """Reset the mass spectrum to iterate over all MSpeaks objects.
166
167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
169
170        """
171        self.mspeaks = self._mspeaks
172
173        for i, mspeak in enumerate(self.mspeaks):
174            mspeak.index = i
175
176        self._set_nominal_masses_start_final_indexes()

Reset the mass spectrum to iterate over all MSpeaks objects.

This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects. It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.

def add_mspeak( self, ion_charge, mz_exp, abundance, resolving_power, signal_to_noise, massspec_indexes, exp_freq=None, ms_parent=None):
178    def add_mspeak(
179        self,
180        ion_charge,
181        mz_exp,
182        abundance,
183        resolving_power,
184        signal_to_noise,
185        massspec_indexes,
186        exp_freq=None,
187        ms_parent=None,
188    ):
189        """Add a new MSPeak object to the MassSpectrum object.
190
191        Parameters
192        ----------
193        ion_charge : int
194            The ion charge of the MSPeak.
195        mz_exp : float
196            The experimental m/z value of the MSPeak.
197        abundance : float
198            The abundance of the MSPeak.
199        resolving_power : float
200            The resolving power of the MSPeak.
201        signal_to_noise : float
202            The signal-to-noise ratio of the MSPeak.
203        massspec_indexes : list
204            A list of indexes of the MSPeak in the MassSpectrum object.
205        exp_freq : float, optional
206            The experimental frequency of the MSPeak. Defaults to None.
207        ms_parent : MSParent, optional
208            The MSParent object associated with the MSPeak. Defaults to None.
209        """
210        mspeak = MSPeak(
211            ion_charge,
212            mz_exp,
213            abundance,
214            resolving_power,
215            signal_to_noise,
216            massspec_indexes,
217            len(self._mspeaks),
218            exp_freq=exp_freq,
219            ms_parent=ms_parent,
220        )
221
222        self._mspeaks.append(mspeak)

Add a new MSPeak object to the MassSpectrum object.

Parameters
  • ion_charge (int): The ion charge of the MSPeak.
  • mz_exp (float): The experimental m/z value of the MSPeak.
  • abundance (float): The abundance of the MSPeak.
  • resolving_power (float): The resolving power of the MSPeak.
  • signal_to_noise (float): The signal-to-noise ratio of the MSPeak.
  • massspec_indexes (list): A list of indexes of the MSPeak in the MassSpectrum object.
  • exp_freq (float, optional): The experimental frequency of the MSPeak. Defaults to None.
  • ms_parent (MSParent, optional): The MSParent object associated with the MSPeak. Defaults to None.
def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
292
293        Parameters
294        ----------
295        Aterm : float
296            The A-term calibration coefficient.
297        Bterm : float
298            The B-term calibration coefficient.
299        C : float
300            The C-term calibration coefficient.
301        fas : float, optional
302            The frequency amplitude scaling factor. Default is 0.
303        """
304        self._calibration_terms = (Aterm, Bterm, C)
305
306        self._mz_exp = self._f_to_mz()
307        self._abundance = self._abundance
308        self.find_peaks()
309        self.reset_indexes()

Reset calibration terms and recalculate the mass-to-charge ratio and abundance.

Parameters
  • Aterm (float): The A-term calibration coefficient.
  • Bterm (float): The B-term calibration coefficient.
  • C (float): The C-term calibration coefficient.
  • fas (float, optional): The frequency amplitude scaling factor. Default is 0.
def clear_molecular_formulas(self):
311    def clear_molecular_formulas(self):
312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
313
314        Returns
315        -------
316        numpy.ndarray
317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
318        """
319        self.check_mspeaks()
320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])

Clear the molecular formulas for all mspeaks in the MassSpectrum.

Returns
  • numpy.ndarray: An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
def process_mass_spec(self, keep_profile=True):
322    def process_mass_spec(self, keep_profile=True):
323        """Process the mass spectrum.
324
325        Parameters
326        ----------
327        keep_profile : bool, optional
328            Whether to keep the profile data after processing. Defaults to True.
329
330        Notes
331        -----
332        This method does the following:
333        - calculates the noise threshold
334        - does peak picking (creates mspeak_objs)
335        - resets the mspeak_obj indexes
336        """
337
338        # if runned mannually make sure to rerun filter_by_noise_threshold
339        # calculates noise threshold
340        # do peak picking( create mspeak_objs)
341        # reset mspeak_obj the indexes
342
343        self.cal_noise_threshold()
344
345        self.find_peaks()
346        self.reset_indexes()
347
348        if self.mspeaks:
349            self._dynamic_range = self.max_abundance / self.min_abundance
350        else:
351            self._dynamic_range = 0
352        if not keep_profile:
353            self._abundance *= 0
354            self._mz_exp *= 0

Process the mass spectrum.

Parameters
  • keep_profile (bool, optional): Whether to keep the profile data after processing. Defaults to True.
Notes

This method does the following:

  • calculates the noise threshold
  • does peak picking (creates mspeak_objs)
  • resets the mspeak_obj indexes
def cal_noise_threshold(self):
356    def cal_noise_threshold(self):
357        """Calculate the noise threshold of the mass spectrum."""
358
359        if self.label == Labels.simulated_profile:
360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
361
362        if self.settings.noise_threshold_method == "log":
363            self._baseline_noise, self._baseline_noise_std = (
364                self.run_log_noise_threshold_calc()
365            )
366
367        else:
368            self._baseline_noise, self._baseline_noise_std = (
369                self.run_noise_threshold_calc()
370            )

Calculate the noise threshold of the mass spectrum.

parameters

Return the parameters of the mass spectrum.

def set_parameter_from_json(self, parameters_path):
    def set_parameter_from_json(self, parameters_path):
        """Set the parameters of the mass spectrum from a JSON file.

        Delegates to ``load_and_set_parameters_ms`` with this object.

        Parameters
        ----------
        parameters_path : str
            The path to the JSON file containing the parameters.
        """
        load_and_set_parameters_ms(self, parameters_path=parameters_path)

Set the parameters of the mass spectrum from a JSON file.

Parameters
  • parameters_path (str): The path to the JSON file containing the parameters.
def set_parameter_from_toml(self, parameters_path):
    def set_parameter_from_toml(self, parameters_path):
        """Set the parameters of the mass spectrum from a TOML file.

        Delegates to ``load_and_set_toml_parameters_ms`` with this object.

        Parameters
        ----------
        parameters_path : str
            The path to the TOML file containing the parameters.
        """
        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
mspeaks_settings

Return the MS peak settings of the mass spectrum.

settings

Return the settings of the mass spectrum.

molecular_search_settings

Return the molecular search settings of the mass spectrum.

mz_cal_profile

Return the calibrated m/z profile of the mass spectrum.

mz_cal

Return the calibrated m/z values of the mass spectrum.

mz_exp

Return the experimental m/z values of the mass spectrum.

freq_exp_profile

Return the experimental frequency profile of the mass spectrum.

freq_exp_pp

Return the experimental frequency values of the mass spectrum that are used for peak picking.

mz_exp_profile

Return the experimental m/z profile of the mass spectrum.

mz_exp_pp

Return the experimental m/z values of the mass spectrum that are used for peak picking.

abundance_profile

Return the abundance profile of the mass spectrum.

abundance_profile_pp

Return the abundance profile of the mass spectrum that is used for peak picking.

abundance

Return the abundance values of the mass spectrum.

def freq_exp(self):
518    def freq_exp(self):
519        """Return the experimental frequency values of the mass spectrum."""
520        self.check_mspeaks()
521        return array([mspeak.freq_exp for mspeak in self.mspeaks])

Return the experimental frequency values of the mass spectrum.

resolving_power

Return the resolving power values of the mass spectrum.

signal_to_noise
nominal_mz

Return the nominal m/z values of the mass spectrum.

def get_mz_and_abundance_peaks_tuples(self):
542    def get_mz_and_abundance_peaks_tuples(self):
543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
544        self.check_mspeaks()
545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]

Return a list of tuples containing the m/z and abundance values of the mass spectrum.

kmd

Return the Kendrick mass defect values of the mass spectrum.

kendrick_mass

Return the Kendrick mass values of the mass spectrum.

max_mz_exp

Return the maximum experimental m/z value of the mass spectrum.

min_mz_exp

Return the minimum experimental m/z value of the mass spectrum.

max_abundance

Return the maximum abundance value of the mass spectrum.

max_signal_to_noise

Return the maximum signal-to-noise ratio of the mass spectrum.

most_abundant_mspeak

Return the most abundant MSpeak object of the mass spectrum.

min_abundance

Return the minimum abundance value of the mass spectrum.

dynamic_range

Return the dynamic range of the mass spectrum.

baseline_noise

Return the baseline noise of the mass spectrum.

baseline_noise_std

Return the standard deviation of the baseline noise of the mass spectrum.

Aterm

Return the A-term calibration coefficient of the mass spectrum.

Bterm

Return the B-term calibration coefficient of the mass spectrum.

Cterm

Return the C-term calibration coefficient of the mass spectrum.

filename

Return the filename of the mass spectrum.

dir_location

Return the directory location of the mass spectrum.

def sort_by_mz(self):
638    def sort_by_mz(self):
639        """Sort the mass spectrum by m/z values."""
640        return sorted(self, key=lambda m: m.mz_exp)

Sort the mass spectrum by m/z values.

def sort_by_abundance(self, reverse=False):
642    def sort_by_abundance(self, reverse=False):
643        """Sort the mass spectrum by abundance values."""
644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)

Sort the mass spectrum by abundance values.

tic

Return the total ion current of the mass spectrum.

def check_mspeaks_warning(self):
651    def check_mspeaks_warning(self):
652        """Check if the mass spectrum has MSpeaks objects.
653
654        Raises
655        ------
656        Warning
657            If the mass spectrum has no MSpeaks objects.
658        """
659        import warnings
660
661        if self.mspeaks:
662            pass
663        else:
664            warnings.warn("mspeaks list is empty, continuing without filtering data")

Check if the mass spectrum has MSpeaks objects.

Raises
  • Warning: If the mass spectrum has no MSpeaks objects.
def check_mspeaks(self):
666    def check_mspeaks(self):
667        """Check if the mass spectrum has MSpeaks objects.
668
669        Raises
670        ------
671        Exception
672            If the mass spectrum has no MSpeaks objects.
673        """
674        if self.mspeaks:
675            pass
676        else:
677            raise Exception(
678                "mspeaks list is empty, please run process_mass_spec() first"
679            )

Check if the mass spectrum has MSpeaks objects.

Raises
  • Exception: If the mass spectrum has no MSpeaks objects.
def remove_assignment_by_index(self, indexes):
681    def remove_assignment_by_index(self, indexes):
682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
683
684        Parameters
685        ----------
686        indexes : list of int
687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
688        """
689        for i in indexes:
690            self.mspeaks[i].clear_molecular_formulas()

Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.

Parameters
  • indexes (list of int): A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
def filter_by_index(self, list_indexes):
692    def filter_by_index(self, list_indexes):
693        """Filter the mass spectrum by the specified indexes.
694
695        Parameters
696        ----------
697        list_indexes : list of int
698            A list of indexes of the MSpeaks objects to drop.
699
700        """
701
702        self.mspeaks = [
703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
704        ]
705
706        for i, mspeak in enumerate(self.mspeaks):
707            mspeak.index = i
708
709        self._set_nominal_masses_start_final_indexes()

Filter the mass spectrum by the specified indexes.

Parameters
  • list_indexes (list of int): A list of indexes of the MSpeaks objects to drop.
def filter_by_mz(self, min_mz, max_mz):
711    def filter_by_mz(self, min_mz, max_mz):
712        """Filter the mass spectrum by the specified m/z range.
713
714        Parameters
715        ----------
716        min_mz : float
717            The minimum m/z value to keep.
718        max_mz : float
719            The maximum m/z value to keep.
720
721        """
722        self.check_mspeaks_warning()
723        indexes = [
724            index
725            for index, mspeak in enumerate(self.mspeaks)
726            if not min_mz <= mspeak.mz_exp <= max_mz
727        ]
728        self.filter_by_index(indexes)

Filter the mass spectrum by the specified m/z range.

Parameters
  • min_mz (float): The minimum m/z value to keep.
  • max_mz (float): The maximum m/z value to keep.
def filter_by_s2n(self, min_s2n, max_s2n=False):
730    def filter_by_s2n(self, min_s2n, max_s2n=False):
731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
732
733        Parameters
734        ----------
735        min_s2n : float
736            The minimum signal-to-noise ratio to keep.
737        max_s2n : float, optional
738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
739
740        """
741        self.check_mspeaks_warning()
742        if max_s2n:
743            indexes = [
744                index
745                for index, mspeak in enumerate(self.mspeaks)
746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
747            ]
748        else:
749            indexes = [
750                index
751                for index, mspeak in enumerate(self.mspeaks)
752                if mspeak.signal_to_noise <= min_s2n
753            ]
754        self.filter_by_index(indexes)

Filter the mass spectrum by the specified signal-to-noise ratio range.

Parameters
  • min_s2n (float): The minimum signal-to-noise ratio to keep.
  • max_s2n (float, optional): The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
def filter_by_abundance(self, min_abund, max_abund=False):
756    def filter_by_abundance(self, min_abund, max_abund=False):
757        """Filter the mass spectrum by the specified abundance range.
758
759        Parameters
760        ----------
761        min_abund : float
762            The minimum abundance to keep.
763        max_abund : float, optional
764            The maximum abundance to keep. Defaults to False (no maximum).
765
766        """
767        self.check_mspeaks_warning()
768        if max_abund:
769            indexes = [
770                index
771                for index, mspeak in enumerate(self.mspeaks)
772                if not min_abund <= mspeak.abundance <= max_abund
773            ]
774        else:
775            indexes = [
776                index
777                for index, mspeak in enumerate(self.mspeaks)
778                if mspeak.abundance <= min_abund
779            ]
780        self.filter_by_index(indexes)

Filter the mass spectrum by the specified abundance range.

Parameters
  • min_abund (float): The minimum abundance to keep.
  • max_abund (float, optional): The maximum abundance to keep. Defaults to False (no maximum).
def filter_by_max_resolving_power(self, B, T):
782    def filter_by_max_resolving_power(self, B, T):
783        """Filter the mass spectrum by the specified maximum resolving power.
784
785        Parameters
786        ----------
787        B : float
788        T : float
789
790        """
791
792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
793
794        self.check_mspeaks_warning()
795
796        indexes_to_remove = [
797            index
798            for index, mspeak in enumerate(self.mspeaks)
799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
800        ]
801        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the specified maximum resolving power.

Parameters
  • B (float): Magnetic field strength in Tesla.

  • T (float): Transient length in seconds.

def filter_by_mean_resolving_power(self, ndeviations=3, plot=False, guess_pars=False):
803    def filter_by_mean_resolving_power(
804        self, ndeviations=3, plot=False, guess_pars=False
805    ):
806        """Filter the mass spectrum by the specified mean resolving power.
807
808        Parameters
809        ----------
810        ndeviations : float, optional
811            The number of standard deviations to use for filtering. Defaults to 3.
812        plot : bool, optional
813            Whether to plot the resolving power distribution. Defaults to False.
814        guess_pars : bool, optional
815            Whether to guess the parameters for the Gaussian model. Defaults to False.
816
817        """
818        self.check_mspeaks_warning()
819        indexes_to_remove = MeanResolvingPowerFilter(
820            self, ndeviations, plot, guess_pars
821        ).main()
822        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the specified mean resolving power.

Parameters
  • ndeviations (float, optional): The number of standard deviations to use for filtering. Defaults to 3.
  • plot (bool, optional): Whether to plot the resolving power distribution. Defaults to False.
  • guess_pars (bool, optional): Whether to guess the parameters for the Gaussian model. Defaults to False.
def filter_by_min_resolving_power(self, B, T, apodization_method: str = None, tolerance: float = 0):
824    def filter_by_min_resolving_power(self, B, T, apodization_method: str=None, tolerance: float=0):
825        """Filter the mass spectrum by the calculated minimum theoretical resolving power.
826
827        This is currently designed only for FTICR data, and accounts only for magnitude mode data
828        Accurate results require passing the apodisaion method used to calculate the resolving power.
829        see the ICRMassPeak function `resolving_power_calc` for more details.
830
831        Parameters
832        ----------
833        B : Magnetic field strength in Tesla, float
834        T : transient length in seconds, float
835        apodization_method : str, optional
836            The apodization method to use for calculating the resolving power. Defaults to None.
837        tolerance : float, optional
838            The tolerance for the threshold. Defaults to 0, i.e. no tolerance
839
840        """
841        if self.analyzer != "ICR":
842            raise Exception(
843                "This method is only applicable to ICR mass spectra. "
844            )
845
846        self.check_mspeaks_warning()
847
848        indexes_to_remove = [
849            index
850            for index, mspeak in enumerate(self.mspeaks)
851            if mspeak.resolving_power < (1-tolerance) * mspeak.resolving_power_calc(B, T, apodization_method=apodization_method)
852        ]
853        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the calculated minimum theoretical resolving power.

This is currently designed only for FTICR data, and accounts only for magnitude mode data. Accurate results require passing the apodization method used to calculate the resolving power; see the ICRMassPeak function resolving_power_calc for more details.

Parameters
  • B (float): Magnetic field strength in Tesla.

  • T (float): Transient length in seconds.

  • apodization_method (str, optional): The apodization method to use for calculating the resolving power. Defaults to None.

  • tolerance (float, optional): The tolerance for the threshold. Defaults to 0, i.e. no tolerance
def filter_by_noise_threshold(self):
855    def filter_by_noise_threshold(self):
856        """Filter the mass spectrum by the noise threshold."""
857
858        threshold = self.get_noise_threshold()[1][0]
859
860        self.check_mspeaks_warning()
861
862        indexes_to_remove = [
863            index
864            for index, mspeak in enumerate(self.mspeaks)
865            if mspeak.abundance <= threshold
866        ]
867        self.filter_by_index(indexes_to_remove)

Filter the mass spectrum by the noise threshold.

def find_peaks(self):
869    def find_peaks(self):
870        """Find the peaks of the mass spectrum."""
871        # needs to clear previous results from peak_picking
872        self._mspeaks = list()
873
874        # then do peak picking
875        self.do_peak_picking()
876        # print("A total of %i peaks were found" % len(self._mspeaks))

Find the peaks of the mass spectrum.

def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
878    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
879        """Change the Kendrick base of all MSpeaks objects.
880
881        Parameters
882        ----------
883        kendrick_dict_base : dict
884            A dictionary of the Kendrick base to change to.
885
886        Notes
887        -----
888        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
889        """
890        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
891
892        for mspeak in self.mspeaks:
893            mspeak.change_kendrick_base(kendrick_dict_base)

Change the Kendrick base of all MSpeaks objects.

Parameters
  • kendrick_dict_base (dict): A dictionary of the Kendrick base to change to.
Notes

Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc

def get_nominal_mz_first_last_indexes(self, nominal_mass):
895    def get_nominal_mz_first_last_indexes(self, nominal_mass):
896        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
897
898        Parameters
899        ----------
900        nominal_mass : int
901            The nominal mass to get the indexes for.
902
903        Returns
904        -------
905        tuple
906            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
907        """
908        if self._dict_nominal_masses_indexes:
909            if nominal_mass in self._dict_nominal_masses_indexes.keys():
910                return (
911                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
912                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
913                )
914
915            else:
916                # import warnings
917                # uncomment warn to distribution
918                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
919                return (0, 0)
920        else:
921            raise Exception(
922                "run process_mass_spec() function before trying to access the data"
923            )

Return the first and last indexes of the MSpeaks objects with the specified nominal mass.

Parameters
  • nominal_mass (int): The nominal mass to get the indexes for.
Returns
  • tuple: A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
def get_masses_count_by_nominal_mass(self):
925    def get_masses_count_by_nominal_mass(self):
926        """Return a dictionary of the nominal masses and their counts."""
927
928        dict_nominal_masses_count = {}
929
930        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
931
932        for nominal_mass in all_nominal_masses:
933            if nominal_mass not in dict_nominal_masses_count:
934                dict_nominal_masses_count[nominal_mass] = len(
935                    list(self.get_nominal_mass_indexes(nominal_mass))
936                )
937
938        return dict_nominal_masses_count

Return a dictionary of the nominal masses and their counts.

def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
940    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
941        """Return a dictionary of the nominal masses and their counts.
942
943        Parameters
944        ----------
945        mz_overlay : float, optional
946            The m/z overlay to use for counting. Defaults to 0.1.
947
948        Returns
949        -------
950        dict
951            A dictionary of the nominal masses and their counts.
952        """
953        dict_nominal_masses_count = {}
954
955        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
956
957        for nominal_mass in all_nominal_masses:
958            if nominal_mass not in dict_nominal_masses_count:
959                min_mz = nominal_mass - mz_overlay
960
961                max_mz = nominal_mass + 1 + mz_overlay
962
963                indexes = indexes = where(
964                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
965                )
966
967                dict_nominal_masses_count[nominal_mass] = indexes[0].size
968
969        return dict_nominal_masses_count

Return a dictionary mapping each nominal mass to the number of profile data points that fall inside its m/z window.

Parameters
  • mz_overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
  • dict: A dictionary of the nominal masses and their counts.
def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
971    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
972        """Return the indexes of the MSpeaks objects with the specified nominal mass.
973
974        Parameters
975        ----------
976        nominal_mass : int
977            The nominal mass to get the indexes for.
978        overlay : float, optional
979            The m/z overlay to use for counting. Defaults to 0.1.
980
981        Returns
982        -------
983        generator
984            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
985        """
986        min_mz_to_look = nominal_mass - overlay
987        max_mz_to_look = nominal_mass + 1 + overlay
988
989        return (
990            i
991            for i in range(len(self.mspeaks))
992            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
993        )
994
995        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
996        # return indexes

Return the indexes of the MSpeaks objects with the specified nominal mass.

Parameters
  • nominal_mass (int): The nominal mass to get the indexes for.
  • overlay (float, optional): The m/z overlay to use for counting. Defaults to 0.1.
Returns
  • generator: A generator of the indexes of the MSpeaks objects with the specified nominal mass.
def plot_centroid(self, ax=None, c='g'):
1023    def plot_centroid(self, ax=None, c="g"):
1024        """Plot the centroid data of the mass spectrum.
1025
1026        Parameters
1027        ----------
1028        ax : matplotlib.axes.Axes, optional
1029            The matplotlib axes to plot on. Defaults to None.
1030        c : str, optional
1031            The color to use for the plot. Defaults to 'g' (green).
1032
1033        Returns
1034        -------
1035        matplotlib.axes.Axes
1036            The matplotlib axes containing the plot.
1037
1038        Raises
1039        ------
1040        Exception
1041            If no centroid data is found.
1042        """
1043
1044        import matplotlib.pyplot as plt
1045
1046        if self._mspeaks:
1047            if ax is None:
1048                ax = plt.gca()
1049
1050            markerline_a, stemlines_a, baseline_a = ax.stem(
1051                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
1052            )
1053
1054            plt.setp(markerline_a, "color", c, "linewidth", 2)
1055            plt.setp(stemlines_a, "color", c, "linewidth", 2)
1056            plt.setp(baseline_a, "color", c, "linewidth", 2)
1057
1058            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1059            ax.set_ylabel("Abundance", fontsize=12)
1060            ax.tick_params(axis="both", which="major", labelsize=12)
1061
1062            ax.axes.spines["top"].set_visible(False)
1063            ax.axes.spines["right"].set_visible(False)
1064
1065            ax.get_yaxis().set_visible(False)
1066            ax.spines["left"].set_visible(False)
1067
1068        else:
1069            raise Exception("No centroid data found, please run process_mass_spec")
1070
1071        return ax

Plot the centroid data of the mass spectrum.

Parameters
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
  • c (str, optional): The color to use for the plot. Defaults to 'g' (green).
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
  • Exception: If no centroid data is found.
def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1073    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
1074        """Plot the profile data and noise threshold of the mass spectrum.
1075
1076        Parameters
1077        ----------
1078        ax : matplotlib.axes.Axes, optional
1079            The matplotlib axes to plot on. Defaults to None.
1080        legend : bool, optional
1081            Whether to show the legend. Defaults to False.
1082
1083        Returns
1084        -------
1085        matplotlib.axes.Axes
1086            The matplotlib axes containing the plot.
1087
1088        Raises
1089        ------
1090        Exception
1091            If no noise threshold is found.
1092        """
1093        import matplotlib.pyplot as plt
1094
1095        if self.baseline_noise_std and self.baseline_noise_std:
1096            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
1097            baseline = (self.baseline_noise, self.baseline_noise)
1098
1099            # std = self.parameters.mass_spectrum.noise_threshold_min_std
1100            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
1101            x, y = self.get_noise_threshold()
1102
1103            if ax is None:
1104                ax = plt.gca()
1105
1106            ax.plot(
1107                self.mz_exp_profile,
1108                self.abundance_profile,
1109                color="green",
1110                label="Spectrum",
1111            )
1112            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
1113            ax.plot(x, y, color="red", label="Noise Threshold")
1114
1115            ax.set_xlabel("$\t{m/z}$", fontsize=12)
1116            ax.set_ylabel("Abundance", fontsize=12)
1117            ax.tick_params(axis="both", which="major", labelsize=12)
1118
1119            ax.axes.spines["top"].set_visible(False)
1120            ax.axes.spines["right"].set_visible(False)
1121
1122            ax.get_yaxis().set_visible(False)
1123            ax.spines["left"].set_visible(False)
1124            if legend:
1125                ax.legend()
1126
1127        else:
1128            raise Exception("Calculate noise threshold first")
1129
1130        return ax

Plot the profile data and noise threshold of the mass spectrum.

Parameters
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
  • legend (bool, optional): Whether to show the legend. Defaults to False.
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
Raises
  • Exception: If no noise threshold is found.
def plot_mz_domain_profile(self, color='green', ax=None):
1132    def plot_mz_domain_profile(self, color="green", ax=None):
1133        """Plot the m/z domain profile of the mass spectrum.
1134
1135        Parameters
1136        ----------
1137        color : str, optional
1138            The color to use for the plot. Defaults to 'green'.
1139        ax : matplotlib.axes.Axes, optional
1140            The matplotlib axes to plot on. Defaults to None.
1141
1142        Returns
1143        -------
1144        matplotlib.axes.Axes
1145            The matplotlib axes containing the plot.
1146        """
1147
1148        import matplotlib.pyplot as plt
1149
1150        if ax is None:
1151            ax = plt.gca()
1152        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
1153        ax.set(xlabel="m/z", ylabel="abundance")
1154
1155        return ax

Plot the m/z domain profile of the mass spectrum.

Parameters
  • color (str, optional): The color to use for the plot. Defaults to 'green'.
  • ax (matplotlib.axes.Axes, optional): The matplotlib axes to plot on. Defaults to None.
Returns
  • matplotlib.axes.Axes: The matplotlib axes containing the plot.
def to_excel(self, out_file_path, write_metadata=True):
1157    def to_excel(self, out_file_path, write_metadata=True):
1158        """Export the mass spectrum to an Excel file.
1159
1160        Parameters
1161        ----------
1162        out_file_path : str
1163            The path to the Excel file to export to.
1164        write_metadata : bool, optional
1165            Whether to write the metadata to the Excel file. Defaults to True.
1166
1167        Returns
1168        -------
1169        None
1170        """
1171        from corems.mass_spectrum.output.export import HighResMassSpecExport
1172
1173        exportMS = HighResMassSpecExport(out_file_path, self)
1174        exportMS.to_excel(write_metadata=write_metadata)

Export the mass spectrum to an Excel file.

Parameters
  • out_file_path (str): The path to the Excel file to export to.
  • write_metadata (bool, optional): Whether to write the metadata to the Excel file. Defaults to True.
Returns
  • None
def to_hdf(self, out_file_path):
1176    def to_hdf(self, out_file_path):
1177        """Export the mass spectrum to an HDF file.
1178
1179        Parameters
1180        ----------
1181        out_file_path : str
1182            The path to the HDF file to export to.
1183
1184        Returns
1185        -------
1186        None
1187        """
1188        from corems.mass_spectrum.output.export import HighResMassSpecExport
1189
1190        exportMS = HighResMassSpecExport(out_file_path, self)
1191        exportMS.to_hdf()

Export the mass spectrum to an HDF file.

Parameters
  • out_file_path (str): The path to the HDF file to export to.
Returns
  • None
def to_csv(self, out_file_path, write_metadata=True):
1193    def to_csv(self, out_file_path, write_metadata=True):
1194        """Export the mass spectrum to a CSV file.
1195
1196        Parameters
1197        ----------
1198        out_file_path : str
1199            The path to the CSV file to export to.
1200        write_metadata : bool, optional
1201            Whether to write the metadata to the CSV file. Defaults to True.
1202
1203        """
1204        from corems.mass_spectrum.output.export import HighResMassSpecExport
1205
1206        exportMS = HighResMassSpecExport(out_file_path, self)
1207        exportMS.to_csv(write_metadata=write_metadata)

Export the mass spectrum to a CSV file.

Parameters
  • out_file_path (str): The path to the CSV file to export to.
  • write_metadata (bool, optional): Whether to write the metadata to the CSV file. Defaults to True.
def to_pandas(self, out_file_path, write_metadata=True):
1209    def to_pandas(self, out_file_path, write_metadata=True):
1210        """Export the mass spectrum to a Pandas dataframe with pkl extension.
1211
1212        Parameters
1213        ----------
1214        out_file_path : str
1215            The path to the CSV file to export to.
1216        write_metadata : bool, optional
1217            Whether to write the metadata to the CSV file. Defaults to True.
1218
1219        """
1220        from corems.mass_spectrum.output.export import HighResMassSpecExport
1221
1222        exportMS = HighResMassSpecExport(out_file_path, self)
1223        exportMS.to_pandas(write_metadata=write_metadata)

Export the mass spectrum to a Pandas dataframe with pkl extension.

Parameters
  • out_file_path (str): The path of the output (pkl) file to export to.
  • write_metadata (bool, optional): Whether to write the metadata alongside the exported data. Defaults to True.
def to_dataframe(self, additional_columns=None):
1225    def to_dataframe(self, additional_columns=None):
1226        """Return the mass spectrum as a Pandas dataframe.
1227
1228        Parameters
1229        ----------
1230        additional_columns : list, optional
1231            A list of additional columns to include in the dataframe. Defaults to None.
1232            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
1233
1234        Returns
1235        -------
1236        pandas.DataFrame
1237            The mass spectrum as a Pandas dataframe.
1238        """
1239        from corems.mass_spectrum.output.export import HighResMassSpecExport
1240
1241        exportMS = HighResMassSpecExport(self.filename, self)
1242        return exportMS.get_pandas_df(additional_columns=additional_columns)

Return the mass spectrum as a Pandas dataframe.

Parameters
  • additional_columns (list, optional): A list of additional columns to include in the dataframe. Defaults to None. Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
Returns
  • pandas.DataFrame: The mass spectrum as a Pandas dataframe.
def to_json(self):
1244    def to_json(self):
1245        """Return the mass spectrum as a JSON file."""
1246        from corems.mass_spectrum.output.export import HighResMassSpecExport
1247
1248        exportMS = HighResMassSpecExport(self.filename, self)
1249        return exportMS.to_json()

Return the mass spectrum as a JSON file.

def parameters_json(self):
1251    def parameters_json(self):
1252        """Return the parameters of the mass spectrum as a JSON string."""
1253        from corems.mass_spectrum.output.export import HighResMassSpecExport
1254
1255        exportMS = HighResMassSpecExport(self.filename, self)
1256        return exportMS.parameters_to_json()

Return the parameters of the mass spectrum as a JSON string.

def parameters_toml(self):
1258    def parameters_toml(self):
1259        """Return the parameters of the mass spectrum as a TOML string."""
1260        from corems.mass_spectrum.output.export import HighResMassSpecExport
1261
1262        exportMS = HighResMassSpecExport(self.filename, self)
1263        return exportMS.parameters_to_toml()

Return the parameters of the mass spectrum as a TOML string.

class MassSpecProfile(MassSpecBase):
class MassSpecProfile(MassSpecBase):
    """A mass spectrum class for data whose entry point is in profile format.

    Notes
    -----
    Stores the profile data and instrument settings.
    Iteration runs over the MSPeak objects stored in the _mspeaks attribute.
    _mspeaks is populated under the hood by calling the process_mass_spec method.
    Iteration is null if _mspeaks is empty. Many more attributes and methods
    are inherited from MassSpecBase().

    Parameters
    ----------
    data_dict : dict
        A dictionary containing the profile data. The m/z and abundance
        arrays are read from the ``Labels.mz`` and ``Labels.abundance`` keys.
    d_params : dict{'str': float, int or str}
        Contains the instrument settings and processing settings.
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.


    Attributes
    ----------
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.

    Methods
    -------
    * process_mass_spec(). Process the mass spectrum.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    """

    def __init__(self, data_dict, d_params, auto_process=True):
        # A missing key yields None, exactly as dict.get does.
        mz_values = data_dict.get(Labels.mz)
        abundance_values = data_dict.get(Labels.abundance)

        super().__init__(mz_values, abundance_values, d_params)

        if auto_process:
            self.process_mass_spec()
A mass spectrum class for data whose entry point is in profile format.

Notes

Stores the profile data and instrument settings. Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. _mspeaks is populated under the hood by calling process_mass_spec method. Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().

Parameters
  • data_dict (dict): A dictionary containing the profile data.
  • d_params (dict{str: float, int or str}): Contains the instrument settings and processing settings.
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
Methods
  • process_mass_spec(). Process the mass spectrum.

see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()

MassSpecProfile(data_dict, d_params, auto_process=True)
1302    def __init__(self, data_dict, d_params, auto_process=True):
1303        # print(data_dict.keys())
1304        super().__init__(
1305            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
1306        )
1307
1308        if auto_process:
1309            self.process_mass_spec()
Inherited Members
MassSpecBase
mspeaks
is_calibrated
is_centroid
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
process_mass_spec
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_profile
mz_exp_pp
abundance_profile
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
tic
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecfromFreq(MassSpecBase):
class MassSpecfromFreq(MassSpecBase):
    """Mass spectrum whose entry point is frequency-domain data.

    Notes
    -----
    - The m/z axis is derived from the frequency axis using the settings stored in d_params
    - Stores the profile data and instrument settings
    - Iteration runs over the MSPeak objects stored in the _mspeaks attribute
    - _mspeaks is populated under the hood by calling process_mass_spec
    - Iteration is null if _mspeaks is empty

    Parameters
    ----------
    frequency_domain : list(float)
        All datapoints in the frequency domain, in Hz.
    magnitude : list(float)
        Magnitude of each frequency datapoint.
    d_params : dict{str: float, int or str}
        Instrument settings and processing settings.
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.
    keep_profile : bool, optional
        Whether to keep the profile data. Defaults to True.

    Attributes
    ----------
    has_frequency : bool
        Whether the mass spectrum has frequency data (set to True here).
    _frequency_domain : list(float)
        Frequency domain in Hz.
    magnetron_frequency : float or None
        Center of the Gaussian fitted by calc_magnetron_freq; None until it runs.
    magnetron_frequency_sigma : float or None
        Sigma of the Gaussian fitted by calc_magnetron_freq; None until it runs.
    label : str
        Store label (Bruker, Midas Transient, see Labels class).
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.
    See also: all the attributes of the MassSpecBase class

    Methods
    ----------
    * _set_mz_domain().
        Calculates the m/z axis based on the settings of d_params.
    * process_mass_spec().  Process the mass spectrum.

    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    """

    def __init__(
        self,
        frequency_domain,
        magnitude,
        d_params,
        auto_process=True,
        keep_profile=True,
    ):
        # The m/z axis does not exist yet: pass None and derive it below.
        super().__init__(None, magnitude, d_params)

        self._frequency_domain = frequency_domain
        self.has_frequency = True

        # Filled in only when calc_magnetron_freq() is called.
        self.magnetron_frequency = None
        self.magnetron_frequency_sigma = None

        self._set_mz_domain()
        self._sort_mz_domain()

        # Settings must be changed before instantiating the class for them to
        # take effect on this automatic processing step.
        if auto_process:
            self.process_mass_spec(keep_profile=keep_profile)

    def _sort_mz_domain(self):
        """Flip the m/z, abundance and frequency arrays if m/z is descending."""
        descending = self._mz_exp[0] > self._mz_exp[-1]
        if not descending:
            return

        flipped = slice(None, None, -1)
        self._mz_exp = self._mz_exp[flipped]
        self._abundance = self._abundance[flipped]
        self._frequency_domain = self._frequency_domain[flipped]

    def _set_mz_domain(self):
        """Compute the m/z axis with the converter matching the spectrum label."""
        if self.label == Labels.bruker_frequency:
            to_mz = self._f_to_mz_bruker
        else:
            to_mz = self._f_to_mz
        self._mz_exp = to_mz()

    @property
    def transient_settings(self):
        """Transient settings stored in the spectrum parameters."""
        return self.parameters.transient

    @transient_settings.setter
    def transient_settings(self, instance_TransientSetting):
        self.parameters.transient = instance_TransientSetting

    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
        """Estimate the magnetron frequency of the mass spectrum.

        Parameters
        ----------
        max_magnetron_freq : float, optional
            The maximum magnetron frequency. Defaults to 50.
        magnetron_freq_bins : int, optional
            The number of bins to use for the histogram. Defaults to 300.

        Returns
        -------
        None
            Results are stored in magnetron_frequency and magnetron_frequency_sigma.

        Notes
        -----
        Examines all the picked peaks and the distances between consecutive peaks in the frequency domain.
        A histogram of the distances below 'max_magnetron_freq' is built with 'magnetron_freq_bins' bins.
        A Gaussian model is fit to this histogram; its center is (statistically, probably) the magnetron frequency.
        This appears to work well for nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
        """
        # NOTE(review): freq_exp is invoked as a callable here; confirm it is a
        # method and not a property on the base class.
        peaks = DataFrame(self.freq_exp(), columns=["Freq"])
        peaks["FreqDelta"] = peaks["Freq"].diff()

        below_cutoff = peaks["FreqDelta"] < max_magnetron_freq
        counts, edges = histogram(
            peaks.loc[below_cutoff, "FreqDelta"],
            bins=magnetron_freq_bins,
        )
        # histogram returns one more edge than bins; the left edges are used as x.
        left_edges = edges[:-1]

        gaussian = GaussianModel()
        initial_guess = gaussian.guess(counts, x=left_edges)
        fit_result = gaussian.fit(counts, initial_guess, x=left_edges)

        best = fit_result.best_values
        self.magnetron_frequency = best["center"]
        self.magnetron_frequency_sigma = best["sigma"]

A mass spectrum class when data entry is on frequency domain

Notes
  • Transform to m/z based on the settings stored at d_params
  • Stores the profile data and instrument settings
  • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
  • _mspeaks is populated under the hood by calling process_mass_spec method
  • iteration is null if _mspeaks is empty
Parameters
  • frequency_domain (list(float)): all datapoints in frequency domain in Hz
  • magnitude (list(float)): magnitude of each frequency datapoint
  • d_params (dict{str: float, int or str}): contains the instrument settings and processing settings
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
  • keep_profile (bool, optional): Whether to keep the profile data. Defaults to True.
Attributes
  • has_frequency (bool): Whether the mass spectrum has frequency data.
  • _frequency_domain (list(float)): Frequency domain in Hz
  • label (str): store label (Bruker, Midas Transient, see Labels class). It is kept across distinct processing points
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
  • See also: all the attributes of the MassSpecBase class
Methods
  • _set_mz_domain(). calculates the m_z based on the setting of d_params
  • process_mass_spec(). Process the mass spectrum.

see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()

MassSpecfromFreq( frequency_domain, magnitude, d_params, auto_process=True, keep_profile=True)
1361    def __init__(
1362        self,
1363        frequency_domain,
1364        magnitude,
1365        d_params,
1366        auto_process=True,
1367        keep_profile=True,
1368    ):
1369        super().__init__(None, magnitude, d_params)
1370
1371        self._frequency_domain = frequency_domain
1372        self.has_frequency = True
1373        self._set_mz_domain()
1374        self._sort_mz_domain()
1375
1376        self.magnetron_frequency = None
1377        self.magnetron_frequency_sigma = None
1378
1379        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
1380
1381        if auto_process:
1382            self.process_mass_spec(keep_profile=keep_profile)
has_frequency
magnetron_frequency
magnetron_frequency_sigma
transient_settings

Return the transient settings of the mass spectrum.

def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1409    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
1410        """Calculates the magnetron frequency of the mass spectrum.
1411
1412        Parameters
1413        ----------
1414        max_magnetron_freq : float, optional
1415            The maximum magnetron frequency. Defaults to 50.
1416        magnetron_freq_bins : int, optional
1417            The number of bins to use for the histogram. Defaults to 300.
1418
1419        Returns
1420        -------
1421        None
1422
1423        Notes
1424        -----
1425        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
1426        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
1427        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
1428        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
1429        """
1430        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
1431        ms_df["FreqDelta"] = ms_df["Freq"].diff()
1432
1433        freq_hist = histogram(
1434            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
1435            bins=magnetron_freq_bins,
1436        )
1437
1438        mod = GaussianModel()
1439        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
1440        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
1441        self.magnetron_frequency = out.best_values["center"]
1442        self.magnetron_frequency_sigma = out.best_values["sigma"]

Calculates the magnetron frequency of the mass spectrum.

Parameters
  • max_magnetron_freq (float, optional): The maximum magnetron frequency. Defaults to 50.
  • magnetron_freq_bins (int, optional): The number of bins to use for the histogram. Defaults to 300.
Returns
  • None
Notes

Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain. A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated. A Gaussian model is fit to this histogram - the center value of this fit is (statistically, probably) the magnetron frequency. This appears to work well for nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.

Inherited Members
MassSpecBase
mspeaks
is_calibrated
is_centroid
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
process_mass_spec
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_profile
mz_exp_pp
abundance_profile
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
tic
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecCentroid(MassSpecBase):
class MassSpecCentroid(MassSpecBase):
    """A mass spectrum class when the entry point is on centroid format

    Notes
    -----
    - Stores the centroid data and instrument settings
    - Profile-style accessors return zero/apex/zero "stick" triplets around each
      centroid (for plotting and inspection only); no peak shape is modelled
    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    - _mspeaks is populated under the hood by calling process_mass_spec method
    - iteration is null if _mspeaks is empty

    Parameters
    ----------
    data_dict : dict{str: list or numpy array}
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict{str: float, int or str}
        contains the instrument settings and processing settings
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Defaults to True.

    Attributes
    ----------
    label : str
        store label (Bruker, Midas Transient, see Labels class)
    _baseline_noise : float
        store baseline noise
    _baseline_noise_std : float
        store baseline noise std
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    _mspeaks : list
        A list of mass peaks.


    Methods
    ----------
    * process_mass_spec().
        Process the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSPeak objects using the centroid data.
    * __simulate_profile__data__().
        Build stick-shaped pseudo-profile data from centroids, intended for plotting and inspection purposes only.

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    """

    # Half-width (in m/z) of the zero-abundance points placed on each side of a
    # centroid when a stick-shaped pseudo-profile is generated.
    _STICK_HALF_WIDTH = 0.0000001

    def __init__(self, data_dict, d_params, auto_process=True):
        super().__init__([], [], d_params)

        self._set_parameters_objects(d_params)

        # Thermo centroid data arrives with its noise estimate in d_params;
        # for other labels it is computed in process_mass_spec().
        if self.label == Labels.thermo_centroid:
            self._baseline_noise = d_params.get("baseline_noise")
            self._baseline_noise_std = d_params.get("baseline_noise_std")

        self.is_centroid = True
        self.data_dict = data_dict
        self._mz_exp = data_dict[Labels.mz]
        self._abundance = data_dict[Labels.abundance]

        if auto_process:
            self.process_mass_spec()

    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
        """Build stick-shaped pseudo-profile data from centroid data.

        Notes
        -----
        No peak shape is modelled: each centroid becomes three points
        (zero, apex, zero) so that a line plot renders it as a stick.
        Intended for plotting and inspection purposes only.

        Parameters
        ----------
        exp_mz_centroid : list(float)
            list of m/z values
        magnitude_centroid : list(float)
            list of abundance values


        Returns
        -------
        x : list(float)
            list of m/z values (three per centroid)
        y : list(float)
            list of abundance values (three per centroid)
        """
        delta = self._STICK_HALF_WIDTH
        x, y = [], []
        for i, mz in enumerate(exp_mz_centroid):
            x.extend((mz - delta, mz, mz + delta))
            # Index (rather than zip) so a too-short magnitude list still raises.
            y.extend((0, magnitude_centroid[i], 0))
        return x, y

    @property
    def mz_exp_profile(self):
        """Return the stick-shaped m/z profile (three points per centroid)."""
        delta = self._STICK_HALF_WIDTH
        return [
            point for mz in self.mz_exp for point in (mz - delta, mz, mz + delta)
        ]

    @mz_exp_profile.setter
    def mz_exp_profile(self, _mz_exp):
        self._mz_exp = _mz_exp

    @property
    def abundance_profile(self):
        """Return the stick-shaped abundance profile (0, abundance, 0 per centroid)."""
        return [value for ab in self.abundance for value in (0, ab, 0)]

    @abundance_profile.setter
    def abundance_profile(self, abundance):
        self._abundance = abundance

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum."""
        return sum(self.abundance)

    def process_mass_spec(self):
        """Populate the mass peaks from the centroid data held in data_dict.

        Overrides the base-class processing: for profile and frequency based
        data the peaks are normally added by the peak-picking step, here they
        are created directly from the centroid entries that pass the
        configured noise threshold method.

        Raises
        ------
        Exception
            If the noise threshold method is "signal_noise" but no usable S/N
            data is present, or if the method is "log" for a spectrum whose
            label is not the Thermo centroid label.

        Notes
        -----
        data_dict is deleted from the instance at the end of this method.
        """
        data_dict = self.data_dict
        ion_charge = self.polarity
        threshold_method = self.parameters.mass_spectrum.noise_threshold_method

        # Optional columns: None when missing, empty, or filled with None only.
        rp_values = self._usable_column(data_dict.get(Labels.rp))
        s2n_values = self._usable_column(data_dict.get(Labels.s2n))

        if s2n_values is None and threshold_method == "signal_noise":
            raise Exception("Signal to Noise data is missing for noise thresholding")

        abun = array(data_dict.get(Labels.abundance)).astype(float)

        # Threshold used by the minima, relative and absolute abundance methods.
        abundance_threshold, factor = self.get_threshold(abun)

        abundance_methods = ("minima", "relative_abundance", "absolute_abundance")

        for index, mz in enumerate(data_dict.get(Labels.mz)):
            # Missing or falsy entries are reported as NaN.
            rp_i = np.nan
            if rp_values is not None and rp_values[index]:
                rp_i = float(rp_values[index])

            s2n_i = np.nan
            if s2n_values is not None and s2n_values[index]:
                s2n_i = float(s2n_values[index])

            if threshold_method in abundance_methods:
                keep_peak = abun[index] / factor >= abundance_threshold
            elif threshold_method == "signal_noise":
                keep_peak = (
                    s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
                )
            else:
                keep_peak = False

            if keep_peak:
                # A centroid peak has no separate start and end positions.
                massspec_indexes = (index, index, index)
                self.add_mspeak(
                    ion_charge,
                    mz,
                    abun[index],
                    rp_i,
                    s2n_i,
                    massspec_indexes,
                    ms_parent=self,
                )

        self.mspeaks = self._mspeaks
        self._dynamic_range = self.max_abundance / self.min_abundance
        self._set_nominal_masses_start_final_indexes()

        if self.label != Labels.thermo_centroid:
            if self.settings.noise_threshold_method == "log":
                raise Exception("log noise Not tested for centroid data")

            else:
                self._baseline_noise, self._baseline_noise_std = (
                    self.run_noise_threshold_calc()
                )

        del self.data_dict

    @staticmethod
    def _usable_column(values):
        """Return values if it holds at least one non-None entry, else None.

        Works for lists and numpy arrays alike; avoids evaluating the truth
        value of a whole array, which is ambiguous for multi-element arrays.
        """
        if values is None or len(values) == 0:
            return None
        if all(value is None for value in values):
            return None
        return values

A mass spectrum class when the entry point is on centroid format

Notes
  • Stores the centroid data and instrument settings
  • Simulate profile data based on Gaussian or Lorentzian peak shape
  • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
  • _mspeaks is populated under the hood by calling process_mass_spec method
  • iteration is null if _mspeaks is empty
Parameters
  • data_dict (dict{str: numpy array of float64}): contains keys [m/z, Abundance, Resolving Power, S/N]
  • d_params (dict{str: float, int or str}): contains the instrument settings and processing settings
  • auto_process (bool, optional): Whether to automatically process the mass spectrum. Defaults to True.
Attributes
  • label (str): store label (Bruker, Midas Transient, see Labels class)
  • _baseline_noise (float): store baseline noise
  • _baseline_noise_std (float): store baseline noise std
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
  • _mspeaks (list): A list of mass peaks.
Methods
  • process_mass_spec(). Process the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSPeak objects using the centroid data.
  • __simulate_profile__data__(). Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.

see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()

MassSpecCentroid(data_dict, d_params, auto_process=True)
1491    def __init__(self, data_dict, d_params, auto_process=True):
1492        super().__init__([], [], d_params)
1493
1494        self._set_parameters_objects(d_params)
1495
1496        if self.label == Labels.thermo_centroid:
1497            self._baseline_noise = d_params.get("baseline_noise")
1498            self._baseline_noise_std = d_params.get("baseline_noise_std")
1499
1500        self.is_centroid = True
1501        self.data_dict = data_dict
1502        self._mz_exp = data_dict[Labels.mz]
1503        self._abundance = data_dict[Labels.abundance]
1504
1505        if auto_process:
1506            self.process_mass_spec()
is_centroid
data_dict
mz_exp_profile

Return the m/z profile of the mass spectrum.

abundance_profile

Return the abundance profile of the mass spectrum.

tic

Return the total ion current of the mass spectrum.

def process_mass_spec(self):
1575    def process_mass_spec(self):
1576        """Process the mass spectrum."""
1577        import tqdm
1578
1579        # overwrite process_mass_spec
1580        # mspeak objs are usually added inside the PeaKPicking class
1581        # for profile and freq based data
1582        data_dict = self.data_dict
1583        ion_charge = self.polarity
1584
1585        # Check if resolving power is present
1586        rp_present = True
1587        if not data_dict.get(Labels.rp):
1588            rp_present = False
1589        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
1590            data_dict.get(Labels.rp)
1591        ):
1592            rp_present = False
1593
1594        # Check if s2n is present
1595        s2n_present = True
1596        if not data_dict.get(Labels.s2n):
1597            s2n_present = False
1598        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
1599            data_dict.get(Labels.s2n)
1600        ):
1601            s2n_present = False
1602
1603        # Warning if no s2n data but noise thresholding is set to signal_noise
1604        if (
1605            not s2n_present
1606            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1607        ):
1608            raise Exception("Signal to Noise data is missing for noise thresholding")
1609
1610        # Pull out abundance data
1611        abun = array(data_dict.get(Labels.abundance)).astype(float)
1612
1613        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
1614        abundance_threshold, factor = self.get_threshold(abun)
1615
1616        # Set rp_i and s2n_i to None which will be overwritten if present
1617        rp_i, s2n_i = np.nan, np.nan
1618        for index, mz in enumerate(data_dict.get(Labels.mz)):
1619            if rp_present:
1620                if not data_dict.get(Labels.rp)[index]:
1621                    rp_i = np.nan
1622                else:
1623                    rp_i = float(data_dict.get(Labels.rp)[index])
1624            if s2n_present:
1625                if not data_dict.get(Labels.s2n)[index]:
1626                    s2n_i = np.nan
1627                else:
1628                    s2n_i = float(data_dict.get(Labels.s2n)[index])
1629
1630            # centroid peak does not have start and end peak index pos
1631            massspec_indexes = (index, index, index)
1632
1633            # Add peaks based on the noise thresholding method
1634            if (
1635                self.parameters.mass_spectrum.noise_threshold_method
1636                in ["minima", "relative_abundance", "absolute_abundance"]
1637                and abun[index] / factor >= abundance_threshold
1638            ):
1639                self.add_mspeak(
1640                    ion_charge,
1641                    mz,
1642                    abun[index],
1643                    rp_i,
1644                    s2n_i,
1645                    massspec_indexes,
1646                    ms_parent=self,
1647                )
1648            if (
1649                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
1650                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
1651            ):
1652                self.add_mspeak(
1653                    ion_charge,
1654                    mz,
1655                    abun[index],
1656                    rp_i,
1657                    s2n_i,
1658                    massspec_indexes,
1659                    ms_parent=self,
1660                )
1661
1662        self.mspeaks = self._mspeaks
1663        self._dynamic_range = self.max_abundance / self.min_abundance
1664        self._set_nominal_masses_start_final_indexes()
1665
1666        if self.label != Labels.thermo_centroid:
1667            if self.settings.noise_threshold_method == "log":
1668                raise Exception("log noise Not tested for centroid data")
1669                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1670
1671            else:
1672                self._baseline_noise, self._baseline_noise_std = (
1673                    self.run_noise_threshold_calc()
1674                )
1675
1676        del self.data_dict

Process the mass spectrum.

Inherited Members
MassSpecBase
mspeaks
is_calibrated
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
mz_exp
freq_exp_profile
freq_exp_pp
mz_exp_pp
abundance_profile_pp
abundance
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes
class MassSpecCentroidLowRes(MassSpecCentroid):
class MassSpecCentroidLowRes(MassSpecCentroid):
    """A mass spectrum class for low-resolution data entering in centroid format.

    Notes
    -----
    Does not store MSPeak objects; indexing yields (m/z, abundance) pairs
    instead.

    Parameters
    ----------
    data_dict : dict {str: numpy array float64}
        contains keys [m/z, Abundance, Resolving Power, S/N]
    d_params : dict {str: float, int or str}
        contains the instrument settings and processing settings

    Attributes
    ----------
    _processed_tic : float
        store processed total ion current, None until it is assigned
    _abundance : ndarray
        The abundance values of the mass spectrum.
    _mz_exp : ndarray
        The m/z values of the mass spectrum.
    """

    def __init__(self, data_dict, d_params):
        # NOTE(review): the parent constructor is not called here; only the
        # parameter objects and the two centroid arrays are set up.
        self._set_parameters_objects(d_params)
        self._mz_exp = array(data_dict.get(Labels.mz))
        self._abundance = array(data_dict.get(Labels.abundance))
        self._processed_tic = None

    def __len__(self):
        """Return the number of m/z values held by the spectrum."""
        return len(self.mz_exp)

    def __getitem__(self, position):
        """Return the (m/z, abundance) pair stored at ``position``."""
        return (self.mz_exp[position], self.abundance[position])

    @staticmethod
    def _round_to_int(value):
        """Round ``value`` to zero decimals and convert the result to int."""
        return int(round(value, 0))

    @property
    def mz_exp(self):
        """Return the m/z values of the mass spectrum."""
        return self._mz_exp

    @property
    def abundance(self):
        """Return the abundance values of the mass spectrum."""
        return self._abundance

    @property
    def processed_tic(self):
        """Return the processed total ion current of the mass spectrum.

        Returns
        -------
        float or None
            None when no processed TIC has been assigned; the stored value
            itself when it is a scalar; otherwise the sum of the stored
            values.
        """
        stored = self._processed_tic
        if stored is None:
            # __init__ leaves the attribute unset; summing None would raise.
            return None
        try:
            return sum(stored)
        except TypeError:
            # A scalar (the documented float) is not iterable: it already is
            # the total, so hand it back unchanged.
            return stored

    @property
    def tic(self):
        """Return the total ion current of the mass spectrum.

        The stored processed TIC is returned when it is truthy; otherwise the
        abundance values are summed.
        """
        if self._processed_tic:
            return self._processed_tic
        else:
            return sum(self.abundance)

    @property
    def mz_abun_tuples(self):
        """Return the m/z and abundance values of the mass spectrum as a list of tuples.

        Both members of every tuple are rounded to the nearest integer.
        """
        # Each value is converted on its own: handing both to a single int()
        # call makes the second one the ``base`` argument and raises TypeError.
        to_int = self._round_to_int
        return [(to_int(pair[0]), to_int(pair[1])) for pair in self]

    @property
    def mz_abun_dict(self):
        """Return the m/z and abundance values of the mass spectrum as a dictionary.

        Keys are integer-rounded m/z values and values are integer-rounded
        abundances; a later pair that rounds to an already present m/z key
        replaces the earlier entry.
        """
        to_int = self._round_to_int
        return {to_int(pair[0]): to_int(pair[1]) for pair in self}

A mass spectrum class for data whose entry point is the low-resolution centroid format.

Notes

Does not store MSPeak objects; iterates over (m/z, abundance) pairs instead.

Parameters
  • data_dict (dict {str: numpy array float64}): contains keys [m/z, Abundance, Resolving Power, S/N]
  • d_params (dict {str: float, int or str}): contains the instrument settings and processing settings
Attributes
  • _processed_tic (float): store processed total ion current
  • _abundance (ndarray): The abundance values of the mass spectrum.
  • _mz_exp (ndarray): The m/z values of the mass spectrum.
MassSpecCentroidLowRes(data_dict, d_params)
1703    def __init__(self, data_dict, d_params):
1704        self._set_parameters_objects(d_params)
1705        self._mz_exp = array(data_dict.get(Labels.mz))
1706        self._abundance = array(data_dict.get(Labels.abundance))
1707        self._processed_tic = None
mz_exp

Return the m/z values of the mass spectrum.

abundance

Return the abundance values of the mass spectrum.

processed_tic

Return the processed total ion current of the mass spectrum.

tic

Return the total ion current of the mass spectrum.

mz_abun_tuples

Return the m/z and abundance values of the mass spectrum as a list of tuples.

mz_abun_dict

Return the m/z and abundance values of the mass spectrum as a dictionary.

Inherited Members
MassSpecCentroid
is_centroid
data_dict
mz_exp_profile
abundance_profile
process_mass_spec
MassSpecBase
mspeaks
is_calibrated
has_frequency
calibration_order
calibration_points
calibration_ref_mzs
calibration_meas_mzs
calibration_RMS
calibration_segment
calibration_raw_error_median
calibration_raw_error_stdev
set_indexes
reset_indexes
add_mspeak
reset_cal_therms
clear_molecular_formulas
cal_noise_threshold
parameters
set_parameter_from_json
set_parameter_from_toml
mspeaks_settings
settings
molecular_search_settings
mz_cal_profile
mz_cal
freq_exp_profile
freq_exp_pp
mz_exp_pp
abundance_profile_pp
freq_exp
resolving_power
signal_to_noise
nominal_mz
get_mz_and_abundance_peaks_tuples
kmd
kendrick_mass
max_mz_exp
min_mz_exp
max_abundance
max_signal_to_noise
most_abundant_mspeak
min_abundance
dynamic_range
baseline_noise
baseline_noise_std
Aterm
Bterm
Cterm
filename
dir_location
sort_by_mz
sort_by_abundance
check_mspeaks_warning
check_mspeaks
remove_assignment_by_index
filter_by_index
filter_by_mz
filter_by_s2n
filter_by_abundance
filter_by_max_resolving_power
filter_by_mean_resolving_power
filter_by_min_resolving_power
filter_by_noise_threshold
find_peaks
change_kendrick_base_all_mspeaks
get_nominal_mz_first_last_indexes
get_masses_count_by_nominal_mass
datapoints_count_by_nominal_mz
get_nominal_mass_indexes
plot_centroid
plot_profile_and_noise_threshold
plot_mz_domain_profile
to_excel
to_hdf
to_csv
to_pandas
to_dataframe
to_json
parameters_json
parameters_toml
corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc
percentile_assigned
resolving_power_calc
number_average_molecular_weight
weight_average_molecular_weight
corems.mass_spectrum.calc.PeakPicking.PeakPicking
prepare_peak_picking_data
cut_mz_domain_peak_picking
legacy_cut_mz_domain_peak_picking
extrapolate_axis
extrapolate_axes_for_pp
do_peak_picking
find_minima
linear_fit_calc
calculate_resolving_power
cal_minima
calc_centroid
get_threshold
algebraic_quadratic
find_apex_fit_quadratic
check_prominence
use_the_max
calc_centroid_legacy
corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc
get_noise_threshold
cut_mz_domain_noise
get_noise_average
get_abundance_minima_centroid
run_log_noise_threshold_calc
run_noise_threshold_calc
corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping
mz_odd_even_index_lists
calc_error
populate_kendrick_index_dict_error
populate_kendrick_index_dict_rounding
sort_abundance_kendrick_dict
kendrick_groups_indexes