corems.ms_peak.factory.MSPeakClasses

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 12, 2019"
  3
  4import math
  5
  6from numpy import nan
  7
  8from corems.mass_spectra.calc import SignalProcessing as sp
  9from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula
 10from corems.ms_peak.calc.MSPeakCalc import MSPeakCalculation
 11
 12
 13class _MSPeak(MSPeakCalculation):
 14    """A class representing a peak in a mass spectrum.
 15
 16    Parameters:
 17    ----------
 18    ion_charge : int
 19        The ion charge of the peak.
 20    mz_exp : float
 21        The experimental m/z value of the peak.
 22    abundance : float
 23        The abundance of the peak.
 24    resolving_power : float
 25        The resolving power of the peak.
 26    signal_to_noise : float
 27        The signal-to-noise ratio of the peak.
 28    indexes : list[int]
 29        The profile indexes of the peak.
 30    index : int
 31        The index of the peak.
 32    ms_parent : MSParent, optional
 33        The parent mass spectrum object.
 34    exp_freq : float, optional
 35        The experimental frequency of the peak.
 36
 37    Methods:
 38    -------
 39    * __len__().
 40        Returns the number of molecular formulas associated with the peak.
 41    * __setitem__(position, molecular_formula_obj).
 42        Sets the molecular formula at the specified position.
 43    * __getitem__(position) -> MolecularFormula.
 44        Returns the molecular formula at the specified position.
 45    * change_kendrick_base(kendrick_dict_base).
 46        Changes the kendrick base for the peak.
 47    * add_molecular_formula(molecular_formula_obj).
 48        Adds a molecular formula to the peak.
 49    * remove_molecular_formula(mf_obj).
 50        Removes a molecular formula from the peak.
 51    * clear_molecular_formulas().
 52        Clears all molecular formulas associated with the peak.
 53    * plot_simulation(sim_type="lorentz", ax=None, color="green", oversample_multiplier=1, delta_rp=0, mz_overlay=1).
 54        Plots the simulated peak.
 55    * plot(ax=None, color="black", derivative=True, deriv_color='red').
 56        Plots the peak.
 57    * best_molecular_formula_candidate().
 58        Returns the best molecular formula candidate for the peak.
 59    """
 60
 61    def __init__(
 62        self,
 63        ion_charge,
 64        mz_exp,
 65        abundance,
 66        resolving_power,
 67        signal_to_noise,
 68        indexes,
 69        index,
 70        ms_parent=None,
 71        exp_freq=None,
 72    ):
 73        self._ms_parent = ms_parent
 74        # needed to create the object
 75        self.ion_charge = int(ion_charge)
 76        self._mz_exp = float(mz_exp)
 77        self.mass = float(mz_exp) / float(ion_charge)
 78        self.abundance = float(abundance)
 79        self.resolving_power = float(resolving_power)
 80        self.signal_to_noise = float(signal_to_noise)
 81        # profile indexes
 82        self.peak_left_index = int(indexes[0])
 83        self.peak_apex_index = int(indexes[1])
 84        self.peak_right_index = int(indexes[2])
 85
 86        # mass spec obj index
 87        self.index = int(index)
 88        # parent mass spectrum obj instance
 89
 90        # updated after mass error prediction'
 91        self.predicted_std = None
 92        # updated after calibration'
 93        self.mz_cal = None
 94        # updated individual calculation'
 95        self.baseline_noise = None
 96
 97        if exp_freq:
 98            self.freq_exp = float(exp_freq)
 99
100        if self._ms_parent is not None:
101            kendrick_dict_base = self._ms_parent.mspeaks_settings.kendrick_base
102        else:
103            kendrick_dict_base = {"C": 1, "H": 2}
104        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
105            kendrick_dict_base
106        )
107
108        "updated after molecular formula ID"
109
110        self.molecular_formulas = []
111        self._confidence_score = None
112        # placeholder for found isotopologues index
113        self.isotopologue_indexes = []
114        # placeholder for found isotopologues molecular formula obj
115        self.found_isotopologues = {}
116
117        # Label for what type of peak it is - real signal, noise, sinc wiggle, magnetron or harmonic peak, etc.
118        self.peak_type = None
119
120    def __len__(self):
121        return len(self.molecular_formulas)
122
123    def __setitem__(self, position, molecular_formula_obj):
124        self.molecular_formulas[position] = molecular_formula_obj
125
126    def __getitem__(self, position) -> MolecularFormula:
127        return self.molecular_formulas[position]
128
129    def change_kendrick_base(self, kendrick_dict_base):
130        """Changes the kendrick base for the peak.
131
132        Parameters:
133        ----------
134        kendrick_dict_base : dict
135            The kendrick base dictionary.
136            Default is {"C": 1, "H": 2}. (CH2)
137        """
138        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
139            kendrick_dict_base
140        )
141
142    def add_molecular_formula(self, molecular_formula_obj):
143        """Adds a molecular formula to the peak.
144
145        Parameters:
146        ----------
147        molecular_formula_obj : MolecularFormula
148            The molecular formula object to be added.
149
150        Returns:
151        -------
152        MolecularFormula
153            The molecular formula object added.
154
155        """
156        # freeze state
157        molecular_formula_obj._mspeak_parent = self
158
159        # new_mol_formula = deepcopy(molecular_formula_obj)
160        # add link mass spectrum obj instance
161
162        # new_mol_formula.mspeak_parent = self
163
164        self.molecular_formulas.append(molecular_formula_obj)
165
166        return molecular_formula_obj
167
168    def remove_molecular_formula(self, mf_obj):
169        """Removes a molecular formula from the peak.
170
171        Parameters:
172        ----------
173        mf_obj : MolecularFormula
174            The molecular formula object to be removed.
175        """
176        self.molecular_formulas.remove(mf_obj)
177
178    def clear_molecular_formulas(self):
179        """Clears all molecular formulas associated with the peak."""
180        self.molecular_formulas = []
181
182    @property
183    def mz_exp(self):
184        """The experimental m/z value of the peak."""
185        if self.mz_cal:
186            return self.mz_cal
187        else:
188            return self._mz_exp
189
190    @mz_exp.setter
191    def mz_exp(self, mz_exp):
192        """Sets the experimental m/z value of the peak."""
193        self._mz_exp = mz_exp
194
195    @property
196    def area(self):
197        """The area of the peak."""
198        if self._ms_parent.is_centroid:
199            return nan
200        else:
201            return self.calc_area()
202
203    @property
204    def nominal_mz_exp(self):
205        """The experimental nominal (integer) m/z value of the peak."""
206        return math.floor(self.mz_exp)
207
208    @property
209    def kmd(self):
210        """The Kendrick mass defect of the peak."""
211        return self._kmd
212
213    @property
214    def kendrick_mass(self):
215        """The Kendrick mass of the peak."""
216        return self._kendrick_mass
217
218    @property
219    def knm(self):
220        """The Kendrick nominal mass of the peak."""
221        return self._nominal_km
222
223    @property
224    def is_assigned(self) -> bool:
225        """Whether the peak is assigned or not."""
226        return bool(self.molecular_formulas)
227
228    def plot_simulation(
229        self,
230        sim_type="lorentz",
231        ax=None,
232        color="green",
233        oversample_multiplier=1,
234        delta_rp=0,
235        mz_overlay=1,
236    ):
237        """Plots the simulated peak.
238
239        Parameters:
240        ----------
241        sim_type : str, optional
242            The type of simulation to be plotted.
243            Default is "lorentz".
244        ax : matplotlib.axes, optional
245            The axes to plot the simulated peak.
246            Default is None.
247        color : str, optional
248            The color of the simulated peak.
249            Default is "green".
250        oversample_multiplier : int, optional
251            The oversample multiplier.
252            Default is 1.
253        delta_rp : int, optional
254            A delta value to the resolving power
255            Default is 0.
256        mz_overlay : int, optional
257            The mz overlay.
258            Default is 1.
259
260        Returns:
261        -------
262        matplotlib.axes
263            The axes where the simulated peak was plotted.
264
265        """
266        if self._ms_parent:
267            import matplotlib.pyplot as plt
268
269            x, y = eval(
270                "self."
271                + sim_type
272                + "(oversample_multiplier="
273                + str(oversample_multiplier)
274                + ", delta_rp="
275                + str(delta_rp)
276                + ", mz_overlay="
277                + str(mz_overlay)
278                + ")"
279            )
280
281            if ax is None:
282                ax = plt.gca()
283            ax.plot(x, y, color=color, label="Simulation")
284            ax.set(xlabel="m/z", ylabel="abundance")
285
286            plt.legend()
287            return ax
288
289    def plot(
290        self,
291        ax=None,
292        color: str = "black",
293        derivative: bool = True,
294        deriv_color: str = "red",
295    ):  # pragma: no cover
296        """Plots the peak.
297
298        Parameters:
299        ----------
300        ax : matplotlib.axes, optional
301            The axes to plot the peak.
302            Default is None.
303        color : str, optional
304            The color of the peak.
305            Default is "black".
306        derivative : bool, optional
307            Whether to plot the derivative of the peak.
308            Default is True.
309        deriv_color : str, optional
310            The color of the derivative of the peak.
311            Default is "red".
312
313        Returns:
314        -------
315        matplotlib.axes
316            The axes where the peak was plotted.
317
318        """
319        if self._ms_parent:
320            import matplotlib.pyplot as plt
321
322            if ax is None:
323                ax = plt.gca()
324            x = self._ms_parent.mz_exp_profile[
325                self.peak_left_index : self.peak_right_index
326            ]
327            y = self._ms_parent.abundance_profile[
328                self.peak_left_index : self.peak_right_index
329            ]
330
331            ax.plot(x, y, color=color, label="Data")
332            ax.set(xlabel="m/z", ylabel="abundance")
333            if derivative and not self._ms_parent.is_centroid:
334                dy = sp.derivate(
335                    self._ms_parent.abundance_profile[
336                        self.peak_left_index : self.peak_right_index + 1
337                    ]
338                )
339                ax.plot(x, dy, c=deriv_color)
340            else:
341                ax.plot(
342                    (self.mz_exp, self.mz_exp),
343                    (0, self.abundance),
344                    color=color,
345                    label="Data",
346                )
347
348            # plt.legend()
349
350            return ax
351
352        else:
353            raise AttributeError(
354                "No parent mass spectrum object found to plot the peak."
355            )
356
357    @property
358    def best_molecular_formula_candidate(self):
359        """The best molecular formula candidate for the peak.
360
361        Returns a single best formula candidate based on the user defined score method.
362        Score method is set with:
363            molecular_search_settings.score_method
364
365        Returns
366        -------
367        MolecularFormula
368            The best molecular formula candidate for the peak.
369
370        """
371        if (
372            self._ms_parent.molecular_search_settings.score_method
373            == "N_S_P_lowest_error"
374        ):
375            return self.cia_score_N_S_P_error()
376
377        elif (
378            self._ms_parent.molecular_search_settings.score_method == "S_P_lowest_error"
379        ):
380            return self.cia_score_S_P_error()
381
382        elif self._ms_parent.molecular_search_settings.score_method == "lowest_error":
383            return self.molecular_formula_lowest_error()
384
385        elif (
386            self._ms_parent.molecular_search_settings.score_method == "air_filter_error"
387        ):
388            return self.molecular_formula_air_filter()
389
390        elif (
391            self._ms_parent.molecular_search_settings.score_method
392            == "water_filter_error"
393        ):
394            return self.molecular_formula_water_filter()
395
396        elif (
397            self._ms_parent.molecular_search_settings.score_method
398            == "earth_filter_error"
399        ):
400            return self.molecular_formula_earth_filter()
401
402        elif self._ms_parent.molecular_search_settings.score_method == "prob_score":
403            return self.molecular_formula_highest_prob_score()
404        else:
405            raise TypeError(
406                "Unknown score method selected: % s, \
407                            Please check score_method at \
408                            encapsulation.settings.molecular_id.MolecularIDSettings.MolecularFormulaSearchSettings",
409                self._ms_parent.parameters.molecular_search.score_method,
410            )
411
412
class ICRMassPeak(_MSPeak):
    """A peak belonging to an ICR mass spectrum."""

    def __init__(self, *args, ms_parent=None, exp_freq=None):
        # Positional arguments are handed to the base peak unchanged; the two
        # optional values are keyword-only here.
        super().__init__(*args, ms_parent=ms_parent, exp_freq=exp_freq)

    def resolving_power_calc(self, B, T):
        """Compute the theoretical resolving power of the peak.

        Parameters
        ----------
        B: float
            Magnetic Field Strength (Tesla)
        T: float
            transient time

        Returns
        -------
        float
            Theoretical resolving power of the peak.

        References
        ----------
        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
        """
        # Charge appears in both terms: the numerator scales with the charge
        # and the denominator is m/z multiplied by the charge.
        charge_field_time = 1.274e7 * self.ion_charge * B * T
        mz_times_charge = self.mz_exp * self.ion_charge
        return charge_field_time / mz_times_charge

    def set_calc_resolving_power(self, B: float, T: float):
        """Overwrite ``resolving_power`` with the theoretical value for B and T."""
        theoretical_rp = self.resolving_power_calc(B, T)
        self.resolving_power = theoretical_rp

    def _mz_to_f_bruker(self):
        """[Not Functional] Convert a peak m/z value to frequency.

        Disabled: always raises RuntimeError. The unverified draft is kept
        in the comments of the method body for reference.
        """
        # Draft implementation, currently broken - not sure why:
        #
        # if self.mz_cal:
        #     mz_val = self.mz_cal
        # else:
        #     mz_val = self.mz_exp
        # Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm
        # # Check if the Bterm of Ledford equation scales with the ICR trap
        # # voltage or not then Bterm = Bterm*trap_voltage
        #
        # if Cterm == 0:
        #     if Bterm == 0:
        #         # uncalibrated data
        #         freq_domain = Aterm / mz_val
        #     else:
        #         freq_domain = (Aterm / (mz_val)) - Bterm
        #
        # # @will I need your insight here, not sure what is the inverted
        # # ledford equation that Bruker refers to
        # else:
        #     freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm
        #
        # return freq_domain
        raise RuntimeError("Function not confirmed to work, disabled.")
474
475
class TOFMassPeak(_MSPeak):
    """A class representing a peak in a TOF mass spectrum."""

    def __init__(self, *args, ms_parent=None, exp_freq=None):
        """Build the peak; positional arguments are passed to the base peak.

        Parameters
        ----------
        *args
            Positional arguments forwarded unchanged to the base class.
        ms_parent : optional
            The parent mass spectrum object. Accepted by keyword so the
            signature matches ICRMassPeak.
        exp_freq : float, optional
            The experimental frequency of the peak.
        """
        # Forward ms_parent only when it was given, so callers that already
        # pass the parent positionally inside *args keep working.
        parent_kwargs = {} if ms_parent is None else {"ms_parent": ms_parent}
        super().__init__(*args, exp_freq=exp_freq, **parent_kwargs)

    def set_calc_resolving_power(self):
        """Placeholder: nothing is calculated or stored; always returns 0."""
        return 0
484
485
class OrbiMassPeak(_MSPeak):
    """A class representing a peak in an Orbitrap mass spectrum."""

    def __init__(self, *args, ms_parent=None, exp_freq=None):
        """Build the peak; positional arguments are passed to the base peak.

        Parameters
        ----------
        *args
            Positional arguments forwarded unchanged to the base class.
        ms_parent : optional
            The parent mass spectrum object. Accepted by keyword so the
            signature matches ICRMassPeak.
        exp_freq : float, optional
            The experimental frequency of the peak.
        """
        # Forward ms_parent only when it was given, so callers that already
        # pass the parent positionally inside *args keep working.
        parent_kwargs = {} if ms_parent is None else {"ms_parent": ms_parent}
        super().__init__(*args, exp_freq=exp_freq, **parent_kwargs)

    def set_calc_resolving_power(self):
        """Placeholder: nothing is calculated or stored; always returns 0."""
        return 0
class ICRMassPeak(_MSPeak):
414class ICRMassPeak(_MSPeak):
415    """A class representing a peak in an ICR mass spectrum."""
416
417    def __init__(self, *args, ms_parent=None, exp_freq=None):
418        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
419
420    def resolving_power_calc(self, B, T):
421        """Calculate the theoretical resolving power of the peak.
422
423        Parameters
424        ----------
425        T: float
426            transient time
427        B: float
428            Magnetic Field Strength (Tesla)
429
430        Returns
431        -------
432        float
433            Theoretical resolving power of the peak.
434
435        References
436        ----------
437        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
438            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
439        """
440        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)
441
442    def set_calc_resolving_power(self, B: float, T: float):
443        """Set the resolving power of the peak to the calculated one."""
444        self.resolving_power = self.resolving_power_calc(B, T)
445
446    def _mz_to_f_bruker(self):
447        """[Not Functional] Convert a peak m/z value to frequency
448
449        # Currently Broken - Not sure why
450        if self.mz_cal:
451            mz_val = self.mz_cal
452        else:
453            mz_val = self.mz_exp
454        Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm
455        # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage
456
457        if Cterm == 0:
458
459            if Bterm == 0:
460                #uncalibrated data
461                freq_domain = Aterm / mz_val
462
463            else:
464
465                freq_domain = (Aterm / (mz_val)) - Bterm
466
467        # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to
468        else:
469
470            freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm
471
472        return freq_domain
473        """
474        raise RuntimeError("Function not confirmed to work, disabled.")

A class representing a peak in an ICR mass spectrum.

ICRMassPeak(*args, ms_parent=None, exp_freq=None)
417    def __init__(self, *args, ms_parent=None, exp_freq=None):
418        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
def resolving_power_calc(self, B, T):
420    def resolving_power_calc(self, B, T):
421        """Calculate the theoretical resolving power of the peak.
422
423        Parameters
424        ----------
425        T: float
426            transient time
427        B: float
428            Magnetic Field Strength (Tesla)
429
430        Returns
431        -------
432        float
433            Theoretical resolving power of the peak.
434
435        References
436        ----------
437        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
438            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
439        """
440        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)

Calculate the theoretical resolving power of the peak.

Parameters
  • T (float): transient time
  • B (float): Magnetic Field Strength (Tesla)
Returns
  • float: Theoretical resolving power of the peak.
References
  1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
def set_calc_resolving_power(self, B: float, T: float):
442    def set_calc_resolving_power(self, B: float, T: float):
443        """Set the resolving power of the peak to the calculated one."""
444        self.resolving_power = self.resolving_power_calc(B, T)

Set the resolving power of the peak to the calculated one.

class TOFMassPeak(_MSPeak):
477class TOFMassPeak(_MSPeak):
478    """A class representing a peak in a TOF mass spectrum."""
479
480    def __init__(self, *args, exp_freq=None):
481        super().__init__(*args, exp_freq=exp_freq)
482
483    def set_calc_resolving_power(self):
484        return 0

A class representing a peak in a TOF mass spectrum.

TOFMassPeak(*args, exp_freq=None)
480    def __init__(self, *args, exp_freq=None):
481        super().__init__(*args, exp_freq=exp_freq)
def set_calc_resolving_power(self):
483    def set_calc_resolving_power(self):
484        return 0
class OrbiMassPeak(_MSPeak):
487class OrbiMassPeak(_MSPeak):
488    """A class representing a peak in an Orbitrap mass spectrum."""
489
490    def __init__(self, *args, exp_freq=None):
491        super().__init__(*args, exp_freq=exp_freq)
492
493    def set_calc_resolving_power(self):
494        return 0

A class representing a peak in an Orbitrap mass spectrum.

OrbiMassPeak(*args, exp_freq=None)
490    def __init__(self, *args, exp_freq=None):
491        super().__init__(*args, exp_freq=exp_freq)
def set_calc_resolving_power(self):
493    def set_calc_resolving_power(self):
494        return 0