corems.molecular_formula.calc.MolecularFormulaCalc

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 24, 2019"
  3
  4
  5import warnings
  6
  7import IsoSpecPy
  8from numpy import array, exp, isnan, nextafter, power
  9
 10# this is to handle both versions of IsoSpecPy, 2.0.2 and 2.2.2
 11# TODO in a future release remove support for legacy isospecpy
 12from packaging import version
 13
 14from corems.encapsulation.constant import Atoms, Labels
 15from corems.encapsulation.factory.parameters import MSParameters
 16from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity
 17
 18isospec_version = IsoSpecPy.__version__
 19if version.parse(isospec_version) > version.parse("2.0.2"):
 20    legacy_isospec = False
 21else:
 22    legacy_isospec = True
 23if legacy_isospec:
 24    from IsoSpecPy import IsoSpecPy
 25
 26    warnings.warn(
 27        f"IsoSpecPy version {isospec_version} is installed, and support is deprecated. Please update to 2.2.2",
 28        DeprecationWarning,
 29    )
 30
 31
 32class MolecularFormulaCalc:
 33    """Class of calculations related to molecular formula
 34
 35    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula
 36
 37    Attributes
 38    ----------
 39    mz_calc : float
 40        The m/z value of the molecular formula.
 41    neutral_mass : float
 42        The neutral mass of the molecular formula.
 43    ion_charge : int
 44        The ion charge of the molecular formula.
 45    _external_mz : float
 46        The externally provided m/z value of the molecular formula.
 47    _d_molecular_formula : dict
 48        The dictionary representation of the molecular formula.
 49    _mspeak_parent : object
 50        The parent MS peak object associated with the molecular formula.
 51    _assignment_mass_error : float
 52        The mass error of the molecular formula.
 53
 54    Methods
 55    -------
 56    * _calc_resolving_power_low_pressure(B, T)
 57        Calculate the resolving power at low pressure.
 58    * _calc_resolving_power_high_pressure(B, T)
 59        Calculate the resolving power at high pressure.
 60    * _adduct_mz(adduct_atom, ion_charge)
 61        Get the m/z value of an adducted ion version of the molecular formula.
 62    * _protonated_mz(ion_charge)
 63        Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
 64    * _radical_mz(ion_charge)
 65        Get the m/z value of a radical ion version of the molecular formula.
 66    * _neutral_mass()
 67        Get the neutral mass of the molecular formula.
 68    * _calc_mz()
 69        Get the m/z value of the molecular formula.
 70    * _calc_assignment_mass_error(method='ppm')
 71        Calculate the mass error of the molecular formula.
 72    * _calc_mz_confidence(mean=0)
 73        Calculate the m/z confidence of the molecular formula.
 74    * _calc_isotopologue_confidence()
 75        Calculate the isotopologue confidence of the molecular formula.
 76    * normalize_distance(dist, dist_range)
 77        Normalize the distance value.
 78    * subtract_formula(formula_obj, formated=True)
 79        Subtract a formula from the current formula object.
 80    * _calc_average_mz_score()
 81        Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
 82    """
 83
 84    def _calc_resolving_power_low_pressure(self, B, T):
 85        """
 86        Calculate the resolving power at low pressure.
 87
 88        Parameters
 89        ----------
 90        B : float
 91            Magnetic Strength (Testa).
 92        T : float
 93            Transient time (seconds).
 94
 95        """
 96        return (1.274 * 10000000 * B * T) * (1 / self.mz_calc)
 97
 98    def _calc_resolving_power_high_pressure(self, B, T):
 99        """
100        Calculate the resolving power at high pressure.
101
102        Parameters
103        ----------
104        B : float
105            Magnetic Strength (Testa).
106        T : float
107            Transient time (seconds).
108
109        """
110        return (2.758 * 10000000 * B * T) * (1 / self.mz_calc)
111
112    def _adduct_mz(self, adduct_atom, ion_charge):
113        """Get the m/z value of an adducted ion version of the molecular formula.
114
115        Parameters
116        ----------
117        adduct_atom : str
118            The adduct atom.
119        ion_charge : int
120            The ion charge.
121
122        """
123        return (
124            self.neutral_mass
125            + (Atoms.atomic_masses.get(adduct_atom))
126            + (ion_charge * -1 * Atoms.electron_mass)
127        ) / abs(ion_charge)
128
129    def _protonated_mz(self, ion_charge):
130        """Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
131
132        Parameters
133        ----------
134        ion_charge : int
135            The ion charge.
136        """
137        return (
138            self.neutral_mass
139            + (ion_charge * Atoms.atomic_masses.get("H"))
140            + (ion_charge * -1 * Atoms.electron_mass)
141        ) / abs(ion_charge)
142
143    def _radical_mz(self, ion_charge):
144        """Get the m/z value of a radical ion version of the molecular formula.
145
146        Parameters
147        ----------
148        ion_charge : int
149            The ion charge.
150        """
151        return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(
152            ion_charge
153        )
154
155    def _neutral_mass(self):
156        """Get the neutral mass of the molecular formula."""
157
158        mass = 0
159
160        for each_atom in self._d_molecular_formula.keys():
161            if each_atom != Labels.ion_type and each_atom != "HC":
162                try:
163                    mass = mass + Atoms.atomic_masses[
164                        each_atom
165                    ] * self._d_molecular_formula.get(each_atom)
166
167                except:
168                    print(Labels.ion_type, each_atom)
169
170        return mass
171
172    def _calc_mz(self):
173        """Get the m/z value of the molecular formula, based on the ion charge and ion type."""
174
175        if self.ion_charge is not None:
176            if self._external_mz:
177                return self._external_mz
178
179            else:
180                ion_type = self._d_molecular_formula.get(Labels.ion_type)
181
182                if ion_type == Labels.protonated_de_ion:
183                    return self.protonated_mz
184
185                elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion:
186                    return self.radical_mz
187
188                elif ion_type == Labels.neutral:
189                    return self.neutral_mass
190
191                elif self.ion_charge == 0:
192                    return self.neutral_mass
193
194                else:
195                    # formula is probably ion form used for bruker ref list
196                    return self.neutral_mass
197
198        else:
199            raise Exception("Please set ion charge first")
200
201    def _calc_assignment_mass_error(self, method="ppm"):
202        """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z.
203
204        Parameters
205        ----------
206        method : str, optional
207            The method to calculate the mass error, by default 'ppm', but can be 'ppb'
208
209        Raises
210        ------
211        Exception
212            If the method is not 'ppm' or 'ppb'.
213        Exception
214            If there is no ms peak associated with the molecular formula instance.
215        """
216
217        if method == "ppm":
218            multi_factor = 1000000
219
220        elif method == "ppb":
221            multi_factor = 1000000
222
223        else:
224            raise Exception(
225                "method needs to be ppm or ppb, you have entered %s" % method
226            )
227
228        if self._mspeak_parent.mz_exp:
229            self._assignment_mass_error = (
230                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
231            ) * multi_factor
232
233            return (
234                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
235            ) * multi_factor
236
237        else:
238            raise Exception(
239                "No ms peak associated with the molecular formula instance %s", self
240            )
241
242    def _calc_mz_confidence(self, mean=0):
243        """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z.
244
245        Parameters
246        ----------
247        mean : int, optional
248            The mean of the m/z error, by default 0
249
250        """
251
252        # predicted std not set, using 0.3
253        if not self._mspeak_parent.predicted_std:
254            self._mspeak_parent.predicted_std = 1.66
255
256        # print( self._mspeak_parent.predicted_std)
257
258        return exp(
259            -1
260            * (
261                power((self.mz_error - mean), 2)
262                / (2 * power(self._mspeak_parent.predicted_std, 2))
263            )
264        )
265
266    def _calc_isotopologue_confidence(self):
267        """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity.
268
269        Returns
270        -------
271        float
272            The isotopologue confidence of the molecular formula.
273        """
274
275        if self.is_isotopologue:
276            # confidence of isotopologue is pure mz error
277            # TODO add more features here
278
279            mformula_index = self.mono_isotopic_formula_index
280            mspeak_index = self.mspeak_index_mono_isotopic
281
282            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
283
284            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
285
286            mono_mz = mspeak[mformula_index].mz_calc
287            mono_abundance = mspeak.abundance
288
289        else:
290            mono_mz = self.mz_calc
291            mono_abundance = self._mspeak_parent.abundance
292
293            expected_isotopologues = self.expected_isotopologues
294            # has isotopologues based on current dinamic range
295
296        if expected_isotopologues:
297            dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]}
298
299            # get reference data
300            for mf in expected_isotopologues:
301                dict_mz_abund_ref["abundance"].append(mf.abundance_calc)
302                dict_mz_abund_ref["mz"].append(mf.mz_calc)
303
304            dict_mz_abund_exp = {mono_mz: mono_abundance}
305
306            # get experimental data
307            for mf in expected_isotopologues:
308                # molecular formula has been assigned to a peak
309                if mf._mspeak_parent:
310                    # stores mspeak abundance
311                    dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance
312
313                else:
314                    # fill missing mz with abundance 0 and mz error score of 0
315                    dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1)
316
317            distance = SpectralSimilarity(
318                dict_mz_abund_exp, dict_mz_abund_ref
319            ).manhattan_distance()
320            correlation = 1 - self.normalize_distance(distance, [0, 2])
321            # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
322            # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
323
324            if correlation == 1:
325                print(dict_mz_abund_exp, dict_mz_abund_ref)
326            if isnan(correlation):
327                # print(dict_mz_abund_exp, dict_mz_abund_ref)
328                correlation = 0.00001
329
330        else:
331            # no isotopologue expected giving a correlation score of 0.0 but it needs optimization
332            correlation = 0.0
333
334        return correlation
335
336    def normalize_distance(self, dist, dist_range):
337        """
338        Normalize the distance value.
339
340        Parameters
341        ----------
342        dist : float
343            The distance value to be normalized.
344        dist_range : list
345            The range of the distance value.
346
347        """
348        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
349
350        if result < 0:
351            result = 0.0
352        elif result > 1:
353            result = 1.0
354
355        return result
356
357    def subtract_formula(self, formula_obj, formated=True):
358        """Subtract a formula from the current formula object
359
360        Parameters
361        ----------
362        formula_obj : MolecularFormula
363            MolecularFormula object to be subtracted from the current formula object
364        formated : bool, optional
365            If True, returns the formula in string format, by default True
366
367        """
368        subtraction = {}
369        for atom, value in self.to_dict().items():
370            if atom != Labels.ion_type:
371                if formula_obj.get(atom):
372                    # value_subtraction = value - formula_obj.get(atom)
373                    if value - formula_obj.get(atom) > 0:
374                        subtraction[atom] = value - formula_obj.get(atom)
375                else:
376                    subtraction[atom] = value
377        if formated:
378            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
379            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
380        else:
381            SUB = str.maketrans("0123456789", "0123456789")
382            SUP = str.maketrans("0123456789", "0123456789")
383        formula_srt = ""
384        for atom in Atoms.atoms_order:
385            if atom in subtraction.keys():
386                formula_srt += atom.translate(SUP) + str(
387                    int(subtraction.get(atom))
388                ).translate(SUB)
389
390        return formula_srt
391
392    def _calc_average_mz_score(self):
393        """Calculate the average m/z error score of the molecular formula identification, including the isotopologues."""
394        if self.is_isotopologue:
395            # confidence of isotopologue is pure mz error
396            # TODO add more features here
397
398            mformula_index = self.mono_isotopic_formula_index
399            mspeak_index = self.mspeak_index_mono_isotopic
400
401            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
402
403            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
404
405        else:
406            expected_isotopologues = self.expected_isotopologues
407            # has isotopologues based on current dinamic range
408
409        accumulated_mz_score = [self.mz_error_score]
410
411        if expected_isotopologues:
412            for mf in expected_isotopologues:
413                # molecular formula has been assigned to a peak
414                if mf._mspeak_parent:
415                    # stores mspeak abundance
416                    accumulated_mz_score.append(mf.mz_error_score)
417                else:
418                    # fill missing mz with abundance 0 and mz error score of 0
419                    accumulated_mz_score.append(0.0)
420
421        average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score)
422
423        if isnan(average_mz_score):
424            average_mz_score = 0.0
425
426        return average_mz_score
427
428    def _calc_confidence_score(self):
429        """Calculate the confidence score of the molecular formula identification, including the isotopologues."""
430
431        ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean
432        #### TODO: Add spectral similarity
433
434        ## Parameters
435        # ----------
436        #### mz_exp:
437        ####    Experimental m/z
438        #### predicted_std:
439        ####    Standart deviation calculated from Resolving power optimization or constant set by User
440
441        isotopologue_correlation = self.isotopologue_similarity
442        average_mz_score = self.average_mz_error_score
443        # add monoisotopic peak mz error score
444
445        # calculate score with higher weight for mass error
446        # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4)
447        a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight
448        b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight
449
450        score = (isotopologue_correlation * b) + (average_mz_score * a)
451
452        # if round(average_mz_score,2) == 0.00:
453        #    print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b)
454
455        return score
456
457    def _calc_abundance_error(self, method="percentile"):
458        """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance.
459
460        Parameters
461        ----------
462        method : str, optional
463            The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb'
464
465        Returns
466        -------
467        float
468            The abundance error of the molecular formula.
469
470        Raises
471        ------
472        Exception
473            If isotopologues were not calculated.
474        """
475
476        mult_factor = 100
477
478        iso_abundance = self._mspeak_parent.abundance
479        mono_abundance = self._mspeak_parent._ms_parent[
480            self.mspeak_index_mono_isotopic
481        ].abundance
482
483        if self.prob_ratio:
484            theor_abundance = mono_abundance * self.prob_ratio
485            # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
486            return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor
487
488        else:
489            raise Exception("Please calc_isotopologues")
490
491    def _calc_area_error(self, method="percentile"):
492        """Calculate the area error of the molecular formula, based on the experimental area and the calculated area.
493
494        Parameters
495        ----------
496        method : str, optional
497            The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb'
498
499        Returns
500        -------
501        float
502            The area error of the molecular formula.
503
504        Raises
505        ------
506        Exception
507            If isotopologues were not calculated.
508        """
509
510        mult_factor = 100
511
512        iso_area = self._mspeak_parent.area
513        mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area
514
515        if self.prob_ratio:
516            if mono_area and iso_area:
517                # exp_ratio = iso_area/mono_area
518
519                area_calc = mono_area * self.prob_ratio
520
521                # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
522                return ((area_calc - iso_area) / area_calc) * mult_factor
523                # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor
524
525            else:
526                # centroid mass spectrum
527                return 0
528        else:
529            raise Exception("Please calc_isotopologues")
530
531    def _calc_aromaticity_index_mod(self):
532        """Calculate the modified aromaticity index of the molecular formula.
533
534        Returns
535        -------
536        float
537            The aromaticity index of the molecular formula.
538
539        Notes
540        -----
541        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
542        corrected in https://doi.org/10.1002/rcm.7433
543        """
544        # Prepare empty dictionary to store the number of atoms of each element
545        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
546
547        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
548        for element in ai_es:
549            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
550            for element_w_iso in elements_w_iso:
551                if element_w_iso in self._d_molecular_formula:
552                    ai_es[element] += self._d_molecular_formula[element_w_iso]
553
554        ai_n = (
555            1
556            + ai_es["C"]
557            - (0.5 * ai_es["O"])
558            - ai_es["S"]
559            - (0.5 * (ai_es["N"] + ai_es["H"]))
560        )
561        ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"]
562
563        ai = ai_n / ai_d
564
565        if ai < 0:
566            ai = 0
567        if ai > 1:
568            ai = 1
569
570        return ai
571
572    def _calc_aromaticity_index(self):
573        """Calculate the aromaticity index of the molecular formula.
574
575        Returns
576        -------
577        float
578            The aromaticity index of the molecular formula.
579
580        Notes
581        -----
582        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
583        corrected in https://doi.org/10.1002/rcm.7433
584        """
585        # Prepare empty dictionary to store the number of atoms of each element
586        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
587
588        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
589        for element in ai_es:
590            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
591            for element_w_iso in elements_w_iso:
592                if element_w_iso in self._d_molecular_formula:
593                    ai_es[element] += self._d_molecular_formula[element_w_iso]
594
595                ai_n = (
596                    1
597                    + ai_es["C"]
598                    - (ai_es["O"])
599                    - ai_es["S"]
600                    - (0.5 * (ai_es["N"] + ai_es["H"]))
601                )
602        ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"]
603
604        ai = ai_n / ai_d
605
606        if ai < 0:
607            ai = 0
608        if ai > 1:
609            ai = 1
610
611        return ai
612
613    def _calc_nosc(self):
614        """Calculate the average nominal oxidation state of carbon
615
616        Returns
617        -------
618        float
619            The average nominal oxidation state of carbon
620
621        Notes
622        -----
623        Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020
624        """
625        # Prepare empty dictionary to store the number of atoms of each element
626        nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0}
627
628        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
629        for element in nosc_es:
630            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
631            for element_w_iso in elements_w_iso:
632                if element_w_iso in self._d_molecular_formula:
633                    nosc_es[element] += self._d_molecular_formula[element_w_iso]
634
635        nosc = (
636            -(
637                (
638                    4 * nosc_es["C"]
639                    + nosc_es["H"]
640                    - 3 * nosc_es["N"]
641                    - 2 * nosc_es["O"]
642                    + 5 * nosc_es["P"]
643                    - 2 * nosc_es["S"]
644                )
645                / nosc_es["C"]
646            )
647            + 4
648        )
649
650        # If nosc is infinite or negative infinity, set it to nan
651        if nosc == float("inf") or nosc == float("-inf"):
652            nosc = float("nan")
653
654        return nosc
655
656    @property
657    def dbe_ai(self):
658        """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens."""
659
660        carbons = self._d_molecular_formula.get("C")
661        hydrogens = self._d_molecular_formula.get("H")
662        oxygens = self._d_molecular_formula.get("O")
663        return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5)
664
665    def _calc_dbe(self):
666        """Calculate the double bond equivalent (DBE) of the molecular formula"""
667
668        individual_dbe = 0
669
670        for atom in self._d_molecular_formula.keys():
671            if atom != Labels.ion_type:
672                n_atom = int(self._d_molecular_formula.get(atom))
673
674                clean_atom = "".join([i for i in atom if not i.isdigit()])
675
676                if self._mspeak_parent:
677                    valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get(
678                        clean_atom
679                    )
680                else:
681                    valencia = MSParameters.molecular_search.used_atom_valences.get(
682                        clean_atom
683                    )
684                # valencia = Atoms.atoms_covalence.get(atom)
685
686                if type(valencia) is tuple:
687                    valencia = valencia[0]
688                if valencia > 0:
689                    # print atom, valencia, n_atom, individual_dbe
690                    individual_dbe = individual_dbe + (n_atom * (valencia - 2))
691                else:
692                    continue
693
694        dbe = 1 + (0.5 * individual_dbe)
695
696        if self.ion_type == Labels.adduct_ion:
697            dbe = dbe + 0.5
698
699        return dbe
700
701    def _calc_kmd(self, dict_base):
702        """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass.
703
704        Parameters
705        ----------
706        dict_base : dict
707            The dictionary of the base formula, e.g. {'C':1, 'H':2}
708
709        Returns
710        -------
711        tuple
712            The tuple of the KMD, Kendrick mass, and nominal Kendrick mass.
713        """
714        mass = 0
715        for atom in dict_base.keys():
716            mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom)
717
718        kendrick_mass = (int(mass) / mass) * self.mz_calc
719
720        nominal_km = int(kendrick_mass)
721
722        kmd = (nominal_km - kendrick_mass) * 100
723
724        # kmd = (nominal_km - km) * 1
725        kmd = round(kmd, 0)
726
727        return kmd, kendrick_mass, nominal_km
728
729    def _cal_isotopologues(
730        self, formula_dict, min_abundance, current_abundance, ms_dynamic_range
731    ):
732        """Calculate the isotopologues for a given molecular formula.
733
734        Parameters
735        ----------
736        formula_dict : dict
737            The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2}
738        min_abundance : float
739            The minimum abundance.
740        current_abundance : float
741            The current monoisotopic abundance.
742        ms_dynamic_range : float
743            The dynamic range.
744
745
746        Notes
747        -----
748        This is the primary function to look for isotopologues based on a monoisotopic molecular formula.
749        It needs to be expanded to include the calculation of resolving power and plot the results.
750        Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion.
751        Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational).
752        It might break when adding non-conventional atoms (not yet tested).
753        This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec.
754
755
756        """
757
758        # last update on 05-26-2020, Yuri E. Corilo
759
760        # updated it to reflect min possible mass peak abundance
761        cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range)
762
763        # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range)
764        # print(cut_off_to_IsoSpeccPy)
765        atoms_labels = (
766            atom
767            for atom in formula_dict.keys()
768            if atom != Labels.ion_type and atom != "H"
769        )
770
771        atoms_count = []
772        masses_list_tuples = []
773        props_list_tuples = []
774        all_atoms_list = []
775
776        for atom_label in atoms_labels:
777            if Atoms.isotopes.get(atom_label)[1][0] is None:
778                "This atom_label has no heavy isotope"
779                atoms_count.append(formula_dict.get(atom_label))
780                mass = Atoms.atomic_masses.get(atom_label)
781                prop = Atoms.isotopic_abundance.get(atom_label)
782                masses_list_tuples.append([mass])
783                props_list_tuples.append([prop])
784                all_atoms_list.append(atom_label)
785
786            else:
787                isotopes_label_list = Atoms.isotopes.get(atom_label)[1]
788
789                if len(isotopes_label_list) > 1:
790                    "This atom_label has two or more heavy isotope"
791                    isotopos_labels = [i for i in isotopes_label_list]
792                else:
793                    "This atom_label only has one heavy isotope"
794                    isotopos_labels = [isotopes_label_list[0]]
795
796                # all_atoms_list.extend(isotopos_labels)
797                isotopos_labels = [atom_label] + isotopos_labels
798
799                all_atoms_list.extend(isotopos_labels)
800
801                masses = [
802                    Atoms.atomic_masses.get(atom_label)
803                    for atom_label in isotopos_labels
804                ]
805                props = [
806                    Atoms.isotopic_abundance.get(atom_label)
807                    for atom_label in isotopos_labels
808                ]
809
810                atoms_count.append(formula_dict.get(atom_label))
811                masses_list_tuples.append(masses)
812                props_list_tuples.append(props)
813        if legacy_isospec:
814            iso = IsoSpecPy.IsoSpec(
815                atoms_count,
816                masses_list_tuples,
817                props_list_tuples,
818                cut_off_to_IsoSpeccPy,
819            )
820            conf = iso.getConfs()
821            masses = conf[0]
822            probs = exp(conf[1])
823            molecular_formulas = conf[2]
824            # print('conf', conf)
825            # print('probs', conf[1])
826        else:
827            # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach
828            iso = IsoSpecPy.IsoTotalProb(
829                atomCounts=atoms_count,
830                isotopeMasses=masses_list_tuples,
831                isotopeProbabilities=props_list_tuples,
832                prob_to_cover=cut_off_to_IsoSpeccPy,
833                get_confs=True,
834            )
835            masses = list(iso.masses)
836            probs = array(list(iso.probs))
837            confs = list(iso.confs)
838
839            molecular_formulas = []
840            for x in confs:
841                tmplist = []
842                for y in x:
843                    tmplist.extend(list(y))
844                molecular_formulas.append(tmplist)
845
846        new_formulas = []
847
848        for isotopologue_index in range(len(iso)):
849            # skip_mono_isotopic
850
851            formula_list = molecular_formulas[isotopologue_index]
852            new_formula_dict = dict(zip(all_atoms_list, formula_list))
853            new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type)
854            if formula_dict.get("H"):
855                new_formula_dict["H"] = formula_dict.get("H")
856
857            new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0})
858
859        # formula_dict in new_formulas check if monoisotopic is being returned
860        if new_formulas:  # and formula_dict in new_formulas:
861            # print(conf)
862            # print(new_formulas)
863            # print(atoms_count)
864            # print(all_atoms_list)
865            # print(masses_list_tuples)
866            # print(props_list_tuples)
867            # find where monoisotopic is
868            index_mono = new_formulas.index(formula_dict)
869            # calculate ratio iso/mono
870            probs = list(probs / probs[index_mono])
871
872            # delete the monoisotopic
873            del probs[index_mono]
874            del new_formulas[index_mono]
875
876            # print('probs_exp', probs)
877            for formulas, prob in zip(new_formulas, probs):
878                theor_abundance = current_abundance * prob
879                if theor_abundance > min_abundance:
880                    # print(prob, theor_abundance, current_abundance)
881                    yield (formulas, prob)
882            # return zip(new_formulas, probs )
883
884        # else:
885        #    return []
isospec_version = '2.2.2'
class MolecularFormulaCalc:
 33class MolecularFormulaCalc:
 34    """Class of calculations related to molecular formula
 35
 36    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula
 37
 38    Attributes
 39    ----------
 40    mz_calc : float
 41        The m/z value of the molecular formula.
 42    neutral_mass : float
 43        The neutral mass of the molecular formula.
 44    ion_charge : int
 45        The ion charge of the molecular formula.
 46    _external_mz : float
 47        The externally provided m/z value of the molecular formula.
 48    _d_molecular_formula : dict
 49        The dictionary representation of the molecular formula.
 50    _mspeak_parent : object
 51        The parent MS peak object associated with the molecular formula.
 52    _assignment_mass_error : float
 53        The mass error of the molecular formula.
 54
 55    Methods
 56    -------
 57    * _calc_resolving_power_low_pressure(B, T)
 58        Calculate the resolving power at low pressure.
 59    * _calc_resolving_power_high_pressure(B, T)
 60        Calculate the resolving power at high pressure.
 61    * _adduct_mz(adduct_atom, ion_charge)
 62        Get the m/z value of an adducted ion version of the molecular formula.
 63    * _protonated_mz(ion_charge)
 64        Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
 65    * _radical_mz(ion_charge)
 66        Get the m/z value of a radical ion version of the molecular formula.
 67    * _neutral_mass()
 68        Get the neutral mass of the molecular formula.
 69    * _calc_mz()
 70        Get the m/z value of the molecular formula.
 71    * _calc_assignment_mass_error(method='ppm')
 72        Calculate the mass error of the molecular formula.
 73    * _calc_mz_confidence(mean=0)
 74        Calculate the m/z confidence of the molecular formula.
 75    * _calc_isotopologue_confidence()
 76        Calculate the isotopologue confidence of the molecular formula.
 77    * normalize_distance(dist, dist_range)
 78        Normalize the distance value.
 79    * subtract_formula(formula_obj, formated=True)
 80        Subtract a formula from the current formula object.
 81    * _calc_average_mz_score()
 82        Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
 83    """
 84
 85    def _calc_resolving_power_low_pressure(self, B, T):
 86        """
 87        Calculate the resolving power at low pressure.
 88
 89        Parameters
 90        ----------
 91        B : float
 92            Magnetic Strength (Testa).
 93        T : float
 94            Transient time (seconds).
 95
 96        """
 97        return (1.274 * 10000000 * B * T) * (1 / self.mz_calc)
 98
 99    def _calc_resolving_power_high_pressure(self, B, T):
100        """
101        Calculate the resolving power at high pressure.
102
103        Parameters
104        ----------
105        B : float
106            Magnetic Strength (Testa).
107        T : float
108            Transient time (seconds).
109
110        """
111        return (2.758 * 10000000 * B * T) * (1 / self.mz_calc)
112
113    def _adduct_mz(self, adduct_atom, ion_charge):
114        """Get the m/z value of an adducted ion version of the molecular formula.
115
116        Parameters
117        ----------
118        adduct_atom : str
119            The adduct atom.
120        ion_charge : int
121            The ion charge.
122
123        """
124        return (
125            self.neutral_mass
126            + (Atoms.atomic_masses.get(adduct_atom))
127            + (ion_charge * -1 * Atoms.electron_mass)
128        ) / abs(ion_charge)
129
130    def _protonated_mz(self, ion_charge):
131        """Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
132
133        Parameters
134        ----------
135        ion_charge : int
136            The ion charge.
137        """
138        return (
139            self.neutral_mass
140            + (ion_charge * Atoms.atomic_masses.get("H"))
141            + (ion_charge * -1 * Atoms.electron_mass)
142        ) / abs(ion_charge)
143
144    def _radical_mz(self, ion_charge):
145        """Get the m/z value of a radical ion version of the molecular formula.
146
147        Parameters
148        ----------
149        ion_charge : int
150            The ion charge.
151        """
152        return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(
153            ion_charge
154        )
155
156    def _neutral_mass(self):
157        """Get the neutral mass of the molecular formula."""
158
159        mass = 0
160
161        for each_atom in self._d_molecular_formula.keys():
162            if each_atom != Labels.ion_type and each_atom != "HC":
163                try:
164                    mass = mass + Atoms.atomic_masses[
165                        each_atom
166                    ] * self._d_molecular_formula.get(each_atom)
167
168                except:
169                    print(Labels.ion_type, each_atom)
170
171        return mass
172
173    def _calc_mz(self):
174        """Get the m/z value of the molecular formula, based on the ion charge and ion type."""
175
176        if self.ion_charge is not None:
177            if self._external_mz:
178                return self._external_mz
179
180            else:
181                ion_type = self._d_molecular_formula.get(Labels.ion_type)
182
183                if ion_type == Labels.protonated_de_ion:
184                    return self.protonated_mz
185
186                elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion:
187                    return self.radical_mz
188
189                elif ion_type == Labels.neutral:
190                    return self.neutral_mass
191
192                elif self.ion_charge == 0:
193                    return self.neutral_mass
194
195                else:
196                    # formula is probably ion form used for bruker ref list
197                    return self.neutral_mass
198
199        else:
200            raise Exception("Please set ion charge first")
201
202    def _calc_assignment_mass_error(self, method="ppm"):
203        """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z.
204
205        Parameters
206        ----------
207        method : str, optional
208            The method to calculate the mass error, by default 'ppm', but can be 'ppb'
209
210        Raises
211        ------
212        Exception
213            If the method is not 'ppm' or 'ppb'.
214        Exception
215            If there is no ms peak associated with the molecular formula instance.
216        """
217
218        if method == "ppm":
219            multi_factor = 1000000
220
221        elif method == "ppb":
222            multi_factor = 1000000
223
224        else:
225            raise Exception(
226                "method needs to be ppm or ppb, you have entered %s" % method
227            )
228
229        if self._mspeak_parent.mz_exp:
230            self._assignment_mass_error = (
231                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
232            ) * multi_factor
233
234            return (
235                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
236            ) * multi_factor
237
238        else:
239            raise Exception(
240                "No ms peak associated with the molecular formula instance %s", self
241            )
242
243    def _calc_mz_confidence(self, mean=0):
244        """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z.
245
246        Parameters
247        ----------
248        mean : int, optional
249            The mean of the m/z error, by default 0
250
251        """
252
253        # predicted std not set, using 0.3
254        if not self._mspeak_parent.predicted_std:
255            self._mspeak_parent.predicted_std = 1.66
256
257        # print( self._mspeak_parent.predicted_std)
258
259        return exp(
260            -1
261            * (
262                power((self.mz_error - mean), 2)
263                / (2 * power(self._mspeak_parent.predicted_std, 2))
264            )
265        )
266
267    def _calc_isotopologue_confidence(self):
268        """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity.
269
270        Returns
271        -------
272        float
273            The isotopologue confidence of the molecular formula.
274        """
275
276        if self.is_isotopologue:
277            # confidence of isotopologue is pure mz error
278            # TODO add more features here
279
280            mformula_index = self.mono_isotopic_formula_index
281            mspeak_index = self.mspeak_index_mono_isotopic
282
283            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
284
285            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
286
287            mono_mz = mspeak[mformula_index].mz_calc
288            mono_abundance = mspeak.abundance
289
290        else:
291            mono_mz = self.mz_calc
292            mono_abundance = self._mspeak_parent.abundance
293
294            expected_isotopologues = self.expected_isotopologues
295            # has isotopologues based on current dinamic range
296
297        if expected_isotopologues:
298            dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]}
299
300            # get reference data
301            for mf in expected_isotopologues:
302                dict_mz_abund_ref["abundance"].append(mf.abundance_calc)
303                dict_mz_abund_ref["mz"].append(mf.mz_calc)
304
305            dict_mz_abund_exp = {mono_mz: mono_abundance}
306
307            # get experimental data
308            for mf in expected_isotopologues:
309                # molecular formula has been assigned to a peak
310                if mf._mspeak_parent:
311                    # stores mspeak abundance
312                    dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance
313
314                else:
315                    # fill missing mz with abundance 0 and mz error score of 0
316                    dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1)
317
318            distance = SpectralSimilarity(
319                dict_mz_abund_exp, dict_mz_abund_ref
320            ).manhattan_distance()
321            correlation = 1 - self.normalize_distance(distance, [0, 2])
322            # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
323            # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
324
325            if correlation == 1:
326                print(dict_mz_abund_exp, dict_mz_abund_ref)
327            if isnan(correlation):
328                # print(dict_mz_abund_exp, dict_mz_abund_ref)
329                correlation = 0.00001
330
331        else:
332            # no isotopologue expected giving a correlation score of 0.0 but it needs optimization
333            correlation = 0.0
334
335        return correlation
336
337    def normalize_distance(self, dist, dist_range):
338        """
339        Normalize the distance value.
340
341        Parameters
342        ----------
343        dist : float
344            The distance value to be normalized.
345        dist_range : list
346            The range of the distance value.
347
348        """
349        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
350
351        if result < 0:
352            result = 0.0
353        elif result > 1:
354            result = 1.0
355
356        return result
357
358    def subtract_formula(self, formula_obj, formated=True):
359        """Subtract a formula from the current formula object
360
361        Parameters
362        ----------
363        formula_obj : MolecularFormula
364            MolecularFormula object to be subtracted from the current formula object
365        formated : bool, optional
366            If True, returns the formula in string format, by default True
367
368        """
369        subtraction = {}
370        for atom, value in self.to_dict().items():
371            if atom != Labels.ion_type:
372                if formula_obj.get(atom):
373                    # value_subtraction = value - formula_obj.get(atom)
374                    if value - formula_obj.get(atom) > 0:
375                        subtraction[atom] = value - formula_obj.get(atom)
376                else:
377                    subtraction[atom] = value
378        if formated:
379            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
380            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
381        else:
382            SUB = str.maketrans("0123456789", "0123456789")
383            SUP = str.maketrans("0123456789", "0123456789")
384        formula_srt = ""
385        for atom in Atoms.atoms_order:
386            if atom in subtraction.keys():
387                formula_srt += atom.translate(SUP) + str(
388                    int(subtraction.get(atom))
389                ).translate(SUB)
390
391        return formula_srt
392
393    def _calc_average_mz_score(self):
394        """Calculate the average m/z error score of the molecular formula identification, including the isotopologues."""
395        if self.is_isotopologue:
396            # confidence of isotopologue is pure mz error
397            # TODO add more features here
398
399            mformula_index = self.mono_isotopic_formula_index
400            mspeak_index = self.mspeak_index_mono_isotopic
401
402            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
403
404            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
405
406        else:
407            expected_isotopologues = self.expected_isotopologues
408            # has isotopologues based on current dinamic range
409
410        accumulated_mz_score = [self.mz_error_score]
411
412        if expected_isotopologues:
413            for mf in expected_isotopologues:
414                # molecular formula has been assigned to a peak
415                if mf._mspeak_parent:
416                    # stores mspeak abundance
417                    accumulated_mz_score.append(mf.mz_error_score)
418                else:
419                    # fill missing mz with abundance 0 and mz error score of 0
420                    accumulated_mz_score.append(0.0)
421
422        average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score)
423
424        if isnan(average_mz_score):
425            average_mz_score = 0.0
426
427        return average_mz_score
428
429    def _calc_confidence_score(self):
430        """Calculate the confidence score of the molecular formula identification, including the isotopologues."""
431
432        ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean
433        #### TODO: Add spectral similarity
434
435        ## Parameters
436        # ----------
437        #### mz_exp:
438        ####    Experimental m/z
439        #### predicted_std:
440        ####    Standart deviation calculated from Resolving power optimization or constant set by User
441
442        isotopologue_correlation = self.isotopologue_similarity
443        average_mz_score = self.average_mz_error_score
444        # add monoisotopic peak mz error score
445
446        # calculate score with higher weight for mass error
447        # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4)
448        a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight
449        b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight
450
451        score = (isotopologue_correlation * b) + (average_mz_score * a)
452
453        # if round(average_mz_score,2) == 0.00:
454        #    print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b)
455
456        return score
457
458    def _calc_abundance_error(self, method="percentile"):
459        """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance.
460
461        Parameters
462        ----------
463        method : str, optional
464            The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb'
465
466        Returns
467        -------
468        float
469            The abundance error of the molecular formula.
470
471        Raises
472        ------
473        Exception
474            If isotopologues were not calculated.
475        """
476
477        mult_factor = 100
478
479        iso_abundance = self._mspeak_parent.abundance
480        mono_abundance = self._mspeak_parent._ms_parent[
481            self.mspeak_index_mono_isotopic
482        ].abundance
483
484        if self.prob_ratio:
485            theor_abundance = mono_abundance * self.prob_ratio
486            # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
487            return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor
488
489        else:
490            raise Exception("Please calc_isotopologues")
491
492    def _calc_area_error(self, method="percentile"):
493        """Calculate the area error of the molecular formula, based on the experimental area and the calculated area.
494
495        Parameters
496        ----------
497        method : str, optional
498            The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb'
499
500        Returns
501        -------
502        float
503            The area error of the molecular formula.
504
505        Raises
506        ------
507        Exception
508            If isotopologues were not calculated.
509        """
510
511        mult_factor = 100
512
513        iso_area = self._mspeak_parent.area
514        mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area
515
516        if self.prob_ratio:
517            if mono_area and iso_area:
518                # exp_ratio = iso_area/mono_area
519
520                area_calc = mono_area * self.prob_ratio
521
522                # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
523                return ((area_calc - iso_area) / area_calc) * mult_factor
524                # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor
525
526            else:
527                # centroid mass spectrum
528                return 0
529        else:
530            raise Exception("Please calc_isotopologues")
531
532    def _calc_aromaticity_index_mod(self):
533        """Calculate the modified aromaticity index of the molecular formula.
534
535        Returns
536        -------
537        float
538            The aromaticity index of the molecular formula.
539
540        Notes
541        -----
542        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
543        corrected in https://doi.org/10.1002/rcm.7433
544        """
545        # Prepare empty dictionary to store the number of atoms of each element
546        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
547
548        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
549        for element in ai_es:
550            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
551            for element_w_iso in elements_w_iso:
552                if element_w_iso in self._d_molecular_formula:
553                    ai_es[element] += self._d_molecular_formula[element_w_iso]
554
555        ai_n = (
556            1
557            + ai_es["C"]
558            - (0.5 * ai_es["O"])
559            - ai_es["S"]
560            - (0.5 * (ai_es["N"] + ai_es["H"]))
561        )
562        ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"]
563
564        ai = ai_n / ai_d
565
566        if ai < 0:
567            ai = 0
568        if ai > 1:
569            ai = 1
570
571        return ai
572
573    def _calc_aromaticity_index(self):
574        """Calculate the aromaticity index of the molecular formula.
575
576        Returns
577        -------
578        float
579            The aromaticity index of the molecular formula.
580
581        Notes
582        -----
583        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
584        corrected in https://doi.org/10.1002/rcm.7433
585        """
586        # Prepare empty dictionary to store the number of atoms of each element
587        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
588
589        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
590        for element in ai_es:
591            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
592            for element_w_iso in elements_w_iso:
593                if element_w_iso in self._d_molecular_formula:
594                    ai_es[element] += self._d_molecular_formula[element_w_iso]
595
596                ai_n = (
597                    1
598                    + ai_es["C"]
599                    - (ai_es["O"])
600                    - ai_es["S"]
601                    - (0.5 * (ai_es["N"] + ai_es["H"]))
602                )
603        ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"]
604
605        ai = ai_n / ai_d
606
607        if ai < 0:
608            ai = 0
609        if ai > 1:
610            ai = 1
611
612        return ai
613
614    def _calc_nosc(self):
615        """Calculate the average nominal oxidation state of carbon
616
617        Returns
618        -------
619        float
620            The average nominal oxidation state of carbon
621
622        Notes
623        -----
624        Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020
625        """
626        # Prepare empty dictionary to store the number of atoms of each element
627        nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0}
628
629        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
630        for element in nosc_es:
631            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
632            for element_w_iso in elements_w_iso:
633                if element_w_iso in self._d_molecular_formula:
634                    nosc_es[element] += self._d_molecular_formula[element_w_iso]
635
636        nosc = (
637            -(
638                (
639                    4 * nosc_es["C"]
640                    + nosc_es["H"]
641                    - 3 * nosc_es["N"]
642                    - 2 * nosc_es["O"]
643                    + 5 * nosc_es["P"]
644                    - 2 * nosc_es["S"]
645                )
646                / nosc_es["C"]
647            )
648            + 4
649        )
650
651        # If nosc is infinite or negative infinity, set it to nan
652        if nosc == float("inf") or nosc == float("-inf"):
653            nosc = float("nan")
654
655        return nosc
656
657    @property
658    def dbe_ai(self):
659        """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens."""
660
661        carbons = self._d_molecular_formula.get("C")
662        hydrogens = self._d_molecular_formula.get("H")
663        oxygens = self._d_molecular_formula.get("O")
664        return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5)
665
666    def _calc_dbe(self):
667        """Calculate the double bond equivalent (DBE) of the molecular formula"""
668
669        individual_dbe = 0
670
671        for atom in self._d_molecular_formula.keys():
672            if atom != Labels.ion_type:
673                n_atom = int(self._d_molecular_formula.get(atom))
674
675                clean_atom = "".join([i for i in atom if not i.isdigit()])
676
677                if self._mspeak_parent:
678                    valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get(
679                        clean_atom
680                    )
681                else:
682                    valencia = MSParameters.molecular_search.used_atom_valences.get(
683                        clean_atom
684                    )
685                # valencia = Atoms.atoms_covalence.get(atom)
686
687                if type(valencia) is tuple:
688                    valencia = valencia[0]
689                if valencia > 0:
690                    # print atom, valencia, n_atom, individual_dbe
691                    individual_dbe = individual_dbe + (n_atom * (valencia - 2))
692                else:
693                    continue
694
695        dbe = 1 + (0.5 * individual_dbe)
696
697        if self.ion_type == Labels.adduct_ion:
698            dbe = dbe + 0.5
699
700        return dbe
701
702    def _calc_kmd(self, dict_base):
703        """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass.
704
705        Parameters
706        ----------
707        dict_base : dict
708            The dictionary of the base formula, e.g. {'C':1, 'H':2}
709
710        Returns
711        -------
712        tuple
713            The tuple of the KMD, Kendrick mass, and nominal Kendrick mass.
714        """
715        mass = 0
716        for atom in dict_base.keys():
717            mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom)
718
719        kendrick_mass = (int(mass) / mass) * self.mz_calc
720
721        nominal_km = int(kendrick_mass)
722
723        kmd = (nominal_km - kendrick_mass) * 100
724
725        # kmd = (nominal_km - km) * 1
726        kmd = round(kmd, 0)
727
728        return kmd, kendrick_mass, nominal_km
729
730    def _cal_isotopologues(
731        self, formula_dict, min_abundance, current_abundance, ms_dynamic_range
732    ):
733        """Calculate the isotopologues for a given molecular formula.
734
735        Parameters
736        ----------
737        formula_dict : dict
738            The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2}
739        min_abundance : float
740            The minimum abundance.
741        current_abundance : float
742            The current monoisotopic abundance.
743        ms_dynamic_range : float
744            The dynamic range.
745
746
747        Notes
748        -----
749        This is the primary function to look for isotopologues based on a monoisotopic molecular formula.
750        It needs to be expanded to include the calculation of resolving power and plot the results.
751        Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion.
752        Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational).
753        It might break when adding non-conventional atoms (not yet tested).
754        This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec.
755
756
757        """
758
759        # last update on 05-26-2020, Yuri E. Corilo
760
761        # updated it to reflect min possible mass peak abundance
762        cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range)
763
764        # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range)
765        # print(cut_off_to_IsoSpeccPy)
766        atoms_labels = (
767            atom
768            for atom in formula_dict.keys()
769            if atom != Labels.ion_type and atom != "H"
770        )
771
772        atoms_count = []
773        masses_list_tuples = []
774        props_list_tuples = []
775        all_atoms_list = []
776
777        for atom_label in atoms_labels:
778            if Atoms.isotopes.get(atom_label)[1][0] is None:
779                "This atom_label has no heavy isotope"
780                atoms_count.append(formula_dict.get(atom_label))
781                mass = Atoms.atomic_masses.get(atom_label)
782                prop = Atoms.isotopic_abundance.get(atom_label)
783                masses_list_tuples.append([mass])
784                props_list_tuples.append([prop])
785                all_atoms_list.append(atom_label)
786
787            else:
788                isotopes_label_list = Atoms.isotopes.get(atom_label)[1]
789
790                if len(isotopes_label_list) > 1:
791                    "This atom_label has two or more heavy isotope"
792                    isotopos_labels = [i for i in isotopes_label_list]
793                else:
794                    "This atom_label only has one heavy isotope"
795                    isotopos_labels = [isotopes_label_list[0]]
796
797                # all_atoms_list.extend(isotopos_labels)
798                isotopos_labels = [atom_label] + isotopos_labels
799
800                all_atoms_list.extend(isotopos_labels)
801
802                masses = [
803                    Atoms.atomic_masses.get(atom_label)
804                    for atom_label in isotopos_labels
805                ]
806                props = [
807                    Atoms.isotopic_abundance.get(atom_label)
808                    for atom_label in isotopos_labels
809                ]
810
811                atoms_count.append(formula_dict.get(atom_label))
812                masses_list_tuples.append(masses)
813                props_list_tuples.append(props)
814        if legacy_isospec:
815            iso = IsoSpecPy.IsoSpec(
816                atoms_count,
817                masses_list_tuples,
818                props_list_tuples,
819                cut_off_to_IsoSpeccPy,
820            )
821            conf = iso.getConfs()
822            masses = conf[0]
823            probs = exp(conf[1])
824            molecular_formulas = conf[2]
825            # print('conf', conf)
826            # print('probs', conf[1])
827        else:
828            # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach
829            iso = IsoSpecPy.IsoTotalProb(
830                atomCounts=atoms_count,
831                isotopeMasses=masses_list_tuples,
832                isotopeProbabilities=props_list_tuples,
833                prob_to_cover=cut_off_to_IsoSpeccPy,
834                get_confs=True,
835            )
836            masses = list(iso.masses)
837            probs = array(list(iso.probs))
838            confs = list(iso.confs)
839
840            molecular_formulas = []
841            for x in confs:
842                tmplist = []
843                for y in x:
844                    tmplist.extend(list(y))
845                molecular_formulas.append(tmplist)
846
847        new_formulas = []
848
849        for isotopologue_index in range(len(iso)):
850            # skip_mono_isotopic
851
852            formula_list = molecular_formulas[isotopologue_index]
853            new_formula_dict = dict(zip(all_atoms_list, formula_list))
854            new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type)
855            if formula_dict.get("H"):
856                new_formula_dict["H"] = formula_dict.get("H")
857
858            new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0})
859
860        # formula_dict in new_formulas check if monoisotopic is being returned
861        if new_formulas:  # and formula_dict in new_formulas:
862            # print(conf)
863            # print(new_formulas)
864            # print(atoms_count)
865            # print(all_atoms_list)
866            # print(masses_list_tuples)
867            # print(props_list_tuples)
868            # find where monoisotopic is
869            index_mono = new_formulas.index(formula_dict)
870            # calculate ratio iso/mono
871            probs = list(probs / probs[index_mono])
872
873            # delete the monoisotopic
874            del probs[index_mono]
875            del new_formulas[index_mono]
876
877            # print('probs_exp', probs)
878            for formulas, prob in zip(new_formulas, probs):
879                theor_abundance = current_abundance * prob
880                if theor_abundance > min_abundance:
881                    # print(prob, theor_abundance, current_abundance)
882                    yield (formulas, prob)
883            # return zip(new_formulas, probs )
884
885        # else:
886        #    return []

Class of calculations related to molecular formula

This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula

Attributes
  • mz_calc (float): The m/z value of the molecular formula.
  • neutral_mass (float): The neutral mass of the molecular formula.
  • ion_charge (int): The ion charge of the molecular formula.
  • _external_mz (float): The externally provided m/z value of the molecular formula.
  • _d_molecular_formula (dict): The dictionary representation of the molecular formula.
  • _mspeak_parent (object): The parent MS peak object associated with the molecular formula.
  • _assignment_mass_error (float): The mass error of the molecular formula.
Methods
  • _calc_resolving_power_low_pressure(B, T) Calculate the resolving power at low pressure.
  • _calc_resolving_power_high_pressure(B, T) Calculate the resolving power at high pressure.
  • _adduct_mz(adduct_atom, ion_charge) Get the m/z value of an adducted ion version of the molecular formula.
  • _protonated_mz(ion_charge) Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
  • _radical_mz(ion_charge) Get the m/z value of a radical ion version of the molecular formula.
  • _neutral_mass() Get the neutral mass of the molecular formula.
  • _calc_mz() Get the m/z value of the molecular formula.
  • _calc_assignment_mass_error(method='ppm') Calculate the mass error of the molecular formula.
  • _calc_mz_confidence(mean=0) Calculate the m/z confidence of the molecular formula.
  • _calc_isotopologue_confidence() Calculate the isotopologue confidence of the molecular formula.
  • normalize_distance(dist, dist_range) Normalize the distance value.
  • subtract_formula(formula_obj, formated=True) Subtract a formula from the current formula object.
  • _calc_average_mz_score() Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
def normalize_distance(self, dist, dist_range):
337    def normalize_distance(self, dist, dist_range):
338        """
339        Normalize the distance value.
340
341        Parameters
342        ----------
343        dist : float
344            The distance value to be normalized.
345        dist_range : list
346            The range of the distance value.
347
348        """
349        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
350
351        if result < 0:
352            result = 0.0
353        elif result > 1:
354            result = 1.0
355
356        return result

Normalize the distance value.

Parameters
  • dist (float): The distance value to be normalized.
  • dist_range (list): The range of the distance value.
def subtract_formula(self, formula_obj, formated=True):
358    def subtract_formula(self, formula_obj, formated=True):
359        """Subtract a formula from the current formula object
360
361        Parameters
362        ----------
363        formula_obj : MolecularFormula
364            MolecularFormula object to be subtracted from the current formula object
365        formated : bool, optional
366            If True, returns the formula in string format, by default True
367
368        """
369        subtraction = {}
370        for atom, value in self.to_dict().items():
371            if atom != Labels.ion_type:
372                if formula_obj.get(atom):
373                    # value_subtraction = value - formula_obj.get(atom)
374                    if value - formula_obj.get(atom) > 0:
375                        subtraction[atom] = value - formula_obj.get(atom)
376                else:
377                    subtraction[atom] = value
378        if formated:
379            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
380            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
381        else:
382            SUB = str.maketrans("0123456789", "0123456789")
383            SUP = str.maketrans("0123456789", "0123456789")
384        formula_srt = ""
385        for atom in Atoms.atoms_order:
386            if atom in subtraction.keys():
387                formula_srt += atom.translate(SUP) + str(
388                    int(subtraction.get(atom))
389                ).translate(SUB)
390
391        return formula_srt

Subtract a formula from the current formula object

Parameters
  • formula_obj (MolecularFormula): MolecularFormula object to be subtracted from the current formula object
  • formated (bool, optional): If True, returns the formula in string format, by default True
dbe_ai

Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.