corems.molecular_formula.factory.MolecularFormulaFactory

  1import re
  2
  3from corems.encapsulation.constant import Atoms, Labels
  4from corems.molecular_formula.calc.MolecularFormulaCalc import MolecularFormulaCalc
  5
  6__author__ = "Yuri E. Corilo"
  7__date__ = "Jun 24, 2019"
  8
  9
 10class MolecularFormulaBase(MolecularFormulaCalc):
 11    """Base class for representing a molecular formula.
 12
 13    Parameters
 14    ----------
 15    molecular_formula : dict, list, str
 16        The molecular formula.
 17    ion_charge : int
 18        The ion charge.
 19    ion_type : str, optional
 20        The ion type. Defaults to None.
 21    adduct_atom : str, optional
 22        The adduct atom. Defaults to None.
 23    mspeak_parent : _MSPeak, optional
 24        The parent mass spectrum peak object instance. Defaults to None.
 25    external_mz : float, optional
 26        The external m/z value. Defaults to None.
 27
 28    Raises
 29    ------
 30    TypeError
 31        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
 32
 33    Attributes
 34    ----------
 35    isotopologue_count_percentile : float
 36        The isotopologue count percentile.
 37    O_C : float
 38        The O/C ratio.
 39    H_C : float
 40        The H/C ratio.
 41    dbe : float
 42        The double bond equivalent.
 43    mz_nominal_calc : int
 44        The nominal m/z value.
 45    mz_error : float
 46        The m/z error.
 47    mz_calc : float
 48        The m/z value.
 49    protonated_mz : float
 50        The protonated or deprotonated m/z value.
 51    radical_mz : float
 52        The radical m/z value.
 53    neutral_mass : float
 54        The neutral mass.
 55    ion_type : str
 56        The ion type.
 57    ion_charge : int
 58        The ion charge.
 59    atoms : list
 60        The atoms in the molecular formula.
 61    confidence_score : float
 62        The confidence score of the molecular formula identification.
 63    isotopologue_similarity : float
 64        The isotopologue similarity score of the molecular formula identification.
 65    average_mz_error_score : float
 66        The average m/z error score of the molecular formula identification, including the isotopologues.
 67    mz_error_score : float
 68        The m/z error score of the molecular formula identification.
 69    kmd : float
 70        The Kendrick mass defect (KMD).
 71    kendrick_mass : float
 72        The Kendrick mass.
 73    knm : float
 74        The nominal Kendrick mass.
 75    string : str
 76        The molecular formula string.
 77    string_formated : str
 78        The molecular formula string formated with subscripts and superscripts.
 79    class_label : str
 80        The class label.
 81    class_dict : dict
 82        The class dictionary.
 83
 84    Methods
 85    -------
 86    * change_kendrick_base(kendrick_dict_base).
 87        Change the Kendrick base.
 88    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
 89        Calculate the isotopologues.
 90    * atoms_qnt(atom).
 91        Get the atom quantity.
 92    * atoms_symbol(atom).
 93        Get the atom symbol without the mass number.
 94    * to_dict().
 95        Get the molecular formula as a dictionary.
 96    * to_list().
 97        Get the molecular formula as a list.
 98    """
 99
100    def __init__(
101        self,
102        molecular_formula,
103        ion_charge,
104        ion_type=None,
105        adduct_atom=None,
106        mspeak_parent=None,
107        external_mz=None,
108    ):
109        # clear dictionary of atoms with 0 value
110        if type(molecular_formula) is dict:
111            self._from_dict(molecular_formula, ion_type, adduct_atom)
112
113        elif type(molecular_formula) is list:
114            self._from_list(molecular_formula, ion_type, adduct_atom)
115
116        elif type(molecular_formula) is str:
117            self._from_str(molecular_formula, ion_type, adduct_atom)
118
119        self._ion_charge = ion_charge
120        self._external_mz = external_mz
121        self._confidence_score = None
122        self._isotopologue_similarity = None
123        self._mz_error_score = None
124        self._mass_error_average_score = None
125
126        self.is_isotopologue = False
127
128        # parent mass spectrum peak obj instance
129        self._mspeak_parent = mspeak_parent
130
131        self.expected_isotopologues = []
132        self.mspeak_mf_isotopologues_indexes = []
133
134        if self._mspeak_parent:
135            kendrick_dict_base = (
136                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
137            )
138        else:
139            kendrick_dict_base = {"C": 1, "H": 2}
140        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
141            kendrick_dict_base
142        )
143
144    def __repr__(self):
145        return "MolecularFormula({0},{1},ion type = {2}".format(
146            self._d_molecular_formula, self.ion_charge, self.ion_type
147        )
148
149    def __str__(self):
150        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(
151            self.string, self.ion_charge, self.ion_type, self.mz_calc
152        )
153
154    def __len__(self):
155        # crash if keys are not ordered
156        return len(self._d_molecular_formula.keys())
157
158    def __getitem__(self, atom):
159        # atom = list(self._d_molecular_formula.keys())[position]
160        if atom in self._d_molecular_formula.keys():
161            return self._d_molecular_formula[atom]
162        else:
163            return 0
164
165    def get(self, atom):
166        """Get the atom quantity of a specific atom.
167
168        Parameters
169        ----------
170        atom : str
171            The atom symbol.
172
173        Returns
174        -------
175        int
176            The atom quantity.
177        """
178        # atom = list(self._d_molecular_formula.keys())[position]
179        if atom in self._d_molecular_formula.keys():
180            return self._d_molecular_formula[atom]
181        else:
182            return 0
183
184    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
185        self._d_molecular_formula = {
186            key: val for key, val in molecular_formula.items() if val != 0
187        }
188
189        if ion_type is not None:
190            self._d_molecular_formula[Labels.ion_type] = ion_type
191
192        if adduct_atom:
193            if adduct_atom in self._d_molecular_formula:
194                self._d_molecular_formula[adduct_atom] += 1
195            else:
196                self._d_molecular_formula[adduct_atom] = 1
197        self.adduct_atom = adduct_atom
198
199    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
200        # list has to be in the format
201        # ['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]
202        self._d_molecular_formula = {}
203        for each in range(0, len(molecular_formula_list), 2):
204            atoms_label = molecular_formula_list[each]
205            atoms_count = int(molecular_formula_list[each + 1])
206
207            if atoms_count > 0:
208                self._d_molecular_formula[atoms_label] = int(atoms_count)
209
210        self._d_molecular_formula[Labels.ion_type] = ion_type
211        if adduct_atom:
212            self.adduct_atom = adduct_atom
213            if adduct_atom in self._d_molecular_formula:
214                self._d_molecular_formula[adduct_atom] += 1
215            else:
216                self._d_molecular_formula[adduct_atom] = 1
217        else:
218            self.adduct_atom = None
219
220    def _from_str(self, molecular_formula_str, ion_type, adduct_atom):
221        # string has to be in the format
222        #'C10 H21 13C1 Cl1 37Cl1 etc'
223        # Check if there are spaces in the string
224        if " " not in molecular_formula_str:
225            raise ValueError(
226                "The molecular formula string should have spaces, input: %s"
227                % molecular_formula_str
228            )
229
230        # Split the string by spaces
231        # Grab the text before a digit for each element after splitting on spaces (atoms)
232        elements = [re.sub(r"\d+$", "", x) for x in molecular_formula_str.split()]
233        # Grab the digits at the end of each element after splitting on spaces (counts)
234        counts = [re.findall(r"\d+$", x)[0] for x in molecular_formula_str.split()]
235        # Check that the number of elements and counts are the same
236        if len(elements) != len(counts):
237            raise ValueError(
238                "The number of elements and counts do not match, input: %s"
239                % molecular_formula_str
240            )
241
242        # Create a dictionary from the elements and counts and add it to the molecular formula
243        dict_ = dict(zip(elements, counts))
244        # Cast counts to integers
245        dict_ = {key: int(val) for key, val in dict_.items()}
246        self._from_dict(dict_, ion_type, adduct_atom)
247
248    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
249        """Splits the molecular formula string.
250
251        Parameters
252        ----------
253        delimiters : list
254            The list of delimiters.
255        string : str
256            The molecular formula string.
257        maxsplit : int, optional
258            The maximum number of splits. Defaults to 0.
259
260        Returns
261        -------
262        list
263            The molecular formula list.
264
265        Notes
266        -----
267        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
268        """
269        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
270        isotopes = re.findall(regexPattern, string)  # pragma: no cover
271        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
272
273        return [isotopes[0], int(counts[1])]
274
275    @property
276    def isotopologue_count_percentile(
277        self,
278    ):
279        if not len(self.expected_isotopologues) == 0:
280            return (
281                len(self.mspeak_mf_isotopologues_indexes)
282                / len(self.expected_isotopologues)
283            ) * 100
284        else:
285            return 100
286
287    @property
288    def O_C(self):
289        if "O" in self._d_molecular_formula.keys():
290            # gather all the Os and Hs, regardless of the isotopic composition
291            Os = sum(
292                [
293                    self._d_molecular_formula.get(key)
294                    for key in ["O"] + Atoms.isotopes["O"][1]
295                    if key in self._d_molecular_formula.keys()
296                ]
297            )
298            Cs = sum(
299                [
300                    self._d_molecular_formula.get(key)
301                    for key in ["C"] + Atoms.isotopes["C"][1]
302                    if key in self._d_molecular_formula.keys()
303                ]
304            )
305            return Os / Cs
306        else:
307            return 0
308
309    @property
310    def H_C(self):
311        # gather all the Cs and Hs, regardless of the isotopic composition
312        Cs = sum(
313            [
314                self._d_molecular_formula.get(key)
315                for key in ["C"] + Atoms.isotopes["C"][1]
316                if key in self._d_molecular_formula.keys()
317            ]
318        )
319        Hs = sum(
320            [
321                self._d_molecular_formula.get(key)
322                for key in ["H"] + Atoms.isotopes["H"][1]
323                if key in self._d_molecular_formula.keys()
324            ]
325        )
326        return Hs / Cs
327
328    @property
329    def A_I(self):
330        """Aromaticity index"""
331        return self._calc_aromaticity_index()
332
333    @property
334    def A_I_mod(self):
335        """Modified aromaticity index"""
336        return self._calc_aromaticity_index_mod()
337
338    @property
339    def nosc(self):
340        """Nominal oxidation state of carbon"""
341        return self._calc_nosc()
342
343    @property
344    def dbe(self):
345        return self._calc_dbe()
346
347    @property
348    def mz_nominal_calc(self):
349        return int(self._calc_mz())
350
351    @property
352    def mz_error(self):
353        return self._calc_assignment_mass_error()
354
355    @property
356    def mz_calc(self):
357        return self._calc_mz()
358
359    @property
360    def protonated_mz(self):
361        return self._protonated_mz(self.ion_charge)
362
363    @property
364    def radical_mz(self):
365        return self._radical_mz(self.ion_charge)
366
367    @property
368    def neutral_mass(self):
369        return self._neutral_mass()
370
371    def adduct_mz(self, adduct_atom):
372        """Get m/z of an adducted ion version of the molecular formula.
373
374        Parameters
375        ----------
376        adduct_atom : str
377            The adduct atom.
378
379        Returns
380        -------
381        float
382            The m/z value of the adducted ion version of the molecular formula.
383        """
384        return self._adduct_mz(adduct_atom, self.ion_charge)
385
386    @property
387    def ion_type(self):
388        ion_type = self._d_molecular_formula.get(Labels.ion_type)
389        if ion_type == Labels.protonated_de_ion:
390            if self.ion_charge > 0:
391                return Labels.protonated
392            else:
393                return Labels.de_protonated
394        else:
395            return ion_type
396
397    @ion_type.setter
398    def ion_type(self, ion_type):
399        if ion_type in [
400            Labels.protonated_de_ion,
401            Labels.adduct_ion,
402            Labels.radical_ion,
403        ]:
404            self._d_molecular_formula[Labels.ion_type] = ion_type
405        else:
406            raise TypeError(
407                "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"
408                % ion_type
409            )
410
411    @property
412    def ion_charge(self):
413        return self._ion_charge
414
415    @property
416    def atoms(self):
417        """Get the atoms in the molecular formula."""
418        # if there is an adduct_atom, them reduce it from the atoms list
419        if self.adduct_atom is None:
420            return [
421                key
422                for key in self._d_molecular_formula.keys()
423                if key != Labels.ion_type
424            ]
425        else:
426            temp_dict = self._d_molecular_formula.copy()
427            temp_dict[self.adduct_atom] -= 1
428            return [
429                key
430                for key, val in temp_dict.items()
431                if key != Labels.ion_type and val > 0
432            ]
433
434    @property
435    def confidence_score(self):
436        if not self._confidence_score:
437            self._confidence_score = self._calc_confidence_score()
438
439        return self._confidence_score
440
441    @property
442    def isotopologue_similarity(self):
443        if not self._isotopologue_similarity:
444            self._isotopologue_similarity = self._calc_isotopologue_confidence()
445
446        return self._isotopologue_similarity
447
448    @property
449    def average_mz_error_score(self):
450        # includes the isotopologues
451
452        if not self._mass_error_average_score:
453            self._mass_error_average_score = self._calc_average_mz_score()
454
455        return self._mass_error_average_score
456
457    @property
458    def mz_error_score(self):
459        if not self._mz_error_score:
460            self._mz_error_score = self._calc_mz_confidence()
461
462        return self._mz_error_score
463
464    @property
465    def kmd(self):
466        return self._kmd
467
468    @property
469    def kendrick_mass(self):
470        return self._kendrick_mass
471
472    @property
473    def knm(self):
474        return self._nominal_km
475
476    def change_kendrick_base(self, kendrick_dict_base):
477        """Change the Kendrick base.
478
479        Parameters
480        ----------
481        kendrick_dict_base : dict
482            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
483        """
484        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
485            kendrick_dict_base
486        )
487
488    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
489        """Calculate the isotopologues for a given molecular formula.
490
491        Parameters
492        ----------
493        min_abundance : float
494            The minimum abundance.
495        current_mono_abundance : float
496            The current monoisotopic abundance.
497        dynamic_range : float
498            The dynamic range.
499
500        Yields
501        ------
502        MolecularFormulaIsotopologue
503            The molecular formula isotopologue.
504
505        Notes
506        -----
507        This calculation ignores the hydrogen isotopes.
508        """
509        isotopologues = []
510        for mf in self._cal_isotopologues(
511            self._d_molecular_formula,
512            min_abundance,
513            current_mono_abundance,
514            dynamic_range,
515        ):
516            isotopologues.append(mf)
517
518        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
519        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
520
521        for mf in sorted_isotopologues:
522            yield MolecularFormulaIsotopologue(
523                *mf,
524                current_mono_abundance,
525                self.ion_charge,
526                ion_type=self.ion_type,
527                adduct_atom=self.adduct_atom,
528            )
529
530    def atoms_qnt(self, atom):
531        """Get the atom quantity of a specific atom in the molecular formula."""
532        if atom in self._d_molecular_formula:
533            return self._d_molecular_formula.get(atom)
534        else:
535            raise Warning(
536                "Could not find %s in this Molecular Formula object" % str(atom)
537            )
538
539    def atoms_symbol(self, atom):
540        """Get the atom symbol without the mass number."""
541        return "".join([i for i in atom if not i.isdigit()])
542
543    @property
544    def string(self):
545        """Returns the molecular formula as a string."""
546        if self._d_molecular_formula:
547            if self.adduct_atom is None:
548                mol_form_dict = self._d_molecular_formula
549            else:
550                mol_form_dict = self._d_molecular_formula.copy()
551                if self.adduct_atom not in mol_form_dict.keys():
552                    raise Exception("Adduct atom not found in molecular formula dict")
553                mol_form_dict[self.adduct_atom] -= 1
554                mol_form_dict = {
555                    key: val for key, val in mol_form_dict.items() if val != 0
556                }
557            formula_srt = ""
558            for atom in Atoms.atoms_order:
559                if atom in mol_form_dict.keys():
560                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + " "
561            return formula_srt.strip()
562
563        else:
564            raise Exception("Molecular formula identification not performed yet")
565
566    @property
567    def string_formated(self):
568        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
569        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
570
571        if self._d_molecular_formula:
572            formula_srt = ""
573            for atom in Atoms.atoms_order:
574                if atom in self.to_dict().keys():
575                    formula_srt += atom.translate(SUP) + str(
576                        int(self.to_dict().get(atom))
577                    ).translate(SUB)
578            return formula_srt
579
580        else:
581            raise Exception("Molecular formula identification not performed yet")
582
583    def to_dict(self):
584        """Returns the molecular formula as a dictionary.
585
586        Returns
587        -------
588        dict
589            The molecular formula as a dictionary.
590        """
591        return self._d_molecular_formula
592
593    def to_list(self):
594        """Returns the molecular formula as a list.
595
596        Returns
597        -------
598        list
599            The molecular formula as a list.
600
601        Raises
602        ------
603        Exception
604            If the molecular formula identification was not performed yet.
605        """
606        # TODO ensure self._d_molecular_formula is a orderedDict
607
608        if self._d_molecular_formula:
609            formula_list = []
610
611            for atom, atom_number in self._d_molecular_formula.items():
612                if atom != Labels.ion_type:
613                    formula_list.append(atom)
614                    formula_list.append(atom_number)
615
616            return formula_list
617        else:
618            raise Exception("Molecular formula identification not performed yet")
619
620    @property
621    def class_label(self):
622        if self._d_molecular_formula:
623            formulalist = self.to_list()
624            classstring = ""
625
626            for each in range(0, len(formulalist), 2):
627                if (
628                    formulalist[each] != "C"
629                    and formulalist[each] != "H"
630                    and formulalist[each] != "HC"
631                ):
632                    classstring = (
633                        classstring
634                        + str(formulalist[each])
635                        + str(formulalist[each + 1])
636                        + " "
637                    )
638
639            if classstring == "":
640                classstring = "HC"
641
642            classstring = classstring.strip()
643
644            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:
645                return classstring + " -R"
646
647            # elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:
648
649            #    return classstring + ' -A'
650
651            else:
652                return classstring
653
654            #'dict, tuple or string'
655
656        else:
657            raise Exception("Molecular formula identification not performed yet")
658
659    @property
660    def class_dict(self):
661        if self._d_molecular_formula:
662            class_dict = {}
663
664            for atom, qnt in self._d_molecular_formula.items():
665                if atom != Labels.ion_type and atom != "C" and atom != "H":
666                    class_dict[atom] = qnt
667
668            return class_dict
669
670        raise Exception("Molecular formula identification not performed yet")
671
672
class MolecularFormulaIsotopologue(MolecularFormulaBase):
    """Class for representing a molecular formula isotopologue.

    Parameters
    ----------
    _d_molecular_formula : dict
        The molecular formula as a dictionary. It is not modified.
    prob_ratio : float
        The probability ratio.
    mono_abundance : float
        The monoisotopic abundance.
    ion_charge : int
        The ion charge.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    ion_type : str, optional
        The ion type. Defaults to None, in which case the ion type stored in
        the formula dictionary (if any) is used.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.

    Raises
    ------
    Exception
        If the ion type is an adduct and neither `adduct_atom` nor one of
        its isotopes is present in the formula dictionary.

    Attributes
    ----------
    prob_ratio : float
        The probability ratio.
    abundance_calc : float
        The calculated abundance.
    area_error : float
        The area error.
    abundance_error : float
        The abundance error.
    is_isotopologue : bool
        The isotopologue flag. Defaults to True.
    mspeak_index_mono_isotopic : int
        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    mono_isotopic_formula_index : int
        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    """

    def __init__(
        self,
        _d_molecular_formula,
        prob_ratio,
        mono_abundance,
        ion_charge,
        mspeak_parent=None,
        ion_type=None,
        adduct_atom=None,
    ):
        if ion_type is None:
            # Fall back to the ion type stored in the formula dict, if any.
            ion_type = _d_molecular_formula.get(Labels.ion_type)
        else:
            ion_type = Labels.ion_type_translate.get(ion_type)

        if ion_type == Labels.adduct_ion:
            resolved_adduct = None
            if adduct_atom in _d_molecular_formula:
                resolved_adduct = adduct_atom
            else:
                # The adduct may be present as one of its isotopes instead.
                # An unknown or missing adduct has no isotope entry, which
                # leads to the explicit exception below.
                isotope_entry = Atoms.isotopes.get(adduct_atom)
                for adduct_iso in isotope_entry[1] if isotope_entry else ():
                    if adduct_iso in _d_molecular_formula:
                        resolved_adduct = adduct_iso
            adduct_atom = resolved_adduct
            if adduct_atom is None:
                raise Exception("adduct_atom is required for adduct ion")

            # The base class adds the adduct atom back, so remove one here.
            # Work on a copy so the caller's dictionary is left untouched.
            without_adduct = dict(_d_molecular_formula)
            without_adduct[adduct_atom] -= 1
            _d_molecular_formula = {
                key: val for key, val in without_adduct.items() if val != 0
            }

        super().__init__(
            molecular_formula=_d_molecular_formula,
            ion_charge=ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
        )

        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
        self.prob_ratio = prob_ratio
        self.abundance_calc = mono_abundance * prob_ratio

        self.is_isotopologue = True

        self.mspeak_index_mono_isotopic = None
        self.mono_isotopic_formula_index = None

        # parent mass spectrum peak obj instance; it is assigned after the
        # base initializer has run, which was called without it.
        self._mspeak_parent = mspeak_parent

    @property
    def area_error(self):
        """The area error."""
        return self._calc_area_error()

    @property
    def abundance_error(self):
        """The abundance error."""
        return self._calc_abundance_error()
774
775
class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    name : str, optional
        The name of the reference molecule. Defaults to None.
    kegg_id : str, optional
        The KEGG ID of the reference molecule. Defaults to None.
    cas : str, optional
        The CAS number of the reference molecule. Defaults to None.

    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        name=None,
        kegg_id=None,
        cas=None,
    ) -> None:
        super().__init__(
            molecular_formula,
            ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
        )

        # The constructor stores these values as given; only the `name`
        # setter validates its input.
        self._name = name
        self._kegg_id = kegg_id
        self._cas = cas

    @property
    def name(self):
        """The name of the reference molecule."""
        return self._name

    @name.setter
    def name(self, name):
        if not isinstance(name, str):
            # Include the rejected value in the message.
            raise TypeError("name: {} should be type string".format(name))
        self._name = name

    @property
    def kegg_id(self):
        """The KEGG ID of the reference molecule."""
        return self._kegg_id

    @kegg_id.setter
    def kegg_id(self, kegg_id):
        # Stored without type validation.
        self._kegg_id = kegg_id

    @property
    def cas(self):
        """The CAS number of the reference molecule."""
        return self._cas

    @cas.setter
    def cas(self, cas):
        # Stored without type validation.
        self._cas = cas
858
859
class MolecularFormula(MolecularFormulaBase):
    """General-purpose molecular formula.

    Adds nothing to `MolecularFormulaBase` except a different default for
    `external_mz`; every argument is forwarded to the base initializer.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to False.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        external_mz=False,
    ):
        # Optional settings are gathered first and handed on by keyword.
        optional_settings = {
            "ion_type": ion_type,
            "adduct_atom": adduct_atom,
            "mspeak_parent": mspeak_parent,
            "external_mz": external_mz,
        }
        super().__init__(molecular_formula, ion_charge, **optional_settings)
 11class MolecularFormulaBase(MolecularFormulaCalc):
 12    """Base class for representing a molecular formula.
 13
 14    Parameters
 15    ----------
 16    molecular_formula : dict, list, str
 17        The molecular formula.
 18    ion_charge : int
 19        The ion charge.
 20    ion_type : str, optional
 21        The ion type. Defaults to None.
 22    adduct_atom : str, optional
 23        The adduct atom. Defaults to None.
 24    mspeak_parent : _MSPeak, optional
 25        The parent mass spectrum peak object instance. Defaults to None.
 26    external_mz : float, optional
 27        The external m/z value. Defaults to None.
 28
 29    Raises
 30    ------
 31    TypeError
 32        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
 33
 34    Attributes
 35    ----------
 36    isotopologue_count_percentile : float
 37        The isotopologue count percentile.
 38    O_C : float
 39        The O/C ratio.
 40    H_C : float
 41        The H/C ratio.
 42    dbe : float
 43        The double bond equivalent.
 44    mz_nominal_calc : int
 45        The nominal m/z value.
 46    mz_error : float
 47        The m/z error.
 48    mz_calc : float
 49        The m/z value.
 50    protonated_mz : float
 51        The protonated or deprotonated m/z value.
 52    radical_mz : float
 53        The radical m/z value.
 54    neutral_mass : float
 55        The neutral mass.
 56    ion_type : str
 57        The ion type.
 58    ion_charge : int
 59        The ion charge.
 60    atoms : list
 61        The atoms in the molecular formula.
 62    confidence_score : float
 63        The confidence score of the molecular formula identification.
 64    isotopologue_similarity : float
 65        The isotopologue similarity score of the molecular formula identification.
 66    average_mz_error_score : float
 67        The average m/z error score of the molecular formula identification, including the isotopologues.
 68    mz_error_score : float
 69        The m/z error score of the molecular formula identification.
 70    kmd : float
 71        The Kendrick mass defect (KMD).
 72    kendrick_mass : float
 73        The Kendrick mass.
 74    knm : float
 75        The nominal Kendrick mass.
 76    string : str
 77        The molecular formula string.
 78    string_formated : str
 79        The molecular formula string formated with subscripts and superscripts.
 80    class_label : str
 81        The class label.
 82    class_dict : dict
 83        The class dictionary.
 84
 85    Methods
 86    -------
 87    * change_kendrick_base(kendrick_dict_base).
 88        Change the Kendrick base.
 89    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
 90        Calculate the isotopologues.
 91    * atoms_qnt(atom).
 92        Get the atom quantity.
 93    * atoms_symbol(atom).
 94        Get the atom symbol without the mass number.
 95    * to_dict().
 96        Get the molecular formula as a dictionary.
 97    * to_list().
 98        Get the molecular formula as a list.
 99    """
100
101    def __init__(
102        self,
103        molecular_formula,
104        ion_charge,
105        ion_type=None,
106        adduct_atom=None,
107        mspeak_parent=None,
108        external_mz=None,
109    ):
110        # clear dictionary of atoms with 0 value
111        if type(molecular_formula) is dict:
112            self._from_dict(molecular_formula, ion_type, adduct_atom)
113
114        elif type(molecular_formula) is list:
115            self._from_list(molecular_formula, ion_type, adduct_atom)
116
117        elif type(molecular_formula) is str:
118            self._from_str(molecular_formula, ion_type, adduct_atom)
119
120        self._ion_charge = ion_charge
121        self._external_mz = external_mz
122        self._confidence_score = None
123        self._isotopologue_similarity = None
124        self._mz_error_score = None
125        self._mass_error_average_score = None
126
127        self.is_isotopologue = False
128
129        # parent mass spectrum peak obj instance
130        self._mspeak_parent = mspeak_parent
131
132        self.expected_isotopologues = []
133        self.mspeak_mf_isotopologues_indexes = []
134
135        if self._mspeak_parent:
136            kendrick_dict_base = (
137                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
138            )
139        else:
140            kendrick_dict_base = {"C": 1, "H": 2}
141        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
142            kendrick_dict_base
143        )
144
145    def __repr__(self):
146        return "MolecularFormula({0},{1},ion type = {2}".format(
147            self._d_molecular_formula, self.ion_charge, self.ion_type
148        )
149
150    def __str__(self):
151        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(
152            self.string, self.ion_charge, self.ion_type, self.mz_calc
153        )
154
155    def __len__(self):
156        # crash if keys are not ordered
157        return len(self._d_molecular_formula.keys())
158
159    def __getitem__(self, atom):
160        # atom = list(self._d_molecular_formula.keys())[position]
161        if atom in self._d_molecular_formula.keys():
162            return self._d_molecular_formula[atom]
163        else:
164            return 0
165
166    def get(self, atom):
167        """Get the atom quantity of a specific atom.
168
169        Parameters
170        ----------
171        atom : str
172            The atom symbol.
173
174        Returns
175        -------
176        int
177            The atom quantity.
178        """
179        # atom = list(self._d_molecular_formula.keys())[position]
180        if atom in self._d_molecular_formula.keys():
181            return self._d_molecular_formula[atom]
182        else:
183            return 0
184
185    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
186        self._d_molecular_formula = {
187            key: val for key, val in molecular_formula.items() if val != 0
188        }
189
190        if ion_type is not None:
191            self._d_molecular_formula[Labels.ion_type] = ion_type
192
193        if adduct_atom:
194            if adduct_atom in self._d_molecular_formula:
195                self._d_molecular_formula[adduct_atom] += 1
196            else:
197                self._d_molecular_formula[adduct_atom] = 1
198        self.adduct_atom = adduct_atom
199
200    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
201        # list has to be in the format
202        # ['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]
203        self._d_molecular_formula = {}
204        for each in range(0, len(molecular_formula_list), 2):
205            atoms_label = molecular_formula_list[each]
206            atoms_count = int(molecular_formula_list[each + 1])
207
208            if atoms_count > 0:
209                self._d_molecular_formula[atoms_label] = int(atoms_count)
210
211        self._d_molecular_formula[Labels.ion_type] = ion_type
212        if adduct_atom:
213            self.adduct_atom = adduct_atom
214            if adduct_atom in self._d_molecular_formula:
215                self._d_molecular_formula[adduct_atom] += 1
216            else:
217                self._d_molecular_formula[adduct_atom] = 1
218        else:
219            self.adduct_atom = None
220
221    def _from_str(self, molecular_formula_str, ion_type, adduct_atom):
222        # string has to be in the format
223        #'C10 H21 13C1 Cl1 37Cl1 etc'
224        # Check if there are spaces in the string
225        if " " not in molecular_formula_str:
226            raise ValueError(
227                "The molecular formula string should have spaces, input: %s"
228                % molecular_formula_str
229            )
230
231        # Split the string by spaces
232        # Grab the text before a digit for each element after splitting on spaces (atoms)
233        elements = [re.sub(r"\d+$", "", x) for x in molecular_formula_str.split()]
234        # Grab the digits at the end of each element after splitting on spaces (counts)
235        counts = [re.findall(r"\d+$", x)[0] for x in molecular_formula_str.split()]
236        # Check that the number of elements and counts are the same
237        if len(elements) != len(counts):
238            raise ValueError(
239                "The number of elements and counts do not match, input: %s"
240                % molecular_formula_str
241            )
242
243        # Create a dictionary from the elements and counts and add it to the molecular formula
244        dict_ = dict(zip(elements, counts))
245        # Cast counts to integers
246        dict_ = {key: int(val) for key, val in dict_.items()}
247        self._from_dict(dict_, ion_type, adduct_atom)
248
249    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
250        """Splits the molecular formula string.
251
252        Parameters
253        ----------
254        delimiters : list
255            The list of delimiters.
256        string : str
257            The molecular formula string.
258        maxsplit : int, optional
259            The maximum number of splits. Defaults to 0.
260
261        Returns
262        -------
263        list
264            The molecular formula list.
265
266        Notes
267        -----
268        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
269        """
270        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
271        isotopes = re.findall(regexPattern, string)  # pragma: no cover
272        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
273
274        return [isotopes[0], int(counts[1])]
275
276    @property
277    def isotopologue_count_percentile(
278        self,
279    ):
280        if not len(self.expected_isotopologues) == 0:
281            return (
282                len(self.mspeak_mf_isotopologues_indexes)
283                / len(self.expected_isotopologues)
284            ) * 100
285        else:
286            return 100
287
288    @property
289    def O_C(self):
290        if "O" in self._d_molecular_formula.keys():
291            # gather all the Os and Hs, regardless of the isotopic composition
292            Os = sum(
293                [
294                    self._d_molecular_formula.get(key)
295                    for key in ["O"] + Atoms.isotopes["O"][1]
296                    if key in self._d_molecular_formula.keys()
297                ]
298            )
299            Cs = sum(
300                [
301                    self._d_molecular_formula.get(key)
302                    for key in ["C"] + Atoms.isotopes["C"][1]
303                    if key in self._d_molecular_formula.keys()
304                ]
305            )
306            return Os / Cs
307        else:
308            return 0
309
310    @property
311    def H_C(self):
312        # gather all the Cs and Hs, regardless of the isotopic composition
313        Cs = sum(
314            [
315                self._d_molecular_formula.get(key)
316                for key in ["C"] + Atoms.isotopes["C"][1]
317                if key in self._d_molecular_formula.keys()
318            ]
319        )
320        Hs = sum(
321            [
322                self._d_molecular_formula.get(key)
323                for key in ["H"] + Atoms.isotopes["H"][1]
324                if key in self._d_molecular_formula.keys()
325            ]
326        )
327        return Hs / Cs
328
    @property
    def A_I(self):
        """Aromaticity index, as returned by ``_calc_aromaticity_index``."""
        return self._calc_aromaticity_index()

    @property
    def A_I_mod(self):
        """Modified aromaticity index, as returned by ``_calc_aromaticity_index_mod``."""
        return self._calc_aromaticity_index_mod()

    @property
    def nosc(self):
        """Nominal oxidation state of carbon, as returned by ``_calc_nosc``."""
        return self._calc_nosc()

    @property
    def dbe(self):
        """The double bond equivalent, as returned by ``_calc_dbe``."""
        return self._calc_dbe()

    @property
    def mz_nominal_calc(self):
        """The nominal m/z value: ``_calc_mz()`` converted with ``int``."""
        return int(self._calc_mz())

    @property
    def mz_error(self):
        """The m/z error, as returned by ``_calc_assignment_mass_error``."""
        return self._calc_assignment_mass_error()

    @property
    def mz_calc(self):
        """The m/z value, as returned by ``_calc_mz``."""
        return self._calc_mz()

    @property
    def protonated_mz(self):
        """The protonated or deprotonated m/z value for this object's ion charge."""
        return self._protonated_mz(self.ion_charge)

    @property
    def radical_mz(self):
        """The radical m/z value for this object's ion charge."""
        return self._radical_mz(self.ion_charge)

    @property
    def neutral_mass(self):
        """The neutral mass, as returned by ``_neutral_mass``."""
        return self._neutral_mass()
371
372    def adduct_mz(self, adduct_atom):
373        """Get m/z of an adducted ion version of the molecular formula.
374
375        Parameters
376        ----------
377        adduct_atom : str
378            The adduct atom.
379
380        Returns
381        -------
382        float
383            The m/z value of the adducted ion version of the molecular formula.
384        """
385        return self._adduct_mz(adduct_atom, self.ion_charge)
386
387    @property
388    def ion_type(self):
389        ion_type = self._d_molecular_formula.get(Labels.ion_type)
390        if ion_type == Labels.protonated_de_ion:
391            if self.ion_charge > 0:
392                return Labels.protonated
393            else:
394                return Labels.de_protonated
395        else:
396            return ion_type
397
398    @ion_type.setter
399    def ion_type(self, ion_type):
400        if ion_type in [
401            Labels.protonated_de_ion,
402            Labels.adduct_ion,
403            Labels.radical_ion,
404        ]:
405            self._d_molecular_formula[Labels.ion_type] = ion_type
406        else:
407            raise TypeError(
408                "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"
409                % ion_type
410            )
411
    @property
    def ion_charge(self):
        """The ion charge stored in ``_ion_charge`` (read-only: no setter is defined here)."""
        return self._ion_charge
415
416    @property
417    def atoms(self):
418        """Get the atoms in the molecular formula."""
419        # if there is an adduct_atom, them reduce it from the atoms list
420        if self.adduct_atom is None:
421            return [
422                key
423                for key in self._d_molecular_formula.keys()
424                if key != Labels.ion_type
425            ]
426        else:
427            temp_dict = self._d_molecular_formula.copy()
428            temp_dict[self.adduct_atom] -= 1
429            return [
430                key
431                for key, val in temp_dict.items()
432                if key != Labels.ion_type and val > 0
433            ]
434
435    @property
436    def confidence_score(self):
437        if not self._confidence_score:
438            self._confidence_score = self._calc_confidence_score()
439
440        return self._confidence_score
441
442    @property
443    def isotopologue_similarity(self):
444        if not self._isotopologue_similarity:
445            self._isotopologue_similarity = self._calc_isotopologue_confidence()
446
447        return self._isotopologue_similarity
448
449    @property
450    def average_mz_error_score(self):
451        # includes the isotopologues
452
453        if not self._mass_error_average_score:
454            self._mass_error_average_score = self._calc_average_mz_score()
455
456        return self._mass_error_average_score
457
458    @property
459    def mz_error_score(self):
460        if not self._mz_error_score:
461            self._mz_error_score = self._calc_mz_confidence()
462
463        return self._mz_error_score
464
    @property
    def kmd(self):
        """The Kendrick mass defect (KMD), as last computed by ``_calc_kmd``."""
        return self._kmd

    @property
    def kendrick_mass(self):
        """The Kendrick mass, as last computed by ``_calc_kmd``."""
        return self._kendrick_mass

    @property
    def knm(self):
        """The nominal Kendrick mass, as last computed by ``_calc_kmd``."""
        return self._nominal_km
476
477    def change_kendrick_base(self, kendrick_dict_base):
478        """Change the Kendrick base.
479
480        Parameters
481        ----------
482        kendrick_dict_base : dict
483            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
484        """
485        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
486            kendrick_dict_base
487        )
488
489    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
490        """Calculate the isotopologues for a given molecular formula.
491
492        Parameters
493        ----------
494        min_abundance : float
495            The minimum abundance.
496        current_mono_abundance : float
497            The current monoisotopic abundance.
498        dynamic_range : float
499            The dynamic range.
500
501        Yields
502        ------
503        MolecularFormulaIsotopologue
504            The molecular formula isotopologue.
505
506        Notes
507        -----
508        This calculation ignores the hydrogen isotopes.
509        """
510        isotopologues = []
511        for mf in self._cal_isotopologues(
512            self._d_molecular_formula,
513            min_abundance,
514            current_mono_abundance,
515            dynamic_range,
516        ):
517            isotopologues.append(mf)
518
519        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
520        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
521
522        for mf in sorted_isotopologues:
523            yield MolecularFormulaIsotopologue(
524                *mf,
525                current_mono_abundance,
526                self.ion_charge,
527                ion_type=self.ion_type,
528                adduct_atom=self.adduct_atom,
529            )
530
531    def atoms_qnt(self, atom):
532        """Get the atom quantity of a specific atom in the molecular formula."""
533        if atom in self._d_molecular_formula:
534            return self._d_molecular_formula.get(atom)
535        else:
536            raise Warning(
537                "Could not find %s in this Molecular Formula object" % str(atom)
538            )
539
540    def atoms_symbol(self, atom):
541        """Get the atom symbol without the mass number."""
542        return "".join([i for i in atom if not i.isdigit()])
543
544    @property
545    def string(self):
546        """Returns the molecular formula as a string."""
547        if self._d_molecular_formula:
548            if self.adduct_atom is None:
549                mol_form_dict = self._d_molecular_formula
550            else:
551                mol_form_dict = self._d_molecular_formula.copy()
552                if self.adduct_atom not in mol_form_dict.keys():
553                    raise Exception("Adduct atom not found in molecular formula dict")
554                mol_form_dict[self.adduct_atom] -= 1
555                mol_form_dict = {
556                    key: val for key, val in mol_form_dict.items() if val != 0
557                }
558            formula_srt = ""
559            for atom in Atoms.atoms_order:
560                if atom in mol_form_dict.keys():
561                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + " "
562            return formula_srt.strip()
563
564        else:
565            raise Exception("Molecular formula identification not performed yet")
566
567    @property
568    def string_formated(self):
569        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
570        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
571
572        if self._d_molecular_formula:
573            formula_srt = ""
574            for atom in Atoms.atoms_order:
575                if atom in self.to_dict().keys():
576                    formula_srt += atom.translate(SUP) + str(
577                        int(self.to_dict().get(atom))
578                    ).translate(SUB)
579            return formula_srt
580
581        else:
582            raise Exception("Molecular formula identification not performed yet")
583
    def to_dict(self):
        """Returns the molecular formula as a dictionary.

        Returns
        -------
        dict
            The molecular formula as a dictionary. This is the internal
            dictionary itself, not a copy, so changes made by the caller are
            visible to this object.
        """
        return self._d_molecular_formula
593
594    def to_list(self):
595        """Returns the molecular formula as a list.
596
597        Returns
598        -------
599        list
600            The molecular formula as a list.
601
602        Raises
603        ------
604        Exception
605            If the molecular formula identification was not performed yet.
606        """
607        # TODO ensure self._d_molecular_formula is a orderedDict
608
609        if self._d_molecular_formula:
610            formula_list = []
611
612            for atom, atom_number in self._d_molecular_formula.items():
613                if atom != Labels.ion_type:
614                    formula_list.append(atom)
615                    formula_list.append(atom_number)
616
617            return formula_list
618        else:
619            raise Exception("Molecular formula identification not performed yet")
620
621    @property
622    def class_label(self):
623        if self._d_molecular_formula:
624            formulalist = self.to_list()
625            classstring = ""
626
627            for each in range(0, len(formulalist), 2):
628                if (
629                    formulalist[each] != "C"
630                    and formulalist[each] != "H"
631                    and formulalist[each] != "HC"
632                ):
633                    classstring = (
634                        classstring
635                        + str(formulalist[each])
636                        + str(formulalist[each + 1])
637                        + " "
638                    )
639
640            if classstring == "":
641                classstring = "HC"
642
643            classstring = classstring.strip()
644
645            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:
646                return classstring + " -R"
647
648            # elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:
649
650            #    return classstring + ' -A'
651
652            else:
653                return classstring
654
655            #'dict, tuple or string'
656
657        else:
658            raise Exception("Molecular formula identification not performed yet")
659
660    @property
661    def class_dict(self):
662        if self._d_molecular_formula:
663            class_dict = {}
664
665            for atom, qnt in self._d_molecular_formula.items():
666                if atom != Labels.ion_type and atom != "C" and atom != "H":
667                    class_dict[atom] = qnt
668
669            return class_dict
670
671        raise Exception("Molecular formula identification not performed yet")

Base class for representing a molecular formula.

Parameters
  • molecular_formula (dict, list, str): The molecular formula.
  • ion_charge (int): The ion charge.
  • ion_type (str, optional): The ion type. Defaults to None.
  • adduct_atom (str, optional): The adduct atom. Defaults to None.
  • mspeak_parent (_MSPeak, optional): The parent mass spectrum peak object instance. Defaults to None.
  • external_mz (float, optional): The external m/z value. Defaults to None.
Raises
  • TypeError: If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT'.
Attributes
  • isotopologue_count_percentile (float): The isotopologue count percentile.
  • O_C (float): The O/C ratio.
  • H_C (float): The H/C ratio.
  • dbe (float): The double bond equivalent.
  • mz_nominal_calc (int): The nominal m/z value.
  • mz_error (float): The m/z error.
  • mz_calc (float): The m/z value.
  • protonated_mz (float): The protonated or deprotonated m/z value.
  • radical_mz (float): The radical m/z value.
  • neutral_mass (float): The neutral mass.
  • ion_type (str): The ion type.
  • ion_charge (int): The ion charge.
  • atoms (list): The atoms in the molecular formula.
  • confidence_score (float): The confidence score of the molecular formula identification.
  • isotopologue_similarity (float): The isotopologue similarity score of the molecular formula identification.
  • average_mz_error_score (float): The average m/z error score of the molecular formula identification, including the isotopologues.
  • mz_error_score (float): The m/z error score of the molecular formula identification.
  • kmd (float): The Kendrick mass defect (KMD).
  • kendrick_mass (float): The Kendrick mass.
  • knm (float): The nominal Kendrick mass.
  • string (str): The molecular formula string.
  • string_formated (str): The molecular formula string formatted with subscripts and superscripts.
  • class_label (str): The class label.
  • class_dict (dict): The class dictionary.
Methods
  • change_kendrick_base(kendrick_dict_base). Change the Kendrick base.
  • isotopologues(min_abundance, current_mono_abundance, dynamic_range). Calculate the isotopologues.
  • atoms_qnt(atom). Get the atom quantity.
  • atoms_symbol(atom). Get the atom symbol without the mass number.
  • to_dict(). Get the molecular formula as a dictionary.
  • to_list(). Get the molecular formula as a list.
MolecularFormulaBase( molecular_formula, ion_charge, ion_type=None, adduct_atom=None, mspeak_parent=None, external_mz=None)
101    def __init__(
102        self,
103        molecular_formula,
104        ion_charge,
105        ion_type=None,
106        adduct_atom=None,
107        mspeak_parent=None,
108        external_mz=None,
109    ):
110        # clear dictionary of atoms with 0 value
111        if type(molecular_formula) is dict:
112            self._from_dict(molecular_formula, ion_type, adduct_atom)
113
114        elif type(molecular_formula) is list:
115            self._from_list(molecular_formula, ion_type, adduct_atom)
116
117        elif type(molecular_formula) is str:
118            self._from_str(molecular_formula, ion_type, adduct_atom)
119
120        self._ion_charge = ion_charge
121        self._external_mz = external_mz
122        self._confidence_score = None
123        self._isotopologue_similarity = None
124        self._mz_error_score = None
125        self._mass_error_average_score = None
126
127        self.is_isotopologue = False
128
129        # parent mass spectrum peak obj instance
130        self._mspeak_parent = mspeak_parent
131
132        self.expected_isotopologues = []
133        self.mspeak_mf_isotopologues_indexes = []
134
135        if self._mspeak_parent:
136            kendrick_dict_base = (
137                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
138            )
139        else:
140            kendrick_dict_base = {"C": 1, "H": 2}
141        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
142            kendrick_dict_base
143        )
is_isotopologue
expected_isotopologues
mspeak_mf_isotopologues_indexes
def get(self, atom):
166    def get(self, atom):
167        """Get the atom quantity of a specific atom.
168
169        Parameters
170        ----------
171        atom : str
172            The atom symbol.
173
174        Returns
175        -------
176        int
177            The atom quantity.
178        """
179        # atom = list(self._d_molecular_formula.keys())[position]
180        if atom in self._d_molecular_formula.keys():
181            return self._d_molecular_formula[atom]
182        else:
183            return 0

Get the atom quantity of a specific atom.

Parameters
  • atom (str): The atom symbol.
Returns
  • int: The atom quantity.
def split(self, delimiters, string, maxsplit=0):
249    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
250        """Splits the molecular formula string.
251
252        Parameters
253        ----------
254        delimiters : list
255            The list of delimiters.
256        string : str
257            The molecular formula string.
258        maxsplit : int, optional
259            The maximum number of splits. Defaults to 0.
260
261        Returns
262        -------
263        list
264            The molecular formula list.
265
266        Notes
267        -----
268        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
269        """
270        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
271        isotopes = re.findall(regexPattern, string)  # pragma: no cover
272        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
273
274        return [isotopes[0], int(counts[1])]

Splits the molecular formula string.

Parameters
  • delimiters (list): The list of delimiters.
  • string (str): The molecular formula string.
  • maxsplit (int, optional): The maximum number of splits. Defaults to 0.
Returns
  • list: The molecular formula list.
Notes

Does not work when the formula has atoms with the same characters in a row that belong to different atoms, e.g. C10H21NNa.

isotopologue_count_percentile
O_C
H_C
A_I

Aromaticity index

A_I_mod

Modified aromaticity index

nosc

Nominal oxidation state of carbon

dbe
mz_nominal_calc
mz_error
mz_calc
protonated_mz
radical_mz
neutral_mass
def adduct_mz(self, adduct_atom):
372    def adduct_mz(self, adduct_atom):
373        """Get m/z of an adducted ion version of the molecular formula.
374
375        Parameters
376        ----------
377        adduct_atom : str
378            The adduct atom.
379
380        Returns
381        -------
382        float
383            The m/z value of the adducted ion version of the molecular formula.
384        """
385        return self._adduct_mz(adduct_atom, self.ion_charge)

Get m/z of an adducted ion version of the molecular formula.

Parameters
  • adduct_atom (str): The adduct atom.
Returns
  • float: The m/z value of the adducted ion version of the molecular formula.
ion_type
ion_charge
atoms

Get the atoms in the molecular formula.

confidence_score
isotopologue_similarity
average_mz_error_score
mz_error_score
kmd
kendrick_mass
knm
def change_kendrick_base(self, kendrick_dict_base):
477    def change_kendrick_base(self, kendrick_dict_base):
478        """Change the Kendrick base.
479
480        Parameters
481        ----------
482        kendrick_dict_base : dict
483            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
484        """
485        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
486            kendrick_dict_base
487        )

Change the Kendrick base.

Parameters
  • kendrick_dict_base (dict): The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
489    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
490        """Calculate the isotopologues for a given molecular formula.
491
492        Parameters
493        ----------
494        min_abundance : float
495            The minimum abundance.
496        current_mono_abundance : float
497            The current monoisotopic abundance.
498        dynamic_range : float
499            The dynamic range.
500
501        Yields
502        ------
503        MolecularFormulaIsotopologue
504            The molecular formula isotopologue.
505
506        Notes
507        -----
508        This calculation ignores the hydrogen isotopes.
509        """
510        isotopologues = []
511        for mf in self._cal_isotopologues(
512            self._d_molecular_formula,
513            min_abundance,
514            current_mono_abundance,
515            dynamic_range,
516        ):
517            isotopologues.append(mf)
518
519        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
520        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
521
522        for mf in sorted_isotopologues:
523            yield MolecularFormulaIsotopologue(
524                *mf,
525                current_mono_abundance,
526                self.ion_charge,
527                ion_type=self.ion_type,
528                adduct_atom=self.adduct_atom,
529            )

Calculate the isotopologues for a given molecular formula.

Parameters
  • min_abundance (float): The minimum abundance.
  • current_mono_abundance (float): The current monoisotopic abundance.
  • dynamic_range (float): The dynamic range.
Yields
  • MolecularFormulaIsotopologue: The molecular formula isotopologue.
Notes

This calculation ignores the hydrogen isotopes.

def atoms_qnt(self, atom):
531    def atoms_qnt(self, atom):
532        """Get the atom quantity of a specific atom in the molecular formula."""
533        if atom in self._d_molecular_formula:
534            return self._d_molecular_formula.get(atom)
535        else:
536            raise Warning(
537                "Could not find %s in this Molecular Formula object" % str(atom)
538            )

Get the atom quantity of a specific atom in the molecular formula.

def atoms_symbol(self, atom):
540    def atoms_symbol(self, atom):
541        """Get the atom symbol without the mass number."""
542        return "".join([i for i in atom if not i.isdigit()])

Get the atom symbol without the mass number.

string

Returns the molecular formula as a string.

string_formated
def to_dict(self):
584    def to_dict(self):
585        """Returns the molecular formula as a dictionary.
586
587        Returns
588        -------
589        dict
590            The molecular formula as a dictionary.
591        """
592        return self._d_molecular_formula

Returns the molecular formula as a dictionary.

Returns
  • dict: The molecular formula as a dictionary.
def to_list(self):
594    def to_list(self):
595        """Returns the molecular formula as a list.
596
597        Returns
598        -------
599        list
600            The molecular formula as a list.
601
602        Raises
603        ------
604        Exception
605            If the molecular formula identification was not performed yet.
606        """
607        # TODO ensure self._d_molecular_formula is a orderedDict
608
609        if self._d_molecular_formula:
610            formula_list = []
611
612            for atom, atom_number in self._d_molecular_formula.items():
613                if atom != Labels.ion_type:
614                    formula_list.append(atom)
615                    formula_list.append(atom_number)
616
617            return formula_list
618        else:
619            raise Exception("Molecular formula identification not performed yet")

Returns the molecular formula as a list.

Returns
  • list: The molecular formula as a list.
Raises
  • Exception: If the molecular formula identification was not performed yet.
class_label
class_dict
class MolecularFormulaIsotopologue(MolecularFormulaBase):
class MolecularFormulaIsotopologue(MolecularFormulaBase):
    """Class for representing a molecular formula isotopologue.

    Parameters
    ----------
    _d_molecular_formula : dict
        The molecular formula as a dictionary. It is not modified; when an
        adduct atom has to be removed, a copy is edited instead.
    prob_ratio : float
        The probability ratio.
    mono_abundance : float
        The monoisotopic abundance.
    ion_charge : int
        The ion charge.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    ion_type : str, optional
        The ion type. Defaults to None, in which case the value stored under
        ``Labels.ion_type`` in the formula dictionary (if any) is used.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.

    Raises
    ------
    Exception
        If the ion type is an adduct ion and neither ``adduct_atom`` nor one
        of its isotopes is a key of ``_d_molecular_formula``.

    Attributes
    ----------
    prob_ratio : float
        The probability ratio.
    abundance_calc : float
        The calculated abundance (``mono_abundance * prob_ratio``).
    area_error : float
        The area error.
    abundance_error : float
        The abundance error.
    is_isotopologue : bool
        The isotopologue flag. Defaults to True.
    mspeak_index_mono_isotopic : int
        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    mono_isotopic_formula_index : int
        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    """

    def __init__(
        self,
        _d_molecular_formula,
        prob_ratio,
        mono_abundance,
        ion_charge,
        mspeak_parent=None,
        ion_type=None,
        adduct_atom=None,
    ):
        if ion_type is None:
            # Fall back to the ion type stored in the formula dict (None if absent).
            ion_type = _d_molecular_formula.get(Labels.ion_type)
        else:
            ion_type = Labels.ion_type_translate.get(ion_type)

        if ion_type == Labels.adduct_ion:
            adduct_atom_int = None
            if adduct_atom in _d_molecular_formula:
                adduct_atom_int = adduct_atom
            else:
                # Check to see if adduct_atom should actually be an isotope of the adduct atom.
                # A missing or unknown adduct atom has no isotope entry; treat that as
                # "no candidates" so the explicit exception below is raised instead of a
                # TypeError from subscripting None.
                isotope_entry = Atoms.isotopes.get(adduct_atom)
                adduct_isotopes = isotope_entry[1] if isotope_entry is not None else ()
                for adduct_iso in adduct_isotopes:
                    if adduct_iso in _d_molecular_formula:
                        # No break: the last matching isotope wins, as before.
                        adduct_atom_int = adduct_iso
            adduct_atom = adduct_atom_int
            if adduct_atom is None:
                raise Exception("adduct_atom is required for adduct ion")

            # Remove one adduct atom on a copy so the caller's dict is left untouched,
            # then drop entries whose count reached zero.
            formula_without_adduct = dict(_d_molecular_formula)
            formula_without_adduct[adduct_atom] -= 1
            _d_molecular_formula = {
                key: val for key, val in formula_without_adduct.items() if val != 0
            }

        super().__init__(
            molecular_formula=_d_molecular_formula,
            ion_charge=ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
        )
        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic

        self.prob_ratio = prob_ratio

        self.abundance_calc = mono_abundance * prob_ratio

        self.is_isotopologue = True

        self.mspeak_index_mono_isotopic = None

        self.mono_isotopic_formula_index = None
        # parent mass spectrum peak obj instance
        self._mspeak_parent = mspeak_parent

    @property
    def area_error(self):
        """The area error, as computed by ``_calc_area_error()``."""
        return self._calc_area_error()

    @property
    def abundance_error(self):
        """The abundance error, as computed by ``_calc_abundance_error()``."""
        return self._calc_abundance_error()

Class for representing a molecular formula isotopologue.

Parameters
  • _d_molecular_formula (dict): The molecular formula as a dictionary.
  • prob_ratio (float): The probability ratio.
  • mono_abundance (float): The monoisotopic abundance.
  • ion_charge (int): The ion charge.
  • mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
  • ion_type (str, optional): The ion type. Defaults to None.
  • adduct_atom (str, optional): The adduct atom. Defaults to None.
Attributes
  • prob_ratio (float): The probability ratio.
  • abundance_calc (float): The calculated abundance.
  • area_error (float): The area error.
  • abundance_error (float): The abundance error.
  • is_isotopologue (bool): The isotopologue flag. Defaults to True.
  • mspeak_index_mono_isotopic (int): The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
  • mono_isotopic_formula_index (int): The index of the monoisotopic formula in the molecular formula list. Defaults to None.
MolecularFormulaIsotopologue( _d_molecular_formula, prob_ratio, mono_abundance, ion_charge, mspeak_parent=None, ion_type=None, adduct_atom=None)
712    def __init__(
713        self,
714        _d_molecular_formula,
715        prob_ratio,
716        mono_abundance,
717        ion_charge,
718        mspeak_parent=None,
719        ion_type=None,
720        adduct_atom=None,
721    ):
722        if ion_type is None:
723            # check if ion type or adduct_atom is in the molecular formula dict
724            if Labels.ion_type in _d_molecular_formula:
725                ion_type = _d_molecular_formula.get(Labels.ion_type)
726            else:
727                ion_type = None
728        else:
729            ion_type = Labels.ion_type_translate.get(ion_type)
730
731        if ion_type == Labels.adduct_ion:
732            adduct_atom_int = None
733            if adduct_atom in _d_molecular_formula.keys():
734                adduct_atom_int = adduct_atom
735            else:
736                # Check to see if adduct_atom should actually be an isotope of the adduct atom
737                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
738                    if adduct_iso in _d_molecular_formula.keys():
739                        adduct_atom_int = adduct_iso
740            adduct_atom = adduct_atom_int
741            if adduct_atom is None:
742                raise Exception("adduct_atom is required for adduct ion")
743            _d_molecular_formula[adduct_atom] -= 1
744            _d_molecular_formula = {
745                key: val for key, val in _d_molecular_formula.items() if val != 0
746            }
747
748        super().__init__(
749            molecular_formula=_d_molecular_formula,
750            ion_charge=ion_charge,
751            ion_type=ion_type,
752            adduct_atom=adduct_atom,
753        )
754        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
755
756        self.prob_ratio = prob_ratio
757
758        self.abundance_calc = mono_abundance * prob_ratio
759
760        self.is_isotopologue = True
761
762        self.mspeak_index_mono_isotopic = None
763
764        self.mono_isotopic_formula_index = None
765        # parent mass spectrum peak obj instance
766        self._mspeak_parent = mspeak_parent
prob_ratio
abundance_calc
is_isotopologue
mspeak_index_mono_isotopic
mono_isotopic_formula_index
area_error
abundance_error
class LCMSLibRefMolecularFormula(MolecularFormulaBase):
class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    name : str, optional
        The name of the reference molecule. Defaults to None.
    kegg_id : str, optional
        The KEGG ID of the reference molecule. Defaults to None.
    cas : str, optional
        The CAS number of the reference molecule. Defaults to None.

    Attributes
    ----------
    name : str
        The name of the reference molecule. Assigning a non-string raises TypeError.
    kegg_id : str
        The KEGG ID of the reference molecule. Assignment is not type-checked.
    cas : str
        The CAS number of the reference molecule. Assignment is not type-checked.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        name=None,
        kegg_id=None,
        cas=None,
    ) -> None:
        super().__init__(
            molecular_formula,
            ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
        )

        # Stored directly rather than through the property setters, so the
        # None defaults are accepted here.
        self._name = name
        self._kegg_id = kegg_id
        self._cas = cas

    @property
    def name(self):
        """The name of the reference molecule."""
        return self._name

    @name.setter
    def name(self, name):
        if not isinstance(name, str):
            # Fill the placeholder so the message reports the offending value.
            raise TypeError("name: {} should be type string".format(name))
        self._name = name

    @property
    def kegg_id(self):
        """The KEGG ID of the reference molecule."""
        return self._kegg_id

    @kegg_id.setter
    def kegg_id(self, kegg_id):
        self._kegg_id = kegg_id

    @property
    def cas(self):
        """The CAS number of the reference molecule."""
        return self._cas

    @cas.setter
    def cas(self, cas):
        self._cas = cas

Class for representing a molecular formula associated with a molecule in a LCMS library reference.

Parameters
  • molecular_formula (dict, list, str): The molecular formula.
  • ion_charge (int): The ion charge.
  • ion_type (str, optional): The ion type. Defaults to None.
  • adduct_atom (str, optional): The adduct atom. Defaults to None.
  • mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
  • name (str, optional): The name of the reference molecule. Defaults to None.
  • kegg_id (str, optional): The KEGG ID of the reference molecule. Defaults to None.
  • cas (str, optional): The CAS number of the reference molecule. Defaults to None.
LCMSLibRefMolecularFormula( molecular_formula, ion_charge, ion_type=None, adduct_atom=None, mspeak_parent=None, name=None, kegg_id=None, cas=None)
801    def __init__(
802        self,
803        molecular_formula,
804        ion_charge,
805        ion_type=None,
806        adduct_atom=None,
807        mspeak_parent=None,
808        name=None,
809        kegg_id=None,
810        cas=None,
811    ) -> None:
812        super().__init__(
813            molecular_formula,
814            ion_charge,
815            ion_type=ion_type,
816            adduct_atom=adduct_atom,
817            mspeak_parent=mspeak_parent,
818        )
819
820        self._name = name
821        self._kegg_id = kegg_id
822        self._cas = cas
name
kegg_id
cas
class MolecularFormula(MolecularFormulaBase):
class MolecularFormula(MolecularFormulaBase):
    """General-purpose molecular formula.

    This class adds no behaviour of its own: the constructor forwards every
    argument to ``MolecularFormulaBase``.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to False.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        external_mz=False,
    ):
        # Pure pass-through to the base class initialiser.
        super().__init__(
            molecular_formula=molecular_formula,
            ion_charge=ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
            external_mz=external_mz,
        )

General class for representing a molecular formula.

Parameters
  • molecular_formula (dict, list, str): The molecular formula.
  • ion_charge (int): The ion charge.
  • ion_type (str, optional): The ion type. Defaults to None.
  • adduct_atom (str, optional): The adduct atom. Defaults to None.
  • mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
  • external_mz (float, optional): The external m/z value. Defaults to False.
MolecularFormula( molecular_formula, ion_charge, ion_type=None, adduct_atom=None, mspeak_parent=None, external_mz=False)
880    def __init__(
881        self,
882        molecular_formula,
883        ion_charge,
884        ion_type=None,
885        adduct_atom=None,
886        mspeak_parent=None,
887        external_mz=False,
888    ):
889        super().__init__(
890            molecular_formula,
891            ion_charge,
892            ion_type=ion_type,
893            adduct_atom=adduct_atom,
894            mspeak_parent=mspeak_parent,
895            external_mz=external_mz,
896        )