corems.molecular_formula.factory.MolecularFormulaFactory
import re

from corems.encapsulation.constant import Atoms, Labels
from corems.molecular_formula.calc.MolecularFormulaCalc import MolecularFormulaCalc

__author__ = "Yuri E. Corilo"
__date__ = "Jun 24, 2019"


class MolecularFormulaBase(MolecularFormulaCalc):
    """Base class for representing a molecular formula.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
        A dict maps atom labels to counts, a list alternates labels and counts
        (``['C', 10, 'H', 21, '13C', 1]``) and a str is a space separated
        sequence of label/count tokens (``'C10 H21 13C1 Cl1 37Cl1'``).
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : _MSPeak, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to None.

    Raises
    ------
    TypeError
        If `molecular_formula` is not a dict, list or str, or (from the
        `ion_type` setter) if the ion type is not 'DE_OR_PROTONATED',
        'RADICAL' or 'ADDUCT'.

    Attributes
    ----------
    isotopologue_count_percentile : float
        The isotopologue count percentile.
    O_C : float
        The O/C ratio.
    H_C : float
        The H/C ratio.
    A_I : float
        The aromaticity index.
    A_I_mod : float
        The modified aromaticity index.
    nosc : float
        The nominal oxidation state of carbon.
    dbe : float
        The double bond equivalent.
    mz_nominal_calc : int
        The nominal m/z value.
    mz_error : float
        The m/z error.
    mz_calc : float
        The m/z value.
    protonated_mz : float
        The protonated or deprotonated m/z value.
    radical_mz : float
        The radical m/z value.
    neutral_mass : float
        The neutral mass.
    ion_type : str
        The ion type.
    ion_charge : int
        The ion charge.
    atoms : list
        The atoms in the molecular formula.
    confidence_score : float
        The confidence score of the molecular formula identification.
    isotopologue_similarity : float
        The isotopologue similarity score of the molecular formula identification.
    average_mz_error_score : float
        The average m/z error score of the molecular formula identification, including the isotopologues.
    mz_error_score : float
        The m/z error score of the molecular formula identification.
    kmd : float
        The Kendrick mass defect (KMD).
    kendrick_mass : float
        The Kendrick mass.
    knm : float
        The nominal Kendrick mass.
    string : str
        The molecular formula string.
    string_formated : str
        The molecular formula string formatted with subscripts and superscripts.
    class_label : str
        The class label.
    class_dict : dict
        The class dictionary.

    Methods
    -------
    * change_kendrick_base(kendrick_dict_base).
        Change the Kendrick base.
    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
        Calculate the isotopologues.
    * atoms_qnt(atom).
        Get the atom quantity.
    * atoms_symbol(atom).
        Get the atom symbol without the mass number.
    * to_dict().
        Get the molecular formula as a dictionary.
    * to_list().
        Get the molecular formula as a list.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        external_mz=None,
    ):
        # isinstance (instead of an exact type() match) also accepts
        # subclasses such as OrderedDict.
        if isinstance(molecular_formula, dict):
            self._from_dict(molecular_formula, ion_type, adduct_atom)
        elif isinstance(molecular_formula, list):
            self._from_list(molecular_formula, ion_type, adduct_atom)
        elif isinstance(molecular_formula, str):
            self._from_str(molecular_formula, ion_type, adduct_atom)
        else:
            # Without this guard _d_molecular_formula would never be set and
            # the failure would only surface later as an AttributeError.
            raise TypeError(
                "molecular_formula should be a dict, list or str, not %s"
                % type(molecular_formula).__name__
            )

        self._ion_charge = ion_charge
        self._external_mz = external_mz

        # lazily computed scores, see the corresponding properties
        self._confidence_score = None
        self._isotopologue_similarity = None
        self._mz_error_score = None
        self._mass_error_average_score = None

        self.is_isotopologue = False

        # parent mass spectrum peak obj instance
        self._mspeak_parent = mspeak_parent

        self.expected_isotopologues = []
        self.mspeak_mf_isotopologues_indexes = []

        if self._mspeak_parent:
            kendrick_dict_base = (
                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
            )
        else:
            # default Kendrick base used when there is no parent peak
            kendrick_dict_base = {"C": 1, "H": 2}
        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
            kendrick_dict_base
        )

    def __repr__(self):
        return "MolecularFormula({0},{1},ion type = {2})".format(
            self._d_molecular_formula, self.ion_charge, self.ion_type
        )

    def __str__(self):
        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(
            self.string, self.ion_charge, self.ion_type, self.mz_calc
        )

    def __len__(self):
        # Number of keys of the internal dictionary; this includes the
        # ion type entry when it is stored there.
        return len(self._d_molecular_formula)

    def __getitem__(self, atom):
        # Unknown atoms yield 0 instead of raising KeyError.
        return self._d_molecular_formula.get(atom, 0)

    def get(self, atom):
        """Get the atom quantity of a specific atom.

        Parameters
        ----------
        atom : str
            The atom symbol.

        Returns
        -------
        int
            The atom quantity, 0 if the atom is not in the molecular formula.
        """
        return self._d_molecular_formula.get(atom, 0)

    def _register_adduct(self, adduct_atom):
        """Add one `adduct_atom` to the formula and record it in `self.adduct_atom`.

        A falsy `adduct_atom` (None, empty string) stores None, so that the
        ``self.adduct_atom is None`` checks in `atoms` and `string` hold.
        """
        if adduct_atom:
            self._d_molecular_formula[adduct_atom] = (
                self._d_molecular_formula.get(adduct_atom, 0) + 1
            )
            self.adduct_atom = adduct_atom
        else:
            self.adduct_atom = None

    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
        """Build the internal dictionary from a ``{atom: count}`` dictionary."""
        # clear dictionary of atoms with 0 value (also copies the input)
        self._d_molecular_formula = {
            key: val for key, val in molecular_formula.items() if val != 0
        }

        if ion_type is not None:
            self._d_molecular_formula[Labels.ion_type] = ion_type

        self._register_adduct(adduct_atom)

    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
        """Build the internal dictionary from an alternating label/count list.

        The list has to be in the format
        ``['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]``.
        """
        self._d_molecular_formula = {}
        for position in range(0, len(molecular_formula_list), 2):
            atoms_label = molecular_formula_list[position]
            atoms_count = int(molecular_formula_list[position + 1])

            if atoms_count > 0:
                self._d_molecular_formula[atoms_label] = atoms_count

        # NOTE: unlike _from_dict, the ion type entry is stored even when it is None.
        self._d_molecular_formula[Labels.ion_type] = ion_type

        self._register_adduct(adduct_atom)

    def _from_str(self, molecular_formula_str, ion_type, adduct_atom):
        """Build the internal dictionary from a space separated formula string.

        The string has to be in the format ``'C10 H21 13C1 Cl1 37Cl1 etc'``.

        Raises
        ------
        ValueError
            If the string has no spaces or if a token has no trailing count.
        """
        if " " not in molecular_formula_str:
            raise ValueError(
                "The molecular formula string should have spaces, input: %s"
                % molecular_formula_str
            )

        parsed = {}
        for token in molecular_formula_str.split():
            # the trailing digits are the count, everything before is the atom label
            trailing_count = re.search(r"\d+$", token)
            if trailing_count is None:
                raise ValueError(
                    "The number of elements and counts do not match, input: %s"
                    % molecular_formula_str
                )
            parsed[token[: trailing_count.start()]] = int(trailing_count.group())

        self._from_dict(parsed, ion_type, adduct_atom)

    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
        """Splits the molecular formula string.

        Parameters
        ----------
        delimiters : list
            The list of delimiters.
        string : str
            The molecular formula string.
        maxsplit : int, optional
            The maximum number of splits. Defaults to 0.

        Returns
        -------
        list
            The first delimiter found followed by the integer that comes right after it.

        Notes
        -----
        Does not work when formula has atoms with same characters in a row that belong to different atoms, i.e. C10H21NNa.
        """
        regex_pattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
        isotopes = re.findall(regex_pattern, string)  # pragma: no cover
        # maxsplit is passed by keyword: the positional form is deprecated since Python 3.13
        counts = re.split(regex_pattern, string, maxsplit=maxsplit)  # pragma: no cover

        return [isotopes[0], int(counts[1])]

    def _sum_element_isotopes(self, symbol):
        """Sum the counts of `symbol` and of all its isotopes listed in `Atoms.isotopes`."""
        labels = [symbol] + Atoms.isotopes[symbol][1]
        return sum(
            self._d_molecular_formula[label]
            for label in labels
            if label in self._d_molecular_formula
        )

    @property
    def isotopologue_count_percentile(
        self,
    ):
        """Percentage of the expected isotopologues that were found, 100 if none is expected."""
        if not self.expected_isotopologues:
            return 100
        return (
            len(self.mspeak_mf_isotopologues_indexes)
            / len(self.expected_isotopologues)
        ) * 100

    @property
    def O_C(self):
        """O/C ratio, 0 when the formula has no 'O' entry.

        Oxygens and carbons are gathered regardless of the isotopic composition.
        Raises ZeroDivisionError if the formula has oxygen but no carbon.
        """
        if "O" not in self._d_molecular_formula:
            return 0
        return self._sum_element_isotopes("O") / self._sum_element_isotopes("C")

    @property
    def H_C(self):
        """H/C ratio.

        Hydrogens and carbons are gathered regardless of the isotopic composition.
        Raises ZeroDivisionError if the formula has no carbon.
        """
        carbons = self._sum_element_isotopes("C")
        hydrogens = self._sum_element_isotopes("H")
        return hydrogens / carbons

    @property
    def A_I(self):
        """Aromaticity index"""
        return self._calc_aromaticity_index()

    @property
    def A_I_mod(self):
        """Modified aromaticity index"""
        return self._calc_aromaticity_index_mod()

    @property
    def nosc(self):
        """Nominal oxidation state of carbon"""
        return self._calc_nosc()

    @property
    def dbe(self):
        return self._calc_dbe()

    @property
    def mz_nominal_calc(self):
        return int(self._calc_mz())

    @property
    def mz_error(self):
        return self._calc_assignment_mass_error()

    @property
    def mz_calc(self):
        return self._calc_mz()

    @property
    def protonated_mz(self):
        return self._protonated_mz(self.ion_charge)

    @property
    def radical_mz(self):
        return self._radical_mz(self.ion_charge)

    @property
    def neutral_mass(self):
        return self._neutral_mass()

    def adduct_mz(self, adduct_atom):
        """Get m/z of an adducted ion version of the molecular formula.

        Parameters
        ----------
        adduct_atom : str
            The adduct atom.

        Returns
        -------
        float
            The m/z value of the adducted ion version of the molecular formula.
        """
        return self._adduct_mz(adduct_atom, self.ion_charge)

    @property
    def ion_type(self):
        """The ion type; the generic (de)protonated label is resolved using the sign of the ion charge."""
        stored_ion_type = self._d_molecular_formula.get(Labels.ion_type)
        if stored_ion_type != Labels.protonated_de_ion:
            return stored_ion_type
        if self.ion_charge > 0:
            return Labels.protonated
        return Labels.de_protonated

    @ion_type.setter
    def ion_type(self, ion_type):
        allowed_ion_types = (
            Labels.protonated_de_ion,
            Labels.adduct_ion,
            Labels.radical_ion,
        )
        if ion_type not in allowed_ion_types:
            raise TypeError(
                "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT', not %s"
                % ion_type
            )
        self._d_molecular_formula[Labels.ion_type] = ion_type

    @property
    def ion_charge(self):
        return self._ion_charge

    @property
    def atoms(self):
        """Get the atoms in the molecular formula."""
        if self.adduct_atom is None:
            return [
                key for key in self._d_molecular_formula if key != Labels.ion_type
            ]

        # if there is an adduct_atom, remove one of it before listing the atoms
        without_adduct = self._d_molecular_formula.copy()
        without_adduct[self.adduct_atom] -= 1
        return [
            key
            for key, val in without_adduct.items()
            if key != Labels.ion_type and val > 0
        ]

    @property
    def confidence_score(self):
        # `is None` (not truthiness) so that a score of 0 is cached as well
        if self._confidence_score is None:
            self._confidence_score = self._calc_confidence_score()

        return self._confidence_score

    @property
    def isotopologue_similarity(self):
        if self._isotopologue_similarity is None:
            self._isotopologue_similarity = self._calc_isotopologue_confidence()

        return self._isotopologue_similarity

    @property
    def average_mz_error_score(self):
        # includes the isotopologues
        if self._mass_error_average_score is None:
            self._mass_error_average_score = self._calc_average_mz_score()

        return self._mass_error_average_score

    @property
    def mz_error_score(self):
        if self._mz_error_score is None:
            self._mz_error_score = self._calc_mz_confidence()

        return self._mz_error_score

    @property
    def kmd(self):
        return self._kmd

    @property
    def kendrick_mass(self):
        return self._kendrick_mass

    @property
    def knm(self):
        return self._nominal_km

    def change_kendrick_base(self, kendrick_dict_base):
        """Change the Kendrick base.

        Parameters
        ----------
        kendrick_dict_base : dict
            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
        """
        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
            kendrick_dict_base
        )

    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
        """Calculate the isotopologues for a given molecular formula.

        Parameters
        ----------
        min_abundance : float
            The minimum abundance.
        current_mono_abundance : float
            The current monoisotopic abundance.
        dynamic_range : float
            The dynamic range.

        Yields
        ------
        MolecularFormulaIsotopologue
            The molecular formula isotopologue.

        Notes
        -----
        This calculation ignores the hydrogen isotopes.
        """
        # Sorted by the second tuple item, descending, to account for
        # differences in how the isotopologue outputs are sorted between
        # IsoSpec versions.
        sorted_isotopologues = sorted(
            self._cal_isotopologues(
                self._d_molecular_formula,
                min_abundance,
                current_mono_abundance,
                dynamic_range,
            ),
            key=lambda mf: mf[1],
            reverse=True,
        )

        for mf in sorted_isotopologues:
            yield MolecularFormulaIsotopologue(
                *mf,
                current_mono_abundance,
                self.ion_charge,
                ion_type=self.ion_type,
                adduct_atom=self.adduct_atom,
            )

    def atoms_qnt(self, atom):
        """Get the atom quantity of a specific atom in the molecular formula.

        Raises
        ------
        Warning
            If the atom is not in the molecular formula.
        """
        if atom not in self._d_molecular_formula:
            raise Warning(
                "Could not find %s in this Molecular Formula object" % str(atom)
            )
        return self._d_molecular_formula.get(atom)

    def atoms_symbol(self, atom):
        """Get the atom symbol without the mass number."""
        return "".join(char for char in atom if not char.isdigit())

    @property
    def string(self):
        """Returns the molecular formula as a string.

        The adduct atom, if any, is removed from the counts and the atoms are
        written following `Atoms.atoms_order`.
        """
        if not self._d_molecular_formula:
            raise Exception("Molecular formula identification not performed yet")

        if self.adduct_atom is None:
            mol_form_dict = self._d_molecular_formula
        else:
            mol_form_dict = self._d_molecular_formula.copy()
            if self.adduct_atom not in mol_form_dict:
                raise Exception("Adduct atom not found in molecular formula dict")
            mol_form_dict[self.adduct_atom] -= 1
            mol_form_dict = {
                key: val for key, val in mol_form_dict.items() if val != 0
            }

        return " ".join(
            atom + str(int(mol_form_dict[atom]))
            for atom in Atoms.atoms_order
            if atom in mol_form_dict
        )

    @property
    def string_formated(self):
        """Returns the molecular formula as a string with subscript counts and superscript mass numbers."""
        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")

        if not self._d_molecular_formula:
            raise Exception("Molecular formula identification not performed yet")

        formula_dict = self.to_dict()
        return "".join(
            atom.translate(SUP) + str(int(formula_dict[atom])).translate(SUB)
            for atom in Atoms.atoms_order
            if atom in formula_dict
        )

    def to_dict(self):
        """Returns the molecular formula as a dictionary.

        Returns
        -------
        dict
            The molecular formula as a dictionary (the internal object, not a copy).
        """
        return self._d_molecular_formula

    def to_list(self):
        """Returns the molecular formula as a list.

        Returns
        -------
        list
            The molecular formula as a list of alternating atom labels and counts.

        Raises
        ------
        Exception
            If the molecular formula identification was not performed yet.
        """
        if not self._d_molecular_formula:
            raise Exception("Molecular formula identification not performed yet")

        formula_list = []
        for atom, atom_number in self._d_molecular_formula.items():
            if atom != Labels.ion_type:
                formula_list.extend((atom, atom_number))

        return formula_list

    @property
    def class_label(self):
        """The heteroatom class label, 'HC' if there are no heteroatoms; radicals get a ' -R' suffix."""
        if not self._d_molecular_formula:
            raise Exception("Molecular formula identification not performed yet")

        formulalist = self.to_list()
        classstring = " ".join(
            str(formulalist[position]) + str(formulalist[position + 1])
            for position in range(0, len(formulalist), 2)
            if formulalist[position] not in ("C", "H", "HC")
        ).strip()

        if classstring == "":
            classstring = "HC"

        if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:
            return classstring + " -R"

        return classstring

    @property
    def class_dict(self):
        """The heteroatoms (everything but C, H and the ion type entry) and their counts."""
        if not self._d_molecular_formula:
            raise Exception("Molecular formula identification not performed yet")

        excluded = (Labels.ion_type, "C", "H")
        return {
            atom: qnt
            for atom, qnt in self._d_molecular_formula.items()
            if atom not in excluded
        }


class MolecularFormulaIsotopologue(MolecularFormulaBase):
    """Class for representing a molecular formula isotopologue.

    Parameters
    ----------
    _d_molecular_formula : dict
        The molecular formula as a dictionary.
    prob_ratio : float
        The probability ratio.
    mono_abundance : float
        The monoisotopic abundance.
    ion_charge : int
        The ion charge.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.

    Raises
    ------
    Exception
        If the ion type is an adduct and neither `adduct_atom` nor one of its
        isotopes is in the molecular formula.

    Attributes
    ----------
    prob_ratio : float
        The probability ratio.
    abundance_calc : float
        The calculated abundance.
    area_error : float
        The area error.
    abundance_error : float
        The abundance error.
    is_isotopologue : bool
        The isotopologue flag. Defaults to True.
    mspeak_index_mono_isotopic : int
        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    mono_isotopic_formula_index : int
        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    """

    def __init__(
        self,
        _d_molecular_formula,
        prob_ratio,
        mono_abundance,
        ion_charge,
        mspeak_parent=None,
        ion_type=None,
        adduct_atom=None,
    ):
        if ion_type is None:
            # fall back to the ion type stored in the molecular formula dict, if any
            ion_type = _d_molecular_formula.get(Labels.ion_type)
        else:
            ion_type = Labels.ion_type_translate.get(ion_type)

        if ion_type == Labels.adduct_ion:
            resolved_adduct = None
            if adduct_atom in _d_molecular_formula:
                resolved_adduct = adduct_atom
            else:
                # Check to see if adduct_atom should actually be an isotope of
                # the adduct atom. A missing/unknown adduct_atom has no
                # isotope entry and falls through to the explicit error below.
                isotope_entry = Atoms.isotopes.get(adduct_atom)
                for adduct_iso in isotope_entry[1] if isotope_entry else []:
                    if adduct_iso in _d_molecular_formula:
                        resolved_adduct = adduct_iso
            adduct_atom = resolved_adduct
            if adduct_atom is None:
                raise Exception("adduct_atom is required for adduct ion")

            # Work on a copy so the caller's dictionary is not modified; the
            # base class adds the adduct atom back.
            reduced_formula = dict(_d_molecular_formula)
            reduced_formula[adduct_atom] -= 1
            _d_molecular_formula = {
                key: val for key, val in reduced_formula.items() if val != 0
            }

        super().__init__(
            molecular_formula=_d_molecular_formula,
            ion_charge=ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
        )

        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
        self.prob_ratio = prob_ratio

        self.abundance_calc = mono_abundance * prob_ratio

        self.is_isotopologue = True

        self.mspeak_index_mono_isotopic = None

        self.mono_isotopic_formula_index = None

        # parent mass spectrum peak obj instance; assigned after the base
        # class initialization, which is done without a parent peak
        self._mspeak_parent = mspeak_parent

    @property
    def area_error(self):
        return self._calc_area_error()

    @property
    def abundance_error(self):
        return self._calc_abundance_error()


class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    name : str, optional
        The name of the reference molecule. Defaults to None.
    kegg_id : str, optional
        The KEGG ID of the reference molecule. Defaults to None.
    cas : str, optional
        The CAS number of the reference molecule. Defaults to None.

    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        name=None,
        kegg_id=None,
        cas=None,
    ) -> None:
        super().__init__(
            molecular_formula,
            ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
        )

        self._name = name
        self._kegg_id = kegg_id
        self._cas = cas

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        if not isinstance(name, str):
            raise TypeError("name: {} should be type string".format(name))
        self._name = name

    @property
    def kegg_id(self):
        return self._kegg_id

    @kegg_id.setter
    def kegg_id(self, kegg_id):
        # no type validation is performed on the KEGG ID
        self._kegg_id = kegg_id

    @property
    def cas(self):
        return self._cas

    @cas.setter
    def cas(self, cas):
        # no type validation is performed on the CAS number
        self._cas = cas


class MolecularFormula(MolecularFormulaBase):
    """General class for representing a molecular formula.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to False.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        external_mz=False,
    ):
        super().__init__(
            molecular_formula,
            ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
            external_mz=external_mz,
        )
11class MolecularFormulaBase(MolecularFormulaCalc): 12 """Base class for representing a molecular formula. 13 14 Parameters 15 ---------- 16 molecular_formula : dict, list, str 17 The molecular formula. 18 ion_charge : int 19 The ion charge. 20 ion_type : str, optional 21 The ion type. Defaults to None. 22 adduct_atom : str, optional 23 The adduct atom. Defaults to None. 24 mspeak_parent : _MSPeak, optional 25 The parent mass spectrum peak object instance. Defaults to None. 26 external_mz : float, optional 27 The external m/z value. Defaults to None. 28 29 Raises 30 ------ 31 TypeError 32 If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT'. 33 34 Attributes 35 ---------- 36 isotopologue_count_percentile : float 37 The isotopologue count percentile. 38 O_C : float 39 The O/C ratio. 40 H_C : float 41 The H/C ratio. 42 dbe : float 43 The double bond equivalent. 44 mz_nominal_calc : int 45 The nominal m/z value. 46 mz_error : float 47 The m/z error. 48 mz_calc : float 49 The m/z value. 50 protonated_mz : float 51 The protonated or deprotonated m/z value. 52 radical_mz : float 53 The radical m/z value. 54 neutral_mass : float 55 The neutral mass. 56 ion_type : str 57 The ion type. 58 ion_charge : int 59 The ion charge. 60 atoms : list 61 The atoms in the molecular formula. 62 confidence_score : float 63 The confidence score of the molecular formula identification. 64 isotopologue_similarity : float 65 The isotopologue similarity score of the molecular formula identification. 66 average_mz_error_score : float 67 The average m/z error score of the molecular formula identification, including the isotopologues. 68 mz_error_score : float 69 The m/z error score of the molecular formula identification. 70 kmd : float 71 The Kendrick mass defect (KMD). 72 kendrick_mass : float 73 The Kendrick mass. 74 knm : float 75 The nominal Kendrick mass. 76 string : str 77 The molecular formula string. 
78 string_formated : str 79 The molecular formula string formated with subscripts and superscripts. 80 class_label : str 81 The class label. 82 class_dict : dict 83 The class dictionary. 84 85 Methods 86 ------- 87 * change_kendrick_base(kendrick_dict_base). 88 Change the Kendrick base. 89 * isotopologues(min_abundance, current_mono_abundance, dynamic_range). 90 Calculate the isotopologues. 91 * atoms_qnt(atom). 92 Get the atom quantity. 93 * atoms_symbol(atom). 94 Get the atom symbol without the mass number. 95 * to_dict(). 96 Get the molecular formula as a dictionary. 97 * to_list(). 98 Get the molecular formula as a list. 99 """ 100 101 def __init__( 102 self, 103 molecular_formula, 104 ion_charge, 105 ion_type=None, 106 adduct_atom=None, 107 mspeak_parent=None, 108 external_mz=None, 109 ): 110 # clear dictionary of atoms with 0 value 111 if type(molecular_formula) is dict: 112 self._from_dict(molecular_formula, ion_type, adduct_atom) 113 114 elif type(molecular_formula) is list: 115 self._from_list(molecular_formula, ion_type, adduct_atom) 116 117 elif type(molecular_formula) is str: 118 self._from_str(molecular_formula, ion_type, adduct_atom) 119 120 self._ion_charge = ion_charge 121 self._external_mz = external_mz 122 self._confidence_score = None 123 self._isotopologue_similarity = None 124 self._mz_error_score = None 125 self._mass_error_average_score = None 126 127 self.is_isotopologue = False 128 129 # parent mass spectrum peak obj instance 130 self._mspeak_parent = mspeak_parent 131 132 self.expected_isotopologues = [] 133 self.mspeak_mf_isotopologues_indexes = [] 134 135 if self._mspeak_parent: 136 kendrick_dict_base = ( 137 self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base 138 ) 139 else: 140 kendrick_dict_base = {"C": 1, "H": 2} 141 self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd( 142 kendrick_dict_base 143 ) 144 145 def __repr__(self): 146 return "MolecularFormula({0},{1},ion type = {2}".format( 147 
self._d_molecular_formula, self.ion_charge, self.ion_type 148 ) 149 150 def __str__(self): 151 return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format( 152 self.string, self.ion_charge, self.ion_type, self.mz_calc 153 ) 154 155 def __len__(self): 156 # crash if keys are not ordered 157 return len(self._d_molecular_formula.keys()) 158 159 def __getitem__(self, atom): 160 # atom = list(self._d_molecular_formula.keys())[position] 161 if atom in self._d_molecular_formula.keys(): 162 return self._d_molecular_formula[atom] 163 else: 164 return 0 165 166 def get(self, atom): 167 """Get the atom quantity of a specific atom. 168 169 Parameters 170 ---------- 171 atom : str 172 The atom symbol. 173 174 Returns 175 ------- 176 int 177 The atom quantity. 178 """ 179 # atom = list(self._d_molecular_formula.keys())[position] 180 if atom in self._d_molecular_formula.keys(): 181 return self._d_molecular_formula[atom] 182 else: 183 return 0 184 185 def _from_dict(self, molecular_formula, ion_type, adduct_atom): 186 self._d_molecular_formula = { 187 key: val for key, val in molecular_formula.items() if val != 0 188 } 189 190 if ion_type is not None: 191 self._d_molecular_formula[Labels.ion_type] = ion_type 192 193 if adduct_atom: 194 if adduct_atom in self._d_molecular_formula: 195 self._d_molecular_formula[adduct_atom] += 1 196 else: 197 self._d_molecular_formula[adduct_atom] = 1 198 self.adduct_atom = adduct_atom 199 200 def _from_list(self, molecular_formula_list, ion_type, adduct_atom): 201 # list has to be in the format 202 # ['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc] 203 self._d_molecular_formula = {} 204 for each in range(0, len(molecular_formula_list), 2): 205 atoms_label = molecular_formula_list[each] 206 atoms_count = int(molecular_formula_list[each + 1]) 207 208 if atoms_count > 0: 209 self._d_molecular_formula[atoms_label] = int(atoms_count) 210 211 self._d_molecular_formula[Labels.ion_type] = ion_type 212 if adduct_atom: 213 self.adduct_atom = 
adduct_atom 214 if adduct_atom in self._d_molecular_formula: 215 self._d_molecular_formula[adduct_atom] += 1 216 else: 217 self._d_molecular_formula[adduct_atom] = 1 218 else: 219 self.adduct_atom = None 220 221 def _from_str(self, molecular_formula_str, ion_type, adduct_atom): 222 # string has to be in the format 223 #'C10 H21 13C1 Cl1 37Cl1 etc' 224 # Check if there are spaces in the string 225 if " " not in molecular_formula_str: 226 raise ValueError( 227 "The molecular formula string should have spaces, input: %s" 228 % molecular_formula_str 229 ) 230 231 # Split the string by spaces 232 # Grab the text before a digit for each element after splitting on spaces (atoms) 233 elements = [re.sub(r"\d+$", "", x) for x in molecular_formula_str.split()] 234 # Grab the digits at the end of each element after splitting on spaces (counts) 235 counts = [re.findall(r"\d+$", x)[0] for x in molecular_formula_str.split()] 236 # Check that the number of elements and counts are the same 237 if len(elements) != len(counts): 238 raise ValueError( 239 "The number of elements and counts do not match, input: %s" 240 % molecular_formula_str 241 ) 242 243 # Create a dictionary from the elements and counts and add it to the molecular formula 244 dict_ = dict(zip(elements, counts)) 245 # Cast counts to integers 246 dict_ = {key: int(val) for key, val in dict_.items()} 247 self._from_dict(dict_, ion_type, adduct_atom) 248 249 def split(self, delimiters, string, maxsplit=0): # pragma: no cover 250 """Splits the molecular formula string. 251 252 Parameters 253 ---------- 254 delimiters : list 255 The list of delimiters. 256 string : str 257 The molecular formula string. 258 maxsplit : int, optional 259 The maximum number of splits. Defaults to 0. 260 261 Returns 262 ------- 263 list 264 The molecular formula list. 265 266 Notes 267 ----- 268 Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa. 
269 """ 270 regexPattern = "|".join(map(re.escape, delimiters)) # pragma: no cover 271 isotopes = re.findall(regexPattern, string) # pragma: no cover 272 counts = re.split(regexPattern, string, maxsplit) # pragma: no cover 273 274 return [isotopes[0], int(counts[1])] 275 276 @property 277 def isotopologue_count_percentile( 278 self, 279 ): 280 if not len(self.expected_isotopologues) == 0: 281 return ( 282 len(self.mspeak_mf_isotopologues_indexes) 283 / len(self.expected_isotopologues) 284 ) * 100 285 else: 286 return 100 287 288 @property 289 def O_C(self): 290 if "O" in self._d_molecular_formula.keys(): 291 # gather all the Os and Hs, regardless of the isotopic composition 292 Os = sum( 293 [ 294 self._d_molecular_formula.get(key) 295 for key in ["O"] + Atoms.isotopes["O"][1] 296 if key in self._d_molecular_formula.keys() 297 ] 298 ) 299 Cs = sum( 300 [ 301 self._d_molecular_formula.get(key) 302 for key in ["C"] + Atoms.isotopes["C"][1] 303 if key in self._d_molecular_formula.keys() 304 ] 305 ) 306 return Os / Cs 307 else: 308 return 0 309 310 @property 311 def H_C(self): 312 # gather all the Cs and Hs, regardless of the isotopic composition 313 Cs = sum( 314 [ 315 self._d_molecular_formula.get(key) 316 for key in ["C"] + Atoms.isotopes["C"][1] 317 if key in self._d_molecular_formula.keys() 318 ] 319 ) 320 Hs = sum( 321 [ 322 self._d_molecular_formula.get(key) 323 for key in ["H"] + Atoms.isotopes["H"][1] 324 if key in self._d_molecular_formula.keys() 325 ] 326 ) 327 return Hs / Cs 328 329 @property 330 def A_I(self): 331 """Aromaticity index""" 332 return self._calc_aromaticity_index() 333 334 @property 335 def A_I_mod(self): 336 """Modified aromaticity index""" 337 return self._calc_aromaticity_index_mod() 338 339 @property 340 def nosc(self): 341 """Nominal oxidation state of carbon""" 342 return self._calc_nosc() 343 344 @property 345 def dbe(self): 346 return self._calc_dbe() 347 348 @property 349 def mz_nominal_calc(self): 350 return int(self._calc_mz()) 351 
352 @property 353 def mz_error(self): 354 return self._calc_assignment_mass_error() 355 356 @property 357 def mz_calc(self): 358 return self._calc_mz() 359 360 @property 361 def protonated_mz(self): 362 return self._protonated_mz(self.ion_charge) 363 364 @property 365 def radical_mz(self): 366 return self._radical_mz(self.ion_charge) 367 368 @property 369 def neutral_mass(self): 370 return self._neutral_mass() 371 372 def adduct_mz(self, adduct_atom): 373 """Get m/z of an adducted ion version of the molecular formula. 374 375 Parameters 376 ---------- 377 adduct_atom : str 378 The adduct atom. 379 380 Returns 381 ------- 382 float 383 The m/z value of the adducted ion version of the molecular formula. 384 """ 385 return self._adduct_mz(adduct_atom, self.ion_charge) 386 387 @property 388 def ion_type(self): 389 ion_type = self._d_molecular_formula.get(Labels.ion_type) 390 if ion_type == Labels.protonated_de_ion: 391 if self.ion_charge > 0: 392 return Labels.protonated 393 else: 394 return Labels.de_protonated 395 else: 396 return ion_type 397 398 @ion_type.setter 399 def ion_type(self, ion_type): 400 if ion_type in [ 401 Labels.protonated_de_ion, 402 Labels.adduct_ion, 403 Labels.radical_ion, 404 ]: 405 self._d_molecular_formula[Labels.ion_type] = ion_type 406 else: 407 raise TypeError( 408 "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT', not %s" 409 % ion_type 410 ) 411 412 @property 413 def ion_charge(self): 414 return self._ion_charge 415 416 @property 417 def atoms(self): 418 """Get the atoms in the molecular formula.""" 419 # if there is an adduct_atom, them reduce it from the atoms list 420 if self.adduct_atom is None: 421 return [ 422 key 423 for key in self._d_molecular_formula.keys() 424 if key != Labels.ion_type 425 ] 426 else: 427 temp_dict = self._d_molecular_formula.copy() 428 temp_dict[self.adduct_atom] -= 1 429 return [ 430 key 431 for key, val in temp_dict.items() 432 if key != Labels.ion_type and val > 0 433 ] 434 435 @property 436 
def confidence_score(self): 437 if not self._confidence_score: 438 self._confidence_score = self._calc_confidence_score() 439 440 return self._confidence_score 441 442 @property 443 def isotopologue_similarity(self): 444 if not self._isotopologue_similarity: 445 self._isotopologue_similarity = self._calc_isotopologue_confidence() 446 447 return self._isotopologue_similarity 448 449 @property 450 def average_mz_error_score(self): 451 # includes the isotopologues 452 453 if not self._mass_error_average_score: 454 self._mass_error_average_score = self._calc_average_mz_score() 455 456 return self._mass_error_average_score 457 458 @property 459 def mz_error_score(self): 460 if not self._mz_error_score: 461 self._mz_error_score = self._calc_mz_confidence() 462 463 return self._mz_error_score 464 465 @property 466 def kmd(self): 467 return self._kmd 468 469 @property 470 def kendrick_mass(self): 471 return self._kendrick_mass 472 473 @property 474 def knm(self): 475 return self._nominal_km 476 477 def change_kendrick_base(self, kendrick_dict_base): 478 """Change the Kendrick base. 479 480 Parameters 481 ---------- 482 kendrick_dict_base : dict 483 The Kendrick base dictionary. Ex: {"C": 1, "H": 2} 484 """ 485 self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd( 486 kendrick_dict_base 487 ) 488 489 def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range): 490 """Calculate the isotopologues for a given molecular formula. 491 492 Parameters 493 ---------- 494 min_abundance : float 495 The minimum abundance. 496 current_mono_abundance : float 497 The current monoisotopic abundance. 498 dynamic_range : float 499 The dynamic range. 500 501 Yields 502 ------ 503 MolecularFormulaIsotopologue 504 The molecular formula isotopologue. 505 506 Notes 507 ----- 508 This calculation ignores the hydrogen isotopes. 
509 """ 510 isotopologues = [] 511 for mf in self._cal_isotopologues( 512 self._d_molecular_formula, 513 min_abundance, 514 current_mono_abundance, 515 dynamic_range, 516 ): 517 isotopologues.append(mf) 518 519 # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions. 520 sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True) 521 522 for mf in sorted_isotopologues: 523 yield MolecularFormulaIsotopologue( 524 *mf, 525 current_mono_abundance, 526 self.ion_charge, 527 ion_type=self.ion_type, 528 adduct_atom=self.adduct_atom, 529 ) 530 531 def atoms_qnt(self, atom): 532 """Get the atom quantity of a specific atom in the molecular formula.""" 533 if atom in self._d_molecular_formula: 534 return self._d_molecular_formula.get(atom) 535 else: 536 raise Warning( 537 "Could not find %s in this Molecular Formula object" % str(atom) 538 ) 539 540 def atoms_symbol(self, atom): 541 """Get the atom symbol without the mass number.""" 542 return "".join([i for i in atom if not i.isdigit()]) 543 544 @property 545 def string(self): 546 """Returns the molecular formula as a string.""" 547 if self._d_molecular_formula: 548 if self.adduct_atom is None: 549 mol_form_dict = self._d_molecular_formula 550 else: 551 mol_form_dict = self._d_molecular_formula.copy() 552 if self.adduct_atom not in mol_form_dict.keys(): 553 raise Exception("Adduct atom not found in molecular formula dict") 554 mol_form_dict[self.adduct_atom] -= 1 555 mol_form_dict = { 556 key: val for key, val in mol_form_dict.items() if val != 0 557 } 558 formula_srt = "" 559 for atom in Atoms.atoms_order: 560 if atom in mol_form_dict.keys(): 561 formula_srt += atom + str(int(mol_form_dict.get(atom))) + " " 562 return formula_srt.strip() 563 564 else: 565 raise Exception("Molecular formula identification not performed yet") 566 567 @property 568 def string_formated(self): 569 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") 570 SUP = str.maketrans("0123456789", 
"⁰¹²³⁴⁵⁶⁷⁸⁹") 571 572 if self._d_molecular_formula: 573 formula_srt = "" 574 for atom in Atoms.atoms_order: 575 if atom in self.to_dict().keys(): 576 formula_srt += atom.translate(SUP) + str( 577 int(self.to_dict().get(atom)) 578 ).translate(SUB) 579 return formula_srt 580 581 else: 582 raise Exception("Molecular formula identification not performed yet") 583 584 def to_dict(self): 585 """Returns the molecular formula as a dictionary. 586 587 Returns 588 ------- 589 dict 590 The molecular formula as a dictionary. 591 """ 592 return self._d_molecular_formula 593 594 def to_list(self): 595 """Returns the molecular formula as a list. 596 597 Returns 598 ------- 599 list 600 The molecular formula as a list. 601 602 Raises 603 ------ 604 Exception 605 If the molecular formula identification was not performed yet. 606 """ 607 # TODO ensure self._d_molecular_formula is a orderedDict 608 609 if self._d_molecular_formula: 610 formula_list = [] 611 612 for atom, atom_number in self._d_molecular_formula.items(): 613 if atom != Labels.ion_type: 614 formula_list.append(atom) 615 formula_list.append(atom_number) 616 617 return formula_list 618 else: 619 raise Exception("Molecular formula identification not performed yet") 620 621 @property 622 def class_label(self): 623 if self._d_molecular_formula: 624 formulalist = self.to_list() 625 classstring = "" 626 627 for each in range(0, len(formulalist), 2): 628 if ( 629 formulalist[each] != "C" 630 and formulalist[each] != "H" 631 and formulalist[each] != "HC" 632 ): 633 classstring = ( 634 classstring 635 + str(formulalist[each]) 636 + str(formulalist[each + 1]) 637 + " " 638 ) 639 640 if classstring == "": 641 classstring = "HC" 642 643 classstring = classstring.strip() 644 645 if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion: 646 return classstring + " -R" 647 648 # elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion: 649 650 # return classstring + ' -A' 651 652 else: 653 return 
classstring 654 655 #'dict, tuple or string' 656 657 else: 658 raise Exception("Molecular formula identification not performed yet") 659 660 @property 661 def class_dict(self): 662 if self._d_molecular_formula: 663 class_dict = {} 664 665 for atom, qnt in self._d_molecular_formula.items(): 666 if atom != Labels.ion_type and atom != "C" and atom != "H": 667 class_dict[atom] = qnt 668 669 return class_dict 670 671 raise Exception("Molecular formula identification not performed yet")
Base class for representing a molecular formula.
Parameters
- molecular_formula (dict, list, str): The molecular formula.
- ion_charge (int): The ion charge.
- ion_type (str, optional): The ion type. Defaults to None.
- adduct_atom (str, optional): The adduct atom. Defaults to None.
- mspeak_parent (_MSPeak, optional): The parent mass spectrum peak object instance. Defaults to None.
- external_mz (float, optional): The external m/z value. Defaults to None.
Raises
- TypeError: If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT'.
Attributes
- isotopologue_count_percentile (float): The isotopologue count percentile.
- O_C (float): The O/C ratio.
- H_C (float): The H/C ratio.
- dbe (float): The double bond equivalent.
- mz_nominal_calc (int): The nominal m/z value.
- mz_error (float): The m/z error.
- mz_calc (float): The m/z value.
- protonated_mz (float): The protonated or deprotonated m/z value.
- radical_mz (float): The radical m/z value.
- neutral_mass (float): The neutral mass.
- ion_type (str): The ion type.
- ion_charge (int): The ion charge.
- atoms (list): The atoms in the molecular formula.
- confidence_score (float): The confidence score of the molecular formula identification.
- isotopologue_similarity (float): The isotopologue similarity score of the molecular formula identification.
- average_mz_error_score (float): The average m/z error score of the molecular formula identification, including the isotopologues.
- mz_error_score (float): The m/z error score of the molecular formula identification.
- kmd (float): The Kendrick mass defect (KMD).
- kendrick_mass (float): The Kendrick mass.
- knm (float): The nominal Kendrick mass.
- string (str): The molecular formula string.
- string_formated (str): The molecular formula string formatted with subscripts and superscripts.
- class_label (str): The class label.
- class_dict (dict): The class dictionary.
Methods
- change_kendrick_base(kendrick_dict_base). Change the Kendrick base.
- isotopologues(min_abundance, current_mono_abundance, dynamic_range). Calculate the isotopologues.
- atoms_qnt(atom). Get the atom quantity.
- atoms_symbol(atom). Get the atom symbol without the mass number.
- to_dict(). Get the molecular formula as a dictionary.
- to_list(). Get the molecular formula as a list.
def __init__(
    self,
    molecular_formula,
    ion_charge,
    ion_type=None,
    adduct_atom=None,
    mspeak_parent=None,
    external_mz=None,
):
    """Build the formula from a dict, list or string and initialise its state.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula. Subclasses of these types (for example an
        ``OrderedDict``) are accepted as well.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : _MSPeak, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to None.

    Raises
    ------
    TypeError
        If ``molecular_formula`` is not a dict, list or str.
    """
    # Dispatch on the input container; each loader fills self._d_molecular_formula.
    if isinstance(molecular_formula, dict):
        self._from_dict(molecular_formula, ion_type, adduct_atom)
    elif isinstance(molecular_formula, list):
        self._from_list(molecular_formula, ion_type, adduct_atom)
    elif isinstance(molecular_formula, str):
        self._from_str(molecular_formula, ion_type, adduct_atom)
    else:
        # Without this guard the instance would be left without a formula
        # dictionary and fail later with an unrelated AttributeError.
        raise TypeError(
            "molecular_formula must be a dict, list or str, not %s"
            % type(molecular_formula).__name__
        )

    self._ion_charge = ion_charge
    self._external_mz = external_mz

    # Scores are computed lazily by the corresponding properties.
    self._confidence_score = None
    self._isotopologue_similarity = None
    self._mz_error_score = None
    self._mass_error_average_score = None

    self.is_isotopologue = False

    # parent mass spectrum peak obj instance
    self._mspeak_parent = mspeak_parent

    self.expected_isotopologues = []
    self.mspeak_mf_isotopologues_indexes = []

    # Use the Kendrick base from the parent's settings when a parent peak is
    # available, otherwise fall back to CH2.
    if self._mspeak_parent:
        kendrick_dict_base = (
            self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
        )
    else:
        kendrick_dict_base = {"C": 1, "H": 2}
    self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
        kendrick_dict_base
    )
def get(self, atom):
    """Return how many times ``atom`` occurs in the molecular formula.

    Parameters
    ----------
    atom : str
        The atom symbol.

    Returns
    -------
    int
        The stored quantity, or 0 when the formula has no entry for ``atom``.
    """
    return self._d_molecular_formula.get(atom, 0)
Get the atom quantity of a specific atom.
Parameters
- atom (str): The atom symbol.
Returns
- int: The atom quantity.
def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    """Split a single "atom + count" token into its atom label and count.

    Parameters
    ----------
    delimiters : list
        The list of delimiters (atom labels) to look for in ``string``.
    string : str
        The molecular formula string.
    maxsplit : int, optional
        The maximum number of splits. Defaults to 0 (no limit).

    Returns
    -------
    list
        ``[first delimiter found, int(text following it)]``.

    Raises
    ------
    IndexError
        If none of the delimiters occurs in ``string``.

    Notes
    -----
    Does not work when the formula has atoms with the same characters in a
    row that belong to different atoms, e.g. C10H21NNa.
    """
    regex_pattern = "|".join(map(re.escape, delimiters))
    isotopes = re.findall(regex_pattern, string)
    # maxsplit is passed by keyword: positional use is deprecated since Python 3.13.
    counts = re.split(regex_pattern, string, maxsplit=maxsplit)

    return [isotopes[0], int(counts[1])]
Splits the molecular formula string.
Parameters
- delimiters (list): The list of delimiters.
- string (str): The molecular formula string.
- maxsplit (int, optional): The maximum number of splits. Defaults to 0.
Returns
- list: The molecular formula list.
Notes
Does not work when the formula has atoms with the same characters in a row that belong to different atoms, e.g. C10H21NNa.
def adduct_mz(self, adduct_atom):
    """Compute the m/z this formula would have as an adduct ion.

    Parameters
    ----------
    adduct_atom : str
        The adduct atom.

    Returns
    -------
    float
        The m/z value of the adducted ion version of the molecular formula,
        as returned by ``_adduct_mz`` for the current ion charge.
    """
    charge = self.ion_charge
    return self._adduct_mz(adduct_atom, charge)
Get m/z of an adducted ion version of the molecular formula.
Parameters
- adduct_atom (str): The adduct atom.
Returns
- float: The m/z value of the adducted ion version of the molecular formula.
def change_kendrick_base(self, kendrick_dict_base):
    """Recompute the Kendrick values for a new Kendrick base.

    The three values returned by ``_calc_kmd`` replace the stored Kendrick
    mass defect, Kendrick mass and nominal Kendrick mass.

    Parameters
    ----------
    kendrick_dict_base : dict
        The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    """
    kmd, kendrick_mass, nominal_km = self._calc_kmd(kendrick_dict_base)
    self._kmd = kmd
    self._kendrick_mass = kendrick_mass
    self._nominal_km = nominal_km
Change the Kendrick base.
Parameters
- kendrick_dict_base (dict): The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
    """Generate the isotopologues of this molecular formula.

    Parameters
    ----------
    min_abundance : float
        The minimum abundance.
    current_mono_abundance : float
        The current monoisotopic abundance.
    dynamic_range : float
        The dynamic range.

    Yields
    ------
    MolecularFormulaIsotopologue
        The molecular formula isotopologue, in descending order of the
        second element of each calculated entry.

    Notes
    -----
    This calculation ignores the hydrogen isotopes.
    """
    candidates = list(
        self._cal_isotopologues(
            self._d_molecular_formula,
            min_abundance,
            current_mono_abundance,
            dynamic_range,
        )
    )

    # IsoSpec versions differ in how they order their output, so impose a
    # fixed descending order here.
    candidates.sort(key=lambda entry: entry[1], reverse=True)

    for entry in candidates:
        yield MolecularFormulaIsotopologue(
            *entry,
            current_mono_abundance,
            self.ion_charge,
            ion_type=self.ion_type,
            adduct_atom=self.adduct_atom,
        )
Calculate the isotopologues for a given molecular formula.
Parameters
- min_abundance (float): The minimum abundance.
- current_mono_abundance (float): The current monoisotopic abundance.
- dynamic_range (float): The dynamic range.
Yields
- MolecularFormulaIsotopologue: The molecular formula isotopologue.
Notes
This calculation ignores the hydrogen isotopes.
def atoms_qnt(self, atom):
    """Return the quantity stored for ``atom``.

    Raises
    ------
    Warning
        If ``atom`` is not a key of the molecular formula dictionary.
    """
    formula = self._d_molecular_formula
    if atom not in formula:
        raise Warning(
            "Could not find %s in this Molecular Formula object" % str(atom)
        )
    return formula.get(atom)
Get the atom quantity of a specific atom in the molecular formula.
def atoms_symbol(self, atom):
    """Return ``atom`` with every digit character removed."""
    non_digits = filter(lambda char: not char.isdigit(), atom)
    return "".join(non_digits)
Get the atom symbol without the mass number.
def to_dict(self):
    """Expose the molecular formula dictionary.

    Returns
    -------
    dict
        The internal molecular formula dictionary itself (not a copy), so
        changes made by the caller are visible to this object.
    """
    formula = self._d_molecular_formula
    return formula
Returns the molecular formula as a dictionary.
Returns
- dict: The molecular formula as a dictionary.
def to_list(self):
    """Flatten the molecular formula into an alternating label/count list.

    Returns
    -------
    list
        ``[atom, count, atom, count, ...]`` in dictionary order, with the
        ion type entry left out.

    Raises
    ------
    Exception
        If the molecular formula identification was not performed yet
        (the formula dictionary is empty).
    """
    # TODO ensure self._d_molecular_formula is a orderedDict
    formula = self._d_molecular_formula
    if not formula:
        raise Exception("Molecular formula identification not performed yet")

    flattened = []
    for atom, atom_number in formula.items():
        if atom == Labels.ion_type:
            continue
        flattened.extend((atom, atom_number))
    return flattened
Returns the molecular formula as a list.
Returns
- list: The molecular formula as a list.
Raises
- Exception: If the molecular formula identification was not performed yet.
class MolecularFormulaIsotopologue(MolecularFormulaBase):
    """Class for representing a molecular formula isotopologue.

    Parameters
    ----------
    _d_molecular_formula : dict
        The molecular formula as a dictionary. It is not modified.
    prob_ratio : float
        The probability ratio.
    mono_abundance : float
        The monoisotopic abundance.
    ion_charge : int
        The ion charge.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.

    Raises
    ------
    Exception
        If the ion type is an adduct ion and neither ``adduct_atom`` nor one
        of its isotopes is present in the formula.

    Attributes
    ----------
    prob_ratio : float
        The probability ratio.
    abundance_calc : float
        The calculated abundance (``mono_abundance * prob_ratio``).
    area_error : float
        The area error.
    abundance_error : float
        The abundance error.
    is_isotopologue : bool
        The isotopologue flag. Defaults to True.
    mspeak_index_mono_isotopic : int
        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    mono_isotopic_formula_index : int
        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    """

    def __init__(
        self,
        _d_molecular_formula,
        prob_ratio,
        mono_abundance,
        ion_charge,
        mspeak_parent=None,
        ion_type=None,
        adduct_atom=None,
    ):
        if ion_type is None:
            # Fall back to the ion type stored in the formula dict (None if absent).
            ion_type = _d_molecular_formula.get(Labels.ion_type)
        else:
            ion_type = Labels.ion_type_translate.get(ion_type)

        if ion_type == Labels.adduct_ion:
            if adduct_atom not in _d_molecular_formula:
                # The adduct may be present as one of its isotopes instead.
                # An unknown or missing adduct atom has no isotope entry; treat
                # that as "nothing to try" so the explicit error below fires.
                isotope_entry = Atoms.isotopes.get(adduct_atom)
                candidates = isotope_entry[1] if isotope_entry else []
                resolved = None
                for adduct_iso in candidates:
                    if adduct_iso in _d_molecular_formula:
                        resolved = adduct_iso  # the last matching isotope wins
                adduct_atom = resolved
            if adduct_atom is None:
                raise Exception("adduct_atom is required for adduct ion")

            # Work on a copy so the caller's dictionary is left untouched.
            # The base class adds the adduct atom back, so take one out here.
            without_adduct = dict(_d_molecular_formula)
            without_adduct[adduct_atom] -= 1
            _d_molecular_formula = {
                key: val for key, val in without_adduct.items() if val != 0
            }

        super().__init__(
            molecular_formula=_d_molecular_formula,
            ion_charge=ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
        )

        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
        self.prob_ratio = prob_ratio
        self.abundance_calc = mono_abundance * prob_ratio

        self.is_isotopologue = True

        self.mspeak_index_mono_isotopic = None
        self.mono_isotopic_formula_index = None

        # parent mass spectrum peak obj instance
        self._mspeak_parent = mspeak_parent

    @property
    def area_error(self):
        return self._calc_area_error()

    @property
    def abundance_error(self):
        return self._calc_abundance_error()
Class for representing a molecular formula isotopologue.
Parameters
- _d_molecular_formula (dict): The molecular formula as a dictionary.
- prob_ratio (float): The probability ratio.
- mono_abundance (float): The monoisotopic abundance.
- ion_charge (int): The ion charge.
- mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
- ion_type (str, optional): The ion type. Defaults to None.
- adduct_atom (str, optional): The adduct atom. Defaults to None.
Attributes
- prob_ratio (float): The probability ratio.
- abundance_calc (float): The calculated abundance.
- area_error (float): The area error.
- abundance_error (float): The abundance error.
- is_isotopologue (bool): The isotopologue flag. Defaults to True.
- mspeak_index_mono_isotopic (int): The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
- mono_isotopic_formula_index (int): The index of the monoisotopic formula in the molecular formula list. Defaults to None.
def __init__(
    self,
    _d_molecular_formula,
    prob_ratio,
    mono_abundance,
    ion_charge,
    mspeak_parent=None,
    ion_type=None,
    adduct_atom=None,
):
    """Create an isotopologue from a formula dictionary and its probability ratio.

    Parameters
    ----------
    _d_molecular_formula : dict
        The molecular formula as a dictionary. It is not modified.
    prob_ratio : float
        The probability ratio.
    mono_abundance : float
        The monoisotopic abundance.
    ion_charge : int
        The ion charge.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.

    Raises
    ------
    Exception
        If the ion type is an adduct ion and neither ``adduct_atom`` nor one
        of its isotopes is present in the formula.
    """
    if ion_type is None:
        # Fall back to the ion type stored in the formula dict (None if absent).
        ion_type = _d_molecular_formula.get(Labels.ion_type)
    else:
        ion_type = Labels.ion_type_translate.get(ion_type)

    if ion_type == Labels.adduct_ion:
        if adduct_atom not in _d_molecular_formula:
            # The adduct may be present as one of its isotopes instead.
            # An unknown or missing adduct atom has no isotope entry; treat
            # that as "nothing to try" so the explicit error below fires.
            isotope_entry = Atoms.isotopes.get(adduct_atom)
            candidates = isotope_entry[1] if isotope_entry else []
            resolved = None
            for adduct_iso in candidates:
                if adduct_iso in _d_molecular_formula:
                    resolved = adduct_iso  # the last matching isotope wins
            adduct_atom = resolved
        if adduct_atom is None:
            raise Exception("adduct_atom is required for adduct ion")

        # Work on a copy so the caller's dictionary is left untouched.
        # The base class adds the adduct atom back, so take one out here.
        without_adduct = dict(_d_molecular_formula)
        without_adduct[adduct_atom] -= 1
        _d_molecular_formula = {
            key: val for key, val in without_adduct.items() if val != 0
        }

    super().__init__(
        molecular_formula=_d_molecular_formula,
        ion_charge=ion_charge,
        ion_type=ion_type,
        adduct_atom=adduct_atom,
    )

    # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    self.prob_ratio = prob_ratio
    self.abundance_calc = mono_abundance * prob_ratio

    self.is_isotopologue = True

    self.mspeak_index_mono_isotopic = None
    self.mono_isotopic_formula_index = None

    # parent mass spectrum peak obj instance
    self._mspeak_parent = mspeak_parent
Inherited Members
- MolecularFormulaBase
- expected_isotopologues
- mspeak_mf_isotopologues_indexes
- get
- split
- isotopologue_count_percentile
- O_C
- H_C
- A_I
- A_I_mod
- nosc
- dbe
- mz_nominal_calc
- mz_error
- mz_calc
- protonated_mz
- radical_mz
- neutral_mass
- adduct_mz
- ion_type
- ion_charge
- atoms
- confidence_score
- isotopologue_similarity
- average_mz_error_score
- mz_error_score
- kmd
- kendrick_mass
- knm
- change_kendrick_base
- isotopologues
- atoms_qnt
- atoms_symbol
- string
- string_formated
- to_dict
- to_list
- class_label
- class_dict
class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    name : str, optional
        The name of the reference molecule. Defaults to None.
    kegg_id : str, optional
        The KEGG ID of the reference molecule. Defaults to None.
    cas : str, optional
        The CAS number of the reference molecule. Defaults to None.

    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        name=None,
        kegg_id=None,
        cas=None,
    ) -> None:
        super().__init__(
            molecular_formula,
            ion_charge,
            ion_type=ion_type,
            adduct_atom=adduct_atom,
            mspeak_parent=mspeak_parent,
        )

        # Stored as given; only the `name` setter validates its value.
        self._name = name
        self._kegg_id = kegg_id
        self._cas = cas

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, name):
        if isinstance(name, str):
            self._name = name
        else:
            # Fill the placeholder so the message shows the rejected value.
            raise TypeError("name: {} should be type string".format(name))

    @property
    def kegg_id(self):
        return self._kegg_id

    @kegg_id.setter
    def kegg_id(self, kegg_id):
        # No type validation is applied to the KEGG ID.
        self._kegg_id = kegg_id

    @property
    def cas(self):
        return self._cas

    @cas.setter
    def cas(self, cas):
        # No type validation is applied to the CAS number.
        self._cas = cas
Class for representing a molecular formula associated with a molecule in a LCMS library reference.
Parameters
- molecular_formula (dict, list, str): The molecular formula.
- ion_charge (int): The ion charge.
- ion_type (str, optional): The ion type. Defaults to None.
- adduct_atom (str, optional): The adduct atom. Defaults to None.
- mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
- name (str, optional): The name of the reference molecule. Defaults to None.
- kegg_id (str, optional): The KEGG ID of the reference molecule. Defaults to None.
- cas (str, optional): The CAS number of the reference molecule. Defaults to None.
def __init__(
    self,
    molecular_formula,
    ion_charge,
    ion_type=None,
    adduct_atom=None,
    mspeak_parent=None,
    name=None,
    kegg_id=None,
    cas=None,
) -> None:
    """Initialise the formula and attach the library reference metadata.

    ``molecular_formula``, ``ion_charge``, ``ion_type``, ``adduct_atom`` and
    ``mspeak_parent`` are forwarded to the base class; ``name``, ``kegg_id``
    and ``cas`` are stored on the instance without validation.
    """
    base_options = {
        "ion_type": ion_type,
        "adduct_atom": adduct_atom,
        "mspeak_parent": mspeak_parent,
    }
    super().__init__(molecular_formula, ion_charge, **base_options)

    self._name, self._kegg_id, self._cas = name, kegg_id, cas
Inherited Members
- MolecularFormulaBase
- is_isotopologue
- expected_isotopologues
- mspeak_mf_isotopologues_indexes
- get
- split
- isotopologue_count_percentile
- O_C
- H_C
- A_I
- A_I_mod
- nosc
- dbe
- mz_nominal_calc
- mz_error
- mz_calc
- protonated_mz
- radical_mz
- neutral_mass
- adduct_mz
- ion_type
- ion_charge
- atoms
- confidence_score
- isotopologue_similarity
- average_mz_error_score
- mz_error_score
- kmd
- kendrick_mass
- knm
- change_kendrick_base
- isotopologues
- atoms_qnt
- atoms_symbol
- string
- string_formated
- to_dict
- to_list
- class_label
- class_dict
class MolecularFormula(MolecularFormulaBase):
    """General-purpose molecular formula.

    Adds nothing to the base class beyond its own constructor defaults.

    Parameters
    ----------
    molecular_formula : dict, list, str
        The molecular formula.
    ion_charge : int
        The ion charge.
    ion_type : str, optional
        The ion type. Defaults to None.
    adduct_atom : str, optional
        The adduct atom. Defaults to None.
    mspeak_parent : object, optional
        The parent mass spectrum peak object instance. Defaults to None.
    external_mz : float, optional
        The external m/z value. Defaults to False.
    """

    def __init__(
        self,
        molecular_formula,
        ion_charge,
        ion_type=None,
        adduct_atom=None,
        mspeak_parent=None,
        external_mz=False,
    ):
        base_options = {
            "ion_type": ion_type,
            "adduct_atom": adduct_atom,
            "mspeak_parent": mspeak_parent,
            "external_mz": external_mz,
        }
        super().__init__(molecular_formula, ion_charge, **base_options)
General class for representing a molecular formula.
Parameters
- molecular_formula (dict, list, str): The molecular formula.
- ion_charge (int): The ion charge.
- ion_type (str, optional): The ion type. Defaults to None.
- adduct_atom (str, optional): The adduct atom. Defaults to None.
- mspeak_parent (object, optional): The parent mass spectrum peak object instance. Defaults to None.
- external_mz (float, optional): The external m/z value. Defaults to False.
def __init__(
    self,
    molecular_formula,
    ion_charge,
    ion_type=None,
    adduct_atom=None,
    mspeak_parent=None,
    external_mz=False,
):
    """Forward every argument unchanged to the base class initialiser.

    Note that ``external_mz`` defaults to False here.
    """
    base_options = {
        "ion_type": ion_type,
        "adduct_atom": adduct_atom,
        "mspeak_parent": mspeak_parent,
        "external_mz": external_mz,
    }
    super().__init__(molecular_formula, ion_charge, **base_options)
Inherited Members
- MolecularFormulaBase
- is_isotopologue
- expected_isotopologues
- mspeak_mf_isotopologues_indexes
- get
- split
- isotopologue_count_percentile
- O_C
- H_C
- A_I
- A_I_mod
- nosc
- dbe
- mz_nominal_calc
- mz_error
- mz_calc
- protonated_mz
- radical_mz
- neutral_mass
- adduct_mz
- ion_type
- ion_charge
- atoms
- confidence_score
- isotopologue_similarity
- average_mz_error_score
- mz_error_score
- kmd
- kendrick_mass
- knm
- change_kendrick_base
- isotopologues
- atoms_qnt
- atoms_symbol
- string
- string_formated
- to_dict
- to_list
- class_label
- class_dict