corems.molecular_formula.calc.MolecularFormulaCalc
1__author__ = "Yuri E. Corilo" 2__date__ = "Jun 24, 2019" 3 4 5import warnings 6 7import IsoSpecPy 8from numpy import array, exp, isnan, nextafter, power 9 10# this is to handle both versions of IsoSpecPy, 2.0.2 and 2.2.2 11# TODO in a future release remove support for legacy isospecpy 12from packaging import version 13 14from corems.encapsulation.constant import Atoms, Labels 15from corems.encapsulation.factory.parameters import MSParameters 16from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity 17 18isospec_version = IsoSpecPy.__version__ 19if version.parse(isospec_version) > version.parse("2.0.2"): 20 legacy_isospec = False 21else: 22 legacy_isospec = True 23if legacy_isospec: 24 from IsoSpecPy import IsoSpecPy 25 26 warnings.warn( 27 f"IsoSpecPy version {isospec_version} is installed, and support is deprecated. Please update to 2.2.2", 28 DeprecationWarning, 29 ) 30 31 32class MolecularFormulaCalc: 33 """Class of calculations related to molecular formula 34 35 This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula 36 37 Attributes 38 ---------- 39 mz_calc : float 40 The m/z value of the molecular formula. 41 neutral_mass : float 42 The neutral mass of the molecular formula. 43 ion_charge : int 44 The ion charge of the molecular formula. 45 _external_mz : float 46 The externally provided m/z value of the molecular formula. 47 _d_molecular_formula : dict 48 The dictionary representation of the molecular formula. 49 _mspeak_parent : object 50 The parent MS peak object associated with the molecular formula. 51 _assignment_mass_error : float 52 The mass error of the molecular formula. 53 54 Methods 55 ------- 56 * _calc_resolving_power_low_pressure(B, T) 57 Calculate the resolving power at low pressure. 58 * _calc_resolving_power_high_pressure(B, T) 59 Calculate the resolving power at high pressure. 60 * _adduct_mz(adduct_atom, ion_charge) 61 Get the m/z value of an adducted ion version of the molecular formula. 62 * _protonated_mz(ion_charge) 63 Get the m/z value of a protonated or deprotonated ion version of the molecular formula. 64 * _radical_mz(ion_charge) 65 Get the m/z value of a radical ion version of the molecular formula. 66 * _neutral_mass() 67 Get the neutral mass of the molecular formula. 68 * _calc_mz() 69 Get the m/z value of the molecular formula. 70 * _calc_assignment_mass_error(method='ppm') 71 Calculate the mass error of the molecular formula. 72 * _calc_mz_confidence(mean=0) 73 Calculate the m/z confidence of the molecular formula. 74 * _calc_isotopologue_confidence() 75 Calculate the isotopologue confidence of the molecular formula. 76 * normalize_distance(dist, dist_range) 77 Normalize the distance value. 78 * subtract_formula(formula_obj, formated=True) 79 Subtract a formula from the current formula object. 80 * _calc_average_mz_score() 81 Calculate the average m/z error score of the molecular formula identification, including the isotopologues. 82 """ 83 84 def _calc_resolving_power_low_pressure(self, B, T): 85 """ 86 Calculate the resolving power at low pressure. 87 88 Parameters 89 ---------- 90 B : float 91 Magnetic Strength (Testa). 92 T : float 93 Transient time (seconds). 94 95 """ 96 return (1.274 * 10000000 * B * T) * (1 / self.mz_calc) 97 98 def _calc_resolving_power_high_pressure(self, B, T): 99 """ 100 Calculate the resolving power at high pressure. 101 102 Parameters 103 ---------- 104 B : float 105 Magnetic Strength (Testa). 106 T : float 107 Transient time (seconds). 108 109 """ 110 return (2.758 * 10000000 * B * T) * (1 / self.mz_calc) 111 112 def _adduct_mz(self, adduct_atom, ion_charge): 113 """Get the m/z value of an adducted ion version of the molecular formula. 114 115 Parameters 116 ---------- 117 adduct_atom : str 118 The adduct atom. 119 ion_charge : int 120 The ion charge. 121 122 """ 123 return ( 124 self.neutral_mass 125 + (Atoms.atomic_masses.get(adduct_atom)) 126 + (ion_charge * -1 * Atoms.electron_mass) 127 ) / abs(ion_charge) 128 129 def _protonated_mz(self, ion_charge): 130 """Get the m/z value of a protonated or deprotonated ion version of the molecular formula. 131 132 Parameters 133 ---------- 134 ion_charge : int 135 The ion charge. 136 """ 137 return ( 138 self.neutral_mass 139 + (ion_charge * Atoms.atomic_masses.get("H")) 140 + (ion_charge * -1 * Atoms.electron_mass) 141 ) / abs(ion_charge) 142 143 def _radical_mz(self, ion_charge): 144 """Get the m/z value of a radical ion version of the molecular formula. 145 146 Parameters 147 ---------- 148 ion_charge : int 149 The ion charge. 150 """ 151 return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs( 152 ion_charge 153 ) 154 155 def _neutral_mass(self): 156 """Get the neutral mass of the molecular formula.""" 157 158 mass = 0 159 160 for each_atom in self._d_molecular_formula.keys(): 161 if each_atom != Labels.ion_type and each_atom != "HC": 162 try: 163 mass = mass + Atoms.atomic_masses[ 164 each_atom 165 ] * self._d_molecular_formula.get(each_atom) 166 167 except: 168 print(Labels.ion_type, each_atom) 169 170 return mass 171 172 def _calc_mz(self): 173 """Get the m/z value of the molecular formula, based on the ion charge and ion type.""" 174 175 if self.ion_charge is not None: 176 if self._external_mz: 177 return self._external_mz 178 179 else: 180 ion_type = self._d_molecular_formula.get(Labels.ion_type) 181 182 if ion_type == Labels.protonated_de_ion: 183 return self.protonated_mz 184 185 elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion: 186 return self.radical_mz 187 188 elif ion_type == Labels.neutral: 189 return self.neutral_mass 190 191 elif self.ion_charge == 0: 192 return self.neutral_mass 193 194 else: 195 # formula is probably ion form used for bruker ref list 196 return self.neutral_mass 197 198 else: 199 raise Exception("Please set ion charge first") 200 201 def _calc_assignment_mass_error(self, method="ppm"): 202 """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z. 203 204 Parameters 205 ---------- 206 method : str, optional 207 The method to calculate the mass error, by default 'ppm', but can be 'ppb' 208 209 Raises 210 ------ 211 Exception 212 If the method is not 'ppm' or 'ppb'. 213 Exception 214 If there is no ms peak associated with the molecular formula instance. 215 """ 216 217 if method == "ppm": 218 multi_factor = 1000000 219 220 elif method == "ppb": 221 multi_factor = 1000000 222 223 else: 224 raise Exception( 225 "method needs to be ppm or ppb, you have entered %s" % method 226 ) 227 228 if self._mspeak_parent.mz_exp: 229 self._assignment_mass_error = ( 230 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc 231 ) * multi_factor 232 233 return ( 234 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc 235 ) * multi_factor 236 237 else: 238 raise Exception( 239 "No ms peak associated with the molecular formula instance %s", self 240 ) 241 242 def _calc_mz_confidence(self, mean=0): 243 """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z. 244 245 Parameters 246 ---------- 247 mean : int, optional 248 The mean of the m/z error, by default 0 249 250 """ 251 252 # predicted std not set, using 0.3 253 if not self._mspeak_parent.predicted_std: 254 self._mspeak_parent.predicted_std = 1.66 255 256 # print( self._mspeak_parent.predicted_std) 257 258 return exp( 259 -1 260 * ( 261 power((self.mz_error - mean), 2) 262 / (2 * power(self._mspeak_parent.predicted_std, 2)) 263 ) 264 ) 265 266 def _calc_isotopologue_confidence(self): 267 """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity. 268 269 Returns 270 ------- 271 float 272 The isotopologue confidence of the molecular formula. 273 """ 274 275 if self.is_isotopologue: 276 # confidence of isotopologue is pure mz error 277 # TODO add more features here 278 279 mformula_index = self.mono_isotopic_formula_index 280 mspeak_index = self.mspeak_index_mono_isotopic 281 282 mspeak = self._mspeak_parent._ms_parent[mspeak_index] 283 284 expected_isotopologues = mspeak[mformula_index].expected_isotopologues 285 286 mono_mz = mspeak[mformula_index].mz_calc 287 mono_abundance = mspeak.abundance 288 289 else: 290 mono_mz = self.mz_calc 291 mono_abundance = self._mspeak_parent.abundance 292 293 expected_isotopologues = self.expected_isotopologues 294 # has isotopologues based on current dinamic range 295 296 if expected_isotopologues: 297 dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]} 298 299 # get reference data 300 for mf in expected_isotopologues: 301 dict_mz_abund_ref["abundance"].append(mf.abundance_calc) 302 dict_mz_abund_ref["mz"].append(mf.mz_calc) 303 304 dict_mz_abund_exp = {mono_mz: mono_abundance} 305 306 # get experimental data 307 for mf in expected_isotopologues: 308 # molecular formula has been assigned to a peak 309 if mf._mspeak_parent: 310 # stores mspeak abundance 311 dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance 312 313 else: 314 # fill missing mz with abundance 0 and mz error score of 0 315 dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1) 316 317 distance = SpectralSimilarity( 318 dict_mz_abund_exp, dict_mz_abund_ref 319 ).manhattan_distance() 320 correlation = 1 - self.normalize_distance(distance, [0, 2]) 321 # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref) 322 # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref) 323 324 if correlation == 1: 325 print(dict_mz_abund_exp, dict_mz_abund_ref) 326 if isnan(correlation): 327 # print(dict_mz_abund_exp, dict_mz_abund_ref) 328 correlation = 0.00001 329 330 else: 331 # no isotopologue expected giving a correlation score of 0.0 but it needs optimization 332 correlation = 0.0 333 334 return correlation 335 336 def normalize_distance(self, dist, dist_range): 337 """ 338 Normalize the distance value. 339 340 Parameters 341 ---------- 342 dist : float 343 The distance value to be normalized. 344 dist_range : list 345 The range of the distance value. 346 347 """ 348 result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0]) 349 350 if result < 0: 351 result = 0.0 352 elif result > 1: 353 result = 1.0 354 355 return result 356 357 def subtract_formula(self, formula_obj, formated=True): 358 """Subtract a formula from the current formula object 359 360 Parameters 361 ---------- 362 formula_obj : MolecularFormula 363 MolecularFormula object to be subtracted from the current formula object 364 formated : bool, optional 365 If True, returns the formula in string format, by default True 366 367 """ 368 subtraction = {} 369 for atom, value in self.to_dict().items(): 370 if atom != Labels.ion_type: 371 if formula_obj.get(atom): 372 # value_subtraction = value - formula_obj.get(atom) 373 if value - formula_obj.get(atom) > 0: 374 subtraction[atom] = value - formula_obj.get(atom) 375 else: 376 subtraction[atom] = value 377 if formated: 378 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") 379 SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹") 380 else: 381 SUB = str.maketrans("0123456789", "0123456789") 382 SUP = str.maketrans("0123456789", "0123456789") 383 formula_srt = "" 384 for atom in Atoms.atoms_order: 385 if atom in subtraction.keys(): 386 formula_srt += atom.translate(SUP) + str( 387 int(subtraction.get(atom)) 388 ).translate(SUB) 389 390 return formula_srt 391 392 def _calc_average_mz_score(self): 393 """Calculate the average m/z error score of the molecular formula identification, including the isotopologues.""" 394 if self.is_isotopologue: 395 # confidence of isotopologue is pure mz error 396 # TODO add more features here 397 398 mformula_index = self.mono_isotopic_formula_index 399 mspeak_index = self.mspeak_index_mono_isotopic 400 401 mspeak = self._mspeak_parent._ms_parent[mspeak_index] 402 403 expected_isotopologues = mspeak[mformula_index].expected_isotopologues 404 405 else: 406 expected_isotopologues = self.expected_isotopologues 407 # has isotopologues based on current dinamic range 408 409 accumulated_mz_score = [self.mz_error_score] 410 411 if expected_isotopologues: 412 for mf in expected_isotopologues: 413 # molecular formula has been assigned to a peak 414 if mf._mspeak_parent: 415 # stores mspeak abundance 416 accumulated_mz_score.append(mf.mz_error_score) 417 else: 418 # fill missing mz with abundance 0 and mz error score of 0 419 accumulated_mz_score.append(0.0) 420 421 average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score) 422 423 if isnan(average_mz_score): 424 average_mz_score = 0.0 425 426 return average_mz_score 427 428 def _calc_confidence_score(self): 429 """Calculate the confidence score of the molecular formula identification, including the isotopologues.""" 430 431 ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean 432 #### TODO: Add spectral similarity 433 434 ## Parameters 435 # ---------- 436 #### mz_exp: 437 #### Experimental m/z 438 #### predicted_std: 439 #### Standart deviation calculated from Resolving power optimization or constant set by User 440 441 isotopologue_correlation = self.isotopologue_similarity 442 average_mz_score = self.average_mz_error_score 443 # add monoisotopic peak mz error score 444 445 # calculate score with higher weight for mass error 446 # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4) 447 a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight 448 b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight 449 450 score = (isotopologue_correlation * b) + (average_mz_score * a) 451 452 # if round(average_mz_score,2) == 0.00: 453 # print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b) 454 455 return score 456 457 def _calc_abundance_error(self, method="percentile"): 458 """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance. 459 460 Parameters 461 ---------- 462 method : str, optional 463 The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb' 464 465 Returns 466 ------- 467 float 468 The abundance error of the molecular formula. 469 470 Raises 471 ------ 472 Exception 473 If isotopologues were not calculated. 474 """ 475 476 mult_factor = 100 477 478 iso_abundance = self._mspeak_parent.abundance 479 mono_abundance = self._mspeak_parent._ms_parent[ 480 self.mspeak_index_mono_isotopic 481 ].abundance 482 483 if self.prob_ratio: 484 theor_abundance = mono_abundance * self.prob_ratio 485 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class 486 return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor 487 488 else: 489 raise Exception("Please calc_isotopologues") 490 491 def _calc_area_error(self, method="percentile"): 492 """Calculate the area error of the molecular formula, based on the experimental area and the calculated area. 493 494 Parameters 495 ---------- 496 method : str, optional 497 The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb' 498 499 Returns 500 ------- 501 float 502 The area error of the molecular formula. 503 504 Raises 505 ------ 506 Exception 507 If isotopologues were not calculated. 508 """ 509 510 mult_factor = 100 511 512 iso_area = self._mspeak_parent.area 513 mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area 514 515 if self.prob_ratio: 516 if mono_area and iso_area: 517 # exp_ratio = iso_area/mono_area 518 519 area_calc = mono_area * self.prob_ratio 520 521 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class 522 return ((area_calc - iso_area) / area_calc) * mult_factor 523 # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor 524 525 else: 526 # centroid mass spectrum 527 return 0 528 else: 529 raise Exception("Please calc_isotopologues") 530 531 def _calc_aromaticity_index_mod(self): 532 """Calculate the modified aromaticity index of the molecular formula. 533 534 Returns 535 ------- 536 float 537 The aromaticity index of the molecular formula. 538 539 Notes 540 ----- 541 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 542 corrected in https://doi.org/10.1002/rcm.7433 543 """ 544 # Prepare empty dictionary to store the number of atoms of each element 545 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} 546 547 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 548 for element in ai_es: 549 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 550 for element_w_iso in elements_w_iso: 551 if element_w_iso in self._d_molecular_formula: 552 ai_es[element] += self._d_molecular_formula[element_w_iso] 553 554 ai_n = ( 555 1 556 + ai_es["C"] 557 - (0.5 * ai_es["O"]) 558 - ai_es["S"] 559 - (0.5 * (ai_es["N"] + ai_es["H"])) 560 ) 561 ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"] 562 563 ai = ai_n / ai_d 564 565 if ai < 0: 566 ai = 0 567 if ai > 1: 568 ai = 1 569 570 return ai 571 572 def _calc_aromaticity_index(self): 573 """Calculate the aromaticity index of the molecular formula. 574 575 Returns 576 ------- 577 float 578 The aromaticity index of the molecular formula. 579 580 Notes 581 ----- 582 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 583 corrected in https://doi.org/10.1002/rcm.7433 584 """ 585 # Prepare empty dictionary to store the number of atoms of each element 586 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} 587 588 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 589 for element in ai_es: 590 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 591 for element_w_iso in elements_w_iso: 592 if element_w_iso in self._d_molecular_formula: 593 ai_es[element] += self._d_molecular_formula[element_w_iso] 594 595 ai_n = ( 596 1 597 + ai_es["C"] 598 - (ai_es["O"]) 599 - ai_es["S"] 600 - (0.5 * (ai_es["N"] + ai_es["H"])) 601 ) 602 ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"] 603 604 ai = ai_n / ai_d 605 606 if ai < 0: 607 ai = 0 608 if ai > 1: 609 ai = 1 610 611 return ai 612 613 def _calc_nosc(self): 614 """Calculate the average nominal oxidation state of carbon 615 616 Returns 617 ------- 618 float 619 The average nominal oxidation state of carbon 620 621 Notes 622 ----- 623 Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020 624 """ 625 # Prepare empty dictionary to store the number of atoms of each element 626 nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0} 627 628 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 629 for element in nosc_es: 630 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 631 for element_w_iso in elements_w_iso: 632 if element_w_iso in self._d_molecular_formula: 633 nosc_es[element] += self._d_molecular_formula[element_w_iso] 634 635 nosc = ( 636 -( 637 ( 638 4 * nosc_es["C"] 639 + nosc_es["H"] 640 - 3 * nosc_es["N"] 641 - 2 * nosc_es["O"] 642 + 5 * nosc_es["P"] 643 - 2 * nosc_es["S"] 644 ) 645 / nosc_es["C"] 646 ) 647 + 4 648 ) 649 650 # If nosc is infinite or negative infinity, set it to nan 651 if nosc == float("inf") or nosc == float("-inf"): 652 nosc = float("nan") 653 654 return nosc 655 656 @property 657 def dbe_ai(self): 658 """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.""" 659 660 carbons = self._d_molecular_formula.get("C") 661 hydrogens = self._d_molecular_formula.get("H") 662 oxygens = self._d_molecular_formula.get("O") 663 return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5) 664 665 def _calc_dbe(self): 666 """Calculate the double bond equivalent (DBE) of the molecular formula""" 667 668 individual_dbe = 0 669 670 for atom in self._d_molecular_formula.keys(): 671 if atom != Labels.ion_type: 672 n_atom = int(self._d_molecular_formula.get(atom)) 673 674 clean_atom = "".join([i for i in atom if not i.isdigit()]) 675 676 if self._mspeak_parent: 677 valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get( 678 clean_atom 679 ) 680 else: 681 valencia = MSParameters.molecular_search.used_atom_valences.get( 682 clean_atom 683 ) 684 # valencia = Atoms.atoms_covalence.get(atom) 685 686 if type(valencia) is tuple: 687 valencia = valencia[0] 688 if valencia > 0: 689 # print atom, valencia, n_atom, individual_dbe 690 individual_dbe = individual_dbe + (n_atom * (valencia - 2)) 691 else: 692 continue 693 694 dbe = 1 + (0.5 * individual_dbe) 695 696 if self.ion_type == Labels.adduct_ion: 697 dbe = dbe + 0.5 698 699 return dbe 700 701 def _calc_kmd(self, dict_base): 702 """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass. 703 704 Parameters 705 ---------- 706 dict_base : dict 707 The dictionary of the base formula, e.g. {'C':1, 'H':2} 708 709 Returns 710 ------- 711 tuple 712 The tuple of the KMD, Kendrick mass, and nominal Kendrick mass. 713 """ 714 mass = 0 715 for atom in dict_base.keys(): 716 mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom) 717 718 kendrick_mass = (int(mass) / mass) * self.mz_calc 719 720 nominal_km = int(kendrick_mass) 721 722 kmd = (nominal_km - kendrick_mass) * 100 723 724 # kmd = (nominal_km - km) * 1 725 kmd = round(kmd, 0) 726 727 return kmd, kendrick_mass, nominal_km 728 729 def _cal_isotopologues( 730 self, formula_dict, min_abundance, current_abundance, ms_dynamic_range 731 ): 732 """Calculate the isotopologues for a given molecular formula. 733 734 Parameters 735 ---------- 736 formula_dict : dict 737 The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2} 738 min_abundance : float 739 The minimum abundance. 740 current_abundance : float 741 The current monoisotopic abundance. 742 ms_dynamic_range : float 743 The dynamic range. 744 745 746 Notes 747 ----- 748 This is the primary function to look for isotopologues based on a monoisotopic molecular formula. 749 It needs to be expanded to include the calculation of resolving power and plot the results. 750 Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion. 751 Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational). 752 It might break when adding non-conventional atoms (not yet tested). 753 This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec. 754 755 756 """ 757 758 # last update on 05-26-2020, Yuri E. Corilo 759 760 # updated it to reflect min possible mass peak abundance 761 cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range) 762 763 # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range) 764 # print(cut_off_to_IsoSpeccPy) 765 atoms_labels = ( 766 atom 767 for atom in formula_dict.keys() 768 if atom != Labels.ion_type and atom != "H" 769 ) 770 771 atoms_count = [] 772 masses_list_tuples = [] 773 props_list_tuples = [] 774 all_atoms_list = [] 775 776 for atom_label in atoms_labels: 777 if Atoms.isotopes.get(atom_label)[1][0] is None: 778 "This atom_label has no heavy isotope" 779 atoms_count.append(formula_dict.get(atom_label)) 780 mass = Atoms.atomic_masses.get(atom_label) 781 prop = Atoms.isotopic_abundance.get(atom_label) 782 masses_list_tuples.append([mass]) 783 props_list_tuples.append([prop]) 784 all_atoms_list.append(atom_label) 785 786 else: 787 isotopes_label_list = Atoms.isotopes.get(atom_label)[1] 788 789 if len(isotopes_label_list) > 1: 790 "This atom_label has two or more heavy isotope" 791 isotopos_labels = [i for i in isotopes_label_list] 792 else: 793 "This atom_label only has one heavy isotope" 794 isotopos_labels = [isotopes_label_list[0]] 795 796 # all_atoms_list.extend(isotopos_labels) 797 isotopos_labels = [atom_label] + isotopos_labels 798 799 all_atoms_list.extend(isotopos_labels) 800 801 masses = [ 802 Atoms.atomic_masses.get(atom_label) 803 for atom_label in isotopos_labels 804 ] 805 props = [ 806 Atoms.isotopic_abundance.get(atom_label) 807 for atom_label in isotopos_labels 808 ] 809 810 atoms_count.append(formula_dict.get(atom_label)) 811 masses_list_tuples.append(masses) 812 props_list_tuples.append(props) 813 if legacy_isospec: 814 iso = IsoSpecPy.IsoSpec( 815 atoms_count, 816 masses_list_tuples, 817 props_list_tuples, 818 cut_off_to_IsoSpeccPy, 819 ) 820 conf = iso.getConfs() 821 masses = conf[0] 822 probs = exp(conf[1]) 823 molecular_formulas = conf[2] 824 # print('conf', conf) 825 # print('probs', conf[1]) 826 else: 827 # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach 828 iso = IsoSpecPy.IsoTotalProb( 829 atomCounts=atoms_count, 830 isotopeMasses=masses_list_tuples, 831 isotopeProbabilities=props_list_tuples, 832 prob_to_cover=cut_off_to_IsoSpeccPy, 833 get_confs=True, 834 ) 835 masses = list(iso.masses) 836 probs = array(list(iso.probs)) 837 confs = list(iso.confs) 838 839 molecular_formulas = [] 840 for x in confs: 841 tmplist = [] 842 for y in x: 843 tmplist.extend(list(y)) 844 molecular_formulas.append(tmplist) 845 846 new_formulas = [] 847 848 for isotopologue_index in range(len(iso)): 849 # skip_mono_isotopic 850 851 formula_list = molecular_formulas[isotopologue_index] 852 new_formula_dict = dict(zip(all_atoms_list, formula_list)) 853 new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type) 854 if formula_dict.get("H"): 855 new_formula_dict["H"] = formula_dict.get("H") 856 857 new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0}) 858 859 # formula_dict in new_formulas check if monoisotopic is being returned 860 if new_formulas: # and formula_dict in new_formulas: 861 # print(conf) 862 # print(new_formulas) 863 # print(atoms_count) 864 # print(all_atoms_list) 865 # print(masses_list_tuples) 866 # print(props_list_tuples) 867 # find where monoisotopic is 868 index_mono = new_formulas.index(formula_dict) 869 # calculate ratio iso/mono 870 probs = list(probs / probs[index_mono]) 871 872 # delete the monoisotopic 873 del probs[index_mono] 874 del new_formulas[index_mono] 875 876 # print('probs_exp', probs) 877 for formulas, prob in zip(new_formulas, probs): 878 theor_abundance = current_abundance * prob 879 if theor_abundance > min_abundance: 880 # print(prob, theor_abundance, current_abundance) 881 yield (formulas, prob) 882 # return zip(new_formulas, probs ) 883 884 # else: 885 # return []
isospec_version =
'2.2.2'
class
MolecularFormulaCalc:
33class MolecularFormulaCalc: 34 """Class of calculations related to molecular formula 35 36 This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula 37 38 Attributes 39 ---------- 40 mz_calc : float 41 The m/z value of the molecular formula. 42 neutral_mass : float 43 The neutral mass of the molecular formula. 44 ion_charge : int 45 The ion charge of the molecular formula. 46 _external_mz : float 47 The externally provided m/z value of the molecular formula. 48 _d_molecular_formula : dict 49 The dictionary representation of the molecular formula. 50 _mspeak_parent : object 51 The parent MS peak object associated with the molecular formula. 52 _assignment_mass_error : float 53 The mass error of the molecular formula. 54 55 Methods 56 ------- 57 * _calc_resolving_power_low_pressure(B, T) 58 Calculate the resolving power at low pressure. 59 * _calc_resolving_power_high_pressure(B, T) 60 Calculate the resolving power at high pressure. 61 * _adduct_mz(adduct_atom, ion_charge) 62 Get the m/z value of an adducted ion version of the molecular formula. 63 * _protonated_mz(ion_charge) 64 Get the m/z value of a protonated or deprotonated ion version of the molecular formula. 65 * _radical_mz(ion_charge) 66 Get the m/z value of a radical ion version of the molecular formula. 67 * _neutral_mass() 68 Get the neutral mass of the molecular formula. 69 * _calc_mz() 70 Get the m/z value of the molecular formula. 71 * _calc_assignment_mass_error(method='ppm') 72 Calculate the mass error of the molecular formula. 73 * _calc_mz_confidence(mean=0) 74 Calculate the m/z confidence of the molecular formula. 75 * _calc_isotopologue_confidence() 76 Calculate the isotopologue confidence of the molecular formula. 77 * normalize_distance(dist, dist_range) 78 Normalize the distance value. 79 * subtract_formula(formula_obj, formated=True) 80 Subtract a formula from the current formula object. 81 * _calc_average_mz_score() 82 Calculate the average m/z error score of the molecular formula identification, including the isotopologues. 83 """ 84 85 def _calc_resolving_power_low_pressure(self, B, T): 86 """ 87 Calculate the resolving power at low pressure. 88 89 Parameters 90 ---------- 91 B : float 92 Magnetic Strength (Testa). 93 T : float 94 Transient time (seconds). 95 96 """ 97 return (1.274 * 10000000 * B * T) * (1 / self.mz_calc) 98 99 def _calc_resolving_power_high_pressure(self, B, T): 100 """ 101 Calculate the resolving power at high pressure. 102 103 Parameters 104 ---------- 105 B : float 106 Magnetic Strength (Testa). 107 T : float 108 Transient time (seconds). 109 110 """ 111 return (2.758 * 10000000 * B * T) * (1 / self.mz_calc) 112 113 def _adduct_mz(self, adduct_atom, ion_charge): 114 """Get the m/z value of an adducted ion version of the molecular formula. 115 116 Parameters 117 ---------- 118 adduct_atom : str 119 The adduct atom. 120 ion_charge : int 121 The ion charge. 122 123 """ 124 return ( 125 self.neutral_mass 126 + (Atoms.atomic_masses.get(adduct_atom)) 127 + (ion_charge * -1 * Atoms.electron_mass) 128 ) / abs(ion_charge) 129 130 def _protonated_mz(self, ion_charge): 131 """Get the m/z value of a protonated or deprotonated ion version of the molecular formula. 132 133 Parameters 134 ---------- 135 ion_charge : int 136 The ion charge. 137 """ 138 return ( 139 self.neutral_mass 140 + (ion_charge * Atoms.atomic_masses.get("H")) 141 + (ion_charge * -1 * Atoms.electron_mass) 142 ) / abs(ion_charge) 143 144 def _radical_mz(self, ion_charge): 145 """Get the m/z value of a radical ion version of the molecular formula. 146 147 Parameters 148 ---------- 149 ion_charge : int 150 The ion charge. 151 """ 152 return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs( 153 ion_charge 154 ) 155 156 def _neutral_mass(self): 157 """Get the neutral mass of the molecular formula.""" 158 159 mass = 0 160 161 for each_atom in self._d_molecular_formula.keys(): 162 if each_atom != Labels.ion_type and each_atom != "HC": 163 try: 164 mass = mass + Atoms.atomic_masses[ 165 each_atom 166 ] * self._d_molecular_formula.get(each_atom) 167 168 except: 169 print(Labels.ion_type, each_atom) 170 171 return mass 172 173 def _calc_mz(self): 174 """Get the m/z value of the molecular formula, based on the ion charge and ion type.""" 175 176 if self.ion_charge is not None: 177 if self._external_mz: 178 return self._external_mz 179 180 else: 181 ion_type = self._d_molecular_formula.get(Labels.ion_type) 182 183 if ion_type == Labels.protonated_de_ion: 184 return self.protonated_mz 185 186 elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion: 187 return self.radical_mz 188 189 elif ion_type == Labels.neutral: 190 return self.neutral_mass 191 192 elif self.ion_charge == 0: 193 return self.neutral_mass 194 195 else: 196 # formula is probably ion form used for bruker ref list 197 return self.neutral_mass 198 199 else: 200 raise Exception("Please set ion charge first") 201 202 def _calc_assignment_mass_error(self, method="ppm"): 203 """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z. 204 205 Parameters 206 ---------- 207 method : str, optional 208 The method to calculate the mass error, by default 'ppm', but can be 'ppb' 209 210 Raises 211 ------ 212 Exception 213 If the method is not 'ppm' or 'ppb'. 214 Exception 215 If there is no ms peak associated with the molecular formula instance. 216 """ 217 218 if method == "ppm": 219 multi_factor = 1000000 220 221 elif method == "ppb": 222 multi_factor = 1000000 223 224 else: 225 raise Exception( 226 "method needs to be ppm or ppb, you have entered %s" % method 227 ) 228 229 if self._mspeak_parent.mz_exp: 230 self._assignment_mass_error = ( 231 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc 232 ) * multi_factor 233 234 return ( 235 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc 236 ) * multi_factor 237 238 else: 239 raise Exception( 240 "No ms peak associated with the molecular formula instance %s", self 241 ) 242 243 def _calc_mz_confidence(self, mean=0): 244 """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z. 245 246 Parameters 247 ---------- 248 mean : int, optional 249 The mean of the m/z error, by default 0 250 251 """ 252 253 # predicted std not set, using 0.3 254 if not self._mspeak_parent.predicted_std: 255 self._mspeak_parent.predicted_std = 1.66 256 257 # print( self._mspeak_parent.predicted_std) 258 259 return exp( 260 -1 261 * ( 262 power((self.mz_error - mean), 2) 263 / (2 * power(self._mspeak_parent.predicted_std, 2)) 264 ) 265 ) 266 267 def _calc_isotopologue_confidence(self): 268 """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity. 269 270 Returns 271 ------- 272 float 273 The isotopologue confidence of the molecular formula. 274 """ 275 276 if self.is_isotopologue: 277 # confidence of isotopologue is pure mz error 278 # TODO add more features here 279 280 mformula_index = self.mono_isotopic_formula_index 281 mspeak_index = self.mspeak_index_mono_isotopic 282 283 mspeak = self._mspeak_parent._ms_parent[mspeak_index] 284 285 expected_isotopologues = mspeak[mformula_index].expected_isotopologues 286 287 mono_mz = mspeak[mformula_index].mz_calc 288 mono_abundance = mspeak.abundance 289 290 else: 291 mono_mz = self.mz_calc 292 mono_abundance = self._mspeak_parent.abundance 293 294 expected_isotopologues = self.expected_isotopologues 295 # has isotopologues based on current dinamic range 296 297 if expected_isotopologues: 298 dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]} 299 300 # get reference data 301 for mf in expected_isotopologues: 302 dict_mz_abund_ref["abundance"].append(mf.abundance_calc) 303 dict_mz_abund_ref["mz"].append(mf.mz_calc) 304 305 dict_mz_abund_exp = {mono_mz: mono_abundance} 306 307 # get experimental data 308 for mf in expected_isotopologues: 309 # molecular formula has been assigned to a peak 310 if mf._mspeak_parent: 311 # stores mspeak abundance 312 dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance 313 314 else: 315 # fill missing mz with abundance 0 and mz error score of 0 316 dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1) 317 318 distance = SpectralSimilarity( 319 dict_mz_abund_exp, dict_mz_abund_ref 320 ).manhattan_distance() 321 correlation = 1 - self.normalize_distance(distance, [0, 2]) 322 # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref) 323 # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref) 324 325 if correlation == 1: 326 print(dict_mz_abund_exp, dict_mz_abund_ref) 327 if isnan(correlation): 328 # print(dict_mz_abund_exp, dict_mz_abund_ref) 329 correlation = 0.00001 330 331 else: 332 # no isotopologue expected giving a correlation score of 0.0 but it needs optimization 333 correlation = 0.0 334 335 return correlation 336 337 def normalize_distance(self, dist, dist_range): 338 """ 339 Normalize the distance value. 340 341 Parameters 342 ---------- 343 dist : float 344 The distance value to be normalized. 345 dist_range : list 346 The range of the distance value. 347 348 """ 349 result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0]) 350 351 if result < 0: 352 result = 0.0 353 elif result > 1: 354 result = 1.0 355 356 return result 357 358 def subtract_formula(self, formula_obj, formated=True): 359 """Subtract a formula from the current formula object 360 361 Parameters 362 ---------- 363 formula_obj : MolecularFormula 364 MolecularFormula object to be subtracted from the current formula object 365 formated : bool, optional 366 If True, returns the formula in string format, by default True 367 368 """ 369 subtraction = {} 370 for atom, value in self.to_dict().items(): 371 if atom != Labels.ion_type: 372 if formula_obj.get(atom): 373 # value_subtraction = value - formula_obj.get(atom) 374 if value - formula_obj.get(atom) > 0: 375 subtraction[atom] = value - formula_obj.get(atom) 376 else: 377 subtraction[atom] = value 378 if formated: 379 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") 380 SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹") 381 else: 382 SUB = str.maketrans("0123456789", "0123456789") 383 SUP = str.maketrans("0123456789", "0123456789") 384 formula_srt = "" 385 for atom in Atoms.atoms_order: 386 if atom in subtraction.keys(): 387 formula_srt += atom.translate(SUP) + str( 388 int(subtraction.get(atom)) 389 ).translate(SUB) 390 391 return formula_srt 392 393 def _calc_average_mz_score(self): 394 """Calculate the average m/z error score of the molecular formula identification, including the isotopologues.""" 395 if self.is_isotopologue: 396 # confidence of isotopologue is pure mz error 397 # TODO add more features here 398 399 mformula_index = self.mono_isotopic_formula_index 400 mspeak_index = self.mspeak_index_mono_isotopic 401 402 mspeak = self._mspeak_parent._ms_parent[mspeak_index] 403 404 expected_isotopologues = mspeak[mformula_index].expected_isotopologues 405 406 else: 407 expected_isotopologues = self.expected_isotopologues 408 # has isotopologues based on current dinamic range 409 410 accumulated_mz_score = [self.mz_error_score] 411 412 if expected_isotopologues: 413 for mf in expected_isotopologues: 414 # molecular formula has been assigned to a peak 415 if mf._mspeak_parent: 416 # stores mspeak abundance 417 accumulated_mz_score.append(mf.mz_error_score) 418 else: 419 # fill missing mz with abundance 0 and mz error score of 0 420 accumulated_mz_score.append(0.0) 421 422 average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score) 423 424 if isnan(average_mz_score): 425 average_mz_score = 0.0 426 427 return average_mz_score 428 429 def _calc_confidence_score(self): 430 """Calculate the confidence score of the molecular formula identification, including the isotopologues.""" 431 432 ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean 433 #### TODO: Add spectral similarity 434 435 ## Parameters 436 # ---------- 437 #### mz_exp: 438 #### Experimental m/z 439 #### predicted_std: 440 #### Standart deviation calculated from Resolving power optimization or constant set by User 441 442 isotopologue_correlation = self.isotopologue_similarity 443 average_mz_score = self.average_mz_error_score 444 # add monoisotopic peak mz error score 445 446 # calculate score with higher weight for mass error 447 # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4) 448 a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight 449 b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight 450 451 score = (isotopologue_correlation * b) + (average_mz_score * a) 452 453 # if round(average_mz_score,2) == 0.00: 454 # print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b) 455 456 return score 457 458 def _calc_abundance_error(self, method="percentile"): 459 """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance. 460 461 Parameters 462 ---------- 463 method : str, optional 464 The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb' 465 466 Returns 467 ------- 468 float 469 The abundance error of the molecular formula. 470 471 Raises 472 ------ 473 Exception 474 If isotopologues were not calculated. 475 """ 476 477 mult_factor = 100 478 479 iso_abundance = self._mspeak_parent.abundance 480 mono_abundance = self._mspeak_parent._ms_parent[ 481 self.mspeak_index_mono_isotopic 482 ].abundance 483 484 if self.prob_ratio: 485 theor_abundance = mono_abundance * self.prob_ratio 486 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class 487 return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor 488 489 else: 490 raise Exception("Please calc_isotopologues") 491 492 def _calc_area_error(self, method="percentile"): 493 """Calculate the area error of the molecular formula, based on the experimental area and the calculated area. 494 495 Parameters 496 ---------- 497 method : str, optional 498 The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb' 499 500 Returns 501 ------- 502 float 503 The area error of the molecular formula. 504 505 Raises 506 ------ 507 Exception 508 If isotopologues were not calculated. 509 """ 510 511 mult_factor = 100 512 513 iso_area = self._mspeak_parent.area 514 mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area 515 516 if self.prob_ratio: 517 if mono_area and iso_area: 518 # exp_ratio = iso_area/mono_area 519 520 area_calc = mono_area * self.prob_ratio 521 522 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class 523 return ((area_calc - iso_area) / area_calc) * mult_factor 524 # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor 525 526 else: 527 # centroid mass spectrum 528 return 0 529 else: 530 raise Exception("Please calc_isotopologues") 531 532 def _calc_aromaticity_index_mod(self): 533 """Calculate the modified aromaticity index of the molecular formula. 534 535 Returns 536 ------- 537 float 538 The aromaticity index of the molecular formula. 539 540 Notes 541 ----- 542 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 543 corrected in https://doi.org/10.1002/rcm.7433 544 """ 545 # Prepare empty dictionary to store the number of atoms of each element 546 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} 547 548 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 549 for element in ai_es: 550 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 551 for element_w_iso in elements_w_iso: 552 if element_w_iso in self._d_molecular_formula: 553 ai_es[element] += self._d_molecular_formula[element_w_iso] 554 555 ai_n = ( 556 1 557 + ai_es["C"] 558 - (0.5 * ai_es["O"]) 559 - ai_es["S"] 560 - (0.5 * (ai_es["N"] + ai_es["H"])) 561 ) 562 ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"] 563 564 ai = ai_n / ai_d 565 566 if ai < 0: 567 ai = 0 568 if ai > 1: 569 ai = 1 570 571 return ai 572 573 def _calc_aromaticity_index(self): 574 """Calculate the aromaticity index of the molecular formula. 575 576 Returns 577 ------- 578 float 579 The aromaticity index of the molecular formula. 580 581 Notes 582 ----- 583 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 584 corrected in https://doi.org/10.1002/rcm.7433 585 """ 586 # Prepare empty dictionary to store the number of atoms of each element 587 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} 588 589 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 590 for element in ai_es: 591 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 592 for element_w_iso in elements_w_iso: 593 if element_w_iso in self._d_molecular_formula: 594 ai_es[element] += self._d_molecular_formula[element_w_iso] 595 596 ai_n = ( 597 1 598 + ai_es["C"] 599 - (ai_es["O"]) 600 - ai_es["S"] 601 - (0.5 * (ai_es["N"] + ai_es["H"])) 602 ) 603 ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"] 604 605 ai = ai_n / ai_d 606 607 if ai < 0: 608 ai = 0 609 if ai > 1: 610 ai = 1 611 612 return ai 613 614 def _calc_nosc(self): 615 """Calculate the average nominal oxidation state of carbon 616 617 Returns 618 ------- 619 float 620 The average nominal oxidation state of carbon 621 622 Notes 623 ----- 624 Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020 625 """ 626 # Prepare empty dictionary to store the number of atoms of each element 627 nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0} 628 629 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes 630 for element in nosc_es: 631 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] 632 for element_w_iso in elements_w_iso: 633 if element_w_iso in self._d_molecular_formula: 634 nosc_es[element] += self._d_molecular_formula[element_w_iso] 635 636 nosc = ( 637 -( 638 ( 639 4 * nosc_es["C"] 640 + nosc_es["H"] 641 - 3 * nosc_es["N"] 642 - 2 * nosc_es["O"] 643 + 5 * nosc_es["P"] 644 - 2 * nosc_es["S"] 645 ) 646 / nosc_es["C"] 647 ) 648 + 4 649 ) 650 651 # If nosc is infinite or negative infinity, set it to nan 652 if nosc == float("inf") or nosc == float("-inf"): 653 nosc = float("nan") 654 655 return nosc 656 657 @property 658 def dbe_ai(self): 659 """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.""" 660 661 carbons = self._d_molecular_formula.get("C") 662 hydrogens = self._d_molecular_formula.get("H") 663 oxygens = self._d_molecular_formula.get("O") 664 return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5) 665 666 def _calc_dbe(self): 667 """Calculate the double bond equivalent (DBE) of the molecular formula""" 668 669 individual_dbe = 0 670 671 for atom in self._d_molecular_formula.keys(): 672 if atom != Labels.ion_type: 673 n_atom = int(self._d_molecular_formula.get(atom)) 674 675 clean_atom = "".join([i for i in atom if not i.isdigit()]) 676 677 if self._mspeak_parent: 678 valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get( 679 clean_atom 680 ) 681 else: 682 valencia = MSParameters.molecular_search.used_atom_valences.get( 683 clean_atom 684 ) 685 # valencia = Atoms.atoms_covalence.get(atom) 686 687 if type(valencia) is tuple: 688 valencia = valencia[0] 689 if valencia > 0: 690 # print atom, valencia, n_atom, individual_dbe 691 individual_dbe = individual_dbe + (n_atom * (valencia - 2)) 692 else: 693 continue 694 695 dbe = 1 + (0.5 * individual_dbe) 696 697 if self.ion_type == Labels.adduct_ion: 698 dbe = dbe + 0.5 699 700 return dbe 701 702 def _calc_kmd(self, dict_base): 703 """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass. 704 705 Parameters 706 ---------- 707 dict_base : dict 708 The dictionary of the base formula, e.g. {'C':1, 'H':2} 709 710 Returns 711 ------- 712 tuple 713 The tuple of the KMD, Kendrick mass, and nominal Kendrick mass. 714 """ 715 mass = 0 716 for atom in dict_base.keys(): 717 mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom) 718 719 kendrick_mass = (int(mass) / mass) * self.mz_calc 720 721 nominal_km = int(kendrick_mass) 722 723 kmd = (nominal_km - kendrick_mass) * 100 724 725 # kmd = (nominal_km - km) * 1 726 kmd = round(kmd, 0) 727 728 return kmd, kendrick_mass, nominal_km 729 730 def _cal_isotopologues( 731 self, formula_dict, min_abundance, current_abundance, ms_dynamic_range 732 ): 733 """Calculate the isotopologues for a given molecular formula. 734 735 Parameters 736 ---------- 737 formula_dict : dict 738 The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2} 739 min_abundance : float 740 The minimum abundance. 741 current_abundance : float 742 The current monoisotopic abundance. 743 ms_dynamic_range : float 744 The dynamic range. 745 746 747 Notes 748 ----- 749 This is the primary function to look for isotopologues based on a monoisotopic molecular formula. 750 It needs to be expanded to include the calculation of resolving power and plot the results. 751 Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion. 752 Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational). 753 It might break when adding non-conventional atoms (not yet tested). 754 This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec. 755 756 757 """ 758 759 # last update on 05-26-2020, Yuri E. Corilo 760 761 # updated it to reflect min possible mass peak abundance 762 cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range) 763 764 # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range) 765 # print(cut_off_to_IsoSpeccPy) 766 atoms_labels = ( 767 atom 768 for atom in formula_dict.keys() 769 if atom != Labels.ion_type and atom != "H" 770 ) 771 772 atoms_count = [] 773 masses_list_tuples = [] 774 props_list_tuples = [] 775 all_atoms_list = [] 776 777 for atom_label in atoms_labels: 778 if Atoms.isotopes.get(atom_label)[1][0] is None: 779 "This atom_label has no heavy isotope" 780 atoms_count.append(formula_dict.get(atom_label)) 781 mass = Atoms.atomic_masses.get(atom_label) 782 prop = Atoms.isotopic_abundance.get(atom_label) 783 masses_list_tuples.append([mass]) 784 props_list_tuples.append([prop]) 785 all_atoms_list.append(atom_label) 786 787 else: 788 isotopes_label_list = Atoms.isotopes.get(atom_label)[1] 789 790 if len(isotopes_label_list) > 1: 791 "This atom_label has two or more heavy isotope" 792 isotopos_labels = [i for i in isotopes_label_list] 793 else: 794 "This atom_label only has one heavy isotope" 795 isotopos_labels = [isotopes_label_list[0]] 796 797 # all_atoms_list.extend(isotopos_labels) 798 isotopos_labels = [atom_label] + isotopos_labels 799 800 all_atoms_list.extend(isotopos_labels) 801 802 masses = [ 803 Atoms.atomic_masses.get(atom_label) 804 for atom_label in isotopos_labels 805 ] 806 props = [ 807 Atoms.isotopic_abundance.get(atom_label) 808 for atom_label in isotopos_labels 809 ] 810 811 atoms_count.append(formula_dict.get(atom_label)) 812 masses_list_tuples.append(masses) 813 props_list_tuples.append(props) 814 if legacy_isospec: 815 iso = IsoSpecPy.IsoSpec( 816 atoms_count, 817 masses_list_tuples, 818 props_list_tuples, 819 cut_off_to_IsoSpeccPy, 820 ) 821 conf = iso.getConfs() 822 masses = conf[0] 823 probs = exp(conf[1]) 824 molecular_formulas = conf[2] 825 # print('conf', conf) 826 # print('probs', conf[1]) 827 else: 828 # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach 829 iso = IsoSpecPy.IsoTotalProb( 830 atomCounts=atoms_count, 831 isotopeMasses=masses_list_tuples, 832 isotopeProbabilities=props_list_tuples, 833 prob_to_cover=cut_off_to_IsoSpeccPy, 834 get_confs=True, 835 ) 836 masses = list(iso.masses) 837 probs = array(list(iso.probs)) 838 confs = list(iso.confs) 839 840 molecular_formulas = [] 841 for x in confs: 842 tmplist = [] 843 for y in x: 844 tmplist.extend(list(y)) 845 molecular_formulas.append(tmplist) 846 847 new_formulas = [] 848 849 for isotopologue_index in range(len(iso)): 850 # skip_mono_isotopic 851 852 formula_list = molecular_formulas[isotopologue_index] 853 new_formula_dict = dict(zip(all_atoms_list, formula_list)) 854 new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type) 855 if formula_dict.get("H"): 856 new_formula_dict["H"] = formula_dict.get("H") 857 858 new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0}) 859 860 # formula_dict in new_formulas check if monoisotopic is being returned 861 if new_formulas: # and formula_dict in new_formulas: 862 # print(conf) 863 # print(new_formulas) 864 # print(atoms_count) 865 # print(all_atoms_list) 866 # print(masses_list_tuples) 867 # print(props_list_tuples) 868 # find where monoisotopic is 869 index_mono = new_formulas.index(formula_dict) 870 # calculate ratio iso/mono 871 probs = list(probs / probs[index_mono]) 872 873 # delete the monoisotopic 874 del probs[index_mono] 875 del new_formulas[index_mono] 876 877 # print('probs_exp', probs) 878 for formulas, prob in zip(new_formulas, probs): 879 theor_abundance = current_abundance * prob 880 if theor_abundance > min_abundance: 881 # print(prob, theor_abundance, current_abundance) 882 yield (formulas, prob) 883 # return zip(new_formulas, probs ) 884 885 # else: 886 # return []
Class of calculations related to molecular formula
This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula
Attributes
- mz_calc (float): The m/z value of the molecular formula.
- neutral_mass (float): The neutral mass of the molecular formula.
- ion_charge (int): The ion charge of the molecular formula.
- _external_mz (float): The externally provided m/z value of the molecular formula.
- _d_molecular_formula (dict): The dictionary representation of the molecular formula.
- _mspeak_parent (object): The parent MS peak object associated with the molecular formula.
- _assignment_mass_error (float): The mass error of the molecular formula.
Methods
- _calc_resolving_power_low_pressure(B, T) Calculate the resolving power at low pressure.
- _calc_resolving_power_high_pressure(B, T) Calculate the resolving power at high pressure.
- _adduct_mz(adduct_atom, ion_charge) Get the m/z value of an adducted ion version of the molecular formula.
- _protonated_mz(ion_charge) Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
- _radical_mz(ion_charge) Get the m/z value of a radical ion version of the molecular formula.
- _neutral_mass() Get the neutral mass of the molecular formula.
- _calc_mz() Get the m/z value of the molecular formula.
- _calc_assignment_mass_error(method='ppm') Calculate the mass error of the molecular formula.
- _calc_mz_confidence(mean=0) Calculate the m/z confidence of the molecular formula.
- _calc_isotopologue_confidence() Calculate the isotopologue confidence of the molecular formula.
- normalize_distance(dist, dist_range) Normalize the distance value.
- subtract_formula(formula_obj, formated=True) Subtract a formula from the current formula object.
- _calc_average_mz_score() Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
def
normalize_distance(self, dist, dist_range):
337 def normalize_distance(self, dist, dist_range): 338 """ 339 Normalize the distance value. 340 341 Parameters 342 ---------- 343 dist : float 344 The distance value to be normalized. 345 dist_range : list 346 The range of the distance value. 347 348 """ 349 result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0]) 350 351 if result < 0: 352 result = 0.0 353 elif result > 1: 354 result = 1.0 355 356 return result
Normalize the distance value.
Parameters
- dist (float): The distance value to be normalized.
- dist_range (list): The range of the distance value.
def
subtract_formula(self, formula_obj, formated=True):
358 def subtract_formula(self, formula_obj, formated=True): 359 """Subtract a formula from the current formula object 360 361 Parameters 362 ---------- 363 formula_obj : MolecularFormula 364 MolecularFormula object to be subtracted from the current formula object 365 formated : bool, optional 366 If True, returns the formula in string format, by default True 367 368 """ 369 subtraction = {} 370 for atom, value in self.to_dict().items(): 371 if atom != Labels.ion_type: 372 if formula_obj.get(atom): 373 # value_subtraction = value - formula_obj.get(atom) 374 if value - formula_obj.get(atom) > 0: 375 subtraction[atom] = value - formula_obj.get(atom) 376 else: 377 subtraction[atom] = value 378 if formated: 379 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") 380 SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹") 381 else: 382 SUB = str.maketrans("0123456789", "0123456789") 383 SUP = str.maketrans("0123456789", "0123456789") 384 formula_srt = "" 385 for atom in Atoms.atoms_order: 386 if atom in subtraction.keys(): 387 formula_srt += atom.translate(SUP) + str( 388 int(subtraction.get(atom)) 389 ).translate(SUB) 390 391 return formula_srt
Subtract a formula from the current formula object
Parameters
- formula_obj (MolecularFormula): MolecularFormula object to be subtracted from the current formula object
- formated (bool, optional): If True, returns the formula in string format, by default True