corems.molecular_id.search.priorityAssignment

  1import os
  2import sys
  3
  4from copy import deepcopy
  5from threading import Thread
  6from itertools import product
  7
  8import tqdm
  9
 10from corems.encapsulation.constant import Labels, Atoms
 11from corems.molecular_id.calc.MolecularFilter import MolecularFormulaSearchFilters
 12from corems.molecular_id.search.findOxygenPeaks import FindOxygenPeaks
 13from corems.molecular_id.search.molecularFormulaSearch import (
 14    SearchMolecularFormulaWorker,
 15)
 16from corems.molecular_id.factory.molecularSQL import MolForm_SQL
 17from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
 18
 19
 20class OxygenPriorityAssignment(Thread):
 21    """A class for assigning priority to oxygen classes in a molecular search.
 22
 23    Parameters
 24    ----------
 25    mass_spectrum_obj : MassSpectrum
 26        The mass spectrum object.
 27    sql_db : bool, optional
 28        Whether to use an SQL database. The default is False.
 29
 30    Attributes
 31    ----------
 32    mass_spectrum_obj : MassSpectrum
 33        The mass spectrum object.
 34    sql_db : MolForm_SQL
 35        The SQL database object.
 36
 37    Methods
 38    -------
 39    * run().
 40        Run the priority assignment process.
 41    * create_data_base().
 42        Create the molecular database for the specified heteroatomic classes.
 43    * run_worker_mass_spectrum(assign_classes_order_tuples).
 44        Run the molecular formula search for each class in the specified order.
 45    * get_dict_molecular_database(classe_str_list).
 46        Get the molecular database as a dictionary.
 47    * ox_classes_and_peaks_in_order_().
 48        Get the oxygen classes and associated peaks in order.
 49    * get_classes_in_order(dict_ox_class_and_ms_peak)
 50        Get the classes in order.
 51    """
 52
 53    def __init__(self, mass_spectrum_obj, sql_db=False):
 54        # TODO:- add support for other atoms and adducts: Done
 55        #        - add dbe range on search runtime : Done
 56        #        - add docs
 57        #        - improve performace : Done
 58
 59        Thread.__init__(self)
 60        self.mass_spectrum_obj = mass_spectrum_obj
 61        #  initiated at create_molecular_database()
 62        # self.dict_molecular_lookup_table = None
 63
 64        if not sql_db:
 65            self.sql_db = MolForm_SQL(
 66                url=mass_spectrum_obj.molecular_search_settings.url_database
 67            )
 68
 69        else:
 70            self.sql_db = sql_db
 71
 72    def run(self):
 73        """Run the priority assignment process."""
 74        # get Oxygen classes dict and the associate mspeak class
 75        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
 76        # create database separated to give the user the chance to use mass spec filters
 77
 78        assign_classes_order_str_dict_tuple_list = self.create_data_base()
 79
 80        if assign_classes_order_str_dict_tuple_list:
 81            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
 82
 83        else:
 84            raise RuntimeError("call create_data_base() first")
 85
 86        self.sql_db.close()
 87
 88    def create_data_base(self):
 89        """Create the molecular database for the specified heteroatomic classes.
 90
 91        Returns
 92        -------
 93        assign_classes_order_str_dict_tuple_ : list
 94            A list of tuples containing the class names and dictionaries of class attributes.
 95        """
 96
 97        def create_molecular_database():
 98            """Checks and creates the database entries for the specified heteroatomic classes."""
 99            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
100
101            if min_o <= 0:
102                min_o = 1
103
104            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
105
106            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
107
108            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
109
110            # self.lookupTableSettings.use_pah_line_rule = False
111
112            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
113
114            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
115
116            self.mass_spectrum_obj.reset_indexes()
117
118            self.mass_spectrum_obj.filter_by_noise_threshold()
119
120            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
121
122            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
123                min_o,
124                max_o,
125            )
126
127            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
128
129        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
130        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
131            print("Getting Oxygen Series")
132        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
133        find_formula_thread.run()
134
135        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
136        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
137            print("Getting Oxygen Series")
138        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
139
140        # get the Ox class and the DBE for the lowest error molecular formula candidate
141        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
142
143        # sort the classes by abundance
144        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
145            print("Getting Oxygen Series Order")
146        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
147            dict_ox_class_and_ms_peak
148        )
149
150        create_molecular_database()
151
152        return assign_classes_order_str_dict_tuple_list
153
154    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
155        """Run the molecular formula search for each class in the specified order.
156
157        Parameters
158        ----------
159        assign_classes_order_tuples : list
160            A list of tuples containing the class names and dictionaries of class attributes.
161        """
162
163        def check_adduct_class(classe_dict):
164            """Check if the class contains any adduct atoms.
165
166            Parameters
167            ----------
168            classe_dict : dict
169                The dictionary of class attributes.
170
171            Returns
172            -------
173            bool
174                True if the class contains adduct atoms, False otherwise.
175            """
176            return any(
177                [
178                    key in classe_dict.keys()
179                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
180                ]
181            )
182
183        def set_min_max_dbe_by_oxygen(classe_dict):
184            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
185
186            Parameters
187            ----------
188            classe_dict : dict
189                The dictionary of class attributes.
190            """
191            # calculates min and max DBE based on the Oxygen number
192            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
193            # if class does not has O it use the pha rule
194            # ref : Vlad Lobodin manuscript to be include here
195
196            # atoms_exchanges = ['N']
197            # if 'O' in classe_dict.keys():
198            #
199            #    Oxygen_number = classe_dict.get("O")
200            #    for atom in atoms_exchanges:
201            #        if atom in classe_dict.keys():
202            #            Oxygen_number += classe_dict.get(atom)
203            #
204            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
205            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
206            #
207            # else:
208
209            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
210
211        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
212            """Run the molecular formula search for each mass spectrum peak.
213
214            Parameters
215            ----------
216            possible_formulas_dict : dict
217                A dictionary of possible molecular formulas.
218            mass_spectrum_obj : MassSpectrum
219                The mass spectrum object.
220            min_abundance : float
221                The minimum abundance threshold.
222
223            Returns
224            -------
225            list
226                A list of assigned peak indexes.
227            """
228            all_assigned_indexes = list()
229
230            for ms_peak in mass_spectrum_obj.sort_by_abundance():
231                if ms_peak:
232                    continue
233                # already assigned a molecular formula
234
235                nominal_mz = ms_peak.nominal_mz_exp
236
237                # get mono isotopic peaks that was added a molecular formula obj
238                # TODO update error variables
239
240                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
241
242                if possible_formulas_nominal:
243                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
244                        possible_formulas_nominal,
245                        min_abundance,
246                        mass_spectrum_obj,
247                        ms_peak,
248                    )
249
250                    all_assigned_indexes.extend(ms_peak_indexes)
251
252            # filter peaks by percentile threshold of found isotopologues
253            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
254                all_assigned_indexes, mass_spectrum_obj
255            )
256
257            # filter noise by kendrick density
258            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
259                all_assigned_indexes, mass_spectrum_obj
260            )
261
262            # filter per min peaks per mono isotopic class
263            # this function should always be the last filter,
264            # thefore no need to return remaining indexes
265            MolecularFormulaSearchFilters().check_min_peaks(
266                all_assigned_indexes, mass_spectrum_obj
267            )
268
269        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
270
271        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
272
273        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
274
275        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
276
277        min_abundance = self.mass_spectrum_obj.min_abundance
278
279        list_classes_str = [i[0] for i in assign_classes_order_tuples]
280        verbose = self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing
281        pbar = tqdm.tqdm(assign_classes_order_tuples, disable= not verbose)
282        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
283
284        for classe_tuple in pbar:
285            classe_str = classe_tuple[0]
286            classe_dict = classe_tuple[1]
287
288            set_min_max_dbe_by_oxygen(classe_dict)
289
290            # if len(classe_dict.keys()) == 2:
291            #    if classe_dict.get('S') == 1:
292            #       continue
293            # limits the dbe by the Ox class most abundant,
294            # need to add other atoms contribution to be more accurate
295            # but +-7 should be sufficient to cover the range
296
297            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
298                # tqdm.set_description_str(desc=None, refresh=True)
299                if verbose:
300                    pbar.set_description_str(
301                        desc="Started molecular formula search for class %s, (de)protonated "
302                        % classe_str,
303                        refresh=True,
304                    )
305
306                ion_type = Labels.protonated_de_ion
307
308                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
309                    classe_str
310                )
311
312                if possible_formulas_dict:
313                    run_search(
314                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
315                    )
316
317            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
318                # print("Started molecular formula search for class %s,  radical" % classe_str)
319                if verbose:
320                    pbar.set_description_str(
321                        desc="Started molecular formula search for class %s, radical"
322                        % classe_str,
323                        refresh=True,
324                    )
325
326                ion_type = Labels.radical_ion
327
328                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
329                    classe_str
330                )
331
332                if possible_formulas_dict:
333                    run_search(
334                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
335                    )
336
337            # looks for adduct, used_atom_valences should be 0
338            # this code does not support H exchance by halogen atoms
339            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
340                if verbose:
341                    pbar.set_description_str(
342                        desc="Started molecular formula search for class %s, adduct"
343                        % classe_str,
344                        refresh=True,
345                    )
346                # print("Started molecular formula search for class %s, adduct" % classe_str)
347
348                ion_type = Labels.radical_ion
349
350                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
351                    classe_str
352                )
353
354                """ commenting  unfinished code for release 2.0, see end of file for details"""
355                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
356
357                # if possible_formulas_adduct:
358
359                run_search(
360                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
361                )
362
363    def get_dict_molecular_database(self, classe_str_list):
364        """Get the molecular database as a dictionary.
365
366        Parameters
367        ----------
368        classe_str_list : list
369            A list of class names.
370
371        Returns
372        -------
373        dict
374            A dictionary containing the molecular database.
375        """
376        nominal_mzs = self.nominal_mzs
377        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
378        ion_charge = self.mass_spectrum_obj.polarity
379
380        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
381
382        dict_res = {}
383
384        if mf_search_settings.isProtonated:
385            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
386                classe_str_list,
387                Labels.protonated_de_ion,
388                nominal_mzs,
389                ion_charge,
390                mf_search_settings,
391            )
392
393        if mf_search_settings.isRadical:
394            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
395                classe_str_list,
396                Labels.radical_ion,
397                nominal_mzs,
398                ion_charge,
399                mf_search_settings,
400            )
401
402        if mf_search_settings.isAdduct:
403            adduct_list = (
404                mf_search_settings.adduct_atoms_neg
405                if ion_charge < 0
406                else mf_search_settings.adduct_atoms_pos
407            )
408            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
409                classe_str_list,
410                Labels.adduct_ion,
411                nominal_mzs,
412                ion_charge,
413                mf_search_settings,
414                adducts=adduct_list,
415            )
416
417        return dict_res
418
419    def ox_classes_and_peaks_in_order_(self) -> dict:
420        """Get the oxygen classes and associated peaks in order.
421
422        Returns
423        -------
424        dict
425            A dictionary containing the oxygen classes and associated peaks.
426        """
427        # order is only valid in python 3.4 and above
428        # change to OrderedDict if your version is lower
429        dict_ox_class_and_ms_peak = dict()
430
431        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
432            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
433
434            ox_classe = mspeak.best_molecular_formula_candidate.class_label
435
436            if ox_classe in dict_ox_class_and_ms_peak.keys():
437                # get the most abundant of the same ox class
438                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
439                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
440            else:
441                dict_ox_class_and_ms_peak[ox_classe] = mspeak
442
443        return dict_ox_class_and_ms_peak
444
445    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
446        """Get the classes in order.
447
448        Parameters
449        ----------
450        dict_ox_class_and_ms_peak : dict
451            A dictionary containing the oxygen classes and associated peaks.
452
453        Returns
454        -------
455        list
456            A list of tuples containing the class names and dictionaries of class attributes.
457
458        Notes
459        -----
460        structure is
461            ('HC', {'HC': 1})
462        """
463
464        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
465
466        usedAtoms.pop("C")
467        usedAtoms.pop("H")
468        usedAtoms.pop("O")
469
470        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
471        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
472        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
473
474        possible_n = [n for n in range(min_n, max_n + 1)]
475        possible_s = [s for s in range(min_s, max_s + 1)]
476        possible_p = [p for p in range(min_p, max_p + 1)]
477
478        # used to enforce order for commum atoms
479        # and track the atom index in on the tuple in all_atoms_tuples
480        atoms_in_order = ["N", "S", "P"]
481
482        # do number atoms prodcut and remove then from the usedAtoms dict
483        all_atoms_tuples = product(possible_n, possible_s, possible_p)
484        for atom in atoms_in_order:
485            usedAtoms.pop(atom, None)
486
487        # iterate over other atoms besides C,H, N, O, S and P
488
489        for selected_atom_label, min_max_tuple in usedAtoms.items():
490            min_x = min_max_tuple[0]
491            max_x = min_max_tuple[1]
492
493            possible_x = [x for x in range(min_x, max_x + 1)]
494            all_atoms_tuples = product(all_atoms_tuples, possible_x)
495
496            # merge tuples
497            all_atoms_tuples = [
498                all_atoms_combined[0] + (all_atoms_combined[1],)
499                for all_atoms_combined in all_atoms_tuples
500            ]
501
502            # add atom label to the atoms_in_order list
503
504            # important to index where the atom position is in on the tuple in all_atoms_tuples
505            atoms_in_order.append(selected_atom_label)
506
507        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
508            all_atoms_tuples, atoms_in_order
509        )
510
511        combined_classes = self.combine_ox_class_with_other(
512            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
513        )
514
515        combination_classes_ordered = self.sort_classes(
516            atoms_in_order, combined_classes
517        )
518
519        oxygen_class_str_dict_tuple = [
520            (ox_class, mspeak[0].class_dict)
521            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
522        ]
523
524        ## add classes together and ignores classes selected from the main series
525        for class_tuple in combination_classes_ordered:
526            if class_tuple not in oxygen_class_str_dict_tuple:
527                oxygen_class_str_dict_tuple.append(class_tuple)
528
529        return oxygen_class_str_dict_tuple
530
531    @staticmethod
532    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
533        """Get the class strings and dictionaries.
534
535        Parameters
536        ----------
537        all_atoms_tuples : tuple
538            A tuple containing the atoms.
539        atoms_in_order : list
540            A list of atoms in order.
541
542        Returns
543        --------
544        list
545            A list of tuples containing the class strings and dictionaries.
546
547        """
548        classe_list = []
549        hc_class = []
550
551        for all_atoms_tuple in all_atoms_tuples:
552            classe_str = ""
553            classe_dict = dict()
554
555            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
556                if atoms_number != 0:
557                    classe_str = (
558                        classe_str
559                        + atoms_in_order[each_atoms_index]
560                        + str(atoms_number)
561                        + " "
562                    )
563
564                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
565
566            classe_str = classe_str.strip()
567
568            if len(classe_str) > 0:
569                classe_list.append((classe_str, classe_dict))
570
571            elif len(classe_str) == 0:
572                hc_class.append(("HC", {"HC": 1}))
573
574        return classe_list, hc_class
575
576    @staticmethod
577    def combine_ox_class_with_other(
578        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
579    ) -> [dict]:
580        """Combine the oxygen classes with other classes.
581
582        Parameters
583        ----------
584        atoms_in_order : list
585            A list of atoms in order.
586        classes_strings_dict_tuples : list
587
588        dict_ox_class_and_ms_peak : dict
589            A dictionary containing the oxygen classes and associated peaks.
590
591        Returns
592        -------
593        list
594            A list of dictionaries.
595        """
596        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
597        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
598        # sort by len first then sort based on the atoms_in_order list
599        atoms_in_order = Atoms.atoms_order
600
601        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
602
603        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
604
605        # print(classes_strings_dict_tuples)
606        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
607        # print(classe_in_order)
608
609        combination = []
610
611        # _ ignoring the class_str
612        for _, other_classe_dict in classes_strings_dict_tuples:
613            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
614            combination.extend(
615                [
616                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
617                    for Oxygen_mf in Oxygen_mfs
618                ]
619            )
620
621        return combination
622
623    @staticmethod
624    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
625        """Sort the classes.
626
627        Parameters
628        ----------
629        atoms_in_order : list
630            A list of atoms in order.
631        combination_tuples : list
632
633        Returns
634        -------
635        list
636            A list of tuples containing the class strings and dictionaries.
637        """
638        join_list_of_list_classes = list()
639        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
640
641        sort_method = (
642            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
643        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
644        for class_dict in combination_tuples:
645            sorted_dict_keys = sorted(class_dict, key=sort_method)
646            class_str = " ".join(
647                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
648            )
649            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
650            join_list_of_list_classes.append((class_str, new_class_dict))
651
652        return join_list_of_list_classes
653
654    '''
655    The code bellow is unfinished, might be added to next release, 2.1
656    def add_adducts(self, possible_formulas):
657        """ Add adducts to the molecular formula candidates.
658
659        Parameters
660        ----------
661        possible_formulas : dict
662            A dictionary of possible molecular formulas.
663        
664        Returns
665        -------
666        dict 
667            A dictionary of possible molecular formulas with adducts.
668        
669        """
670        ion_type = Labels.adduct_ion
671
672        if self.mass_spectrum_obj.polarity < 0:
673            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
674            molform_model = MolecularFormulaDict
675        else:
676            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
677            molform_model = MolecularFormulaTablePos
678
679        new_dict = {}
680        
681        for nominal_mz, list_formulas in possible_formulas.items():
682            
683            for adduct_atom in adduct_atoms:
684                
685                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
686
687                for molecularFormulaTable in  list_formulas:
688                    
689                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
690                    
691                    if adduct_atom in formula_dict.keys():
692                        formula_dict[adduct_atom] += 1  
693                    else:
694                        formula_dict[adduct_atom] = 1      
695                    
696                    mz = adduct_atom_mass + molecularFormulaTable.mz
697                    nm = int(mz)
698                    
699                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
700                                            "mz" : mz,
701                                            "ion_type" : ion_type,
702                                            "nominal_mz" : nm,
703                                            "ion_charge" : molecularFormulaTable.ion_charge,
704                                            "classe" : molecularFormulaTable.classe,
705                                            "C" : molecularFormulaTable.C,
706                                            "H" : molecularFormulaTable.H,
707                                            "N" : molecularFormulaTable.N,
708                                            "O" : molecularFormulaTable.O,
709                                            "S" : molecularFormulaTable.S,
710                                            "P" : molecularFormulaTable.P,
711                                            "H_C" : molecularFormulaTable.H_C,
712                                            "O_C" : molecularFormulaTable.O_C,
713                                            "DBE" : molecularFormulaTable.DBE,
714                                            })
715                    if nm in new_dict:
716                        new_dict[nm].append(new_formul_obj)
717                    
718                    else:
719                        new_dict[nm]= [new_formul_obj]
720                    
721        return new_dict
722
723    '''
class OxygenPriorityAssignment(threading.Thread):
 21class OxygenPriorityAssignment(Thread):
 22    """A class for assigning priority to oxygen classes in a molecular search.
 23
 24    Parameters
 25    ----------
 26    mass_spectrum_obj : MassSpectrum
 27        The mass spectrum object.
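 28    sql_db : MolForm_SQL or bool, optional
 29        An existing MolForm_SQL connection to reuse. If False (the default), a new one is created from mass_spectrum_obj.molecular_search_settings.url_database.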
 30
 31    Attributes
 32    ----------
 33    mass_spectrum_obj : MassSpectrum
 34        The mass spectrum object.
 35    sql_db : MolForm_SQL
 36        The SQL database object.
 37
 38    Methods
 39    -------
 40    * run().
 41        Run the priority assignment process.
 42    * create_data_base().
 43        Create the molecular database for the specified heteroatomic classes.
 44    * run_worker_mass_spectrum(assign_classes_order_tuples).
 45        Run the molecular formula search for each class in the specified order.
 46    * get_dict_molecular_database(classe_str_list).
 47        Get the molecular database as a dictionary.
 48    * ox_classes_and_peaks_in_order_().
 49        Get the oxygen classes and associated peaks in order.
 50    * get_classes_in_order(dict_ox_class_and_ms_peak)
 51        Get the classes in order.
 52    """
 53
 54    def __init__(self, mass_spectrum_obj, sql_db=False):
 55        # TODO:- add support for other atoms and adducts: Done
 56        #        - add dbe range on search runtime : Done
 57        #        - add docs
 58        #        - improve performace : Done
 59
 60        Thread.__init__(self)
 61        self.mass_spectrum_obj = mass_spectrum_obj
 62        #  initiated at create_molecular_database()
 63        # self.dict_molecular_lookup_table = None
 64
 65        if not sql_db:
 66            self.sql_db = MolForm_SQL(
 67                url=mass_spectrum_obj.molecular_search_settings.url_database
 68            )
 69
 70        else:
 71            self.sql_db = sql_db
 72
 73    def run(self):
 74        """Run the priority assignment process."""
 75        # get Oxygen classes dict and the associate mspeak class
 76        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
 77        # create database separated to give the user the chance to use mass spec filters
 78
 79        assign_classes_order_str_dict_tuple_list = self.create_data_base()
 80
 81        if assign_classes_order_str_dict_tuple_list:
 82            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
 83
 84        else:
 85            raise RuntimeError("call create_data_base() first")
 86
 87        self.sql_db.close()
 88
 89    def create_data_base(self):
 90        """Create the molecular database for the specified heteroatomic classes.
 91
 92        Returns
 93        -------
 94        assign_classes_order_str_dict_tuple_ : list
 95            A list of tuples containing the class names and dictionaries of class attributes.
 96        """
 97
 98        def create_molecular_database():
 99            """Checks and creates the database entries for the specified heteroatomic classes."""
100            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
101
102            if min_o <= 0:
103                min_o = 1
104
105            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
106
107            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
108
109            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
110
111            # self.lookupTableSettings.use_pah_line_rule = False
112
113            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
114
115            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
116
117            self.mass_spectrum_obj.reset_indexes()
118
119            self.mass_spectrum_obj.filter_by_noise_threshold()
120
121            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
122
123            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
124                min_o,
125                max_o,
126            )
127
128            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
129
130        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
131        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
132            print("Getting Oxygen Series")
133        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
134        find_formula_thread.run()
135
136        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
137        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
138            print("Getting Oxygen Series")
139        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
140
141        # get the Ox class and the DBE for the lowest error molecular formula candidate
142        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
143
144        # sort the classes by abundance
145        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
146            print("Getting Oxygen Series Order")
147        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
148            dict_ox_class_and_ms_peak
149        )
150
151        create_molecular_database()
152
153        return assign_classes_order_str_dict_tuple_list
154
155    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
156        """Run the molecular formula search for each class in the specified order.
157
158        Parameters
159        ----------
160        assign_classes_order_tuples : list
161            A list of tuples containing the class names and dictionaries of class attributes.
162        """
163
164        def check_adduct_class(classe_dict):
165            """Check if the class contains any adduct atoms.
166
167            Parameters
168            ----------
169            classe_dict : dict
170                The dictionary of class attributes.
171
172            Returns
173            -------
174            bool
175                True if the class contains adduct atoms, False otherwise.
176            """
177            return any(
178                [
179                    key in classe_dict.keys()
180                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
181                ]
182            )
183
184        def set_min_max_dbe_by_oxygen(classe_dict):
185            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
186
187            Parameters
188            ----------
189            classe_dict : dict
190                The dictionary of class attributes.
191            """
192            # calculates min and max DBE based on the Oxygen number
193            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
194            # if class does not has O it use the pha rule
195            # ref : Vlad Lobodin manuscript to be include here
196
197            # atoms_exchanges = ['N']
198            # if 'O' in classe_dict.keys():
199            #
200            #    Oxygen_number = classe_dict.get("O")
201            #    for atom in atoms_exchanges:
202            #        if atom in classe_dict.keys():
203            #            Oxygen_number += classe_dict.get(atom)
204            #
205            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
206            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
207            #
208            # else:
209
210            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
211
212        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
213            """Run the molecular formula search for each mass spectrum peak.
214
215            Parameters
216            ----------
217            possible_formulas_dict : dict
218                A dictionary of possible molecular formulas.
219            mass_spectrum_obj : MassSpectrum
220                The mass spectrum object.
221            min_abundance : float
222                The minimum abundance threshold.
223
224            Returns
225            -------
226            list
227                A list of assigned peak indexes.
228            """
229            all_assigned_indexes = list()
230
231            for ms_peak in mass_spectrum_obj.sort_by_abundance():
232                if ms_peak:
233                    continue
234                # already assigned a molecular formula
235
236                nominal_mz = ms_peak.nominal_mz_exp
237
238                # get mono isotopic peaks that was added a molecular formula obj
239                # TODO update error variables
240
241                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
242
243                if possible_formulas_nominal:
244                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
245                        possible_formulas_nominal,
246                        min_abundance,
247                        mass_spectrum_obj,
248                        ms_peak,
249                    )
250
251                    all_assigned_indexes.extend(ms_peak_indexes)
252
253            # filter peaks by percentile threshold of found isotopologues
254            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
255                all_assigned_indexes, mass_spectrum_obj
256            )
257
258            # filter noise by kendrick density
259            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
260                all_assigned_indexes, mass_spectrum_obj
261            )
262
263            # filter per min peaks per mono isotopic class
264            # this function should always be the last filter,
265            # thefore no need to return remaining indexes
266            MolecularFormulaSearchFilters().check_min_peaks(
267                all_assigned_indexes, mass_spectrum_obj
268            )
269
270        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
271
272        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
273
274        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
275
276        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
277
278        min_abundance = self.mass_spectrum_obj.min_abundance
279
280        list_classes_str = [i[0] for i in assign_classes_order_tuples]
281        verbose = self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing
282        pbar = tqdm.tqdm(assign_classes_order_tuples, disable= not verbose)
283        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
284
285        for classe_tuple in pbar:
286            classe_str = classe_tuple[0]
287            classe_dict = classe_tuple[1]
288
289            set_min_max_dbe_by_oxygen(classe_dict)
290
291            # if len(classe_dict.keys()) == 2:
292            #    if classe_dict.get('S') == 1:
293            #       continue
294            # limits the dbe by the Ox class most abundant,
295            # need to add other atoms contribution to be more accurate
296            # but +-7 should be sufficient to cover the range
297
298            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
299                # tqdm.set_description_str(desc=None, refresh=True)
300                if verbose:
301                    pbar.set_description_str(
302                        desc="Started molecular formula search for class %s, (de)protonated "
303                        % classe_str,
304                        refresh=True,
305                    )
306
307                ion_type = Labels.protonated_de_ion
308
309                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
310                    classe_str
311                )
312
313                if possible_formulas_dict:
314                    run_search(
315                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
316                    )
317
318            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
319                # print("Started molecular formula search for class %s,  radical" % classe_str)
320                if verbose:
321                    pbar.set_description_str(
322                        desc="Started molecular formula search for class %s, radical"
323                        % classe_str,
324                        refresh=True,
325                    )
326
327                ion_type = Labels.radical_ion
328
329                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
330                    classe_str
331                )
332
333                if possible_formulas_dict:
334                    run_search(
335                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
336                    )
337
338            # looks for adduct, used_atom_valences should be 0
339            # this code does not support H exchance by halogen atoms
340            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
341                if verbose:
342                    pbar.set_description_str(
343                        desc="Started molecular formula search for class %s, adduct"
344                        % classe_str,
345                        refresh=True,
346                    )
347                # print("Started molecular formula search for class %s, adduct" % classe_str)
348
349                ion_type = Labels.radical_ion
350
351                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
352                    classe_str
353                )
354
355                """ commenting  unfinished code for release 2.0, see end of file for details"""
356                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
357
358                # if possible_formulas_adduct:
359
360                run_search(
361                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
362                )
363
364    def get_dict_molecular_database(self, classe_str_list):
365        """Get the molecular database as a dictionary.
366
367        Parameters
368        ----------
369        classe_str_list : list
370            A list of class names.
371
372        Returns
373        -------
374        dict
375            A dictionary containing the molecular database.
376        """
377        nominal_mzs = self.nominal_mzs
378        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
379        ion_charge = self.mass_spectrum_obj.polarity
380
381        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
382
383        dict_res = {}
384
385        if mf_search_settings.isProtonated:
386            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
387                classe_str_list,
388                Labels.protonated_de_ion,
389                nominal_mzs,
390                ion_charge,
391                mf_search_settings,
392            )
393
394        if mf_search_settings.isRadical:
395            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
396                classe_str_list,
397                Labels.radical_ion,
398                nominal_mzs,
399                ion_charge,
400                mf_search_settings,
401            )
402
403        if mf_search_settings.isAdduct:
404            adduct_list = (
405                mf_search_settings.adduct_atoms_neg
406                if ion_charge < 0
407                else mf_search_settings.adduct_atoms_pos
408            )
409            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
410                classe_str_list,
411                Labels.adduct_ion,
412                nominal_mzs,
413                ion_charge,
414                mf_search_settings,
415                adducts=adduct_list,
416            )
417
418        return dict_res
419
420    def ox_classes_and_peaks_in_order_(self) -> dict:
421        """Get the oxygen classes and associated peaks in order.
422
423        Returns
424        -------
425        dict
426            A dictionary containing the oxygen classes and associated peaks.
427        """
428        # order is only valid in python 3.4 and above
429        # change to OrderedDict if your version is lower
430        dict_ox_class_and_ms_peak = dict()
431
432        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
433            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
434
435            ox_classe = mspeak.best_molecular_formula_candidate.class_label
436
437            if ox_classe in dict_ox_class_and_ms_peak.keys():
438                # get the most abundant of the same ox class
439                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
440                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
441            else:
442                dict_ox_class_and_ms_peak[ox_classe] = mspeak
443
444        return dict_ox_class_and_ms_peak
445
446    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
447        """Get the classes in order.
448
449        Parameters
450        ----------
451        dict_ox_class_and_ms_peak : dict
452            A dictionary containing the oxygen classes and associated peaks.
453
454        Returns
455        -------
456        list
457            A list of tuples containing the class names and dictionaries of class attributes.
458
459        Notes
460        -----
461        structure is
462            ('HC', {'HC': 1})
463        """
464
465        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
466
467        usedAtoms.pop("C")
468        usedAtoms.pop("H")
469        usedAtoms.pop("O")
470
471        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
472        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
473        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
474
475        possible_n = [n for n in range(min_n, max_n + 1)]
476        possible_s = [s for s in range(min_s, max_s + 1)]
477        possible_p = [p for p in range(min_p, max_p + 1)]
478
479        # used to enforce order for commum atoms
480        # and track the atom index in on the tuple in all_atoms_tuples
481        atoms_in_order = ["N", "S", "P"]
482
483        # do number atoms prodcut and remove then from the usedAtoms dict
484        all_atoms_tuples = product(possible_n, possible_s, possible_p)
485        for atom in atoms_in_order:
486            usedAtoms.pop(atom, None)
487
488        # iterate over other atoms besides C,H, N, O, S and P
489
490        for selected_atom_label, min_max_tuple in usedAtoms.items():
491            min_x = min_max_tuple[0]
492            max_x = min_max_tuple[1]
493
494            possible_x = [x for x in range(min_x, max_x + 1)]
495            all_atoms_tuples = product(all_atoms_tuples, possible_x)
496
497            # merge tuples
498            all_atoms_tuples = [
499                all_atoms_combined[0] + (all_atoms_combined[1],)
500                for all_atoms_combined in all_atoms_tuples
501            ]
502
503            # add atom label to the atoms_in_order list
504
505            # important to index where the atom position is in on the tuple in all_atoms_tuples
506            atoms_in_order.append(selected_atom_label)
507
508        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
509            all_atoms_tuples, atoms_in_order
510        )
511
512        combined_classes = self.combine_ox_class_with_other(
513            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
514        )
515
516        combination_classes_ordered = self.sort_classes(
517            atoms_in_order, combined_classes
518        )
519
520        oxygen_class_str_dict_tuple = [
521            (ox_class, mspeak[0].class_dict)
522            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
523        ]
524
525        ## add classes together and ignores classes selected from the main series
526        for class_tuple in combination_classes_ordered:
527            if class_tuple not in oxygen_class_str_dict_tuple:
528                oxygen_class_str_dict_tuple.append(class_tuple)
529
530        return oxygen_class_str_dict_tuple
531
532    @staticmethod
533    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
534        """Get the class strings and dictionaries.
535
536        Parameters
537        ----------
538        all_atoms_tuples : tuple
539            A tuple containing the atoms.
540        atoms_in_order : list
541            A list of atoms in order.
542
543        Returns
544        --------
545        list
546            A list of tuples containing the class strings and dictionaries.
547
548        """
549        classe_list = []
550        hc_class = []
551
552        for all_atoms_tuple in all_atoms_tuples:
553            classe_str = ""
554            classe_dict = dict()
555
556            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
557                if atoms_number != 0:
558                    classe_str = (
559                        classe_str
560                        + atoms_in_order[each_atoms_index]
561                        + str(atoms_number)
562                        + " "
563                    )
564
565                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
566
567            classe_str = classe_str.strip()
568
569            if len(classe_str) > 0:
570                classe_list.append((classe_str, classe_dict))
571
572            elif len(classe_str) == 0:
573                hc_class.append(("HC", {"HC": 1}))
574
575        return classe_list, hc_class
576
577    @staticmethod
578    def combine_ox_class_with_other(
579        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
580    ) -> [dict]:
581        """Combine the oxygen classes with other classes.
582
583        Parameters
584        ----------
585        atoms_in_order : list
586            A list of atoms in order.
587        classes_strings_dict_tuples : list
588
589        dict_ox_class_and_ms_peak : dict
590            A dictionary containing the oxygen classes and associated peaks.
591
592        Returns
593        -------
594        list
595            A list of dictionaries.
596        """
597        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
598        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
599        # sort by len first then sort based on the atoms_in_order list
600        atoms_in_order = Atoms.atoms_order
601
602        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
603
604        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
605
606        # print(classes_strings_dict_tuples)
607        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
608        # print(classe_in_order)
609
610        combination = []
611
612        # _ ignoring the class_str
613        for _, other_classe_dict in classes_strings_dict_tuples:
614            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
615            combination.extend(
616                [
617                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
618                    for Oxygen_mf in Oxygen_mfs
619                ]
620            )
621
622        return combination
623
624    @staticmethod
625    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
626        """Sort the classes.
627
628        Parameters
629        ----------
630        atoms_in_order : list
631            A list of atoms in order.
632        combination_tuples : list
633
634        Returns
635        -------
636        list
637            A list of tuples containing the class strings and dictionaries.
638        """
639        join_list_of_list_classes = list()
640        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
641
642        sort_method = (
643            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
644        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
645        for class_dict in combination_tuples:
646            sorted_dict_keys = sorted(class_dict, key=sort_method)
647            class_str = " ".join(
648                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
649            )
650            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
651            join_list_of_list_classes.append((class_str, new_class_dict))
652
653        return join_list_of_list_classes
654
655    '''
656    The code below is unfinished, might be added to next release, 2.1
657    def add_adducts(self, possible_formulas):
658        """ Add adducts to the molecular formula candidates.
659
660        Parameters
661        ----------
662        possible_formulas : dict
663            A dictionary of possible molecular formulas.
664        
665        Returns
666        -------
667        dict 
668            A dictionary of possible molecular formulas with adducts.
669        
670        """
671        ion_type = Labels.adduct_ion
672
673        if self.mass_spectrum_obj.polarity < 0:
674            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
675            molform_model = MolecularFormulaDict
676        else:
677            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
678            molform_model = MolecularFormulaTablePos
679
680        new_dict = {}
681        
682        for nominal_mz, list_formulas in possible_formulas.items():
683            
684            for adduct_atom in adduct_atoms:
685                
686                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
687
688                for molecularFormulaTable in  list_formulas:
689                    
690                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
691                    
692                    if adduct_atom in formula_dict.keys():
693                        formula_dict[adduct_atom] += 1  
694                    else:
695                        formula_dict[adduct_atom] = 1      
696                    
697                    mz = adduct_atom_mass + molecularFormulaTable.mz
698                    nm = int(mz)
699                    
700                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
701                                            "mz" : mz,
702                                            "ion_type" : ion_type,
703                                            "nominal_mz" : nm,
704                                            "ion_charge" : molecularFormulaTable.ion_charge,
705                                            "classe" : molecularFormulaTable.classe,
706                                            "C" : molecularFormulaTable.C,
707                                            "H" : molecularFormulaTable.H,
708                                            "N" : molecularFormulaTable.N,
709                                            "O" : molecularFormulaTable.O,
710                                            "S" : molecularFormulaTable.S,
711                                            "P" : molecularFormulaTable.P,
712                                            "H_C" : molecularFormulaTable.H_C,
713                                            "O_C" : molecularFormulaTable.O_C,
714                                            "DBE" : molecularFormulaTable.DBE,
715                                            })
716                    if nm in new_dict:
717                        new_dict[nm].append(new_formul_obj)
718                    
719                    else:
720                        new_dict[nm]= [new_formul_obj]
721                    
722        return new_dict
723
724    '''

A class for assigning priority to oxygen classes in a molecular search.

Parameters
  • mass_spectrum_obj (MassSpectrum): The mass spectrum object.
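  • sql_db (MolForm_SQL or bool, optional): An existing MolForm_SQL connection to reuse. If False (the default), a new connection is created from mass_spectrum_obj.molecular_search_settings.url_database.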
Attributes
  • mass_spectrum_obj (MassSpectrum): The mass spectrum object.
  • sql_db (MolForm_SQL): The SQL database object.
Methods
  • run(). Run the priority assignment process.
  • create_data_base(). Create the molecular database for the specified heteroatomic classes.
  • run_worker_mass_spectrum(assign_classes_order_tuples). Run the molecular formula search for each class in the specified order.
  • get_dict_molecular_database(classe_str_list). Get the molecular database as a dictionary.
  • ox_classes_and_peaks_in_order_(). Get the oxygen classes and associated peaks in order.
  • get_classes_in_order(dict_ox_class_and_ms_peak). Get the classes in order.
OxygenPriorityAssignment(mass_spectrum_obj, sql_db=False)
54    def __init__(self, mass_spectrum_obj, sql_db=False):
55        # TODO:- add support for other atoms and adducts: Done
56        #        - add dbe range on search runtime : Done
57        #        - add docs
58        #        - improve performace : Done
59
60        Thread.__init__(self)
61        self.mass_spectrum_obj = mass_spectrum_obj
62        #  initiated at create_molecular_database()
63        # self.dict_molecular_lookup_table = None
64
65        if not sql_db:
66            self.sql_db = MolForm_SQL(
67                url=mass_spectrum_obj.molecular_search_settings.url_database
68            )
69
70        else:
71            self.sql_db = sql_db

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

mass_spectrum_obj
def run(self):
73    def run(self):
74        """Run the priority assignment process."""
75        # get Oxygen classes dict and the associate mspeak class
76        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
77        # create database separated to give the user the chance to use mass spec filters
78
79        assign_classes_order_str_dict_tuple_list = self.create_data_base()
80
81        if assign_classes_order_str_dict_tuple_list:
82            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
83
84        else:
85            raise RuntimeError("call create_data_base() first")
86
87        self.sql_db.close()

Run the priority assignment process.

def create_data_base(self):
 89    def create_data_base(self):
 90        """Create the molecular database for the specified heteroatomic classes.
 91
 92        Returns
 93        -------
 94        assign_classes_order_str_dict_tuple_ : list
 95            A list of tuples containing the class names and dictionaries of class attributes.
 96        """
 97
 98        def create_molecular_database():
 99            """Checks and creates the database entries for the specified heteroatomic classes."""
100            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
101
102            if min_o <= 0:
103                min_o = 1
104
105            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
106
107            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
108
109            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
110
111            # self.lookupTableSettings.use_pah_line_rule = False
112
113            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
114
115            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
116
117            self.mass_spectrum_obj.reset_indexes()
118
119            self.mass_spectrum_obj.filter_by_noise_threshold()
120
121            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
122
123            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
124                min_o,
125                max_o,
126            )
127
128            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
129
130        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
131        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
132            print("Getting Oxygen Series")
133        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
134        find_formula_thread.run()
135
136        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
137        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
138            print("Getting Oxygen Series")
139        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
140
141        # get the Ox class and the DBE for the lowest error molecular formula candidate
142        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
143
144        # sort the classes by abundance
145        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
146            print("Getting Oxygen Series Order")
147        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
148            dict_ox_class_and_ms_peak
149        )
150
151        create_molecular_database()
152
153        return assign_classes_order_str_dict_tuple_list

Create the molecular database for the specified heteroatomic classes.

Returns
  • assign_classes_order_str_dict_tuple_ (list): A list of tuples containing the class names and dictionaries of class attributes.
def run_worker_mass_spectrum(self, assign_classes_order_tuples):
155    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
156        """Run the molecular formula search for each class in the specified order.
157
158        Parameters
159        ----------
160        assign_classes_order_tuples : list
161            A list of tuples containing the class names and dictionaries of class attributes.
162        """
163
164        def check_adduct_class(classe_dict):
165            """Check if the class contains any adduct atoms.
166
167            Parameters
168            ----------
169            classe_dict : dict
170                The dictionary of class attributes.
171
172            Returns
173            -------
174            bool
175                True if the class contains adduct atoms, False otherwise.
176            """
177            return any(
178                [
179                    key in classe_dict.keys()
180                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
181                ]
182            )
183
184        def set_min_max_dbe_by_oxygen(classe_dict):
185            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
186
187            Parameters
188            ----------
189            classe_dict : dict
190                The dictionary of class attributes.
191            """
192            # calculates min and max DBE based on the Oxygen number
193            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
194            # if class does not has O it use the pha rule
195            # ref : Vlad Lobodin manuscript to be include here
196
197            # atoms_exchanges = ['N']
198            # if 'O' in classe_dict.keys():
199            #
200            #    Oxygen_number = classe_dict.get("O")
201            #    for atom in atoms_exchanges:
202            #        if atom in classe_dict.keys():
203            #            Oxygen_number += classe_dict.get(atom)
204            #
205            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
206            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
207            #
208            # else:
209
210            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
211
212        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
213            """Run the molecular formula search for each mass spectrum peak.
214
215            Parameters
216            ----------
217            possible_formulas_dict : dict
218                A dictionary of possible molecular formulas.
219            mass_spectrum_obj : MassSpectrum
220                The mass spectrum object.
221            min_abundance : float
222                The minimum abundance threshold.
223
224            Returns
225            -------
226            list
227                A list of assigned peak indexes.
228            """
229            all_assigned_indexes = list()
230
231            for ms_peak in mass_spectrum_obj.sort_by_abundance():
232                if ms_peak:
233                    continue
234                # already assigned a molecular formula
235
236                nominal_mz = ms_peak.nominal_mz_exp
237
238                # get mono isotopic peaks that was added a molecular formula obj
239                # TODO update error variables
240
241                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
242
243                if possible_formulas_nominal:
244                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
245                        possible_formulas_nominal,
246                        min_abundance,
247                        mass_spectrum_obj,
248                        ms_peak,
249                    )
250
251                    all_assigned_indexes.extend(ms_peak_indexes)
252
253            # filter peaks by percentile threshold of found isotopologues
254            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
255                all_assigned_indexes, mass_spectrum_obj
256            )
257
258            # filter noise by kendrick density
259            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
260                all_assigned_indexes, mass_spectrum_obj
261            )
262
263            # filter per min peaks per mono isotopic class
264            # this function should always be the last filter,
265            # thefore no need to return remaining indexes
266            MolecularFormulaSearchFilters().check_min_peaks(
267                all_assigned_indexes, mass_spectrum_obj
268            )
269
270        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
271
272        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
273
274        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
275
276        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
277
278        min_abundance = self.mass_spectrum_obj.min_abundance
279
280        list_classes_str = [i[0] for i in assign_classes_order_tuples]
281        verbose = self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing
282        pbar = tqdm.tqdm(assign_classes_order_tuples, disable= not verbose)
283        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
284
285        for classe_tuple in pbar:
286            classe_str = classe_tuple[0]
287            classe_dict = classe_tuple[1]
288
289            set_min_max_dbe_by_oxygen(classe_dict)
290
291            # if len(classe_dict.keys()) == 2:
292            #    if classe_dict.get('S') == 1:
293            #       continue
294            # limits the dbe by the Ox class most abundant,
295            # need to add other atoms contribution to be more accurate
296            # but +-7 should be sufficient to cover the range
297
298            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
299                # tqdm.set_description_str(desc=None, refresh=True)
300                if verbose:
301                    pbar.set_description_str(
302                        desc="Started molecular formula search for class %s, (de)protonated "
303                        % classe_str,
304                        refresh=True,
305                    )
306
307                ion_type = Labels.protonated_de_ion
308
309                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
310                    classe_str
311                )
312
313                if possible_formulas_dict:
314                    run_search(
315                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
316                    )
317
318            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
319                # print("Started molecular formula search for class %s,  radical" % classe_str)
320                if verbose:
321                    pbar.set_description_str(
322                        desc="Started molecular formula search for class %s, radical"
323                        % classe_str,
324                        refresh=True,
325                    )
326
327                ion_type = Labels.radical_ion
328
329                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
330                    classe_str
331                )
332
333                if possible_formulas_dict:
334                    run_search(
335                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
336                    )
337
338            # looks for adduct, used_atom_valences should be 0
339            # this code does not support H exchance by halogen atoms
340            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
341                if verbose:
342                    pbar.set_description_str(
343                        desc="Started molecular formula search for class %s, adduct"
344                        % classe_str,
345                        refresh=True,
346                    )
347                # print("Started molecular formula search for class %s, adduct" % classe_str)
348
349                ion_type = Labels.radical_ion
350
351                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
352                    classe_str
353                )
354
355                """ commenting  unfinished code for release 2.0, see end of file for details"""
356                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
357
358                # if possible_formulas_adduct:
359
360                run_search(
361                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
362                )

Run the molecular formula search for each class in the specified order.

Parameters
  • assign_classes_order_tuples (list): A list of tuples containing the class names and dictionaries of class attributes.
def get_dict_molecular_database(self, classe_str_list):
364    def get_dict_molecular_database(self, classe_str_list):
365        """Get the molecular database as a dictionary.
366
367        Parameters
368        ----------
369        classe_str_list : list
370            A list of class names.
371
372        Returns
373        -------
374        dict
375            A dictionary containing the molecular database.
376        """
377        nominal_mzs = self.nominal_mzs
378        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
379        ion_charge = self.mass_spectrum_obj.polarity
380
381        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
382
383        dict_res = {}
384
385        if mf_search_settings.isProtonated:
386            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
387                classe_str_list,
388                Labels.protonated_de_ion,
389                nominal_mzs,
390                ion_charge,
391                mf_search_settings,
392            )
393
394        if mf_search_settings.isRadical:
395            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
396                classe_str_list,
397                Labels.radical_ion,
398                nominal_mzs,
399                ion_charge,
400                mf_search_settings,
401            )
402
403        if mf_search_settings.isAdduct:
404            adduct_list = (
405                mf_search_settings.adduct_atoms_neg
406                if ion_charge < 0
407                else mf_search_settings.adduct_atoms_pos
408            )
409            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
410                classe_str_list,
411                Labels.adduct_ion,
412                nominal_mzs,
413                ion_charge,
414                mf_search_settings,
415                adducts=adduct_list,
416            )
417
418        return dict_res

Get the molecular database as a dictionary.

Parameters
  • classe_str_list (list): A list of class names.
Returns
  • dict: A dictionary containing the molecular database.
def ox_classes_and_peaks_in_order_(self) -> dict:
420    def ox_classes_and_peaks_in_order_(self) -> dict:
421        """Get the oxygen classes and associated peaks in order.
422
423        Returns
424        -------
425        dict
426            A dictionary containing the oxygen classes and associated peaks.
427        """
428        # order is only valid in python 3.4 and above
429        # change to OrderedDict if your version is lower
430        dict_ox_class_and_ms_peak = dict()
431
432        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
433            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
434
435            ox_classe = mspeak.best_molecular_formula_candidate.class_label
436
437            if ox_classe in dict_ox_class_and_ms_peak.keys():
438                # get the most abundant of the same ox class
439                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
440                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
441            else:
442                dict_ox_class_and_ms_peak[ox_classe] = mspeak
443
444        return dict_ox_class_and_ms_peak

Get the oxygen classes and associated peaks in order.

Returns
  • dict: A dictionary containing the oxygen classes and associated peaks.
def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(<class 'str'>, <class 'dict'>)]:
446    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
447        """Get the classes in order.
448
449        Parameters
450        ----------
451        dict_ox_class_and_ms_peak : dict
452            A dictionary containing the oxygen classes and associated peaks.
453
454        Returns
455        -------
456        list
457            A list of tuples containing the class names and dictionaries of class attributes.
458
459        Notes
460        -----
461        structure is
462            ('HC', {'HC': 1})
463        """
464
465        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
466
467        usedAtoms.pop("C")
468        usedAtoms.pop("H")
469        usedAtoms.pop("O")
470
471        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
472        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
473        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
474
475        possible_n = [n for n in range(min_n, max_n + 1)]
476        possible_s = [s for s in range(min_s, max_s + 1)]
477        possible_p = [p for p in range(min_p, max_p + 1)]
478
479        # used to enforce order for commum atoms
480        # and track the atom index in on the tuple in all_atoms_tuples
481        atoms_in_order = ["N", "S", "P"]
482
483        # do number atoms prodcut and remove then from the usedAtoms dict
484        all_atoms_tuples = product(possible_n, possible_s, possible_p)
485        for atom in atoms_in_order:
486            usedAtoms.pop(atom, None)
487
488        # iterate over other atoms besides C,H, N, O, S and P
489
490        for selected_atom_label, min_max_tuple in usedAtoms.items():
491            min_x = min_max_tuple[0]
492            max_x = min_max_tuple[1]
493
494            possible_x = [x for x in range(min_x, max_x + 1)]
495            all_atoms_tuples = product(all_atoms_tuples, possible_x)
496
497            # merge tuples
498            all_atoms_tuples = [
499                all_atoms_combined[0] + (all_atoms_combined[1],)
500                for all_atoms_combined in all_atoms_tuples
501            ]
502
503            # add atom label to the atoms_in_order list
504
505            # important to index where the atom position is in on the tuple in all_atoms_tuples
506            atoms_in_order.append(selected_atom_label)
507
508        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
509            all_atoms_tuples, atoms_in_order
510        )
511
512        combined_classes = self.combine_ox_class_with_other(
513            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
514        )
515
516        combination_classes_ordered = self.sort_classes(
517            atoms_in_order, combined_classes
518        )
519
520        oxygen_class_str_dict_tuple = [
521            (ox_class, mspeak[0].class_dict)
522            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
523        ]
524
525        ## add classes together and ignores classes selected from the main series
526        for class_tuple in combination_classes_ordered:
527            if class_tuple not in oxygen_class_str_dict_tuple:
528                oxygen_class_str_dict_tuple.append(class_tuple)
529
530        return oxygen_class_str_dict_tuple

Get the classes in order.

Parameters
  • dict_ox_class_and_ms_peak (dict): A dictionary containing the oxygen classes and associated peaks.
Returns
  • list: A list of tuples containing the class names and dictionaries of class attributes.
Notes

structure is ('HC', {'HC': 1})

@staticmethod
def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(<class 'str'>, <class 'dict'>)]:
532    @staticmethod
533    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
534        """Get the class strings and dictionaries.
535
536        Parameters
537        ----------
538        all_atoms_tuples : tuple
539            A tuple containing the atoms.
540        atoms_in_order : list
541            A list of atoms in order.
542
543        Returns
544        --------
545        list
546            A list of tuples containing the class strings and dictionaries.
547
548        """
549        classe_list = []
550        hc_class = []
551
552        for all_atoms_tuple in all_atoms_tuples:
553            classe_str = ""
554            classe_dict = dict()
555
556            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
557                if atoms_number != 0:
558                    classe_str = (
559                        classe_str
560                        + atoms_in_order[each_atoms_index]
561                        + str(atoms_number)
562                        + " "
563                    )
564
565                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
566
567            classe_str = classe_str.strip()
568
569            if len(classe_str) > 0:
570                classe_list.append((classe_str, classe_dict))
571
572            elif len(classe_str) == 0:
573                hc_class.append(("HC", {"HC": 1}))
574
575        return classe_list, hc_class

Get the class strings and dictionaries.

Parameters
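  • all_atoms_tuples (iterable): Tuples of atom counts; position i of each tuple holds the count for atoms_in_order[i].
  • atoms_in_order (list): The atom labels, in the same order as the counts.
Returns
  • tuple: Two lists. The first holds a (class string, class dictionary) tuple for every combination with at least one nonzero count; the second holds one ("HC", {"HC": 1}) entry for every combination whose counts are all zero.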
@staticmethod
def combine_ox_class_with_other( atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak) -> [<class 'dict'>]:
577    @staticmethod
578    def combine_ox_class_with_other(
579        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
580    ) -> [dict]:
581        """Combine the oxygen classes with other classes.
582
583        Parameters
584        ----------
585        atoms_in_order : list
586            A list of atoms in order.
587        classes_strings_dict_tuples : list
588
589        dict_ox_class_and_ms_peak : dict
590            A dictionary containing the oxygen classes and associated peaks.
591
592        Returns
593        -------
594        list
595            A list of dictionaries.
596        """
597        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
598        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
599        # sort by len first then sort based on the atoms_in_order list
600        atoms_in_order = Atoms.atoms_order
601
602        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
603
604        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
605
606        # print(classes_strings_dict_tuples)
607        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
608        # print(classe_in_order)
609
610        combination = []
611
612        # _ ignoring the class_str
613        for _, other_classe_dict in classes_strings_dict_tuples:
614            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
615            combination.extend(
616                [
617                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
618                    for Oxygen_mf in Oxygen_mfs
619                ]
620            )
621
622        return combination

Combine the oxygen classes with other classes.

Parameters
  • atoms_in_order (list): A list of atoms in order.
  • classes_strings_dict_tuples (list): A list of (class string, class dictionary) tuples; only the dictionary of each tuple is used.

  • dict_ox_class_and_ms_peak (dict): A dictionary containing the oxygen classes and associated peaks.

Returns
  • list: A list of dictionaries.
@staticmethod
def sort_classes(atoms_in_order, combination_tuples) -> [(<class 'str'>, <class 'dict'>)]:
624    @staticmethod
625    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
626        """Sort the classes.
627
628        Parameters
629        ----------
630        atoms_in_order : list
631            A list of atoms in order.
632        combination_tuples : list
633
634        Returns
635        -------
636        list
637            A list of tuples containing the class strings and dictionaries.
638        """
639        join_list_of_list_classes = list()
640        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
641
642        sort_method = (
643            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
644        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
645        for class_dict in combination_tuples:
646            sorted_dict_keys = sorted(class_dict, key=sort_method)
647            class_str = " ".join(
648                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
649            )
650            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
651            join_list_of_list_classes.append((class_str, new_class_dict))
652
653        return join_list_of_list_classes

Sort the classes.

Parameters
  • atoms_in_order (list): A list of atoms in order.
  • combination_tuples (list): A list of class dictionaries mapping atom labels to atom counts.
Returns
  • list: A list of tuples containing the class strings and dictionaries.
Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id