corems.molecular_id.search.findOxygenPeaks

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jul 31, 2019"
  3
  4from copy import deepcopy
  5from threading import Thread
  6
  7from numpy import average, std
  8
  9from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
 10from corems.molecular_id.factory.molecularSQL import MolForm_SQL
 11from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas
 12
 13
 14class FindOxygenPeaks(Thread):
 15    """Class to find Oxygen peaks in a mass spectrum for formula assignment search
 16
 17    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter
 18    Returns a list of MSPeak class containing the possible Molecular Formula class objects.
 19
 20    Parameters
 21    ----------
 22    mass_spectrum_obj : MassSpec class
 23        This is where we store MassSpec class obj,
 24
 25    lookupTableSettings:  MolecularLookupTableSettings class
 26        This is where we store MolecularLookupTableSettings class obj
 27
 28    min_O , max_O : int
 29        minium and maximum of Oxygen to allow the software to look for
 30        it will override the settings at lookupTableSettings.usedAtoms
 31        default min = 1, max = 22
 32
 33    Attributes
 34    ----------
 35    mass_spectrum_obj : MassSpec class
 36        This is where we store MassSpec class obj,
 37    lookupTableSettings:  MolecularLookupTableSettings class
 38        This is where we store MolecularLookupTableSettings class obj
 39
 40    Methods
 41    ----------
 42    * run().
 43            will be called when the instantiated class method start is called
 44    * get_list_found_peaks().
 45            returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak
 46            for more details of the structure see MSPeak class and MolecularFormula class
 47    * set_mass_spec_indexes_by_found_peaks().
 48            set the mass spectrum to interate over only the selected indexes
 49    """
 50
 51    def __init__(
 52        self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22
 53    ):
 54        Thread.__init__(self)
 55
 56        self.mass_spectrum_obj = mass_spectrum_obj
 57        self.min_0 = min_O
 58        self.max_O = max_O
 59
 60        if not sql_db:
 61            self.sql_db = MolForm_SQL(
 62                mass_spectrum_obj.molecular_search_settings.url_database
 63            )
 64        else:
 65            self.sql_db = sql_db
 66
 67    def run(self):
 68        """Run the thread"""
 69        # save initial settings min peaks per class filter
 70        initial_min_peak_bool = deepcopy(
 71            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
 72        )
 73
 74        # deactivate the usage of min peaks per class filter
 75        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
 76
 77        # save initial settings for Ox
 78        initial_ox = deepcopy(
 79            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"]
 80        )
 81
 82        # resets the used atoms to look only for oxygen organic compounds
 83        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
 84            self.min_0,
 85            self.max_O,
 86        )
 87
 88        self.list_found_mspeaks = []
 89
 90        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
 91
 92        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
 93
 94        # needs to be wrapped inside the mass_spec class
 95        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
 96
 97        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
 98            print("Start most abundant mass spectral peak search")
 99        molecular_formula_obj_reference = self.find_most_abundant_formula(
100            self.mass_spectrum_obj
101        )
102
103        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
104            print(
105                "Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm"
106                % (
107                    molecular_formula_obj_reference.string,
108                    molecular_formula_obj_reference.mz_error,
109                )
110            )
111            print("Started mass spectral peak series search")
112
113        self.list_found_mspeaks = self.find_series_mspeaks(
114            self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
115        )
116
117        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
118
119        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox
120
121        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
122            initial_min_peak_bool
123        )
124
125        self.mass_spectrum_obj.reset_indexes()
126
127        self.mass_spectrum_obj.filter_by_noise_threshold()
128        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
129            print("Done with mass spectral peak series search")
130
131        self.sql_db.close()
132
133    def find_most_abundant_formula(self, mass_spectrum_obj):
134        """Find the most abundant formula in the mass spectrum
135
136        Parameters
137        ----------
138        mass_spectrum_obj : MassSpec class
139            Mass spectrum object
140
141        Returns
142        ----------
143        MolecularFormula class obj
144            most abundant MolecularFormula with the lowest mass error
145        """
146        # need to find a better way to cut off outliners
147        # import matplotlib.pyplot as plt
148        # plt.hist(mass_spectrum_obj.abundance, bins=100)
149        # plt.show()
150
151        abundances = mass_spectrum_obj.abundance
152        abun_mean = average(abundances, axis=0)
153        abun_std = std(abundances, axis=0)
154
155        upper_limit = abun_mean + 7 * abun_std
156        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
157            print(
158                "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
159                % (
160                    upper_limit,
161                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
162                )
163            )
164
165        mspeak_most_abundant = max(
166            mass_spectrum_obj,
167            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
168        )
169
170        print("Searching molecular formulas")
171
172        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
173            [mspeak_most_abundant]
174        )
175
176        print("Finished searching molecular formulas")
177
178        if mspeak_most_abundant:
179            return mspeak_most_abundant.best_molecular_formula_candidate
180
181        else:
182            raise Exception(
183                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
184                % mspeak_most_abundant.mz_exp
185            )
186
187        # return the first option
188        # return mspeak_most_abundant[0]
189
190    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
191        """[Test function] Find the most abundant formula in the mass spectrum
192
193        Parameters
194        ----------
195        mass_spectrum_obj : MassSpec class
196            Mass spectrum object
197        settings : MolecularSearchSettings class
198            Molecular search settings object
199
200        Returns
201        ----------
202        MolecularFormula class obj
203            most abundant MolecularFormula with the lowest mass error
204
205        """
206        # this function is intended for test only.
207        # Have to sort by Kendrick to be able to select the most abundant series
208        # then select the most abundant peak inside the series
209        # or have the user select the reference mspeak on the gui
210
211        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
212
213        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
214            [mspeak_most_abundant]
215        )
216
217        if mspeak_most_abundant:
218            return mspeak_most_abundant.best_molecular_formula_candidate
219
220        else:
221            raise Exception(
222                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
223                % mspeak_most_abundant.mz_exp
224            )
225        # return the first option
226        # return mspeak_most_abundant[0]
227
228    def find_series_mspeaks(
229        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
230    ):
231        """Find a series of abundant peaks in the mass spectrum for a given molecular formula
232
233        Parameters
234        ----------
235        mass_spectrum_obj : MassSpec class
236            Mass spectrum object
237        molecular_formula_obj_reference : MolecularFormula class
238            Molecular formula object
239        deltamz : float
240            delta m/z to look for peaks
241
242        Returns
243        ----------
244        list
245            list of MSpeak class objects
246        """
247        abundances = mass_spectrum_obj.abundance
248        abun_mean = average(abundances, axis=0)
249        abun_std = std(abundances, axis=0)
250        upper_limit = abun_mean + 7 * abun_std
251
252        list_most_abundant_peaks = list()
253
254        min_mz = mass_spectrum_obj.min_mz_exp
255
256        max_mz = mass_spectrum_obj.max_mz_exp
257
258        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
259
260        mass = initial_nominal_mass
261
262        nominal_masses = []
263        while mass <= max_mz:
264            # print "shit 1", mass, min_mz
265            mass += deltamz
266            nominal_masses.append(mass)
267
268        mass = initial_nominal_mass
269        while mass >= min_mz:
270            # print "shit 1", mass, min_mz
271            mass -= deltamz
272            nominal_masses.append(mass)
273
274        nominal_masses = sorted(nominal_masses)
275
276        for nominal_mass in nominal_masses:
277            first_index, last_index = (
278                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
279            )
280
281            ms_peaks = mass_spectrum_obj[first_index:last_index]
282
283            if ms_peaks:
284                #
285                # print (nominal_mass, first_index,
286                #    last_index,
287                #    mass_spectrum_obj[first_index].mz_exp,
288                #    mass_spectrum_obj[last_index].mz_exp
289                #    )
290                #
291
292                mspeak_most_abundant = max(
293                    ms_peaks,
294                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
295                )
296
297                # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
298
299                list_most_abundant_peaks.append(mspeak_most_abundant)
300        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
301            print("Start molecular formula search")
302        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
303            list_most_abundant_peaks
304        )
305        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
306            print("Done molecular formula search")
307        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]
308
309    def get_list_found_peaks(self):
310        """Get the list of found peaks
311
312        Returns
313        ----------
314        list
315            list of MSpeak class objects
316        """
317        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
318
319    def set_mass_spec_indexes_by_found_peaks(self):
320        """Set the mass spectrum to interate over only the selected indexes.
321
322        Notes
323        ----------
324        Warning!!!!
325        set the mass spectrum to interate over only the selected indexes
326        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
327        """
328
329        indexes = [msp.index for msp in self.list_found_mspeaks]
330        self.mass_spectrum_obj.set_indexes(indexes)
class FindOxygenPeaks(threading.Thread):
 15class FindOxygenPeaks(Thread):
 16    """Class to find Oxygen peaks in a mass spectrum for formula assignment search
 17
 18    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter
 19    Returns a list of MSPeak class containing the possible Molecular Formula class objects.
 20
 21    Parameters
 22    ----------
 23    mass_spectrum_obj : MassSpec class
 24        This is where we store MassSpec class obj,
 25
 26    lookupTableSettings:  MolecularLookupTableSettings class
 27        This is where we store MolecularLookupTableSettings class obj
 28
 29    min_O , max_O : int
 30        minium and maximum of Oxygen to allow the software to look for
 31        it will override the settings at lookupTableSettings.usedAtoms
 32        default min = 1, max = 22
 33
 34    Attributes
 35    ----------
 36    mass_spectrum_obj : MassSpec class
 37        This is where we store MassSpec class obj,
 38    lookupTableSettings:  MolecularLookupTableSettings class
 39        This is where we store MolecularLookupTableSettings class obj
 40
 41    Methods
 42    ----------
 43    * run().
 44            will be called when the instantiated class method start is called
 45    * get_list_found_peaks().
 46            returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak
 47            for more details of the structure see MSPeak class and MolecularFormula class
 48    * set_mass_spec_indexes_by_found_peaks().
 49            set the mass spectrum to interate over only the selected indexes
 50    """
 51
 52    def __init__(
 53        self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22
 54    ):
 55        Thread.__init__(self)
 56
 57        self.mass_spectrum_obj = mass_spectrum_obj
 58        self.min_0 = min_O
 59        self.max_O = max_O
 60
 61        if not sql_db:
 62            self.sql_db = MolForm_SQL(
 63                mass_spectrum_obj.molecular_search_settings.url_database
 64            )
 65        else:
 66            self.sql_db = sql_db
 67
 68    def run(self):
 69        """Run the thread"""
 70        # save initial settings min peaks per class filter
 71        initial_min_peak_bool = deepcopy(
 72            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
 73        )
 74
 75        # deactivate the usage of min peaks per class filter
 76        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
 77
 78        # save initial settings for Ox
 79        initial_ox = deepcopy(
 80            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"]
 81        )
 82
 83        # resets the used atoms to look only for oxygen organic compounds
 84        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
 85            self.min_0,
 86            self.max_O,
 87        )
 88
 89        self.list_found_mspeaks = []
 90
 91        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
 92
 93        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
 94
 95        # needs to be wrapped inside the mass_spec class
 96        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
 97
 98        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
 99            print("Start most abundant mass spectral peak search")
100        molecular_formula_obj_reference = self.find_most_abundant_formula(
101            self.mass_spectrum_obj
102        )
103
104        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
105            print(
106                "Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm"
107                % (
108                    molecular_formula_obj_reference.string,
109                    molecular_formula_obj_reference.mz_error,
110                )
111            )
112            print("Started mass spectral peak series search")
113
114        self.list_found_mspeaks = self.find_series_mspeaks(
115            self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
116        )
117
118        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
119
120        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox
121
122        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
123            initial_min_peak_bool
124        )
125
126        self.mass_spectrum_obj.reset_indexes()
127
128        self.mass_spectrum_obj.filter_by_noise_threshold()
129        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
130            print("Done with mass spectral peak series search")
131
132        self.sql_db.close()
133
134    def find_most_abundant_formula(self, mass_spectrum_obj):
135        """Find the most abundant formula in the mass spectrum
136
137        Parameters
138        ----------
139        mass_spectrum_obj : MassSpec class
140            Mass spectrum object
141
142        Returns
143        ----------
144        MolecularFormula class obj
145            most abundant MolecularFormula with the lowest mass error
146        """
147        # need to find a better way to cut off outliners
148        # import matplotlib.pyplot as plt
149        # plt.hist(mass_spectrum_obj.abundance, bins=100)
150        # plt.show()
151
152        abundances = mass_spectrum_obj.abundance
153        abun_mean = average(abundances, axis=0)
154        abun_std = std(abundances, axis=0)
155
156        upper_limit = abun_mean + 7 * abun_std
157        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
158            print(
159                "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
160                % (
161                    upper_limit,
162                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
163                )
164            )
165
166        mspeak_most_abundant = max(
167            mass_spectrum_obj,
168            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
169        )
170
171        print("Searching molecular formulas")
172
173        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
174            [mspeak_most_abundant]
175        )
176
177        print("Finished searching molecular formulas")
178
179        if mspeak_most_abundant:
180            return mspeak_most_abundant.best_molecular_formula_candidate
181
182        else:
183            raise Exception(
184                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
185                % mspeak_most_abundant.mz_exp
186            )
187
188        # return the first option
189        # return mspeak_most_abundant[0]
190
191    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
192        """[Test function] Find the most abundant formula in the mass spectrum
193
194        Parameters
195        ----------
196        mass_spectrum_obj : MassSpec class
197            Mass spectrum object
198        settings : MolecularSearchSettings class
199            Molecular search settings object
200
201        Returns
202        ----------
203        MolecularFormula class obj
204            most abundant MolecularFormula with the lowest mass error
205
206        """
207        # this function is intended for test only.
208        # Have to sort by Kendrick to be able to select the most abundant series
209        # then select the most abundant peak inside the series
210        # or have the user select the reference mspeak on the gui
211
212        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
213
214        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
215            [mspeak_most_abundant]
216        )
217
218        if mspeak_most_abundant:
219            return mspeak_most_abundant.best_molecular_formula_candidate
220
221        else:
222            raise Exception(
223                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
224                % mspeak_most_abundant.mz_exp
225            )
226        # return the first option
227        # return mspeak_most_abundant[0]
228
229    def find_series_mspeaks(
230        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
231    ):
232        """Find a series of abundant peaks in the mass spectrum for a given molecular formula
233
234        Parameters
235        ----------
236        mass_spectrum_obj : MassSpec class
237            Mass spectrum object
238        molecular_formula_obj_reference : MolecularFormula class
239            Molecular formula object
240        deltamz : float
241            delta m/z to look for peaks
242
243        Returns
244        ----------
245        list
246            list of MSpeak class objects
247        """
248        abundances = mass_spectrum_obj.abundance
249        abun_mean = average(abundances, axis=0)
250        abun_std = std(abundances, axis=0)
251        upper_limit = abun_mean + 7 * abun_std
252
253        list_most_abundant_peaks = list()
254
255        min_mz = mass_spectrum_obj.min_mz_exp
256
257        max_mz = mass_spectrum_obj.max_mz_exp
258
259        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
260
261        mass = initial_nominal_mass
262
263        nominal_masses = []
264        while mass <= max_mz:
265            # print "shit 1", mass, min_mz
266            mass += deltamz
267            nominal_masses.append(mass)
268
269        mass = initial_nominal_mass
270        while mass >= min_mz:
271            # print "shit 1", mass, min_mz
272            mass -= deltamz
273            nominal_masses.append(mass)
274
275        nominal_masses = sorted(nominal_masses)
276
277        for nominal_mass in nominal_masses:
278            first_index, last_index = (
279                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
280            )
281
282            ms_peaks = mass_spectrum_obj[first_index:last_index]
283
284            if ms_peaks:
285                #
286                # print (nominal_mass, first_index,
287                #    last_index,
288                #    mass_spectrum_obj[first_index].mz_exp,
289                #    mass_spectrum_obj[last_index].mz_exp
290                #    )
291                #
292
293                mspeak_most_abundant = max(
294                    ms_peaks,
295                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
296                )
297
298                # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
299
300                list_most_abundant_peaks.append(mspeak_most_abundant)
301        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
302            print("Start molecular formula search")
303        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
304            list_most_abundant_peaks
305        )
306        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
307            print("Done molecular formula search")
308        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]
309
310    def get_list_found_peaks(self):
311        """Get the list of found peaks
312
313        Returns
314        ----------
315        list
316            list of MSpeak class objects
317        """
318        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
319
320    def set_mass_spec_indexes_by_found_peaks(self):
321        """Set the mass spectrum to interate over only the selected indexes.
322
323        Notes
324        ----------
325        Warning!!!!
326        set the mass spectrum to interate over only the selected indexes
327        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
328        """
329
330        indexes = [msp.index for msp in self.list_found_mspeaks]
331        self.mass_spectrum_obj.set_indexes(indexes)

Class to find Oxygen peaks in a mass spectrum for formula assignment search

Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter. Returns a list of MSPeak class containing the possible Molecular Formula class objects.

Parameters
  • mass_spectrum_obj (MassSpec class): This is where we store MassSpec class obj,
  • lookupTableSettings (MolecularLookupTableSettings class): This is where we store MolecularLookupTableSettings class obj
  • min_O , max_O (int): minimum and maximum number of Oxygen atoms the software is allowed to look for; it will override the settings at lookupTableSettings.usedAtoms. Default min = 1, max = 22
Attributes
  • mass_spectrum_obj (MassSpec class): This is where we store MassSpec class obj,
  • lookupTableSettings (MolecularLookupTableSettings class): This is where we store MolecularLookupTableSettings class obj
Methods
  • run(). will be called when the instantiated class method start is called
  • get_list_found_peaks(). returns a list of MSPeak classes containing all the MolecularFormula candidates inside the MSPeak; for more details of the structure see MSPeak class and MolecularFormula class
  • set_mass_spec_indexes_by_found_peaks(). set the mass spectrum to iterate over only the selected indexes
FindOxygenPeaks( mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22)
52    def __init__(
53        self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22
54    ):
55        Thread.__init__(self)
56
57        self.mass_spectrum_obj = mass_spectrum_obj
58        self.min_0 = min_O
59        self.max_O = max_O
60
61        if not sql_db:
62            self.sql_db = MolForm_SQL(
63                mass_spectrum_obj.molecular_search_settings.url_database
64            )
65        else:
66            self.sql_db = sql_db

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

mass_spectrum_obj
min_0
max_O
def run(self):
 68    def run(self):
 69        """Run the thread"""
 70        # save initial settings min peaks per class filter
 71        initial_min_peak_bool = deepcopy(
 72            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
 73        )
 74
 75        # deactivate the usage of min peaks per class filter
 76        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
 77
 78        # save initial settings for Ox
 79        initial_ox = deepcopy(
 80            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"]
 81        )
 82
 83        # resets the used atoms to look only for oxygen organic compounds
 84        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
 85            self.min_0,
 86            self.max_O,
 87        )
 88
 89        self.list_found_mspeaks = []
 90
 91        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
 92
 93        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
 94
 95        # needs to be wrapped inside the mass_spec class
 96        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
 97
 98        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
 99            print("Start most abundant mass spectral peak search")
100        molecular_formula_obj_reference = self.find_most_abundant_formula(
101            self.mass_spectrum_obj
102        )
103
104        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
105            print(
106                "Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm"
107                % (
108                    molecular_formula_obj_reference.string,
109                    molecular_formula_obj_reference.mz_error,
110                )
111            )
112            print("Started mass spectral peak series search")
113
114        self.list_found_mspeaks = self.find_series_mspeaks(
115            self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
116        )
117
118        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
119
120        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox
121
122        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
123            initial_min_peak_bool
124        )
125
126        self.mass_spectrum_obj.reset_indexes()
127
128        self.mass_spectrum_obj.filter_by_noise_threshold()
129        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
130            print("Done with mass spectral peak series search")
131
132        self.sql_db.close()

Run the thread

def find_most_abundant_formula(self, mass_spectrum_obj):
134    def find_most_abundant_formula(self, mass_spectrum_obj):
135        """Find the most abundant formula in the mass spectrum
136
137        Parameters
138        ----------
139        mass_spectrum_obj : MassSpec class
140            Mass spectrum object
141
142        Returns
143        ----------
144        MolecularFormula class obj
145            most abundant MolecularFormula with the lowest mass error
146        """
147        # need to find a better way to cut off outliners
148        # import matplotlib.pyplot as plt
149        # plt.hist(mass_spectrum_obj.abundance, bins=100)
150        # plt.show()
151
152        abundances = mass_spectrum_obj.abundance
153        abun_mean = average(abundances, axis=0)
154        abun_std = std(abundances, axis=0)
155
156        upper_limit = abun_mean + 7 * abun_std
157        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
158            print(
159                "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
160                % (
161                    upper_limit,
162                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
163                )
164            )
165
166        mspeak_most_abundant = max(
167            mass_spectrum_obj,
168            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
169        )
170
171        print("Searching molecular formulas")
172
173        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
174            [mspeak_most_abundant]
175        )
176
177        print("Finished searching molecular formulas")
178
179        if mspeak_most_abundant:
180            return mspeak_most_abundant.best_molecular_formula_candidate
181
182        else:
183            raise Exception(
184                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
185                % mspeak_most_abundant.mz_exp
186            )
187
188        # return the first option
189        # return mspeak_most_abundant[0]

Find the most abundant formula in the mass spectrum

Parameters
  • mass_spectrum_obj (MassSpec class): Mass spectrum object
Returns
  • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
191    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
192        """[Test function] Find the most abundant formula in the mass spectrum
193
194        Parameters
195        ----------
196        mass_spectrum_obj : MassSpec class
197            Mass spectrum object
198        settings : MolecularSearchSettings class
199            Molecular search settings object
200
201        Returns
202        ----------
203        MolecularFormula class obj
204            most abundant MolecularFormula with the lowest mass error
205
206        """
207        # this function is intended for test only.
208        # Have to sort by Kendrick to be able to select the most abundant series
209        # then select the most abundant peak inside the series
210        # or have the user select the reference mspeak on the gui
211
212        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
213
214        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
215            [mspeak_most_abundant]
216        )
217
218        if mspeak_most_abundant:
219            return mspeak_most_abundant.best_molecular_formula_candidate
220
221        else:
222            raise Exception(
223                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
224                % mspeak_most_abundant.mz_exp
225            )
226        # return the first option
227        # return mspeak_most_abundant[0]

[Test function] Find the most abundant formula in the mass spectrum

Parameters
  • mass_spectrum_obj (MassSpec class): Mass spectrum object
  • settings (MolecularSearchSettings class): Molecular search settings object
Returns
  • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
def find_series_mspeaks(self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14):
229    def find_series_mspeaks(
230        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
231    ):
232        """Find a series of abundant peaks in the mass spectrum for a given molecular formula
233
234        Parameters
235        ----------
236        mass_spectrum_obj : MassSpec class
237            Mass spectrum object
238        molecular_formula_obj_reference : MolecularFormula class
239            Molecular formula object
240        deltamz : float
241            delta m/z to look for peaks
242
243        Returns
244        ----------
245        list
246            list of MSpeak class objects
247        """
248        abundances = mass_spectrum_obj.abundance
249        abun_mean = average(abundances, axis=0)
250        abun_std = std(abundances, axis=0)
251        upper_limit = abun_mean + 7 * abun_std
252
253        list_most_abundant_peaks = list()
254
255        min_mz = mass_spectrum_obj.min_mz_exp
256
257        max_mz = mass_spectrum_obj.max_mz_exp
258
259        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
260
261        mass = initial_nominal_mass
262
263        nominal_masses = []
264        while mass <= max_mz:
265            # print "shit 1", mass, min_mz
266            mass += deltamz
267            nominal_masses.append(mass)
268
269        mass = initial_nominal_mass
270        while mass >= min_mz:
271            # print "shit 1", mass, min_mz
272            mass -= deltamz
273            nominal_masses.append(mass)
274
275        nominal_masses = sorted(nominal_masses)
276
277        for nominal_mass in nominal_masses:
278            first_index, last_index = (
279                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
280            )
281
282            ms_peaks = mass_spectrum_obj[first_index:last_index]
283
284            if ms_peaks:
285                #
286                # print (nominal_mass, first_index,
287                #    last_index,
288                #    mass_spectrum_obj[first_index].mz_exp,
289                #    mass_spectrum_obj[last_index].mz_exp
290                #    )
291                #
292
293                mspeak_most_abundant = max(
294                    ms_peaks,
295                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
296                )
297
298                # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
299
300                list_most_abundant_peaks.append(mspeak_most_abundant)
301        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
302            print("Start molecular formula search")
303        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
304            list_most_abundant_peaks
305        )
306        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
307            print("Done molecular formula search")
308        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]

Find a series of abundant peaks in the mass spectrum for a given molecular formula

Parameters
  • mass_spectrum_obj (MassSpec class): Mass spectrum object
  • molecular_formula_obj_reference (MolecularFormula class): Molecular formula object
  • deltamz (float): delta m/z to look for peaks
Returns
  • list: list of MSpeak class objects
def get_list_found_peaks(self):
310    def get_list_found_peaks(self):
311        """Get the list of found peaks
312
313        Returns
314        ----------
315        list
316            list of MSpeak class objects
317        """
318        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)

Get the list of found peaks

Returns
  • list: list of MSpeak class objects
def set_mass_spec_indexes_by_found_peaks(self):
320    def set_mass_spec_indexes_by_found_peaks(self):
321        """Set the mass spectrum to interate over only the selected indexes.
322
323        Notes
324        ----------
325        Warning!!!!
326        set the mass spectrum to interate over only the selected indexes
327        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
328        """
329
330        indexes = [msp.index for msp in self.list_found_mspeaks]
331        self.mass_spectrum_obj.set_indexes(indexes)

Set the mass spectrum to iterate over only the selected indexes.

Notes

Warning!!!! This sets the mass spectrum to iterate over only the selected indexes. Do not forget to call mass_spectrum_obj.reset_indexes after the job is done.

Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id