corems.molecular_id.search.findOxygenPeaks
__author__ = "Yuri E. Corilo"
__date__ = "Jul 31, 2019"

from copy import deepcopy
from threading import Thread

from numpy import average, std

from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
from corems.molecular_id.factory.molecularSQL import MolForm_SQL
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas


class FindOxygenPeaks(Thread):
    """Find oxygen-class peaks in a mass spectrum for formula assignment search.

    Walks the spectrum in 14 Da steps over the oxygen space (intended for
    negative ion mass spectra of natural organic matter) and keeps, for each
    nominal mass visited, the most abundant peak that received at least one
    molecular formula candidate.

    Parameters
    ----------
    mass_spectrum_obj : MassSpec class
        Mass spectrum to search. Its molecular search settings are modified
        while run() executes and restored when it finishes.
    sql_db : MolForm_SQL or False, optional
        Database handle passed to the formula search. When falsy, a new
        MolForm_SQL is created from
        mass_spectrum_obj.molecular_search_settings.url_database.
    min_O, max_O : int, optional
        Minimum and maximum number of oxygen atoms to search for. They
        override usedAtoms["O"] for the duration of run().
        Defaults are 1 and 22.

    Attributes
    ----------
    mass_spectrum_obj : MassSpec class
        The mass spectrum being searched.
    min_0, max_O : int
        Oxygen range used by run(). Note that ``min_0`` is spelled with a zero;
        the name is kept for backward compatibility.
    sql_db : MolForm_SQL
        Database handle used by the formula search; closed at the end of run().
    list_found_mspeaks : list
        Peaks found by the last run(); empty until run() has completed.

    Methods
    ----------
    * run().
        Called when start() is invoked on the instance.
    * get_list_found_peaks().
        Returns the found MSPeak objects sorted by experimental m/z.
    * set_mass_spec_indexes_by_found_peaks().
        Restricts the mass spectrum iteration to the found peaks.
    """

    def __init__(self, mass_spectrum_obj, sql_db=False, min_O: int = 1, max_O: int = 22):
        super().__init__()

        self.mass_spectrum_obj = mass_spectrum_obj
        self.min_0 = min_O
        self.max_O = max_O

        # Defined up front so the accessors work before run() has been called.
        self.list_found_mspeaks = []

        if not sql_db:
            self.sql_db = MolForm_SQL(
                mass_spectrum_obj.molecular_search_settings.url_database
            )
        else:
            self.sql_db = sql_db

    def run(self):
        """Run the oxygen peak search.

        Temporarily disables the min-peaks-per-class filter and overrides the
        oxygen range, applies the Kendrick clustering filter, searches the
        reference (most abundant) peak and then the 14 Da series. The original
        settings and spectrum indexes are restored, and ``sql_db`` is closed,
        even when the search raises.
        """
        mass_spec = self.mass_spectrum_obj
        settings = mass_spec.molecular_search_settings
        verbose = mass_spec.parameters.mass_spectrum.verbose_processing

        # Snapshot everything that is about to be overridden.
        initial_min_peak_bool = settings.use_min_peaks_filter
        initial_ox = deepcopy(settings.usedAtoms["O"])

        self.list_found_mspeaks = []

        try:
            settings.use_min_peaks_filter = False
            # Look only for oxygen organic compounds within the requested range.
            settings.usedAtoms["O"] = (self.min_0, self.max_O)

            kmd_base = mass_spec.mspeaks_settings.kendrick_base
            mass_spec.change_kendrick_base_all_mspeaks(kmd_base)

            # needs to be wrapped inside the mass_spec class
            ClusteringFilter().filter_kendrick(mass_spec)

            if verbose:
                print("Start most abundant mass spectral peak search")
            molecular_formula_obj_reference = self.find_most_abundant_formula(
                mass_spec
            )

            if verbose:
                print(
                    "Select most abundant peak with molecular formula = %s with a m/z error of %s ppm"
                    % (
                        molecular_formula_obj_reference.string,
                        molecular_formula_obj_reference.mz_error,
                    )
                )
                print("Started mass spectral peak series search")

            self.list_found_mspeaks = self.find_series_mspeaks(
                mass_spec, molecular_formula_obj_reference, deltamz=14
            )
        finally:
            try:
                settings.usedAtoms["O"] = initial_ox
                settings.use_min_peaks_filter = initial_min_peak_bool
                # Undo the index selection made by the Kendrick filter, then
                # re-apply the noise threshold filter as the original flow did.
                mass_spec.reset_indexes()
                mass_spec.filter_by_noise_threshold()
            finally:
                self.sql_db.close()

        if verbose:
            print("Done with mass spectral peak series search")

    def find_most_abundant_formula(self, mass_spectrum_obj):
        """Find the reference formula from the most abundant peak.

        Peaks whose abundance exceeds mean + 7 * std are ranked as if their
        abundance were 0, so that outliers are not picked as the reference.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object

        Returns
        ----------
        MolecularFormula class obj
            ``best_molecular_formula_candidate`` of the selected peak

        Raises
        ----------
        Exception
            If the selected peak is falsy after the formula search, which is
            treated as "no molecular formula match".
        """
        verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

        # need to find a better way to cut off outliers
        abundances = mass_spectrum_obj.abundance
        upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

        if verbose:
            print(
                "Maximum abundance limit = %s and max abundance kendrick cluster = %s"
                % (
                    upper_limit,
                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
                )
            )

        mspeak_most_abundant = max(
            mass_spectrum_obj,
            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
        )

        if verbose:
            print("Searching molecular formulas")

        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
            [mspeak_most_abundant]
        )

        if verbose:
            print("Finished searching molecular formulas")

        if not mspeak_most_abundant:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % mspeak_most_abundant.mz_exp
            )

        return mspeak_most_abundant.best_molecular_formula_candidate

    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
        """[Test function] Find the reference formula from the most abundant peak.

        Intended for tests only: it takes ``most_abundant_mspeak`` directly,
        without the outlier cut-off used by find_most_abundant_formula.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object
        settings : MolecularSearchSettings class
            Molecular search settings object (currently unused)

        Returns
        ----------
        MolecularFormula class obj
            ``best_molecular_formula_candidate`` of the most abundant peak

        Raises
        ----------
        Exception
            If the peak is falsy after the formula search.
        """
        reference_peak = mass_spectrum_obj.most_abundant_mspeak

        formula_search = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
        formula_search.run_worker_ms_peaks([reference_peak])

        if not reference_peak:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % reference_peak.mz_exp
            )

        return reference_peak.best_molecular_formula_candidate

    def find_series_mspeaks(
        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    ):
        """Find the most abundant peak at each nominal mass of a series.

        Starting from the nominal mass of the reference formula, steps up and
        down by ``deltamz`` across the spectrum m/z range, picks the most
        abundant non-outlier peak at every nominal mass and runs the molecular
        formula search on the picked peaks.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object
        molecular_formula_obj_reference : MolecularFormula class
            Molecular formula object providing ``mz_nominal_calc``
        deltamz : float
            delta m/z between members of the series; must be positive

        Returns
        ----------
        list
            list of MSpeak class objects that are truthy after the search

        Raises
        ----------
        ValueError
            If ``deltamz`` is not positive (the series would never terminate).
        """
        if deltamz <= 0:
            raise ValueError("deltamz must be a positive number, got %r" % (deltamz,))

        verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

        abundances = mass_spectrum_obj.abundance
        upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

        min_mz = mass_spectrum_obj.min_mz_exp
        max_mz = mass_spectrum_obj.max_mz_exp
        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc

        # Each walk deliberately takes one step past its bound, exactly as the
        # original loops did.
        # NOTE(review): the reference nominal mass itself is not part of the
        # series - confirm that this is intended.
        nominal_masses = []

        mass = initial_nominal_mass
        while mass <= max_mz:
            mass += deltamz
            nominal_masses.append(mass)

        mass = initial_nominal_mass
        while mass >= min_mz:
            mass -= deltamz
            nominal_masses.append(mass)

        nominal_masses.sort()

        list_most_abundant_peaks = []
        for nominal_mass in nominal_masses:
            first_index, last_index = (
                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
            )
            ms_peaks = mass_spectrum_obj[first_index:last_index]

            if ms_peaks:
                # Outliers above the limit rank as 0 so they are not preferred.
                list_most_abundant_peaks.append(
                    max(
                        ms_peaks,
                        key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
                    )
                )

        if verbose:
            print("Start molecular formula search")
        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
            list_most_abundant_peaks
        )
        if verbose:
            print("Done molecular formula search")

        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]

    def get_list_found_peaks(self):
        """Get the list of found peaks.

        Returns
        ----------
        list
            list of MSpeak class objects sorted by ``mz_exp``; empty before
            run() has completed
        """
        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)

    def set_mass_spec_indexes_by_found_peaks(self):
        """Set the mass spectrum to iterate over only the found peaks.

        Notes
        ----------
        Warning!!!!
        Do not forget to call mass_spectrum_obj.reset_indexes after the job
        is done.
        """
        indexes = [msp.index for msp in self.list_found_mspeaks]
        self.mass_spectrum_obj.set_indexes(indexes)
class FindOxygenPeaks(Thread):
    """Find oxygen-class peaks in a mass spectrum for formula assignment search.

    Walks the spectrum in 14 Da steps over the oxygen space (intended for
    negative ion mass spectra of natural organic matter) and keeps, for each
    nominal mass visited, the most abundant peak that received at least one
    molecular formula candidate.

    Parameters
    ----------
    mass_spectrum_obj : MassSpec class
        Mass spectrum to search. Its molecular search settings are modified
        while run() executes and restored when it finishes.
    sql_db : MolForm_SQL or False, optional
        Database handle passed to the formula search. When falsy, a new
        MolForm_SQL is created from
        mass_spectrum_obj.molecular_search_settings.url_database.
    min_O, max_O : int, optional
        Minimum and maximum number of oxygen atoms to search for. They
        override usedAtoms["O"] for the duration of run().
        Defaults are 1 and 22.

    Attributes
    ----------
    mass_spectrum_obj : MassSpec class
        The mass spectrum being searched.
    min_0, max_O : int
        Oxygen range used by run(). Note that ``min_0`` is spelled with a zero;
        the name is kept for backward compatibility.
    sql_db : MolForm_SQL
        Database handle used by the formula search; closed at the end of run().
    list_found_mspeaks : list
        Peaks found by the last run(); empty until run() has completed.

    Methods
    ----------
    * run().
        Called when start() is invoked on the instance.
    * get_list_found_peaks().
        Returns the found MSPeak objects sorted by experimental m/z.
    * set_mass_spec_indexes_by_found_peaks().
        Restricts the mass spectrum iteration to the found peaks.
    """

    def __init__(self, mass_spectrum_obj, sql_db=False, min_O: int = 1, max_O: int = 22):
        super().__init__()

        self.mass_spectrum_obj = mass_spectrum_obj
        self.min_0 = min_O
        self.max_O = max_O

        # Defined up front so the accessors work before run() has been called.
        self.list_found_mspeaks = []

        if not sql_db:
            self.sql_db = MolForm_SQL(
                mass_spectrum_obj.molecular_search_settings.url_database
            )
        else:
            self.sql_db = sql_db

    def run(self):
        """Run the oxygen peak search.

        Temporarily disables the min-peaks-per-class filter and overrides the
        oxygen range, applies the Kendrick clustering filter, searches the
        reference (most abundant) peak and then the 14 Da series. The original
        settings and spectrum indexes are restored, and ``sql_db`` is closed,
        even when the search raises.
        """
        mass_spec = self.mass_spectrum_obj
        settings = mass_spec.molecular_search_settings
        verbose = mass_spec.parameters.mass_spectrum.verbose_processing

        # Snapshot everything that is about to be overridden.
        initial_min_peak_bool = settings.use_min_peaks_filter
        initial_ox = deepcopy(settings.usedAtoms["O"])

        self.list_found_mspeaks = []

        try:
            settings.use_min_peaks_filter = False
            # Look only for oxygen organic compounds within the requested range.
            settings.usedAtoms["O"] = (self.min_0, self.max_O)

            kmd_base = mass_spec.mspeaks_settings.kendrick_base
            mass_spec.change_kendrick_base_all_mspeaks(kmd_base)

            # needs to be wrapped inside the mass_spec class
            ClusteringFilter().filter_kendrick(mass_spec)

            if verbose:
                print("Start most abundant mass spectral peak search")
            molecular_formula_obj_reference = self.find_most_abundant_formula(
                mass_spec
            )

            if verbose:
                print(
                    "Select most abundant peak with molecular formula = %s with a m/z error of %s ppm"
                    % (
                        molecular_formula_obj_reference.string,
                        molecular_formula_obj_reference.mz_error,
                    )
                )
                print("Started mass spectral peak series search")

            self.list_found_mspeaks = self.find_series_mspeaks(
                mass_spec, molecular_formula_obj_reference, deltamz=14
            )
        finally:
            try:
                settings.usedAtoms["O"] = initial_ox
                settings.use_min_peaks_filter = initial_min_peak_bool
                # Undo the index selection made by the Kendrick filter, then
                # re-apply the noise threshold filter as the original flow did.
                mass_spec.reset_indexes()
                mass_spec.filter_by_noise_threshold()
            finally:
                self.sql_db.close()

        if verbose:
            print("Done with mass spectral peak series search")

    def find_most_abundant_formula(self, mass_spectrum_obj):
        """Find the reference formula from the most abundant peak.

        Peaks whose abundance exceeds mean + 7 * std are ranked as if their
        abundance were 0, so that outliers are not picked as the reference.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object

        Returns
        ----------
        MolecularFormula class obj
            ``best_molecular_formula_candidate`` of the selected peak

        Raises
        ----------
        Exception
            If the selected peak is falsy after the formula search, which is
            treated as "no molecular formula match".
        """
        verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

        # need to find a better way to cut off outliers
        abundances = mass_spectrum_obj.abundance
        upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

        if verbose:
            print(
                "Maximum abundance limit = %s and max abundance kendrick cluster = %s"
                % (
                    upper_limit,
                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
                )
            )

        mspeak_most_abundant = max(
            mass_spectrum_obj,
            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
        )

        if verbose:
            print("Searching molecular formulas")

        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
            [mspeak_most_abundant]
        )

        if verbose:
            print("Finished searching molecular formulas")

        if not mspeak_most_abundant:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % mspeak_most_abundant.mz_exp
            )

        return mspeak_most_abundant.best_molecular_formula_candidate

    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
        """[Test function] Find the reference formula from the most abundant peak.

        Intended for tests only: it takes ``most_abundant_mspeak`` directly,
        without the outlier cut-off used by find_most_abundant_formula.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object
        settings : MolecularSearchSettings class
            Molecular search settings object (currently unused)

        Returns
        ----------
        MolecularFormula class obj
            ``best_molecular_formula_candidate`` of the most abundant peak

        Raises
        ----------
        Exception
            If the peak is falsy after the formula search.
        """
        reference_peak = mass_spectrum_obj.most_abundant_mspeak

        formula_search = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
        formula_search.run_worker_ms_peaks([reference_peak])

        if not reference_peak:
            raise Exception(
                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
                % reference_peak.mz_exp
            )

        return reference_peak.best_molecular_formula_candidate

    def find_series_mspeaks(
        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    ):
        """Find the most abundant peak at each nominal mass of a series.

        Starting from the nominal mass of the reference formula, steps up and
        down by ``deltamz`` across the spectrum m/z range, picks the most
        abundant non-outlier peak at every nominal mass and runs the molecular
        formula search on the picked peaks.

        Parameters
        ----------
        mass_spectrum_obj : MassSpec class
            Mass spectrum object
        molecular_formula_obj_reference : MolecularFormula class
            Molecular formula object providing ``mz_nominal_calc``
        deltamz : float
            delta m/z between members of the series; must be positive

        Returns
        ----------
        list
            list of MSpeak class objects that are truthy after the search

        Raises
        ----------
        ValueError
            If ``deltamz`` is not positive (the series would never terminate).
        """
        if deltamz <= 0:
            raise ValueError("deltamz must be a positive number, got %r" % (deltamz,))

        verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

        abundances = mass_spectrum_obj.abundance
        upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

        min_mz = mass_spectrum_obj.min_mz_exp
        max_mz = mass_spectrum_obj.max_mz_exp
        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc

        # Each walk deliberately takes one step past its bound, exactly as the
        # original loops did.
        # NOTE(review): the reference nominal mass itself is not part of the
        # series - confirm that this is intended.
        nominal_masses = []

        mass = initial_nominal_mass
        while mass <= max_mz:
            mass += deltamz
            nominal_masses.append(mass)

        mass = initial_nominal_mass
        while mass >= min_mz:
            mass -= deltamz
            nominal_masses.append(mass)

        nominal_masses.sort()

        list_most_abundant_peaks = []
        for nominal_mass in nominal_masses:
            first_index, last_index = (
                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
            )
            ms_peaks = mass_spectrum_obj[first_index:last_index]

            if ms_peaks:
                # Outliers above the limit rank as 0 so they are not preferred.
                list_most_abundant_peaks.append(
                    max(
                        ms_peaks,
                        key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
                    )
                )

        if verbose:
            print("Start molecular formula search")
        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
            list_most_abundant_peaks
        )
        if verbose:
            print("Done molecular formula search")

        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]

    def get_list_found_peaks(self):
        """Get the list of found peaks.

        Returns
        ----------
        list
            list of MSpeak class objects sorted by ``mz_exp``; empty before
            run() has completed
        """
        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)

    def set_mass_spec_indexes_by_found_peaks(self):
        """Set the mass spectrum to iterate over only the found peaks.

        Notes
        ----------
        Warning!!!!
        Do not forget to call mass_spectrum_obj.reset_indexes after the job
        is done.
        """
        indexes = [msp.index for msp in self.list_found_mspeaks]
        self.mass_spectrum_obj.set_indexes(indexes)
Class to find Oxygen peaks in a mass spectrum for formula assignment search
Class to walk 14 Da units over oxygen space for negative ion mass spectrum of natural organic matter. Returns a list of MSPeak class containing the possible Molecular Formula class objects.
Parameters
- mass_spectrum_obj (MassSpec class): This is where we store MassSpec class obj.
- lookupTableSettings (MolecularLookupTableSettings class): This is where we store MolecularLookupTableSettings class obj
- min_O, max_O (int): minimum and maximum number of oxygen atoms the software is allowed to look for; these override the settings at lookupTableSettings.usedAtoms. Defaults: min = 1, max = 22.
Attributes
- mass_spectrum_obj (MassSpec class): This is where we store MassSpec class obj.
- lookupTableSettings (MolecularLookupTableSettings class): This is where we store MolecularLookupTableSettings class obj
Methods
- run(). will be called when start() is invoked on the instantiated class
- get_list_found_peaks(). returns a list of MSPeak classes containing all the MolecularFormula candidates inside the MSPeak; for more details of the structure see MSPeak class and MolecularFormula class
- set_mass_spec_indexes_by_found_peaks(). set the mass spectrum to iterate over only the selected indexes
52 def __init__( 53 self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22 54 ): 55 Thread.__init__(self) 56 57 self.mass_spectrum_obj = mass_spectrum_obj 58 self.min_0 = min_O 59 self.max_O = max_O 60 61 if not sql_db: 62 self.sql_db = MolForm_SQL( 63 mass_spectrum_obj.molecular_search_settings.url_database 64 ) 65 else: 66 self.sql_db = sql_db
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def run(self):
    """Run the oxygen peak search.

    Temporarily disables the min-peaks-per-class filter and overrides the
    oxygen range, applies the Kendrick clustering filter, searches the
    reference (most abundant) peak and then the 14 Da series. The original
    settings and spectrum indexes are restored, and ``sql_db`` is closed,
    even when the search raises.
    """
    mass_spec = self.mass_spectrum_obj
    settings = mass_spec.molecular_search_settings
    verbose = mass_spec.parameters.mass_spectrum.verbose_processing

    # Snapshot everything that is about to be overridden.
    initial_min_peak_bool = settings.use_min_peaks_filter
    initial_ox = deepcopy(settings.usedAtoms["O"])

    self.list_found_mspeaks = []

    try:
        settings.use_min_peaks_filter = False
        # Look only for oxygen organic compounds within the requested range.
        settings.usedAtoms["O"] = (self.min_0, self.max_O)

        kmd_base = mass_spec.mspeaks_settings.kendrick_base
        mass_spec.change_kendrick_base_all_mspeaks(kmd_base)

        # needs to be wrapped inside the mass_spec class
        ClusteringFilter().filter_kendrick(mass_spec)

        if verbose:
            print("Start most abundant mass spectral peak search")
        molecular_formula_obj_reference = self.find_most_abundant_formula(mass_spec)

        if verbose:
            print(
                "Select most abundant peak with molecular formula = %s with a m/z error of %s ppm"
                % (
                    molecular_formula_obj_reference.string,
                    molecular_formula_obj_reference.mz_error,
                )
            )
            print("Started mass spectral peak series search")

        self.list_found_mspeaks = self.find_series_mspeaks(
            mass_spec, molecular_formula_obj_reference, deltamz=14
        )
    finally:
        try:
            settings.usedAtoms["O"] = initial_ox
            settings.use_min_peaks_filter = initial_min_peak_bool
            # Undo the index selection made by the Kendrick filter, then
            # re-apply the noise threshold filter as the original flow did.
            mass_spec.reset_indexes()
            mass_spec.filter_by_noise_threshold()
        finally:
            self.sql_db.close()

    if verbose:
        print("Done with mass spectral peak series search")
Run the thread
def find_most_abundant_formula(self, mass_spectrum_obj):
    """Find the reference formula from the most abundant peak.

    Peaks whose abundance exceeds mean + 7 * std are ranked as if their
    abundance were 0, so that outliers are not picked as the reference.

    Parameters
    ----------
    mass_spectrum_obj : MassSpec class
        Mass spectrum object

    Returns
    ----------
    MolecularFormula class obj
        ``best_molecular_formula_candidate`` of the selected peak

    Raises
    ----------
    Exception
        If the selected peak is falsy after the formula search, which is
        treated as "no molecular formula match".
    """
    verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

    # need to find a better way to cut off outliers
    abundances = mass_spectrum_obj.abundance
    upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

    if verbose:
        print(
            "Maximum abundance limit = %s and max abundance kendrick cluster = %s"
            % (
                upper_limit,
                max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
            )
        )

    mspeak_most_abundant = max(
        mass_spectrum_obj,
        key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
    )

    # Progress messages honour verbose_processing like every other message
    # printed by this class.
    if verbose:
        print("Searching molecular formulas")

    SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
        [mspeak_most_abundant]
    )

    if verbose:
        print("Finished searching molecular formulas")

    if not mspeak_most_abundant:
        raise Exception(
            "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
            % mspeak_most_abundant.mz_exp
        )

    return mspeak_most_abundant.best_molecular_formula_candidate
Find the most abundant formula in the mass spectrum
Parameters
- mass_spectrum_obj (MassSpec class): Mass spectrum object
Returns
- MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
    """[Test function] Find the reference formula from the most abundant peak.

    Intended for tests only: it takes ``most_abundant_mspeak`` directly. A
    proper selection would sort by Kendrick series and pick the most abundant
    peak inside the most abundant series, or let the user choose the
    reference peak in the GUI.

    Parameters
    ----------
    mass_spectrum_obj : MassSpec class
        Mass spectrum object
    settings : MolecularSearchSettings class
        Molecular search settings object (not used by this method)

    Returns
    ----------
    MolecularFormula class obj
        ``best_molecular_formula_candidate`` of the most abundant peak

    Raises
    ----------
    Exception
        If the peak is falsy after the formula search.
    """
    reference_peak = mass_spectrum_obj.most_abundant_mspeak

    formula_search = SearchMolecularFormulas(mass_spectrum_obj, self.sql_db)
    formula_search.run_worker_ms_peaks([reference_peak])

    # Guard clause: a falsy peak is reported as "no formula match".
    if not reference_peak:
        raise Exception(
            "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
            % reference_peak.mz_exp
        )

    return reference_peak.best_molecular_formula_candidate
[Test function] Find the most abundant formula in the mass spectrum
Parameters
- mass_spectrum_obj (MassSpec class): Mass spectrum object
- settings (MolecularSearchSettings class): Molecular search settings object
Returns
- MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
def find_series_mspeaks(
    self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
):
    """Find the most abundant peak at each nominal mass of a series.

    Starting from the nominal mass of the reference formula, steps up and
    down by ``deltamz`` across the spectrum m/z range, picks the most
    abundant non-outlier peak at every nominal mass and runs the molecular
    formula search on the picked peaks.

    Parameters
    ----------
    mass_spectrum_obj : MassSpec class
        Mass spectrum object
    molecular_formula_obj_reference : MolecularFormula class
        Molecular formula object providing ``mz_nominal_calc``
    deltamz : float
        delta m/z between members of the series; must be positive

    Returns
    ----------
    list
        list of MSpeak class objects that are truthy after the search

    Raises
    ----------
    ValueError
        If ``deltamz`` is not positive (the series would never terminate).
    """
    if deltamz <= 0:
        raise ValueError("deltamz must be a positive number, got %r" % (deltamz,))

    verbose = mass_spectrum_obj.parameters.mass_spectrum.verbose_processing

    abundances = mass_spectrum_obj.abundance
    upper_limit = average(abundances, axis=0) + 7 * std(abundances, axis=0)

    min_mz = mass_spectrum_obj.min_mz_exp
    max_mz = mass_spectrum_obj.max_mz_exp
    initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc

    # Each walk deliberately takes one step past its bound, exactly as the
    # original loops did.
    # NOTE(review): the reference nominal mass itself is not part of the
    # series - confirm that this is intended.
    nominal_masses = []

    mass = initial_nominal_mass
    while mass <= max_mz:
        mass += deltamz
        nominal_masses.append(mass)

    mass = initial_nominal_mass
    while mass >= min_mz:
        mass -= deltamz
        nominal_masses.append(mass)

    nominal_masses.sort()

    list_most_abundant_peaks = []
    for nominal_mass in nominal_masses:
        first_index, last_index = (
            mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
        )
        ms_peaks = mass_spectrum_obj[first_index:last_index]

        if ms_peaks:
            # Outliers above the limit rank as 0 so they are not preferred.
            list_most_abundant_peaks.append(
                max(
                    ms_peaks,
                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
                )
            )

    if verbose:
        print("Start molecular formula search")
    SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
        list_most_abundant_peaks
    )
    if verbose:
        print("Done molecular formula search")

    return [mspeak for mspeak in list_most_abundant_peaks if mspeak]
Find a series of abundant peaks in the mass spectrum for a given molecular formula
Parameters
- mass_spectrum_obj (MassSpec class): Mass spectrum object
- molecular_formula_obj_reference (MolecularFormula class): Molecular formula object
- deltamz (float): delta m/z to look for peaks
Returns
- list: list of MSpeak class objects
def get_list_found_peaks(self):
    """Get the list of found peaks.

    Returns
    ----------
    list
        list of MSpeak class objects sorted by ``mz_exp``; an empty list when
        no search result has been stored on the instance yet
    """
    # list_found_mspeaks may not exist if this is called before run().
    found_peaks = getattr(self, "list_found_mspeaks", [])
    return sorted(found_peaks, key=lambda mp: mp.mz_exp)
Get the list of found peaks
Returns
- list: list of MSpeak class objects
def set_mass_spec_indexes_by_found_peaks(self):
    """Restrict the mass spectrum to iterate over only the found peaks.

    Collects the ``index`` of every peak in ``list_found_mspeaks`` and passes
    the resulting list to ``mass_spectrum_obj.set_indexes``.

    Notes
    ----------
    Warning!!!!
    Do not forget to call mass_spectrum_obj.reset_indexes after the job is
    done.
    """
    selected_indexes = []
    for found_peak in self.list_found_mspeaks:
        selected_indexes.append(found_peak.index)

    self.mass_spectrum_obj.set_indexes(selected_indexes)
Set the mass spectrum to iterate over only the selected indexes.
Notes
Warning!!!! This sets the mass spectrum to iterate over only the selected indexes; do not forget to call mass_spectrum_obj.reset_indexes after the job is done.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id