corems.molecular_id.search.lcms_spectral_search
import re

import numpy as np

from corems.molecular_id.factory.spectrum_search_results import SpectrumSearchResults


class LCMSSpectralSearch:
    """
    Methods for searching LCMS spectra.

    This class is designed to be a mixin class for the :obj:`~corems.mass_spectra.factory.lc_class.LCMSBase` class.

    """

    @staticmethod
    def get_more_match_quals(
        query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False
    ):
        """
        Return additional match qualities between query and library entry.

        Parameters
        ----------
        query_mz_arr : np.array
            Query spectrum. Either a 1-D array of m/z values, or a 2-D array of
            shape (N, 2) with m/z in the first column and abundance in the
            second (only the m/z column is used).
        lib_entry : dict
            Library spectrum entry, with 'mz' key containing the spectrum in
            the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
        mz_tol_da : float, optional
            Absolute tolerance in Da for matching peaks (in MS2). Default is 0.1.
        include_fragment_types : bool, optional
            If True, include fragment type comparisons in output.
            Defaults to False.

        Returns
        -------
        tuple
            Tuple of (query_in_lib, query_in_lib_fract, lib_in_query,
            lib_in_query_fract, query_frags, lib_frags).

        Notes
        -----
        query_in_lib : int
            Number of peaks in query that are present in the library entry (within mz_tol_da).
        query_in_lib_fract : float
            Fraction of peaks in query that are present in the library entry (within mz_tol_da).
            0.0 if the query has no peaks.
        lib_in_query : int
            Number of peaks in the library entry that are present in the query (within mz_tol_da).
        lib_in_query_fract : float
            Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
            0.0 if the library entry has no peaks.
        query_frags : list or None
            Unique fragment types of the library peaks that were found in the query,
            generally 'MLF' or 'LSF' or both. None if include_fragment_types is False.
        lib_frags : list or None
            Unique fragment types present in the library entry, generally 'MLF' or 'LSF'
            or both. None if include_fragment_types is False.

        Raises
        ------
        ValueError
            If library entry does not have 'fragment_types' key and include_fragment_types is True.

        """
        if include_fragment_types and "fragment_types" not in lib_entry:
            raise ValueError(
                "Flash entropy library entry must have 'fragment_types' key to include fragment types in output."
            )

        # Get the original mz values from the library entry
        lib_mzs = np.array(
            re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float
        ).reshape(-1, 2)[:, 0]

        # Accept both a bare m/z vector and an (m/z, abundance) table
        query_mzs = np.asarray(query_mz_arr, dtype=float)
        if query_mzs.ndim == 2:
            query_mzs = query_mzs[:, 0]

        # Pairwise comparison with a purely absolute tolerance. np.isclose is
        # avoided on purpose: its default rtol=1e-5 silently widens the window
        # by 1e-5 * m/z, which is comparable to typical MS2 tolerances.
        within_tol = np.abs(query_mzs[:, None] - lib_mzs[None, :]) <= mz_tol_da
        query_matched = within_tol.any(axis=1)
        lib_matched = within_tol.any(axis=0)

        # Counts and fractions; an empty spectrum yields a fraction of 0.0
        query_in_lib = int(query_matched.sum())
        lib_in_query = int(lib_matched.sum())
        query_in_lib_fract = query_in_lib / len(query_mzs) if len(query_mzs) else 0.0
        lib_in_query_fract = lib_in_query / len(lib_mzs) if len(lib_mzs) else 0.0

        if include_fragment_types:
            lib_frag_types = lib_entry["fragment_types"]
            # Fragment types of the library peaks that are present in the query
            query_frags = list(
                {lib_frag_types[ind] for ind, hit in enumerate(lib_matched) if hit}
            )
            lib_frags = list(set(lib_frag_types))
        else:
            query_frags = None
            lib_frags = None

        return (
            query_in_lib,
            query_in_lib_fract,
            lib_in_query,
            lib_in_query_fract,
            query_frags,
            lib_frags,
        )

    def fe_search(
        self,
        scan_list,
        fe_lib,
        precursor_mz_list=None,
        use_mass_features=True,
        peak_sep_da=0.01,
        get_additional_metrics=True,
    ):
        """
        Search LCMS spectra using a FlashEntropy approach.

        Parameters
        ----------
        scan_list : list
            List of scan numbers to search.
        fe_lib : :obj:`~ms_entropy.FlashEntropySearch`
            FlashEntropy Search instance.
        precursor_mz_list : list, optional
            One entry per scan in scan_list, each an iterable of precursor m/z
            values to search for that scan. By default None (treated as empty),
            which implies matching with mass features; to enable this
            use_mass_features must be True.
        use_mass_features : bool, optional
            If True and precursor_mz_list is empty, use mass features to get
            precursor m/z values, by default True.
        peak_sep_da : float, optional
            Minimum separation between m/z peaks in spectra in Da. This needs to match the
            approximate resolution of the search spectra and the FlashEntropySearch
            instance, by default 0.01.
        get_additional_metrics : bool, optional
            If True, get additional metrics from FlashEntropy search, by default True.

        Returns
        -------
        None, but adds results to self.spectral_search_results and associates these
        spectral_search_results with mass_features within the self.mass_features dictionary.

        Raises
        ------
        ValueError
            If precursor_mz_list and scan_list differ in length.

        """
        # Retrieve parameters from self
        # include_fragment_types should be used for lipid queries only, not general metabolomics
        include_fragment_types = self.parameters.lc_ms.include_fragment_types
        min_match_score = self.parameters.lc_ms.ms2_min_fe_score

        if precursor_mz_list is None:
            precursor_mz_list = []

        # Without explicit precursors, take the m/z of every mass feature that
        # owns an MS2 spectrum for the scan (len() is used because the argument
        # may be an array, whose truth value is ambiguous)
        if use_mass_features and len(precursor_mz_list) == 0:
            precursor_mz_list = [
                [
                    mass_feature.mz
                    for mass_feature in self.mass_features.values()
                    if scan in mass_feature.ms2_mass_spectra
                ]
                for scan in scan_list
            ]

        # Check that precursor_mz_list same length as scan_list, if not, raise error
        if len(precursor_mz_list) != len(scan_list):
            raise ValueError("Length of precursor_mz_list is not equal to scan_list.")

        # Loop through each query spectrum / precursor match and save ids of db spectrum that are decent matches
        overall_results_dict = {}
        for scan_oi, precursor_mzs in zip(scan_list, precursor_mz_list):
            ms2_spectrum = self._ms[scan_oi]
            if len(ms2_spectrum.mspeaks) == 0:
                continue

            # The spectrum setting is divided by 100 before being handed to FlashEntropy
            noise_threshold = (
                ms2_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance
                / 100
            )
            overall_results_dict[scan_oi] = {}
            for precursor_mz in precursor_mzs:
                query_spectrum = fe_lib.clean_spectrum_for_search(
                    precursor_mz=precursor_mz,
                    peaks=np.vstack((ms2_spectrum.mz_exp, ms2_spectrum.abundance)).T,
                    precursor_ions_removal_da=None,
                    noise_threshold=noise_threshold,
                    min_ms2_difference_in_da=peak_sep_da,
                )
                search_results = fe_lib.search(
                    precursor_mz=precursor_mz,
                    peaks=query_spectrum,
                    # ppm tolerance converted to Da at this precursor m/z
                    ms1_tolerance_in_da=self.parameters.mass_spectrum[
                        "ms1"
                    ].molecular_search.max_ppm_error
                    * 10**-6
                    * precursor_mz,
                    ms2_tolerance_in_da=peak_sep_da * 0.5,
                    method={"identity"},
                    precursor_ions_removal_da=None,
                    noise_threshold=noise_threshold,
                    target="cpu",
                )["identity_search"]
                match_inds = np.where(search_results > min_match_score)[0]

                # Only precursors with at least one decent match are recorded
                if len(match_inds) == 0:
                    continue

                matched_entries = [fe_lib[x] for x in match_inds]
                ref_precursor_mzs = [entry["precursor_mz"] for entry in matched_entries]
                precursor_results = {
                    "ref_mol_id": [
                        entry["molecular_data_id"] for entry in matched_entries
                    ],
                    "ref_ms_id": [entry["id"] for entry in matched_entries],
                    "ref_precursor_mz": ref_precursor_mzs,
                    "precursor_mz_error_ppm": [
                        (precursor_mz - x) / precursor_mz * 10**6
                        for x in ref_precursor_mzs
                    ],
                    "entropy_similarity": search_results[match_inds],
                    "ref_ion_type": [entry["ion_type"] for entry in matched_entries],
                }
                if get_additional_metrics:
                    more_match_quals = [
                        self.get_more_match_quals(
                            ms2_spectrum.mz_exp,
                            entry,
                            mz_tol_da=peak_sep_da,
                            include_fragment_types=include_fragment_types,
                        )
                        for entry in matched_entries
                    ]
                    precursor_results.update(
                        {
                            "query_mz_in_ref_n": [x[0] for x in more_match_quals],
                            "query_mz_in_ref_fract": [x[1] for x in more_match_quals],
                            "ref_mz_in_query_n": [x[2] for x in more_match_quals],
                            "ref_mz_in_query_fract": [x[3] for x in more_match_quals],
                        }
                    )
                    if include_fragment_types:
                        precursor_results.update(
                            {
                                "query_frag_types": [x[4] for x in more_match_quals],
                                "ref_frag_types": [x[5] for x in more_match_quals],
                            }
                        )
                overall_results_dict[scan_oi][precursor_mz] = precursor_results

        # Drop scans with no results from dictionary
        overall_results_dict = {k: v for k, v in overall_results_dict.items() if v}

        # Cast each entry as a SpectrumSearchResults object (values are replaced
        # in place; no keys are added or removed while iterating)
        for scan_id, scan_results in overall_results_dict.items():
            ms2_spectrum = self._ms[scan_id]
            for precursor_mz, ms2_search_results in scan_results.items():
                scan_results[precursor_mz] = SpectrumSearchResults(
                    ms2_spectrum, precursor_mz, ms2_search_results
                )

        # Add the new results to the existing spectral search results dictionary
        self.spectral_search_results.update(overall_results_dict)

        # If there are mass features, associate the results with each mass feature.
        # NOTE(review): this walks all of self.spectral_search_results, so results
        # stored by an earlier call look like they get appended again - confirm intended.
        if len(self.mass_features) > 0:
            for mass_feature in self.mass_features.values():
                for ms2_scan_id in mass_feature.ms2_scan_numbers:
                    try:
                        result = self.spectral_search_results[ms2_scan_id][
                            mass_feature.mz
                        ]
                    except KeyError:
                        # No search result for this scan / precursor pair
                        continue
                    mass_feature.ms2_similarity_results.append(result)
class LCMSSpectralSearch:
    """
    Methods for searching LCMS spectra.

    This class is designed to be a mixin class for the :obj:`~corems.mass_spectra.factory.lc_class.LCMSBase` class.

    """

    @staticmethod
    def get_more_match_quals(
        query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False
    ):
        """
        Return additional match qualities between query and library entry.

        Parameters
        ----------
        query_mz_arr : np.array
            Query spectrum. Either a 1-D array of m/z values, or a 2-D array of
            shape (N, 2) with m/z in the first column and abundance in the
            second (only the m/z column is used).
        lib_entry : dict
            Library spectrum entry, with 'mz' key containing the spectrum in
            the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
        mz_tol_da : float, optional
            Absolute tolerance in Da for matching peaks (in MS2). Default is 0.1.
        include_fragment_types : bool, optional
            If True, include fragment type comparisons in output.
            Defaults to False.

        Returns
        -------
        tuple
            Tuple of (query_in_lib, query_in_lib_fract, lib_in_query,
            lib_in_query_fract, query_frags, lib_frags).

        Notes
        -----
        query_in_lib : int
            Number of peaks in query that are present in the library entry (within mz_tol_da).
        query_in_lib_fract : float
            Fraction of peaks in query that are present in the library entry (within mz_tol_da).
            0.0 if the query has no peaks.
        lib_in_query : int
            Number of peaks in the library entry that are present in the query (within mz_tol_da).
        lib_in_query_fract : float
            Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
            0.0 if the library entry has no peaks.
        query_frags : list or None
            Unique fragment types of the library peaks that were found in the query,
            generally 'MLF' or 'LSF' or both. None if include_fragment_types is False.
        lib_frags : list or None
            Unique fragment types present in the library entry, generally 'MLF' or 'LSF'
            or both. None if include_fragment_types is False.

        Raises
        ------
        ValueError
            If library entry does not have 'fragment_types' key and include_fragment_types is True.

        """
        if include_fragment_types and "fragment_types" not in lib_entry:
            raise ValueError(
                "Flash entropy library entry must have 'fragment_types' key to include fragment types in output."
            )

        # Get the original mz values from the library entry
        lib_mzs = np.array(
            re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float
        ).reshape(-1, 2)[:, 0]

        # Accept both a bare m/z vector and an (m/z, abundance) table
        query_mzs = np.asarray(query_mz_arr, dtype=float)
        if query_mzs.ndim == 2:
            query_mzs = query_mzs[:, 0]

        # Pairwise comparison with a purely absolute tolerance. np.isclose is
        # avoided on purpose: its default rtol=1e-5 silently widens the window
        # by 1e-5 * m/z, which is comparable to typical MS2 tolerances.
        within_tol = np.abs(query_mzs[:, None] - lib_mzs[None, :]) <= mz_tol_da
        query_matched = within_tol.any(axis=1)
        lib_matched = within_tol.any(axis=0)

        # Counts and fractions; an empty spectrum yields a fraction of 0.0
        query_in_lib = int(query_matched.sum())
        lib_in_query = int(lib_matched.sum())
        query_in_lib_fract = query_in_lib / len(query_mzs) if len(query_mzs) else 0.0
        lib_in_query_fract = lib_in_query / len(lib_mzs) if len(lib_mzs) else 0.0

        if include_fragment_types:
            lib_frag_types = lib_entry["fragment_types"]
            # Fragment types of the library peaks that are present in the query
            query_frags = list(
                {lib_frag_types[ind] for ind, hit in enumerate(lib_matched) if hit}
            )
            lib_frags = list(set(lib_frag_types))
        else:
            query_frags = None
            lib_frags = None

        return (
            query_in_lib,
            query_in_lib_fract,
            lib_in_query,
            lib_in_query_fract,
            query_frags,
            lib_frags,
        )

    def fe_search(
        self,
        scan_list,
        fe_lib,
        precursor_mz_list=None,
        use_mass_features=True,
        peak_sep_da=0.01,
        get_additional_metrics=True,
    ):
        """
        Search LCMS spectra using a FlashEntropy approach.

        Parameters
        ----------
        scan_list : list
            List of scan numbers to search.
        fe_lib : :obj:`~ms_entropy.FlashEntropySearch`
            FlashEntropy Search instance.
        precursor_mz_list : list, optional
            One entry per scan in scan_list, each an iterable of precursor m/z
            values to search for that scan. By default None (treated as empty),
            which implies matching with mass features; to enable this
            use_mass_features must be True.
        use_mass_features : bool, optional
            If True and precursor_mz_list is empty, use mass features to get
            precursor m/z values, by default True.
        peak_sep_da : float, optional
            Minimum separation between m/z peaks in spectra in Da. This needs to match the
            approximate resolution of the search spectra and the FlashEntropySearch
            instance, by default 0.01.
        get_additional_metrics : bool, optional
            If True, get additional metrics from FlashEntropy search, by default True.

        Returns
        -------
        None, but adds results to self.spectral_search_results and associates these
        spectral_search_results with mass_features within the self.mass_features dictionary.

        Raises
        ------
        ValueError
            If precursor_mz_list and scan_list differ in length.

        """
        # Retrieve parameters from self
        # include_fragment_types should be used for lipid queries only, not general metabolomics
        include_fragment_types = self.parameters.lc_ms.include_fragment_types
        min_match_score = self.parameters.lc_ms.ms2_min_fe_score

        if precursor_mz_list is None:
            precursor_mz_list = []

        # Without explicit precursors, take the m/z of every mass feature that
        # owns an MS2 spectrum for the scan (len() is used because the argument
        # may be an array, whose truth value is ambiguous)
        if use_mass_features and len(precursor_mz_list) == 0:
            precursor_mz_list = [
                [
                    mass_feature.mz
                    for mass_feature in self.mass_features.values()
                    if scan in mass_feature.ms2_mass_spectra
                ]
                for scan in scan_list
            ]

        # Check that precursor_mz_list same length as scan_list, if not, raise error
        if len(precursor_mz_list) != len(scan_list):
            raise ValueError("Length of precursor_mz_list is not equal to scan_list.")

        # Loop through each query spectrum / precursor match and save ids of db spectrum that are decent matches
        overall_results_dict = {}
        for scan_oi, precursor_mzs in zip(scan_list, precursor_mz_list):
            ms2_spectrum = self._ms[scan_oi]
            if len(ms2_spectrum.mspeaks) == 0:
                continue

            # The spectrum setting is divided by 100 before being handed to FlashEntropy
            noise_threshold = (
                ms2_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance
                / 100
            )
            overall_results_dict[scan_oi] = {}
            for precursor_mz in precursor_mzs:
                query_spectrum = fe_lib.clean_spectrum_for_search(
                    precursor_mz=precursor_mz,
                    peaks=np.vstack((ms2_spectrum.mz_exp, ms2_spectrum.abundance)).T,
                    precursor_ions_removal_da=None,
                    noise_threshold=noise_threshold,
                    min_ms2_difference_in_da=peak_sep_da,
                )
                search_results = fe_lib.search(
                    precursor_mz=precursor_mz,
                    peaks=query_spectrum,
                    # ppm tolerance converted to Da at this precursor m/z
                    ms1_tolerance_in_da=self.parameters.mass_spectrum[
                        "ms1"
                    ].molecular_search.max_ppm_error
                    * 10**-6
                    * precursor_mz,
                    ms2_tolerance_in_da=peak_sep_da * 0.5,
                    method={"identity"},
                    precursor_ions_removal_da=None,
                    noise_threshold=noise_threshold,
                    target="cpu",
                )["identity_search"]
                match_inds = np.where(search_results > min_match_score)[0]

                # Only precursors with at least one decent match are recorded
                if len(match_inds) == 0:
                    continue

                matched_entries = [fe_lib[x] for x in match_inds]
                ref_precursor_mzs = [entry["precursor_mz"] for entry in matched_entries]
                precursor_results = {
                    "ref_mol_id": [
                        entry["molecular_data_id"] for entry in matched_entries
                    ],
                    "ref_ms_id": [entry["id"] for entry in matched_entries],
                    "ref_precursor_mz": ref_precursor_mzs,
                    "precursor_mz_error_ppm": [
                        (precursor_mz - x) / precursor_mz * 10**6
                        for x in ref_precursor_mzs
                    ],
                    "entropy_similarity": search_results[match_inds],
                    "ref_ion_type": [entry["ion_type"] for entry in matched_entries],
                }
                if get_additional_metrics:
                    more_match_quals = [
                        self.get_more_match_quals(
                            ms2_spectrum.mz_exp,
                            entry,
                            mz_tol_da=peak_sep_da,
                            include_fragment_types=include_fragment_types,
                        )
                        for entry in matched_entries
                    ]
                    precursor_results.update(
                        {
                            "query_mz_in_ref_n": [x[0] for x in more_match_quals],
                            "query_mz_in_ref_fract": [x[1] for x in more_match_quals],
                            "ref_mz_in_query_n": [x[2] for x in more_match_quals],
                            "ref_mz_in_query_fract": [x[3] for x in more_match_quals],
                        }
                    )
                    if include_fragment_types:
                        precursor_results.update(
                            {
                                "query_frag_types": [x[4] for x in more_match_quals],
                                "ref_frag_types": [x[5] for x in more_match_quals],
                            }
                        )
                overall_results_dict[scan_oi][precursor_mz] = precursor_results

        # Drop scans with no results from dictionary
        overall_results_dict = {k: v for k, v in overall_results_dict.items() if v}

        # Cast each entry as a SpectrumSearchResults object (values are replaced
        # in place; no keys are added or removed while iterating)
        for scan_id, scan_results in overall_results_dict.items():
            ms2_spectrum = self._ms[scan_id]
            for precursor_mz, ms2_search_results in scan_results.items():
                scan_results[precursor_mz] = SpectrumSearchResults(
                    ms2_spectrum, precursor_mz, ms2_search_results
                )

        # Add the new results to the existing spectral search results dictionary
        self.spectral_search_results.update(overall_results_dict)

        # If there are mass features, associate the results with each mass feature.
        # NOTE(review): this walks all of self.spectral_search_results, so results
        # stored by an earlier call look like they get appended again - confirm intended.
        if len(self.mass_features) > 0:
            for mass_feature in self.mass_features.values():
                for ms2_scan_id in mass_feature.ms2_scan_numbers:
                    try:
                        result = self.spectral_search_results[ms2_scan_id][
                            mass_feature.mz
                        ]
                    except KeyError:
                        # No search result for this scan / precursor pair
                        continue
                    mass_feature.ms2_similarity_results.append(result)
Methods for searching LCMS spectra.
This class is designed to be a mixin class for the ~corems.mass_spectra.factory.lc_class.LCMSBase class.
17 @staticmethod 18 def get_more_match_quals( 19 query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False 20 ): 21 """ 22 Return additional match qualities between query and library entry. 23 24 Parameters 25 ---------- 26 query_mz_arr : np.array 27 Array of query spectrum. Shape (N, 2), with m/z in the first column 28 and abundance in the second. 29 lib_entry : dict 30 Library spectrum entry, with 'mz' key containing the spectrum in 31 the format (mz, abundance),(mz, abundance), i.e. from MetabRef. 32 mz_tol_da : float, optional 33 Tolerance in Da for matching peaks (in MS2). Default is 0.1. 34 include_fragment_types : bool, optional 35 If True, include fragment type comparisons in output. 36 Defaults to False. 37 38 Returns 39 ------- 40 tuple 41 Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz). 42 43 Notes 44 ----- 45 query_in_lib : int 46 Number of peaks in query that are present in the library entry (within mz_tol_da). 47 query_in_lib_fract : float 48 Fraction of peaks in query that are present in the library entry (within mz_tol_da). 49 lib_in_query : int 50 Number of peaks in the library entry that are present in the query (within mz_tol_da). 51 lib_in_query_fract : float 52 Fraction of peaks in the library entry that are present in the query (within mz_tol_da). 53 query_frags : list 54 List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both. 55 lib_frags : list 56 List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both. 57 58 Raises 59 ------ 60 ValueError 61 If library entry does not have 'fragment_types' key and include_fragment_types is True. 
62 63 """ 64 65 # Get the original mz values from the library entry 66 lib_mzs = np.array( 67 re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float 68 ).reshape(-1, 2)[:, 0] 69 70 # Get count and fraction of peaks in query that are in lib entry 71 query_in_lib = 0 72 for peak in query_mz_arr: 73 if np.any(np.isclose(lib_mzs, peak, atol=mz_tol_da)): 74 query_in_lib += 1 75 query_in_lib_fract = query_in_lib / len(query_mz_arr) 76 77 # Get count and fraction of peaks in lib that are in query 78 lib_in_query = 0 79 for peak in lib_mzs: 80 if np.any(np.isclose(query_mz_arr, peak, atol=mz_tol_da)): 81 lib_in_query += 1 82 lib_in_query_fract = lib_in_query / len(lib_mzs) 83 84 if include_fragment_types: 85 # Check that fragment types are present in the library entry 86 if "fragment_types" not in lib_entry.keys(): 87 raise ValueError( 88 "Flash entropy library entry must have 'fragment_types' key to include fragment types in output." 89 ) 90 91 # Get types of fragments in the lib entry 92 lib_frags = lib_entry["fragment_types"] 93 # make list of the fragment types that are present in the query spectrum 94 lib_in_query_ids = list( 95 set( 96 [ 97 ind 98 for ind, x in enumerate(lib_mzs) 99 if len(np.where(np.isclose(query_mz_arr, x, atol=mz_tol_da))[0]) 100 > 0 101 ] 102 ) 103 ) 104 query_frags = list(set([lib_frags[x] for x in lib_in_query_ids])) 105 lib_frags = list(set(lib_frags)) 106 107 else: 108 query_frags = None 109 lib_frags = None 110 111 return ( 112 query_in_lib, 113 query_in_lib_fract, 114 lib_in_query, 115 lib_in_query_fract, 116 query_frags, 117 lib_frags, 118 )
Return additional match qualities between query and library entry.
Parameters
- query_mz_arr (np.array): 1-D array of the m/z values of the query spectrum (abundances are not used for these match counts).
- lib_entry (dict): Library spectrum entry, with 'mz' key containing the spectrum in the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
- mz_tol_da (float, optional): Tolerance in Da for matching peaks (in MS2). Default is 0.1.
- include_fragment_types (bool, optional): If True, include fragment type comparisons in output. Defaults to False.
Returns
- tuple: Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags).
Notes
- query_in_lib (int): Number of peaks in query that are present in the library entry (within mz_tol_da).
- query_in_lib_fract (float): Fraction of peaks in query that are present in the library entry (within mz_tol_da).
- lib_in_query (int): Number of peaks in the library entry that are present in the query (within mz_tol_da).
- lib_in_query_fract (float): Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
- query_frags (list): List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.
- lib_frags (list): List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.
Raises
- ValueError: If library entry does not have 'fragment_types' key and include_fragment_types is True.
def fe_search(
    self,
    scan_list,
    fe_lib,
    precursor_mz_list=None,
    use_mass_features=True,
    peak_sep_da=0.01,
    get_additional_metrics=True,
):
    """
    Search LCMS spectra using a FlashEntropy approach.

    Parameters
    ----------
    scan_list : list
        List of scan numbers to search.
    fe_lib : :obj:`~ms_entropy.FlashEntropySearch`
        FlashEntropy Search instance.
    precursor_mz_list : list, optional
        One entry per scan in scan_list, each an iterable of precursor m/z
        values to search for that scan. By default None (treated as empty),
        which implies matching with mass features; to enable this
        use_mass_features must be True.
    use_mass_features : bool, optional
        If True and precursor_mz_list is empty, use mass features to get
        precursor m/z values, by default True.
    peak_sep_da : float, optional
        Minimum separation between m/z peaks in spectra in Da. This needs to match the
        approximate resolution of the search spectra and the FlashEntropySearch
        instance, by default 0.01.
    get_additional_metrics : bool, optional
        If True, get additional metrics from FlashEntropy search, by default True.

    Returns
    -------
    None, but adds results to self.spectral_search_results and associates these
    spectral_search_results with mass_features within the self.mass_features dictionary.

    Raises
    ------
    ValueError
        If precursor_mz_list and scan_list differ in length.

    """
    # Retrieve parameters from self
    # include_fragment_types should be used for lipid queries only, not general metabolomics
    include_fragment_types = self.parameters.lc_ms.include_fragment_types
    min_match_score = self.parameters.lc_ms.ms2_min_fe_score

    if precursor_mz_list is None:
        precursor_mz_list = []

    # Without explicit precursors, take the m/z of every mass feature that
    # owns an MS2 spectrum for the scan (len() is used because the argument
    # may be an array, whose truth value is ambiguous)
    if use_mass_features and len(precursor_mz_list) == 0:
        precursor_mz_list = [
            [
                mass_feature.mz
                for mass_feature in self.mass_features.values()
                if scan in mass_feature.ms2_mass_spectra
            ]
            for scan in scan_list
        ]

    # Check that precursor_mz_list same length as scan_list, if not, raise error
    if len(precursor_mz_list) != len(scan_list):
        raise ValueError("Length of precursor_mz_list is not equal to scan_list.")

    # Loop through each query spectrum / precursor match and save ids of db spectrum that are decent matches
    overall_results_dict = {}
    for scan_oi, precursor_mzs in zip(scan_list, precursor_mz_list):
        ms2_spectrum = self._ms[scan_oi]
        if len(ms2_spectrum.mspeaks) == 0:
            continue

        # The spectrum setting is divided by 100 before being handed to FlashEntropy
        noise_threshold = (
            ms2_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance
            / 100
        )
        overall_results_dict[scan_oi] = {}
        for precursor_mz in precursor_mzs:
            query_spectrum = fe_lib.clean_spectrum_for_search(
                precursor_mz=precursor_mz,
                peaks=np.vstack((ms2_spectrum.mz_exp, ms2_spectrum.abundance)).T,
                precursor_ions_removal_da=None,
                noise_threshold=noise_threshold,
                min_ms2_difference_in_da=peak_sep_da,
            )
            search_results = fe_lib.search(
                precursor_mz=precursor_mz,
                peaks=query_spectrum,
                # ppm tolerance converted to Da at this precursor m/z
                ms1_tolerance_in_da=self.parameters.mass_spectrum[
                    "ms1"
                ].molecular_search.max_ppm_error
                * 10**-6
                * precursor_mz,
                ms2_tolerance_in_da=peak_sep_da * 0.5,
                method={"identity"},
                precursor_ions_removal_da=None,
                noise_threshold=noise_threshold,
                target="cpu",
            )["identity_search"]
            match_inds = np.where(search_results > min_match_score)[0]

            # Only precursors with at least one decent match are recorded
            if len(match_inds) == 0:
                continue

            matched_entries = [fe_lib[x] for x in match_inds]
            ref_precursor_mzs = [entry["precursor_mz"] for entry in matched_entries]
            precursor_results = {
                "ref_mol_id": [entry["molecular_data_id"] for entry in matched_entries],
                "ref_ms_id": [entry["id"] for entry in matched_entries],
                "ref_precursor_mz": ref_precursor_mzs,
                "precursor_mz_error_ppm": [
                    (precursor_mz - x) / precursor_mz * 10**6
                    for x in ref_precursor_mzs
                ],
                "entropy_similarity": search_results[match_inds],
                "ref_ion_type": [entry["ion_type"] for entry in matched_entries],
            }
            if get_additional_metrics:
                more_match_quals = [
                    self.get_more_match_quals(
                        ms2_spectrum.mz_exp,
                        entry,
                        mz_tol_da=peak_sep_da,
                        include_fragment_types=include_fragment_types,
                    )
                    for entry in matched_entries
                ]
                precursor_results.update(
                    {
                        "query_mz_in_ref_n": [x[0] for x in more_match_quals],
                        "query_mz_in_ref_fract": [x[1] for x in more_match_quals],
                        "ref_mz_in_query_n": [x[2] for x in more_match_quals],
                        "ref_mz_in_query_fract": [x[3] for x in more_match_quals],
                    }
                )
                if include_fragment_types:
                    precursor_results.update(
                        {
                            "query_frag_types": [x[4] for x in more_match_quals],
                            "ref_frag_types": [x[5] for x in more_match_quals],
                        }
                    )
            overall_results_dict[scan_oi][precursor_mz] = precursor_results

    # Drop scans with no results from dictionary
    overall_results_dict = {k: v for k, v in overall_results_dict.items() if v}

    # Cast each entry as a SpectrumSearchResults object (values are replaced
    # in place; no keys are added or removed while iterating)
    for scan_id, scan_results in overall_results_dict.items():
        ms2_spectrum = self._ms[scan_id]
        for precursor_mz, ms2_search_results in scan_results.items():
            scan_results[precursor_mz] = SpectrumSearchResults(
                ms2_spectrum, precursor_mz, ms2_search_results
            )

    # Add the new results to the existing spectral search results dictionary
    self.spectral_search_results.update(overall_results_dict)

    # If there are mass features, associate the results with each mass feature.
    # NOTE(review): this walks all of self.spectral_search_results, so results
    # stored by an earlier call look like they get appended again - confirm intended.
    if len(self.mass_features) > 0:
        for mass_feature in self.mass_features.values():
            for ms2_scan_id in mass_feature.ms2_scan_numbers:
                try:
                    result = self.spectral_search_results[ms2_scan_id][
                        mass_feature.mz
                    ]
                except KeyError:
                    # No search result for this scan / precursor pair
                    continue
                mass_feature.ms2_similarity_results.append(result)
Search LCMS spectra using a FlashEntropy approach.
Parameters
- scan_list (list): List of scan numbers to search.
- fe_lib (~ms_entropy.FlashEntropySearch): FlashEntropy Search instance.
- precursor_mz_list (list, optional): List of precursor m/z values to search, empty by default, which implies matching with mass features; to enable this, use_mass_features must be True.
- use_mass_features (bool, optional): If True, use mass features to get precursor m/z values, by default True. If True, will add search results to mass features' ms2_similarity_results attribute.
- peak_sep_da (float, optional): Minimum separation between m/z peaks in spectra, in Da. This needs to match the approximate resolution of the search spectra and the FlashEntropySearch instance, by default 0.01.
- get_additional_metrics (bool, optional): If True, get additional metrics from FlashEntropy search, by default True.
Returns
- None, but adds results to self.spectral_search_results and associates these spectral_search_results with mass_features within the self.mass_features dictionary.