corems.molecular_id.search.compoundSearch
from math import exp
from threading import Thread

from numpy import power

from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity
from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite


class LowResMassSpectralMatch(Thread):
    """A thread that matches GC-MS peaks against a low-resolution reference library.

    Parameters
    ----------
    gcms_obj : object
        The GC-MS object. It is iterated to obtain the GC peaks and provides
        ``molecular_search_settings`` and ``chromatogram_settings``.
    sql_obj : object, optional
        The SQL object for database operations. Default is None, in which
        case an ``EI_LowRes_SQLite`` is created from
        ``gcms_obj.molecular_search_settings.url_database``.
    calibration : bool, optional
        Flag indicating if the match is for calibration. Default is False.

    Attributes
    ----------
    gcms_obj : object
        The GC-MS object.
    sql_obj : object
        The SQL object for database operations.
    calibration : bool
        Flag indicating if the match is for calibration.

    Methods
    -------
    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
        Calculates the spectral similarity scores and the similarity score
        for a given GC peak and reference object.
    * run().
        Runs the low-resolution mass spectral match.

    """

    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
        Thread.__init__(self)

        self.gcms_obj = gcms_obj
        self.calibration = calibration

        # Reading a local file for now: without a caller-supplied (truthy)
        # database handle, open the library referenced by the settings.
        if not sql_obj:
            self.sql_obj = EI_LowRes_SQLite(
                url=self.gcms_obj.molecular_search_settings.url_database
            )
        else:
            self.sql_obj = sql_obj

    def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
        """Calculate the spectral similarity scores, RI score and similarity score.

        The RI score is ``exp(-(gc_peak.ri - ref_ri)**2 / (2 * ri_std**2))``
        and the similarity score is
        ``(cosine_correlation**2 * ri_score) ** (1 / 3)``.

        Parameters
        ----------
        gc_peak : object
            The GC peak object; its ``ri`` attribute is read.
        ref_obj : object
            The reference object; ``ref_obj.get("ri")`` is read.
        spectral_simi : object
            The spectral similarity object providing the score methods.

        Returns
        -------
        tuple
            ``(spectral_similarity_scores, ri_score, similarity_score)``: the
            dict of spectral similarity scores, the RI score, and the
            similarity score.

        """
        spectral_similarity_scores = {}
        spectral_similarity_scores["cosine_correlation"] = (
            spectral_simi.cosine_correlation()
        )

        # The additional metrics are only computed in exploratory mode.
        if self.gcms_obj.molecular_search_settings.exploratory_mode:
            spectral_similarity_scores["weighted_cosine_correlation"] = (
                spectral_simi.weighted_cosine_correlation()
            )
            ss, ss_nist = spectral_simi.stein_scott()
            spectral_similarity_scores["stein_scott_similarity"] = ss
            spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist

            spectral_similarity_scores["pearson_correlation"] = (
                spectral_simi.pearson_correlation()
            )
            spectral_similarity_scores["spearman_correlation"] = (
                spectral_simi.spearman_correlation()
            )
            spectral_similarity_scores["kendall_tau_correlation"] = (
                spectral_simi.kendall_tau()
            )
            spectral_similarity_scores["euclidean_distance"] = (
                spectral_simi.euclidean_distance()
            )
            spectral_similarity_scores["manhattan_distance"] = (
                spectral_simi.manhattan_distance()
            )
            spectral_similarity_scores["jaccard_distance"] = (
                spectral_simi.jaccard_distance()
            )
            spectral_similarity_scores["dft_correlation"] = (
                spectral_simi.dft_correlation()
            )
            spectral_similarity_scores["dwt_correlation"] = (
                spectral_simi.dwt_correlation()
            )
            spectral_similarity_scores.update(spectral_simi.extra_distances())

        ri_score = exp(
            -1
            * (
                power((gc_peak.ri - ref_obj.get("ri")), 2)
                / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2))
            )
        )

        similarity_score = (
            (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score)
        ) ** (1 / 3)

        return spectral_similarity_scores, ri_score, similarity_score

    def run(self):
        """Run the low-resolution mass spectral match.

        When ``gcms_obj`` evaluates as falsy (presumably meaning no GC peaks
        are available yet -- confirm against the GC-MS class), the
        chromatogram is processed first, with deconvolution switched off in
        calibration mode. Each GC peak is then compared with the reference
        entries returned by ``sql_obj`` and matches that reach the configured
        threshold are added to the peak with ``gc_peak.add_compound``.

        The ``use_deconvolution`` setting is restored, the database session
        is closed and the engine is disposed even when an exception is
        raised; the exception then propagates to the caller.
        """
        # TODO select the best gcms peak
        try:
            import tqdm

            original_use_deconvolution = (
                self.gcms_obj.chromatogram_settings.use_deconvolution
            )

            try:
                if not self.gcms_obj:
                    # Do not use deconvolution for the retention index calibration
                    if self.calibration:
                        self.gcms_obj.chromatogram_settings.use_deconvolution = False

                    self.gcms_obj.process_chromatogram()
            finally:
                # Put the caller's setting back even if processing failed.
                self.gcms_obj.chromatogram_settings.use_deconvolution = (
                    original_use_deconvolution
                )

            verbose = self.gcms_obj.chromatogram_settings.verbose_processing
            for gc_peak in tqdm.tqdm(self.gcms_obj, disable=not verbose):
                if not self.calibration:
                    window = self.gcms_obj.molecular_search_settings.ri_search_range

                    ri = gc_peak.ri

                    min_mat_ri = (ri - window, ri + window)

                    ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)

                else:
                    compound_names = (
                        self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
                    )

                    window = self.gcms_obj.molecular_search_settings.rt_search_range

                    rt = gc_peak.retention_time

                    min_mat_rt = (rt - window, rt + window)

                    ref_objs = self.sql_obj.query_names_and_rt(
                        min_mat_rt, compound_names
                    )

                for ref_obj in ref_objs:
                    # Uses spectral similarity and a threshold to only select
                    # peaks with high data correlation.
                    spectral_simi = SpectralSimilarity(
                        gc_peak.mass_spectrum.mz_abun_dict, ref_obj
                    )

                    if self.calibration:
                        spectral_similarity_scores = {}
                        spectral_similarity_scores["cosine_correlation"] = (
                            spectral_simi.cosine_correlation()
                        )

                        if (
                            spectral_similarity_scores["cosine_correlation"]
                            >= self.gcms_obj.molecular_search_settings.correlation_threshold
                        ):
                            gc_peak.add_compound(ref_obj, spectral_similarity_scores)

                    # Use the score, usually a combination of retention index
                    # and spectral similarity. The threshold is implemented
                    # but not necessarily used.
                    else:
                        # m/q developed methods will be implemented here
                        spectral_similarity_scores, ri_score, similarity_score = (
                            self.metabolite_detector_score(
                                gc_peak, ref_obj, spectral_simi
                            )
                        )

                        # TODO need to add similarity score option in the
                        # parameters encapsulation class
                        if (
                            similarity_score
                            >= self.gcms_obj.molecular_search_settings.score_threshold
                        ):
                            gc_peak.add_compound(
                                ref_obj,
                                spectral_similarity_scores,
                                ri_score,
                                similarity_score,
                            )
        finally:
            # Always release the database resources, also on failure.
            self.sql_obj.session.close()
            self.sql_obj.engine.dispose()
class LowResMassSpectralMatch(Thread):
    """A thread that matches GC-MS peaks against a low-resolution reference library.

    Parameters
    ----------
    gcms_obj : object
        The GC-MS object. It is iterated to obtain the GC peaks and provides
        ``molecular_search_settings`` and ``chromatogram_settings``.
    sql_obj : object, optional
        The SQL object for database operations. Default is None, in which
        case an ``EI_LowRes_SQLite`` is created from
        ``gcms_obj.molecular_search_settings.url_database``.
    calibration : bool, optional
        Flag indicating if the match is for calibration. Default is False.

    Attributes
    ----------
    gcms_obj : object
        The GC-MS object.
    sql_obj : object
        The SQL object for database operations.
    calibration : bool
        Flag indicating if the match is for calibration.

    Methods
    -------
    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
        Calculates the spectral similarity scores and the similarity score
        for a given GC peak and reference object.
    * run().
        Runs the low-resolution mass spectral match.

    """

    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
        Thread.__init__(self)

        self.gcms_obj = gcms_obj
        self.calibration = calibration

        # Reading a local file for now: without a caller-supplied (truthy)
        # database handle, open the library referenced by the settings.
        if not sql_obj:
            self.sql_obj = EI_LowRes_SQLite(
                url=self.gcms_obj.molecular_search_settings.url_database
            )
        else:
            self.sql_obj = sql_obj

    def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
        """Calculate the spectral similarity scores, RI score and similarity score.

        The RI score is ``exp(-(gc_peak.ri - ref_ri)**2 / (2 * ri_std**2))``
        and the similarity score is
        ``(cosine_correlation**2 * ri_score) ** (1 / 3)``.

        Parameters
        ----------
        gc_peak : object
            The GC peak object; its ``ri`` attribute is read.
        ref_obj : object
            The reference object; ``ref_obj.get("ri")`` is read.
        spectral_simi : object
            The spectral similarity object providing the score methods.

        Returns
        -------
        tuple
            ``(spectral_similarity_scores, ri_score, similarity_score)``: the
            dict of spectral similarity scores, the RI score, and the
            similarity score.

        """
        spectral_similarity_scores = {}
        spectral_similarity_scores["cosine_correlation"] = (
            spectral_simi.cosine_correlation()
        )

        # The additional metrics are only computed in exploratory mode.
        if self.gcms_obj.molecular_search_settings.exploratory_mode:
            spectral_similarity_scores["weighted_cosine_correlation"] = (
                spectral_simi.weighted_cosine_correlation()
            )
            ss, ss_nist = spectral_simi.stein_scott()
            spectral_similarity_scores["stein_scott_similarity"] = ss
            spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist

            spectral_similarity_scores["pearson_correlation"] = (
                spectral_simi.pearson_correlation()
            )
            spectral_similarity_scores["spearman_correlation"] = (
                spectral_simi.spearman_correlation()
            )
            spectral_similarity_scores["kendall_tau_correlation"] = (
                spectral_simi.kendall_tau()
            )
            spectral_similarity_scores["euclidean_distance"] = (
                spectral_simi.euclidean_distance()
            )
            spectral_similarity_scores["manhattan_distance"] = (
                spectral_simi.manhattan_distance()
            )
            spectral_similarity_scores["jaccard_distance"] = (
                spectral_simi.jaccard_distance()
            )
            spectral_similarity_scores["dft_correlation"] = (
                spectral_simi.dft_correlation()
            )
            spectral_similarity_scores["dwt_correlation"] = (
                spectral_simi.dwt_correlation()
            )
            spectral_similarity_scores.update(spectral_simi.extra_distances())

        ri_score = exp(
            -1
            * (
                power((gc_peak.ri - ref_obj.get("ri")), 2)
                / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2))
            )
        )

        similarity_score = (
            (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score)
        ) ** (1 / 3)

        return spectral_similarity_scores, ri_score, similarity_score

    def run(self):
        """Run the low-resolution mass spectral match.

        When ``gcms_obj`` evaluates as falsy (presumably meaning no GC peaks
        are available yet -- confirm against the GC-MS class), the
        chromatogram is processed first, with deconvolution switched off in
        calibration mode. Each GC peak is then compared with the reference
        entries returned by ``sql_obj`` and matches that reach the configured
        threshold are added to the peak with ``gc_peak.add_compound``.

        The ``use_deconvolution`` setting is restored, the database session
        is closed and the engine is disposed even when an exception is
        raised; the exception then propagates to the caller.
        """
        # TODO select the best gcms peak
        try:
            import tqdm

            original_use_deconvolution = (
                self.gcms_obj.chromatogram_settings.use_deconvolution
            )

            try:
                if not self.gcms_obj:
                    # Do not use deconvolution for the retention index calibration
                    if self.calibration:
                        self.gcms_obj.chromatogram_settings.use_deconvolution = False

                    self.gcms_obj.process_chromatogram()
            finally:
                # Put the caller's setting back even if processing failed.
                self.gcms_obj.chromatogram_settings.use_deconvolution = (
                    original_use_deconvolution
                )

            verbose = self.gcms_obj.chromatogram_settings.verbose_processing
            for gc_peak in tqdm.tqdm(self.gcms_obj, disable=not verbose):
                if not self.calibration:
                    window = self.gcms_obj.molecular_search_settings.ri_search_range

                    ri = gc_peak.ri

                    min_mat_ri = (ri - window, ri + window)

                    ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)

                else:
                    compound_names = (
                        self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
                    )

                    window = self.gcms_obj.molecular_search_settings.rt_search_range

                    rt = gc_peak.retention_time

                    min_mat_rt = (rt - window, rt + window)

                    ref_objs = self.sql_obj.query_names_and_rt(
                        min_mat_rt, compound_names
                    )

                for ref_obj in ref_objs:
                    # Uses spectral similarity and a threshold to only select
                    # peaks with high data correlation.
                    spectral_simi = SpectralSimilarity(
                        gc_peak.mass_spectrum.mz_abun_dict, ref_obj
                    )

                    if self.calibration:
                        spectral_similarity_scores = {}
                        spectral_similarity_scores["cosine_correlation"] = (
                            spectral_simi.cosine_correlation()
                        )

                        if (
                            spectral_similarity_scores["cosine_correlation"]
                            >= self.gcms_obj.molecular_search_settings.correlation_threshold
                        ):
                            gc_peak.add_compound(ref_obj, spectral_similarity_scores)

                    # Use the score, usually a combination of retention index
                    # and spectral similarity. The threshold is implemented
                    # but not necessarily used.
                    else:
                        # m/q developed methods will be implemented here
                        spectral_similarity_scores, ri_score, similarity_score = (
                            self.metabolite_detector_score(
                                gc_peak, ref_obj, spectral_simi
                            )
                        )

                        # TODO need to add similarity score option in the
                        # parameters encapsulation class
                        if (
                            similarity_score
                            >= self.gcms_obj.molecular_search_settings.score_threshold
                        ):
                            gc_peak.add_compound(
                                ref_obj,
                                spectral_similarity_scores,
                                ri_score,
                                similarity_score,
                            )
        finally:
            # Always release the database resources, also on failure.
            self.sql_obj.session.close()
            self.sql_obj.engine.dispose()
A class representing a low-resolution mass spectral match.
Parameters
- gcms_obj (object): The GC-MS object.
- sql_obj (object, optional): The SQL object for database operations. Default is None, in which case an EI_LowRes_SQLite is created from gcms_obj.molecular_search_settings.url_database.
- calibration (bool, optional): Flag indicating if the match is for calibration. Default is False.
Attributes
- gcms_obj (object): The GC-MS object.
- sql_obj (object): The SQL object for database operations.
- calibration (bool): Flag indicating if the match is for calibration.
Methods
- metabolite_detector_score(gc_peak, ref_obj, spectral_simi). Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
- run(). Runs the low-resolution mass spectral match.
41 def __init__(self, gcms_obj, sql_obj=None, calibration=False): 42 Thread.__init__(self) 43 44 self.gcms_obj = gcms_obj 45 46 # initiated at create_molecular_database() 47 # self.dict_molecular_lookup_table = None 48 self.calibration = calibration 49 # reading local file for now, 50 if not sql_obj: 51 self.sql_obj = EI_LowRes_SQLite( 52 url=self.gcms_obj.molecular_search_settings.url_database 53 ) 54 else: 55 self.sql_obj = sql_obj
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
    """Score one GC peak against one reference entry.

    The RI score is ``exp(-(gc_peak.ri - ref_ri)**2 / (2 * ri_std**2))`` and
    the similarity score is ``(cosine_correlation**2 * ri_score) ** (1 / 3)``.

    Parameters
    ----------
    gc_peak : object
        The GC peak object; its ``ri`` attribute is read.
    ref_obj : object
        The reference object; ``ref_obj.get("ri")`` is read.
    spectral_simi : object
        The spectral similarity object providing the score methods.

    Returns
    -------
    tuple
        ``(spectral_similarity_scores, ri_score, similarity_score)``: the
        dict of spectral similarity scores, the RI score, and the similarity
        score.
    """
    scores = {"cosine_correlation": spectral_simi.cosine_correlation()}
    settings = self.gcms_obj.molecular_search_settings

    # The remaining metrics are only evaluated in exploratory mode.
    if settings.exploratory_mode:
        scores["weighted_cosine_correlation"] = (
            spectral_simi.weighted_cosine_correlation()
        )
        (
            scores["stein_scott_similarity"],
            scores["stein_scott_similarity_nist"],
        ) = spectral_simi.stein_scott()

        # (result key, SpectralSimilarity method) in the order they are stored.
        for key, method_name in (
            ("pearson_correlation", "pearson_correlation"),
            ("spearman_correlation", "spearman_correlation"),
            ("kendall_tau_correlation", "kendall_tau"),
            ("euclidean_distance", "euclidean_distance"),
            ("manhattan_distance", "manhattan_distance"),
            ("jaccard_distance", "jaccard_distance"),
            ("dft_correlation", "dft_correlation"),
            ("dwt_correlation", "dwt_correlation"),
        ):
            scores[key] = getattr(spectral_simi, method_name)()

        scores.update(spectral_simi.extra_distances())

    # Gaussian-shaped penalty on the retention index difference.
    squared_ri_delta = power((gc_peak.ri - ref_obj.get("ri")), 2)
    two_variance = 2 * power(settings.ri_std, 2)
    ri_score = exp(-1 * (squared_ri_delta / two_variance))

    # Read the cosine value back from the dict, after any update above.
    cosine_squared = scores.get("cosine_correlation") ** 2
    similarity_score = (cosine_squared * (ri_score)) ** (1 / 3)

    return scores, ri_score, similarity_score
Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
Parameters
- gc_peak (object): The GC peak object.
- ref_obj (object): The reference object.
- spectral_simi (object): The spectral similarity object.
Returns
- tuple: A 3-tuple (spectral_similarity_scores, ri_score, similarity_score): the dict of spectral similarity scores, the RI score, and the similarity score.
def run(self):
    """Run the low-resolution mass spectral match.

    When ``gcms_obj`` evaluates as falsy (presumably meaning no GC peaks are
    available yet -- confirm against the GC-MS class), the chromatogram is
    processed first, with deconvolution switched off in calibration mode.
    Each GC peak is then compared with the reference entries returned by
    ``sql_obj``:

    * calibration mode: references are queried by compound name and
      retention-time window, and kept when the cosine correlation reaches
      ``correlation_threshold``;
    * otherwise: references are queried by retention-index window, and kept
      when the similarity score from ``metabolite_detector_score`` reaches
      ``score_threshold``.

    Kept references are attached with ``gc_peak.add_compound``.

    The ``use_deconvolution`` setting is restored, the database session is
    closed and the engine is disposed even when an exception is raised; the
    exception then propagates to the caller.
    """
    # TODO select the best gcms peak
    try:
        import tqdm

        original_use_deconvolution = (
            self.gcms_obj.chromatogram_settings.use_deconvolution
        )

        try:
            if not self.gcms_obj:
                # Do not use deconvolution for the retention index calibration
                if self.calibration:
                    self.gcms_obj.chromatogram_settings.use_deconvolution = False

                self.gcms_obj.process_chromatogram()
        finally:
            # Put the caller's setting back even if processing failed.
            self.gcms_obj.chromatogram_settings.use_deconvolution = (
                original_use_deconvolution
            )

        verbose = self.gcms_obj.chromatogram_settings.verbose_processing
        for gc_peak in tqdm.tqdm(self.gcms_obj, disable=not verbose):
            if not self.calibration:
                window = self.gcms_obj.molecular_search_settings.ri_search_range

                ri = gc_peak.ri

                min_mat_ri = (ri - window, ri + window)

                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)

            else:
                compound_names = (
                    self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
                )

                window = self.gcms_obj.molecular_search_settings.rt_search_range

                rt = gc_peak.retention_time

                min_mat_rt = (rt - window, rt + window)

                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)

            for ref_obj in ref_objs:
                # Uses spectral similarity and a threshold to only select
                # peaks with high data correlation.
                spectral_simi = SpectralSimilarity(
                    gc_peak.mass_spectrum.mz_abun_dict, ref_obj
                )

                if self.calibration:
                    spectral_similarity_scores = {}
                    spectral_similarity_scores["cosine_correlation"] = (
                        spectral_simi.cosine_correlation()
                    )

                    if (
                        spectral_similarity_scores["cosine_correlation"]
                        >= self.gcms_obj.molecular_search_settings.correlation_threshold
                    ):
                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)

                # Use the score, usually a combination of retention index and
                # spectral similarity. The threshold is implemented but not
                # necessarily used.
                else:
                    # m/q developed methods will be implemented here
                    spectral_similarity_scores, ri_score, similarity_score = (
                        self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
                    )

                    # TODO need to add similarity score option in the
                    # parameters encapsulation class
                    if (
                        similarity_score
                        >= self.gcms_obj.molecular_search_settings.score_threshold
                    ):
                        gc_peak.add_compound(
                            ref_obj,
                            spectral_similarity_scores,
                            ri_score,
                            similarity_score,
                        )
    finally:
        # Always release the database resources, also on failure.
        self.sql_obj.session.close()
        self.sql_obj.engine.dispose()
Runs the low-resolution mass spectral match.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id