corems.molecular_id.search.compoundSearch

  1from math import exp
  2from threading import Thread
  3
  4from numpy import power
  5
  6from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity
  7from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite
  8
  9
 10class LowResMassSpectralMatch(Thread):
 11    """A class representing a low-resolution mass spectral match.
 12
 13    Parameters
 14    -----------
 15    gcms_obj : object
 16        The GC-MS object.
 17    sql_obj : object, optional
 18        The SQL object for database operations. Default is None.
 19    calibration : bool, optional
 20        Flag indicating if the match is for calibration. Default is False.
 21
 22    Attributes
 23    -----------
 24    gcms_obj : object
 25        The GC-MS object.
 26    sql_obj : object
 27        The SQL object for database operations.
 28    calibration : bool
 29        Flag indicating if the match is for calibration.
 30
 31    Methods
 32    --------
 33    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
 34        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
 35    * run().
 36        Runs the low-resolution mass spectral match.
 37
 38    """
 39
 40    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
 41        Thread.__init__(self)
 42
 43        self.gcms_obj = gcms_obj
 44
 45        #  initiated at create_molecular_database()
 46        # self.dict_molecular_lookup_table = None
 47        self.calibration = calibration
 48        # reading local file for now,
 49        if not sql_obj:
 50            self.sql_obj = EI_LowRes_SQLite(
 51                url=self.gcms_obj.molecular_search_settings.url_database
 52            )
 53        else:
 54            self.sql_obj = sql_obj
 55
 56    def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
 57        """
 58        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
 59
 60        Parameters
 61        -----------
 62        gc_peak : object
 63            The GC peak object.
 64        ref_obj : object
 65            The reference object.
 66        spectral_simi : object
 67            The spectral similarity object.
 68
 69        Returns
 70        --------
 71        tuple
 72            A tuple containing the spectral similarity scores, RI score, and similarity score.
 73
 74        """
 75        spectral_similarity_scores = {}
 76        spectral_similarity_scores["cosine_correlation"] = (
 77            spectral_simi.cosine_correlation()
 78        )
 79
 80        if self.gcms_obj.molecular_search_settings.exploratory_mode:
 81            spectral_similarity_scores["weighted_cosine_correlation"] = (
 82                spectral_simi.weighted_cosine_correlation()
 83            )
 84            ss, ss_nist = spectral_simi.stein_scott()
 85            spectral_similarity_scores["stein_scott_similarity"] = ss
 86            spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist
 87
 88            spectral_similarity_scores["pearson_correlation"] = (
 89                spectral_simi.pearson_correlation()
 90            )
 91            spectral_similarity_scores["spearman_correlation"] = (
 92                spectral_simi.spearman_correlation()
 93            )
 94            spectral_similarity_scores["kendall_tau_correlation"] = (
 95                spectral_simi.kendall_tau()
 96            )
 97            spectral_similarity_scores["euclidean_distance"] = (
 98                spectral_simi.euclidean_distance()
 99            )
100            spectral_similarity_scores["manhattan_distance"] = (
101                spectral_simi.manhattan_distance()
102            )
103            spectral_similarity_scores["jaccard_distance"] = (
104                spectral_simi.jaccard_distance()
105            )
106            spectral_similarity_scores["dft_correlation"] = (
107                spectral_simi.dft_correlation()
108            )
109            spectral_similarity_scores["dwt_correlation"] = (
110                spectral_simi.dwt_correlation()
111            )
112            spectral_similarity_scores.update(spectral_simi.extra_distances())
113            # print(spectral_similarity_scores)
114        # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window)
115
116        ri_score = exp(
117            -1
118            * (
119                power((gc_peak.ri - ref_obj.get("ri")), 2)
120                / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2))
121            )
122        )
123
124        similarity_score = (
125            (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score)
126        ) ** (1 / 3)
127
128        return spectral_similarity_scores, ri_score, similarity_score
129
130    def run(self):
131        """Runs the low-resolution mass spectral match."""
132        # TODO select the best gcms peak
133        import tqdm
134
135        original_use_deconvolution = (
136            self.gcms_obj.chromatogram_settings.use_deconvolution
137        )
138
139        if not self.gcms_obj:
140            # Do not use deconvolution for the retention index calibration
141
142            if self.calibration:
143                self.gcms_obj.chromatogram_settings.use_deconvolution = False
144
145            self.gcms_obj.process_chromatogram()
146
147        self.gcms_obj.chromatogram_settings.use_deconvolution = (
148            original_use_deconvolution
149        )
150        verbose = self.gcms_obj.chromatogram_settings.verbose_processing
151        for gc_peak in tqdm.tqdm(self.gcms_obj, disable = not verbose):
152            if not self.calibration:
153                window = self.gcms_obj.molecular_search_settings.ri_search_range
154
155                ri = gc_peak.ri
156
157                min_mat_ri = (ri - window, ri + window)
158
159                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)
160
161            else:
162                compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
163
164                window = self.gcms_obj.molecular_search_settings.rt_search_range
165
166                rt = gc_peak.retention_time
167
168                min_mat_rt = (rt - window, rt + window)
169
170                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)
171
172            for ref_obj in ref_objs:
173                # uses spectral similarly and uses a threshold to only select peaks with high data correlation
174
175                spectral_simi = SpectralSimilarity(
176                    gc_peak.mass_spectrum.mz_abun_dict, ref_obj
177                )
178
179                if self.calibration:
180                    spectral_similarity_scores = {}
181                    spectral_similarity_scores["cosine_correlation"] = (
182                        spectral_simi.cosine_correlation()
183                    )
184
185                    # print(w_correlation_value,correlation_value )
186                    if (
187                        spectral_similarity_scores["cosine_correlation"]
188                        >= self.gcms_obj.molecular_search_settings.correlation_threshold
189                    ):
190                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)
191
192                # use score, usually a combination of Retention index and Spectral Similarity
193                # Threshold is implemented by not necessarily used
194                else:
195                    # m/q developed methods will be implemented here
196                    spectral_similarity_scores, ri_score, similarity_score = (
197                        self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
198                    )
199
200                    # TODO need to add similarity score option in the parameters encapsulation class
201
202                    if (
203                        similarity_score
204                        >= self.gcms_obj.molecular_search_settings.score_threshold
205                    ):
206                        gc_peak.add_compound(
207                            ref_obj,
208                            spectral_similarity_scores,
209                            ri_score,
210                            similarity_score,
211                        )
212
213        self.sql_obj.session.close()
214        self.sql_obj.engine.dispose()
class LowResMassSpectralMatch(threading.Thread):
 11class LowResMassSpectralMatch(Thread):
 12    """A class representing a low-resolution mass spectral match.
 13
 14    Parameters
 15    -----------
 16    gcms_obj : object
 17        The GC-MS object.
 18    sql_obj : object, optional
 19        The SQL object for database operations. Default is None.
 20    calibration : bool, optional
 21        Flag indicating if the match is for calibration. Default is False.
 22
 23    Attributes
 24    -----------
 25    gcms_obj : object
 26        The GC-MS object.
 27    sql_obj : object
 28        The SQL object for database operations.
 29    calibration : bool
 30        Flag indicating if the match is for calibration.
 31
 32    Methods
 33    --------
 34    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
 35        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
 36    * run().
 37        Runs the low-resolution mass spectral match.
 38
 39    """
 40
 41    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
 42        Thread.__init__(self)
 43
 44        self.gcms_obj = gcms_obj
 45
 46        #  initiated at create_molecular_database()
 47        # self.dict_molecular_lookup_table = None
 48        self.calibration = calibration
 49        # reading local file for now,
 50        if not sql_obj:
 51            self.sql_obj = EI_LowRes_SQLite(
 52                url=self.gcms_obj.molecular_search_settings.url_database
 53            )
 54        else:
 55            self.sql_obj = sql_obj
 56
 57    def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
 58        """
 59        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
 60
 61        Parameters
 62        -----------
 63        gc_peak : object
 64            The GC peak object.
 65        ref_obj : object
 66            The reference object.
 67        spectral_simi : object
 68            The spectral similarity object.
 69
 70        Returns
 71        --------
 72        tuple
 73            A tuple containing the spectral similarity scores, RI score, and similarity score.
 74
 75        """
 76        spectral_similarity_scores = {}
 77        spectral_similarity_scores["cosine_correlation"] = (
 78            spectral_simi.cosine_correlation()
 79        )
 80
 81        if self.gcms_obj.molecular_search_settings.exploratory_mode:
 82            spectral_similarity_scores["weighted_cosine_correlation"] = (
 83                spectral_simi.weighted_cosine_correlation()
 84            )
 85            ss, ss_nist = spectral_simi.stein_scott()
 86            spectral_similarity_scores["stein_scott_similarity"] = ss
 87            spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist
 88
 89            spectral_similarity_scores["pearson_correlation"] = (
 90                spectral_simi.pearson_correlation()
 91            )
 92            spectral_similarity_scores["spearman_correlation"] = (
 93                spectral_simi.spearman_correlation()
 94            )
 95            spectral_similarity_scores["kendall_tau_correlation"] = (
 96                spectral_simi.kendall_tau()
 97            )
 98            spectral_similarity_scores["euclidean_distance"] = (
 99                spectral_simi.euclidean_distance()
100            )
101            spectral_similarity_scores["manhattan_distance"] = (
102                spectral_simi.manhattan_distance()
103            )
104            spectral_similarity_scores["jaccard_distance"] = (
105                spectral_simi.jaccard_distance()
106            )
107            spectral_similarity_scores["dft_correlation"] = (
108                spectral_simi.dft_correlation()
109            )
110            spectral_similarity_scores["dwt_correlation"] = (
111                spectral_simi.dwt_correlation()
112            )
113            spectral_similarity_scores.update(spectral_simi.extra_distances())
114            # print(spectral_similarity_scores)
115        # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window)
116
117        ri_score = exp(
118            -1
119            * (
120                power((gc_peak.ri - ref_obj.get("ri")), 2)
121                / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2))
122            )
123        )
124
125        similarity_score = (
126            (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score)
127        ) ** (1 / 3)
128
129        return spectral_similarity_scores, ri_score, similarity_score
130
131    def run(self):
132        """Runs the low-resolution mass spectral match."""
133        # TODO select the best gcms peak
134        import tqdm
135
136        original_use_deconvolution = (
137            self.gcms_obj.chromatogram_settings.use_deconvolution
138        )
139
140        if not self.gcms_obj:
141            # Do not use deconvolution for the retention index calibration
142
143            if self.calibration:
144                self.gcms_obj.chromatogram_settings.use_deconvolution = False
145
146            self.gcms_obj.process_chromatogram()
147
148        self.gcms_obj.chromatogram_settings.use_deconvolution = (
149            original_use_deconvolution
150        )
151        verbose = self.gcms_obj.chromatogram_settings.verbose_processing
152        for gc_peak in tqdm.tqdm(self.gcms_obj, disable = not verbose):
153            if not self.calibration:
154                window = self.gcms_obj.molecular_search_settings.ri_search_range
155
156                ri = gc_peak.ri
157
158                min_mat_ri = (ri - window, ri + window)
159
160                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)
161
162            else:
163                compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
164
165                window = self.gcms_obj.molecular_search_settings.rt_search_range
166
167                rt = gc_peak.retention_time
168
169                min_mat_rt = (rt - window, rt + window)
170
171                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)
172
173            for ref_obj in ref_objs:
174                # uses spectral similarly and uses a threshold to only select peaks with high data correlation
175
176                spectral_simi = SpectralSimilarity(
177                    gc_peak.mass_spectrum.mz_abun_dict, ref_obj
178                )
179
180                if self.calibration:
181                    spectral_similarity_scores = {}
182                    spectral_similarity_scores["cosine_correlation"] = (
183                        spectral_simi.cosine_correlation()
184                    )
185
186                    # print(w_correlation_value,correlation_value )
187                    if (
188                        spectral_similarity_scores["cosine_correlation"]
189                        >= self.gcms_obj.molecular_search_settings.correlation_threshold
190                    ):
191                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)
192
193                # use score, usually a combination of Retention index and Spectral Similarity
194                # Threshold is implemented by not necessarily used
195                else:
196                    # m/q developed methods will be implemented here
197                    spectral_similarity_scores, ri_score, similarity_score = (
198                        self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
199                    )
200
201                    # TODO need to add similarity score option in the parameters encapsulation class
202
203                    if (
204                        similarity_score
205                        >= self.gcms_obj.molecular_search_settings.score_threshold
206                    ):
207                        gc_peak.add_compound(
208                            ref_obj,
209                            spectral_similarity_scores,
210                            ri_score,
211                            similarity_score,
212                        )
213
214        self.sql_obj.session.close()
215        self.sql_obj.engine.dispose()

A class representing a low-resolution mass spectral match.

Parameters
  • gcms_obj (object): The GC-MS object.
  • sql_obj (object, optional): The SQL object for database operations. Default is None.
  • calibration (bool, optional): Flag indicating if the match is for calibration. Default is False.
Attributes
  • gcms_obj (object): The GC-MS object.
  • sql_obj (object): The SQL object for database operations.
  • calibration (bool): Flag indicating if the match is for calibration.
Methods
  • metabolite_detector_score(gc_peak, ref_obj, spectral_simi). Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
  • run(). Runs the low-resolution mass spectral match.
LowResMassSpectralMatch(gcms_obj, sql_obj=None, calibration=False)
41    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
42        Thread.__init__(self)
43
44        self.gcms_obj = gcms_obj
45
46        #  initiated at create_molecular_database()
47        # self.dict_molecular_lookup_table = None
48        self.calibration = calibration
49        # reading local file for now,
50        if not sql_obj:
51            self.sql_obj = EI_LowRes_SQLite(
52                url=self.gcms_obj.molecular_search_settings.url_database
53            )
54        else:
55            self.sql_obj = sql_obj

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

gcms_obj
calibration
def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
 57    def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
 58        """
 59        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
 60
 61        Parameters
 62        -----------
 63        gc_peak : object
 64            The GC peak object.
 65        ref_obj : object
 66            The reference object.
 67        spectral_simi : object
 68            The spectral similarity object.
 69
 70        Returns
 71        --------
 72        tuple
 73            A tuple containing the spectral similarity scores, RI score, and similarity score.
 74
 75        """
 76        spectral_similarity_scores = {}
 77        spectral_similarity_scores["cosine_correlation"] = (
 78            spectral_simi.cosine_correlation()
 79        )
 80
 81        if self.gcms_obj.molecular_search_settings.exploratory_mode:
 82            spectral_similarity_scores["weighted_cosine_correlation"] = (
 83                spectral_simi.weighted_cosine_correlation()
 84            )
 85            ss, ss_nist = spectral_simi.stein_scott()
 86            spectral_similarity_scores["stein_scott_similarity"] = ss
 87            spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist
 88
 89            spectral_similarity_scores["pearson_correlation"] = (
 90                spectral_simi.pearson_correlation()
 91            )
 92            spectral_similarity_scores["spearman_correlation"] = (
 93                spectral_simi.spearman_correlation()
 94            )
 95            spectral_similarity_scores["kendall_tau_correlation"] = (
 96                spectral_simi.kendall_tau()
 97            )
 98            spectral_similarity_scores["euclidean_distance"] = (
 99                spectral_simi.euclidean_distance()
100            )
101            spectral_similarity_scores["manhattan_distance"] = (
102                spectral_simi.manhattan_distance()
103            )
104            spectral_similarity_scores["jaccard_distance"] = (
105                spectral_simi.jaccard_distance()
106            )
107            spectral_similarity_scores["dft_correlation"] = (
108                spectral_simi.dft_correlation()
109            )
110            spectral_similarity_scores["dwt_correlation"] = (
111                spectral_simi.dwt_correlation()
112            )
113            spectral_similarity_scores.update(spectral_simi.extra_distances())
114            # print(spectral_similarity_scores)
115        # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window)
116
117        ri_score = exp(
118            -1
119            * (
120                power((gc_peak.ri - ref_obj.get("ri")), 2)
121                / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2))
122            )
123        )
124
125        similarity_score = (
126            (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score)
127        ) ** (1 / 3)
128
129        return spectral_similarity_scores, ri_score, similarity_score

Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.

Parameters
  • gc_peak (object): The GC peak object.
  • ref_obj (object): The reference object.
  • spectral_simi (object): The spectral similarity object.
Returns
  • tuple: A tuple containing the spectral similarity scores, RI score, and similarity score.
def run(self):
131    def run(self):
132        """Runs the low-resolution mass spectral match."""
133        # TODO select the best gcms peak
134        import tqdm
135
136        original_use_deconvolution = (
137            self.gcms_obj.chromatogram_settings.use_deconvolution
138        )
139
140        if not self.gcms_obj:
141            # Do not use deconvolution for the retention index calibration
142
143            if self.calibration:
144                self.gcms_obj.chromatogram_settings.use_deconvolution = False
145
146            self.gcms_obj.process_chromatogram()
147
148        self.gcms_obj.chromatogram_settings.use_deconvolution = (
149            original_use_deconvolution
150        )
151        verbose = self.gcms_obj.chromatogram_settings.verbose_processing
152        for gc_peak in tqdm.tqdm(self.gcms_obj, disable = not verbose):
153            if not self.calibration:
154                window = self.gcms_obj.molecular_search_settings.ri_search_range
155
156                ri = gc_peak.ri
157
158                min_mat_ri = (ri - window, ri + window)
159
160                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)
161
162            else:
163                compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
164
165                window = self.gcms_obj.molecular_search_settings.rt_search_range
166
167                rt = gc_peak.retention_time
168
169                min_mat_rt = (rt - window, rt + window)
170
171                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)
172
173            for ref_obj in ref_objs:
174                # uses spectral similarly and uses a threshold to only select peaks with high data correlation
175
176                spectral_simi = SpectralSimilarity(
177                    gc_peak.mass_spectrum.mz_abun_dict, ref_obj
178                )
179
180                if self.calibration:
181                    spectral_similarity_scores = {}
182                    spectral_similarity_scores["cosine_correlation"] = (
183                        spectral_simi.cosine_correlation()
184                    )
185
186                    # print(w_correlation_value,correlation_value )
187                    if (
188                        spectral_similarity_scores["cosine_correlation"]
189                        >= self.gcms_obj.molecular_search_settings.correlation_threshold
190                    ):
191                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)
192
193                # use score, usually a combination of Retention index and Spectral Similarity
194                # Threshold is implemented by not necessarily used
195                else:
196                    # m/q developed methods will be implemented here
197                    spectral_similarity_scores, ri_score, similarity_score = (
198                        self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
199                    )
200
201                    # TODO need to add similarity score option in the parameters encapsulation class
202
203                    if (
204                        similarity_score
205                        >= self.gcms_obj.molecular_search_settings.score_threshold
206                    ):
207                        gc_peak.add_compound(
208                            ref_obj,
209                            spectral_similarity_scores,
210                            ri_score,
211                            similarity_score,
212                        )
213
214        self.sql_obj.session.close()
215        self.sql_obj.engine.dispose()

Runs the low-resolution mass spectral match.

Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id