corems.molecular_id.search.lcms_spectral_search

  1import re
  2
  3import numpy as np
  4
  5from corems.molecular_id.factory.spectrum_search_results import SpectrumSearchResults
  6
  7
  8class LCMSSpectralSearch:
  9    """
 10    Methods for searching LCMS spectra.
 11
 12    This class is designed to be a mixin class for the :obj:`~corems.mass_spectra.factory.lc_class.LCMSBase` class.
 13
 14    """
 15
 16    @staticmethod
 17    def get_more_match_quals(
 18        query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False
 19    ):
 20        """
 21        Return additional match qualities between query and library entry.
 22
 23        Parameters
 24        ----------
 25        query_mz_arr : np.array
 26            Array of query spectrum. Shape (N, 2), with m/z in the first column
 27            and abundance in the second.
 28        lib_entry : dict
 29            Library spectrum entry, with 'mz' key containing the spectrum in
 30            the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
 31        mz_tol_da : float, optional
 32            Tolerance in Da for matching peaks (in MS2). Default is 0.1.
 33        include_fragment_types : bool, optional
 34            If True, include fragment type comparisons in output.
 35            Defaults to False.
 36
 37        Returns
 38        -------
 39        tuple
 40            Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz).
 41
 42        Notes
 43        -----
 44        query_in_lib : int
 45            Number of peaks in query that are present in the library entry (within mz_tol_da).
 46        query_in_lib_fract : float
 47            Fraction of peaks in query that are present in the library entry (within mz_tol_da).
 48        lib_in_query : int
 49            Number of peaks in the library entry that are present in the query (within mz_tol_da).
 50        lib_in_query_fract : float
 51            Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
 52        query_frags : list
 53            List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.
 54        lib_frags : list
 55            List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.
 56
 57        Raises
 58        ------
 59        ValueError
 60            If library entry does not have 'fragment_types' key and include_fragment_types is True.
 61
 62        """
 63
 64        # Get the original mz values from the library entry
 65        lib_mzs = np.array(
 66            re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float
 67        ).reshape(-1, 2)[:, 0]
 68
 69        # Get count and fraction of peaks in query that are in lib entry
 70        query_in_lib = 0
 71        for peak in query_mz_arr:
 72            if np.any(np.isclose(lib_mzs, peak, atol=mz_tol_da)):
 73                query_in_lib += 1
 74        query_in_lib_fract = query_in_lib / len(query_mz_arr)
 75
 76        # Get count and fraction of peaks in lib that are in query
 77        lib_in_query = 0
 78        for peak in lib_mzs:
 79            if np.any(np.isclose(query_mz_arr, peak, atol=mz_tol_da)):
 80                lib_in_query += 1
 81        lib_in_query_fract = lib_in_query / len(lib_mzs)
 82
 83        if include_fragment_types:
 84            # Check that fragment types are present in the library entry
 85            if "fragment_types" not in lib_entry.keys():
 86                raise ValueError(
 87                    "Flash entropy library entry must have 'fragment_types' key to include fragment types in output."
 88                )
 89
 90            # Get types of fragments in the lib entry
 91            lib_frags = lib_entry["fragment_types"]
 92            # make list of the fragment types that are present in the query spectrum
 93            lib_in_query_ids = list(
 94                set(
 95                    [
 96                        ind
 97                        for ind, x in enumerate(lib_mzs)
 98                        if len(np.where(np.isclose(query_mz_arr, x, atol=mz_tol_da))[0])
 99                        > 0
100                    ]
101                )
102            )
103            query_frags = list(set([lib_frags[x] for x in lib_in_query_ids]))
104            lib_frags = list(set(lib_frags))
105
106        else:
107            query_frags = None
108            lib_frags = None
109
110        return (
111            query_in_lib,
112            query_in_lib_fract,
113            lib_in_query,
114            lib_in_query_fract,
115            query_frags,
116            lib_frags,
117        )
118
119    def fe_search(
120        self,
121        scan_list,
122        fe_lib,
123        precursor_mz_list=[],
124        use_mass_features=True,
125        peak_sep_da=0.01,
126        get_additional_metrics=True,
127    ):
128        """
129        Search LCMS spectra using a FlashEntropy approach.
130
131        Parameters
132        ----------
133        scan_list : list
134            List of scan numbers to search.
135        fe_lib : :obj:`~ms_entropy.FlashEntropySearch`
136            FlashEntropy Search instance.
137        precursor_mz_list : list, optional
138            List of precursor m/z values to search, by default [], which implies
139            matched with mass features; to enable this use_mass_features must be True.
140        use_mass_features : bool, optional
141            If True, use mass features to get precursor m/z values, by default True.
142            If True, will add search results to mass features' ms2_similarity_results attribute.
143        peak_sep_da : float, optional
144            Minimum separation between m/z peaks spectra in Da. This needs match the
145            approximate resolution of the search spectra and the FlashEntropySearch
146            instance, by default 0.01.
147        get_additional_metrics : bool, optional
148            If True, get additional metrics from FlashEntropy search, by default True.
149
150        Returns
151        -------
152        None, but adds results to self.spectral_search_results and associates these
153        spectral_search_results with mass_features within the self.mass_features dictionary.
154
155        """
156        # Retrieve parameters from self
157        # include_fragment_types should used for lipids queries only, not general metabolomics
158        include_fragment_types = self.parameters.lc_ms.include_fragment_types
159        min_match_score = self.parameters.lc_ms.ms2_min_fe_score
160
161        # If precursor_mz_list is empty and use_mass_features is True, get precursor m/z values from mass features for each scan in scan_list
162        if use_mass_features and len(precursor_mz_list) == 0:
163            precursor_mz_list = []
164            for scan in scan_list:
165                mf_ids = [
166                    key
167                    for key, value in self.mass_features.items()
168                    if scan in value.ms2_mass_spectra
169                ]
170                precursor_mz = [
171                    value.mz
172                    for key, value in self.mass_features.items()
173                    if key in mf_ids
174                ]
175                precursor_mz_list.append(precursor_mz)
176
177        # Check that precursor_mz_list same length as scan_list, if not, raise error
178        if len(precursor_mz_list) != len(scan_list):
179            raise ValueError("Length of precursor_mz_list is not equal to scan_list.")
180
181        # Loop through each query spectrum / precursor match and save ids of db spectrum that are decent matches
182        overall_results_dict = {}
183        for i in np.arange(len(scan_list)):
184            scan_oi = scan_list[i]
185            if len(self._ms[scan_oi].mspeaks) > 0:
186                precursor_mzs = precursor_mz_list[i]
187                overall_results_dict[scan_oi] = {}
188                for precursor_mz in precursor_mzs:
189                    query_spectrum = fe_lib.clean_spectrum_for_search(
190                        precursor_mz=precursor_mz,
191                        peaks=np.vstack(
192                            (self._ms[scan_oi].mz_exp, self._ms[scan_oi].abundance)
193                        ).T,
194                        precursor_ions_removal_da=None,
195                        noise_threshold=self._ms[
196                            scan_oi
197                        ].parameters.mass_spectrum.noise_threshold_min_relative_abundance
198                        / 100,
199                        min_ms2_difference_in_da=peak_sep_da,
200                    )
201                    search_results = fe_lib.search(
202                        precursor_mz=precursor_mz,
203                        peaks=query_spectrum,
204                        ms1_tolerance_in_da=self.parameters.mass_spectrum[
205                            "ms1"
206                        ].molecular_search.max_ppm_error
207                        * 10**-6
208                        * precursor_mz,
209                        ms2_tolerance_in_da=peak_sep_da * 0.5,
210                        method={"identity"},
211                        precursor_ions_removal_da=None,
212                        noise_threshold=self._ms[
213                            scan_oi
214                        ].parameters.mass_spectrum.noise_threshold_min_relative_abundance
215                        / 100,
216                        target="cpu",
217                    )["identity_search"]
218                    match_inds = np.where(search_results > min_match_score)[0]
219
220                    # If any decent matches are found, add them to the results dictionary
221                    if len(match_inds) > 0:
222                        match_scores = search_results[match_inds]
223                        ref_ms_ids = [fe_lib[x]["id"] for x in match_inds]
224                        ref_mol_ids = [
225                            fe_lib[x]["molecular_data_id"] for x in match_inds
226                        ]
227                        ref_precursor_mzs = [
228                            fe_lib[x]["precursor_mz"] for x in match_inds
229                        ]
230                        ion_types = [fe_lib[x]["ion_type"] for x in match_inds]
231                        overall_results_dict[scan_oi][precursor_mz] = {
232                            "ref_mol_id": ref_mol_ids,
233                            "ref_ms_id": ref_ms_ids,
234                            "ref_precursor_mz": ref_precursor_mzs,
235                            "precursor_mz_error_ppm": [
236                                (precursor_mz - x) / precursor_mz * 10**6
237                                for x in ref_precursor_mzs
238                            ],
239                            "entropy_similarity": match_scores,
240                            "ref_ion_type": ion_types,
241                        }
242                        if get_additional_metrics:
243                            more_match_quals = [
244                                self.get_more_match_quals(
245                                    self._ms[scan_oi].mz_exp,
246                                    fe_lib[x],
247                                    mz_tol_da=peak_sep_da,
248                                    include_fragment_types=include_fragment_types,
249                                )
250                                for x in match_inds
251                            ]
252                            overall_results_dict[scan_oi][precursor_mz].update(
253                                {
254                                    "query_mz_in_ref_n": [
255                                        x[0] for x in more_match_quals
256                                    ],
257                                    "query_mz_in_ref_fract": [
258                                        x[1] for x in more_match_quals
259                                    ],
260                                    "ref_mz_in_query_n": [
261                                        x[2] for x in more_match_quals
262                                    ],
263                                    "ref_mz_in_query_fract": [
264                                        x[3] for x in more_match_quals
265                                    ],
266                                }
267                            )
268                            if include_fragment_types:
269                                overall_results_dict[scan_oi][precursor_mz].update(
270                                    {
271                                        "query_frag_types": [
272                                            x[4] for x in more_match_quals
273                                        ],
274                                        "ref_frag_types": [
275                                            x[5] for x in more_match_quals
276                                        ],
277                                    }
278                                )
279
280        # Drop scans with no results from dictionary
281        overall_results_dict = {k: v for k, v in overall_results_dict.items() if v}
282
283        # Cast each entry as a MS2SearchResults object
284        for scan_id in overall_results_dict.keys():
285            for precursor_mz in overall_results_dict[scan_id].keys():
286                ms2_spectrum = self._ms[scan_id]
287                ms2_search_results = overall_results_dict[scan_id][precursor_mz]
288                overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults(
289                    ms2_spectrum, precursor_mz, ms2_search_results
290                )
291
292        # Add MS2SearchResults to the existing spectral search results dictionary
293        self.spectral_search_results.update(overall_results_dict)
294
295        # If there are mass features, associate the results with each mass feature
296        if len(self.mass_features) > 0:
297            for mass_feature_id, mass_feature in self.mass_features.items():
298                scan_ids = mass_feature.ms2_scan_numbers
299                for ms2_scan_id in scan_ids:
300                    precursor_mz = mass_feature.mz
301                    try:
302                        self.spectral_search_results[ms2_scan_id][precursor_mz]
303                    except KeyError:
304                        pass
305                    else:
306                        self.mass_features[
307                            mass_feature_id
308                        ].ms2_similarity_results.append(
309                            self.spectral_search_results[ms2_scan_id][precursor_mz]
310                        )
class LCMSSpectralSearch:
  9class LCMSSpectralSearch:
 10    """
 11    Methods for searching LCMS spectra.
 12
 13    This class is designed to be a mixin class for the :obj:`~corems.mass_spectra.factory.lc_class.LCMSBase` class.
 14
 15    """
 16
 17    @staticmethod
 18    def get_more_match_quals(
 19        query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False
 20    ):
 21        """
 22        Return additional match qualities between query and library entry.
 23
 24        Parameters
 25        ----------
 26        query_mz_arr : np.array
 27            Array of query spectrum. Shape (N, 2), with m/z in the first column
 28            and abundance in the second.
 29        lib_entry : dict
 30            Library spectrum entry, with 'mz' key containing the spectrum in
 31            the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
 32        mz_tol_da : float, optional
 33            Tolerance in Da for matching peaks (in MS2). Default is 0.1.
 34        include_fragment_types : bool, optional
 35            If True, include fragment type comparisons in output.
 36            Defaults to False.
 37
 38        Returns
 39        -------
 40        tuple
 41            Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz).
 42
 43        Notes
 44        -----
 45        query_in_lib : int
 46            Number of peaks in query that are present in the library entry (within mz_tol_da).
 47        query_in_lib_fract : float
 48            Fraction of peaks in query that are present in the library entry (within mz_tol_da).
 49        lib_in_query : int
 50            Number of peaks in the library entry that are present in the query (within mz_tol_da).
 51        lib_in_query_fract : float
 52            Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
 53        query_frags : list
 54            List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.
 55        lib_frags : list
 56            List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.
 57
 58        Raises
 59        ------
 60        ValueError
 61            If library entry does not have 'fragment_types' key and include_fragment_types is True.
 62
 63        """
 64
 65        # Get the original mz values from the library entry
 66        lib_mzs = np.array(
 67            re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float
 68        ).reshape(-1, 2)[:, 0]
 69
 70        # Get count and fraction of peaks in query that are in lib entry
 71        query_in_lib = 0
 72        for peak in query_mz_arr:
 73            if np.any(np.isclose(lib_mzs, peak, atol=mz_tol_da)):
 74                query_in_lib += 1
 75        query_in_lib_fract = query_in_lib / len(query_mz_arr)
 76
 77        # Get count and fraction of peaks in lib that are in query
 78        lib_in_query = 0
 79        for peak in lib_mzs:
 80            if np.any(np.isclose(query_mz_arr, peak, atol=mz_tol_da)):
 81                lib_in_query += 1
 82        lib_in_query_fract = lib_in_query / len(lib_mzs)
 83
 84        if include_fragment_types:
 85            # Check that fragment types are present in the library entry
 86            if "fragment_types" not in lib_entry.keys():
 87                raise ValueError(
 88                    "Flash entropy library entry must have 'fragment_types' key to include fragment types in output."
 89                )
 90
 91            # Get types of fragments in the lib entry
 92            lib_frags = lib_entry["fragment_types"]
 93            # make list of the fragment types that are present in the query spectrum
 94            lib_in_query_ids = list(
 95                set(
 96                    [
 97                        ind
 98                        for ind, x in enumerate(lib_mzs)
 99                        if len(np.where(np.isclose(query_mz_arr, x, atol=mz_tol_da))[0])
100                        > 0
101                    ]
102                )
103            )
104            query_frags = list(set([lib_frags[x] for x in lib_in_query_ids]))
105            lib_frags = list(set(lib_frags))
106
107        else:
108            query_frags = None
109            lib_frags = None
110
111        return (
112            query_in_lib,
113            query_in_lib_fract,
114            lib_in_query,
115            lib_in_query_fract,
116            query_frags,
117            lib_frags,
118        )
119
120    def fe_search(
121        self,
122        scan_list,
123        fe_lib,
124        precursor_mz_list=[],
125        use_mass_features=True,
126        peak_sep_da=0.01,
127        get_additional_metrics=True,
128    ):
129        """
130        Search LCMS spectra using a FlashEntropy approach.
131
132        Parameters
133        ----------
134        scan_list : list
135            List of scan numbers to search.
136        fe_lib : :obj:`~ms_entropy.FlashEntropySearch`
137            FlashEntropy Search instance.
138        precursor_mz_list : list, optional
139            List of precursor m/z values to search, by default [], which implies
140            matched with mass features; to enable this use_mass_features must be True.
141        use_mass_features : bool, optional
142            If True, use mass features to get precursor m/z values, by default True.
143            If True, will add search results to mass features' ms2_similarity_results attribute.
144        peak_sep_da : float, optional
145            Minimum separation between m/z peaks spectra in Da. This needs match the
146            approximate resolution of the search spectra and the FlashEntropySearch
147            instance, by default 0.01.
148        get_additional_metrics : bool, optional
149            If True, get additional metrics from FlashEntropy search, by default True.
150
151        Returns
152        -------
153        None, but adds results to self.spectral_search_results and associates these
154        spectral_search_results with mass_features within the self.mass_features dictionary.
155
156        """
157        # Retrieve parameters from self
158        # include_fragment_types should used for lipids queries only, not general metabolomics
159        include_fragment_types = self.parameters.lc_ms.include_fragment_types
160        min_match_score = self.parameters.lc_ms.ms2_min_fe_score
161
162        # If precursor_mz_list is empty and use_mass_features is True, get precursor m/z values from mass features for each scan in scan_list
163        if use_mass_features and len(precursor_mz_list) == 0:
164            precursor_mz_list = []
165            for scan in scan_list:
166                mf_ids = [
167                    key
168                    for key, value in self.mass_features.items()
169                    if scan in value.ms2_mass_spectra
170                ]
171                precursor_mz = [
172                    value.mz
173                    for key, value in self.mass_features.items()
174                    if key in mf_ids
175                ]
176                precursor_mz_list.append(precursor_mz)
177
178        # Check that precursor_mz_list same length as scan_list, if not, raise error
179        if len(precursor_mz_list) != len(scan_list):
180            raise ValueError("Length of precursor_mz_list is not equal to scan_list.")
181
182        # Loop through each query spectrum / precursor match and save ids of db spectrum that are decent matches
183        overall_results_dict = {}
184        for i in np.arange(len(scan_list)):
185            scan_oi = scan_list[i]
186            if len(self._ms[scan_oi].mspeaks) > 0:
187                precursor_mzs = precursor_mz_list[i]
188                overall_results_dict[scan_oi] = {}
189                for precursor_mz in precursor_mzs:
190                    query_spectrum = fe_lib.clean_spectrum_for_search(
191                        precursor_mz=precursor_mz,
192                        peaks=np.vstack(
193                            (self._ms[scan_oi].mz_exp, self._ms[scan_oi].abundance)
194                        ).T,
195                        precursor_ions_removal_da=None,
196                        noise_threshold=self._ms[
197                            scan_oi
198                        ].parameters.mass_spectrum.noise_threshold_min_relative_abundance
199                        / 100,
200                        min_ms2_difference_in_da=peak_sep_da,
201                    )
202                    search_results = fe_lib.search(
203                        precursor_mz=precursor_mz,
204                        peaks=query_spectrum,
205                        ms1_tolerance_in_da=self.parameters.mass_spectrum[
206                            "ms1"
207                        ].molecular_search.max_ppm_error
208                        * 10**-6
209                        * precursor_mz,
210                        ms2_tolerance_in_da=peak_sep_da * 0.5,
211                        method={"identity"},
212                        precursor_ions_removal_da=None,
213                        noise_threshold=self._ms[
214                            scan_oi
215                        ].parameters.mass_spectrum.noise_threshold_min_relative_abundance
216                        / 100,
217                        target="cpu",
218                    )["identity_search"]
219                    match_inds = np.where(search_results > min_match_score)[0]
220
221                    # If any decent matches are found, add them to the results dictionary
222                    if len(match_inds) > 0:
223                        match_scores = search_results[match_inds]
224                        ref_ms_ids = [fe_lib[x]["id"] for x in match_inds]
225                        ref_mol_ids = [
226                            fe_lib[x]["molecular_data_id"] for x in match_inds
227                        ]
228                        ref_precursor_mzs = [
229                            fe_lib[x]["precursor_mz"] for x in match_inds
230                        ]
231                        ion_types = [fe_lib[x]["ion_type"] for x in match_inds]
232                        overall_results_dict[scan_oi][precursor_mz] = {
233                            "ref_mol_id": ref_mol_ids,
234                            "ref_ms_id": ref_ms_ids,
235                            "ref_precursor_mz": ref_precursor_mzs,
236                            "precursor_mz_error_ppm": [
237                                (precursor_mz - x) / precursor_mz * 10**6
238                                for x in ref_precursor_mzs
239                            ],
240                            "entropy_similarity": match_scores,
241                            "ref_ion_type": ion_types,
242                        }
243                        if get_additional_metrics:
244                            more_match_quals = [
245                                self.get_more_match_quals(
246                                    self._ms[scan_oi].mz_exp,
247                                    fe_lib[x],
248                                    mz_tol_da=peak_sep_da,
249                                    include_fragment_types=include_fragment_types,
250                                )
251                                for x in match_inds
252                            ]
253                            overall_results_dict[scan_oi][precursor_mz].update(
254                                {
255                                    "query_mz_in_ref_n": [
256                                        x[0] for x in more_match_quals
257                                    ],
258                                    "query_mz_in_ref_fract": [
259                                        x[1] for x in more_match_quals
260                                    ],
261                                    "ref_mz_in_query_n": [
262                                        x[2] for x in more_match_quals
263                                    ],
264                                    "ref_mz_in_query_fract": [
265                                        x[3] for x in more_match_quals
266                                    ],
267                                }
268                            )
269                            if include_fragment_types:
270                                overall_results_dict[scan_oi][precursor_mz].update(
271                                    {
272                                        "query_frag_types": [
273                                            x[4] for x in more_match_quals
274                                        ],
275                                        "ref_frag_types": [
276                                            x[5] for x in more_match_quals
277                                        ],
278                                    }
279                                )
280
281        # Drop scans with no results from dictionary
282        overall_results_dict = {k: v for k, v in overall_results_dict.items() if v}
283
284        # Cast each entry as a MS2SearchResults object
285        for scan_id in overall_results_dict.keys():
286            for precursor_mz in overall_results_dict[scan_id].keys():
287                ms2_spectrum = self._ms[scan_id]
288                ms2_search_results = overall_results_dict[scan_id][precursor_mz]
289                overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults(
290                    ms2_spectrum, precursor_mz, ms2_search_results
291                )
292
293        # Add MS2SearchResults to the existing spectral search results dictionary
294        self.spectral_search_results.update(overall_results_dict)
295
296        # If there are mass features, associate the results with each mass feature
297        if len(self.mass_features) > 0:
298            for mass_feature_id, mass_feature in self.mass_features.items():
299                scan_ids = mass_feature.ms2_scan_numbers
300                for ms2_scan_id in scan_ids:
301                    precursor_mz = mass_feature.mz
302                    try:
303                        self.spectral_search_results[ms2_scan_id][precursor_mz]
304                    except KeyError:
305                        pass
306                    else:
307                        self.mass_features[
308                            mass_feature_id
309                        ].ms2_similarity_results.append(
310                            self.spectral_search_results[ms2_scan_id][precursor_mz]
311                        )

Methods for searching LCMS spectra.

This class is designed to be a mixin class for the ~corems.mass_spectra.factory.lc_class.LCMSBase class.

@staticmethod
def get_more_match_quals(query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False):
 17    @staticmethod
 18    def get_more_match_quals(
 19        query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False
 20    ):
 21        """
 22        Return additional match qualities between query and library entry.
 23
 24        Parameters
 25        ----------
 26        query_mz_arr : np.array
 27            Array of query spectrum. Shape (N, 2), with m/z in the first column
 28            and abundance in the second.
 29        lib_entry : dict
 30            Library spectrum entry, with 'mz' key containing the spectrum in
 31            the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
 32        mz_tol_da : float, optional
 33            Tolerance in Da for matching peaks (in MS2). Default is 0.1.
 34        include_fragment_types : bool, optional
 35            If True, include fragment type comparisons in output.
 36            Defaults to False.
 37
 38        Returns
 39        -------
 40        tuple
 41            Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz).
 42
 43        Notes
 44        -----
 45        query_in_lib : int
 46            Number of peaks in query that are present in the library entry (within mz_tol_da).
 47        query_in_lib_fract : float
 48            Fraction of peaks in query that are present in the library entry (within mz_tol_da).
 49        lib_in_query : int
 50            Number of peaks in the library entry that are present in the query (within mz_tol_da).
 51        lib_in_query_fract : float
 52            Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
 53        query_frags : list
 54            List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.
 55        lib_frags : list
 56            List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.
 57
 58        Raises
 59        ------
 60        ValueError
 61            If library entry does not have 'fragment_types' key and include_fragment_types is True.
 62
 63        """
 64
 65        # Get the original mz values from the library entry
 66        lib_mzs = np.array(
 67            re.findall(r"\(([^,]+),([^)]+)\)", lib_entry["mz"]), dtype=float
 68        ).reshape(-1, 2)[:, 0]
 69
 70        # Get count and fraction of peaks in query that are in lib entry
 71        query_in_lib = 0
 72        for peak in query_mz_arr:
 73            if np.any(np.isclose(lib_mzs, peak, atol=mz_tol_da)):
 74                query_in_lib += 1
 75        query_in_lib_fract = query_in_lib / len(query_mz_arr)
 76
 77        # Get count and fraction of peaks in lib that are in query
 78        lib_in_query = 0
 79        for peak in lib_mzs:
 80            if np.any(np.isclose(query_mz_arr, peak, atol=mz_tol_da)):
 81                lib_in_query += 1
 82        lib_in_query_fract = lib_in_query / len(lib_mzs)
 83
 84        if include_fragment_types:
 85            # Check that fragment types are present in the library entry
 86            if "fragment_types" not in lib_entry.keys():
 87                raise ValueError(
 88                    "Flash entropy library entry must have 'fragment_types' key to include fragment types in output."
 89                )
 90
 91            # Get types of fragments in the lib entry
 92            lib_frags = lib_entry["fragment_types"]
 93            # make list of the fragment types that are present in the query spectrum
 94            lib_in_query_ids = list(
 95                set(
 96                    [
 97                        ind
 98                        for ind, x in enumerate(lib_mzs)
 99                        if len(np.where(np.isclose(query_mz_arr, x, atol=mz_tol_da))[0])
100                        > 0
101                    ]
102                )
103            )
104            query_frags = list(set([lib_frags[x] for x in lib_in_query_ids]))
105            lib_frags = list(set(lib_frags))
106
107        else:
108            query_frags = None
109            lib_frags = None
110
111        return (
112            query_in_lib,
113            query_in_lib_fract,
114            lib_in_query,
115            lib_in_query_fract,
116            query_frags,
117            lib_frags,
118        )

Return additional match qualities between query and library entry.

Parameters
  • query_mz_arr (np.array): 1-D array of the m/z values of the query spectrum, shape (N,). Abundances are not used; each element is compared directly against the library m/z values.
  • lib_entry (dict): Library spectrum entry, with 'mz' key containing the spectrum in the format (mz, abundance),(mz, abundance), i.e. from MetabRef.
  • mz_tol_da (float, optional): Tolerance in Da for matching peaks (in MS2). Default is 0.1.
  • include_fragment_types (bool, optional): If True, include fragment type comparisons in output. Defaults to False.
Returns
  • tuple: Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags). query_frags and lib_frags are None when include_fragment_types is False.
Notes

  • query_in_lib (int): Number of peaks in query that are present in the library entry (within mz_tol_da).
  • query_in_lib_fract (float): Fraction of peaks in query that are present in the library entry (within mz_tol_da).
  • lib_in_query (int): Number of peaks in the library entry that are present in the query (within mz_tol_da).
  • lib_in_query_fract (float): Fraction of peaks in the library entry that are present in the query (within mz_tol_da).
  • query_frags (list): List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.
  • lib_frags (list): List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.

Raises
  • ValueError: If library entry does not have 'fragment_types' key and include_fragment_types is True.