corems.chroma_peak.factory.chroma_peak_classes

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 12, 2019"
  3
  4import matplotlib.pyplot as plt
  5import numpy as np
  6import pandas as pd
  7import copy
  8
  9from corems.chroma_peak.calc.ChromaPeakCalc import (
 10    GCPeakCalculation,
 11    LCMSMassFeatureCalculation,
 12)
 13from corems.mass_spectra.factory.chromat_data import EIC_Data
 14from corems.molecular_id.factory.EI_SQL import LowResCompoundRef
 15
 16
 17class ChromaPeakBase:
 18    """Base class for chromatographic peak (ChromaPeak) objects.
 19
 20    Parameters
 21    -------
 22    chromatogram_parent : Chromatogram
 23        The parent chromatogram object.
 24    mass_spectrum_obj : MassSpectrum
 25        The mass spectrum object.
 26    start_index : int
 27        The start index of the peak.
 28    index : int
 29        The index of the peak.
 30    final_index : int
 31        The final index of the peak.
 32
 33    Attributes
 34    --------
 35    start_scan : int
 36        The start scan of the peak.
 37    final_scan : int
 38        The final scan of the peak.
 39    apex_scan : int
 40        The apex scan of the peak.
 41    chromatogram_parent : Chromatogram
 42        The parent chromatogram object.
 43    mass_spectrum : MassSpectrum
 44        The mass spectrum object.
 45    _area : float
 46        The area of the peak.
 47
 48    Properties
 49    --------
 50    * retention_time : float.
 51        The retention time of the peak.
 52    * tic : float.
 53        The total ion current of the peak.
 54    * area : float.
 55        The area of the peak.
 56    * rt_list : list.
 57        The list of retention times within the peak.
 58    * tic_list : list.
 59        The list of total ion currents within the peak.
 60
 61    Methods
 62    --------
 63    * None
 64    """
 65
 66    def __init__(
 67        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
 68    ):
 69        self.start_scan = start_index
 70        self.final_scan = final_index
 71        self.apex_scan = int(index)
 72        self.chromatogram_parent = chromatogram_parent
 73        self.mass_spectrum = mass_spectrum_obj
 74        self._area = None
 75
 76    @property
 77    def retention_time(self):
 78        """Retention Time"""
 79        return self.mass_spectrum.retention_time
 80
 81    @property
 82    def tic(self):
 83        """Total Ion Current"""
 84        return self.mass_spectrum.tic
 85
 86    @property
 87    def area(self):
 88        """Peak Area"""
 89        return self._area
 90
 91    @property
 92    def rt_list(self):
 93        """Retention Time List"""
 94        return [
 95            self.chromatogram_parent.retention_time[i]
 96            for i in range(self.start_scan, self.final_scan + 1)
 97        ]
 98
 99    @property
100    def tic_list(self):
101        """Total Ion Current List"""
102        return [
103            self.chromatogram_parent.tic[i]
104            for i in range(self.start_scan, self.final_scan + 1)
105        ]
106
107
class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.

    Parameters
    ----------
    lcms_parent : LCMS
        The parent LCMSBase object.
    mz : float
        The observed mass to charge ratio of the feature.
    retention_time : float
        The retention time of the feature (in minutes), at the apex.
    intensity : float
        The intensity of the feature.
    apex_scan : int
        The scan number of the apex of the feature.
    persistence : float, optional
        The persistence of the feature. Default is None.
    id : int, optional
        The ID of the feature. When None (default), the ID is one more than
        the largest key in ``lcms_parent.mass_features``, or 0 if that
        mapping is empty. An explicit ``0`` is honoured.

    Attributes
    ----------
    _mz_exp : float
        The observed mass to charge ratio of the feature.
    _mz_cal : float
        The calibrated mass to charge ratio of the feature.
    _retention_time : float
        The retention time of the feature (in minutes), at the apex.
    _apex_scan : int
        The scan number of the apex of the feature.
    _intensity : float
        The intensity of the feature.
    _persistence : float
        The persistence of the feature.
    _eic_data : EIC_Data
        The EIC data object associated with the feature.
    _dispersity_index : float
        The dispersity index of the feature, in minutes.
    _normalized_dispersity_index : float
        The normalized dispersity index of the feature (unitless, fraction of
        total window used to calculate dispersity index).
    _half_height_width : numpy.ndarray
        The half height width of the feature (in minutes, as an array of min
        and max values).
    _tailing_factor : float
        The tailing factor of the feature.
        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates
        symmetrical peak.
    _noise_score : tuple
        The noise score of the feature, as a tuple of (left, right) scores.
        Each score is a float, with higher values indicating better signal to
        noise.
    _gaussian_similarity : float
        The Gaussian similarity of the feature, as a float between 0 and 1.
        1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
    _ms_deconvoluted_idx : [int]
        The indexes of the mass_spectrum attribute in the deconvoluted mass
        spectrum.
    is_calibrated : bool
        If True, the feature has been calibrated. Default is False.
    monoisotopic_mf_id : int
        Mass feature id that is the monoisotopic version of self.
        (If equal to self.id, then self is the monoisotopic feature.)
        Default is None.
    isotopologue_type : str
        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1"
        etc. Default is None.
    ms2_scan_numbers : list
        List of scan numbers of the MS2 spectra associated with the feature.
        Default is an empty list.
    ms2_mass_spectra : dict
        Dictionary of MS2 spectra associated with the feature
        (key = scan number for DDA). Default is an empty dictionary.
    ms2_similarity_results : list
        List of MS2 similarity results associated with the mass feature.
        Default is an empty list.
    id : int
        The ID of the feature, also the key in the parent LCMS object's
        `mass_features` dictionary.
    mass_spectrum_deconvoluted_parent : bool
        If True, the mass feature corresponds to the most intense peak in the
        deconvoluted mass spectrum. Default is None.
    associated_mass_features_deconvoluted : list
        List of mass features associated with the deconvoluted mass spectrum.
        Default is an empty list.
    """

    def __init__(
        self,
        lcms_parent,
        mz: float,
        retention_time: float,
        intensity: float,
        apex_scan: int,
        persistence: float = None,
        id: int = None,
    ):
        super().__init__(
            chromatogram_parent=lcms_parent,
            mass_spectrum_obj=None,
            start_index=None,
            index=apex_scan,
            final_index=None,
        )
        # Core attributes, marked as private
        self._mz_exp: float = mz
        self._mz_cal: float = None
        self._retention_time: float = retention_time
        self._apex_scan: int = apex_scan
        self._intensity: float = intensity
        self._persistence: float = persistence
        self._eic_data: EIC_Data = None
        self._dispersity_index: float = None
        self._normalized_dispersity_index: float = None
        self._half_height_width: np.ndarray = None
        self._ms_deconvoluted_idx = None
        self._tailing_factor: float = None
        self._noise_score: tuple = None
        self._gaussian_similarity: float = None

        # Additional attributes
        self.monoisotopic_mf_id = None
        self.isotopologue_type = None
        self.ms2_scan_numbers = []
        self.ms2_mass_spectra = {}
        self.ms2_similarity_results = []
        self.mass_spectrum_deconvoluted_parent: bool = None
        self.associated_mass_features_deconvoluted = []

        # Compare against None rather than truthiness: 0 is a valid id (it is
        # the very first id generated below) and must not be replaced.
        if id is not None:
            self.id = id
        else:
            # New key = largest existing key in the parent's mass features + 1
            existing_ids = lcms_parent.mass_features.keys()
            self.id = max(existing_ids) + 1 if existing_ids else 0

    def update_mz(self):
        """Update the mass to charge ratio from the mass spectrum object.

        The m/z of the closest MS1 peak replaces the stored experimental m/z
        only when the two differ by less than 0.01.

        Raises
        ------
        ValueError
            If no mass spectrum is attached, or it has no m/z values.
        """
        if self.mass_spectrum is None:
            raise ValueError(
                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
            )
        if len(self.mass_spectrum.mz_exp) == 0:
            raise ValueError(
                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
            )
        new_mz = self.ms1_peak.mz_exp

        # Only accept the new value when it is close to the current one
        if abs(new_mz - self.mz) < 0.01:
            self._mz_exp = new_mz

    def plot(
        self,
        to_plot=["EIC", "MS1", "MS2"],
        return_fig=True,
        plot_smoothed_eic=False,
        plot_eic_datapoints=False,
    ):
        """Plot the mass feature.

        Parameters
        ----------
        to_plot : list, optional
            List of strings specifying what to plot, any iteration of
            "EIC", "MS2", and "MS1". Panels whose data is not attached to the
            feature are dropped. Default is ["EIC", "MS1", "MS2"].
            (The default list is never mutated, only re-bound.)
        return_fig : bool, optional
            If True, the figure is closed and returned. Default is True.
        plot_smoothed_eic : bool, optional
            If True, the smoothed EIC is plotted. Default is False.
        plot_eic_datapoints : bool, optional
            If True, the EIC data points are plotted. Default is False.

        Returns
        -------
        matplotlib.figure.Figure or None
            The figure object if `return_fig` is True, otherwise None.

        Raises
        ------
        ValueError
            If nothing is left to plot after dropping unavailable panels.
        """
        lc_ms_params = self.chromatogram_parent.parameters.lc_ms
        eic_buffer_time = lc_ms_params.eic_buffer_time

        # Drop panels for which the feature carries no data
        panel_available = {
            "MS1": self.mass_spectrum is not None,
            "MS2": len(self.ms2_mass_spectra) > 0,
            "EIC": self._eic_data is not None,
        }
        to_plot = [x for x in to_plot if panel_available.get(x, True)]
        if len(to_plot) == 0:
            # plt.subplots would fail on zero rows with a less helpful message
            raise ValueError(
                "Nothing to plot for mass feature "
                + str(self.id)
                + ": no EIC, MS1 or MS2 data is available"
            )

        # best_ms2 can be costly (it may build a DataFrame), so resolve it once
        best_ms2 = None
        if "MS2" in to_plot or (
            "EIC" in to_plot and len(self.ms2_scan_numbers) > 0
        ):
            best_ms2 = self.best_ms2

        fig, axs = plt.subplots(
            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
        )
        fig.suptitle(
            "Mass Feature "
            + str(self.id)
            + ": m/z = "
            + str(round(self.mz, ndigits=4))
            + "; time = "
            + str(round(self.retention_time, ndigits=1))
            + " minutes"
        )

        i = 0
        # EIC plot
        if "EIC" in to_plot:
            ax = axs[i][0]
            ax.set_title("EIC", loc="left")
            ax.plot(
                self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC"
            )
            if plot_eic_datapoints:
                ax.scatter(
                    self._eic_data.time,
                    self._eic_data.eic,
                    c="tab:blue",
                    label="EIC Data Points",
                )
            if plot_smoothed_eic:
                ax.plot(
                    self._eic_data.time,
                    self._eic_data.eic_smoothed,
                    c="tab:red",
                    label="Smoothed EIC",
                )
            if self.start_scan is not None:
                feature_eic = self.eic_list
                ax.fill_between(
                    self.eic_rt_list, feature_eic, color="b", alpha=0.2
                )
                eic_max = feature_eic.max()
            else:
                if lc_ms_params.verbose_processing:
                    print(
                        "No start and final scan numbers were provided for mass feature "
                        + str(self.id)
                    )
                # Without peak bounds the feature slice cannot be computed,
                # so scale the axis to the whole EIC instead of failing.
                eic_max = np.max(self._eic_data.eic)
            ax.set_ylabel("Intensity")
            ax.set_xlabel("Time (minutes)")
            ax.set_ylim(0, eic_max * 1.1)
            ax.set_xlim(
                self.retention_time - eic_buffer_time,
                self.retention_time + eic_buffer_time,
            )
            ax.axvline(
                x=self.retention_time, color="k", label="MS1 scan time (apex)"
            )
            # Scan numbers may be registered before any MS2 spectrum is loaded
            if len(self.ms2_scan_numbers) > 0 and best_ms2 is not None:
                ax.axvline(
                    x=self.chromatogram_parent.get_time_of_scan_id(
                        best_ms2.scan_number
                    ),
                    color="grey",
                    linestyle="--",
                    label="MS2 scan time",
                )
            ax.legend(loc="upper left")
            ax.yaxis.get_major_formatter().set_useOffset(False)
            i += 1

        # MS1 plot
        if "MS1" in to_plot:
            ax = axs[i][0]
            if self._ms_deconvoluted_idx is not None:
                # The property deep-copies the spectrum on every access
                ms_deconvoluted = self.mass_spectrum_deconvoluted
                ax.set_title("MS1 (deconvoluted)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    alpha=0.2,
                    label="Raw MS1",
                )
                ax.vlines(
                    ms_deconvoluted.mz_exp,
                    0,
                    ms_deconvoluted.abundance,
                    color="k",
                    label="Deconvoluted MS1",
                )
                ax.set_xlim(
                    ms_deconvoluted.mz_exp.min() * 0.8,
                    ms_deconvoluted.mz_exp.max() * 1.1,
                )
                ax.set_ylim(0, ms_deconvoluted.abundance.max() * 1.1)
            else:
                ax.set_title("MS1 (raw)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    label="Raw MS1",
                )
                ax.set_xlim(
                    self.mass_spectrum.mz_exp.min() * 0.8,
                    self.mass_spectrum.mz_exp.max() * 1.1,
                )
                ax.set_ylim(bottom=0)

            ms1_peak = self.ms1_peak
            # Absolute difference: a peak far *below* the feature m/z must be
            # rejected just like one far above it.
            if abs(ms1_peak.mz_exp - self.mz) < 0.01:
                ax.vlines(
                    ms1_peak.mz_exp,
                    0,
                    ms1_peak.abundance,
                    color="m",
                    label="Feature m/z",
                )
            else:
                if lc_ms_params.verbose_processing:
                    print(
                        "The m/z of the mass feature "
                        + str(self.id)
                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
                    )
            ax.legend(loc="upper left")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.yaxis.set_tick_params(labelleft=False)
            i += 1

        # MS2 plot
        if "MS2" in to_plot:
            ax = axs[i][0]
            ax.set_title("MS2", loc="left")
            ax.vlines(best_ms2.mz_exp, 0, best_ms2.abundance, color="k")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.set_ylim(bottom=0)
            ax.yaxis.get_major_formatter().set_scientific(False)
            ax.yaxis.get_major_formatter().set_useOffset(False)
            ax.set_xlim(best_ms2.mz_exp.min() * 0.8, best_ms2.mz_exp.max() * 1.1)
            ax.yaxis.set_tick_params(labelleft=False)

        # Add space between subplots
        plt.tight_layout()

        if return_fig:
            # Close figure
            plt.close(fig)
            return fig

    @property
    def mz(self):
        """Mass to charge ratio of the mass feature.

        Returns the calibrated m/z when one has been set, otherwise the
        measured m/z.
        """
        if self._mz_cal is not None:
            return self._mz_cal
        return self._mz_exp

    @property
    def mass_spectrum_deconvoluted(self):
        """Deconvoluted mass spectrum associated with the mass feature.

        A deep copy of the attached mass spectrum restricted to the
        deconvoluted indexes; a new copy is made on every access.

        Raises
        ------
        ValueError
            If deconvolution has not been performed.
        """
        if self._ms_deconvoluted_idx is None:
            raise ValueError(
                "Deconvolution has not been performed for mass feature " + str(self.id)
            )
        ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
        ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
        return ms_deconvoluted

    @property
    def retention_time(self):
        """Retention time of the mass feature"""
        return self._retention_time

    @retention_time.setter
    def retention_time(self, value):
        """Set the retention time of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The retention time of the mass feature must be a float")
        self._retention_time = value

    @property
    def apex_scan(self):
        """Apex scan of the mass feature"""
        return self._apex_scan

    @apex_scan.setter
    def apex_scan(self, value):
        """Set the apex scan of the mass feature"""
        if not isinstance(value, int):
            raise ValueError("The apex scan of the mass feature must be an integer")
        self._apex_scan = value

    @property
    def intensity(self):
        """Intensity of the mass feature"""
        return self._intensity

    @intensity.setter
    def intensity(self, value):
        """Set the intensity of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The intensity of the mass feature must be a float")
        self._intensity = value

    @property
    def persistence(self):
        """Persistence of the mass feature"""
        return self._persistence

    @persistence.setter
    def persistence(self, value):
        """Set the persistence of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The persistence of the mass feature must be a float")
        self._persistence = value

    def _eic_feature_bounds(self):
        """Return (start, final) positions of the feature's scans in the EIC data."""
        eic_scans = self._eic_data.scans.tolist()
        return eic_scans.index(self.start_scan), eic_scans.index(self.final_scan)

    @property
    def eic_rt_list(self):
        """Retention time list between the beginning and end of the mass feature"""
        start_index, final_index = self._eic_feature_bounds()
        return self._eic_data.time[start_index : final_index + 1]

    @property
    def eic_list(self):
        """EIC List between the beginning and end of the mass feature"""
        start_index, final_index = self._eic_feature_bounds()
        return self._eic_data.eic[start_index : final_index + 1]

    @property
    def ms1_peak(self):
        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
        # argmin returns the first minimum, i.e. the same peak the former
        # min()/list.index() pair selected, without the Python-level scans.
        mz_values = np.asarray(self.mass_spectrum.mz_exp)
        closest_mz_index = int(np.argmin(np.abs(mz_values - self.mz)))
        return self.mass_spectrum._mspeaks[closest_mz_index]

    @property
    def tailing_factor(self):
        """Tailing factor of the mass feature"""
        return self._tailing_factor

    @tailing_factor.setter
    def tailing_factor(self, value):
        """Set the tailing factor of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The tailing factor of the mass feature must be a float")
        self._tailing_factor = value

    @property
    def dispersity_index(self):
        """Dispersity index of the mass feature"""
        return self._dispersity_index

    @dispersity_index.setter
    def dispersity_index(self, value):
        """Set the dispersity index of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The dispersity index of the mass feature must be a float")
        self._dispersity_index = value

    @property
    def normalized_dispersity_index(self):
        """Normalized dispersity index of the mass feature, unitless (fraction of total window used)"""
        return self._normalized_dispersity_index

    @property
    def half_height_width(self):
        """Half height width of the mass feature, average of min and max values, in minutes"""
        return np.mean(self._half_height_width)

    @property
    def noise_score(self):
        """Mean of left and right noise scores.

        Returns
        -------
        float or np.nan
            Mean noise score; the single valid side if the other is np.nan;
            np.nan if both sides are np.nan or no score is stored.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values
        if np.isnan(left) and np.isnan(right):
            return np.nan
        elif np.isnan(left):
            return right
        elif np.isnan(right):
            return left
        else:
            return (left + right) / 2.0

    @property
    def noise_score_min(self):
        """Minimum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Minimum noise score, or np.nan if both sides are np.nan.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmin ignores NaN
        return np.nanmin([left, right])

    @property
    def noise_score_max(self):
        """Maximum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Maximum noise score, or np.nan if both sides are np.nan.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmax ignores NaN
        return np.nanmax([left, right])

    @property
    def best_ms2(self):
        """Points to the best representative MS2 mass spectrum

        Notes
        -----
        If there are MS2 similarity results, this returns the MS2 mass
        spectrum with the highest entropy similarity score.
        Otherwise, if there is only one MS2 mass spectrum, it is returned.
        Otherwise each spectrum is scored as the product of a time score
        (closeness to the feature's retention time), a resolving power score
        and a chimeric score (0 when the nearest m/z is between 0.1 and 0.8
        away from the feature m/z, 1 otherwise); the highest score wins.

        Returns
        -------
        MassSpectrum or None
            The best MS2 mass spectrum, or None if there are no MS2 spectra.

        Raises
        ------
        ValueError
            If several MS2 spectra exist but none can be scored.
        """
        if len(self.ms2_similarity_results) > 0:
            # the scan number with the highest similarity score
            results_df = pd.concat(
                [x.to_dataframe() for x in self.ms2_similarity_results]
            )
            results_df = results_df.sort_values(
                by="entropy_similarity", ascending=False
            )
            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
            return self.ms2_mass_spectra[best_scan_number]

        ms2_scans = list(self.ms2_mass_spectra.keys())
        if len(ms2_scans) == 0:  # if no ms2 spectra, return None
            return None
        if len(ms2_scans) == 1:  # if only one ms2 spectra, return it
            return self.ms2_mass_spectra[ms2_scans[0]]

        mz_diff_list = []  # |feature m/z - nearest m/z| in each scan
        res_list = []  # maximum resolving power of peaks in each scan
        time_diff_list = []  # |scan time - feature retention time| for each scan
        for scan in ms2_scans:
            ms2_spectrum = self.ms2_mass_spectra[scan]
            if len(ms2_spectrum.mspeaks) > 0:
                closest_mz = min(
                    ms2_spectrum.mz_exp,
                    key=lambda x: abs(x - self.mz),
                )
                if all(np.isnan(ms2_spectrum.resolving_power)):
                    # All NA for resolving power in peaks, not uncommon in
                    # CID spectra; assume a very low resolving power
                    res_list.append(2)
                else:
                    res_list.append(np.nanmax(ms2_spectrum.resolving_power))
                mz_diff_list.append(np.abs(closest_mz - self.mz))
                time_diff_list.append(
                    np.abs(
                        self.chromatogram_parent.get_time_of_scan_id(scan)
                        - self.retention_time
                    )
                )
            else:
                # Empty spectra stay in the lists (as NaN) to keep indexes
                # aligned with ms2_scans
                res_list.append(np.nan)
                mz_diff_list.append(np.nan)
                time_diff_list.append(np.nan)

        # Convert diff_lists into logical scores (higher is better for each score)
        time_diff = np.array(time_diff_list, dtype=float)
        resolution = np.array(res_list, dtype=float)
        mz_diff = np.array(mz_diff_list, dtype=float)
        time_score = 1 - time_diff / np.nanmax(time_diff)
        res_score = resolution / np.nanmax(resolution)
        # mz_score is 0 for possible chimerics, 1 for all others (already
        # within mass tolerance before assigning); NaN compares False -> 1
        mz_score = np.where((mz_diff < 0.8) & (mz_diff > 0.1), 0.0, 1.0)

        total_score = time_score * res_score * mz_score
        if np.all(np.isnan(total_score)):
            # nanargmax would fail with an opaque "All-NaN slice" error
            raise ValueError(
                "No best MS2 mass spectrum could be found for mass feature "
                + str(self.id)
            )
        return self.ms2_mass_spectra[ms2_scans[int(np.nanargmax(total_score))]]
733
734
class GCPeak(ChromaPeakBase, GCPeakCalculation):
    """Class representing a peak in a gas chromatography (GC) chromatogram.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum_obj : MassSpectrum
        The mass spectrum object associated with the peak.
    indexes : tuple
        The (start, apex, final) indexes of the peak in the chromatogram;
        unpacked positionally into the base class constructor.

    Attributes
    ----------
    _compounds : list
        List of compounds associated with the peak.
    _ri : float or None
        Retention index of the peak.

    Methods
    -------
    * __len__(). Returns the number of compounds associated with the peak.
    * __getitem__(position).  Returns the compound at the specified position.
    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    * clear_compounds(). Removes all compounds from the peak.
    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.

    Properties
    ----------
    * ri. The retention index of the peak.
    * highest_ss_compound. The compound with the highest spectral similarity score.
    * highest_score_compound. The compound with the highest similarity score.
    * compound_names. List of names of compounds associated with the peak.
    """

    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
        self._compounds = []
        self._ri = None
        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)

    def __len__(self):
        """Return the number of compounds associated with the peak."""
        return len(self._compounds)

    def __getitem__(self, position):
        """Return the compound stored at `position`."""
        return self._compounds[position]

    def remove_compound(self, compounds_obj):
        """Remove `compounds_obj` from the peak (ValueError if absent)."""
        self._compounds.remove(compounds_obj)

    def clear_compounds(self):
        """Remove all compounds from the peak."""
        self._compounds = []

    def add_compound(
        self,
        compounds_dict,
        spectral_similarity_scores,
        ri_score=None,
        similarity_score=None,
    ):
        """Adds a compound to the peak with the specified attributes.

        After insertion the compound list is re-sorted in descending order:
        by `similarity_score` when one is given for the new compound,
        otherwise by `spectral_similarity_score`. Compounds whose sort score
        is None are placed last.

        Parameters
        ----------
        compounds_dict : dict
            Dictionary containing the compound information.
        spectral_similarity_scores : dict
            Dictionary containing the spectral similarity scores. Its
            "cosine_correlation" entry (None if missing) becomes the
            compound's `spectral_similarity_score`.
        ri_score : float or None, optional
            The retention index score of the compound. Default is None.
        similarity_score : float or None, optional
            The similarity score of the compound. Default is None.
        """

        def rank(score):
            # None-safe key: missing scores sort after every real score
            # instead of raising TypeError on a None/float comparison.
            return (score is not None, score if score is not None else 0.0)

        compound_obj = LowResCompoundRef(compounds_dict)
        compound_obj.spectral_similarity_scores = spectral_similarity_scores
        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
            "cosine_correlation"
        )
        # TODO check is the above line correct?
        compound_obj.ri_score = ri_score
        compound_obj.similarity_score = similarity_score
        self._compounds.append(compound_obj)
        # `is not None` rather than truthiness: a score of 0.0 is a real score
        if similarity_score is not None:
            self._compounds.sort(
                key=lambda c: rank(c.similarity_score), reverse=True
            )
        else:
            self._compounds.sort(
                key=lambda c: rank(c.spectral_similarity_score), reverse=True
            )

    @property
    def ri(self):
        """Returns the retention index of the peak.

        Returns
        -------
        float or None
            The retention index of the peak.
        """
        return self._ri

    @property
    def highest_ss_compound(self):
        """Returns the compound with the highest spectral similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest spectral similarity score, or None
            if the peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.spectral_similarity_score)
        else:
            return None

    @property
    def highest_score_compound(self):
        """Returns the compound with the highest similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest similarity score, or None if the
            peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.similarity_score)
        else:
            return None

    @property
    def compound_names(self):
        """Returns a list of names of compounds associated with the peak.

        Returns
        -------
        list
            List of names of compounds associated with the peak (empty when
            the peak has no compounds).
        """
        if self:
            return [c.name for c in self]
        else:
            return []
872
873
class GCPeakDeconvolved(GCPeak):
    """A deconvolved chromatographic peak carrying its own RT/TIC traces.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        Chromatogram object that owns this peak.
    mass_spectra : list
        Mass spectra belonging to the peak; stored as a list copy.
    apex_index : int
        Position of the apex mass spectrum inside `mass_spectra`.
    rt_list : list
        Retention times of the peak; stored as a list copy.
    tic_list : list
        Total ion currents of the peak; stored as a list copy.
    """

    def __init__(
        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    ):
        self._ri = None
        self._rt_list = list(rt_list)
        self._tic_list = list(tic_list)
        self.mass_spectra = list(mass_spectra)

        # The peak spans the whole spectra list: first item, apex, last item.
        apex_spectrum = self.mass_spectra[apex_index]
        last_position = len(self.mass_spectra) - 1
        peak_indexes = (0, apex_index, last_position)
        super().__init__(chromatogram_parent, apex_spectrum, peak_indexes)

    @property
    def rt_list(self):
        """Retention times supplied at construction.

        Returns
        -------
        list
            The stored retention time list.
        """
        stored_times = self._rt_list
        return stored_times

    @property
    def tic_list(self):
        """Total ion currents supplied at construction.

        Returns
        -------
        list
            The stored total ion current list.
        """
        stored_tics = self._tic_list
        return stored_tics
class ChromaPeakBase:
 18class ChromaPeakBase:
 19    """Base class for chromatographic peak (ChromaPeak) objects.
 20
 21    Parameters
 22    -------
 23    chromatogram_parent : Chromatogram
 24        The parent chromatogram object.
 25    mass_spectrum_obj : MassSpectrum
 26        The mass spectrum object.
 27    start_index : int
 28        The start index of the peak.
 29    index : int
 30        The index of the peak.
 31    final_index : int
 32        The final index of the peak.
 33
 34    Attributes
 35    --------
 36    start_scan : int
 37        The start scan of the peak.
 38    final_scan : int
 39        The final scan of the peak.
 40    apex_scan : int
 41        The apex scan of the peak.
 42    chromatogram_parent : Chromatogram
 43        The parent chromatogram object.
 44    mass_spectrum : MassSpectrum
 45        The mass spectrum object.
 46    _area : float
 47        The area of the peak.
 48
 49    Properties
 50    --------
 51    * retention_time : float.
 52        The retention time of the peak.
 53    * tic : float.
 54        The total ion current of the peak.
 55    * area : float.
 56        The area of the peak.
 57    * rt_list : list.
 58        The list of retention times within the peak.
 59    * tic_list : list.
 60        The list of total ion currents within the peak.
 61
 62    Methods
 63    --------
 64    * None
 65    """
 66
 67    def __init__(
 68        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
 69    ):
 70        self.start_scan = start_index
 71        self.final_scan = final_index
 72        self.apex_scan = int(index)
 73        self.chromatogram_parent = chromatogram_parent
 74        self.mass_spectrum = mass_spectrum_obj
 75        self._area = None
 76
 77    @property
 78    def retention_time(self):
 79        """Retention Time"""
 80        return self.mass_spectrum.retention_time
 81
 82    @property
 83    def tic(self):
 84        """Total Ion Current"""
 85        return self.mass_spectrum.tic
 86
 87    @property
 88    def area(self):
 89        """Peak Area"""
 90        return self._area
 91
 92    @property
 93    def rt_list(self):
 94        """Retention Time List"""
 95        return [
 96            self.chromatogram_parent.retention_time[i]
 97            for i in range(self.start_scan, self.final_scan + 1)
 98        ]
 99
100    @property
101    def tic_list(self):
102        """Total Ion Current List"""
103        return [
104            self.chromatogram_parent.tic[i]
105            for i in range(self.start_scan, self.final_scan + 1)
106        ]

Base class for chromatographic peak (ChromaPeak) objects.

Parameters
  • chromatogram_parent (Chromatogram): The parent chromatogram object.
  • mass_spectrum_obj (MassSpectrum): The mass spectrum object.
  • start_index (int): The start index of the peak.
  • index (int): The apex index of the peak.
  • final_index (int): The final index of the peak.
Attributes
  • start_scan (int): The start scan of the peak.
  • final_scan (int): The final scan of the peak.
  • apex_scan (int): The apex scan of the peak.
  • chromatogram_parent (Chromatogram): The parent chromatogram object.
  • mass_spectrum (MassSpectrum): The mass spectrum object.
  • _area (float): The area of the peak.
Properties
  • retention_time : float. The retention time of the peak.
  • tic : float. The total ion current of the peak.
  • area : float. The area of the peak.
  • rt_list : list. The list of retention times within the peak.
  • tic_list : list. The list of total ion currents within the peak.
Methods
  • None
ChromaPeakBase( chromatogram_parent, mass_spectrum_obj, start_index, index, final_index)
67    def __init__(
68        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
69    ):
70        self.start_scan = start_index
71        self.final_scan = final_index
72        self.apex_scan = int(index)
73        self.chromatogram_parent = chromatogram_parent
74        self.mass_spectrum = mass_spectrum_obj
75        self._area = None
start_scan
final_scan
apex_scan
chromatogram_parent
mass_spectrum
retention_time

Retention Time

tic

Total Ion Current

area

Peak Area

rt_list

Retention Time List

tic_list

Total Ion Current List

109class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
110    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.
111
112    Parameters
113    -------
114    lcms_parent : LCMS
115        The parent LCMSBase object.
116    mz : float
117        The observed mass to charge ratio of the feature.
118    retention_time : float
119        The retention time of the feature (in minutes), at the apex.
120    intensity : float
121        The intensity of the feature.
122    apex_scan : int
123        The scan number of the apex of the feature.
124    persistence : float, optional
125        The persistence of the feature. Default is None.
126
127    Attributes
128    --------
129    _mz_exp : float
130        The observed mass to charge ratio of the feature.
131    _mz_cal : float
132        The calibrated mass to charge ratio of the feature.
133    _retention_time : float
134        The retention time of the feature (in minutes), at the apex.
135    _apex_scan : int
136        The scan number of the apex of the feature.
137    _intensity : float
138        The intensity of the feature.
139    _persistence : float
140        The persistence of the feature.
141    _eic_data : EIC_Data
142        The EIC data object associated with the feature.
143    _dispersity_index : float
144        The dispersity index of the feature, in minutes.
145    _normalized_dispersity_index : float
146        The normalized dispersity index of the feature (unitless, fraction of total window used to calculate dispersity index).
147    _half_height_width : numpy.ndarray
148        The half height width of the feature (in minutes, as an array of min and max values).
149    _tailing_factor : float
150        The tailing factor of the feature.
151        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
152    _noise_score : tuple
153        The noise score of the feature, as a tuple of (left, right) scores.
154        Each score is a float, with higher values indicating better signal to noise.
155    _gaussian_similarity : float
156        The Gaussian similarity of the feature, as a float between 0 and 1.
157        1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
158    _ms_deconvoluted_idx : [int]
159        The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
160    is_calibrated : bool
161        If True, the feature has been calibrated. Default is False.
162    monoisotopic_mf_id : int
163        Mass feature id that is the monoisotopic version of self.
164        If self.id, then self is the monoisotopic feature). Default is None.
165    isotopologue_type : str
166        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc.
167        Default is None.
168    ms2_scan_numbers : list
169        List of scan numbers of the MS2 spectra associated with the feature.
170        Default is an empty list.
171    ms2_mass_spectra : dict
172        Dictionary of MS2 spectra associated with the feature (key = scan number for DDA).
173        Default is an empty dictionary.
174    ms2_similarity_results : list
175        List of MS2 similarity results associated with the mass feature.
176        Default is an empty list.
177    id : int
178        The ID of the feature, also the key in the parent LCMS object's
179        `mass_features` dictionary.
180    mass_spectrum_deconvoluted_parent : bool
181        If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
182    associated_mass_features_deconvoluted : list
183        List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
184
185    """
186
187    def __init__(
188        self,
189        lcms_parent,
190        mz: float,
191        retention_time: float,
192        intensity: float,
193        apex_scan: int,
194        persistence: float = None,
195        id: int = None,
196    ):
197        super().__init__(
198            chromatogram_parent=lcms_parent,
199            mass_spectrum_obj=None,
200            start_index=None,
201            index=apex_scan,
202            final_index=None,
203        )
204        # Core attributes, marked as private
205        self._mz_exp: float = mz
206        self._mz_cal: float = None
207        self._retention_time: float = retention_time
208        self._apex_scan: int = apex_scan
209        self._intensity: float = intensity
210        self._persistence: float = persistence
211        self._eic_data: EIC_Data = None
212        self._dispersity_index: float = None
213        self._normalized_dispersity_index: float = None
214        self._half_height_width: np.ndarray = None
215        self._ms_deconvoluted_idx = None
216        self._tailing_factor: float = None
217        self._noise_score: tuple = None
218        self._gaussian_similarity: float = None
219
220        # Additional attributes
221        self.monoisotopic_mf_id = None
222        self.isotopologue_type = None
223        self.ms2_scan_numbers = []
224        self.ms2_mass_spectra = {}
225        self.ms2_similarity_results = []
226        self.mass_spectrum_deconvoluted_parent: bool = None
227        self.associated_mass_features_deconvoluted = []
228
229        if id:
230            self.id = id
231        else:
232            # get the parent's mass feature keys and add 1 to the max value to get the new key
233            self.id = (
234                max(lcms_parent.mass_features.keys()) + 1
235                if lcms_parent.mass_features.keys()
236                else 0
237            )
238
239    def update_mz(self):
240        """Update the mass to charge ratio from the mass spectrum object."""
241        if self.mass_spectrum is None:
242            raise ValueError(
243                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
244            )
245        if len(self.mass_spectrum.mz_exp) == 0:
246            raise ValueError(
247                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
248            )
249        new_mz = self.ms1_peak.mz_exp
250
251        # calculate the difference between the new and old m/z, only update if it is close
252        mz_diff = new_mz - self.mz
253        if abs(mz_diff) < 0.01:
254            self._mz_exp = new_mz
255
256    def plot(
257        self,
258        to_plot=["EIC", "MS1", "MS2"],
259        return_fig=True,
260        plot_smoothed_eic=False,
261        plot_eic_datapoints=False,
262    ):
263        """Plot the mass feature.
264
265        Parameters
266        ----------
267        to_plot : list, optional
268            List of strings specifying what to plot, any iteration of
269            "EIC", "MS2", and "MS1".
270            Default is ["EIC", "MS1", "MS2"].
271        return_fig : bool, optional
272            If True, the figure is returned. Default is True.
273        plot_smoothed_eic : bool, optional
274            If True, the smoothed EIC is plotted. Default is False.
275        plot_eic_datapoints : bool, optional
276            If True, the EIC data points are plotted. Default is False.
277
278        Returns
279        -------
280        matplotlib.figure.Figure or None
281            The figure object if `return_fig` is True.
282            Otherwise None and the figure is displayed.
283        """
284
285        # EIC plot preparation
286        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
287
288        # Adjust to_plot list if there are not spectra added to the mass features
289        if self.mass_spectrum is None:
290            to_plot = [x for x in to_plot if x != "MS1"]
291        if len(self.ms2_mass_spectra) == 0:
292            to_plot = [x for x in to_plot if x != "MS2"]
293        if self._eic_data is None:
294            to_plot = [x for x in to_plot if x != "EIC"]
295        if self._ms_deconvoluted_idx is not None:
296            deconvoluted = True
297        else:
298            deconvoluted = False
299
300        fig, axs = plt.subplots(
301            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
302        )
303        fig.suptitle(
304            "Mass Feature "
305            + str(self.id)
306            + ": m/z = "
307            + str(round(self.mz, ndigits=4))
308            + "; time = "
309            + str(round(self.retention_time, ndigits=1))
310            + " minutes"
311        )
312
313        i = 0
314        # EIC plot
315        if "EIC" in to_plot:
316            if self._eic_data is None:
317                raise ValueError(
318                    "EIC data is not available, cannot plot the mass feature's EIC"
319                )
320            axs[i][0].set_title("EIC", loc="left")
321            axs[i][0].plot(
322                self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC"
323            )
324            if plot_eic_datapoints:
325                axs[i][0].scatter(
326                    self._eic_data.time,
327                    self._eic_data.eic,
328                    c="tab:blue",
329                    label="EIC Data Points",
330                )
331            if plot_smoothed_eic:
332                axs[i][0].plot(
333                    self._eic_data.time,
334                    self._eic_data.eic_smoothed,
335                    c="tab:red",
336                    label="Smoothed EIC",
337                )
338            if self.start_scan is not None:
339                axs[i][0].fill_between(
340                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
341                )
342            else:
343                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
344                    print(
345                        "No start and final scan numbers were provided for mass feature "
346                        + str(self.id)
347                    )
348            axs[i][0].set_ylabel("Intensity")
349            axs[i][0].set_xlabel("Time (minutes)")
350            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
351            axs[i][0].set_xlim(
352                self.retention_time - eic_buffer_time,
353                self.retention_time + eic_buffer_time,
354            )
355            axs[i][0].axvline(
356                x=self.retention_time, color="k", label="MS1 scan time (apex)"
357            )
358            if len(self.ms2_scan_numbers) > 0:
359                axs[i][0].axvline(
360                    x=self.chromatogram_parent.get_time_of_scan_id(
361                        self.best_ms2.scan_number
362                    ),
363                    color="grey",
364                    linestyle="--",
365                    label="MS2 scan time",
366                )
367            axs[i][0].legend(loc="upper left")
368            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
369            i += 1
370
371        # MS1 plot
372        if "MS1" in to_plot:
373            if deconvoluted:
374                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
375                axs[i][0].vlines(
376                    self.mass_spectrum.mz_exp,
377                    0,
378                    self.mass_spectrum.abundance,
379                    color="k",
380                    alpha=0.2,
381                    label="Raw MS1",
382                )
383                axs[i][0].vlines(
384                    self.mass_spectrum_deconvoluted.mz_exp,
385                    0,
386                    self.mass_spectrum_deconvoluted.abundance,
387                    color="k",
388                    label="Deconvoluted MS1",
389                )
390                axs[i][0].set_xlim(
391                    self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8,
392                    self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1,
393                )
394                axs[i][0].set_ylim(
395                    0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1
396                )
397            else:
398                axs[i][0].set_title("MS1 (raw)", loc="left")
399                axs[i][0].vlines(
400                    self.mass_spectrum.mz_exp,
401                    0,
402                    self.mass_spectrum.abundance,
403                    color="k",
404                    label="Raw MS1",
405                )
406                axs[i][0].set_xlim(
407                    self.mass_spectrum.mz_exp.min() * 0.8,
408                    self.mass_spectrum.mz_exp.max() * 1.1,
409                )
410                axs[i][0].set_ylim(bottom=0)
411
412            if (self.ms1_peak.mz_exp - self.mz) < 0.01:
413                axs[i][0].vlines(
414                    self.ms1_peak.mz_exp,
415                    0,
416                    self.ms1_peak.abundance,
417                    color="m",
418                    label="Feature m/z",
419                )
420
421            else:
422                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
423                    print(
424                        "The m/z of the mass feature "
425                        + str(self.id)
426                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
427                    )
428            axs[i][0].legend(loc="upper left")
429            axs[i][0].set_ylabel("Intensity")
430            axs[i][0].set_xlabel("m/z")
431            axs[i][0].yaxis.set_tick_params(labelleft=False)
432            i += 1
433
434        # MS2 plot
435        if "MS2" in to_plot:
436            axs[i][0].set_title("MS2", loc="left")
437            axs[i][0].vlines(
438                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
439            )
440            axs[i][0].set_ylabel("Intensity")
441            axs[i][0].set_xlabel("m/z")
442            axs[i][0].set_ylim(bottom=0)
443            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
444            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
445            axs[i][0].set_xlim(
446                self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1
447            )
448            axs[i][0].yaxis.set_tick_params(labelleft=False)
449
450        # Add space between subplots
451        plt.tight_layout()
452
453        if return_fig:
454            # Close figure
455            plt.close(fig)
456            return fig
457
458    @property
459    def mz(self):
460        """Mass to charge ratio of the mass feature"""
461        # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z
462        if self._mz_cal is not None:
463            return self._mz_cal
464        else:
465            return self._mz_exp
466
467    @property
468    def mass_spectrum_deconvoluted(self):
469        """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed."""
470        if self._ms_deconvoluted_idx is not None:
471            ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
472            ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
473            return ms_deconvoluted
474        else:
475            raise ValueError(
476                "Deconvolution has not been performed for mass feature " + str(self.id)
477            )
478
479    @property
480    def retention_time(self):
481        """Retention time of the mass feature"""
482        return self._retention_time
483
484    @retention_time.setter
485    def retention_time(self, value):
486        """Set the retention time of the mass feature"""
487        if not isinstance(value, float):
488            raise ValueError("The retention time of the mass feature must be a float")
489        self._retention_time = value
490
491    @property
492    def apex_scan(self):
493        """Apex scan of the mass feature"""
494        return self._apex_scan
495
496    @apex_scan.setter
497    def apex_scan(self, value):
498        """Set the apex scan of the mass feature"""
499        if not isinstance(value, int):
500            raise ValueError("The apex scan of the mass feature must be an integer")
501        self._apex_scan = value
502
503    @property
504    def intensity(self):
505        """Intensity of the mass feature"""
506        return self._intensity
507
508    @intensity.setter
509    def intensity(self, value):
510        """Set the intensity of the mass feature"""
511        if not isinstance(value, float):
512            raise ValueError("The intensity of the mass feature must be a float")
513        self._intensity = value
514
515    @property
516    def persistence(self):
517        """Persistence of the mass feature"""
518        return self._persistence
519
520    @persistence.setter
521    def persistence(self, value):
522        """Set the persistence of the mass feature"""
523        if not isinstance(value, float):
524            raise ValueError("The persistence of the mass feature must be a float")
525        self._persistence = value
526
527    @property
528    def eic_rt_list(self):
529        """Retention time list between the beginning and end of the mass feature"""
530        # Find index of the start and final scans in the EIC data
531        start_index = self._eic_data.scans.tolist().index(self.start_scan)
532        final_index = self._eic_data.scans.tolist().index(self.final_scan)
533
534        # Get the retention time list
535        rt_list = self._eic_data.time[start_index : final_index + 1]
536        return rt_list
537
538    @property
539    def eic_list(self):
540        """EIC List between the beginning and end of the mass feature"""
541        # Find index of the start and final scans in the EIC data
542        start_index = self._eic_data.scans.tolist().index(self.start_scan)
543        final_index = self._eic_data.scans.tolist().index(self.final_scan)
544
545        # Get the retention time list
546        eic = self._eic_data.eic[start_index : final_index + 1]
547        return eic
548
549    @property
550    def ms1_peak(self):
551        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
552        # Find index array self.mass_spectrum.mz_exp that is closest to self.mz
553        closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz))
554        closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz)
555
556        return self.mass_spectrum._mspeaks[closest_mz_index]
557
558    @property
559    def tailing_factor(self):
560        """Tailing factor of the mass feature"""
561        return self._tailing_factor
562
563    @tailing_factor.setter
564    def tailing_factor(self, value):
565        """Set the tailing factor of the mass feature"""
566        if not isinstance(value, float):
567            raise ValueError("The tailing factor of the mass feature must be a float")
568        self._tailing_factor = value
569
570    @property
571    def dispersity_index(self):
572        """Dispersity index of the mass feature"""
573        return self._dispersity_index
574
575    @dispersity_index.setter
576    def dispersity_index(self, value):
577        """Set the dispersity index of the mass feature"""
578        if not isinstance(value, float):
579            raise ValueError("The dispersity index of the mass feature must be a float")
580        self._dispersity_index = value
581
582    @property
583    def normalized_dispersity_index(self):
584        """Normalized dispersity index of the mass feature, unitless (fraction of total window used)"""
585        return self._normalized_dispersity_index
586
587    @property
588    def half_height_width(self):
589        """Half height width of the mass feature, average of min and max values, in minutes"""
590        return np.mean(self._half_height_width)
591
592    @property
593    def noise_score(self):
594        """Mean of left and right noise scores.
595
596        Returns
597        -------
598        float or np.nan
599            Mean noise score, or np.nan if both sides are np.nan.
600        """
601        if self._noise_score is None:
602            return np.nan
603
604        left, right = self._noise_score
605        # Handle NaN values
606        if np.isnan(left) and np.isnan(right):
607            return np.nan
608        elif np.isnan(left):
609            return right
610        elif np.isnan(right):
611            return left
612        else:
613            return (left + right) / 2.0
614
615    @property
616    def noise_score_min(self):
617        """Minimum of left and right noise scores.
618
619        Returns
620        -------
621        float or np.nan
622            Minimum noise score, or np.nan if both sides are np.nan.
623        """
624        if self._noise_score is None:
625            return np.nan
626
627        left, right = self._noise_score
628        # Handle NaN values - nanmin ignores NaN
629        return np.nanmin([left, right])
630
631    @property
632    def noise_score_max(self):
633        """Maximum of left and right noise scores.
634
635        Returns
636        -------
637        float or np.nan
638            Maximum noise score, or np.nan if both sides are np.nan.
639        """
640        if self._noise_score is None:
641            return np.nan
642
643        left, right = self._noise_score
644        # Handle NaN values - nanmax ignores NaN
645        return np.nanmax([left, right])
646
647    @property
648    def best_ms2(self):
649        """Points to the best representative MS2 mass spectrum
650
651        Notes
652        -----
653        If there is only one MS2 mass spectrum, it will be returned
654        If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.
655        If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power.  Checks for and disqualifies possible chimeric spectra.
656
657        Returns
658        -------
659        MassSpectrum or None
660            The best MS2 mass spectrum.
661        """
662        if len(self.ms2_similarity_results) > 0:
663            # the scan number with the highest similarity score
664            results_df = [x.to_dataframe() for x in self.ms2_similarity_results]
665            results_df = pd.concat(results_df)
666            results_df = results_df.sort_values(
667                by="entropy_similarity", ascending=False
668            )
669            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
670            return self.ms2_mass_spectra[best_scan_number]
671
672        ms2_scans = list(self.ms2_mass_spectra.keys())
673        if len(ms2_scans) > 1:
674            mz_diff_list = []  # List of mz difference between mz of mass feature and mass of nearest mz in each scan
675            res_list = []  # List of maximum resolving power of peaks in each scan
676            time_diff_list = []  # List of time difference between scan and apex scan in each scan
677            for scan in ms2_scans:
678                if len(self.ms2_mass_spectra[scan].mspeaks) > 0:
679                    # Find mz closest to mass feature mz, return both the difference in mass and its resolution
680                    closest_mz = min(
681                        self.ms2_mass_spectra[scan].mz_exp,
682                        key=lambda x: abs(x - self.mz),
683                    )
684                    if all(
685                        np.isnan(self.ms2_mass_spectra[scan].resolving_power)
686                    ):  # All NA for resolving power in peaks, not uncommon in CID spectra
687                        res_list.append(2)  # Assumes very low resolving power
688                    else:
689                        res_list.append(
690                            np.nanmax(self.ms2_mass_spectra[scan].resolving_power)
691                        )
692                    mz_diff_list.append(np.abs(closest_mz - self.mz))
693                    time_diff_list.append(
694                        np.abs(
695                            self.chromatogram_parent.get_time_of_scan_id(scan)
696                            - self.retention_time
697                        )
698                    )
699                else:
700                    res_list.append(np.nan)
701                    mz_diff_list.append(np.nan)
702                    time_diff_list.append(np.nan)
703            # Convert diff_lists into logical scores (higher is better for each score)
704            time_score = 1 - np.array(time_diff_list) / np.nanmax(
705                np.array(time_diff_list)
706            )
707            res_score = np.array(res_list) / np.nanmax(np.array(res_list))
708            # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning)
709            mz_score = np.zeros(len(ms2_scans))
710            for i in np.arange(0, len(ms2_scans)):
711                if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1:  # Possible chimeric
712                    mz_score[i] = 0
713                else:
714                    mz_score[i] = 1
715            # get the index of the best score and return the mass spectrum
716            if len([np.nanargmax(time_score * res_score * mz_score)]) == 1:
717                return self.ms2_mass_spectra[
718                    ms2_scans[np.nanargmax(time_score * res_score * mz_score)]
719                ]
720            # remove the mz_score condition and try again
721            elif len(np.argmax(time_score * res_score)) == 1:
722                return self.ms2_mass_spectra[
723                    ms2_scans[np.nanargmax(time_score * res_score)]
724                ]
725            else:
726                raise ValueError(
727                    "No best MS2 mass spectrum could be found for mass feature "
728                    + str(self.id)
729                )
730        elif len(ms2_scans) == 1:  # if only one ms2 spectra, return it
731            return self.ms2_mass_spectra[ms2_scans[0]]
732        else:  # if no ms2 spectra, return None
733            return None

Class representing a mass feature in a liquid chromatography (LC) chromatogram.

Parameters
  • lcms_parent (LCMS): The parent LCMSBase object.
  • mz (float): The observed mass to charge ratio of the feature.
  • retention_time (float): The retention time of the feature (in minutes), at the apex.
  • intensity (float): The intensity of the feature.
  • apex_scan (int): The scan number of the apex of the feature.
  • persistence (float, optional): The persistence of the feature. Default is None.
Attributes
  • _mz_exp (float): The observed mass to charge ratio of the feature.
  • _mz_cal (float): The calibrated mass to charge ratio of the feature.
  • _retention_time (float): The retention time of the feature (in minutes), at the apex.
  • _apex_scan (int): The scan number of the apex of the feature.
  • _intensity (float): The intensity of the feature.
  • _persistence (float): The persistence of the feature.
  • _eic_data (EIC_Data): The EIC data object associated with the feature.
  • _dispersity_index (float): The dispersity index of the feature, in minutes.
  • _normalized_dispersity_index (float): The normalized dispersity index of the feature (unitless, fraction of total window used to calculate dispersity index).
  • _half_height_width (numpy.ndarray): The half height width of the feature (in minutes, as an array of min and max values).
  • _tailing_factor (float): The tailing factor of the feature. > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
  • _noise_score (tuple): The noise score of the feature, as a tuple of (left, right) scores. Each score is a float, with higher values indicating better signal to noise.
  • _gaussian_similarity (float): The Gaussian similarity of the feature, as a float between 0 and 1. 1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
  • _ms_deconvoluted_idx ([int]): The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
  • is_calibrated (bool): If True, the feature has been calibrated. Default is False.
  • monoisotopic_mf_id (int): ID of the mass feature that is the monoisotopic version of self (if it equals self.id, then self is the monoisotopic feature). Default is None.
  • isotopologue_type (str): The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. Default is None.
  • ms2_scan_numbers (list): List of scan numbers of the MS2 spectra associated with the feature. Default is an empty list.
  • ms2_mass_spectra (dict): Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). Default is an empty dictionary.
  • ms2_similarity_results (list): List of MS2 similarity results associated with the mass feature. Default is an empty list.
  • id (int): The ID of the feature, also the key in the parent LCMS object's mass_features dictionary.
  • mass_spectrum_deconvoluted_parent (bool): If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
  • associated_mass_features_deconvoluted (list): List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
LCMSMassFeature( lcms_parent, mz: float, retention_time: float, intensity: float, apex_scan: int, persistence: float = None, id: int = None)
187    def __init__(
188        self,
189        lcms_parent,
190        mz: float,
191        retention_time: float,
192        intensity: float,
193        apex_scan: int,
194        persistence: float = None,
195        id: int = None,
196    ):
197        super().__init__(
198            chromatogram_parent=lcms_parent,
199            mass_spectrum_obj=None,
200            start_index=None,
201            index=apex_scan,
202            final_index=None,
203        )
204        # Core attributes, marked as private
205        self._mz_exp: float = mz
206        self._mz_cal: float = None
207        self._retention_time: float = retention_time
208        self._apex_scan: int = apex_scan
209        self._intensity: float = intensity
210        self._persistence: float = persistence
211        self._eic_data: EIC_Data = None
212        self._dispersity_index: float = None
213        self._normalized_dispersity_index: float = None
214        self._half_height_width: np.ndarray = None
215        self._ms_deconvoluted_idx = None
216        self._tailing_factor: float = None
217        self._noise_score: tuple = None
218        self._gaussian_similarity: float = None
219
220        # Additional attributes
221        self.monoisotopic_mf_id = None
222        self.isotopologue_type = None
223        self.ms2_scan_numbers = []
224        self.ms2_mass_spectra = {}
225        self.ms2_similarity_results = []
226        self.mass_spectrum_deconvoluted_parent: bool = None
227        self.associated_mass_features_deconvoluted = []
228
229        if id:
230            self.id = id
231        else:
232            # get the parent's mass feature keys and add 1 to the max value to get the new key
233            self.id = (
234                max(lcms_parent.mass_features.keys()) + 1
235                if lcms_parent.mass_features.keys()
236                else 0
237            )
monoisotopic_mf_id
isotopologue_type
ms2_scan_numbers
ms2_mass_spectra
ms2_similarity_results
mass_spectrum_deconvoluted_parent: bool
associated_mass_features_deconvoluted
def update_mz(self):
239    def update_mz(self):
240        """Update the mass to charge ratio from the mass spectrum object."""
241        if self.mass_spectrum is None:
242            raise ValueError(
243                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
244            )
245        if len(self.mass_spectrum.mz_exp) == 0:
246            raise ValueError(
247                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
248            )
249        new_mz = self.ms1_peak.mz_exp
250
251        # calculate the difference between the new and old m/z, only update if it is close
252        mz_diff = new_mz - self.mz
253        if abs(mz_diff) < 0.01:
254            self._mz_exp = new_mz

Update the mass to charge ratio from the mass spectrum object.

def plot( self, to_plot=['EIC', 'MS1', 'MS2'], return_fig=True, plot_smoothed_eic=False, plot_eic_datapoints=False):
256    def plot(
257        self,
258        to_plot=["EIC", "MS1", "MS2"],
259        return_fig=True,
260        plot_smoothed_eic=False,
261        plot_eic_datapoints=False,
262    ):
263        """Plot the mass feature.
264
265        Parameters
266        ----------
267        to_plot : list, optional
268            List of strings specifying what to plot, any iteration of
269            "EIC", "MS2", and "MS1".
270            Default is ["EIC", "MS1", "MS2"].
271        return_fig : bool, optional
272            If True, the figure is returned. Default is True.
273        plot_smoothed_eic : bool, optional
274            If True, the smoothed EIC is plotted. Default is False.
275        plot_eic_datapoints : bool, optional
276            If True, the EIC data points are plotted. Default is False.
277
278        Returns
279        -------
280        matplotlib.figure.Figure or None
281            The figure object if `return_fig` is True.
282            Otherwise None and the figure is displayed.
283        """
284
285        # EIC plot preparation
286        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
287
288        # Adjust to_plot list if there are not spectra added to the mass features
289        if self.mass_spectrum is None:
290            to_plot = [x for x in to_plot if x != "MS1"]
291        if len(self.ms2_mass_spectra) == 0:
292            to_plot = [x for x in to_plot if x != "MS2"]
293        if self._eic_data is None:
294            to_plot = [x for x in to_plot if x != "EIC"]
295        if self._ms_deconvoluted_idx is not None:
296            deconvoluted = True
297        else:
298            deconvoluted = False
299
300        fig, axs = plt.subplots(
301            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
302        )
303        fig.suptitle(
304            "Mass Feature "
305            + str(self.id)
306            + ": m/z = "
307            + str(round(self.mz, ndigits=4))
308            + "; time = "
309            + str(round(self.retention_time, ndigits=1))
310            + " minutes"
311        )
312
313        i = 0
314        # EIC plot
315        if "EIC" in to_plot:
316            if self._eic_data is None:
317                raise ValueError(
318                    "EIC data is not available, cannot plot the mass feature's EIC"
319                )
320            axs[i][0].set_title("EIC", loc="left")
321            axs[i][0].plot(
322                self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC"
323            )
324            if plot_eic_datapoints:
325                axs[i][0].scatter(
326                    self._eic_data.time,
327                    self._eic_data.eic,
328                    c="tab:blue",
329                    label="EIC Data Points",
330                )
331            if plot_smoothed_eic:
332                axs[i][0].plot(
333                    self._eic_data.time,
334                    self._eic_data.eic_smoothed,
335                    c="tab:red",
336                    label="Smoothed EIC",
337                )
338            if self.start_scan is not None:
339                axs[i][0].fill_between(
340                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
341                )
342            else:
343                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
344                    print(
345                        "No start and final scan numbers were provided for mass feature "
346                        + str(self.id)
347                    )
348            axs[i][0].set_ylabel("Intensity")
349            axs[i][0].set_xlabel("Time (minutes)")
350            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
351            axs[i][0].set_xlim(
352                self.retention_time - eic_buffer_time,
353                self.retention_time + eic_buffer_time,
354            )
355            axs[i][0].axvline(
356                x=self.retention_time, color="k", label="MS1 scan time (apex)"
357            )
358            if len(self.ms2_scan_numbers) > 0:
359                axs[i][0].axvline(
360                    x=self.chromatogram_parent.get_time_of_scan_id(
361                        self.best_ms2.scan_number
362                    ),
363                    color="grey",
364                    linestyle="--",
365                    label="MS2 scan time",
366                )
367            axs[i][0].legend(loc="upper left")
368            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
369            i += 1
370
371        # MS1 plot
372        if "MS1" in to_plot:
373            if deconvoluted:
374                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
375                axs[i][0].vlines(
376                    self.mass_spectrum.mz_exp,
377                    0,
378                    self.mass_spectrum.abundance,
379                    color="k",
380                    alpha=0.2,
381                    label="Raw MS1",
382                )
383                axs[i][0].vlines(
384                    self.mass_spectrum_deconvoluted.mz_exp,
385                    0,
386                    self.mass_spectrum_deconvoluted.abundance,
387                    color="k",
388                    label="Deconvoluted MS1",
389                )
390                axs[i][0].set_xlim(
391                    self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8,
392                    self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1,
393                )
394                axs[i][0].set_ylim(
395                    0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1
396                )
397            else:
398                axs[i][0].set_title("MS1 (raw)", loc="left")
399                axs[i][0].vlines(
400                    self.mass_spectrum.mz_exp,
401                    0,
402                    self.mass_spectrum.abundance,
403                    color="k",
404                    label="Raw MS1",
405                )
406                axs[i][0].set_xlim(
407                    self.mass_spectrum.mz_exp.min() * 0.8,
408                    self.mass_spectrum.mz_exp.max() * 1.1,
409                )
410                axs[i][0].set_ylim(bottom=0)
411
412            if (self.ms1_peak.mz_exp - self.mz) < 0.01:
413                axs[i][0].vlines(
414                    self.ms1_peak.mz_exp,
415                    0,
416                    self.ms1_peak.abundance,
417                    color="m",
418                    label="Feature m/z",
419                )
420
421            else:
422                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
423                    print(
424                        "The m/z of the mass feature "
425                        + str(self.id)
426                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
427                    )
428            axs[i][0].legend(loc="upper left")
429            axs[i][0].set_ylabel("Intensity")
430            axs[i][0].set_xlabel("m/z")
431            axs[i][0].yaxis.set_tick_params(labelleft=False)
432            i += 1
433
434        # MS2 plot
435        if "MS2" in to_plot:
436            axs[i][0].set_title("MS2", loc="left")
437            axs[i][0].vlines(
438                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
439            )
440            axs[i][0].set_ylabel("Intensity")
441            axs[i][0].set_xlabel("m/z")
442            axs[i][0].set_ylim(bottom=0)
443            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
444            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
445            axs[i][0].set_xlim(
446                self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1
447            )
448            axs[i][0].yaxis.set_tick_params(labelleft=False)
449
450        # Add space between subplots
451        plt.tight_layout()
452
453        if return_fig:
454            # Close figure
455            plt.close(fig)
456            return fig

Plot the mass feature.

Parameters
  • to_plot (list, optional): List of strings specifying what to plot, any iteration of "EIC", "MS2", and "MS1". Default is ["EIC", "MS1", "MS2"].
  • return_fig (bool, optional): If True, the figure is returned. Default is True.
  • plot_smoothed_eic (bool, optional): If True, the smoothed EIC is plotted. Default is False.
  • plot_eic_datapoints (bool, optional): If True, the EIC data points are plotted. Default is False.
Returns
  • matplotlib.figure.Figure or None: The figure object if return_fig is True. Otherwise None and the figure is displayed.
mz

Mass to charge ratio of the mass feature

mass_spectrum_deconvoluted

Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.

retention_time

Retention time of the mass feature

apex_scan

Apex scan of the mass feature

intensity

Intensity of the mass feature

persistence

Persistence of the mass feature

eic_rt_list

Retention time list between the beginning and end of the mass feature

eic_list

EIC List between the beginning and end of the mass feature

ms1_peak

MS1 peak from associated mass spectrum that is closest to the mass feature's m/z

tailing_factor

Tailing factor of the mass feature

dispersity_index

Dispersity index of the mass feature

normalized_dispersity_index

Normalized dispersity index of the mass feature, unitless (fraction of total window used)

half_height_width

Half height width of the mass feature, average of min and max values, in minutes

noise_score

Mean of left and right noise scores.

Returns
  • float or np.nan: Mean noise score, or np.nan if both sides are np.nan.
noise_score_min

Minimum of left and right noise scores.

Returns
  • float or np.nan: Minimum noise score, or np.nan if both sides are np.nan.
noise_score_max

Maximum of left and right noise scores.

Returns
  • float or np.nan: Maximum noise score, or np.nan if both sides are np.nan.
best_ms2

Points to the best representative MS2 mass spectrum

Notes

If there is only one MS2 mass spectrum, it will be returned. If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. If there are no MS2 similarity results, the best MS2 mass spectrum is the one whose scan time is closest to the apex of the mass feature, favoring higher resolving power. Possible chimeric spectra are checked for and disqualified.

Returns
  • MassSpectrum or None: The best MS2 mass spectrum.
class GCPeak(ChromaPeakBase, GCPeakCalculation):
    """Class representing a peak in a gas chromatography (GC) chromatogram.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum_obj : MassSpectrum
        The mass spectrum object associated with the peak.
    indexes : tuple
        The indexes of the peak in the chromatogram; unpacked positionally
        into the base class initializer.

    Attributes
    ----------
    _compounds : list
        List of compounds associated with the peak.
    _ri : float or None
        Retention index of the peak.

    Properties
    ----------
    * ri. The retention index of the peak.
    * highest_ss_compound. The compound with the highest spectral similarity score.
    * highest_score_compound. The compound with the highest similarity score.
    * compound_names. List of names of compounds associated with the peak.

    Methods
    -------
    * __len__(). Returns the number of compounds associated with the peak.
    * __getitem__(position).  Returns the compound at the specified position.
    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    * clear_compounds(). Removes all compounds from the peak.
    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    """

    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
        self._compounds = []
        self._ri = None
        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)

    def __len__(self):
        return len(self._compounds)

    def __getitem__(self, position):
        return self._compounds[position]

    def remove_compound(self, compounds_obj):
        """Remove the first occurrence of `compounds_obj` from the peak.

        Raises
        ------
        ValueError
            If `compounds_obj` is not associated with the peak.
        """
        self._compounds.remove(compounds_obj)

    def clear_compounds(self):
        """Remove all compounds from the peak."""
        self._compounds = []

    def add_compound(
        self,
        compounds_dict,
        spectral_similarity_scores,
        ri_score=None,
        similarity_score=None,
    ):
        """Adds a compound to the peak with the specified attributes.

        After insertion the compound list is re-sorted in descending order,
        by similarity score when one is given, otherwise by spectral
        similarity score.

        Parameters
        ----------
        compounds_dict : dict
            Dictionary containing the compound information.
        spectral_similarity_scores : dict
            Dictionary containing the spectral similarity scores. The
            "cosine_correlation" entry is used as the compound's spectral
            similarity score.
        ri_score : float or None, optional
            The retention index score of the compound. Default is None.
        similarity_score : float or None, optional
            The similarity score of the compound. Default is None.
        """
        compound_obj = LowResCompoundRef(compounds_dict)
        compound_obj.spectral_similarity_scores = spectral_similarity_scores
        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
            "cosine_correlation"
        )
        # TODO check is the above line correct?
        compound_obj.ri_score = ri_score
        compound_obj.similarity_score = similarity_score
        self._compounds.append(compound_obj)
        # Test against None, not truthiness: a similarity score of 0.0 is a
        # real score and must not switch the ordering to the spectral key.
        if similarity_score is not None:
            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
        else:
            self._compounds.sort(
                key=lambda c: c.spectral_similarity_score, reverse=True
            )

    @property
    def ri(self):
        """Returns the retention index of the peak.

        Returns
        -------
        float or None
            The retention index of the peak.
        """
        return self._ri

    @property
    def highest_ss_compound(self):
        """Returns the compound with the highest spectral similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest spectral similarity score, or None
            when the peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.spectral_similarity_score)
        else:
            return None

    @property
    def highest_score_compound(self):
        """Returns the compound with the highest similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest similarity score, or None when the
            peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.similarity_score)
        else:
            return None

    @property
    def compound_names(self):
        """Returns a list of names of compounds associated with the peak.

        Returns
        -------
        list
            List of names of compounds associated with the peak.
        """
        if self:
            return [c.name for c in self]
        else:
            return []

Class representing a peak in a gas chromatography (GC) chromatogram.

Parameters
  • chromatogram_parent (Chromatogram): The parent chromatogram object.
  • mass_spectrum_obj (MassSpectrum): The mass spectrum object associated with the peak.
  • indexes (tuple): The indexes of the peak in the chromatogram.
Attributes
  • _compounds (list): List of compounds associated with the peak.
  • _ri (float or None): Retention index of the peak.
Methods
  • __len__(). Returns the number of compounds associated with the peak.
  • __getitem__(position). Returns the compound at the specified position.
  • remove_compound(compounds_obj). Removes the specified compound from the peak.
  • clear_compounds(). Removes all compounds from the peak.
  • add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
  • ri(). Returns the retention index of the peak.
  • highest_ss_compound(). Returns the compound with the highest spectral similarity score.
  • highest_score_compound(). Returns the compound with the highest similarity score.
  • compound_names(). Returns a list of names of compounds associated with the peak.
GCPeak(chromatogram_parent, mass_spectrum_obj, indexes)
768    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
769        self._compounds = []
770        self._ri = None
771        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)
def remove_compound(self, compounds_obj):
779    def remove_compound(self, compounds_obj):
780        self._compounds.remove(compounds_obj)
def clear_compounds(self):
782    def clear_compounds(self):
783        self._compounds = []
def add_compound( self, compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None):
785    def add_compound(
786        self,
787        compounds_dict,
788        spectral_similarity_scores,
789        ri_score=None,
790        similarity_score=None,
791    ):
792        """Adds a compound to the peak with the specified attributes.
793
794        Parameters
795        ----------
796        compounds_dict : dict
797            Dictionary containing the compound information.
798        spectral_similarity_scores : dict
799            Dictionary containing the spectral similarity scores.
800        ri_score : float or None, optional
801            The retention index score of the compound. Default is None.
802        similarity_score : float or None, optional
803            The similarity score of the compound. Default is None.
804        """
805        compound_obj = LowResCompoundRef(compounds_dict)
806        compound_obj.spectral_similarity_scores = spectral_similarity_scores
807        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
808            "cosine_correlation"
809        )
810        # TODO check is the above line correct?
811        compound_obj.ri_score = ri_score
812        compound_obj.similarity_score = similarity_score
813        self._compounds.append(compound_obj)
814        if similarity_score:
815            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
816        else:
817            self._compounds.sort(
818                key=lambda c: c.spectral_similarity_score, reverse=True
819            )

Adds a compound to the peak with the specified attributes.

Parameters
  • compounds_dict (dict): Dictionary containing the compound information.
  • spectral_similarity_scores (dict): Dictionary containing the spectral similarity scores.
  • ri_score (float or None, optional): The retention index score of the compound. Default is None.
  • similarity_score (float or None, optional): The similarity score of the compound. Default is None.
ri

Returns the retention index of the peak.

Returns
  • float or None: The retention index of the peak.
highest_ss_compound

Returns the compound with the highest spectral similarity score.

Returns
  • LowResCompoundRef or None: The compound with the highest spectral similarity score.
highest_score_compound

Returns the compound with the highest similarity score.

Returns
  • LowResCompoundRef or None: The compound with the highest similarity score.
compound_names

Returns a list of names of compounds associated with the peak.

Returns
  • list: List of names of compounds associated with the peak.
class GCPeakDeconvolved(GCPeak):
class GCPeakDeconvolved(GCPeak):
    """A deconvolved chromatographic peak backed by its own spectra and traces.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectra : list
        Mass spectra associated with the peak; copied into a new list.
    apex_index : int
        Position of the apex mass spectrum within `mass_spectra`.
    rt_list : list
        Retention times; copied into a new list.
    tic_list : list
        Total ion currents; copied into a new list.
    """

    def __init__(
        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    ):
        self._ri = None
        self._rt_list = list(rt_list)
        self._tic_list = list(tic_list)
        spectra = list(mass_spectra)
        self.mass_spectra = spectra

        apex_spectrum = spectra[apex_index]
        # (start, apex, final) positions are relative to the peak's own spectra list
        peak_indexes = (0, apex_index, len(spectra) - 1)
        super().__init__(chromatogram_parent, apex_spectrum, peak_indexes)

    @property
    def rt_list(self):
        """The retention times stored for this peak.

        Returns
        -------
        list
            The list of retention times.
        """
        return self._rt_list

    @property
    def tic_list(self):
        """The total ion currents stored for this peak.

        Returns
        -------
        list
            The list of total ion currents.
        """
        return self._tic_list

Represents a deconvolved peak in a chromatogram.

Parameters
  • chromatogram_parent (Chromatogram): The parent chromatogram object.
  • mass_spectra (list): List of mass spectra associated with the peak.
  • apex_index (int): Index of the apex mass spectrum in the mass_spectra list.
  • rt_list (list): List of retention times.
  • tic_list (list): List of total ion currents.
GCPeakDeconvolved(chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list)
892    def __init__(
893        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
894    ):
895        self._ri = None
896        self._rt_list = list(rt_list)
897        self._tic_list = list(tic_list)
898        self.mass_spectra = list(mass_spectra)
899        super().__init__(
900            chromatogram_parent,
901            self.mass_spectra[apex_index],
902            (0, apex_index, len(self.mass_spectra) - 1),
903        )
mass_spectra
rt_list

Get the list of retention times.

Returns
  • list: The list of retention times.
tic_list

Get the list of total ion currents.

Returns
  • list: The list of total ion currents.