corems.mass_spectra.factory.lc_class

   1from pathlib import Path
   2
   3import numpy as np
   4import pandas as pd
   5import warnings
   6import matplotlib.pyplot as plt
   7
   8from corems.encapsulation.factory.parameters import LCMSParameters
   9from corems.mass_spectra.calc.lc_calc import LCCalculations, PHCalculations
  10from corems.molecular_id.search.lcms_spectral_search import LCMSSpectralSearch
  11from corems.mass_spectrum.input.numpyArray import ms_from_array_profile, ms_from_array_centroid
  12from corems.mass_spectra.calc.lc_calc import find_closest
  13
  14
  15class MassSpectraBase:
  16    """Base class for mass spectra objects.
  17
  18    Parameters
  19    -----------
  20    file_location : str or Path
  21        The location of the file containing the mass spectra data.
  22    analyzer : str, optional
  23        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
  24    instrument_label : str, optional
  25        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
  26    sample_name : str, optional
  27        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
  28    spectra_parser : object, optional
  29        The spectra parser object used to create the mass spectra object. Defaults to None.
  30
  31    Attributes
  32    -----------
  33    spectra_parser_class : class
  34        The class of the spectra parser used to create the mass spectra object.
  35    file_location : str or Path
  36        The location of the file containing the mass spectra data.
  37    sample_name : str
  38        The name of the sample; defaults to the file name if not provided to the parser.
  39    analyzer : str
  40        The type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
  41    instrument_label : str
  42        The type of instrument used to generate the mass spectra data. Derived from the spectra parser.
  43    _scan_info : dict
  44        A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,
  45        scan text, and scan window (lower and upper).
  46        Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
  47    _ms : dict
  48        A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
  49    _ms_unprocessed: dictionary of pandas.DataFrames or None
        A dictionary of unprocessed mass spectra data, as an (optional) intermediate data product for peak picking.
        Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Initialized as an empty dictionary.
  52
  53    Methods
  54    --------
    * add_mass_spectra(scan_list, spectrum_mode=None, ms_level=1, use_parser=True, auto_process=True, ms_params=None).
        Add mass spectra (or a single mass spectrum) to the _ms slot, from a list of scans.
  57    * get_time_of_scan_id(scan).
  58        Returns the scan time for the specified scan number.
  59    """
  60
  61    def __init__(
  62        self,
  63        file_location,
  64        analyzer="Unknown",
  65        instrument_label="Unknown",
  66        sample_name=None,
  67        spectra_parser=None,
  68    ):
  69        if isinstance(file_location, str):
  70            file_location = Path(file_location)
  71        else:
  72            file_location = file_location
  73        if not file_location.exists():
  74            raise FileExistsError("File does not exist: " + str(file_location))
  75
  76        if sample_name:
  77            self.sample_name = sample_name
  78        else:
  79            self.sample_name = file_location.stem
  80
  81        self.file_location = file_location
  82        self.analyzer = analyzer
  83        self.instrument_label = instrument_label
  84
  85        # Add the spectra parser class to the object if it is not None
  86        if spectra_parser is not None:
  87            self.spectra_parser_class = spectra_parser.__class__
  88            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
  89            if (
  90                self.sample_name is not None
  91                and self.sample_name != self.spectra_parser.sample_name
  92            ):
  93                warnings.warn(
  94                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
  95                    UserWarning,
  96                )
  97            if self.analyzer != self.spectra_parser.analyzer:
  98                warnings.warn(
  99                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
 100                    UserWarning,
 101                )
 102            if self.instrument_label != self.spectra_parser.instrument_label:
 103                warnings.warn(
 104                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
 105                    UserWarning,
 106                )
 107            if self.file_location != self.spectra_parser.file_location:
 108                warnings.warn(
 109                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
 110                    UserWarning,
 111                )
 112
 113        # Instantiate empty dictionaries for scan information and mass spectra
 114        self._scan_info = {}
 115        self._ms = {}
 116        self._ms_unprocessed = {}
 117
 118    @property
 119    def spectra_parser(self):
 120        """Returns an instance of the spectra parser class."""
 121        return self.spectra_parser_class(self.file_location)
 122
 123    def add_mass_spectrum(self, mass_spec):
 124        """Adds a mass spectrum to the dataset.
 125
 126        Parameters
 127        -----------
 128        mass_spec : MassSpectrum
 129            The corems MassSpectrum object to be added to the dataset.
 130
 131        Notes
 132        -----
 133        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
 134        """
 135        # check if mass_spec has a scan_number attribute
 136        if not hasattr(mass_spec, "scan_number"):
 137            raise ValueError(
 138                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
 139            )
 140        self._ms[mass_spec.scan_number] = mass_spec
 141
 142    def add_mass_spectra(
 143        self,
 144        scan_list,
 145        spectrum_mode=None,
 146        ms_level=1,
 147        use_parser=True,
 148        auto_process=True,
 149        ms_params=None,
 150    ):
 151        """Add mass spectra to _ms dictionary, from a list of scans or single scan
 152
 153        Notes
 154        -----
 155        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
 156
 157
 158        Parameters
 159        -----------
 160        scan_list : list of ints
 161            List of scans to use to populate _ms slot
 162        spectrum_mode : str or None
 163            The spectrum mode to use for the mass spectra.
 164            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
 165            Defaults to None.
 166        ms_level : int, optional
 167            The MS level to use for the mass spectra.
 168            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
 169            Defaults to 1.
 170        using_parser : bool
 171            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
 172        auto_process : bool
 173            Whether to auto-process the mass spectra.  Defaults to True.
 174        ms_params : MSParameters or None
 175            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
 176
 177        Raises
 178        ------
 179        TypeError
 180            If scan_list is not a list of ints
 181        ValueError
 182            If polarity is not 'positive' or 'negative'
 183            If ms_level is not 1 or 2
 184        """
 185
 186        # check if scan_list is a list or a single int; if single int, convert to list
 187        if isinstance(scan_list, int):
 188            scan_list = [scan_list]
 189        if not isinstance(scan_list, list):
 190            raise TypeError("scan_list must be a list of integers")
 191        for scan in scan_list:
 192            if not isinstance(scan, int):
 193                raise TypeError("scan_list must be a list of integers")
 194
 195        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
 196        if self.polarity == "negative":
 197            polarity = -1
 198        elif self.polarity == "positive":
 199            polarity = 1
 200        else:
 201            raise ValueError(
 202                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
 203            )
 204
 205        # is not using_parser, check that ms1 and ms2 are not None
 206        if not use_parser:
 207            if ms_level not in self._ms_unprocessed.keys():
 208                raise ValueError(
 209                    "ms_level {} not found in _ms_unprocessed dictionary".format(
 210                        ms_level
 211                    )
 212                )
 213
 214        scan_list = list(set(scan_list))
 215        scan_list.sort()
 216        if not use_parser:
 217            if self._ms_unprocessed[ms_level] is None:
 218                raise ValueError(
 219                    "No unprocessed data found for ms_level {}".format(ms_level)
 220                )
 221            if (
 222                len(
 223                    np.setdiff1d(
 224                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
 225                    )
 226                )
 227                > 0
 228            ):
 229                raise ValueError(
 230                    "Not all scans in scan_list are present in the unprocessed data"
 231                )
 232            # Prepare the ms_df for parsing
 233            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
 234
 235        if use_parser:
 236            # Use batch function to get all mass spectra at once
 237            if spectrum_mode is None:
 238                # get spectrum mode from _scan_info for each scan
 239                spectrum_modes = [self.scan_df.loc[scan, "ms_format"] for scan in scan_list]
 240                spectrum_mode_batch = spectrum_modes[0] if len(set(spectrum_modes)) == 1 else None
 241            else:
 242                spectrum_mode_batch = spectrum_mode
 243            
 244            ms_list = self.spectra_parser.get_mass_spectra_from_scan_list(
 245                scan_list=scan_list,
 246                spectrum_mode=spectrum_mode_batch,
 247                auto_process=False,
 248            )
 249            
 250            # Process each mass spectrum
 251            for i, scan in enumerate(scan_list):
 252                ms = ms_list[i] if i < len(ms_list) else None
 253                if ms is not None:
 254                    if ms_params is not None:
 255                        ms.parameters = ms_params
 256                    ms.scan_number = scan
 257                    if auto_process:
 258                        ms.process_mass_spec()
 259                    self.add_mass_spectrum(ms)
 260        else:
 261            # Original non-parser logic remains unchanged
 262            for scan in scan_list:
 263                ms = None
 264                if spectrum_mode is None:
 265                    # get spectrum mode from _scan_info
 266                    spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
 267                else:
 268                    spectrum_mode_scan = spectrum_mode
 269                
 270                my_ms_df = ms_df.loc[scan]
 271                if spectrum_mode_scan == "profile":
 272                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
 273                    ms = ms_from_array_profile(
 274                        my_ms_df.mz,
 275                        my_ms_df.intensity,
 276                        self.file_location,
 277                        polarity=polarity,
 278                        auto_process=False,
 279                    )
 280                else:
 281                   ms = ms_from_array_centroid(
 282                        mz = my_ms_df.mz,
 283                        abundance = my_ms_df.intensity,
 284                        rp = [np.nan] * len(my_ms_df.mz),
 285                        s2n = [np.nan] * len(my_ms_df.mz),
 286                        dataname = self.file_location,
 287                        polarity=polarity,
 288                        auto_process=False,
 289                    )
 290
 291                # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
 292                if ms is not None:
 293                    if ms_params is not None:
 294                        ms.parameters = ms_params
 295                    ms.scan_number = scan
 296                    if auto_process:
 297                        ms.process_mass_spec()
 298                    self.add_mass_spectrum(ms)
 299
 300    def get_time_of_scan_id(self, scan):
 301        """Returns the scan time for the specified scan number.
 302
 303        Parameters
 304        -----------
 305        scan : int
 306            The scan number of the desired scan time.
 307
 308        Returns
 309        --------
 310        float
 311            The scan time for the specified scan number (in minutes).
 312
 313        Raises
 314        ------
 315        ValueError
 316            If no scan time is found for the specified scan number.
 317        """
 318        # Check if _retenion_time_list is empty and raise error if so
 319        if len(self._retention_time_list) == 0:
 320            raise ValueError("No retention times found in dataset")
 321        rt = self._retention_time_list[self._scans_number_list.index(scan)]
 322        return rt
 323
 324    @property
 325    def scan_df(self):
 326        """
 327        pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).
 328        """
 329        scan_df = pd.DataFrame.from_dict(self._scan_info)
 330        return scan_df
 331        
 332    @property
 333    def ms(self):
 334        """
 335        dictionary : contains the key associated with mass spectra and values are the associated MassSpecProfiles
 336        """
 337        return self._ms
 338
 339    
 340    @scan_df.setter
 341    def scan_df(self, df):
 342        """
 343        Sets the scan data for the dataset.
 344
 345        Parameters
 346        -----------
 347        df : pandas.DataFrame
 348            A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level,
 349            precursor m/z, scan text, and scan window (lower and upper).
 350        """
 351        self._scan_info = df.to_dict()
 352
 353    def __getitem__(self, scan_number):
 354        return self._ms.get(scan_number)
 355
 356
 357class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch):
 358    """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.
 359
 360    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.
 361
 362    Parameters
 363    -----------
 364    file_location : str or Path
 365        The location of the file containing the mass spectra data.
 366    analyzer : str, optional
 367        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
 368    instrument_label : str, optional
 369        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
 370    sample_name : str, optional
 371        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
 372    spectra_parser : object, optional
 373        The spectra parser object used to create the mass spectra object. Defaults to None.
 374
 375    Attributes
 376    -----------
 377    polarity : str
 378        The polarity of the ionization mode used for the dataset.
 379    _parameters : LCMSParameters
 380        The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
 381    _retention_time_list : numpy.ndarray
 382        An array of retention times for the dataset.
 383    _scans_number_list : list
 384        A list of scan numbers for the dataset.
 385    _tic_list : numpy.ndarray
 386        An array of total ion current (TIC) values for the dataset.
 387    eics : dict
 388        A dictionary containing extracted ion chromatograms (EICs) for the dataset.
 389        Key is the mz of the EIC. Initialized as an empty dictionary.
 390    mass_features : dictionary of LCMSMassFeature objects
 391        A dictionary containing mass features for the dataset.
 392        Key is mass feature ID. Initialized as an empty dictionary.
 393    spectral_search_results : dictionary of MS2SearchResults objects
 394        A dictionary containing spectral search results for the dataset.
 395        Key is scan number : precursor mz. Initialized as an empty dictionary.
 396
 397    Methods
 398    --------
 399    * get_parameters_json().
 400        Returns the parameters used for the LC-MS analysis in JSON format.
 401    * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True)
 402        Adds which MS2 scans are associated with each mass feature to the
 403        mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
 404    * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True)
 405        Adds the MS1 spectra associated with each mass feature to the
 406        mass_features dictionary and adds the MS1 spectra to the _ms dictionary.
 407    * mass_features_to_df()
 408        Returns a pandas dataframe summarizing the mass features in the dataset.
 409    * set_tic_list_from_data(overwrite=False)
 410        Sets the TIC list from the mass spectrum objects within the _ms dictionary.
 411    * set_retention_time_from_data(overwrite=False)
 412        Sets the retention time list from the data in the _ms dictionary.
 413    * set_scans_number_from_data(overwrite=False)
 414        Sets the scan number list from the data in the _ms dictionary.
 415    * plot_composite_mz_features(binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False)
 416        Generates plot of M/Z features comparing scan time vs M/Z value
 417    """
 418
 419    def __init__(
 420        self,
 421        file_location,
 422        analyzer="Unknown",
 423        instrument_label="Unknown",
 424        sample_name=None,
 425        spectra_parser=None,
 426    ):
 427        super().__init__(
 428            file_location, analyzer, instrument_label, sample_name, spectra_parser
 429        )
 430        self.polarity = ""
 431        self._parameters = LCMSParameters()
 432        self._retention_time_list = []
 433        self._scans_number_list = []
 434        self._tic_list = []
 435        self.eics = {}
 436        self.mass_features = {}
 437        self.spectral_search_results = {}
 438
 439    def get_parameters_json(self):
 440        """Returns the parameters stored for the LC-MS object in JSON format.
 441
 442        Returns
 443        --------
 444        str
 445            The parameters used for the LC-MS analysis in JSON format.
 446        """
 447        return self.parameters.to_json()
 448
 449    def remove_unprocessed_data(self, ms_level=None):
 450        """Removes the unprocessed data from the LCMSBase object.
 451
 452        Parameters
 453        -----------
 454        ms_level : int, optional
 455            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
 456
 457        Raises
 458        ------
 459        ValueError
 460            If ms_level is not 1 or 2.
 461
 462        Notes
 463        -----
 464        This method is useful for freeing up memory after the data has been processed.
 465        """
 466        if ms_level is None:
 467            for ms_level in self._ms_unprocessed.keys():
 468                self._ms_unprocessed[ms_level] = None
 469        if ms_level not in [1, 2]:
 470            raise ValueError("ms_level must be 1 or 2")
 471        self._ms_unprocessed[ms_level] = None
 472
 473    def add_associated_ms2_dda(
 474        self,
 475        auto_process=True,
 476        use_parser=True,
 477        spectrum_mode=None,
 478        ms_params_key="ms2",
 479        scan_filter=None,
 480    ):
 481        """Add MS2 spectra associated with mass features to the dataset.
 482
 483        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
 484
 485        Parameters
 486        -----------
 487        auto_process : bool, optional
 488            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
 489        use_parser : bool, optional
 490            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
 491        spectrum_mode : str or None, optional
 492            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
 493            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
 494            Defaults to None. (faster if defined, otherwise will check each scan)
 495        ms_params_key : string, optional
 496            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
 497            Defaults to 'ms2'.
 498        scan_filter : str
 499            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
 500            "hcd" will pull out only HCD scans.
 501
 502        Raises
 503        ------
 504        ValueError
 505            If mass_features is not set, must run find_mass_features() first.
 506            If no MS2 scans are found in the dataset.
 507            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
 508        """
 509        # Check if mass_features is set, raise error if not
 510        if self.mass_features is None:
 511            raise ValueError(
 512                "mass_features not set, must run find_mass_features() first"
 513            )
 514
 515        # reconfigure ms_params to get the correct mass spectrum parameters from the key
 516        ms_params = self.parameters.mass_spectrum[ms_params_key]
 517
 518        mf_df = self.mass_features_to_df().copy()
 519        # Find ms2 scans that have a precursor m/z value
 520        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
 521        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
 522        # drop ms2 scans that have no tic
 523        ms2_scans = ms2_scans[ms2_scans.tic > 0]
 524        if ms2_scans is None:
 525            raise ValueError("No DDA scans found in dataset")
 526
 527        if scan_filter is not None:
 528            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
 529        # set tolerance in rt space (in minutes) and mz space (in daltons)
 530        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
 531        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
 532
 533        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
 534        dda_scans = []
 535        for i, row in mf_df.iterrows():
 536            ms2_scans_filtered = ms2_scans[
 537                ms2_scans.scan_time.between(
 538                    row.scan_time - time_tol, row.scan_time + time_tol
 539                )
 540            ]
 541            ms2_scans_filtered = ms2_scans_filtered[
 542                ms2_scans_filtered.precursor_mz.between(
 543                    row.mz - mz_tol, row.mz + mz_tol
 544                )
 545            ]
 546            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
 547            self.mass_features[i].ms2_scan_numbers = (
 548                ms2_scans_filtered.scan.tolist()
 549                + self.mass_features[i].ms2_scan_numbers
 550            )
 551        # add to _ms attribute
 552        self.add_mass_spectra(
 553            scan_list=list(set(dda_scans)),
 554            auto_process=auto_process,
 555            spectrum_mode=spectrum_mode,
 556            use_parser=use_parser,
 557            ms_params=ms_params,
 558        )
 559        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
 560        for mf_id in self.mass_features:
 561            if self.mass_features[mf_id].ms2_scan_numbers is not None:
 562                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
 563                    if dda_scan in self._ms.keys():
 564                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
 565                            dda_scan
 566                        ]
 567
 568    def add_associated_ms1(
 569        self, auto_process=True, use_parser=True, spectrum_mode=None
 570    ):
 571        """Add MS1 spectra associated with mass features to the dataset.
 572
 573        Parameters
 574        -----------
 575        auto_process : bool, optional
 576            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
 577        use_parser : bool, optional
 578            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
 579        spectrum_mode : str or None, optional
 580            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
 581            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
 582            Defaults to None. (faster if defined, otherwise will check each scan)
 583
 584        Raises
 585        ------
 586        ValueError
 587            If mass_features is not set, must run find_mass_features() first.
 588            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
 589            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
 590            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
 591        """
 592        # Check if mass_features is set, raise error if not
 593        if self.mass_features is None:
 594            raise ValueError(
 595                "mass_features not set, must run find_mass_features() first"
 596            )
 597        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
 598
 599        if scans_to_average == 1:
 600            # Add to LCMSobj
 601            self.add_mass_spectra(
 602                scan_list=[
 603                    int(mf.apex_scan) for mf in self.mass_features.values()
 604                ],
 605                auto_process=auto_process,
 606                use_parser=use_parser,
 607                spectrum_mode=spectrum_mode,
 608                ms_params=self.parameters.mass_spectrum["ms1"],
 609            )
 610
 611        elif (
 612            (scans_to_average - 1) % 2
 613        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
 614            apex_scans = list(set([int(mf.apex_scan) for mf in self.mass_features.values()]))
 615            # Check if all apex scans are profile mode, raise error if not
 616            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
 617                raise ValueError("All apex scans must be profile mode for averaging")
 618
 619            # First get sets of scans to average
 620            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
 621                ms1_idx_start = ms1_scans.index(apex_scan) - int(
 622                    (scans_to_average - 1) / 2
 623                )
 624                if ms1_idx_start < 0:
 625                    ms1_idx_start = 0
 626                ms1_idx_end = (
 627                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
 628                )
 629                if ms1_idx_end > (len(ms1_scans) - 1):
 630                    ms1_idx_end = len(ms1_scans) - 1
 631                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
 632                return scan_list
 633
 634            ms1_scans = self.ms1_scans
 635            scans_lists = [
 636                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
 637                for apex_scan in apex_scans
 638            ]
 639
 640            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
 641            if self.polarity == "negative":
 642                polarity = -1
 643            elif self.polarity == "positive":
 644                polarity = 1
 645
 646            if not use_parser:
 647                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
 648                ms1_unprocessed = self._ms_unprocessed[1].copy()
 649                # Set the index on _ms_unprocessed[1] to scan number
 650                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
 651                self._ms_unprocessed[1] = ms1_unprocessed
 652
 653                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
 654                scans_lists_flat = list(
 655                    set([scan for sublist in scans_lists for scan in sublist])
 656                )
 657                if (
 658                    len(
 659                        np.setdiff1d(
 660                            np.sort(scans_lists_flat),
 661                            np.sort(ms1_unprocessed.index.values),
 662                        )
 663                    )
 664                    > 0
 665                ):
 666                    raise ValueError(
 667                        "Not all scans to average are present in the unprocessed data"
 668                    )
 669
 670            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
 671                # Get unprocessed mass spectrum from scans
 672                ms = self.get_average_mass_spectrum(
 673                    scan_list=scan_list_average,
 674                    apex_scan=apex_scan,
 675                    spectrum_mode="profile",
 676                    ms_level=1,
 677                    auto_process=auto_process,
 678                    use_parser=use_parser,
 679                    perform_checks=False,
 680                    polarity=polarity,
 681                    ms_params=self.parameters.mass_spectrum["ms1"],
 682                )
 683                # Add mass spectrum to LCMS object and associated with mass feature
 684                self.add_mass_spectrum(ms)
 685
 686            if not use_parser:
 687                # Reset the index on _ms_unprocessed[1] to not be scan number
 688                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
 689                self._ms_unprocessed[1] = ms1_unprocessed
 690        else:
 691            raise ValueError(
 692                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
 693            )
 694
 695        # Associate the ms1 spectra with the mass features
 696        for mf_id in self.mass_features:
 697            self.mass_features[mf_id].mass_spectrum = self._ms[
 698                self.mass_features[mf_id].apex_scan
 699            ]
 700            self.mass_features[mf_id].update_mz()
 701
 702    def mass_features_to_df(self):
 703        """Returns a pandas dataframe summarizing the mass features.
 704
 705        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
 706        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
 707
 708        Returns
 709        --------
 710        pandas.DataFrame
 711            A pandas dataframe of mass features with the following columns:
 712            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
 713        """
 714
 715        def mass_spectrum_to_string(
 716            mass_spec, normalize=True, min_normalized_abun=0.01
 717        ):
 718            """Converts a mass spectrum to a string of m/z:abundance pairs.
 719
 720            Parameters
 721            -----------
 722            mass_spec : MassSpectrum
 723                A MassSpectrum object to be converted to a string.
 724            normalize : bool, optional
 725                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
 726            min_normalized_abun : float, optional
 727                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
 728
 729            Returns
 730            --------
 731            str
 732                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
 733            """
 734            mz_np = mass_spec.to_dataframe()["m/z"].values
 735            abun_np = mass_spec.to_dataframe()["Peak Height"].values
 736            if normalize:
 737                abun_np = abun_np / abun_np.max()
 738            mz_abun = np.column_stack((mz_np, abun_np))
 739            if normalize:
 740                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
 741            mz_abun_str = [
 742                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
 743                for mz, abun in mz_abun
 744            ]
 745            return "; ".join(mz_abun_str)
 746
 747        cols_in_df = [
 748            "id",
 749            "apex_scan",
 750            "start_scan",
 751            "final_scan",
 752            "retention_time",
 753            "intensity",
 754            "persistence",
 755            "area",
 756            "dispersity_index",
 757            "normalized_dispersity_index",
 758            "tailing_factor",
 759            "gaussian_similarity",
 760            "noise_score",
 761            "noise_score_min",
 762            "noise_score_max",
 763            "monoisotopic_mf_id",
 764            "isotopologue_type",
 765            "mass_spectrum_deconvoluted_parent",
 766        ]
 767        df_mf_list = []
 768        for mf_id in self.mass_features.keys():
 769            # Find cols_in_df that are in single_mf
 770            df_keys = list(
 771                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
 772            )
 773            dict_mf = {}
 774            # Get the values for each key in df_keys from the mass feature object
 775            for key in df_keys:
 776                dict_mf[key] = getattr(self.mass_features[mf_id], key)
 777            # Special handling for mass_spectrum and associated_mass_features_deconvoluted, since they are not single values
 778            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
 779                # Add MS2 spectra info
 780                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
 781                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
 782            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
 783                dict_mf["associated_mass_features"] = ", ".join(
 784                    map(
 785                        str,
 786                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
 787                    )
 788                )
 789            # Check if EIC for mass feature is set
 790            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
 791            df_mf_single["mz"] = self.mass_features[mf_id].mz
 792            df_mf_list.append(df_mf_single)
 793        df_mf = pd.concat(df_mf_list)
 794
 795        # rename _area to area and id to mf_id
 796        df_mf = df_mf.rename(
 797            columns={
 798                "id": "mf_id",
 799                "retention_time": "scan_time",            
 800            }
 801        )
 802
 803        # reorder columns
 804        col_order = [
 805            "mf_id",
 806            "scan_time",
 807            "mz",
 808            "apex_scan",
 809            "start_scan",
 810            "final_scan",
 811            "intensity",
 812            "persistence",
 813            "area",
 814            "half_height_width",
 815            "tailing_factor",
 816            "dispersity_index",
 817            "normalized_dispersity_index",
 818            "gaussian_similarity",
 819            "noise_score",
 820            "noise_score_min",
 821            "noise_score_max",
 822            "monoisotopic_mf_id",
 823            "isotopologue_type",
 824            "mass_spectrum_deconvoluted_parent",
 825            "associated_mass_features",
 826            "ms2_spectrum",
 827        ]
 828        # drop columns that are not in col_order
 829        cols_to_order = [col for col in col_order if col in df_mf.columns]
 830        df_mf = df_mf[cols_to_order]
 831
 832        # reset index to mf_id
 833        df_mf = df_mf.set_index("mf_id")
 834        df_mf.index.name = "mf_id"
 835
 836        return df_mf
 837
 838    def mass_features_ms1_annot_to_df(self):
 839        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
 840
 841        Returns
 842        --------
 843        pandas.DataFrame
 844            A pandas dataframe of MS1 annotations for the mass features in the dataset.
 845            The index is set to mf_id (mass feature ID)
 846
 847        Raises
 848        ------
 849        Warning
 850            If no MS1 annotations were found for the mass features in the dataset.
 851        """
 852        annot_df_list_ms1 = []
 853        for mf_id in self.mass_features.keys():
 854            if self.mass_features[mf_id].mass_spectrum is None:
 855                pass
 856            else:
 857                # Add ms1 annotations to ms1 annotation list
 858                if (
 859                    np.abs(
 860                        (
 861                            self.mass_features[mf_id].ms1_peak.mz_exp
 862                            - self.mass_features[mf_id].mz
 863                        )
 864                    )
 865                    < 0.01
 866                ):
 867                    # Get the molecular formula from the mass spectrum
 868                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
 869                    # Subset to pull out only the peak associated with the mass feature
 870                    annot_df = annot_df[
 871                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
 872                    ].copy()
 873
 874                    # If there are more than 1 row, remove any rows without a molecular formula
 875                    if len(annot_df) > 1:
 876                        annot_df = annot_df[~annot_df["Molecular Formula"].isna()]
 877
 878                    # Remove the index column and add column for mf_id
 879                    annot_df = annot_df.drop(columns=["Index"])
 880                    annot_df["mf_id"] = mf_id
 881                    annot_df_list_ms1.append(annot_df)
 882
 883        if len(annot_df_list_ms1) > 0:
 884            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
 885            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
 886            annot_ms1_df_full.index.name = "mf_id"
 887
 888        else:
 889            annot_ms1_df_full = None
 890            # Warn that no ms1 annotations were found
 891            warnings.warn(
 892                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
 893                UserWarning,
 894            )
 895
 896        return annot_ms1_df_full
 897
 898    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
 899        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
 900
 901        Parameters
 902        -----------
 903        molecular_metadata :  dict of MolecularMetadata objects
 904            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
 905
 906        Returns
 907        --------
 908        pandas.DataFrame
 909            A pandas dataframe of MS2 annotations for the mass features in the dataset,
 910            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
 911
 912        Raises
 913        ------
 914        Warning
 915            If no MS2 annotations were found for the mass features in the dataset.
 916        """
 917        annot_df_list_ms2 = []
 918        for mf_id in self.mass_features.keys():
 919            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
 920                # Add ms2 annotations to ms2 annotation list
 921                for result in self.mass_features[mf_id].ms2_similarity_results:
 922                    annot_df_ms2 = result.to_dataframe()
 923                    annot_df_ms2["mf_id"] = mf_id
 924                    annot_df_list_ms2.append(annot_df_ms2)
 925
 926        if len(annot_df_list_ms2) > 0:
 927            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
 928            if molecular_metadata is not None:
 929                molecular_metadata_df = pd.concat(
 930                    [
 931                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
 932                        for k, v in molecular_metadata.items()
 933                    ],
 934                    ignore_index=True,
 935                )
 936                molecular_metadata_df = molecular_metadata_df.rename(
 937                    columns={"id": "ref_mol_id"}
 938                )
 939                annot_ms2_df_full = annot_ms2_df_full.merge(
 940                    molecular_metadata_df, on="ref_mol_id", how="left"
 941                )
 942            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
 943                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
 944            ).copy()
 945            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
 946            annot_ms2_df_full.index.name = "mf_id"
 947        else:
 948            annot_ms2_df_full = None
 949            # Warn that no ms2 annotations were found
 950            warnings.warn(
 951                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
 952                UserWarning,
 953            )
 954
 955        return annot_ms2_df_full
 956
 957    def plot_composite_mz_features(self, binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False):
 958        """Returns a figure displaying 
 959            (1) thresholded, unprocessed data
 960            (2) the m/z features
 961            (3) which m/z features are associated with MS2 spectra
 962
 963        Parameters
 964        -----------
 965        binsize :  float
 966            Desired binsize for the m/z axis of the composite feature map.  Defaults to 1e-4.
 967        mf_plot : boolean
 968            Indicates whether to plot the m/z features. Defaults to True.
 969        ms2_plot : boolean
 970            Indicates whether to identify m/z features with associated MS2 spectra. Defaults to True.
 971        return_fig : boolean
 972            Indicates whether to plot composite feature map (False) or return figure object (True). Defaults to False.
 973
 974        Returns
 975        --------
 976        matplotlib.pyplot.Figure
 977            A figure with the thresholded, unprocessed data on an axis of m/z value with respect to 
 978            scan time. Unprocessed data is displayed in gray scale with darker colors indicating 
 979            higher intensities. If m/z features are plotted, they are displayed in cyan. If m/z
 980            features with associated with MS2 spectra are plotted, they are displayed in red.
 981
 982        Raises
 983        ------
 984        Warning
 985            If m/z features are set to be plot but aren't in the dataset.
 986            If m/z features with associated MS2 data are set to be plot but no MS2 annotations 
 987            were found for the m/z features in the dataset.
 988        """
 989        if mf_plot:
 990            # Check if mass_features is set, raise error if not
 991            if self.mass_features is None:
 992                raise ValueError(
 993                    "mass_features not set, must run find_mass_features() first"
 994                )
 995            ## call mass feature data
 996            mf_df = self.mass_features_to_df()
 997
 998        if ms2_plot:
 999            if not mf_plot:
1000                # Check if mass_features is set, raise error if not
1001                if self.mass_features is None:
1002                    raise ValueError(
1003                        "mass_features not set, must run find_mass_features() first"
1004                    )
1005
1006            ## call m/z feature data
1007            mf_df = self.mass_features_to_df()
1008
1009            # Check if ms2_spectrum is set, raise error if not
1010            if 'ms2_spectrum' not in mf_df.columns:
1011                raise ValueError(                
1012                    "ms2_spectrum not set, must run add_associated_ms2_dda() first"            
1013                )
1014    
1015        ## threshold and grid unprocessed data
1016        df = self._ms_unprocessed[1].copy()
1017        df = df.dropna(subset=['intensity']).reset_index(drop = True)
1018        threshold = ph_int_min_thresh * df.intensity.max()
1019        df_thres = df[df["intensity"] > threshold].reset_index(drop = True).copy()
1020        df = self.grid_data(df_thres)
1021    
1022        ## format unprocessed data for plotting
1023        df = df.merge(self.scan_df[['scan', 'scan_time']], on = 'scan')
1024        mz_grid = np.arange(0, np.max(df.mz), binsize)
1025        mz_data = np.array(df.mz)
1026        df['mz_bin'] = find_closest(mz_grid, mz_data)
1027        df['ab_bin'] = df.groupby(['mz_bin', 'scan_time']).intensity.transform(sum)
1028        unproc_df = df[['scan_time', 'mz_bin', 'ab_bin']].drop_duplicates(ignore_index = True)
1029
1030        ## generate figure
1031        fig = plt.figure()
1032        plt.scatter(
1033            unproc_df.scan_time,
1034            unproc_df.mz_bin*binsize,
1035            c = unproc_df.ab_bin/np.max(unproc_df.ab_bin),
1036            alpha = unproc_df.ab_bin/np.max(unproc_df.ab_bin), 
1037            cmap = 'Greys_r',
1038            s = 1
1039        )
1040
1041        if mf_plot:
1042            if ms2_plot:
1043                plt.scatter(
1044                    mf_df[mf_df.ms2_spectrum.isna()].scan_time,
1045                    mf_df[mf_df.ms2_spectrum.isna()].mz,
1046                    c = 'c',
1047                    s = 4,
1048                    label = 'M/Z features without MS2'
1049                )
1050            else:
1051                plt.scatter(
1052                    mf_df.scan_time,
1053                    mf_df.mz,
1054                    c = 'c',
1055                    s = 4,
1056                    label = 'M/Z features'
1057                )
1058
1059        if ms2_plot: 
1060            plt.scatter(
1061                mf_df[~mf_df.ms2_spectrum.isna()].scan_time,
1062                mf_df[~mf_df.ms2_spectrum.isna()].mz,
1063                c = 'r',
1064                s = 2,
1065                label = 'M/Z features with MS2'
1066            )
1067
1068        if mf_plot == True or ms2_plot == True:
1069            plt.legend(loc = 'lower center', bbox_to_anchor = (0.5, -0.25), ncol = 2)
1070        plt.xlabel('Scan time')
1071        plt.ylabel('m/z')
1072        plt.ylim(0, np.ceil(np.max(df.mz)))
1073        plt.xlim(0, np.ceil(np.max(df.scan_time)))
1074        plt.title('Composite Feature Map')
1075
1076        if return_fig:
1077            plt.close(fig)
1078            return fig
1079
1080        else:
1081            plt.show()
1082
1083    def __len__(self):
1084        """
1085        Returns the number of mass spectra in the dataset.
1086
1087        Returns
1088        --------
1089        int
1090            The number of mass spectra in the dataset.
1091        """
1092        return len(self._ms)
1093
1094    def __getitem__(self, scan_number):
1095        """
1096        Returns the mass spectrum corresponding to the specified scan number.
1097
1098        Parameters
1099        -----------
1100        scan_number : int
1101            The scan number of the desired mass spectrum.
1102
1103        Returns
1104        --------
1105        MassSpectrum
1106            The mass spectrum corresponding to the specified scan number.
1107        """
1108        return self._ms.get(scan_number)
1109
1110    def __iter__(self):
1111        """Returns an iterator over the mass spectra in the dataset.
1112
1113        Returns
1114        --------
1115        iterator
1116            An iterator over the mass spectra in the dataset.
1117        """
1118        return iter(self._ms.values())
1119
1120    def set_tic_list_from_data(self, overwrite=False):
1121        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
1122
1123        Parameters
1124        -----------
1125        overwrite : bool, optional
1126            If True, overwrites the TIC list if it is already set. Defaults to False.
1127
1128        Notes
1129        -----
1130        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
1131
1132        Raises
1133        ------
1134        ValueError
1135            If no mass spectra are found in the dataset.
1136            If the TIC list is already set and overwrite is False.
1137        """
1138        # Check if _ms is empty and raise error if so
1139        if len(self._ms) == 0:
1140            raise ValueError("No mass spectra found in dataset")
1141
1142        # Check if tic_list is already set and raise error if so
1143        if len(self.tic) > 0 and not overwrite:
1144            raise ValueError("TIC list already set, use overwrite=True to overwrite")
1145
1146        self.tic = [self._ms.get(i).tic for i in self.scans_number]
1147
1148    def set_retention_time_from_data(self, overwrite=False):
1149        """Sets the retention time list from the data in the _ms dictionary.
1150
1151        Parameters
1152        -----------
1153        overwrite : bool, optional
1154            If True, overwrites the retention time list if it is already set. Defaults to False.
1155
1156        Notes
1157        -----
1158        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
1159
1160        Raises
1161        ------
1162        ValueError
1163            If no mass spectra are found in the dataset.
1164            If the retention time list is already set and overwrite is False.
1165        """
1166        # Check if _ms is empty and raise error if so
1167        if len(self._ms) == 0:
1168            raise ValueError("No mass spectra found in dataset")
1169
1170        # Check if retention_time_list is already set and raise error if so
1171        if len(self.retention_time) > 0 and not overwrite:
1172            raise ValueError(
1173                "Retention time list already set, use overwrite=True to overwrite"
1174            )
1175
1176        retention_time_list = []
1177        for key_ms in sorted(self._ms.keys()):
1178            retention_time_list.append(self._ms.get(key_ms).retention_time)
1179        self.retention_time = retention_time_list
1180
1181    def set_scans_number_from_data(self, overwrite=False):
1182        """Sets the scan number list from the data in the _ms dictionary.
1183
1184        Notes
1185        -----
1186        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
1187
1188        Raises
1189        ------
1190        ValueError
1191            If no mass spectra are found in the dataset.
1192            If the scan number list is already set and overwrite is False.
1193        """
1194        # Check if _ms is empty and raise error if so
1195        if len(self._ms) == 0:
1196            raise ValueError("No mass spectra found in dataset")
1197
1198        # Check if scans_number_list is already set and raise error if so
1199        if len(self.scans_number) > 0 and not overwrite:
1200            raise ValueError(
1201                "Scan number list already set, use overwrite=True to overwrite"
1202            )
1203
1204        self.scans_number = sorted(self._ms.keys())
1205
1206    @property
1207    def ms1_scans(self):
1208        """
1209        list : A list of MS1 scan numbers for the dataset.
1210        """
1211        return self.scan_df[self.scan_df.ms_level == 1].index.tolist()
1212
1213    @property
1214    def parameters(self):
1215        """
1216        LCMSParameters : The parameters used for the LC-MS analysis.
1217        """
1218        return self._parameters
1219
1220    @parameters.setter
1221    def parameters(self, paramsinstance):
1222        """
1223        Sets the parameters used for the LC-MS analysis.
1224
1225        Parameters
1226        -----------
1227        paramsinstance : LCMSParameters
1228            The parameters used for the LC-MS analysis.
1229        """
1230        self._parameters = paramsinstance
1231
1232    @property
1233    def scans_number(self):
1234        """
1235        list : A list of scan numbers for the dataset.
1236        """
1237        return self._scans_number_list
1238
1239    @scans_number.setter
1240    def scans_number(self, scan_numbers_list):
1241        """
1242        Sets the scan numbers for the dataset.
1243
1244        Parameters
1245        -----------
1246        scan_numbers_list : list
1247            A list of scan numbers for the dataset.
1248        """
1249        self._scans_number_list = scan_numbers_list
1250
1251    @property
1252    def retention_time(self):
1253        """
1254        numpy.ndarray : An array of retention times for the dataset.
1255        """
1256        return self._retention_time_list
1257
1258    @retention_time.setter
1259    def retention_time(self, rt_list):
1260        """
1261        Sets the retention times for the dataset.
1262
1263        Parameters
1264        -----------
1265        rt_list : list
1266            A list of retention times for the dataset.
1267        """
1268        self._retention_time_list = np.array(rt_list)
1269
1270    @property
1271    def tic(self):
1272        """
1273        numpy.ndarray : An array of TIC values for the dataset.
1274        """
1275        return self._tic_list
1276
1277    @tic.setter
1278    def tic(self, tic_list):
1279        """
1280        Sets the TIC values for the dataset.
1281
1282        Parameters
1283        -----------
1284        tic_list : list
1285            A list of TIC values for the dataset.
1286        """
1287        self._tic_list = np.array(tic_list)
class MassSpectraBase:
 16class MassSpectraBase:
 17    """Base class for mass spectra objects.
 18
 19    Parameters
 20    -----------
 21    file_location : str or Path
 22        The location of the file containing the mass spectra data.
 23    analyzer : str, optional
 24        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
 25    instrument_label : str, optional
 26        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
 27    sample_name : str, optional
 28        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
 29    spectra_parser : object, optional
 30        The spectra parser object used to create the mass spectra object. Defaults to None.
 31
 32    Attributes
 33    -----------
 34    spectra_parser_class : class
 35        The class of the spectra parser used to create the mass spectra object.
 36    file_location : str or Path
 37        The location of the file containing the mass spectra data.
 38    sample_name : str
 39        The name of the sample; defaults to the file name if not provided to the parser.
 40    analyzer : str
 41        The type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
 42    instrument_label : str
 43        The type of instrument used to generate the mass spectra data. Derived from the spectra parser.
 44    _scan_info : dict
 45        A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,
 46        scan text, and scan window (lower and upper).
 47        Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
 48    _ms : dict
 49        A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
 50    _ms_unprocessed: dictionary of pandas.DataFrames or None
        A dictionary of unprocessed mass spectra data, as an (optional) intermediate data product for peak picking.
 52        Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
 53
 54    Methods
 55    --------
 56    * add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True).
        Add mass spectra (or a single mass spectrum) to _ms slot, from a list of scans
 58    * get_time_of_scan_id(scan).
 59        Returns the scan time for the specified scan number.
 60    """
 61
 62    def __init__(
 63        self,
 64        file_location,
 65        analyzer="Unknown",
 66        instrument_label="Unknown",
 67        sample_name=None,
 68        spectra_parser=None,
 69    ):
 70        if isinstance(file_location, str):
 71            file_location = Path(file_location)
 72        else:
 73            file_location = file_location
 74        if not file_location.exists():
 75            raise FileExistsError("File does not exist: " + str(file_location))
 76
 77        if sample_name:
 78            self.sample_name = sample_name
 79        else:
 80            self.sample_name = file_location.stem
 81
 82        self.file_location = file_location
 83        self.analyzer = analyzer
 84        self.instrument_label = instrument_label
 85
 86        # Add the spectra parser class to the object if it is not None
 87        if spectra_parser is not None:
 88            self.spectra_parser_class = spectra_parser.__class__
 89            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
 90            if (
 91                self.sample_name is not None
 92                and self.sample_name != self.spectra_parser.sample_name
 93            ):
 94                warnings.warn(
 95                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
 96                    UserWarning,
 97                )
 98            if self.analyzer != self.spectra_parser.analyzer:
 99                warnings.warn(
100                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
101                    UserWarning,
102                )
103            if self.instrument_label != self.spectra_parser.instrument_label:
104                warnings.warn(
105                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
106                    UserWarning,
107                )
108            if self.file_location != self.spectra_parser.file_location:
109                warnings.warn(
110                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
111                    UserWarning,
112                )
113
114        # Instantiate empty dictionaries for scan information and mass spectra
115        self._scan_info = {}
116        self._ms = {}
117        self._ms_unprocessed = {}
118
119    @property
120    def spectra_parser(self):
121        """Returns an instance of the spectra parser class."""
122        return self.spectra_parser_class(self.file_location)
123
124    def add_mass_spectrum(self, mass_spec):
125        """Adds a mass spectrum to the dataset.
126
127        Parameters
128        -----------
129        mass_spec : MassSpectrum
130            The corems MassSpectrum object to be added to the dataset.
131
132        Notes
133        -----
134        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
135        """
136        # check if mass_spec has a scan_number attribute
137        if not hasattr(mass_spec, "scan_number"):
138            raise ValueError(
139                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
140            )
141        self._ms[mass_spec.scan_number] = mass_spec
142
143    def add_mass_spectra(
144        self,
145        scan_list,
146        spectrum_mode=None,
147        ms_level=1,
148        use_parser=True,
149        auto_process=True,
150        ms_params=None,
151    ):
152        """Add mass spectra to _ms dictionary, from a list of scans or single scan
153
154        Notes
155        -----
156        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
157
158
159        Parameters
160        -----------
161        scan_list : list of ints
162            List of scans to use to populate _ms slot
163        spectrum_mode : str or None
164            The spectrum mode to use for the mass spectra.
165            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
166            Defaults to None.
167        ms_level : int, optional
168            The MS level to use for the mass spectra.
169            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
170            Defaults to 1.
171        using_parser : bool
172            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
173        auto_process : bool
174            Whether to auto-process the mass spectra.  Defaults to True.
175        ms_params : MSParameters or None
176            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
177
178        Raises
179        ------
180        TypeError
181            If scan_list is not a list of ints
182        ValueError
183            If polarity is not 'positive' or 'negative'
184            If ms_level is not 1 or 2
185        """
186
187        # check if scan_list is a list or a single int; if single int, convert to list
188        if isinstance(scan_list, int):
189            scan_list = [scan_list]
190        if not isinstance(scan_list, list):
191            raise TypeError("scan_list must be a list of integers")
192        for scan in scan_list:
193            if not isinstance(scan, int):
194                raise TypeError("scan_list must be a list of integers")
195
196        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
197        if self.polarity == "negative":
198            polarity = -1
199        elif self.polarity == "positive":
200            polarity = 1
201        else:
202            raise ValueError(
203                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
204            )
205
206        # is not using_parser, check that ms1 and ms2 are not None
207        if not use_parser:
208            if ms_level not in self._ms_unprocessed.keys():
209                raise ValueError(
210                    "ms_level {} not found in _ms_unprocessed dictionary".format(
211                        ms_level
212                    )
213                )
214
215        scan_list = list(set(scan_list))
216        scan_list.sort()
217        if not use_parser:
218            if self._ms_unprocessed[ms_level] is None:
219                raise ValueError(
220                    "No unprocessed data found for ms_level {}".format(ms_level)
221                )
222            if (
223                len(
224                    np.setdiff1d(
225                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
226                    )
227                )
228                > 0
229            ):
230                raise ValueError(
231                    "Not all scans in scan_list are present in the unprocessed data"
232                )
233            # Prepare the ms_df for parsing
234            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
235
236        if use_parser:
237            # Use batch function to get all mass spectra at once
238            if spectrum_mode is None:
239                # get spectrum mode from _scan_info for each scan
240                spectrum_modes = [self.scan_df.loc[scan, "ms_format"] for scan in scan_list]
241                spectrum_mode_batch = spectrum_modes[0] if len(set(spectrum_modes)) == 1 else None
242            else:
243                spectrum_mode_batch = spectrum_mode
244            
245            ms_list = self.spectra_parser.get_mass_spectra_from_scan_list(
246                scan_list=scan_list,
247                spectrum_mode=spectrum_mode_batch,
248                auto_process=False,
249            )
250            
251            # Process each mass spectrum
252            for i, scan in enumerate(scan_list):
253                ms = ms_list[i] if i < len(ms_list) else None
254                if ms is not None:
255                    if ms_params is not None:
256                        ms.parameters = ms_params
257                    ms.scan_number = scan
258                    if auto_process:
259                        ms.process_mass_spec()
260                    self.add_mass_spectrum(ms)
261        else:
262            # Original non-parser logic remains unchanged
263            for scan in scan_list:
264                ms = None
265                if spectrum_mode is None:
266                    # get spectrum mode from _scan_info
267                    spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
268                else:
269                    spectrum_mode_scan = spectrum_mode
270                
271                my_ms_df = ms_df.loc[scan]
272                if spectrum_mode_scan == "profile":
273                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
274                    ms = ms_from_array_profile(
275                        my_ms_df.mz,
276                        my_ms_df.intensity,
277                        self.file_location,
278                        polarity=polarity,
279                        auto_process=False,
280                    )
281                else:
282                   ms = ms_from_array_centroid(
283                        mz = my_ms_df.mz,
284                        abundance = my_ms_df.intensity,
285                        rp = [np.nan] * len(my_ms_df.mz),
286                        s2n = [np.nan] * len(my_ms_df.mz),
287                        dataname = self.file_location,
288                        polarity=polarity,
289                        auto_process=False,
290                    )
291
292                # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
293                if ms is not None:
294                    if ms_params is not None:
295                        ms.parameters = ms_params
296                    ms.scan_number = scan
297                    if auto_process:
298                        ms.process_mass_spec()
299                    self.add_mass_spectrum(ms)
300
301    def get_time_of_scan_id(self, scan):
302        """Returns the scan time for the specified scan number.
303
304        Parameters
305        -----------
306        scan : int
307            The scan number of the desired scan time.
308
309        Returns
310        --------
311        float
312            The scan time for the specified scan number (in minutes).
313
314        Raises
315        ------
316        ValueError
317            If no scan time is found for the specified scan number.
318        """
319        # Check if _retenion_time_list is empty and raise error if so
320        if len(self._retention_time_list) == 0:
321            raise ValueError("No retention times found in dataset")
322        rt = self._retention_time_list[self._scans_number_list.index(scan)]
323        return rt
324
325    @property
326    def scan_df(self):
327        """
328        pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).
329        """
330        scan_df = pd.DataFrame.from_dict(self._scan_info)
331        return scan_df
332        
333    @property
334    def ms(self):
335        """
336        dictionary : contains the key associated with mass spectra and values are the associated MassSpecProfiles
337        """
338        return self._ms
339
340    
341    @scan_df.setter
342    def scan_df(self, df):
343        """
344        Sets the scan data for the dataset.
345
346        Parameters
347        -----------
348        df : pandas.DataFrame
349            A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level,
350            precursor m/z, scan text, and scan window (lower and upper).
351        """
352        self._scan_info = df.to_dict()
353
354    def __getitem__(self, scan_number):
355        return self._ms.get(scan_number)

Base class for mass spectra objects.

Parameters
  • file_location (str or Path): The location of the file containing the mass spectra data.
  • analyzer (str, optional): The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
  • instrument_label (str, optional): The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
  • sample_name (str, optional): The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
  • spectra_parser (object, optional): The spectra parser object used to create the mass spectra object. Defaults to None.
Attributes
  • spectra_parser_class (class): The class of the spectra parser used to create the mass spectra object.
  • file_location (str or Path): The location of the file containing the mass spectra data.
  • sample_name (str): The name of the sample; defaults to the file name if not provided to the parser.
  • analyzer (str): The type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
  • instrument_label (str): The type of instrument used to generate the mass spectra data. Derived from the spectra parser.
  • _scan_info (dict): A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper). Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
  • _ms (dict): A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
  • _ms_unprocessed (dictionary of pandas.DataFrames or None): A dictionary of unprocessed mass spectra data, as an (optional) intermediate data product for peak picking. Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
Methods
  • add_mass_spectra(scan_list, spectrum_mode=None, ms_level=1, use_parser=True, auto_process=True, ms_params=None). Add mass spectra (or a single mass spectrum) to the _ms slot, from a list of scans
  • get_time_of_scan_id(scan). Returns the scan time for the specified scan number.
MassSpectraBase( file_location, analyzer='Unknown', instrument_label='Unknown', sample_name=None, spectra_parser=None)
 62    def __init__(
 63        self,
 64        file_location,
 65        analyzer="Unknown",
 66        instrument_label="Unknown",
 67        sample_name=None,
 68        spectra_parser=None,
 69    ):
 70        if isinstance(file_location, str):
 71            file_location = Path(file_location)
 72        else:
 73            file_location = file_location
 74        if not file_location.exists():
 75            raise FileExistsError("File does not exist: " + str(file_location))
 76
 77        if sample_name:
 78            self.sample_name = sample_name
 79        else:
 80            self.sample_name = file_location.stem
 81
 82        self.file_location = file_location
 83        self.analyzer = analyzer
 84        self.instrument_label = instrument_label
 85
 86        # Add the spectra parser class to the object if it is not None
 87        if spectra_parser is not None:
 88            self.spectra_parser_class = spectra_parser.__class__
 89            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
 90            if (
 91                self.sample_name is not None
 92                and self.sample_name != self.spectra_parser.sample_name
 93            ):
 94                warnings.warn(
 95                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
 96                    UserWarning,
 97                )
 98            if self.analyzer != self.spectra_parser.analyzer:
 99                warnings.warn(
100                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
101                    UserWarning,
102                )
103            if self.instrument_label != self.spectra_parser.instrument_label:
104                warnings.warn(
105                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
106                    UserWarning,
107                )
108            if self.file_location != self.spectra_parser.file_location:
109                warnings.warn(
110                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
111                    UserWarning,
112                )
113
114        # Instantiate empty dictionaries for scan information and mass spectra
115        self._scan_info = {}
116        self._ms = {}
117        self._ms_unprocessed = {}
file_location
analyzer
instrument_label
spectra_parser

Returns an instance of the spectra parser class.

def add_mass_spectrum(self, mass_spec):
124    def add_mass_spectrum(self, mass_spec):
125        """Adds a mass spectrum to the dataset.
126
127        Parameters
128        -----------
129        mass_spec : MassSpectrum
130            The corems MassSpectrum object to be added to the dataset.
131
132        Notes
133        -----
134        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
135        """
136        # check if mass_spec has a scan_number attribute
137        if not hasattr(mass_spec, "scan_number"):
138            raise ValueError(
139                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
140            )
141        self._ms[mass_spec.scan_number] = mass_spec

Adds a mass spectrum to the dataset.

Parameters
  • mass_spec (MassSpectrum): The corems MassSpectrum object to be added to the dataset.
Notes

This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.

def add_mass_spectra( self, scan_list, spectrum_mode=None, ms_level=1, use_parser=True, auto_process=True, ms_params=None):
143    def add_mass_spectra(
144        self,
145        scan_list,
146        spectrum_mode=None,
147        ms_level=1,
148        use_parser=True,
149        auto_process=True,
150        ms_params=None,
151    ):
152        """Add mass spectra to _ms dictionary, from a list of scans or single scan
153
154        Notes
155        -----
156        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
157
158
159        Parameters
160        -----------
161        scan_list : list of ints
162            List of scans to use to populate _ms slot
163        spectrum_mode : str or None
164            The spectrum mode to use for the mass spectra.
165            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
166            Defaults to None.
167        ms_level : int, optional
168            The MS level to use for the mass spectra.
169            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
170            Defaults to 1.
171        using_parser : bool
172            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
173        auto_process : bool
174            Whether to auto-process the mass spectra.  Defaults to True.
175        ms_params : MSParameters or None
176            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
177
178        Raises
179        ------
180        TypeError
181            If scan_list is not a list of ints
182        ValueError
183            If polarity is not 'positive' or 'negative'
184            If ms_level is not 1 or 2
185        """
186
187        # check if scan_list is a list or a single int; if single int, convert to list
188        if isinstance(scan_list, int):
189            scan_list = [scan_list]
190        if not isinstance(scan_list, list):
191            raise TypeError("scan_list must be a list of integers")
192        for scan in scan_list:
193            if not isinstance(scan, int):
194                raise TypeError("scan_list must be a list of integers")
195
196        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
197        if self.polarity == "negative":
198            polarity = -1
199        elif self.polarity == "positive":
200            polarity = 1
201        else:
202            raise ValueError(
203                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
204            )
205
206        # is not using_parser, check that ms1 and ms2 are not None
207        if not use_parser:
208            if ms_level not in self._ms_unprocessed.keys():
209                raise ValueError(
210                    "ms_level {} not found in _ms_unprocessed dictionary".format(
211                        ms_level
212                    )
213                )
214
215        scan_list = list(set(scan_list))
216        scan_list.sort()
217        if not use_parser:
218            if self._ms_unprocessed[ms_level] is None:
219                raise ValueError(
220                    "No unprocessed data found for ms_level {}".format(ms_level)
221                )
222            if (
223                len(
224                    np.setdiff1d(
225                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
226                    )
227                )
228                > 0
229            ):
230                raise ValueError(
231                    "Not all scans in scan_list are present in the unprocessed data"
232                )
233            # Prepare the ms_df for parsing
234            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
235
236        if use_parser:
237            # Use batch function to get all mass spectra at once
238            if spectrum_mode is None:
239                # get spectrum mode from _scan_info for each scan
240                spectrum_modes = [self.scan_df.loc[scan, "ms_format"] for scan in scan_list]
241                spectrum_mode_batch = spectrum_modes[0] if len(set(spectrum_modes)) == 1 else None
242            else:
243                spectrum_mode_batch = spectrum_mode
244            
245            ms_list = self.spectra_parser.get_mass_spectra_from_scan_list(
246                scan_list=scan_list,
247                spectrum_mode=spectrum_mode_batch,
248                auto_process=False,
249            )
250            
251            # Process each mass spectrum
252            for i, scan in enumerate(scan_list):
253                ms = ms_list[i] if i < len(ms_list) else None
254                if ms is not None:
255                    if ms_params is not None:
256                        ms.parameters = ms_params
257                    ms.scan_number = scan
258                    if auto_process:
259                        ms.process_mass_spec()
260                    self.add_mass_spectrum(ms)
261        else:
262            # Original non-parser logic remains unchanged
263            for scan in scan_list:
264                ms = None
265                if spectrum_mode is None:
266                    # get spectrum mode from _scan_info
267                    spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
268                else:
269                    spectrum_mode_scan = spectrum_mode
270                
271                my_ms_df = ms_df.loc[scan]
272                if spectrum_mode_scan == "profile":
273                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
274                    ms = ms_from_array_profile(
275                        my_ms_df.mz,
276                        my_ms_df.intensity,
277                        self.file_location,
278                        polarity=polarity,
279                        auto_process=False,
280                    )
281                else:
282                   ms = ms_from_array_centroid(
283                        mz = my_ms_df.mz,
284                        abundance = my_ms_df.intensity,
285                        rp = [np.nan] * len(my_ms_df.mz),
286                        s2n = [np.nan] * len(my_ms_df.mz),
287                        dataname = self.file_location,
288                        polarity=polarity,
289                        auto_process=False,
290                    )
291
292                # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
293                if ms is not None:
294                    if ms_params is not None:
295                        ms.parameters = ms_params
296                    ms.scan_number = scan
297                    if auto_process:
298                        ms.process_mass_spec()
299                    self.add_mass_spectrum(ms)

Add mass spectra to _ms dictionary, from a list of scans or single scan

Notes

The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.

Parameters
  • scan_list (list of ints): List of scans to use to populate _ms slot
  • spectrum_mode (str or None): The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types). Defaults to None.
  • ms_level (int, optional): The MS level to use for the mass spectra. This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects. Defaults to 1.
  • use_parser (bool): Whether to use the mass spectra parser to get the mass spectra. Defaults to True.
  • auto_process (bool): Whether to auto-process the mass spectra. Defaults to True.
  • ms_params (MSParameters or None): The mass spectrum parameters to use for the mass spectra. If None, uses the globally set MSParameters.
Raises
  • TypeError: If scan_list is not a list of ints
  • ValueError: If polarity is not 'positive' or 'negative'; if ms_level is not 1 or 2.
def get_time_of_scan_id(self, scan):
301    def get_time_of_scan_id(self, scan):
302        """Returns the scan time for the specified scan number.
303
304        Parameters
305        -----------
306        scan : int
307            The scan number of the desired scan time.
308
309        Returns
310        --------
311        float
312            The scan time for the specified scan number (in minutes).
313
314        Raises
315        ------
316        ValueError
317            If no scan time is found for the specified scan number.
318        """
319        # Check if _retenion_time_list is empty and raise error if so
320        if len(self._retention_time_list) == 0:
321            raise ValueError("No retention times found in dataset")
322        rt = self._retention_time_list[self._scans_number_list.index(scan)]
323        return rt

Returns the scan time for the specified scan number.

Parameters
  • scan (int): The scan number of the desired scan time.
Returns
  • float: The scan time for the specified scan number (in minutes).
Raises
  • ValueError: If no scan time is found for the specified scan number.
scan_df

pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).

ms

dictionary : contains the key associated with mass spectra and values are the associated MassSpecProfiles

 358class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch):
 359    """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.
 360
 361    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.
 362
 363    Parameters
 364    -----------
 365    file_location : str or Path
 366        The location of the file containing the mass spectra data.
 367    analyzer : str, optional
 368        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
 369    instrument_label : str, optional
 370        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
 371    sample_name : str, optional
 372        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
 373    spectra_parser : object, optional
 374        The spectra parser object used to create the mass spectra object. Defaults to None.
 375
 376    Attributes
 377    -----------
 378    polarity : str
 379        The polarity of the ionization mode used for the dataset.
 380    _parameters : LCMSParameters
 381        The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
 382    _retention_time_list : numpy.ndarray
 383        An array of retention times for the dataset.
 384    _scans_number_list : list
 385        A list of scan numbers for the dataset.
 386    _tic_list : numpy.ndarray
 387        An array of total ion current (TIC) values for the dataset.
 388    eics : dict
 389        A dictionary containing extracted ion chromatograms (EICs) for the dataset.
 390        Key is the mz of the EIC. Initialized as an empty dictionary.
 391    mass_features : dictionary of LCMSMassFeature objects
 392        A dictionary containing mass features for the dataset.
 393        Key is mass feature ID. Initialized as an empty dictionary.
 394    spectral_search_results : dictionary of MS2SearchResults objects
 395        A dictionary containing spectral search results for the dataset.
 396        Key is scan number : precursor mz. Initialized as an empty dictionary.
 397
 398    Methods
 399    --------
 400    * get_parameters_json().
 401        Returns the parameters used for the LC-MS analysis in JSON format.
 402    * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True)
 403        Adds which MS2 scans are associated with each mass feature to the
 404        mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
 405    * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True)
 406        Adds the MS1 spectra associated with each mass feature to the
 407        mass_features dictionary and adds the MS1 spectra to the _ms dictionary.
 408    * mass_features_to_df()
 409        Returns a pandas dataframe summarizing the mass features in the dataset.
 410    * set_tic_list_from_data(overwrite=False)
 411        Sets the TIC list from the mass spectrum objects within the _ms dictionary.
 412    * set_retention_time_from_data(overwrite=False)
 413        Sets the retention time list from the data in the _ms dictionary.
 414    * set_scans_number_from_data(overwrite=False)
 415        Sets the scan number list from the data in the _ms dictionary.
 416    * plot_composite_mz_features(binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False)
 417        Generates plot of M/Z features comparing scan time vs M/Z value
 418    """
 419
 420    def __init__(
 421        self,
 422        file_location,
 423        analyzer="Unknown",
 424        instrument_label="Unknown",
 425        sample_name=None,
 426        spectra_parser=None,
 427    ):
 428        super().__init__(
 429            file_location, analyzer, instrument_label, sample_name, spectra_parser
 430        )
 431        self.polarity = ""
 432        self._parameters = LCMSParameters()
 433        self._retention_time_list = []
 434        self._scans_number_list = []
 435        self._tic_list = []
 436        self.eics = {}
 437        self.mass_features = {}
 438        self.spectral_search_results = {}
 439
 440    def get_parameters_json(self):
 441        """Returns the parameters stored for the LC-MS object in JSON format.
 442
 443        Returns
 444        --------
 445        str
 446            The parameters used for the LC-MS analysis in JSON format.
 447        """
 448        return self.parameters.to_json()
 449
 450    def remove_unprocessed_data(self, ms_level=None):
 451        """Removes the unprocessed data from the LCMSBase object.
 452
 453        Parameters
 454        -----------
 455        ms_level : int, optional
 456            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
 457
 458        Raises
 459        ------
 460        ValueError
 461            If ms_level is not 1 or 2.
 462
 463        Notes
 464        -----
 465        This method is useful for freeing up memory after the data has been processed.
 466        """
 467        if ms_level is None:
 468            for ms_level in self._ms_unprocessed.keys():
 469                self._ms_unprocessed[ms_level] = None
 470        if ms_level not in [1, 2]:
 471            raise ValueError("ms_level must be 1 or 2")
 472        self._ms_unprocessed[ms_level] = None
 473
 474    def add_associated_ms2_dda(
 475        self,
 476        auto_process=True,
 477        use_parser=True,
 478        spectrum_mode=None,
 479        ms_params_key="ms2",
 480        scan_filter=None,
 481    ):
 482        """Add MS2 spectra associated with mass features to the dataset.
 483
 484        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
 485
 486        Parameters
 487        -----------
 488        auto_process : bool, optional
 489            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
 490        use_parser : bool, optional
 491            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
 492        spectrum_mode : str or None, optional
 493            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
 494            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
 495            Defaults to None. (faster if defined, otherwise will check each scan)
 496        ms_params_key : string, optional
 497            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
 498            Defaults to 'ms2'.
 499        scan_filter : str
 500            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
 501            "hcd" will pull out only HCD scans.
 502
 503        Raises
 504        ------
 505        ValueError
 506            If mass_features is not set, must run find_mass_features() first.
 507            If no MS2 scans are found in the dataset.
 508            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
 509        """
 510        # Check if mass_features is set, raise error if not
 511        if self.mass_features is None:
 512            raise ValueError(
 513                "mass_features not set, must run find_mass_features() first"
 514            )
 515
 516        # reconfigure ms_params to get the correct mass spectrum parameters from the key
 517        ms_params = self.parameters.mass_spectrum[ms_params_key]
 518
 519        mf_df = self.mass_features_to_df().copy()
 520        # Find ms2 scans that have a precursor m/z value
 521        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
 522        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
 523        # drop ms2 scans that have no tic
 524        ms2_scans = ms2_scans[ms2_scans.tic > 0]
 525        if ms2_scans is None:
 526            raise ValueError("No DDA scans found in dataset")
 527
 528        if scan_filter is not None:
 529            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
 530        # set tolerance in rt space (in minutes) and mz space (in daltons)
 531        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
 532        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
 533
 534        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
 535        dda_scans = []
 536        for i, row in mf_df.iterrows():
 537            ms2_scans_filtered = ms2_scans[
 538                ms2_scans.scan_time.between(
 539                    row.scan_time - time_tol, row.scan_time + time_tol
 540                )
 541            ]
 542            ms2_scans_filtered = ms2_scans_filtered[
 543                ms2_scans_filtered.precursor_mz.between(
 544                    row.mz - mz_tol, row.mz + mz_tol
 545                )
 546            ]
 547            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
 548            self.mass_features[i].ms2_scan_numbers = (
 549                ms2_scans_filtered.scan.tolist()
 550                + self.mass_features[i].ms2_scan_numbers
 551            )
 552        # add to _ms attribute
 553        self.add_mass_spectra(
 554            scan_list=list(set(dda_scans)),
 555            auto_process=auto_process,
 556            spectrum_mode=spectrum_mode,
 557            use_parser=use_parser,
 558            ms_params=ms_params,
 559        )
 560        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
 561        for mf_id in self.mass_features:
 562            if self.mass_features[mf_id].ms2_scan_numbers is not None:
 563                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
 564                    if dda_scan in self._ms.keys():
 565                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
 566                            dda_scan
 567                        ]
 568
 569    def add_associated_ms1(
 570        self, auto_process=True, use_parser=True, spectrum_mode=None
 571    ):
 572        """Add MS1 spectra associated with mass features to the dataset.
 573
 574        Parameters
 575        -----------
 576        auto_process : bool, optional
 577            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
 578        use_parser : bool, optional
 579            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
 580        spectrum_mode : str or None, optional
 581            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
 582            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
 583            Defaults to None. (faster if defined, otherwise will check each scan)
 584
 585        Raises
 586        ------
 587        ValueError
 588            If mass_features is not set, must run find_mass_features() first.
 589            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
 590            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
 591            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
 592        """
 593        # Check if mass_features is set, raise error if not
 594        if self.mass_features is None:
 595            raise ValueError(
 596                "mass_features not set, must run find_mass_features() first"
 597            )
 598        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
 599
 600        if scans_to_average == 1:
 601            # Add to LCMSobj
 602            self.add_mass_spectra(
 603                scan_list=[
 604                    int(mf.apex_scan) for mf in self.mass_features.values()
 605                ],
 606                auto_process=auto_process,
 607                use_parser=use_parser,
 608                spectrum_mode=spectrum_mode,
 609                ms_params=self.parameters.mass_spectrum["ms1"],
 610            )
 611
 612        elif (
 613            (scans_to_average - 1) % 2
 614        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
 615            apex_scans = list(set([int(mf.apex_scan) for mf in self.mass_features.values()]))
 616            # Check if all apex scans are profile mode, raise error if not
 617            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
 618                raise ValueError("All apex scans must be profile mode for averaging")
 619
 620            # First get sets of scans to average
 621            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
 622                ms1_idx_start = ms1_scans.index(apex_scan) - int(
 623                    (scans_to_average - 1) / 2
 624                )
 625                if ms1_idx_start < 0:
 626                    ms1_idx_start = 0
 627                ms1_idx_end = (
 628                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
 629                )
 630                if ms1_idx_end > (len(ms1_scans) - 1):
 631                    ms1_idx_end = len(ms1_scans) - 1
 632                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
 633                return scan_list
 634
 635            ms1_scans = self.ms1_scans
 636            scans_lists = [
 637                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
 638                for apex_scan in apex_scans
 639            ]
 640
 641            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
 642            if self.polarity == "negative":
 643                polarity = -1
 644            elif self.polarity == "positive":
 645                polarity = 1
 646
 647            if not use_parser:
 648                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
 649                ms1_unprocessed = self._ms_unprocessed[1].copy()
 650                # Set the index on _ms_unprocessed[1] to scan number
 651                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
 652                self._ms_unprocessed[1] = ms1_unprocessed
 653
 654                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
 655                scans_lists_flat = list(
 656                    set([scan for sublist in scans_lists for scan in sublist])
 657                )
 658                if (
 659                    len(
 660                        np.setdiff1d(
 661                            np.sort(scans_lists_flat),
 662                            np.sort(ms1_unprocessed.index.values),
 663                        )
 664                    )
 665                    > 0
 666                ):
 667                    raise ValueError(
 668                        "Not all scans to average are present in the unprocessed data"
 669                    )
 670
 671            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
 672                # Get unprocessed mass spectrum from scans
 673                ms = self.get_average_mass_spectrum(
 674                    scan_list=scan_list_average,
 675                    apex_scan=apex_scan,
 676                    spectrum_mode="profile",
 677                    ms_level=1,
 678                    auto_process=auto_process,
 679                    use_parser=use_parser,
 680                    perform_checks=False,
 681                    polarity=polarity,
 682                    ms_params=self.parameters.mass_spectrum["ms1"],
 683                )
 684                # Add mass spectrum to LCMS object and associated with mass feature
 685                self.add_mass_spectrum(ms)
 686
 687            if not use_parser:
 688                # Reset the index on _ms_unprocessed[1] to not be scan number
 689                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
 690                self._ms_unprocessed[1] = ms1_unprocessed
 691        else:
 692            raise ValueError(
 693                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
 694            )
 695
 696        # Associate the ms1 spectra with the mass features
 697        for mf_id in self.mass_features:
 698            self.mass_features[mf_id].mass_spectrum = self._ms[
 699                self.mass_features[mf_id].apex_scan
 700            ]
 701            self.mass_features[mf_id].update_mz()
 702
 703    def mass_features_to_df(self):
 704        """Returns a pandas dataframe summarizing the mass features.
 705
 706        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
 707        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
 708
 709        Returns
 710        --------
 711        pandas.DataFrame
 712            A pandas dataframe of mass features with the following columns:
 713            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
 714        """
 715
 716        def mass_spectrum_to_string(
 717            mass_spec, normalize=True, min_normalized_abun=0.01
 718        ):
 719            """Converts a mass spectrum to a string of m/z:abundance pairs.
 720
 721            Parameters
 722            -----------
 723            mass_spec : MassSpectrum
 724                A MassSpectrum object to be converted to a string.
 725            normalize : bool, optional
 726                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
 727            min_normalized_abun : float, optional
 728                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
 729
 730            Returns
 731            --------
 732            str
 733                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
 734            """
 735            mz_np = mass_spec.to_dataframe()["m/z"].values
 736            abun_np = mass_spec.to_dataframe()["Peak Height"].values
 737            if normalize:
 738                abun_np = abun_np / abun_np.max()
 739            mz_abun = np.column_stack((mz_np, abun_np))
 740            if normalize:
 741                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
 742            mz_abun_str = [
 743                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
 744                for mz, abun in mz_abun
 745            ]
 746            return "; ".join(mz_abun_str)
 747
 748        cols_in_df = [
 749            "id",
 750            "apex_scan",
 751            "start_scan",
 752            "final_scan",
 753            "retention_time",
 754            "intensity",
 755            "persistence",
 756            "area",
 757            "dispersity_index",
 758            "normalized_dispersity_index",
 759            "tailing_factor",
 760            "gaussian_similarity",
 761            "noise_score",
 762            "noise_score_min",
 763            "noise_score_max",
 764            "monoisotopic_mf_id",
 765            "isotopologue_type",
 766            "mass_spectrum_deconvoluted_parent",
 767        ]
 768        df_mf_list = []
 769        for mf_id in self.mass_features.keys():
 770            # Find cols_in_df that are in single_mf
 771            df_keys = list(
 772                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
 773            )
 774            dict_mf = {}
 775            # Get the values for each key in df_keys from the mass feature object
 776            for key in df_keys:
 777                dict_mf[key] = getattr(self.mass_features[mf_id], key)
 778            # Special handling for mass_spectrum and associated_mass_features_deconvoluted, since they are not single values
 779            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
 780                # Add MS2 spectra info
 781                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
 782                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
 783            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
 784                dict_mf["associated_mass_features"] = ", ".join(
 785                    map(
 786                        str,
 787                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
 788                    )
 789                )
 790            # Check if EIC for mass feature is set
 791            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
 792            df_mf_single["mz"] = self.mass_features[mf_id].mz
 793            df_mf_list.append(df_mf_single)
 794        df_mf = pd.concat(df_mf_list)
 795
 796        # rename _area to area and id to mf_id
 797        df_mf = df_mf.rename(
 798            columns={
 799                "id": "mf_id",
 800                "retention_time": "scan_time",            
 801            }
 802        )
 803
 804        # reorder columns
 805        col_order = [
 806            "mf_id",
 807            "scan_time",
 808            "mz",
 809            "apex_scan",
 810            "start_scan",
 811            "final_scan",
 812            "intensity",
 813            "persistence",
 814            "area",
 815            "half_height_width",
 816            "tailing_factor",
 817            "dispersity_index",
 818            "normalized_dispersity_index",
 819            "gaussian_similarity",
 820            "noise_score",
 821            "noise_score_min",
 822            "noise_score_max",
 823            "monoisotopic_mf_id",
 824            "isotopologue_type",
 825            "mass_spectrum_deconvoluted_parent",
 826            "associated_mass_features",
 827            "ms2_spectrum",
 828        ]
 829        # drop columns that are not in col_order
 830        cols_to_order = [col for col in col_order if col in df_mf.columns]
 831        df_mf = df_mf[cols_to_order]
 832
 833        # reset index to mf_id
 834        df_mf = df_mf.set_index("mf_id")
 835        df_mf.index.name = "mf_id"
 836
 837        return df_mf
 838
 839    def mass_features_ms1_annot_to_df(self):
 840        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
 841
 842        Returns
 843        --------
 844        pandas.DataFrame
 845            A pandas dataframe of MS1 annotations for the mass features in the dataset.
 846            The index is set to mf_id (mass feature ID)
 847
 848        Raises
 849        ------
 850        Warning
 851            If no MS1 annotations were found for the mass features in the dataset.
 852        """
 853        annot_df_list_ms1 = []
 854        for mf_id in self.mass_features.keys():
 855            if self.mass_features[mf_id].mass_spectrum is None:
 856                pass
 857            else:
 858                # Add ms1 annotations to ms1 annotation list
 859                if (
 860                    np.abs(
 861                        (
 862                            self.mass_features[mf_id].ms1_peak.mz_exp
 863                            - self.mass_features[mf_id].mz
 864                        )
 865                    )
 866                    < 0.01
 867                ):
 868                    # Get the molecular formula from the mass spectrum
 869                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
 870                    # Subset to pull out only the peak associated with the mass feature
 871                    annot_df = annot_df[
 872                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
 873                    ].copy()
 874
 875                    # If there are more than 1 row, remove any rows without a molecular formula
 876                    if len(annot_df) > 1:
 877                        annot_df = annot_df[~annot_df["Molecular Formula"].isna()]
 878
 879                    # Remove the index column and add column for mf_id
 880                    annot_df = annot_df.drop(columns=["Index"])
 881                    annot_df["mf_id"] = mf_id
 882                    annot_df_list_ms1.append(annot_df)
 883
 884        if len(annot_df_list_ms1) > 0:
 885            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
 886            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
 887            annot_ms1_df_full.index.name = "mf_id"
 888
 889        else:
 890            annot_ms1_df_full = None
 891            # Warn that no ms1 annotations were found
 892            warnings.warn(
 893                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
 894                UserWarning,
 895            )
 896
 897        return annot_ms1_df_full
 898
 899    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
 900        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
 901
 902        Parameters
 903        -----------
 904        molecular_metadata :  dict of MolecularMetadata objects
 905            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
 906
 907        Returns
 908        --------
 909        pandas.DataFrame
 910            A pandas dataframe of MS2 annotations for the mass features in the dataset,
 911            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
 912
 913        Raises
 914        ------
 915        Warning
 916            If no MS2 annotations were found for the mass features in the dataset.
 917        """
 918        annot_df_list_ms2 = []
 919        for mf_id in self.mass_features.keys():
 920            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
 921                # Add ms2 annotations to ms2 annotation list
 922                for result in self.mass_features[mf_id].ms2_similarity_results:
 923                    annot_df_ms2 = result.to_dataframe()
 924                    annot_df_ms2["mf_id"] = mf_id
 925                    annot_df_list_ms2.append(annot_df_ms2)
 926
 927        if len(annot_df_list_ms2) > 0:
 928            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
 929            if molecular_metadata is not None:
 930                molecular_metadata_df = pd.concat(
 931                    [
 932                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
 933                        for k, v in molecular_metadata.items()
 934                    ],
 935                    ignore_index=True,
 936                )
 937                molecular_metadata_df = molecular_metadata_df.rename(
 938                    columns={"id": "ref_mol_id"}
 939                )
 940                annot_ms2_df_full = annot_ms2_df_full.merge(
 941                    molecular_metadata_df, on="ref_mol_id", how="left"
 942                )
 943            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
 944                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
 945            ).copy()
 946            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
 947            annot_ms2_df_full.index.name = "mf_id"
 948        else:
 949            annot_ms2_df_full = None
 950            # Warn that no ms2 annotations were found
 951            warnings.warn(
 952                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
 953                UserWarning,
 954            )
 955
 956        return annot_ms2_df_full
 957
 958    def plot_composite_mz_features(self, binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False):
 959        """Returns a figure displaying 
 960            (1) thresholded, unprocessed data
 961            (2) the m/z features
 962            (3) which m/z features are associated with MS2 spectra
 963
 964        Parameters
 965        -----------
 966        binsize :  float
 967            Desired binsize for the m/z axis of the composite feature map.  Defaults to 1e-4.
 968        mf_plot : boolean
 969            Indicates whether to plot the m/z features. Defaults to True.
 970        ms2_plot : boolean
 971            Indicates whether to identify m/z features with associated MS2 spectra. Defaults to True.
 972        return_fig : boolean
 973            Indicates whether to plot composite feature map (False) or return figure object (True). Defaults to False.
 974
 975        Returns
 976        --------
 977        matplotlib.pyplot.Figure
 978            A figure with the thresholded, unprocessed data on an axis of m/z value with respect to 
 979            scan time. Unprocessed data is displayed in gray scale with darker colors indicating 
 980            higher intensities. If m/z features are plotted, they are displayed in cyan. If m/z
 981            features with associated with MS2 spectra are plotted, they are displayed in red.
 982
 983        Raises
 984        ------
 985        Warning
 986            If m/z features are set to be plot but aren't in the dataset.
 987            If m/z features with associated MS2 data are set to be plot but no MS2 annotations 
 988            were found for the m/z features in the dataset.
 989        """
 990        if mf_plot:
 991            # Check if mass_features is set, raise error if not
 992            if self.mass_features is None:
 993                raise ValueError(
 994                    "mass_features not set, must run find_mass_features() first"
 995                )
 996            ## call mass feature data
 997            mf_df = self.mass_features_to_df()
 998
 999        if ms2_plot:
1000            if not mf_plot:
1001                # Check if mass_features is set, raise error if not
1002                if self.mass_features is None:
1003                    raise ValueError(
1004                        "mass_features not set, must run find_mass_features() first"
1005                    )
1006
1007            ## call m/z feature data
1008            mf_df = self.mass_features_to_df()
1009
1010            # Check if ms2_spectrum is set, raise error if not
1011            if 'ms2_spectrum' not in mf_df.columns:
1012                raise ValueError(                
1013                    "ms2_spectrum not set, must run add_associated_ms2_dda() first"            
1014                )
1015    
1016        ## threshold and grid unprocessed data
1017        df = self._ms_unprocessed[1].copy()
1018        df = df.dropna(subset=['intensity']).reset_index(drop = True)
1019        threshold = ph_int_min_thresh * df.intensity.max()
1020        df_thres = df[df["intensity"] > threshold].reset_index(drop = True).copy()
1021        df = self.grid_data(df_thres)
1022    
1023        ## format unprocessed data for plotting
1024        df = df.merge(self.scan_df[['scan', 'scan_time']], on = 'scan')
1025        mz_grid = np.arange(0, np.max(df.mz), binsize)
1026        mz_data = np.array(df.mz)
1027        df['mz_bin'] = find_closest(mz_grid, mz_data)
1028        df['ab_bin'] = df.groupby(['mz_bin', 'scan_time']).intensity.transform(sum)
1029        unproc_df = df[['scan_time', 'mz_bin', 'ab_bin']].drop_duplicates(ignore_index = True)
1030
1031        ## generate figure
1032        fig = plt.figure()
1033        plt.scatter(
1034            unproc_df.scan_time,
1035            unproc_df.mz_bin*binsize,
1036            c = unproc_df.ab_bin/np.max(unproc_df.ab_bin),
1037            alpha = unproc_df.ab_bin/np.max(unproc_df.ab_bin), 
1038            cmap = 'Greys_r',
1039            s = 1
1040        )
1041
1042        if mf_plot:
1043            if ms2_plot:
1044                plt.scatter(
1045                    mf_df[mf_df.ms2_spectrum.isna()].scan_time,
1046                    mf_df[mf_df.ms2_spectrum.isna()].mz,
1047                    c = 'c',
1048                    s = 4,
1049                    label = 'M/Z features without MS2'
1050                )
1051            else:
1052                plt.scatter(
1053                    mf_df.scan_time,
1054                    mf_df.mz,
1055                    c = 'c',
1056                    s = 4,
1057                    label = 'M/Z features'
1058                )
1059
1060        if ms2_plot: 
1061            plt.scatter(
1062                mf_df[~mf_df.ms2_spectrum.isna()].scan_time,
1063                mf_df[~mf_df.ms2_spectrum.isna()].mz,
1064                c = 'r',
1065                s = 2,
1066                label = 'M/Z features with MS2'
1067            )
1068
1069        if mf_plot == True or ms2_plot == True:
1070            plt.legend(loc = 'lower center', bbox_to_anchor = (0.5, -0.25), ncol = 2)
1071        plt.xlabel('Scan time')
1072        plt.ylabel('m/z')
1073        plt.ylim(0, np.ceil(np.max(df.mz)))
1074        plt.xlim(0, np.ceil(np.max(df.scan_time)))
1075        plt.title('Composite Feature Map')
1076
1077        if return_fig:
1078            plt.close(fig)
1079            return fig
1080
1081        else:
1082            plt.show()
1083
1084    def __len__(self):
1085        """
1086        Returns the number of mass spectra in the dataset.
1087
1088        Returns
1089        --------
1090        int
1091            The number of mass spectra in the dataset.
1092        """
1093        return len(self._ms)
1094
1095    def __getitem__(self, scan_number):
1096        """
1097        Returns the mass spectrum corresponding to the specified scan number.
1098
1099        Parameters
1100        -----------
1101        scan_number : int
1102            The scan number of the desired mass spectrum.
1103
1104        Returns
1105        --------
1106        MassSpectrum
1107            The mass spectrum corresponding to the specified scan number.
1108        """
1109        return self._ms.get(scan_number)
1110
1111    def __iter__(self):
1112        """Returns an iterator over the mass spectra in the dataset.
1113
1114        Returns
1115        --------
1116        iterator
1117            An iterator over the mass spectra in the dataset.
1118        """
1119        return iter(self._ms.values())
1120
1121    def set_tic_list_from_data(self, overwrite=False):
1122        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
1123
1124        Parameters
1125        -----------
1126        overwrite : bool, optional
1127            If True, overwrites the TIC list if it is already set. Defaults to False.
1128
1129        Notes
1130        -----
1131        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
1132
1133        Raises
1134        ------
1135        ValueError
1136            If no mass spectra are found in the dataset.
1137            If the TIC list is already set and overwrite is False.
1138        """
1139        # Check if _ms is empty and raise error if so
1140        if len(self._ms) == 0:
1141            raise ValueError("No mass spectra found in dataset")
1142
1143        # Check if tic_list is already set and raise error if so
1144        if len(self.tic) > 0 and not overwrite:
1145            raise ValueError("TIC list already set, use overwrite=True to overwrite")
1146
1147        self.tic = [self._ms.get(i).tic for i in self.scans_number]
1148
1149    def set_retention_time_from_data(self, overwrite=False):
1150        """Sets the retention time list from the data in the _ms dictionary.
1151
1152        Parameters
1153        -----------
1154        overwrite : bool, optional
1155            If True, overwrites the retention time list if it is already set. Defaults to False.
1156
1157        Notes
1158        -----
1159        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
1160
1161        Raises
1162        ------
1163        ValueError
1164            If no mass spectra are found in the dataset.
1165            If the retention time list is already set and overwrite is False.
1166        """
1167        # Check if _ms is empty and raise error if so
1168        if len(self._ms) == 0:
1169            raise ValueError("No mass spectra found in dataset")
1170
1171        # Check if retention_time_list is already set and raise error if so
1172        if len(self.retention_time) > 0 and not overwrite:
1173            raise ValueError(
1174                "Retention time list already set, use overwrite=True to overwrite"
1175            )
1176
1177        retention_time_list = []
1178        for key_ms in sorted(self._ms.keys()):
1179            retention_time_list.append(self._ms.get(key_ms).retention_time)
1180        self.retention_time = retention_time_list
1181
1182    def set_scans_number_from_data(self, overwrite=False):
1183        """Sets the scan number list from the data in the _ms dictionary.
1184
1185        Notes
1186        -----
1187        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
1188
1189        Raises
1190        ------
1191        ValueError
1192            If no mass spectra are found in the dataset.
1193            If the scan number list is already set and overwrite is False.
1194        """
1195        # Check if _ms is empty and raise error if so
1196        if len(self._ms) == 0:
1197            raise ValueError("No mass spectra found in dataset")
1198
1199        # Check if scans_number_list is already set and raise error if so
1200        if len(self.scans_number) > 0 and not overwrite:
1201            raise ValueError(
1202                "Scan number list already set, use overwrite=True to overwrite"
1203            )
1204
1205        self.scans_number = sorted(self._ms.keys())
1206
1207    @property
1208    def ms1_scans(self):
1209        """
1210        list : A list of MS1 scan numbers for the dataset.
1211        """
1212        return self.scan_df[self.scan_df.ms_level == 1].index.tolist()
1213
1214    @property
1215    def parameters(self):
1216        """
1217        LCMSParameters : The parameters used for the LC-MS analysis.
1218        """
1219        return self._parameters
1220
1221    @parameters.setter
1222    def parameters(self, paramsinstance):
1223        """
1224        Sets the parameters used for the LC-MS analysis.
1225
1226        Parameters
1227        -----------
1228        paramsinstance : LCMSParameters
1229            The parameters used for the LC-MS analysis.
1230        """
1231        self._parameters = paramsinstance
1232
1233    @property
1234    def scans_number(self):
1235        """
1236        list : A list of scan numbers for the dataset.
1237        """
1238        return self._scans_number_list
1239
1240    @scans_number.setter
1241    def scans_number(self, scan_numbers_list):
1242        """
1243        Sets the scan numbers for the dataset.
1244
1245        Parameters
1246        -----------
1247        scan_numbers_list : list
1248            A list of scan numbers for the dataset.
1249        """
1250        self._scans_number_list = scan_numbers_list
1251
1252    @property
1253    def retention_time(self):
1254        """
1255        numpy.ndarray : An array of retention times for the dataset.
1256        """
1257        return self._retention_time_list
1258
1259    @retention_time.setter
1260    def retention_time(self, rt_list):
1261        """
1262        Sets the retention times for the dataset.
1263
1264        Parameters
1265        -----------
1266        rt_list : list
1267            A list of retention times for the dataset.
1268        """
1269        self._retention_time_list = np.array(rt_list)
1270
1271    @property
1272    def tic(self):
1273        """
1274        numpy.ndarray : An array of TIC values for the dataset.
1275        """
1276        return self._tic_list
1277
1278    @tic.setter
1279    def tic(self, tic_list):
1280        """
1281        Sets the TIC values for the dataset.
1282
1283        Parameters
1284        -----------
1285        tic_list : list
1286            A list of TIC values for the dataset.
1287        """
1288        self._tic_list = np.array(tic_list)

A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.

This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.

Parameters
  • file_location (str or Path): The location of the file containing the mass spectra data.
  • analyzer (str, optional): The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
  • instrument_label (str, optional): The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
  • sample_name (str, optional): The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
  • spectra_parser (object, optional): The spectra parser object used to create the mass spectra object. Defaults to None.
Attributes
  • polarity (str): The polarity of the ionization mode used for the dataset.
  • _parameters (LCMSParameters): The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
  • _retention_time_list (numpy.ndarray): An array of retention times for the dataset.
  • _scans_number_list (list): A list of scan numbers for the dataset.
  • _tic_list (numpy.ndarray): An array of total ion current (TIC) values for the dataset.
  • eics (dict): A dictionary containing extracted ion chromatograms (EICs) for the dataset. Key is the mz of the EIC. Initialized as an empty dictionary.
  • mass_features (dictionary of LCMSMassFeature objects): A dictionary containing mass features for the dataset. Key is mass feature ID. Initialized as an empty dictionary.
  • spectral_search_results (dictionary of MS2SearchResults objects): A dictionary containing spectral search results for the dataset. Key is scan number : precursor mz. Initialized as an empty dictionary.
Methods
  • get_parameters_json() Returns the parameters used for the LC-MS analysis in JSON format.
  • add_associated_ms2_dda(auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key='ms2', scan_filter=None) Adds which MS2 scans are associated with each mass feature to the mass_features dictionary and adds the MS2 spectra to the _ms dictionary.
  • add_associated_ms1(auto_process=True, use_parser=True, spectrum_mode=None) Adds the MS1 spectra associated with each mass feature to the mass_features dictionary and adds the MS1 spectra to the _ms dictionary.
  • mass_features_to_df() Returns a pandas dataframe summarizing the mass features in the dataset.
  • set_tic_list_from_data(overwrite=False) Sets the TIC list from the mass spectrum objects within the _ms dictionary.
  • set_retention_time_from_data(overwrite=False) Sets the retention time list from the data in the _ms dictionary.
  • set_scans_number_from_data(overwrite=False) Sets the scan number list from the data in the _ms dictionary.
  • plot_composite_mz_features(binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False) Generates plot of M/Z features comparing scan time vs M/Z value
LCMSBase( file_location, analyzer='Unknown', instrument_label='Unknown', sample_name=None, spectra_parser=None)
420    def __init__(
421        self,
422        file_location,
423        analyzer="Unknown",
424        instrument_label="Unknown",
425        sample_name=None,
426        spectra_parser=None,
427    ):
428        super().__init__(
429            file_location, analyzer, instrument_label, sample_name, spectra_parser
430        )
431        self.polarity = ""
432        self._parameters = LCMSParameters()
433        self._retention_time_list = []
434        self._scans_number_list = []
435        self._tic_list = []
436        self.eics = {}
437        self.mass_features = {}
438        self.spectral_search_results = {}
polarity
eics
mass_features
spectral_search_results
def get_parameters_json(self):
440    def get_parameters_json(self):
441        """Returns the parameters stored for the LC-MS object in JSON format.
442
443        Returns
444        --------
445        str
446            The parameters used for the LC-MS analysis in JSON format.
447        """
448        return self.parameters.to_json()

Returns the parameters stored for the LC-MS object in JSON format.

Returns
  • str: The parameters used for the LC-MS analysis in JSON format.
def remove_unprocessed_data(self, ms_level=None):
450    def remove_unprocessed_data(self, ms_level=None):
451        """Removes the unprocessed data from the LCMSBase object.
452
453        Parameters
454        -----------
455        ms_level : int, optional
456            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
457
458        Raises
459        ------
460        ValueError
461            If ms_level is not 1 or 2.
462
463        Notes
464        -----
465        This method is useful for freeing up memory after the data has been processed.
466        """
467        if ms_level is None:
468            for ms_level in self._ms_unprocessed.keys():
469                self._ms_unprocessed[ms_level] = None
470        if ms_level not in [1, 2]:
471            raise ValueError("ms_level must be 1 or 2")
472        self._ms_unprocessed[ms_level] = None

Removes the unprocessed data from the LCMSBase object.

Parameters
  • ms_level (int, optional): The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
Raises
  • ValueError: If ms_level is not 1 or 2.
Notes

This method is useful for freeing up memory after the data has been processed.

def add_associated_ms2_dda( self, auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key='ms2', scan_filter=None):
474    def add_associated_ms2_dda(
475        self,
476        auto_process=True,
477        use_parser=True,
478        spectrum_mode=None,
479        ms_params_key="ms2",
480        scan_filter=None,
481    ):
482        """Add MS2 spectra associated with mass features to the dataset.
483
484        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
485
486        Parameters
487        -----------
488        auto_process : bool, optional
489            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
490        use_parser : bool, optional
491            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
492        spectrum_mode : str or None, optional
493            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
494            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
495            Defaults to None. (faster if defined, otherwise will check each scan)
496        ms_params_key : string, optional
497            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
498            Defaults to 'ms2'.
499        scan_filter : str
500            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
501            "hcd" will pull out only HCD scans.
502
503        Raises
504        ------
505        ValueError
506            If mass_features is not set, must run find_mass_features() first.
507            If no MS2 scans are found in the dataset.
508            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
509        """
510        # Check if mass_features is set, raise error if not
511        if self.mass_features is None:
512            raise ValueError(
513                "mass_features not set, must run find_mass_features() first"
514            )
515
516        # reconfigure ms_params to get the correct mass spectrum parameters from the key
517        ms_params = self.parameters.mass_spectrum[ms_params_key]
518
519        mf_df = self.mass_features_to_df().copy()
520        # Find ms2 scans that have a precursor m/z value
521        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
522        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
523        # drop ms2 scans that have no tic
524        ms2_scans = ms2_scans[ms2_scans.tic > 0]
525        if ms2_scans is None:
526            raise ValueError("No DDA scans found in dataset")
527
528        if scan_filter is not None:
529            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
530        # set tolerance in rt space (in minutes) and mz space (in daltons)
531        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
532        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
533
534        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
535        dda_scans = []
536        for i, row in mf_df.iterrows():
537            ms2_scans_filtered = ms2_scans[
538                ms2_scans.scan_time.between(
539                    row.scan_time - time_tol, row.scan_time + time_tol
540                )
541            ]
542            ms2_scans_filtered = ms2_scans_filtered[
543                ms2_scans_filtered.precursor_mz.between(
544                    row.mz - mz_tol, row.mz + mz_tol
545                )
546            ]
547            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
548            self.mass_features[i].ms2_scan_numbers = (
549                ms2_scans_filtered.scan.tolist()
550                + self.mass_features[i].ms2_scan_numbers
551            )
552        # add to _ms attribute
553        self.add_mass_spectra(
554            scan_list=list(set(dda_scans)),
555            auto_process=auto_process,
556            spectrum_mode=spectrum_mode,
557            use_parser=use_parser,
558            ms_params=ms_params,
559        )
560        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
561        for mf_id in self.mass_features:
562            if self.mass_features[mf_id].ms2_scan_numbers is not None:
563                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
564                    if dda_scan in self._ms.keys():
565                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
566                            dda_scan
567                        ]

Add MS2 spectra associated with mass features to the dataset.

Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)

Parameters
  • auto_process (bool, optional): If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
  • use_parser (bool, optional): If True, invoke the spectra parser to get the MS2 spectra. Default is True.
  • spectrum_mode (str or None, optional): The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types). Defaults to None. (faster if defined, otherwise will check each scan)
  • ms_params_key (string, optional): The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute. Defaults to 'ms2'.
  • scan_filter (str, optional): A string used to filter the scans that are added to the _ms dictionary. If None, all scans are added. Defaults to None. For example, "hcd" will pull out only HCD scans.
Raises
  • ValueError: If mass_features is not set, must run find_mass_features() first. If no MS2 scans are found in the dataset. If no precursor m/z values are found in MS2 scans, not a DDA dataset.
def add_associated_ms1(self, auto_process=True, use_parser=True, spectrum_mode=None):
569    def add_associated_ms1(
570        self, auto_process=True, use_parser=True, spectrum_mode=None
571    ):
572        """Add MS1 spectra associated with mass features to the dataset.
573
574        Parameters
575        -----------
576        auto_process : bool, optional
577            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
578        use_parser : bool, optional
579            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
580        spectrum_mode : str or None, optional
581            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
582            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
583            Defaults to None. (faster if defined, otherwise will check each scan)
584
585        Raises
586        ------
587        ValueError
588            If mass_features is not set, must run find_mass_features() first.
589            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
590            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
591            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
592        """
593        # Check if mass_features is set, raise error if not
594        if self.mass_features is None:
595            raise ValueError(
596                "mass_features not set, must run find_mass_features() first"
597            )
598        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
599
600        if scans_to_average == 1:
601            # Add to LCMSobj
602            self.add_mass_spectra(
603                scan_list=[
604                    int(mf.apex_scan) for mf in self.mass_features.values()
605                ],
606                auto_process=auto_process,
607                use_parser=use_parser,
608                spectrum_mode=spectrum_mode,
609                ms_params=self.parameters.mass_spectrum["ms1"],
610            )
611
612        elif (
613            (scans_to_average - 1) % 2
614        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
615            apex_scans = list(set([int(mf.apex_scan) for mf in self.mass_features.values()]))
616            # Check if all apex scans are profile mode, raise error if not
617            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
618                raise ValueError("All apex scans must be profile mode for averaging")
619
620            # First get sets of scans to average
621            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
622                ms1_idx_start = ms1_scans.index(apex_scan) - int(
623                    (scans_to_average - 1) / 2
624                )
625                if ms1_idx_start < 0:
626                    ms1_idx_start = 0
627                ms1_idx_end = (
628                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
629                )
630                if ms1_idx_end > (len(ms1_scans) - 1):
631                    ms1_idx_end = len(ms1_scans) - 1
632                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
633                return scan_list
634
635            ms1_scans = self.ms1_scans
636            scans_lists = [
637                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
638                for apex_scan in apex_scans
639            ]
640
641            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
642            if self.polarity == "negative":
643                polarity = -1
644            elif self.polarity == "positive":
645                polarity = 1
646
647            if not use_parser:
648                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
649                ms1_unprocessed = self._ms_unprocessed[1].copy()
650                # Set the index on _ms_unprocessed[1] to scan number
651                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
652                self._ms_unprocessed[1] = ms1_unprocessed
653
654                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
655                scans_lists_flat = list(
656                    set([scan for sublist in scans_lists for scan in sublist])
657                )
658                if (
659                    len(
660                        np.setdiff1d(
661                            np.sort(scans_lists_flat),
662                            np.sort(ms1_unprocessed.index.values),
663                        )
664                    )
665                    > 0
666                ):
667                    raise ValueError(
668                        "Not all scans to average are present in the unprocessed data"
669                    )
670
671            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
672                # Get unprocessed mass spectrum from scans
673                ms = self.get_average_mass_spectrum(
674                    scan_list=scan_list_average,
675                    apex_scan=apex_scan,
676                    spectrum_mode="profile",
677                    ms_level=1,
678                    auto_process=auto_process,
679                    use_parser=use_parser,
680                    perform_checks=False,
681                    polarity=polarity,
682                    ms_params=self.parameters.mass_spectrum["ms1"],
683                )
684                # Add mass spectrum to LCMS object and associated with mass feature
685                self.add_mass_spectrum(ms)
686
687            if not use_parser:
688                # Reset the index on _ms_unprocessed[1] to not be scan number
689                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
690                self._ms_unprocessed[1] = ms1_unprocessed
691        else:
692            raise ValueError(
693                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
694            )
695
696        # Associate the ms1 spectra with the mass features
697        for mf_id in self.mass_features:
698            self.mass_features[mf_id].mass_spectrum = self._ms[
699                self.mass_features[mf_id].apex_scan
700            ]
701            self.mass_features[mf_id].update_mz()

Add MS1 spectra associated with mass features to the dataset.

Parameters
  • auto_process (bool, optional): If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
  • use_parser (bool, optional): If True, invoke the spectra parser to get the MS1 spectra. Default is True.
  • spectrum_mode (str or None, optional): The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types). Defaults to None. (faster if defined, otherwise will check each scan)
Raises
  • ValueError: If mass_features is not set, must run find_mass_features() first. If apex scans are not profile mode, all apex scans must be profile mode for averaging. If number of scans to average is not 1 or an integer with an integer median (i.e. 3, 5, 7, 9). If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
def mass_features_to_df(self):
703    def mass_features_to_df(self):
704        """Returns a pandas dataframe summarizing the mass features.
705
706        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
707        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
708
709        Returns
710        --------
711        pandas.DataFrame
712            A pandas dataframe of mass features with the following columns:
713            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
714        """
715
716        def mass_spectrum_to_string(
717            mass_spec, normalize=True, min_normalized_abun=0.01
718        ):
719            """Converts a mass spectrum to a string of m/z:abundance pairs.
720
721            Parameters
722            -----------
723            mass_spec : MassSpectrum
724                A MassSpectrum object to be converted to a string.
725            normalize : bool, optional
726                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
727            min_normalized_abun : float, optional
728                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
729
730            Returns
731            --------
732            str
733                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
734            """
735            mz_np = mass_spec.to_dataframe()["m/z"].values
736            abun_np = mass_spec.to_dataframe()["Peak Height"].values
737            if normalize:
738                abun_np = abun_np / abun_np.max()
739            mz_abun = np.column_stack((mz_np, abun_np))
740            if normalize:
741                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
742            mz_abun_str = [
743                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
744                for mz, abun in mz_abun
745            ]
746            return "; ".join(mz_abun_str)
747
748        cols_in_df = [
749            "id",
750            "apex_scan",
751            "start_scan",
752            "final_scan",
753            "retention_time",
754            "intensity",
755            "persistence",
756            "area",
757            "dispersity_index",
758            "normalized_dispersity_index",
759            "tailing_factor",
760            "gaussian_similarity",
761            "noise_score",
762            "noise_score_min",
763            "noise_score_max",
764            "monoisotopic_mf_id",
765            "isotopologue_type",
766            "mass_spectrum_deconvoluted_parent",
767        ]
768        df_mf_list = []
769        for mf_id in self.mass_features.keys():
770            # Find cols_in_df that are in single_mf
771            df_keys = list(
772                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
773            )
774            dict_mf = {}
775            # Get the values for each key in df_keys from the mass feature object
776            for key in df_keys:
777                dict_mf[key] = getattr(self.mass_features[mf_id], key)
778            # Special handling for mass_spectrum and associated_mass_features_deconvoluted, since they are not single values
779            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
780                # Add MS2 spectra info
781                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
782                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
783            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
784                dict_mf["associated_mass_features"] = ", ".join(
785                    map(
786                        str,
787                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
788                    )
789                )
790            # Check if EIC for mass feature is set
791            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
792            df_mf_single["mz"] = self.mass_features[mf_id].mz
793            df_mf_list.append(df_mf_single)
794        df_mf = pd.concat(df_mf_list)
795
796        # rename _area to area and id to mf_id
797        df_mf = df_mf.rename(
798            columns={
799                "id": "mf_id",
800                "retention_time": "scan_time",            
801            }
802        )
803
804        # reorder columns
805        col_order = [
806            "mf_id",
807            "scan_time",
808            "mz",
809            "apex_scan",
810            "start_scan",
811            "final_scan",
812            "intensity",
813            "persistence",
814            "area",
815            "half_height_width",
816            "tailing_factor",
817            "dispersity_index",
818            "normalized_dispersity_index",
819            "gaussian_similarity",
820            "noise_score",
821            "noise_score_min",
822            "noise_score_max",
823            "monoisotopic_mf_id",
824            "isotopologue_type",
825            "mass_spectrum_deconvoluted_parent",
826            "associated_mass_features",
827            "ms2_spectrum",
828        ]
829        # drop columns that are not in col_order
830        cols_to_order = [col for col in col_order if col in df_mf.columns]
831        df_mf = df_mf[cols_to_order]
832
833        # reset index to mf_id
834        df_mf = df_mf.set_index("mf_id")
835        df_mf.index.name = "mf_id"
836
837        return df_mf

Returns a pandas dataframe summarizing the mass features.

The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity, persistence, area, monoisotopic_mf_id, and isotopologue_type. The index is set to mf_id (mass feature ID).

Returns
  • pandas.DataFrame: A pandas dataframe of mass features with the following columns: mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
def mass_features_ms1_annot_to_df(self):
839    def mass_features_ms1_annot_to_df(self):
840        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
841
842        Returns
843        --------
844        pandas.DataFrame
845            A pandas dataframe of MS1 annotations for the mass features in the dataset.
846            The index is set to mf_id (mass feature ID)
847
848        Raises
849        ------
850        Warning
851            If no MS1 annotations were found for the mass features in the dataset.
852        """
853        annot_df_list_ms1 = []
854        for mf_id in self.mass_features.keys():
855            if self.mass_features[mf_id].mass_spectrum is None:
856                pass
857            else:
858                # Add ms1 annotations to ms1 annotation list
859                if (
860                    np.abs(
861                        (
862                            self.mass_features[mf_id].ms1_peak.mz_exp
863                            - self.mass_features[mf_id].mz
864                        )
865                    )
866                    < 0.01
867                ):
868                    # Get the molecular formula from the mass spectrum
869                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
870                    # Subset to pull out only the peak associated with the mass feature
871                    annot_df = annot_df[
872                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
873                    ].copy()
874
875                    # If there are more than 1 row, remove any rows without a molecular formula
876                    if len(annot_df) > 1:
877                        annot_df = annot_df[~annot_df["Molecular Formula"].isna()]
878
879                    # Remove the index column and add column for mf_id
880                    annot_df = annot_df.drop(columns=["Index"])
881                    annot_df["mf_id"] = mf_id
882                    annot_df_list_ms1.append(annot_df)
883
884        if len(annot_df_list_ms1) > 0:
885            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
886            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
887            annot_ms1_df_full.index.name = "mf_id"
888
889        else:
890            annot_ms1_df_full = None
891            # Warn that no ms1 annotations were found
892            warnings.warn(
893                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
894                UserWarning,
895            )
896
897        return annot_ms1_df_full

Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.

Returns
  • pandas.DataFrame: A pandas dataframe of MS1 annotations for the mass features in the dataset. The index is set to mf_id (mass feature ID)
Raises
  • Warning: If no MS1 annotations were found for the mass features in the dataset.
def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
899    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
900        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
901
902        Parameters
903        -----------
904        molecular_metadata :  dict of MolecularMetadata objects
905            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
906
907        Returns
908        --------
909        pandas.DataFrame
910            A pandas dataframe of MS2 annotations for the mass features in the dataset,
911            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
912
913        Raises
914        ------
915        Warning
916            If no MS2 annotations were found for the mass features in the dataset.
917        """
918        annot_df_list_ms2 = []
919        for mf_id in self.mass_features.keys():
920            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
921                # Add ms2 annotations to ms2 annotation list
922                for result in self.mass_features[mf_id].ms2_similarity_results:
923                    annot_df_ms2 = result.to_dataframe()
924                    annot_df_ms2["mf_id"] = mf_id
925                    annot_df_list_ms2.append(annot_df_ms2)
926
927        if len(annot_df_list_ms2) > 0:
928            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
929            if molecular_metadata is not None:
930                molecular_metadata_df = pd.concat(
931                    [
932                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
933                        for k, v in molecular_metadata.items()
934                    ],
935                    ignore_index=True,
936                )
937                molecular_metadata_df = molecular_metadata_df.rename(
938                    columns={"id": "ref_mol_id"}
939                )
940                annot_ms2_df_full = annot_ms2_df_full.merge(
941                    molecular_metadata_df, on="ref_mol_id", how="left"
942                )
943            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
944                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
945            ).copy()
946            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
947            annot_ms2_df_full.index.name = "mf_id"
948        else:
949            annot_ms2_df_full = None
950            # Warn that no ms2 annotations were found
951            warnings.warn(
952                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
953                UserWarning,
954            )
955
956        return annot_ms2_df_full

Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.

Parameters
  • molecular_metadata (dict of MolecularMetadata objects): A dictionary of MolecularMetadata objects, keyed by metabref_mol_id. Defaults to None.
Returns
  • pandas.DataFrame: A pandas dataframe of MS2 annotations for the mass features in the dataset, and optionally molecular metadata. The index is set to mf_id (mass feature ID)
Raises
  • Warning: If no MS2 annotations were found for the mass features in the dataset.
def plot_composite_mz_features( self, binsize=0.0001, ph_int_min_thresh=0.001, mf_plot=True, ms2_plot=True, return_fig=False):
 958    def plot_composite_mz_features(self, binsize = 1e-4, ph_int_min_thresh = 0.001, mf_plot = True, ms2_plot = True, return_fig = False):
 959        """Returns a figure displaying 
 960            (1) thresholded, unprocessed data
 961            (2) the m/z features
 962            (3) which m/z features are associated with MS2 spectra
 963
 964        Parameters
 965        -----------
 966        binsize :  float
 967            Desired binsize for the m/z axis of the composite feature map.  Defaults to 1e-4.
 968        mf_plot : boolean
 969            Indicates whether to plot the m/z features. Defaults to True.
 970        ms2_plot : boolean
 971            Indicates whether to identify m/z features with associated MS2 spectra. Defaults to True.
 972        return_fig : boolean
 973            Indicates whether to plot composite feature map (False) or return figure object (True). Defaults to False.
 974
 975        Returns
 976        --------
 977        matplotlib.pyplot.Figure
 978            A figure with the thresholded, unprocessed data on an axis of m/z value with respect to 
 979            scan time. Unprocessed data is displayed in gray scale with darker colors indicating 
 980            higher intensities. If m/z features are plotted, they are displayed in cyan. If m/z
 981            features with associated with MS2 spectra are plotted, they are displayed in red.
 982
 983        Raises
 984        ------
 985        Warning
 986            If m/z features are set to be plot but aren't in the dataset.
 987            If m/z features with associated MS2 data are set to be plot but no MS2 annotations 
 988            were found for the m/z features in the dataset.
 989        """
 990        if mf_plot:
 991            # Check if mass_features is set, raise error if not
 992            if self.mass_features is None:
 993                raise ValueError(
 994                    "mass_features not set, must run find_mass_features() first"
 995                )
 996            ## call mass feature data
 997            mf_df = self.mass_features_to_df()
 998
 999        if ms2_plot:
1000            if not mf_plot:
1001                # Check if mass_features is set, raise error if not
1002                if self.mass_features is None:
1003                    raise ValueError(
1004                        "mass_features not set, must run find_mass_features() first"
1005                    )
1006
1007            ## call m/z feature data
1008            mf_df = self.mass_features_to_df()
1009
1010            # Check if ms2_spectrum is set, raise error if not
1011            if 'ms2_spectrum' not in mf_df.columns:
1012                raise ValueError(                
1013                    "ms2_spectrum not set, must run add_associated_ms2_dda() first"            
1014                )
1015    
1016        ## threshold and grid unprocessed data
1017        df = self._ms_unprocessed[1].copy()
1018        df = df.dropna(subset=['intensity']).reset_index(drop = True)
1019        threshold = ph_int_min_thresh * df.intensity.max()
1020        df_thres = df[df["intensity"] > threshold].reset_index(drop = True).copy()
1021        df = self.grid_data(df_thres)
1022    
1023        ## format unprocessed data for plotting
1024        df = df.merge(self.scan_df[['scan', 'scan_time']], on = 'scan')
1025        mz_grid = np.arange(0, np.max(df.mz), binsize)
1026        mz_data = np.array(df.mz)
1027        df['mz_bin'] = find_closest(mz_grid, mz_data)
1028        df['ab_bin'] = df.groupby(['mz_bin', 'scan_time']).intensity.transform(sum)
1029        unproc_df = df[['scan_time', 'mz_bin', 'ab_bin']].drop_duplicates(ignore_index = True)
1030
1031        ## generate figure
1032        fig = plt.figure()
1033        plt.scatter(
1034            unproc_df.scan_time,
1035            unproc_df.mz_bin*binsize,
1036            c = unproc_df.ab_bin/np.max(unproc_df.ab_bin),
1037            alpha = unproc_df.ab_bin/np.max(unproc_df.ab_bin), 
1038            cmap = 'Greys_r',
1039            s = 1
1040        )
1041
1042        if mf_plot:
1043            if ms2_plot:
1044                plt.scatter(
1045                    mf_df[mf_df.ms2_spectrum.isna()].scan_time,
1046                    mf_df[mf_df.ms2_spectrum.isna()].mz,
1047                    c = 'c',
1048                    s = 4,
1049                    label = 'M/Z features without MS2'
1050                )
1051            else:
1052                plt.scatter(
1053                    mf_df.scan_time,
1054                    mf_df.mz,
1055                    c = 'c',
1056                    s = 4,
1057                    label = 'M/Z features'
1058                )
1059
1060        if ms2_plot: 
1061            plt.scatter(
1062                mf_df[~mf_df.ms2_spectrum.isna()].scan_time,
1063                mf_df[~mf_df.ms2_spectrum.isna()].mz,
1064                c = 'r',
1065                s = 2,
1066                label = 'M/Z features with MS2'
1067            )
1068
1069        if mf_plot == True or ms2_plot == True:
1070            plt.legend(loc = 'lower center', bbox_to_anchor = (0.5, -0.25), ncol = 2)
1071        plt.xlabel('Scan time')
1072        plt.ylabel('m/z')
1073        plt.ylim(0, np.ceil(np.max(df.mz)))
1074        plt.xlim(0, np.ceil(np.max(df.scan_time)))
1075        plt.title('Composite Feature Map')
1076
1077        if return_fig:
1078            plt.close(fig)
1079            return fig
1080
1081        else:
1082            plt.show()

Returns a figure displaying (1) thresholded, unprocessed data (2) the m/z features (3) which m/z features are associated with MS2 spectra

Parameters
  • binsize (float): Desired binsize for the m/z axis of the composite feature map. Defaults to 1e-4.
  • mf_plot (boolean): Indicates whether to plot the m/z features. Defaults to True.
  • ms2_plot (boolean): Indicates whether to identify m/z features with associated MS2 spectra. Defaults to True.
  • return_fig (boolean): Indicates whether to plot composite feature map (False) or return figure object (True). Defaults to False.
Returns
  • matplotlib.pyplot.Figure: A figure with the thresholded, unprocessed data on an axis of m/z value with respect to scan time. Unprocessed data is displayed in gray scale with darker colors indicating higher intensities. If m/z features are plotted, they are displayed in cyan. If m/z features associated with MS2 spectra are plotted, they are displayed in red.
Raises
  • ValueError: If m/z features are set to be plotted but mass_features has not been set. If m/z features with associated MS2 data are set to be plotted but no MS2 spectra were found for the m/z features in the dataset (add_associated_ms2_dda() must be run first).
def set_tic_list_from_data(self, overwrite=False):
1121    def set_tic_list_from_data(self, overwrite=False):
1122        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
1123
1124        Parameters
1125        -----------
1126        overwrite : bool, optional
1127            If True, overwrites the TIC list if it is already set. Defaults to False.
1128
1129        Notes
1130        -----
1131        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
1132
1133        Raises
1134        ------
1135        ValueError
1136            If no mass spectra are found in the dataset.
1137            If the TIC list is already set and overwrite is False.
1138        """
1139        # Check if _ms is empty and raise error if so
1140        if len(self._ms) == 0:
1141            raise ValueError("No mass spectra found in dataset")
1142
1143        # Check if tic_list is already set and raise error if so
1144        if len(self.tic) > 0 and not overwrite:
1145            raise ValueError("TIC list already set, use overwrite=True to overwrite")
1146
1147        self.tic = [self._ms.get(i).tic for i in self.scans_number]

Sets the TIC list from the mass spectrum objects within the _ms dictionary.

Parameters
  • overwrite (bool, optional): If True, overwrites the TIC list if it is already set. Defaults to False.
Notes

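TIC values are collected in the order given by scans_number; if scans_number is empty, the TIC list is set to an empty list. An empty _ms dictionary raises ValueError (see Raises).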

Raises
  • ValueError: If no mass spectra are found in the dataset. If the TIC list is already set and overwrite is False.
def set_retention_time_from_data(self, overwrite=False):
1149    def set_retention_time_from_data(self, overwrite=False):
1150        """Sets the retention time list from the data in the _ms dictionary.
1151
1152        Parameters
1153        -----------
1154        overwrite : bool, optional
1155            If True, overwrites the retention time list if it is already set. Defaults to False.
1156
1157        Notes
1158        -----
1159        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
1160
1161        Raises
1162        ------
1163        ValueError
1164            If no mass spectra are found in the dataset.
1165            If the retention time list is already set and overwrite is False.
1166        """
1167        # Check if _ms is empty and raise error if so
1168        if len(self._ms) == 0:
1169            raise ValueError("No mass spectra found in dataset")
1170
1171        # Check if retention_time_list is already set and raise error if so
1172        if len(self.retention_time) > 0 and not overwrite:
1173            raise ValueError(
1174                "Retention time list already set, use overwrite=True to overwrite"
1175            )
1176
1177        retention_time_list = []
1178        for key_ms in sorted(self._ms.keys()):
1179            retention_time_list.append(self._ms.get(key_ms).retention_time)
1180        self.retention_time = retention_time_list

Sets the retention time list from the data in the _ms dictionary.

Parameters
  • overwrite (bool, optional): If True, overwrites the retention time list if it is already set. Defaults to False.
Notes

An empty _ms dictionary raises ValueError (see Raises); otherwise the retention times are collected in ascending order of the scan numbers stored in _ms.

Raises
  • ValueError: If no mass spectra are found in the dataset. If the retention time list is already set and overwrite is False.
def set_scans_number_from_data(self, overwrite=False):
1182    def set_scans_number_from_data(self, overwrite=False):
1183        """Sets the scan number list from the data in the _ms dictionary.
1184
1185        Notes
1186        -----
1187        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
1188
1189        Raises
1190        ------
1191        ValueError
1192            If no mass spectra are found in the dataset.
1193            If the scan number list is already set and overwrite is False.
1194        """
1195        # Check if _ms is empty and raise error if so
1196        if len(self._ms) == 0:
1197            raise ValueError("No mass spectra found in dataset")
1198
1199        # Check if scans_number_list is already set and raise error if so
1200        if len(self.scans_number) > 0 and not overwrite:
1201            raise ValueError(
1202                "Scan number list already set, use overwrite=True to overwrite"
1203            )
1204
1205        self.scans_number = sorted(self._ms.keys())

Sets the scan number list from the data in the _ms dictionary.

Notes

An empty _ms dictionary raises ValueError (see Raises); otherwise the scan number list is set to the keys of _ms in ascending order.

Raises
  • ValueError: If no mass spectra are found in the dataset. If the scan number list is already set and overwrite is False.
ms1_scans

list : A list of MS1 scan numbers for the dataset.

parameters

LCMSParameters : The parameters used for the LC-MS analysis.

scans_number

list : A list of scan numbers for the dataset.

retention_time

numpy.ndarray : An array of retention times for the dataset.

tic

numpy.ndarray : An array of TIC values for the dataset.