corems.mass_spectra.input.massList

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 12, 2019"
  3
  4import sys
  5
  6from pathlib import Path
  7from threading import Thread
  8import warnings
  9from corems.mass_spectrum.input.massList import ReadCoremsMasslist
 10from corems.mass_spectra.factory.lc_class import LCMSBase
 11
 12
 13class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
 14    """
 15    Class for reading CoreMS mass spectra from a text file.
 16
 17    Parameters
 18    ----------
 19    file_location : str, pathlib.Path, or s3path.S3Path
 20        Path object from pathlib containing the file location
 21    analyzer : str, optional
 22        Name of the analyzer, by default 'Unknown'
 23    instrument_label : str, optional
 24        Label of the instrument, by default 'Unknown'
 25
 26    Attributes
 27    ----------
 28    lcms : LCMSBase
 29        LCMSBase object for storing the mass spectra data.
 30
 31    Methods
 32    -------
 33    * get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
 34    * set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
 35    * import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
 36    * run(). Run the import_mass_spectra method to create the LCMSBase object.
 37    * get_lcms_obj(). Get the LCMSBase object.
 38    """
 39
 40    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
 41        if isinstance(file_location, str):
 42            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
 43            file_location = Path(file_location)
 44
 45        if not file_location.exists():
 46            raise FileNotFoundError("%s not found" % file_location)
 47
 48        if not file_location.suffix == ".corems":
 49            raise TypeError("%s is not a valid CoreMS file" % file_location)
 50
 51        Thread.__init__(self)
 52
 53        ReadCoremsMasslist.__init__(self, file_location)
 54
 55        self.lcms = LCMSBase(
 56            self.file_location, analyzer=analyzer, instrument_label=instrument_label
 57        )
 58
 59    def get_scans_filename(self) -> list:
 60        all_other = self.file_location.glob("*_scan*[!.json]")
 61
 62        scans_filepath = [
 63            (file_path_obj.stem.split("scan")[1], file_path_obj)
 64            for file_path_obj in all_other
 65        ]
 66
 67        scans_filepath.sort(key=lambda m: int(m[0]))
 68
 69        return scans_filepath
 70
 71    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
 72        self.file_location = file_path_obj
 73
 74        if file_path_obj.suffix == ".pkl":
 75            self.data_type == "dataframe"
 76
 77        else:
 78            if file_path_obj.suffix == ".csv":
 79                self.data_type == "txt"
 80                self.delimiter = ","
 81
 82            elif file_path_obj.suffix == ".xlsx":
 83                self.data_type == "excel"
 84                self.delimiter = ","
 85
 86            elif file_path_obj.suffix == ".txt":
 87                self.data_type == "txt"
 88                self.delimiter = "\t"
 89                warnings.warn("using tab as delimiter")
 90            else:
 91                raise NotImplementedError(
 92                    "%s data not yet supported " % file_path_obj.suffix
 93                )
 94
 95    def import_mass_spectra(self) -> None:
 96        list_rt, list_tic, list_scan = list(), list(), list()
 97
 98        for scan_number, file_path_obj in self.get_scans_filename():
 99            self.set_filepath_datatype_and_delimiter(file_path_obj)
100
101            mass_spec = self.get_mass_spectrum(int(scan_number))
102
103            list_scan.append(int(scan_number))
104
105            list_rt.append(mass_spec.retention_time)
106
107            list_tic.append(mass_spec.tic)
108
109            self.lcms.add_mass_spectrum(mass_spec)
110
111        self.lcms.retention_time = list_rt
112        self.lcms.tic_list = list_tic  # TODO: check if this is correct
113        self.lcms.scans_number = list_scan
114
115    def run(self) -> None:
116        """Creates the LCMS object and imports mass spectra."""
117
118        self.import_mass_spectra()
119
120    def get_lcms_obj(self) -> LCMSBase:
121        """
122        Returns the LCMSBase object associated with the massList.
123
124        If the LCMSBase object is already initialized, it is returned.
125        Otherwise, an exception is raised.
126
127        Raises:
128            Exception: If the LCMSBase object is not initialized.
129        """
130        if self.lcms:
131            return self.lcms
132        else:
133            raise Exception("returning an empty lcms class")
class ReadCoremsMassSpectraText(corems.mass_spectrum.input.massList.ReadCoremsMasslist, threading.Thread):
 14class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
 15    """
 16    Class for reading CoreMS mass spectra from a text file.
 17
 18    Parameters
 19    ----------
 20    file_location : str, pathlib.Path, or s3path.S3Path
 21        Path object from pathlib containing the file location
 22    analyzer : str, optional
 23        Name of the analyzer, by default 'Unknown'
 24    instrument_label : str, optional
 25        Label of the instrument, by default 'Unknown'
 26
 27    Attributes
 28    ----------
 29    lcms : LCMSBase
 30        LCMSBase object for storing the mass spectra data.
 31
 32    Methods
 33    -------
 34    * get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
 35    * set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
 36    * import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
 37    * run(). Run the import_mass_spectra method to create the LCMSBase object.
 38    * get_lcms_obj(). Get the LCMSBase object.
 39    """
 40
 41    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
 42        if isinstance(file_location, str):
 43            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
 44            file_location = Path(file_location)
 45
 46        if not file_location.exists():
 47            raise FileNotFoundError("%s not found" % file_location)
 48
 49        if not file_location.suffix == ".corems":
 50            raise TypeError("%s is not a valid CoreMS file" % file_location)
 51
 52        Thread.__init__(self)
 53
 54        ReadCoremsMasslist.__init__(self, file_location)
 55
 56        self.lcms = LCMSBase(
 57            self.file_location, analyzer=analyzer, instrument_label=instrument_label
 58        )
 59
 60    def get_scans_filename(self) -> list:
 61        all_other = self.file_location.glob("*_scan*[!.json]")
 62
 63        scans_filepath = [
 64            (file_path_obj.stem.split("scan")[1], file_path_obj)
 65            for file_path_obj in all_other
 66        ]
 67
 68        scans_filepath.sort(key=lambda m: int(m[0]))
 69
 70        return scans_filepath
 71
 72    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
 73        self.file_location = file_path_obj
 74
 75        if file_path_obj.suffix == ".pkl":
 76            self.data_type == "dataframe"
 77
 78        else:
 79            if file_path_obj.suffix == ".csv":
 80                self.data_type == "txt"
 81                self.delimiter = ","
 82
 83            elif file_path_obj.suffix == ".xlsx":
 84                self.data_type == "excel"
 85                self.delimiter = ","
 86
 87            elif file_path_obj.suffix == ".txt":
 88                self.data_type == "txt"
 89                self.delimiter = "\t"
 90                warnings.warn("using tab as delimiter")
 91            else:
 92                raise NotImplementedError(
 93                    "%s data not yet supported " % file_path_obj.suffix
 94                )
 95
 96    def import_mass_spectra(self) -> None:
 97        list_rt, list_tic, list_scan = list(), list(), list()
 98
 99        for scan_number, file_path_obj in self.get_scans_filename():
100            self.set_filepath_datatype_and_delimiter(file_path_obj)
101
102            mass_spec = self.get_mass_spectrum(int(scan_number))
103
104            list_scan.append(int(scan_number))
105
106            list_rt.append(mass_spec.retention_time)
107
108            list_tic.append(mass_spec.tic)
109
110            self.lcms.add_mass_spectrum(mass_spec)
111
112        self.lcms.retention_time = list_rt
113        self.lcms.tic_list = list_tic  # TODO: check if this is correct
114        self.lcms.scans_number = list_scan
115
116    def run(self) -> None:
117        """Creates the LCMS object and imports mass spectra."""
118
119        self.import_mass_spectra()
120
121    def get_lcms_obj(self) -> LCMSBase:
122        """
123        Returns the LCMSBase object associated with the massList.
124
125        If the LCMSBase object is already initialized, it is returned.
126        Otherwise, an exception is raised.
127
128        Raises:
129            Exception: If the LCMSBase object is not initialized.
130        """
131        if self.lcms:
132            return self.lcms
133        else:
134            raise Exception("returning an empty lcms class")

Class for reading CoreMS mass spectra from a text file.

Parameters
  • file_location (str, pathlib.Path, or s3path.S3Path): Location of the .corems data; a str is converted to a pathlib.Path object
  • analyzer (str, optional): Name of the analyzer, by default 'Unknown'
  • instrument_label (str, optional): Label of the instrument, by default 'Unknown'
Attributes
  • lcms (LCMSBase): LCMSBase object for storing the mass spectra data.
Methods
  • get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
  • set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
  • import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
  • run(). Run the import_mass_spectra method to create the LCMSBase object.
  • get_lcms_obj(). Get the LCMSBase object.
ReadCoremsMassSpectraText(file_location, analyzer='Unknown', instrument_label='Unknown')
41    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
42        if isinstance(file_location, str):
43            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
44            file_location = Path(file_location)
45
46        if not file_location.exists():
47            raise FileNotFoundError("%s not found" % file_location)
48
49        if not file_location.suffix == ".corems":
50            raise TypeError("%s is not a valid CoreMS file" % file_location)
51
52        Thread.__init__(self)
53
54        ReadCoremsMasslist.__init__(self, file_location)
55
56        self.lcms = LCMSBase(
57            self.file_location, analyzer=analyzer, instrument_label=instrument_label
58        )

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

lcms
def get_scans_filename(self) -> list:
60    def get_scans_filename(self) -> list:
61        all_other = self.file_location.glob("*_scan*[!.json]")
62
63        scans_filepath = [
64            (file_path_obj.stem.split("scan")[1], file_path_obj)
65            for file_path_obj in all_other
66        ]
67
68        scans_filepath.sort(key=lambda m: int(m[0]))
69
70        return scans_filepath
def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
72    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
73        self.file_location = file_path_obj
74
75        if file_path_obj.suffix == ".pkl":
76            self.data_type == "dataframe"
77
78        else:
79            if file_path_obj.suffix == ".csv":
80                self.data_type == "txt"
81                self.delimiter = ","
82
83            elif file_path_obj.suffix == ".xlsx":
84                self.data_type == "excel"
85                self.delimiter = ","
86
87            elif file_path_obj.suffix == ".txt":
88                self.data_type == "txt"
89                self.delimiter = "\t"
90                warnings.warn("using tab as delimiter")
91            else:
92                raise NotImplementedError(
93                    "%s data not yet supported " % file_path_obj.suffix
94                )
def import_mass_spectra(self) -> None:
 96    def import_mass_spectra(self) -> None:
 97        list_rt, list_tic, list_scan = list(), list(), list()
 98
 99        for scan_number, file_path_obj in self.get_scans_filename():
100            self.set_filepath_datatype_and_delimiter(file_path_obj)
101
102            mass_spec = self.get_mass_spectrum(int(scan_number))
103
104            list_scan.append(int(scan_number))
105
106            list_rt.append(mass_spec.retention_time)
107
108            list_tic.append(mass_spec.tic)
109
110            self.lcms.add_mass_spectrum(mass_spec)
111
112        self.lcms.retention_time = list_rt
113        self.lcms.tic_list = list_tic  # TODO: check if this is correct
114        self.lcms.scans_number = list_scan
def run(self) -> None:
116    def run(self) -> None:
117        """Creates the LCMS object and imports mass spectra."""
118
119        self.import_mass_spectra()

Imports the mass spectra into the LCMS object; the LCMS object itself is created by the constructor.

def get_lcms_obj(self) -> corems.mass_spectra.factory.lc_class.LCMSBase:
121    def get_lcms_obj(self) -> LCMSBase:
122        """
123        Returns the LCMSBase object associated with the massList.
124
125        If the LCMSBase object is already initialized, it is returned.
126        Otherwise, an exception is raised.
127
128        Raises:
129            Exception: If the LCMSBase object is not initialized.
130        """
131        if self.lcms:
132            return self.lcms
133        else:
134            raise Exception("returning an empty lcms class")

Returns the LCMSBase object associated with the massList.

If the LCMSBase object is already initialized, it is returned. Otherwise, an exception is raised.

Raises: Exception: If the LCMSBase object is not initialized.