corems.mass_spectra.input.massList
__author__ = "Yuri E. Corilo"
__date__ = "Jun 12, 2019"

import sys

from pathlib import Path
from threading import Thread
import warnings
from corems.mass_spectrum.input.massList import ReadCoremsMasslist
from corems.mass_spectra.factory.lc_class import LCMSBase


class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
    """
    Class for reading CoreMS mass spectra from a text file.

    Parameters
    ----------
    file_location : str, pathlib.Path, or s3path.S3Path
        Location of the ``.corems`` data. A str is converted to a
        pathlib.Path; pass an S3Path object directly if needed.
    analyzer : str, optional
        Name of the analyzer, by default 'Unknown'
    instrument_label : str, optional
        Label of the instrument, by default 'Unknown'

    Attributes
    ----------
    lcms : LCMSBase
        LCMSBase object for storing the mass spectra data.

    Methods
    -------
    * get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
    * set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
    * import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
    * run(). Run the import_mass_spectra method to create the LCMSBase object.
    * get_lcms_obj(). Get the LCMSBase object.
    """

    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
        """
        Validate the location and initialise both base classes.

        Raises
        ------
        FileNotFoundError
            If ``file_location`` does not exist.
        TypeError
            If ``file_location`` does not have the ``.corems`` suffix.
        """
        if isinstance(file_location, str):
            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
            file_location = Path(file_location)

        if not file_location.exists():
            raise FileNotFoundError("%s not found" % file_location)

        if file_location.suffix != ".corems":
            raise TypeError("%s is not a valid CoreMS file" % file_location)

        Thread.__init__(self)

        ReadCoremsMasslist.__init__(self, file_location)

        self.lcms = LCMSBase(
            self.file_location, analyzer=analyzer, instrument_label=instrument_label
        )

    def get_scans_filename(self) -> list:
        """
        Collect the per-scan files found under ``self.file_location``.

        Returns
        -------
        list
            ``(scan_number, path)`` tuples, where ``scan_number`` is the text
            following the last "scan" in the file stem, sorted by its integer
            value.
        """
        # NOTE(review): "[!.json]" is a glob character class, not a suffix
        # filter -- it rejects any name whose LAST character is one of
        # ".", "j", "s", "o", "n". That happens to skip *.json while keeping
        # .csv/.txt/.pkl/.xlsx; left unchanged to preserve which files match.
        all_other = self.file_location.glob("*_scan*[!.json]")

        # rsplit on the last "scan" so a sample name that itself contains
        # "scan" does not shadow the numeric scan suffix.
        scans_filepath = [
            (file_path_obj.stem.rsplit("scan", 1)[1], file_path_obj)
            for file_path_obj in all_other
        ]

        scans_filepath.sort(key=lambda m: int(m[0]))

        return scans_filepath

    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
        """
        Point the reader at one scan file and configure how it is parsed.

        Parameters
        ----------
        file_path_obj : pathlib.Path or compatible
            Scan file; its ``suffix`` selects ``data_type`` and ``delimiter``.

        Raises
        ------
        NotImplementedError
            If the suffix is not one of .pkl, .csv, .xlsx, .txt.
        """
        self.file_location = file_path_obj
        suffix = file_path_obj.suffix

        # The data_type lines below were previously written with "==",
        # which compared and discarded the result instead of assigning.
        if suffix == ".pkl":
            self.data_type = "dataframe"

        elif suffix == ".csv":
            self.data_type = "txt"
            self.delimiter = ","

        elif suffix == ".xlsx":
            self.data_type = "excel"
            self.delimiter = ","

        elif suffix == ".txt":
            self.data_type = "txt"
            self.delimiter = "\t"
            warnings.warn("using tab as delimiter")

        else:
            raise NotImplementedError("%s data not yet supported " % suffix)

    def import_mass_spectra(self) -> None:
        """
        Load every scan file and register its spectrum on ``self.lcms``.

        After the loop, the collected retention times, TIC values and scan
        numbers are stored on ``self.lcms``.
        """
        list_rt, list_tic, list_scan = [], [], []

        for scan_number, file_path_obj in self.get_scans_filename():
            self.set_filepath_datatype_and_delimiter(file_path_obj)

            mass_spec = self.get_mass_spectrum(int(scan_number))

            list_scan.append(int(scan_number))

            list_rt.append(mass_spec.retention_time)

            list_tic.append(mass_spec.tic)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        self.lcms.tic_list = list_tic  # TODO: check if this is correct
        self.lcms.scans_number = list_scan

    def run(self) -> None:
        """Import the mass spectra into the LCMS object (thread entry point)."""

        self.import_mass_spectra()

    def get_lcms_obj(self) -> LCMSBase:
        """
        Returns the LCMSBase object associated with the massList.

        The object is returned when it evaluates as truthy; otherwise an
        exception is raised.

        Raises:
            Exception: If ``self.lcms`` is falsy.
        """
        if self.lcms:
            return self.lcms
        else:
            raise Exception("returning an empty lcms class")
class ReadCoremsMassSpectraText(ReadCoremsMasslist, Thread):
    """
    Class for reading CoreMS mass spectra from a text file.

    Parameters
    ----------
    file_location : str, pathlib.Path, or s3path.S3Path
        Location of the ``.corems`` data. A str is converted to a
        pathlib.Path; pass an S3Path object directly if needed.
    analyzer : str, optional
        Name of the analyzer, by default 'Unknown'
    instrument_label : str, optional
        Label of the instrument, by default 'Unknown'

    Attributes
    ----------
    lcms : LCMSBase
        LCMSBase object for storing the mass spectra data.

    Methods
    -------
    * get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
    * set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
    * import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
    * run(). Run the import_mass_spectra method to create the LCMSBase object.
    * get_lcms_obj(). Get the LCMSBase object.
    """

    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
        """
        Validate the location and initialise both base classes.

        Raises
        ------
        FileNotFoundError
            If ``file_location`` does not exist.
        TypeError
            If ``file_location`` does not have the ``.corems`` suffix.
        """
        if isinstance(file_location, str):
            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
            file_location = Path(file_location)

        if not file_location.exists():
            raise FileNotFoundError("%s not found" % file_location)

        if file_location.suffix != ".corems":
            raise TypeError("%s is not a valid CoreMS file" % file_location)

        Thread.__init__(self)

        ReadCoremsMasslist.__init__(self, file_location)

        self.lcms = LCMSBase(
            self.file_location, analyzer=analyzer, instrument_label=instrument_label
        )

    def get_scans_filename(self) -> list:
        """
        Collect the per-scan files found under ``self.file_location``.

        Returns
        -------
        list
            ``(scan_number, path)`` tuples, where ``scan_number`` is the text
            following the last "scan" in the file stem, sorted by its integer
            value.
        """
        # NOTE(review): "[!.json]" is a glob character class, not a suffix
        # filter -- it rejects any name whose LAST character is one of
        # ".", "j", "s", "o", "n". That happens to skip *.json while keeping
        # .csv/.txt/.pkl/.xlsx; left unchanged to preserve which files match.
        all_other = self.file_location.glob("*_scan*[!.json]")

        # rsplit on the last "scan" so a sample name that itself contains
        # "scan" does not shadow the numeric scan suffix.
        scans_filepath = [
            (file_path_obj.stem.rsplit("scan", 1)[1], file_path_obj)
            for file_path_obj in all_other
        ]

        scans_filepath.sort(key=lambda m: int(m[0]))

        return scans_filepath

    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
        """
        Point the reader at one scan file and configure how it is parsed.

        Parameters
        ----------
        file_path_obj : pathlib.Path or compatible
            Scan file; its ``suffix`` selects ``data_type`` and ``delimiter``.

        Raises
        ------
        NotImplementedError
            If the suffix is not one of .pkl, .csv, .xlsx, .txt.
        """
        self.file_location = file_path_obj
        suffix = file_path_obj.suffix

        # The data_type lines below were previously written with "==",
        # which compared and discarded the result instead of assigning.
        if suffix == ".pkl":
            self.data_type = "dataframe"

        elif suffix == ".csv":
            self.data_type = "txt"
            self.delimiter = ","

        elif suffix == ".xlsx":
            self.data_type = "excel"
            self.delimiter = ","

        elif suffix == ".txt":
            self.data_type = "txt"
            self.delimiter = "\t"
            warnings.warn("using tab as delimiter")

        else:
            raise NotImplementedError("%s data not yet supported " % suffix)

    def import_mass_spectra(self) -> None:
        """
        Load every scan file and register its spectrum on ``self.lcms``.

        After the loop, the collected retention times, TIC values and scan
        numbers are stored on ``self.lcms``.
        """
        list_rt, list_tic, list_scan = [], [], []

        for scan_number, file_path_obj in self.get_scans_filename():
            self.set_filepath_datatype_and_delimiter(file_path_obj)

            mass_spec = self.get_mass_spectrum(int(scan_number))

            list_scan.append(int(scan_number))

            list_rt.append(mass_spec.retention_time)

            list_tic.append(mass_spec.tic)

            self.lcms.add_mass_spectrum(mass_spec)

        self.lcms.retention_time = list_rt
        self.lcms.tic_list = list_tic  # TODO: check if this is correct
        self.lcms.scans_number = list_scan

    def run(self) -> None:
        """Import the mass spectra into the LCMS object (thread entry point)."""

        self.import_mass_spectra()

    def get_lcms_obj(self) -> LCMSBase:
        """
        Returns the LCMSBase object associated with the massList.

        The object is returned when it evaluates as truthy; otherwise an
        exception is raised.

        Raises:
            Exception: If ``self.lcms`` is falsy.
        """
        if self.lcms:
            return self.lcms
        else:
            raise Exception("returning an empty lcms class")
Class for reading CoreMS mass spectra from a text file.
Parameters
- file_location (str, pathlib.Path, or s3path.S3Path): Location of the .corems data. A str is converted to a pathlib.Path; pass an S3Path object directly if needed.
- analyzer (str, optional): Name of the analyzer, by default 'Unknown'
- instrument_label (str, optional): Label of the instrument, by default 'Unknown'
Attributes
- lcms (LCMSBase): LCMSBase object for storing the mass spectra data.
Methods
- get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
- set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
- import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
- run(). Run the import_mass_spectra method to create the LCMSBase object.
- get_lcms_obj(). Get the LCMSBase object.
def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
    """
    Validate the location, then initialise the Thread and mass-list bases.

    Parameters
    ----------
    file_location : str, pathlib.Path, or s3path.S3Path
        Location to read; a str is wrapped in pathlib.Path.
    analyzer : str, optional
        Forwarded to LCMSBase, by default 'Unknown'
    instrument_label : str, optional
        Forwarded to LCMSBase, by default 'Unknown'

    Raises
    ------
    FileNotFoundError
        If the location does not exist.
    TypeError
        If the location's suffix is not ".corems".
    """
    # Plain strings become Path objects; callers needing S3 pass an S3Path.
    location = Path(file_location) if isinstance(file_location, str) else file_location

    if not location.exists():
        raise FileNotFoundError("%s not found" % location)

    if location.suffix != ".corems":
        raise TypeError("%s is not a valid CoreMS file" % location)

    # Both bases are initialised explicitly, Thread first.
    Thread.__init__(self)
    ReadCoremsMasslist.__init__(self, location)

    self.lcms = LCMSBase(
        self.file_location,
        analyzer=analyzer,
        instrument_label=instrument_label,
    )
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
    """
    Point the reader at one scan file and configure how it is parsed.

    Parameters
    ----------
    file_path_obj : pathlib.Path or compatible
        Scan file; its ``suffix`` selects ``data_type`` and ``delimiter``.

    Raises
    ------
    NotImplementedError
        If the suffix is not one of .pkl, .csv, .xlsx, .txt.
    """
    self.file_location = file_path_obj
    suffix = file_path_obj.suffix

    # The data_type lines below were previously written with "==",
    # which compared and discarded the result instead of assigning.
    if suffix == ".pkl":
        self.data_type = "dataframe"

    elif suffix == ".csv":
        self.data_type = "txt"
        self.delimiter = ","

    elif suffix == ".xlsx":
        self.data_type = "excel"
        self.delimiter = ","

    elif suffix == ".txt":
        self.data_type = "txt"
        self.delimiter = "\t"
        warnings.warn("using tab as delimiter")

    else:
        raise NotImplementedError("%s data not yet supported " % suffix)
def import_mass_spectra(self) -> None:
    """
    Read each scan file in order and register its spectrum on ``self.lcms``.

    For every ``(scan_number, path)`` pair from ``get_scans_filename()`` the
    reader is re-pointed at the file, the spectrum is loaded, and its scan
    number, retention time and TIC are recorded. The three collected lists
    are stored on ``self.lcms`` once all scans have been processed.
    """
    scan_numbers = []
    retention_times = []
    tic_values = []

    for scan_label, scan_path in self.get_scans_filename():
        # Reconfigure the reader before loading this scan's spectrum.
        self.set_filepath_datatype_and_delimiter(scan_path)
        spectrum = self.get_mass_spectrum(int(scan_label))

        scan_numbers.append(int(scan_label))
        retention_times.append(spectrum.retention_time)
        tic_values.append(spectrum.tic)

        self.lcms.add_mass_spectrum(spectrum)

    target = self.lcms
    target.retention_time = retention_times
    target.tic_list = tic_values  # TODO: check if this is correct
    target.scans_number = scan_numbers
def run(self) -> None:
    """
    Thread entry point: import the mass spectra.

    Delegates to ``import_mass_spectra()`` and returns nothing.
    """
    self.import_mass_spectra()
Imports the mass spectra into the LCMS object by calling import_mass_spectra(); the LCMS object itself is created in the constructor.
def get_lcms_obj(self) -> LCMSBase:
    """
    Return the LCMSBase object held by this reader.

    Returns:
        LCMSBase: ``self.lcms`` when it evaluates as truthy.

    Raises:
        Exception: If ``self.lcms`` is falsy.
    """
    # Guard clause: refuse to hand back a falsy lcms object.
    if not self.lcms:
        raise Exception("returning an empty lcms class")
    return self.lcms
Returns the LCMSBase object associated with the massList.
If the LCMSBase object is already initialized, it is returned. Otherwise, an exception is raised.
Raises: Exception: If the LCMSBase object is not initialized.
Inherited Members
- corems.mass_spectrum.input.baseClass.MassListBaseClass
- file_location
- header_lines
- isCentroid
- isThermoProfile
- headerless
- analyzer
- instrument_label
- sample_name
- parameters
- set_parameter_from_toml
- set_parameter_from_json
- data_type
- delimiter
- encoding_detector
- set_data_type
- get_dataframe
- load_settings
- get_output_parameters
- clean_data_frame
- check_columns
- read_xml_peaks
- get_xml_polarity
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id