corems.molecular_id.input.nistMSI
__author__ = "Yuri E. Corilo"
__date__ = "Feb 12, 2020"

from threading import Thread
from pathlib import Path

from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite


class ReadNistMSI(Thread):
    """A class for reading NIST MSI files and storing the data in a SQLite database.

    Parameters
    ----------
    file_path : str
        The path to the NIST MSI file.
    url : str, optional
        The URL for the SQLite database. Default is 'sqlite://'.

    Raises
    ------
    FileExistsError
        If the specified file does not exist.

    Attributes
    ----------
    file_path : pathlib.Path
        The path to the NIST MSI file.
    url : str
        The URL for the SQLite database.
    sqlLite_obj : EI_LowRes_SQLite
        The SQLite object for storing the compound data. Only set once
        run() has executed.

    Methods
    -------
    * run().
        Runs the thread and initializes the SQLite object.
    * get_sqlLite_obj().
        Returns the SQLite object.
    * get_compound_data_dict_list().
        Parses the NIST MSI file and returns a list of compound data dictionaries.
    """

    def __init__(self, file_path, url="sqlite://"):
        Thread.__init__(self)
        file_path = Path(file_path)

        if not file_path.exists():
            # A Path cannot be concatenated to a str with "+", so convert it
            # explicitly; otherwise a TypeError masks the intended error.
            raise FileExistsError("File does not exist: " + str(file_path))

        self.file_path = file_path
        self.url = url

    def run(self):
        """Runs the thread and initializes the SQLite object."""
        self.sqlLite_obj = self.get_sqlLite_obj()

    def get_sqlLite_obj(self):
        """Parses the file and returns a SQLite object holding its compounds.

        Returns
        -------
        EI_LowRes_SQLite
            The SQLite object for storing the compound data.
        """
        compound_data_dict_list = self.get_compound_data_dict_list()

        sqlLite_obj = EI_LowRes_SQLite(url=self.url)

        for data_dict in compound_data_dict_list:
            # Fill in fields the file may omit before handing the record over.
            if not data_dict.get("NUM PEAKS"):
                data_dict["NUM PEAKS"] = len(data_dict.get("mz", []))
            if not data_dict.get("CASNO"):
                data_dict["CASNO"] = data_dict.get("CAS") or 0

            try:
                sqlLite_obj.add_compound(data_dict)
            except Exception as error:
                # Best effort: report the failing compound and keep loading
                # the rest, without swallowing KeyboardInterrupt/SystemExit.
                print(
                    "Could not add compound {}: {}".format(
                        data_dict.get("NAME"), error
                    )
                )

        return sqlLite_obj

    def get_compound_data_dict_list(self):
        """Parses the NIST MSI file and returns a list of compound data dictionaries.

        Records are separated by empty lines. Lines containing ':' are stored
        as ``label -> value`` metadata; other lines are scanned for
        ``(mz abundance)`` pairs. Only records with a non-empty "NAME" are kept.

        Returns
        -------
        list
            A list of compound data dictionaries. Each one has the keys "mz"
            and "abundance" (lists of strings) plus one key per metadata label.
        """
        list_dict_data = []

        with open(self.file_path) as msifile:
            content = msifile.readlines()

        dict_data = {"mz": [], "abundance": []}

        for raw_line in content:
            line = raw_line.rstrip("\r\n")

            # empty line: end of the current record
            if not line:
                if dict_data.get("NAME"):
                    list_dict_data.append(dict_data)
                dict_data = {"mz": [], "abundance": []}
                continue

            # metadata, name, ri, rt etc; the value may itself contain ':'
            label, separator, value = line.partition(":")
            if separator:
                dict_data[label] = value.strip()
                continue

            # mz and abundance pairs, e.g. "(41 12) (43 100)"
            for pair in line.split("(")[1:]:
                tokens = pair.replace(")", " ").split()
                if len(tokens) < 2:
                    # malformed pair: nothing usable to record
                    continue
                dict_data["mz"].append(tokens[0])
                dict_data["abundance"].append(tokens[-1])

        # The last record is not followed by an empty line when the file has
        # no trailing blank line; keep it instead of dropping it.
        if dict_data.get("NAME"):
            list_dict_data.append(dict_data)

        return list_dict_data
class ReadNistMSI(Thread):
    """A class for reading NIST MSI files and storing the data in a SQLite database.

    Parameters
    ----------
    file_path : str
        The path to the NIST MSI file.
    url : str, optional
        The URL for the SQLite database. Default is 'sqlite://'.

    Raises
    ------
    FileExistsError
        If the specified file does not exist.

    Attributes
    ----------
    file_path : pathlib.Path
        The path to the NIST MSI file.
    url : str
        The URL for the SQLite database.
    sqlLite_obj : EI_LowRes_SQLite
        The SQLite object for storing the compound data. Only set once
        run() has executed.

    Methods
    -------
    * run().
        Runs the thread and initializes the SQLite object.
    * get_sqlLite_obj().
        Returns the SQLite object.
    * get_compound_data_dict_list().
        Parses the NIST MSI file and returns a list of compound data dictionaries.
    """

    def __init__(self, file_path, url="sqlite://"):
        Thread.__init__(self)
        file_path = Path(file_path)

        if not file_path.exists():
            # A Path cannot be concatenated to a str with "+", so convert it
            # explicitly; otherwise a TypeError masks the intended error.
            raise FileExistsError("File does not exist: " + str(file_path))

        self.file_path = file_path
        self.url = url

    def run(self):
        """Runs the thread and initializes the SQLite object."""
        self.sqlLite_obj = self.get_sqlLite_obj()

    def get_sqlLite_obj(self):
        """Parses the file and returns a SQLite object holding its compounds.

        Returns
        -------
        EI_LowRes_SQLite
            The SQLite object for storing the compound data.
        """
        compound_data_dict_list = self.get_compound_data_dict_list()

        sqlLite_obj = EI_LowRes_SQLite(url=self.url)

        for data_dict in compound_data_dict_list:
            # Fill in fields the file may omit before handing the record over.
            if not data_dict.get("NUM PEAKS"):
                data_dict["NUM PEAKS"] = len(data_dict.get("mz", []))
            if not data_dict.get("CASNO"):
                data_dict["CASNO"] = data_dict.get("CAS") or 0

            try:
                sqlLite_obj.add_compound(data_dict)
            except Exception as error:
                # Best effort: report the failing compound and keep loading
                # the rest, without swallowing KeyboardInterrupt/SystemExit.
                print(
                    "Could not add compound {}: {}".format(
                        data_dict.get("NAME"), error
                    )
                )

        return sqlLite_obj

    def get_compound_data_dict_list(self):
        """Parses the NIST MSI file and returns a list of compound data dictionaries.

        Records are separated by empty lines. Lines containing ':' are stored
        as ``label -> value`` metadata; other lines are scanned for
        ``(mz abundance)`` pairs. Only records with a non-empty "NAME" are kept.

        Returns
        -------
        list
            A list of compound data dictionaries. Each one has the keys "mz"
            and "abundance" (lists of strings) plus one key per metadata label.
        """
        list_dict_data = []

        with open(self.file_path) as msifile:
            content = msifile.readlines()

        dict_data = {"mz": [], "abundance": []}

        for raw_line in content:
            line = raw_line.rstrip("\r\n")

            # empty line: end of the current record
            if not line:
                if dict_data.get("NAME"):
                    list_dict_data.append(dict_data)
                dict_data = {"mz": [], "abundance": []}
                continue

            # metadata, name, ri, rt etc; the value may itself contain ':'
            label, separator, value = line.partition(":")
            if separator:
                dict_data[label] = value.strip()
                continue

            # mz and abundance pairs, e.g. "(41 12) (43 100)"
            for pair in line.split("(")[1:]:
                tokens = pair.replace(")", " ").split()
                if len(tokens) < 2:
                    # malformed pair: nothing usable to record
                    continue
                dict_data["mz"].append(tokens[0])
                dict_data["abundance"].append(tokens[-1])

        # The last record is not followed by an empty line when the file has
        # no trailing blank line; keep it instead of dropping it.
        if dict_data.get("NAME"):
            list_dict_data.append(dict_data)

        return list_dict_data
A class for reading NIST MSI files and storing the data in a SQLite database.
Parameters
- file_path (str): The path to the NIST MSI file.
- url (str, optional): The URL for the SQLite database. Default is 'sqlite://'.
Raises
- FileExistsError: If the specified file does not exist.
Attributes
- file_path (str): The path to the NIST MSI file.
- url (str): The URL for the SQLite database.
- sqlLite_obj (EI_LowRes_SQLite): The SQLite object for storing the compound data. It is assigned by run(), so it does not exist until the thread has run.
Methods
- run(). Runs the thread and initializes the SQLite object.
- get_sqlLite_obj(). Parses the NIST MSI file, adds every parsed compound to a new SQLite object, and returns that object.
- get_compound_data_dict_list(). Parses the NIST MSI file and returns a list of compound data dictionaries.
def __init__(self, file_path, url="sqlite://"):
    """Initializes the thread and validates the input file.

    Parameters
    ----------
    file_path : str
        The path to the NIST MSI file; stored as a pathlib.Path.
    url : str, optional
        The URL for the SQLite database. Default is 'sqlite://'.

    Raises
    ------
    FileExistsError
        If the specified file does not exist.
    """
    Thread.__init__(self)
    file_path = Path(file_path)

    if not file_path.exists():
        # A Path cannot be concatenated to a str with "+", so convert it
        # explicitly; otherwise a TypeError masks the intended error.
        raise FileExistsError("File does not exist: " + str(file_path))

    self.file_path = file_path
    self.url = url
This constructor should always be called with keyword arguments. Arguments are:
group should be None; reserved for future extension when a ThreadGroup class is implemented.
target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.
name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.
args is the argument tuple for the target invocation. Defaults to ().
kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.
If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.
def run(self):
    """Thread body: builds the SQLite object and stores it on the instance."""
    populated_db = self.get_sqlLite_obj()
    self.sqlLite_obj = populated_db
Runs the thread and initializes the SQLite object.
def get_sqlLite_obj(self):
    """Parses the file and returns a SQLite object holding its compounds.

    Each parsed compound dictionary is completed with a "NUM PEAKS" entry
    (the number of parsed m/z values) and a "CASNO" entry (the "CAS" value,
    or 0) when the file did not provide one, then passed to
    ``add_compound``. A compound that cannot be added is reported on stdout
    and skipped.

    Returns
    -------
    EI_LowRes_SQLite
        The SQLite object for storing the compound data.
    """
    compound_data_dict_list = self.get_compound_data_dict_list()

    sqlLite_obj = EI_LowRes_SQLite(url=self.url)

    for data_dict in compound_data_dict_list:
        if not data_dict.get("NUM PEAKS"):
            data_dict["NUM PEAKS"] = len(data_dict.get("mz", []))
        if not data_dict.get("CASNO"):
            data_dict["CASNO"] = data_dict.get("CAS") or 0

        try:
            sqlLite_obj.add_compound(data_dict)
        except Exception as error:
            # Best effort: report the failing compound and keep loading the
            # rest, without swallowing KeyboardInterrupt/SystemExit.
            print(
                "Could not add compound {}: {}".format(
                    data_dict.get("NAME"), error
                )
            )

    return sqlLite_obj
Returns the SQLite object.
Returns
- EI_LowRes_SQLite: The SQLite object for storing the compound data.
def get_compound_data_dict_list(self):
    """Parses the NIST MSI file and returns a list of compound data dictionaries.

    Records are separated by empty lines. Lines containing ':' are stored
    as ``label -> value`` metadata; other lines are scanned for
    ``(mz abundance)`` pairs. Only records with a non-empty "NAME" are kept.

    Returns
    -------
    list
        A list of compound data dictionaries. Each one has the keys "mz"
        and "abundance" (lists of strings) plus one key per metadata label.
    """
    list_dict_data = []

    with open(self.file_path) as msifile:
        content = msifile.readlines()

    dict_data = {"mz": [], "abundance": []}

    for raw_line in content:
        line = raw_line.rstrip("\r\n")

        # empty line: end of the current record
        if not line:
            if dict_data.get("NAME"):
                list_dict_data.append(dict_data)
            dict_data = {"mz": [], "abundance": []}
            continue

        # metadata, name, ri, rt etc; the value may itself contain ':'
        label, separator, value = line.partition(":")
        if separator:
            dict_data[label] = value.strip()
            continue

        # mz and abundance pairs, e.g. "(41 12) (43 100)"
        for pair in line.split("(")[1:]:
            tokens = pair.replace(")", " ").split()
            if len(tokens) < 2:
                # malformed pair: nothing usable to record
                continue
            dict_data["mz"].append(tokens[0])
            dict_data["abundance"].append(tokens[-1])

    # The last record is not followed by an empty line when the file has no
    # trailing blank line; keep it instead of dropping it.
    if dict_data.get("NAME"):
        list_dict_data.append(dict_data)

    return list_dict_data
Parses the NIST MSI file and returns a list of compound data dictionaries.
Returns
- list: A list of compound data dictionaries.
Inherited Members
- threading.Thread
- start
- join
- name
- ident
- is_alive
- daemon
- isDaemon
- setDaemon
- getName
- setName
- native_id