corems.molecular_id.input.nistMSI

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Feb 12, 2020"
  3
  4from threading import Thread
  5from pathlib import Path
  6
  7from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite
  8
  9
 10class ReadNistMSI(Thread):
 11    """A class for reading NIST MSI files and storing the data in a SQLite database.
 12
 13    Parameters
 14    ----------
 15    file_path : str
 16        The path to the NIST MSI file.
 17    url : str, optional
 18        The URL for the SQLite database. Default is 'sqlite://'.
 19
 20    Raises
 21    ------
 22    FileExistsError
 23        If the specified file does not exist.
 24
 25    Attributes
 26    ----------
 27    file_path : str
 28        The path to the NIST MSI file.
 29    url : str
 30        The URL for the SQLite database.
 31    sqlLite_obj : EI_LowRes_SQLite
 32        The SQLite object for storing the compound data.
 33
 34    Methods
 35    -------
 36    * run().
 37        Runs the thread and initializes the SQLite object.
 38    * get_sqlLite_obj().
 39        Returns the SQLite object.
 40    * get_compound_data_dict_list().
 41        Parses the NIST MSI file and returns a list of compound data dictionaries.
 42    """
 43
 44    def __init__(self, file_path, url="sqlite://"):
 45        Thread.__init__(self)
 46        file_path = Path(file_path)
 47
 48        if not file_path.exists():
 49            raise FileExistsError("File does not exist: " + file_path)
 50
 51        self.file_path = file_path
 52
 53        self.url = url
 54
 55    def run(self):
 56        """Runs the thread and initializes the SQLite object."""
 57        self.sqlLite_obj = self.get_sqlLite_obj()
 58
 59    def get_sqlLite_obj(self):
 60        """Returns the SQLite object.
 61
 62        Returns
 63        -------
 64        EI_LowRes_SQLite
 65            The SQLite object for storing the compound data.
 66        """
 67        compound_data_dict_list = self.get_compound_data_dict_list()
 68
 69        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
 70
 71        for data_dict in compound_data_dict_list:
 72            if not data_dict.get("NUM PEAKS"):
 73                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
 74            if not data_dict.get("CASNO"):
 75                data_dict["CASNO"] = data_dict.get("CAS")
 76                if not data_dict["CASNO"]:
 77                    data_dict["CASNO"] = 0
 78            # print(data_dict)
 79            try:
 80                sqlLite_obj.add_compound(data_dict)
 81            except:
 82                print(data_dict.get("NAME"))
 83
 84        return sqlLite_obj
 85
 86    def get_compound_data_dict_list(self):
 87        """Parses the NIST MSI file and returns a list of compound data dictionaries.
 88
 89        Returns
 90        -------
 91        list
 92            A list of compound data dictionaries.
 93        """
 94        list_dict_data = []
 95
 96        with open(self.file_path) as msifile:
 97            content = msifile.readlines()
 98
 99            i = 0
100
101            dict_data = dict()
102            dict_data["mz"] = list()
103            dict_data["abundance"] = list()
104            # for line in content:
105            #   print(line, line=="\n" )
106
107            while i < len(content):
108                split_line = content[i].split(":")
109
110                # empty line
111                if len(content[i]) == 1:
112                    i += 1
113                    if dict_data.get("NAME"):
114                        list_dict_data.append(dict_data)
115
116                    # print(dict_data)
117                    dict_data = dict()
118                    dict_data["mz"] = list()
119                    dict_data["abundance"] = list()
120
121                # metadata, name, ri, rt etc
122                elif len(split_line) >= 2:
123                    label = split_line[0]
124                    value = ":".join(split_line[1:]).strip("\n").strip("")
125                    dict_data[label] = value
126                    i += 1
127
128                # mz and abundance pairs
129                elif len(split_line) == 1:
130                    for s in content[i].strip("\n").strip("").split("(")[1:]:
131                        values = s.split(" ")
132
133                        if values[0] == "":
134                            mz = values[1]
135                        else:
136                            mz = values[0]
137
138                        abun = values[-2].strip(")")
139
140                        dict_data["mz"].append(mz)
141                        dict_data["abundance"].append(abun)
142
143                    i += 1
144                # something else
145                else:
146                    i += 1
147
148        return list_dict_data
class ReadNistMSI(threading.Thread):
 11class ReadNistMSI(Thread):
 12    """A class for reading NIST MSI files and storing the data in a SQLite database.
 13
 14    Parameters
 15    ----------
 16    file_path : str
 17        The path to the NIST MSI file.
 18    url : str, optional
 19        The URL for the SQLite database. Default is 'sqlite://'.
 20
 21    Raises
 22    ------
 23    FileExistsError
 24        If the specified file does not exist.
 25
 26    Attributes
 27    ----------
 28    file_path : str
 29        The path to the NIST MSI file.
 30    url : str
 31        The URL for the SQLite database.
 32    sqlLite_obj : EI_LowRes_SQLite
 33        The SQLite object for storing the compound data.
 34
 35    Methods
 36    -------
 37    * run().
 38        Runs the thread and initializes the SQLite object.
 39    * get_sqlLite_obj().
 40        Returns the SQLite object.
 41    * get_compound_data_dict_list().
 42        Parses the NIST MSI file and returns a list of compound data dictionaries.
 43    """
 44
 45    def __init__(self, file_path, url="sqlite://"):
 46        Thread.__init__(self)
 47        file_path = Path(file_path)
 48
 49        if not file_path.exists():
 50            raise FileExistsError("File does not exist: " + file_path)
 51
 52        self.file_path = file_path
 53
 54        self.url = url
 55
 56    def run(self):
 57        """Runs the thread and initializes the SQLite object."""
 58        self.sqlLite_obj = self.get_sqlLite_obj()
 59
 60    def get_sqlLite_obj(self):
 61        """Returns the SQLite object.
 62
 63        Returns
 64        -------
 65        EI_LowRes_SQLite
 66            The SQLite object for storing the compound data.
 67        """
 68        compound_data_dict_list = self.get_compound_data_dict_list()
 69
 70        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
 71
 72        for data_dict in compound_data_dict_list:
 73            if not data_dict.get("NUM PEAKS"):
 74                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
 75            if not data_dict.get("CASNO"):
 76                data_dict["CASNO"] = data_dict.get("CAS")
 77                if not data_dict["CASNO"]:
 78                    data_dict["CASNO"] = 0
 79            # print(data_dict)
 80            try:
 81                sqlLite_obj.add_compound(data_dict)
 82            except:
 83                print(data_dict.get("NAME"))
 84
 85        return sqlLite_obj
 86
 87    def get_compound_data_dict_list(self):
 88        """Parses the NIST MSI file and returns a list of compound data dictionaries.
 89
 90        Returns
 91        -------
 92        list
 93            A list of compound data dictionaries.
 94        """
 95        list_dict_data = []
 96
 97        with open(self.file_path) as msifile:
 98            content = msifile.readlines()
 99
100            i = 0
101
102            dict_data = dict()
103            dict_data["mz"] = list()
104            dict_data["abundance"] = list()
105            # for line in content:
106            #   print(line, line=="\n" )
107
108            while i < len(content):
109                split_line = content[i].split(":")
110
111                # empty line
112                if len(content[i]) == 1:
113                    i += 1
114                    if dict_data.get("NAME"):
115                        list_dict_data.append(dict_data)
116
117                    # print(dict_data)
118                    dict_data = dict()
119                    dict_data["mz"] = list()
120                    dict_data["abundance"] = list()
121
122                # metadata, name, ri, rt etc
123                elif len(split_line) >= 2:
124                    label = split_line[0]
125                    value = ":".join(split_line[1:]).strip("\n").strip("")
126                    dict_data[label] = value
127                    i += 1
128
129                # mz and abundance pairs
130                elif len(split_line) == 1:
131                    for s in content[i].strip("\n").strip("").split("(")[1:]:
132                        values = s.split(" ")
133
134                        if values[0] == "":
135                            mz = values[1]
136                        else:
137                            mz = values[0]
138
139                        abun = values[-2].strip(")")
140
141                        dict_data["mz"].append(mz)
142                        dict_data["abundance"].append(abun)
143
144                    i += 1
145                # something else
146                else:
147                    i += 1
148
149        return list_dict_data

A class for reading NIST MSI files and storing the data in a SQLite database.

Parameters
  • file_path (str): The path to the NIST MSI file.
  • url (str, optional): The URL for the SQLite database. Default is 'sqlite://'.
Raises
  • FileExistsError: If the specified file does not exist.
Attributes
  • file_path (str): The path to the NIST MSI file.
  • url (str): The URL for the SQLite database.
  • sqlLite_obj (EI_LowRes_SQLite): The SQLite object for storing the compound data.
Methods
  • run(). Runs the thread and initializes the SQLite object.
  • get_sqlLite_obj(). Returns the SQLite object.
  • get_compound_data_dict_list(). Parses the NIST MSI file and returns a list of compound data dictionaries.
ReadNistMSI(file_path, url='sqlite://')
45    def __init__(self, file_path, url="sqlite://"):
46        Thread.__init__(self)
47        file_path = Path(file_path)
48
49        if not file_path.exists():
50            raise FileExistsError("File does not exist: " + file_path)
51
52        self.file_path = file_path
53
54        self.url = url

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

file_path
url
def run(self):
56    def run(self):
57        """Runs the thread and initializes the SQLite object."""
58        self.sqlLite_obj = self.get_sqlLite_obj()

Runs the thread and initializes the SQLite object.

def get_sqlLite_obj(self):
60    def get_sqlLite_obj(self):
61        """Returns the SQLite object.
62
63        Returns
64        -------
65        EI_LowRes_SQLite
66            The SQLite object for storing the compound data.
67        """
68        compound_data_dict_list = self.get_compound_data_dict_list()
69
70        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
71
72        for data_dict in compound_data_dict_list:
73            if not data_dict.get("NUM PEAKS"):
74                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
75            if not data_dict.get("CASNO"):
76                data_dict["CASNO"] = data_dict.get("CAS")
77                if not data_dict["CASNO"]:
78                    data_dict["CASNO"] = 0
79            # print(data_dict)
80            try:
81                sqlLite_obj.add_compound(data_dict)
82            except:
83                print(data_dict.get("NAME"))
84
85        return sqlLite_obj

Returns the SQLite object.

Returns
  • EI_LowRes_SQLite: The SQLite object for storing the compound data.
def get_compound_data_dict_list(self):
 87    def get_compound_data_dict_list(self):
 88        """Parses the NIST MSI file and returns a list of compound data dictionaries.
 89
 90        Returns
 91        -------
 92        list
 93            A list of compound data dictionaries.
 94        """
 95        list_dict_data = []
 96
 97        with open(self.file_path) as msifile:
 98            content = msifile.readlines()
 99
100            i = 0
101
102            dict_data = dict()
103            dict_data["mz"] = list()
104            dict_data["abundance"] = list()
105            # for line in content:
106            #   print(line, line=="\n" )
107
108            while i < len(content):
109                split_line = content[i].split(":")
110
111                # empty line
112                if len(content[i]) == 1:
113                    i += 1
114                    if dict_data.get("NAME"):
115                        list_dict_data.append(dict_data)
116
117                    # print(dict_data)
118                    dict_data = dict()
119                    dict_data["mz"] = list()
120                    dict_data["abundance"] = list()
121
122                # metadata, name, ri, rt etc
123                elif len(split_line) >= 2:
124                    label = split_line[0]
125                    value = ":".join(split_line[1:]).strip("\n").strip("")
126                    dict_data[label] = value
127                    i += 1
128
129                # mz and abundance pairs
130                elif len(split_line) == 1:
131                    for s in content[i].strip("\n").strip("").split("(")[1:]:
132                        values = s.split(" ")
133
134                        if values[0] == "":
135                            mz = values[1]
136                        else:
137                            mz = values[0]
138
139                        abun = values[-2].strip(")")
140
141                        dict_data["mz"].append(mz)
142                        dict_data["abundance"].append(abun)
143
144                    i += 1
145                # something else
146                else:
147                    i += 1
148
149        return list_dict_data

Parses the NIST MSI file and returns a list of compound data dictionaries.

Returns
  • list: A list of compound data dictionaries.
Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id