corems.molecular_id.factory.EI_SQL

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Feb 12, 2020"
  3
  4import os
  5from dataclasses import dataclass
  6
  7from numpy import array, frombuffer
  8from sqlalchemy import (
  9    Column,
 10    Float,
 11    ForeignKey,
 12    Integer,
 13    LargeBinary,
 14    String,
 15    create_engine,
 16)
 17from sqlalchemy.exc import SQLAlchemyError
 18from sqlalchemy.ext.declarative import declarative_base
 19from sqlalchemy.orm import relationship, sessionmaker
 20from sqlalchemy.pool import QueuePool
 21
 22Base = declarative_base()
 23
 24
 25class Metadatar(Base):
 26    """This class is used to store the metadata of the compounds in the database
 27
 28    Attributes
 29    -----------
 30    id : int
 31        The id of the compound.
 32    cas : str
 33        The CAS number of the compound.
 34    inchikey : str
 35        The InChiKey of the compound.
 36    inchi : str
 37        The InChi of the compound.
 38    chebi : str
 39        The ChEBI ID of the compound.
 40    smiles : str
 41        The SMILES of the compound.
 42    kegg : str
 43        The KEGG ID of the compound.
 44    iupac_name : str
 45        The IUPAC name of the compound.
 46    traditional_name : str
 47        The traditional name of the compound.
 48    common_name : str
 49        The common name of the compound.
 50    data_id : int
 51        The id of the compound in the molecularData table.
 52    data : LowResolutionEICompound
 53        The compound object.
 54    """
 55
 56    __tablename__ = "metaDataR"
 57
 58    id = Column(Integer, primary_key=True)
 59    cas = Column(String, nullable=True)
 60    inchikey = Column(String, nullable=False)
 61    inchi = Column(String, nullable=False)
 62    chebi = Column(String, nullable=True)
 63    smiles = Column(String, nullable=True)
 64    kegg = Column(String, nullable=True)
 65    iupac_name = Column(String, nullable=True)
 66    traditional_name = Column(String, nullable=True)
 67    common_name = Column(String, nullable=True)
 68
 69    data_id = Column(Integer, ForeignKey("molecularData.id"))
 70    data = relationship("LowResolutionEICompound", back_populates="metadatar")
 71
 72
 73class LowResolutionEICompound(Base):
 74    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
 75
 76    Attributes
 77    -----------
 78    id : int
 79        The id of the compound.
 80    name : str
 81        The name of the compound.
 82    classify : str
 83        The classification of the compound.
 84    formula : str
 85        The formula of the compound.
 86    ri : float
 87        The retention index of the compound.
 88    retention_time : float
 89        The retention time of the compound.
 90    source : str
 91        The source of the compound.
 92    casno : str
 93        The CAS number of the compound.
 94    comment : str
 95        The comment of the compound.
 96    source_temp_c : float
 97        The source temperature of the spectra.
 98    ev : float
 99        The electron volts of the spectra.
100    peaks_count : int
101        The number of peaks in the spectra.
102    mz : numpy.ndarray
103        The m/z values of the spectra.
104    abundance : numpy.ndarray
105        The abundance values of the spectra.
106    metadatar : Metadatar
107        The metadata object.
108    """
109
110    __tablename__ = "molecularData"
111
112    id = Column(Integer, primary_key=True)
113
114    name = Column(String, nullable=False)
115    classify = Column(String, nullable=True)
116    formula = Column(String, nullable=True)
117    ri = Column(Float, nullable=False)
118    retention_time = Column(Float, nullable=False)
119
120    source = Column(String, nullable=True)
121    casno = Column(String, nullable=False)
122    comment = Column(String, nullable=True)
123
124    derivativenum = Column(String, nullable=True)
125    derivatization = Column(String, nullable=True)
126
127    source_temp_c = Column(Float, nullable=True)
128    ev = Column(Float, nullable=True)
129
130    peaks_count = Column(Integer, nullable=False)
131
132    mz = Column(LargeBinary, nullable=False)
133    abundance = Column(LargeBinary, nullable=False)
134
135    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
136
137    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
138
139    def __init__(self, **dict_data):
140        self.id = dict_data.get("id")
141
142        self.name = dict_data.get("NAME")
143        self.classify = dict_data.get("classify")
144        self.formula = dict_data.get("FORM")
145        self.ri = dict_data.get("RI")
146        self.retention_time = dict_data.get("RT")
147
148        self.source = dict_data.get("SOURCE")
149        self.casno = dict_data.get("CASNO")
150        self.comment = dict_data.get("COMMENT")
151
152        self.derivativenum = dict_data.get("derivativenum")
153        self.derivatization = dict_data.get("derivatization")
154
155        self.peaks_count = dict_data.get("NUM PEAKS")
156
157        # mz and abun are numpy arrays of 64 bits integer
158        # when using postgres array might be a better option
159
160        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
161        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
162
163        self.metadatar = dict_data.get("metadatar", None)
164
165    def __repr__(self):
166        return (
167            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
168            % (
169                self.name,
170                self.casno,
171                self.formula,
172                self.ri,
173                self.retention_time,
174                self.comment,
175            )
176        )
177
178
179@dataclass
180class MetaboliteMetadata:
181    """Dataclass for the Metabolite Metadata
182
183    Attributes
184    -----------
185    id : int
186        The id of the compound.
187    cas : str
188        The CAS number of the compound.
189    inchikey : str
190        The InChiKey of the compound.
191    inchi : str
192        The InChi of the compound.
193    chebi : str
194        The ChEBI ID of the compound.
195    smiles : str
196        The SMILES of the compound.
197    kegg : str
198        The KEGG ID of the compound.
199    iupac_name : str
200        The IUPAC name of the compound.
201    traditional_name : str
202        The traditional name of the compound.
203    common_name : str
204        The common name of the compound.
205    data_id : int
206        The id of the compound in the molecularData table.
207
208    """
209
210    id: int
211    cas: str
212    inchikey: str
213    inchi: str
214    chebi: str
215    smiles: str
216    kegg: str
217    data_id: int
218    iupac_name: str
219    traditional_name: str
220    common_name: str
221
222
223@dataclass
224class LowResCompoundRef:
225    """Dataclass for the Low Resolution Compound Reference
226
227    This class is used to store the molecular and spectral data of the compounds in the low res EI database
228
229    Parameters
230    -----------
231    compounds_dict : dict
232        A dictionary representing the compound.
233
234    Attributes
235    -----------
236    id : int
237        The id of the compound.
238    name : str
239        The name of the compound.
240    ri : str
241        The retention index of the compound.
242    retention_time : str
243        The retention time of the compound.
244    casno : str
245        The CAS number of the compound.
246    comment : str
247        The comment of the compound.
248    peaks_count : int
249        The number of peaks in the spectra.
250    classify : str
251        The classification of the compound.
252    derivativenum : str
253        The derivative number of the compound.
254    derivatization : str
255        The derivatization applied to the compound.
256    mz : numpy.ndarray
257        The m/z values of the spectra.
258    abundance : numpy.ndarray
259        The abundance values of the spectra.
260    source_temp_c : float
261        The source temperature of the spectra.
262    ev : float
263        The electron volts of the spectra.
264    formula : str
265        The formula of the compound.
266    source : str
267        The source of the spectra data.
268    classify : str
269        The classification of the compound.
270    metadata : MetaboliteMetadata
271        The metadata object.
272    similarity_score : float
273        The similarity score of the compound.
274    ri_score : float
275        The RI score of the compound.
276    spectral_similarity_score : float
277        The spectral similarity score of the compound.
278    spectral_similarity_scores : dict
279        The spectral similarity scores of the compound.
280
281    """
282
283    # this class is use to store the results inside the GCPeak class
284    def __init__(self, compounds_dict):
285        self.id = compounds_dict.get("id")
286        self.name = compounds_dict.get("name")
287        self.ri = compounds_dict.get("ri")
288        self.retention_time = compounds_dict.get("rt")
289        self.casno = compounds_dict.get("casno")
290        self.comment = compounds_dict.get("comment")
291        self.peaks_count = compounds_dict.get("peaks_count")
292
293        self.classify = compounds_dict.get("classify")
294        self.derivativenum = compounds_dict.get("derivativenum")
295        self.derivatization = compounds_dict.get("derivatization")
296
297        self.mz = compounds_dict.get("mz")
298        self.abundance = compounds_dict.get("abundance")
299
300        self.source_temp_c = compounds_dict.get("source_temp_c")
301        self.ev = compounds_dict.get("ev")
302        self.formula = compounds_dict.get("formula")
303        self.source = compounds_dict.get("source")
304
305        self.classify = compounds_dict.get("classify")
306
307        if compounds_dict.get("metadata"):
308            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
309
310        else:
311            self.metadata = None
312
313        self.similarity_score = None
314        self.ri_score = None
315        self.spectral_similarity_score = None
316        self.spectral_similarity_scores = {}
317
318
319class EI_LowRes_SQLite:
320    """
321    A class for interacting with a SQLite database for low-resolution EI compounds.
322
323    Parameters
324    -----------
325    url : str, optional
326        The URL of the SQLite database. Default is 'sqlite://'.
327
328    Attributes
329    -----------
330    engine : sqlalchemy.engine.Engine
331        The SQLAlchemy engine for connecting to the database.
332    session : sqlalchemy.orm.Session
333        The SQLAlchemy session for executing database operations.
334
335    Methods
336    --------
337    * __init__(self, url='sqlite://').
338        Initializes the EI_LowRes_SQLite object.
339    * __exit__(self, exc_type, exc_val, exc_tb).
340        Closes the database connection.
341    * init_engine(self, url).
342        Initializes the SQLAlchemy engine.
343    * __enter__(self).
344        Returns the EI_LowRes_SQLite object.
345    * add_compound_list(self, data_dict_list).
346        Adds a list of compounds to the database.
347    * add_compound(self, data_dict).
348        Adds a single compound to the database.
349    * commit(self).
350        Commits the changes to the database.
351    * row_to_dict(self, row).
352        Converts a database row to a dictionary.
353    * get_all(self).
354        Retrieves all compounds from the database.
355    * query_min_max_rt(self, min_max_rt).
356        Queries compounds based on retention time range.
357    * query_min_max_ri(self, min_max_ri).
358        Queries compounds based on RI range.
359    * query_names_and_rt(self, min_max_rt, compound_names).
360        Queries compounds based on compound names and retention time range.
361    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
362        Queries compounds based on RI range and retention time range.
363    * delete_compound(self, compound).
364        Deletes a compound from the database.
365    * purge(self).
366        Deletes all compounds from the database table.
367    * clear_data(self).
368        Clears all tables in the database.
369    """
370
371    def __init__(self, url="sqlite://"):
372        self.engine = self.init_engine(url)
373
374        Base.metadata.create_all(self.engine)
375
376        Session = sessionmaker(bind=self.engine)
377
378        self.session = Session()
379
380    def __exit__(self, exc_type, exc_val, exc_tb):
381        """Closes the database connection."""
382        self.commit()
383        self.session.close()
384        self.engine.dispose()
385
386    def init_engine(self, url):
387        """Initializes the SQLAlchemy engine.
388
389        Parameters
390        -----------
391        url : str
392            The URL of the SQLite database.
393
394        Returns
395        --------
396        sqlalchemy.engine.Engine
397            The SQLAlchemy engine for connecting to the database.
398        """
399        directory = os.getcwd()
400        if not url:
401            if not os.path.isdir(directory + "/db"):
402                os.mkdir(directory + "/db")
403            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
404                DB=directory
405            )
406        return create_engine(url, poolclass=QueuePool)
407
408    def __enter__(self):
409        """Returns the EI_LowRes_SQLite object."""
410        return self
411
412    def add_compound_list(self, data_dict_list):
413        """Adds a list of compounds to the database.
414
415        Parameters
416        -----------
417        data_dict_list : list of dict
418            A list of dictionaries representing the compounds.
419        """
420        for data_dict in data_dict_list:
421            # print(data_dict.get('NUM PEAKS'))
422            if not data_dict.get("NUM PEAKS"):
423                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
424            if not data_dict.get("CASNO"):
425                data_dict["CASNO"] = data_dict.get("CAS")
426
427        self.session.add_all(
428            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
429        )
430
431    def add_compound(self, data_dict):
432        """Adds a single compound to the database.
433
434        Parameters
435        -----------
436        data_dict : dict
437            A dictionary representing the compound.
438
439        """
440        one_compound = LowResolutionEICompound(**data_dict)
441        self.session.add(one_compound)
442        self.commit()
443
444    def commit(self):
445        """Commits the changes to the database."""
446        try:
447            self.session.commit()
448        except SQLAlchemyError as e:
449            self.session.rollback()
450            print(str(e))
451
452    def row_to_dict(self, row):
453        """Converts a database row to a dictionary.
454
455        Parameters
456        -----------
457        row : sqlalchemy.engine.row.Row
458            A row from the database.
459
460        Returns
461        --------
462        dict
463            A dictionary representing the compound.
464        """
465        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
466
467        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
468        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
469
470        if row.metadatar:
471            data_dict["metadata"] = {
472                c.name: getattr(row.metadatar, c.name)
473                for c in row.metadatar.__table__.columns
474            }
475
476        else:
477            data_dict["metadata"] = None
478
479        return data_dict
480
481    def get_all(
482        self,
483    ):
484        """Retrieves all compounds from the database.
485
486        Returns
487        --------
488        list
489            A list of dictionaries representing the compounds.
490        """
491        compounds = self.session.query(LowResolutionEICompound).all()
492
493        return [self.row_to_dict(compound) for compound in compounds]
494
495    def query_min_max_rt(
496        self,
497        min_max_rt,
498    ):
499        """Queries compounds based on retention time range.
500
501        Parameters
502        -----------
503        min_max_rt : tuple
504            A tuple containing the minimum and maximum retention time values.
505
506        Returns
507        --------
508        list
509            A list of dictionaries representing the compounds.
510        """
511        min_rt, max_rt = min_max_rt
512
513        compounds = self.session.query(LowResolutionEICompound).filter(
514            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
515        )
516
517        return [self.row_to_dict(compound) for compound in compounds]
518
519    def query_min_max_ri(self, min_max_ri):
520        """Queries compounds based on RI range.
521
522        Parameters
523        -----------
524        min_max_ri : tuple
525            A tuple containing the minimum and maximum RI values.
526        """
527        min_ri, max_ri = min_max_ri
528
529        compounds = (
530            self.session.query(LowResolutionEICompound)
531            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
532            .all()
533        )
534
535        return [self.row_to_dict(compound) for compound in compounds]
536
537    def query_names_and_rt(self, min_max_rt, compound_names):
538        """Queries compounds based on compound names and retention time range.
539
540        Parameters
541        -----------
542        min_max_rt : tuple
543            A tuple containing the minimum and maximum retention time values.
544        compound_names : list
545            A list of compound names.
546
547        Returns
548        --------
549        list
550            A list of dictionaries representing the compounds.
551
552        """
553        min_rt, max_rt = min_max_rt
554
555        compounds = (
556            self.session.query(LowResolutionEICompound)
557            .filter(LowResolutionEICompound.name.in_(compound_names))
558            .filter(
559                LowResolutionEICompound.retention_time >= min_rt,
560                LowResolutionEICompound.retention_time <= max_rt,
561            )
562        )
563
564        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
565        # x = [self.row_to_dict(compound) for compound in compounds]
566
567        return [self.row_to_dict(compound) for compound in compounds]
568
569    def query_min_max_ri_and_rt(
570        self,
571        min_max_ri,
572        min_max_rt,
573    ):
574        """Queries compounds based on RI range and retention time range.
575
576        Parameters
577        -----------
578        min_max_ri : tuple
579            A tuple containing the minimum and maximum RI values.
580        min_max_rt : tuple
581            A tuple containing the minimum and maximum retention time values.
582
583        Returns
584        --------
585        list
586            A list of dictionaries representing the compounds.
587
588        """
589        min_ri, max_ri = min_max_ri
590
591        min_rt, max_rt = min_max_rt
592
593        compounds = self.session.query(LowResolutionEICompound).filter(
594            LowResolutionEICompound.ri <= max_ri,
595            LowResolutionEICompound.ri >= min_ri,
596            LowResolutionEICompound.ri >= min_rt,
597            LowResolutionEICompound.ri >= max_rt,
598        )
599
600        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
601
602        return [self.row_to_dict(compound) for compound in compounds]
603
604    def delete_compound(self, compound):
605        """Deletes a compound from the database.
606
607        Parameters
608        -----------
609        compound : LowResolutionEICompound
610            A compound object.
611
612        """
613        try:
614            self.session.delete(compound)
615            self.session.commit()
616
617        except SQLAlchemyError as e:
618            self.session.rollback()
619            print(str(e))
620
621    def purge(self):
622        """Deletes all compounds from the database table.
623
624        Notes
625        ------
626        Careful, this will delete the entire database table.
627        """
628        self.session.query(LowResolutionEICompound).delete()
629        self.session.commit()
630
631    def clear_data(self):
632        """Clears all tables in the database."""
633        meta = Base.metadata
634        for table in reversed(meta.sorted_tables):
635            print("Clear table %s" % table)
636            self.session.execute(table.delete())
637        self.session.commit()
class Base:

The base class of the class hierarchy.

When called, it accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any.

Base(**kwargs)
1185def _declarative_constructor(self, **kwargs):
1186    """A simple constructor that allows initialization from kwargs.
1187
1188    Sets attributes on the constructed instance using the names and
1189    values in ``kwargs``.
1190
1191    Only keys that are present as
1192    attributes of the instance's class are allowed. These could be,
1193    for example, any mapped columns or relationships.
1194    """
1195    cls_ = type(self)
1196    for k in kwargs:
1197        if not hasattr(cls_, k):
1198            raise TypeError(
1199                "%r is an invalid keyword argument for %s" % (k, cls_.__name__)
1200            )
1201        setattr(self, k, kwargs[k])

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

registry = <sqlalchemy.orm.decl_api.registry object>
metadata = MetaData()
class Metadatar(corems.molecular_id.factory.EI_SQL.Base):
26class Metadatar(Base):
27    """This class is used to store the metadata of the compounds in the database
28
29    Attributes
30    -----------
31    id : int
32        The id of the compound.
33    cas : str
34        The CAS number of the compound.
35    inchikey : str
36        The InChiKey of the compound.
37    inchi : str
38        The InChi of the compound.
39    chebi : str
40        The ChEBI ID of the compound.
41    smiles : str
42        The SMILES of the compound.
43    kegg : str
44        The KEGG ID of the compound.
45    iupac_name : str
46        The IUPAC name of the compound.
47    traditional_name : str
48        The traditional name of the compound.
49    common_name : str
50        The common name of the compound.
51    data_id : int
52        The id of the compound in the molecularData table.
53    data : LowResolutionEICompound
54        The compound object.
55    """
56
57    __tablename__ = "metaDataR"
58
59    id = Column(Integer, primary_key=True)
60    cas = Column(String, nullable=True)
61    inchikey = Column(String, nullable=False)
62    inchi = Column(String, nullable=False)
63    chebi = Column(String, nullable=True)
64    smiles = Column(String, nullable=True)
65    kegg = Column(String, nullable=True)
66    iupac_name = Column(String, nullable=True)
67    traditional_name = Column(String, nullable=True)
68    common_name = Column(String, nullable=True)
69
70    data_id = Column(Integer, ForeignKey("molecularData.id"))
71    data = relationship("LowResolutionEICompound", back_populates="metadatar")

This class is used to store the metadata of the compounds in the database

Attributes
  • id (int): The id of the compound.
  • cas (str): The CAS number of the compound.
  • inchikey (str): The InChiKey of the compound.
  • inchi (str): The InChi of the compound.
  • chebi (str): The ChEBI ID of the compound.
  • smiles (str): The SMILES of the compound.
  • kegg (str): The KEGG ID of the compound.
  • iupac_name (str): The IUPAC name of the compound.
  • traditional_name (str): The traditional name of the compound.
  • common_name (str): The common name of the compound.
  • data_id (int): The id of the compound in the molecularData table.
  • data (LowResolutionEICompound): The compound object.
Metadatar(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

id
cas
inchikey
inchi
chebi
smiles
kegg
iupac_name
traditional_name
common_name
data_id
data
Inherited Members
Base
registry
metadata
class LowResolutionEICompound(corems.molecular_id.factory.EI_SQL.Base):
 74class LowResolutionEICompound(Base):
 75    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
 76
 77    Attributes
 78    -----------
 79    id : int
 80        The id of the compound.
 81    name : str
 82        The name of the compound.
 83    classify : str
 84        The classification of the compound.
 85    formula : str
 86        The formula of the compound.
 87    ri : float
 88        The retention index of the compound.
 89    retention_time : float
 90        The retention time of the compound.
 91    source : str
 92        The source of the compound.
 93    casno : str
 94        The CAS number of the compound.
 95    comment : str
 96        The comment of the compound.
 97    source_temp_c : float
 98        The source temperature of the spectra.
 99    ev : float
100        The electron volts of the spectra.
101    peaks_count : int
102        The number of peaks in the spectra.
103    mz : numpy.ndarray
104        The m/z values of the spectra.
105    abundance : numpy.ndarray
106        The abundance values of the spectra.
107    metadatar : Metadatar
108        The metadata object.
109    """
110
111    __tablename__ = "molecularData"
112
113    id = Column(Integer, primary_key=True)
114
115    name = Column(String, nullable=False)
116    classify = Column(String, nullable=True)
117    formula = Column(String, nullable=True)
118    ri = Column(Float, nullable=False)
119    retention_time = Column(Float, nullable=False)
120
121    source = Column(String, nullable=True)
122    casno = Column(String, nullable=False)
123    comment = Column(String, nullable=True)
124
125    derivativenum = Column(String, nullable=True)
126    derivatization = Column(String, nullable=True)
127
128    source_temp_c = Column(Float, nullable=True)
129    ev = Column(Float, nullable=True)
130
131    peaks_count = Column(Integer, nullable=False)
132
133    mz = Column(LargeBinary, nullable=False)
134    abundance = Column(LargeBinary, nullable=False)
135
136    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
137
138    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
139
140    def __init__(self, **dict_data):
141        self.id = dict_data.get("id")
142
143        self.name = dict_data.get("NAME")
144        self.classify = dict_data.get("classify")
145        self.formula = dict_data.get("FORM")
146        self.ri = dict_data.get("RI")
147        self.retention_time = dict_data.get("RT")
148
149        self.source = dict_data.get("SOURCE")
150        self.casno = dict_data.get("CASNO")
151        self.comment = dict_data.get("COMMENT")
152
153        self.derivativenum = dict_data.get("derivativenum")
154        self.derivatization = dict_data.get("derivatization")
155
156        self.peaks_count = dict_data.get("NUM PEAKS")
157
158        # mz and abun are numpy arrays of 64 bits integer
159        # when using postgres array might be a better option
160
161        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
162        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
163
164        self.metadatar = dict_data.get("metadatar", None)
165
166    def __repr__(self):
167        return (
168            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
169            % (
170                self.name,
171                self.casno,
172                self.formula,
173                self.ri,
174                self.retention_time,
175                self.comment,
176            )
177        )

This class is used to store the molecular and spectral data of the compounds in the low res EI database

Attributes
  • id (int): The id of the compound.
  • name (str): The name of the compound.
  • classify (str): The classification of the compound.
  • formula (str): The formula of the compound.
  • ri (float): The retention index of the compound.
  • retention_time (float): The retention time of the compound.
  • source (str): The source of the compound.
  • casno (str): The CAS number of the compound.
  • comment (str): The comment of the compound.
  • source_temp_c (float): The source temperature of the spectra.
  • ev (float): The electron volts of the spectra.
  • peaks_count (int): The number of peaks in the spectra.
  • mz (numpy.ndarray): The m/z values of the spectra.
  • abundance (numpy.ndarray): The abundance values of the spectra.
  • metadatar (Metadatar): The metadata object.
LowResolutionEICompound(**dict_data)
id
name
classify
formula
ri
retention_time
source
casno
comment
derivativenum
derivatization
source_temp_c
ev
peaks_count
mz
abundance
metadatar
Inherited Members
Base
registry
metadata
@dataclass
class MetaboliteMetadata:
180@dataclass
181class MetaboliteMetadata:
182    """Dataclass for the Metabolite Metadata
183
184    Attributes
185    -----------
186    id : int
187        The id of the compound.
188    cas : str
189        The CAS number of the compound.
190    inchikey : str
191        The InChiKey of the compound.
192    inchi : str
193        The InChi of the compound.
194    chebi : str
195        The ChEBI ID of the compound.
196    smiles : str
197        The SMILES of the compound.
198    kegg : str
199        The KEGG ID of the compound.
200    iupac_name : str
201        The IUPAC name of the compound.
202    traditional_name : str
203        The traditional name of the compound.
204    common_name : str
205        The common name of the compound.
206    data_id : int
207        The id of the compound in the molecularData table.
208
209    """
210
211    id: int
212    cas: str
213    inchikey: str
214    inchi: str
215    chebi: str
216    smiles: str
217    kegg: str
218    data_id: int
219    iupac_name: str
220    traditional_name: str
221    common_name: str

Dataclass for the Metabolite Metadata

Attributes
  • id (int): The id of the compound.
  • cas (str): The CAS number of the compound.
  • inchikey (str): The InChiKey of the compound.
  • inchi (str): The InChi of the compound.
  • chebi (str): The ChEBI ID of the compound.
  • smiles (str): The SMILES of the compound.
  • kegg (str): The KEGG ID of the compound.
  • iupac_name (str): The IUPAC name of the compound.
  • traditional_name (str): The traditional name of the compound.
  • common_name (str): The common name of the compound.
  • data_id (int): The id of the compound in the molecularData table.
MetaboliteMetadata( id: int, cas: str, inchikey: str, inchi: str, chebi: str, smiles: str, kegg: str, data_id: int, iupac_name: str, traditional_name: str, common_name: str)
id: int
cas: str
inchikey: str
inchi: str
chebi: str
smiles: str
kegg: str
data_id: int
iupac_name: str
traditional_name: str
common_name: str
@dataclass
class LowResCompoundRef:
224@dataclass
225class LowResCompoundRef:
226    """Dataclass for the Low Resolution Compound Reference
227
228    This class is used to store the molecular and spectral data of the compounds in the low res EI database
229
230    Parameters
231    -----------
232    compounds_dict : dict
233        A dictionary representing the compound.
234
235    Attributes
236    -----------
237    id : int
238        The id of the compound.
239    name : str
240        The name of the compound.
241    ri : str
242        The retention index of the compound.
243    retention_time : str
244        The retention time of the compound.
245    casno : str
246        The CAS number of the compound.
247    comment : str
248        The comment of the compound.
249    peaks_count : int
250        The number of peaks in the spectra.
251    classify : str
252        The classification of the compound.
253    derivativenum : str
254        The derivative number of the compound.
255    derivatization : str
256        The derivatization applied to the compound.
257    mz : numpy.ndarray
258        The m/z values of the spectra.
259    abundance : numpy.ndarray
260        The abundance values of the spectra.
261    source_temp_c : float
262        The source temperature of the spectra.
263    ev : float
264        The electron volts of the spectra.
265    formula : str
266        The formula of the compound.
267    source : str
268        The source of the spectra data.
269    classify : str
270        The classification of the compound.
271    metadata : MetaboliteMetadata
272        The metadata object.
273    similarity_score : float
274        The similarity score of the compound.
275    ri_score : float
276        The RI score of the compound.
277    spectral_similarity_score : float
278        The spectral similarity score of the compound.
279    spectral_similarity_scores : dict
280        The spectral similarity scores of the compound.
281
282    """
283
284    # this class is use to store the results inside the GCPeak class
285    def __init__(self, compounds_dict):
286        self.id = compounds_dict.get("id")
287        self.name = compounds_dict.get("name")
288        self.ri = compounds_dict.get("ri")
289        self.retention_time = compounds_dict.get("rt")
290        self.casno = compounds_dict.get("casno")
291        self.comment = compounds_dict.get("comment")
292        self.peaks_count = compounds_dict.get("peaks_count")
293
294        self.classify = compounds_dict.get("classify")
295        self.derivativenum = compounds_dict.get("derivativenum")
296        self.derivatization = compounds_dict.get("derivatization")
297
298        self.mz = compounds_dict.get("mz")
299        self.abundance = compounds_dict.get("abundance")
300
301        self.source_temp_c = compounds_dict.get("source_temp_c")
302        self.ev = compounds_dict.get("ev")
303        self.formula = compounds_dict.get("formula")
304        self.source = compounds_dict.get("source")
305
306        self.classify = compounds_dict.get("classify")
307
308        if compounds_dict.get("metadata"):
309            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
310
311        else:
312            self.metadata = None
313
314        self.similarity_score = None
315        self.ri_score = None
316        self.spectral_similarity_score = None
317        self.spectral_similarity_scores = {}

Dataclass for the Low Resolution Compound Reference

This class is used to store the molecular and spectral data of the compounds in the low res EI database

Parameters
  • compounds_dict (dict): A dictionary representing the compound.
Attributes
  • id (int): The id of the compound.
  • name (str): The name of the compound.
  • ri (str): The retention index of the compound.
  • retention_time (str): The retention time of the compound.
  • casno (str): The CAS number of the compound.
  • comment (str): The comment of the compound.
  • peaks_count (int): The number of peaks in the spectra.
  • classify (str): The classification of the compound.
  • derivativenum (str): The derivative number of the compound.
  • derivatization (str): The derivatization applied to the compound.
  • mz (numpy.ndarray): The m/z values of the spectra.
  • abundance (numpy.ndarray): The abundance values of the spectra.
  • source_temp_c (float): The source temperature of the spectra.
  • ev (float): The electron volts of the spectra.
  • formula (str): The formula of the compound.
  • source (str): The source of the spectra data.
  • classify (str): The classification of the compound.
  • metadata (MetaboliteMetadata): The metadata object.
  • similarity_score (float): The similarity score of the compound.
  • ri_score (float): The RI score of the compound.
  • spectral_similarity_score (float): The spectral similarity score of the compound.
  • spectral_similarity_scores (dict): The spectral similarity scores of the compound.
LowResCompoundRef(compounds_dict)
285    def __init__(self, compounds_dict):
286        self.id = compounds_dict.get("id")
287        self.name = compounds_dict.get("name")
288        self.ri = compounds_dict.get("ri")
289        self.retention_time = compounds_dict.get("rt")
290        self.casno = compounds_dict.get("casno")
291        self.comment = compounds_dict.get("comment")
292        self.peaks_count = compounds_dict.get("peaks_count")
293
294        self.classify = compounds_dict.get("classify")
295        self.derivativenum = compounds_dict.get("derivativenum")
296        self.derivatization = compounds_dict.get("derivatization")
297
298        self.mz = compounds_dict.get("mz")
299        self.abundance = compounds_dict.get("abundance")
300
301        self.source_temp_c = compounds_dict.get("source_temp_c")
302        self.ev = compounds_dict.get("ev")
303        self.formula = compounds_dict.get("formula")
304        self.source = compounds_dict.get("source")
305
306        self.classify = compounds_dict.get("classify")
307
308        if compounds_dict.get("metadata"):
309            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
310
311        else:
312            self.metadata = None
313
314        self.similarity_score = None
315        self.ri_score = None
316        self.spectral_similarity_score = None
317        self.spectral_similarity_scores = {}
id
name
ri
retention_time
casno
comment
peaks_count
classify
derivativenum
derivatization
mz
abundance
source_temp_c
ev
formula
source
similarity_score
ri_score
spectral_similarity_score
spectral_similarity_scores
class EI_LowRes_SQLite:
320class EI_LowRes_SQLite:
321    """
322    A class for interacting with a SQLite database for low-resolution EI compounds.
323
324    Parameters
325    -----------
326    url : str, optional
327        The URL of the SQLite database. Default is 'sqlite://'.
328
329    Attributes
330    -----------
331    engine : sqlalchemy.engine.Engine
332        The SQLAlchemy engine for connecting to the database.
333    session : sqlalchemy.orm.Session
334        The SQLAlchemy session for executing database operations.
335
336    Methods
337    --------
338    * __init__(self, url='sqlite://').
339        Initializes the EI_LowRes_SQLite object.
340    * __exit__(self, exc_type, exc_val, exc_tb).
341        Closes the database connection.
342    * init_engine(self, url).
343        Initializes the SQLAlchemy engine.
344    * __enter__(self).
345        Returns the EI_LowRes_SQLite object.
346    * add_compound_list(self, data_dict_list).
347        Adds a list of compounds to the database.
348    * add_compound(self, data_dict).
349        Adds a single compound to the database.
350    * commit(self).
351        Commits the changes to the database.
352    * row_to_dict(self, row).
353        Converts a database row to a dictionary.
354    * get_all(self).
355        Retrieves all compounds from the database.
356    * query_min_max_rt(self, min_max_rt).
357        Queries compounds based on retention time range.
358    * query_min_max_ri(self, min_max_ri).
359        Queries compounds based on RI range.
360    * query_names_and_rt(self, min_max_rt, compound_names).
361        Queries compounds based on compound names and retention time range.
362    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
363        Queries compounds based on RI range and retention time range.
364    * delete_compound(self, compound).
365        Deletes a compound from the database.
366    * purge(self).
367        Deletes all compounds from the database table.
368    * clear_data(self).
369        Clears all tables in the database.
370    """
371
372    def __init__(self, url="sqlite://"):
373        self.engine = self.init_engine(url)
374
375        Base.metadata.create_all(self.engine)
376
377        Session = sessionmaker(bind=self.engine)
378
379        self.session = Session()
380
381    def __exit__(self, exc_type, exc_val, exc_tb):
382        """Closes the database connection."""
383        self.commit()
384        self.session.close()
385        self.engine.dispose()
386
387    def init_engine(self, url):
388        """Initializes the SQLAlchemy engine.
389
390        Parameters
391        -----------
392        url : str
393            The URL of the SQLite database.
394
395        Returns
396        --------
397        sqlalchemy.engine.Engine
398            The SQLAlchemy engine for connecting to the database.
399        """
400        directory = os.getcwd()
401        if not url:
402            if not os.path.isdir(directory + "/db"):
403                os.mkdir(directory + "/db")
404            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
405                DB=directory
406            )
407        return create_engine(url, poolclass=QueuePool)
408
409    def __enter__(self):
410        """Returns the EI_LowRes_SQLite object."""
411        return self
412
413    def add_compound_list(self, data_dict_list):
414        """Adds a list of compounds to the database.
415
416        Parameters
417        -----------
418        data_dict_list : list of dict
419            A list of dictionaries representing the compounds.
420        """
421        for data_dict in data_dict_list:
422            # print(data_dict.get('NUM PEAKS'))
423            if not data_dict.get("NUM PEAKS"):
424                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
425            if not data_dict.get("CASNO"):
426                data_dict["CASNO"] = data_dict.get("CAS")
427
428        self.session.add_all(
429            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
430        )
431
432    def add_compound(self, data_dict):
433        """Adds a single compound to the database.
434
435        Parameters
436        -----------
437        data_dict : dict
438            A dictionary representing the compound.
439
440        """
441        one_compound = LowResolutionEICompound(**data_dict)
442        self.session.add(one_compound)
443        self.commit()
444
445    def commit(self):
446        """Commits the changes to the database."""
447        try:
448            self.session.commit()
449        except SQLAlchemyError as e:
450            self.session.rollback()
451            print(str(e))
452
453    def row_to_dict(self, row):
454        """Converts a database row to a dictionary.
455
456        Parameters
457        -----------
458        row : sqlalchemy.engine.row.Row
459            A row from the database.
460
461        Returns
462        --------
463        dict
464            A dictionary representing the compound.
465        """
466        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
467
468        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
469        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
470
471        if row.metadatar:
472            data_dict["metadata"] = {
473                c.name: getattr(row.metadatar, c.name)
474                for c in row.metadatar.__table__.columns
475            }
476
477        else:
478            data_dict["metadata"] = None
479
480        return data_dict
481
482    def get_all(
483        self,
484    ):
485        """Retrieves all compounds from the database.
486
487        Returns
488        --------
489        list
490            A list of dictionaries representing the compounds.
491        """
492        compounds = self.session.query(LowResolutionEICompound).all()
493
494        return [self.row_to_dict(compound) for compound in compounds]
495
496    def query_min_max_rt(
497        self,
498        min_max_rt,
499    ):
500        """Queries compounds based on retention time range.
501
502        Parameters
503        -----------
504        min_max_rt : tuple
505            A tuple containing the minimum and maximum retention time values.
506
507        Returns
508        --------
509        list
510            A list of dictionaries representing the compounds.
511        """
512        min_rt, max_rt = min_max_rt
513
514        compounds = self.session.query(LowResolutionEICompound).filter(
515            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
516        )
517
518        return [self.row_to_dict(compound) for compound in compounds]
519
520    def query_min_max_ri(self, min_max_ri):
521        """Queries compounds based on RI range.
522
523        Parameters
524        -----------
525        min_max_ri : tuple
526            A tuple containing the minimum and maximum RI values.
527        """
528        min_ri, max_ri = min_max_ri
529
530        compounds = (
531            self.session.query(LowResolutionEICompound)
532            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
533            .all()
534        )
535
536        return [self.row_to_dict(compound) for compound in compounds]
537
538    def query_names_and_rt(self, min_max_rt, compound_names):
539        """Queries compounds based on compound names and retention time range.
540
541        Parameters
542        -----------
543        min_max_rt : tuple
544            A tuple containing the minimum and maximum retention time values.
545        compound_names : list
546            A list of compound names.
547
548        Returns
549        --------
550        list
551            A list of dictionaries representing the compounds.
552
553        """
554        min_rt, max_rt = min_max_rt
555
556        compounds = (
557            self.session.query(LowResolutionEICompound)
558            .filter(LowResolutionEICompound.name.in_(compound_names))
559            .filter(
560                LowResolutionEICompound.retention_time >= min_rt,
561                LowResolutionEICompound.retention_time <= max_rt,
562            )
563        )
564
565        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
566        # x = [self.row_to_dict(compound) for compound in compounds]
567
568        return [self.row_to_dict(compound) for compound in compounds]
569
570    def query_min_max_ri_and_rt(
571        self,
572        min_max_ri,
573        min_max_rt,
574    ):
575        """Queries compounds based on RI range and retention time range.
576
577        Parameters
578        -----------
579        min_max_ri : tuple
580            A tuple containing the minimum and maximum RI values.
581        min_max_rt : tuple
582            A tuple containing the minimum and maximum retention time values.
583
584        Returns
585        --------
586        list
587            A list of dictionaries representing the compounds.
588
589        """
590        min_ri, max_ri = min_max_ri
591
592        min_rt, max_rt = min_max_rt
593
594        compounds = self.session.query(LowResolutionEICompound).filter(
595            LowResolutionEICompound.ri <= max_ri,
596            LowResolutionEICompound.ri >= min_ri,
597            LowResolutionEICompound.ri >= min_rt,
598            LowResolutionEICompound.ri >= max_rt,
599        )
600
601        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
602
603        return [self.row_to_dict(compound) for compound in compounds]
604
605    def delete_compound(self, compound):
606        """Deletes a compound from the database.
607
608        Parameters
609        -----------
610        compound : LowResolutionEICompound
611            A compound object.
612
613        """
614        try:
615            self.session.delete(compound)
616            self.session.commit()
617
618        except SQLAlchemyError as e:
619            self.session.rollback()
620            print(str(e))
621
622    def purge(self):
623        """Deletes all compounds from the database table.
624
625        Notes
626        ------
627        Careful, this will delete the entire database table.
628        """
629        self.session.query(LowResolutionEICompound).delete()
630        self.session.commit()
631
632    def clear_data(self):
633        """Clears all tables in the database."""
634        meta = Base.metadata
635        for table in reversed(meta.sorted_tables):
636            print("Clear table %s" % table)
637            self.session.execute(table.delete())
638        self.session.commit()

A class for interacting with a SQLite database for low-resolution EI compounds.

Parameters
  • url (str, optional): The URL of the SQLite database. Default is 'sqlite://'.
Attributes
  • engine (sqlalchemy.engine.Engine): The SQLAlchemy engine for connecting to the database.
  • session (sqlalchemy.orm.Session): The SQLAlchemy session for executing database operations.
Methods
  • __init__(self, url='sqlite://'). Initializes the EI_LowRes_SQLite object.
  • __exit__(self, exc_type, exc_val, exc_tb). Closes the database connection.
  • init_engine(self, url). Initializes the SQLAlchemy engine.
  • __enter__(self). Returns the EI_LowRes_SQLite object.
  • add_compound_list(self, data_dict_list). Adds a list of compounds to the database.
  • add_compound(self, data_dict). Adds a single compound to the database.
  • commit(self). Commits the changes to the database.
  • row_to_dict(self, row). Converts a database row to a dictionary.
  • get_all(self). Retrieves all compounds from the database.
  • query_min_max_rt(self, min_max_rt). Queries compounds based on retention time range.
  • query_min_max_ri(self, min_max_ri). Queries compounds based on RI range.
  • query_names_and_rt(self, min_max_rt, compound_names). Queries compounds based on compound names and retention time range.
  • query_min_max_ri_and_rt(self, min_max_ri, min_max_rt). Queries compounds based on RI range and retention time range.
  • delete_compound(self, compound). Deletes a compound from the database.
  • purge(self). Deletes all compounds from the database table.
  • clear_data(self). Clears all tables in the database.
EI_LowRes_SQLite(url='sqlite://')
372    def __init__(self, url="sqlite://"):
373        self.engine = self.init_engine(url)
374
375        Base.metadata.create_all(self.engine)
376
377        Session = sessionmaker(bind=self.engine)
378
379        self.session = Session()
engine
session
def init_engine(self, url):
387    def init_engine(self, url):
388        """Initializes the SQLAlchemy engine.
389
390        Parameters
391        -----------
392        url : str
393            The URL of the SQLite database.
394
395        Returns
396        --------
397        sqlalchemy.engine.Engine
398            The SQLAlchemy engine for connecting to the database.
399        """
400        directory = os.getcwd()
401        if not url:
402            if not os.path.isdir(directory + "/db"):
403                os.mkdir(directory + "/db")
404            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
405                DB=directory
406            )
407        return create_engine(url, poolclass=QueuePool)

Initializes the SQLAlchemy engine.

Parameters
  • url (str): The URL of the SQLite database.
Returns
  • sqlalchemy.engine.Engine: The SQLAlchemy engine for connecting to the database.
def add_compound_list(self, data_dict_list):
413    def add_compound_list(self, data_dict_list):
414        """Adds a list of compounds to the database.
415
416        Parameters
417        -----------
418        data_dict_list : list of dict
419            A list of dictionaries representing the compounds.
420        """
421        for data_dict in data_dict_list:
422            # print(data_dict.get('NUM PEAKS'))
423            if not data_dict.get("NUM PEAKS"):
424                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
425            if not data_dict.get("CASNO"):
426                data_dict["CASNO"] = data_dict.get("CAS")
427
428        self.session.add_all(
429            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
430        )

Adds a list of compounds to the database.

Parameters
  • data_dict_list (list of dict): A list of dictionaries representing the compounds.
def add_compound(self, data_dict):
432    def add_compound(self, data_dict):
433        """Adds a single compound to the database.
434
435        Parameters
436        -----------
437        data_dict : dict
438            A dictionary representing the compound.
439
440        """
441        one_compound = LowResolutionEICompound(**data_dict)
442        self.session.add(one_compound)
443        self.commit()

Adds a single compound to the database.

Parameters
  • data_dict (dict): A dictionary representing the compound.
def commit(self):
445    def commit(self):
446        """Commits the changes to the database."""
447        try:
448            self.session.commit()
449        except SQLAlchemyError as e:
450            self.session.rollback()
451            print(str(e))

Commits the changes to the database.

def row_to_dict(self, row):
453    def row_to_dict(self, row):
454        """Converts a database row to a dictionary.
455
456        Parameters
457        -----------
458        row : sqlalchemy.engine.row.Row
459            A row from the database.
460
461        Returns
462        --------
463        dict
464            A dictionary representing the compound.
465        """
466        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
467
468        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
469        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
470
471        if row.metadatar:
472            data_dict["metadata"] = {
473                c.name: getattr(row.metadatar, c.name)
474                for c in row.metadatar.__table__.columns
475            }
476
477        else:
478            data_dict["metadata"] = None
479
480        return data_dict

Converts a database row to a dictionary.

Parameters
  • row (sqlalchemy.engine.row.Row): A row from the database.
Returns
  • dict: A dictionary representing the compound.
def get_all(self):
482    def get_all(
483        self,
484    ):
485        """Retrieves all compounds from the database.
486
487        Returns
488        --------
489        list
490            A list of dictionaries representing the compounds.
491        """
492        compounds = self.session.query(LowResolutionEICompound).all()
493
494        return [self.row_to_dict(compound) for compound in compounds]

Retrieves all compounds from the database.

Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_rt(self, min_max_rt):
496    def query_min_max_rt(
497        self,
498        min_max_rt,
499    ):
500        """Queries compounds based on retention time range.
501
502        Parameters
503        -----------
504        min_max_rt : tuple
505            A tuple containing the minimum and maximum retention time values.
506
507        Returns
508        --------
509        list
510            A list of dictionaries representing the compounds.
511        """
512        min_rt, max_rt = min_max_rt
513
514        compounds = self.session.query(LowResolutionEICompound).filter(
515            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
516        )
517
518        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on retention time range.

Parameters
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_ri(self, min_max_ri):
520    def query_min_max_ri(self, min_max_ri):
521        """Queries compounds based on RI range.
522
523        Parameters
524        -----------
525        min_max_ri : tuple
526            A tuple containing the minimum and maximum RI values.
527        """
528        min_ri, max_ri = min_max_ri
529
530        compounds = (
531            self.session.query(LowResolutionEICompound)
532            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
533            .all()
534        )
535
536        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on RI range.

Parameters
  • min_max_ri (tuple): A tuple containing the minimum and maximum RI values.
def query_names_and_rt(self, min_max_rt, compound_names):
538    def query_names_and_rt(self, min_max_rt, compound_names):
539        """Queries compounds based on compound names and retention time range.
540
541        Parameters
542        -----------
543        min_max_rt : tuple
544            A tuple containing the minimum and maximum retention time values.
545        compound_names : list
546            A list of compound names.
547
548        Returns
549        --------
550        list
551            A list of dictionaries representing the compounds.
552
553        """
554        min_rt, max_rt = min_max_rt
555
556        compounds = (
557            self.session.query(LowResolutionEICompound)
558            .filter(LowResolutionEICompound.name.in_(compound_names))
559            .filter(
560                LowResolutionEICompound.retention_time >= min_rt,
561                LowResolutionEICompound.retention_time <= max_rt,
562            )
563        )
564
565        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
566        # x = [self.row_to_dict(compound) for compound in compounds]
567
568        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on compound names and retention time range.

Parameters
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
  • compound_names (list): A list of compound names.
Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_ri_and_rt(self, min_max_ri, min_max_rt):
570    def query_min_max_ri_and_rt(
571        self,
572        min_max_ri,
573        min_max_rt,
574    ):
575        """Queries compounds based on RI range and retention time range.
576
577        Parameters
578        -----------
579        min_max_ri : tuple
580            A tuple containing the minimum and maximum RI values.
581        min_max_rt : tuple
582            A tuple containing the minimum and maximum retention time values.
583
584        Returns
585        --------
586        list
587            A list of dictionaries representing the compounds.
588
589        """
590        min_ri, max_ri = min_max_ri
591
592        min_rt, max_rt = min_max_rt
593
594        compounds = self.session.query(LowResolutionEICompound).filter(
595            LowResolutionEICompound.ri <= max_ri,
596            LowResolutionEICompound.ri >= min_ri,
597            LowResolutionEICompound.ri >= min_rt,
598            LowResolutionEICompound.ri >= max_rt,
599        )
600
601        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
602
603        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on RI range and retention time range.

Parameters
  • min_max_ri (tuple): A tuple containing the minimum and maximum RI values.
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
Returns
  • list: A list of dictionaries representing the compounds.
def delete_compound(self, compound):
605    def delete_compound(self, compound):
606        """Deletes a compound from the database.
607
608        Parameters
609        -----------
610        compound : LowResolutionEICompound
611            A compound object.
612
613        """
614        try:
615            self.session.delete(compound)
616            self.session.commit()
617
618        except SQLAlchemyError as e:
619            self.session.rollback()
620            print(str(e))

Deletes a compound from the database.

Parameters
  • compound (LowResolutionEICompound): A compound object.
def purge(self):
622    def purge(self):
623        """Deletes all compounds from the database table.
624
625        Notes
626        ------
627        Careful, this will delete the entire database table.
628        """
629        self.session.query(LowResolutionEICompound).delete()
630        self.session.commit()

Deletes all compounds from the database table.

Notes

Careful, this will delete the entire database table.

def clear_data(self):
632    def clear_data(self):
633        """Clears all tables in the database."""
634        meta = Base.metadata
635        for table in reversed(meta.sorted_tables):
636            print("Clear table %s" % table)
637            self.session.execute(table.delete())
638        self.session.commit()

Clears all tables in the database.