corems.molecular_id.factory.EI_SQL

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Feb 12, 2020"
  3
  4import os
  5from dataclasses import dataclass
  6from typing import Optional
  7
  8from numpy import array, frombuffer
  9from sqlalchemy import (
 10    Column,
 11    Float,
 12    ForeignKey,
 13    Integer,
 14    LargeBinary,
 15    String,
 16    create_engine,
 17)
 18from sqlalchemy.exc import SQLAlchemyError
 19from sqlalchemy.ext.declarative import declarative_base
 20from sqlalchemy.orm import relationship, sessionmaker
 21from sqlalchemy.pool import QueuePool
 22
 23Base = declarative_base()
 24
 25
 26class Metadatar(Base):
 27    """This class is used to store the metadata of the compounds in the database
 28
 29    Attributes
 30    -----------
 31    id : int
 32        The id of the compound.
 33    cas : str
 34        The CAS number of the compound.
 35    inchikey : str
 36        The InChiKey of the compound.
 37    inchi : str
 38        The InChi of the compound.
 39    chebi : str
 40        The ChEBI ID of the compound.
 41    smiles : str
 42        The SMILES of the compound.
 43    kegg : str
 44        The KEGG ID of the compound.
 45    iupac_name : str
 46        The IUPAC name of the compound.
 47    traditional_name : str
 48        The traditional name of the compound.
 49    common_name : str
 50        The common name of the compound.
 51    data_id : int
 52        The id of the compound in the molecularData table.
 53    data : LowResolutionEICompound
 54        The compound object.
 55    """
 56
 57    __tablename__ = "metaDataR"
 58
 59    id = Column(Integer, primary_key=True)
 60    cas = Column(String, nullable=True)
 61    inchikey = Column(String, nullable=False)
 62    inchi = Column(String, nullable=False)
 63    chebi = Column(String, nullable=True)
 64    smiles = Column(String, nullable=True)
 65    kegg = Column(String, nullable=True)
 66    iupac_name = Column(String, nullable=True)
 67    traditional_name = Column(String, nullable=True)
 68    common_name = Column(String, nullable=True)
 69
 70    data_id = Column(Integer, ForeignKey("molecularData.id"))
 71    data = relationship("LowResolutionEICompound", back_populates="metadatar")
 72
 73
 74class LowResolutionEICompound(Base):
 75    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
 76
 77    Attributes
 78    -----------
 79    id : int
 80        The id of the compound.
 81    name : str
 82        The name of the compound.
 83    classify : str
 84        The classification of the compound.
 85    formula : str
 86        The formula of the compound.
 87    ri : float
 88        The retention index of the compound.
 89    retention_time : float
 90        The retention time of the compound.
 91    source : str
 92        The source of the compound.
 93    casno : str
 94        The CAS number of the compound.
 95    comment : str
 96        The comment of the compound.
 97    source_temp_c : float
 98        The source temperature of the spectra.
 99    ev : float
100        The electron volts of the spectra.
101    peaks_count : int
102        The number of peaks in the spectra.
103    mz : numpy.ndarray
104        The m/z values of the spectra.
105    abundance : numpy.ndarray
106        The abundance values of the spectra.
107    metadatar : Metadatar
108        The metadata object.
109    """
110
111    __tablename__ = "molecularData"
112
113    id = Column(Integer, primary_key=True)
114
115    name = Column(String, nullable=False)
116    classify = Column(String, nullable=True)
117    formula = Column(String, nullable=True)
118    ri = Column(Float, nullable=False)
119    retention_time = Column(Float, nullable=False)
120
121    source = Column(String, nullable=True)
122    casno = Column(String, nullable=False)
123    comment = Column(String, nullable=True)
124
125    derivativenum = Column(String, nullable=True)
126    derivatization = Column(String, nullable=True)
127
128    source_temp_c = Column(Float, nullable=True)
129    ev = Column(Float, nullable=True)
130
131    peaks_count = Column(Integer, nullable=False)
132
133    mz = Column(LargeBinary, nullable=False)
134    abundance = Column(LargeBinary, nullable=False)
135
136    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
137
138    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
139
140    def __init__(self, **dict_data):
141        self.id = dict_data.get("id")
142
143        self.name = dict_data.get("NAME")
144        self.classify = dict_data.get("classify")
145        self.formula = dict_data.get("FORM")
146        self.ri = dict_data.get("RI")
147        self.retention_time = dict_data.get("RT")
148
149        self.source = dict_data.get("SOURCE")
150        self.casno = dict_data.get("CASNO")
151        self.comment = dict_data.get("COMMENT")
152
153        self.derivativenum = dict_data.get("derivativenum")
154        self.derivatization = dict_data.get("derivatization")
155
156        self.peaks_count = dict_data.get("NUM PEAKS")
157
158        # mz and abun are numpy arrays of 64 bits integer
159        # when using postgres array might be a better option
160
161        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
162        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
163
164        self.metadatar = dict_data.get("metadatar", None)
165
166    def __repr__(self):
167        return (
168            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
169            % (
170                self.name,
171                self.casno,
172                self.formula,
173                self.ri,
174                self.retention_time,
175                self.comment,
176            )
177        )
178
179
@dataclass
class MetaboliteMetadata:
    """Plain-data record describing the identifiers of a metabolite.

    Fields without a default are required and are accepted positionally in
    the order they are declared below.

    Attributes
    -----------
    id : int
        The id of the compound.
    cas : str
        The CAS number of the compound.
    inchikey : str
        The InChiKey of the compound.
    inchi : str
        The InChi of the compound.
    chebi : str
        The ChEBI ID of the compound.
    smiles : str
        The SMILES of the compound.
    kegg : str
        The KEGG ID of the compound.
    data_id : int
        The id of the compound in the molecularData table.
    iupac_name : str
        The IUPAC name of the compound.
    traditional_name : str
        The traditional name of the compound.
    common_name : str
        The common name of the compound, preferably the RefMet name.
    name : Optional[str]
        The name of the compound, preferably the same name as in PubChem,
        or, if a lipid, the name as in LipidMaps.
    formula : Optional[str]
        The formula of the compound.
    pubchem_id : Optional[str]
        The PubChem ID of the compound.
    refmet_id : Optional[str]
        The RefMet ID of the compound.
    """

    # Required fields
    id: int
    cas: str
    inchikey: str
    inchi: str
    chebi: str
    smiles: str
    kegg: str
    data_id: int
    iupac_name: str
    traditional_name: str
    common_name: str

    # Optional fields
    name: Optional[str] = None
    formula: Optional[str] = None
    pubchem_id: Optional[str] = None
    refmet_id: Optional[str] = None
234
235
@dataclass
class LowResCompoundRef:
    """Dataclass for the Low Resolution Compound Reference

    This class is used to store the molecular and spectral data of the compounds in the low res EI database

    Parameters
    -----------
    compounds_dict : dict
        A dictionary representing the compound. The retention time is read
        from the ``"rt"`` key, falling back to ``"retention_time"`` (the
        column name used by the database rows) when ``"rt"`` is absent or
        ``None``.

    Attributes
    -----------
    id : int
        The id of the compound.
    name : str
        The name of the compound.
    ri : float
        The retention index of the compound.
    retention_time : float
        The retention time of the compound.
    casno : str
        The CAS number of the compound.
    comment : str
        The comment of the compound.
    peaks_count : int
        The number of peaks in the spectra.
    classify : str
        The classification of the compound.
    derivativenum : str
        The derivative number of the compound.
    derivatization : str
        The derivatization applied to the compound.
    mz : numpy.ndarray
        The m/z values of the spectra.
    abundance : numpy.ndarray
        The abundance values of the spectra.
    source_temp_c : float
        The source temperature of the spectra.
    ev : float
        The electron volts of the spectra.
    formula : str
        The formula of the compound.
    source : str
        The source of the spectra data.
    metadata : MetaboliteMetadata
        The metadata object, or None when no metadata was supplied.
    similarity_score : float
        The similarity score of the compound.
    ri_score : float
        The RI score of the compound.
    spectral_similarity_score : float
        The spectral similarity score of the compound.
    spectral_similarity_scores : dict
        The spectral similarity scores of the compound.

    """

    # NOTE(review): no dataclass fields are declared, so the generated
    # __repr__/__eq__ do not look at instance attributes. The decorator is
    # kept to avoid changing comparison/hash behavior for existing callers.

    # this class is use to store the results inside the GCPeak class
    def __init__(self, compounds_dict):
        lookup = compounds_dict.get

        self.id = lookup("id")
        self.name = lookup("name")
        self.ri = lookup("ri")

        # Dictionaries built from database rows are keyed by column name
        # ("retention_time"), while other producers use "rt"; accept both,
        # preferring "rt" for backward compatibility.
        retention_time = lookup("rt")
        if retention_time is None:
            retention_time = lookup("retention_time")
        self.retention_time = retention_time

        self.casno = lookup("casno")
        self.comment = lookup("comment")
        self.peaks_count = lookup("peaks_count")

        self.classify = lookup("classify")
        self.derivativenum = lookup("derivativenum")
        self.derivatization = lookup("derivatization")

        self.mz = lookup("mz")
        self.abundance = lookup("abundance")

        self.source_temp_c = lookup("source_temp_c")
        self.ev = lookup("ev")
        self.formula = lookup("formula")
        self.source = lookup("source")

        # A missing or empty metadata mapping means "no metadata".
        metadata = lookup("metadata")
        self.metadata = MetaboliteMetadata(**metadata) if metadata else None

        # Scores are filled in later by the matching step.
        self.similarity_score = None
        self.ri_score = None
        self.spectral_similarity_score = None
        self.spectral_similarity_scores = {}
330
331
class EI_LowRes_SQLite:
    """
    A class for interacting with a SQLite database for low-resolution EI compounds.

    Instances can be used as context managers; leaving the ``with`` block
    commits pending changes, closes the session and disposes the engine.

    Parameters
    -----------
    url : str, optional
        The URL of the SQLite database. Default is 'sqlite://'.

    Attributes
    -----------
    engine : sqlalchemy.engine.Engine
        The SQLAlchemy engine for connecting to the database.
    session : sqlalchemy.orm.Session
        The SQLAlchemy session for executing database operations.

    Methods
    --------
    * __init__(self, url='sqlite://').
        Initializes the EI_LowRes_SQLite object.
    * __exit__(self, exc_type, exc_val, exc_tb).
        Closes the database connection.
    * init_engine(self, url).
        Initializes the SQLAlchemy engine.
    * __enter__(self).
        Returns the EI_LowRes_SQLite object.
    * add_compound_list(self, data_dict_list).
        Adds a list of compounds to the database.
    * add_compound(self, data_dict).
        Adds a single compound to the database.
    * commit(self).
        Commits the changes to the database.
    * row_to_dict(self, row).
        Converts a database row to a dictionary.
    * get_all(self).
        Retrieves all compounds from the database.
    * query_min_max_rt(self, min_max_rt).
        Queries compounds based on retention time range.
    * query_min_max_ri(self, min_max_ri).
        Queries compounds based on RI range.
    * query_names_and_rt(self, min_max_rt, compound_names).
        Queries compounds based on compound names and retention time range.
    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
        Queries compounds based on RI range and retention time range.
    * delete_compound(self, compound).
        Deletes a compound from the database.
    * purge(self).
        Deletes all compounds from the database table.
    * clear_data(self).
        Clears all tables in the database.
    """

    def __init__(self, url="sqlite://"):
        self.engine = self.init_engine(url)

        # Create every table declared on Base that does not exist yet.
        Base.metadata.create_all(self.engine)

        Session = sessionmaker(bind=self.engine)

        self.session = Session()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Commits pending changes and closes the database connection."""
        try:
            self.commit()
        finally:
            # Release the session and the pooled connections even when the
            # commit raises something commit() does not handle.
            self.session.close()
            self.engine.dispose()

    def init_engine(self, url):
        """Initializes the SQLAlchemy engine.

        Parameters
        -----------
        url : str
            The URL of the SQLite database. When empty or None, a database
            file under ``<current working directory>/db`` is used and the
            directory is created if needed.

        Returns
        --------
        sqlalchemy.engine.Engine
            The SQLAlchemy engine for connecting to the database.
        """
        if not url:
            directory = os.getcwd()
            # exist_ok avoids the check-then-create race of isdir() + mkdir().
            os.makedirs(os.path.join(directory, "db"), exist_ok=True)
            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
                DB=directory
            )
        return create_engine(url, poolclass=QueuePool)

    def __enter__(self):
        """Returns the EI_LowRes_SQLite object."""
        return self

    def add_compound_list(self, data_dict_list):
        """Adds a list of compounds to the database.

        The dictionaries are updated in place: a missing ``NUM PEAKS`` is
        derived from the length of ``mz`` and a missing ``CASNO`` is copied
        from ``CAS``. The compounds are added to the session but not
        committed.

        Parameters
        -----------
        data_dict_list : list of dict
            A list of dictionaries representing the compounds.
        """
        for data_dict in data_dict_list:
            if not data_dict.get("NUM PEAKS"):
                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
            if not data_dict.get("CASNO"):
                data_dict["CASNO"] = data_dict.get("CAS")

        self.session.add_all(
            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
        )

    def add_compound(self, data_dict):
        """Adds a single compound to the database and commits it.

        Parameters
        -----------
        data_dict : dict
            A dictionary representing the compound.

        """
        one_compound = LowResolutionEICompound(**data_dict)
        self.session.add(one_compound)
        self.commit()

    def commit(self):
        """Commits the changes to the database.

        On a SQLAlchemy error the transaction is rolled back and the error
        is printed instead of being raised.
        """
        try:
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
            print(str(e))

    def row_to_dict(self, row):
        """Converts a database row to a dictionary.

        Parameters
        -----------
        row : LowResolutionEICompound
            A mapped compound object loaded from the database.

        Returns
        --------
        dict
            A dictionary keyed by column name. ``mz`` and ``abundance`` are
            decoded into int32 numpy arrays, and ``metadata`` holds the
            related metadata columns as a dict, or None when absent.
        """
        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}

        # The spectra are stored as raw int32 bytes; decode them back.
        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")

        metadatar = row.metadatar
        if metadatar:
            data_dict["metadata"] = {
                c.name: getattr(metadatar, c.name)
                for c in metadatar.__table__.columns
            }
        else:
            data_dict["metadata"] = None

        return data_dict

    def get_all(
        self,
    ):
        """Retrieves all compounds from the database.

        Returns
        --------
        list
            A list of dictionaries representing the compounds.
        """
        compounds = self.session.query(LowResolutionEICompound).all()

        return [self.row_to_dict(compound) for compound in compounds]

    def query_min_max_rt(
        self,
        min_max_rt,
    ):
        """Queries compounds based on retention time range.

        Parameters
        -----------
        min_max_rt : tuple
            A tuple containing the minimum and maximum retention time values
            (both bounds inclusive).

        Returns
        --------
        list
            A list of dictionaries representing the compounds.
        """
        min_rt, max_rt = min_max_rt

        compounds = self.session.query(LowResolutionEICompound).filter(
            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
        )

        return [self.row_to_dict(compound) for compound in compounds]

    def query_min_max_ri(self, min_max_ri):
        """Queries compounds based on RI range.

        Parameters
        -----------
        min_max_ri : tuple
            A tuple containing the minimum and maximum RI values
            (both bounds inclusive).

        Returns
        --------
        list
            A list of dictionaries representing the compounds.
        """
        min_ri, max_ri = min_max_ri

        compounds = (
            self.session.query(LowResolutionEICompound)
            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
            .all()
        )

        return [self.row_to_dict(compound) for compound in compounds]

    def query_names_and_rt(self, min_max_rt, compound_names):
        """Queries compounds based on compound names and retention time range.

        Parameters
        -----------
        min_max_rt : tuple
            A tuple containing the minimum and maximum retention time values
            (both bounds inclusive).
        compound_names : list
            A list of compound names.

        Returns
        --------
        list
            A list of dictionaries representing the compounds.

        """
        min_rt, max_rt = min_max_rt

        compounds = (
            self.session.query(LowResolutionEICompound)
            .filter(LowResolutionEICompound.name.in_(compound_names))
            .filter(
                LowResolutionEICompound.retention_time >= min_rt,
                LowResolutionEICompound.retention_time <= max_rt,
            )
        )

        return [self.row_to_dict(compound) for compound in compounds]

    def query_min_max_ri_and_rt(
        self,
        min_max_ri,
        min_max_rt,
    ):
        """Queries compounds based on RI range and retention time range.

        A compound is returned only when its retention index lies within
        ``min_max_ri`` and its retention time lies within ``min_max_rt``
        (all bounds inclusive).

        Parameters
        -----------
        min_max_ri : tuple
            A tuple containing the minimum and maximum RI values.
        min_max_rt : tuple
            A tuple containing the minimum and maximum retention time values.

        Returns
        --------
        list
            A list of dictionaries representing the compounds.

        """
        min_ri, max_ri = min_max_ri

        min_rt, max_rt = min_max_rt

        # The retention-time bounds must constrain retention_time, not ri.
        compounds = self.session.query(LowResolutionEICompound).filter(
            LowResolutionEICompound.ri.between(min_ri, max_ri),
            LowResolutionEICompound.retention_time.between(min_rt, max_rt),
        )

        return [self.row_to_dict(compound) for compound in compounds]

    def delete_compound(self, compound):
        """Deletes a compound from the database and commits the change.

        On a SQLAlchemy error the transaction is rolled back and the error
        is printed instead of being raised.

        Parameters
        -----------
        compound : LowResolutionEICompound
            A compound object.

        """
        try:
            self.session.delete(compound)
            self.session.commit()

        except SQLAlchemyError as e:
            self.session.rollback()
            print(str(e))

    def purge(self):
        """Deletes all compounds from the database table.

        Notes
        ------
        Careful, this will delete the entire database table.
        """
        self.session.query(LowResolutionEICompound).delete()
        self.session.commit()

    def clear_data(self):
        """Clears all tables in the database."""
        meta = Base.metadata
        # Walk the tables in reverse dependency order so that rows holding
        # foreign keys are removed before the rows they reference.
        for table in reversed(meta.sorted_tables):
            print("Clear table %s" % table)
            self.session.execute(table.delete())
        self.session.commit()
class Base:

The base class of the class hierarchy.

When called, it accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any.

Base(**kwargs)
1185def _declarative_constructor(self, **kwargs):
1186    """A simple constructor that allows initialization from kwargs.
1187
1188    Sets attributes on the constructed instance using the names and
1189    values in ``kwargs``.
1190
1191    Only keys that are present as
1192    attributes of the instance's class are allowed. These could be,
1193    for example, any mapped columns or relationships.
1194    """
1195    cls_ = type(self)
1196    for k in kwargs:
1197        if not hasattr(cls_, k):
1198            raise TypeError(
1199                "%r is an invalid keyword argument for %s" % (k, cls_.__name__)
1200            )
1201        setattr(self, k, kwargs[k])

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

registry = <sqlalchemy.orm.decl_api.registry object>
metadata = MetaData()
class Metadatar(corems.molecular_id.factory.EI_SQL.Base):
class Metadatar(Base):
    """ORM model for the chemical identifiers attached to a compound.

    Rows are stored in the ``metaDataR`` table and reference their
    spectrum entry in ``molecularData`` through ``data_id``.

    Attributes
    -----------
    id : int
        Primary key of the metadata row.
    cas : str
        The CAS number of the compound (nullable).
    inchikey : str
        The InChIKey of the compound (required).
    inchi : str
        The InChI of the compound (required).
    chebi : str
        The ChEBI ID of the compound (nullable).
    smiles : str
        The SMILES of the compound (nullable).
    kegg : str
        The KEGG ID of the compound (nullable).
    iupac_name : str
        The IUPAC name of the compound (nullable).
    traditional_name : str
        The traditional name of the compound (nullable).
    common_name : str
        The common name of the compound (nullable).
    data_id : int
        Foreign key to ``molecularData.id``.
    data : LowResolutionEICompound
        The compound this metadata belongs to; mirrored on the other side
        of the relationship by ``LowResolutionEICompound.metadatar``.
    """

    __tablename__ = "metaDataR"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)

    # Identifiers: only the InChIKey / InChI pair is mandatory.
    cas = Column(String, nullable=True)
    inchikey = Column(String, nullable=False)
    inchi = Column(String, nullable=False)
    chebi = Column(String, nullable=True)
    smiles = Column(String, nullable=True)
    kegg = Column(String, nullable=True)

    # Human-readable names, all optional.
    iupac_name = Column(String, nullable=True)
    traditional_name = Column(String, nullable=True)
    common_name = Column(String, nullable=True)

    # Link back to the owning compound row.
    data_id = Column(Integer, ForeignKey("molecularData.id"))
    data = relationship("LowResolutionEICompound", back_populates="metadatar")

This class is used to store the metadata of the compounds in the database

Attributes
  • id (int): The id of the compound.
  • cas (str): The CAS number of the compound.
  • inchikey (str): The InChiKey of the compound.
  • inchi (str): The InChi of the compound.
  • chebi (str): The ChEBI ID of the compound.
  • smiles (str): The SMILES of the compound.
  • kegg (str): The KEGG ID of the compound.
  • iupac_name (str): The IUPAC name of the compound.
  • traditional_name (str): The traditional name of the compound.
  • common_name (str): The common name of the compound.
  • data_id (int): The id of the compound in the molecularData table.
  • data (LowResolutionEICompound): The compound object.
Metadatar(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

id
cas
inchikey
inchi
chebi
smiles
kegg
iupac_name
traditional_name
common_name
data_id
data
Inherited Members
Base
registry
metadata
class LowResolutionEICompound(corems.molecular_id.factory.EI_SQL.Base):
 75class LowResolutionEICompound(Base):
 76    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
 77
 78    Attributes
 79    -----------
 80    id : int
 81        The id of the compound.
 82    name : str
 83        The name of the compound.
 84    classify : str
 85        The classification of the compound.
 86    formula : str
 87        The formula of the compound.
 88    ri : float
 89        The retention index of the compound.
 90    retention_time : float
 91        The retention time of the compound.
 92    source : str
 93        The source of the compound.
 94    casno : str
 95        The CAS number of the compound.
 96    comment : str
 97        The comment of the compound.
 98    source_temp_c : float
 99        The source temperature of the spectra.
100    ev : float
101        The electron volts of the spectra.
102    peaks_count : int
103        The number of peaks in the spectra.
104    mz : numpy.ndarray
105        The m/z values of the spectra.
106    abundance : numpy.ndarray
107        The abundance values of the spectra.
108    metadatar : Metadatar
109        The metadata object.
110    """
111
112    __tablename__ = "molecularData"
113
114    id = Column(Integer, primary_key=True)
115
116    name = Column(String, nullable=False)
117    classify = Column(String, nullable=True)
118    formula = Column(String, nullable=True)
119    ri = Column(Float, nullable=False)
120    retention_time = Column(Float, nullable=False)
121
122    source = Column(String, nullable=True)
123    casno = Column(String, nullable=False)
124    comment = Column(String, nullable=True)
125
126    derivativenum = Column(String, nullable=True)
127    derivatization = Column(String, nullable=True)
128
129    source_temp_c = Column(Float, nullable=True)
130    ev = Column(Float, nullable=True)
131
132    peaks_count = Column(Integer, nullable=False)
133
134    mz = Column(LargeBinary, nullable=False)
135    abundance = Column(LargeBinary, nullable=False)
136
137    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
138
139    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
140
141    def __init__(self, **dict_data):
142        self.id = dict_data.get("id")
143
144        self.name = dict_data.get("NAME")
145        self.classify = dict_data.get("classify")
146        self.formula = dict_data.get("FORM")
147        self.ri = dict_data.get("RI")
148        self.retention_time = dict_data.get("RT")
149
150        self.source = dict_data.get("SOURCE")
151        self.casno = dict_data.get("CASNO")
152        self.comment = dict_data.get("COMMENT")
153
154        self.derivativenum = dict_data.get("derivativenum")
155        self.derivatization = dict_data.get("derivatization")
156
157        self.peaks_count = dict_data.get("NUM PEAKS")
158
159        # mz and abun are numpy arrays of 64 bits integer
160        # when using postgres array might be a better option
161
162        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
163        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
164
165        self.metadatar = dict_data.get("metadatar", None)
166
167    def __repr__(self):
168        return (
169            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
170            % (
171                self.name,
172                self.casno,
173                self.formula,
174                self.ri,
175                self.retention_time,
176                self.comment,
177            )
178        )

This class is used to store the molecular and spectral data of the compounds in the low res EI database

Attributes
  • id (int): The id of the compound.
  • name (str): The name of the compound.
  • classify (str): The classification of the compound.
  • formula (str): The formula of the compound.
  • ri (float): The retention index of the compound.
  • retention_time (float): The retention time of the compound.
  • source (str): The source of the compound.
  • casno (str): The CAS number of the compound.
  • comment (str): The comment of the compound.
  • source_temp_c (float): The source temperature of the spectra.
  • ev (float): The electron volts of the spectra.
  • peaks_count (int): The number of peaks in the spectra.
  • mz (numpy.ndarray): The m/z values of the spectra.
  • abundance (numpy.ndarray): The abundance values of the spectra.
  • metadatar (Metadatar): The metadata object.
LowResolutionEICompound(**dict_data)
id
name
classify
formula
ri
retention_time
source
casno
comment
derivativenum
derivatization
source_temp_c
ev
peaks_count
mz
abundance
metadatar
Inherited Members
Base
registry
metadata
@dataclass
class MetaboliteMetadata:
@dataclass
class MetaboliteMetadata:
    """Plain record holding the identifiers and names of a metabolite.

    The first eleven fields are required and positional; the last four are
    optional and default to ``None``.

    Attributes
    -----------
    id : int
        The id of the compound.
    cas : str
        The CAS number of the compound.
    inchikey : str
        The InChiKey of the compound.
    inchi : str
        The InChi of the compound.
    chebi : str
        The ChEBI ID of the compound.
    smiles : str
        The SMILES of the compound.
    kegg : str
        The KEGG ID of the compound.
    data_id : int
        The id of the compound in the molecularData table.
    iupac_name : str
        The IUPAC name of the compound.
    traditional_name : str
        The traditional name of the compound.
    common_name : str
        The common name of the compound, preferably the RefMet name.
    name : Optional[str]
        The name of the compound, preferably the same name as in PubChem or,
        for a lipid, the name used in LipidMaps.
    formula : Optional[str]
        The formula of the compound.
    pubchem_id : Optional[str]
        The PubChem ID of the compound.
    refmet_id : Optional[str]
        The RefMet ID of the compound.
    """

    # Required fields. Their order is the positional constructor order.
    id: int
    cas: str
    inchikey: str
    inchi: str
    chebi: str
    smiles: str
    kegg: str
    data_id: int
    iupac_name: str
    traditional_name: str
    common_name: str

    # Optional fields.
    name: Optional[str] = None
    formula: Optional[str] = None
    pubchem_id: Optional[str] = None
    refmet_id: Optional[str] = None

Dataclass for the Metabolite Metadata

Attributes
  • id (int): The id of the compound.
  • cas (str): The CAS number of the compound.
  • inchikey (str): The InChiKey of the compound.
  • inchi (str): The InChi of the compound.
  • chebi (str): The ChEBI ID of the compound.
  • smiles (str): The SMILES of the compound.
  • kegg (str): The KEGG ID of the compound.
  • iupac_name (str): The IUPAC name of the compound.
  • traditional_name (str): The traditional name of the compound.
  • common_name (str): The common name of the compound, preferably the RefMet name.
  • data_id (int): The id of the compound in the molecularData table.
  • name (Optional[str]): The name of the compound, preferably the same name as in PubChem or, for a lipid, the name used in LipidMaps.
  • formula (Optional[str]): The formula of the compound.
  • pubchem_id (Optional[str]): The PubChem ID of the compound.
  • refmet_id (Optional[str]): The RefMet ID of the compound.
MetaboliteMetadata( id: int, cas: str, inchikey: str, inchi: str, chebi: str, smiles: str, kegg: str, data_id: int, iupac_name: str, traditional_name: str, common_name: str, name: Optional[str] = None, formula: Optional[str] = None, pubchem_id: Optional[str] = None, refmet_id: Optional[str] = None)
id: int
cas: str
inchikey: str
inchi: str
chebi: str
smiles: str
kegg: str
data_id: int
iupac_name: str
traditional_name: str
common_name: str
name: Optional[str] = None
formula: Optional[str] = None
pubchem_id: Optional[str] = None
refmet_id: Optional[str] = None
@dataclass
class LowResCompoundRef:
@dataclass
class LowResCompoundRef:
    """Reference compound of the low resolution EI database.

    This class is used to store the molecular and spectral data of the
    compounds in the low res EI database, together with the scores assigned
    to the compound during matching.

    Parameters
    -----------
    compounds_dict : dict
        A dictionary representing the compound. Missing keys yield ``None``
        attributes. The retention time is read from the ``"rt"`` key and, when
        that is absent or ``None``, from ``"retention_time"``.

    Attributes
    -----------
    id : int
        The id of the compound.
    name : str
        The name of the compound.
    ri : float
        The retention index of the compound.
    retention_time : float
        The retention time of the compound.
    casno : str
        The CAS number of the compound.
    comment : str
        The comment of the compound.
    peaks_count : int
        The number of peaks in the spectra.
    classify : str
        The classification of the compound.
    derivativenum : str
        The derivative number of the compound.
    derivatization : str
        The derivatization applied to the compound.
    mz : numpy.ndarray
        The m/z values of the spectra.
    abundance : numpy.ndarray
        The abundance values of the spectra.
    source_temp_c : float
        The source temperature of the spectra.
    ev : float
        The electron volts of the spectra.
    formula : str
        The formula of the compound.
    source : str
        The source of the spectra data.
    metadata : MetaboliteMetadata
        The metadata object, or None when the dictionary carries no metadata.
    similarity_score : float
        The similarity score of the compound.
    ri_score : float
        The RI score of the compound.
    spectral_similarity_score : float
        The spectral similarity score of the compound.
    spectral_similarity_scores : dict
        The spectral similarity scores of the compound.

    """

    # This class is used to store the results inside the GCPeak class.
    # NOTE(review): no annotated fields are declared, so @dataclass only adds a
    # field-less __repr__ and an __eq__ under which any two instances compare
    # equal; the decorator is kept so existing behaviour does not change.
    def __init__(self, compounds_dict):
        read = compounds_dict.get

        self.id = read("id")
        self.name = read("name")
        self.ri = read("ri")

        # Dictionaries keyed by database column name carry "retention_time"
        # rather than "rt"; accept both, with "rt" taking precedence.
        retention_time = read("rt")
        if retention_time is None:
            retention_time = read("retention_time")
        self.retention_time = retention_time

        self.casno = read("casno")
        self.comment = read("comment")
        self.peaks_count = read("peaks_count")

        self.classify = read("classify")
        self.derivativenum = read("derivativenum")
        self.derivatization = read("derivatization")

        self.mz = read("mz")
        self.abundance = read("abundance")

        self.source_temp_c = read("source_temp_c")
        self.ev = read("ev")
        self.formula = read("formula")
        self.source = read("source")

        metadata = read("metadata")
        self.metadata = MetaboliteMetadata(**metadata) if metadata else None

        # Scores start empty and are assigned after construction.
        self.similarity_score = None
        self.ri_score = None
        self.spectral_similarity_score = None
        self.spectral_similarity_scores = {}

Dataclass for the Low Resolution Compound Reference

This class is used to store the molecular and spectral data of the compounds in the low res EI database

Parameters
  • compounds_dict (dict): A dictionary representing the compound.
Attributes
  • id (int): The id of the compound.
  • name (str): The name of the compound.
  • ri (float): The retention index of the compound.
  • retention_time (float): The retention time of the compound.
  • casno (str): The CAS number of the compound.
  • comment (str): The comment of the compound.
  • peaks_count (int): The number of peaks in the spectra.
  • classify (str): The classification of the compound.
  • derivativenum (str): The derivative number of the compound.
  • derivatization (str): The derivatization applied to the compound.
  • mz (numpy.ndarray): The m/z values of the spectra.
  • abundance (numpy.ndarray): The abundance values of the spectra.
  • source_temp_c (float): The source temperature of the spectra.
  • ev (float): The electron volts of the spectra.
  • formula (str): The formula of the compound.
  • source (str): The source of the spectra data.
  • classify (str): The classification of the compound.
  • metadata (MetaboliteMetadata): The metadata object.
  • similarity_score (float): The similarity score of the compound.
  • ri_score (float): The RI score of the compound.
  • spectral_similarity_score (float): The spectral similarity score of the compound.
  • spectral_similarity_scores (dict): The spectral similarity scores of the compound.
LowResCompoundRef(compounds_dict)
298    def __init__(self, compounds_dict):
299        self.id = compounds_dict.get("id")
300        self.name = compounds_dict.get("name")
301        self.ri = compounds_dict.get("ri")
302        self.retention_time = compounds_dict.get("rt")
303        self.casno = compounds_dict.get("casno")
304        self.comment = compounds_dict.get("comment")
305        self.peaks_count = compounds_dict.get("peaks_count")
306
307        self.classify = compounds_dict.get("classify")
308        self.derivativenum = compounds_dict.get("derivativenum")
309        self.derivatization = compounds_dict.get("derivatization")
310
311        self.mz = compounds_dict.get("mz")
312        self.abundance = compounds_dict.get("abundance")
313
314        self.source_temp_c = compounds_dict.get("source_temp_c")
315        self.ev = compounds_dict.get("ev")
316        self.formula = compounds_dict.get("formula")
317        self.source = compounds_dict.get("source")
318
319        self.classify = compounds_dict.get("classify")
320
321        if compounds_dict.get("metadata"):
322            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
323
324        else:
325            self.metadata = None
326
327        self.similarity_score = None
328        self.ri_score = None
329        self.spectral_similarity_score = None
330        self.spectral_similarity_scores = {}
id
name
ri
retention_time
casno
comment
peaks_count
classify
derivativenum
derivatization
mz
abundance
source_temp_c
ev
formula
source
similarity_score
ri_score
spectral_similarity_score
spectral_similarity_scores
class EI_LowRes_SQLite:
333class EI_LowRes_SQLite:
334    """
335    A class for interacting with a SQLite database for low-resolution EI compounds.
336
337    Parameters
338    -----------
339    url : str, optional
340        The URL of the SQLite database. Default is 'sqlite://'.
341
342    Attributes
343    -----------
344    engine : sqlalchemy.engine.Engine
345        The SQLAlchemy engine for connecting to the database.
346    session : sqlalchemy.orm.Session
347        The SQLAlchemy session for executing database operations.
348
349    Methods
350    --------
351    * __init__(self, url='sqlite://').
352        Initializes the EI_LowRes_SQLite object.
353    * __exit__(self, exc_type, exc_val, exc_tb).
354        Closes the database connection.
355    * init_engine(self, url).
356        Initializes the SQLAlchemy engine.
357    * __enter__(self).
358        Returns the EI_LowRes_SQLite object.
359    * add_compound_list(self, data_dict_list).
360        Adds a list of compounds to the database.
361    * add_compound(self, data_dict).
362        Adds a single compound to the database.
363    * commit(self).
364        Commits the changes to the database.
365    * row_to_dict(self, row).
366        Converts a database row to a dictionary.
367    * get_all(self).
368        Retrieves all compounds from the database.
369    * query_min_max_rt(self, min_max_rt).
370        Queries compounds based on retention time range.
371    * query_min_max_ri(self, min_max_ri).
372        Queries compounds based on RI range.
373    * query_names_and_rt(self, min_max_rt, compound_names).
374        Queries compounds based on compound names and retention time range.
375    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
376        Queries compounds based on RI range and retention time range.
377    * delete_compound(self, compound).
378        Deletes a compound from the database.
379    * purge(self).
380        Deletes all compounds from the database table.
381    * clear_data(self).
382        Clears all tables in the database.
383    """
384
385    def __init__(self, url="sqlite://"):
386        self.engine = self.init_engine(url)
387
388        Base.metadata.create_all(self.engine)
389
390        Session = sessionmaker(bind=self.engine)
391
392        self.session = Session()
393
394    def __exit__(self, exc_type, exc_val, exc_tb):
395        """Closes the database connection."""
396        self.commit()
397        self.session.close()
398        self.engine.dispose()
399
400    def init_engine(self, url):
401        """Initializes the SQLAlchemy engine.
402
403        Parameters
404        -----------
405        url : str
406            The URL of the SQLite database.
407
408        Returns
409        --------
410        sqlalchemy.engine.Engine
411            The SQLAlchemy engine for connecting to the database.
412        """
413        directory = os.getcwd()
414        if not url:
415            if not os.path.isdir(directory + "/db"):
416                os.mkdir(directory + "/db")
417            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
418                DB=directory
419            )
420        return create_engine(url, poolclass=QueuePool)
421
422    def __enter__(self):
423        """Returns the EI_LowRes_SQLite object."""
424        return self
425
426    def add_compound_list(self, data_dict_list):
427        """Adds a list of compounds to the database.
428
429        Parameters
430        -----------
431        data_dict_list : list of dict
432            A list of dictionaries representing the compounds.
433        """
434        for data_dict in data_dict_list:
435            # print(data_dict.get('NUM PEAKS'))
436            if not data_dict.get("NUM PEAKS"):
437                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
438            if not data_dict.get("CASNO"):
439                data_dict["CASNO"] = data_dict.get("CAS")
440
441        self.session.add_all(
442            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
443        )
444
445    def add_compound(self, data_dict):
446        """Adds a single compound to the database.
447
448        Parameters
449        -----------
450        data_dict : dict
451            A dictionary representing the compound.
452
453        """
454        one_compound = LowResolutionEICompound(**data_dict)
455        self.session.add(one_compound)
456        self.commit()
457
458    def commit(self):
459        """Commits the changes to the database."""
460        try:
461            self.session.commit()
462        except SQLAlchemyError as e:
463            self.session.rollback()
464            print(str(e))
465
466    def row_to_dict(self, row):
467        """Converts a database row to a dictionary.
468
469        Parameters
470        -----------
471        row : sqlalchemy.engine.row.Row
472            A row from the database.
473
474        Returns
475        --------
476        dict
477            A dictionary representing the compound.
478        """
479        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
480
481        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
482        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
483
484        if row.metadatar:
485            data_dict["metadata"] = {
486                c.name: getattr(row.metadatar, c.name)
487                for c in row.metadatar.__table__.columns
488            }
489
490        else:
491            data_dict["metadata"] = None
492
493        return data_dict
494
495    def get_all(
496        self,
497    ):
498        """Retrieves all compounds from the database.
499
500        Returns
501        --------
502        list
503            A list of dictionaries representing the compounds.
504        """
505        compounds = self.session.query(LowResolutionEICompound).all()
506
507        return [self.row_to_dict(compound) for compound in compounds]
508
509    def query_min_max_rt(
510        self,
511        min_max_rt,
512    ):
513        """Queries compounds based on retention time range.
514
515        Parameters
516        -----------
517        min_max_rt : tuple
518            A tuple containing the minimum and maximum retention time values.
519
520        Returns
521        --------
522        list
523            A list of dictionaries representing the compounds.
524        """
525        min_rt, max_rt = min_max_rt
526
527        compounds = self.session.query(LowResolutionEICompound).filter(
528            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
529        )
530
531        return [self.row_to_dict(compound) for compound in compounds]
532
533    def query_min_max_ri(self, min_max_ri):
534        """Queries compounds based on RI range.
535
536        Parameters
537        -----------
538        min_max_ri : tuple
539            A tuple containing the minimum and maximum RI values.
540        """
541        min_ri, max_ri = min_max_ri
542
543        compounds = (
544            self.session.query(LowResolutionEICompound)
545            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
546            .all()
547        )
548
549        return [self.row_to_dict(compound) for compound in compounds]
550
551    def query_names_and_rt(self, min_max_rt, compound_names):
552        """Queries compounds based on compound names and retention time range.
553
554        Parameters
555        -----------
556        min_max_rt : tuple
557            A tuple containing the minimum and maximum retention time values.
558        compound_names : list
559            A list of compound names.
560
561        Returns
562        --------
563        list
564            A list of dictionaries representing the compounds.
565
566        """
567        min_rt, max_rt = min_max_rt
568
569        compounds = (
570            self.session.query(LowResolutionEICompound)
571            .filter(LowResolutionEICompound.name.in_(compound_names))
572            .filter(
573                LowResolutionEICompound.retention_time >= min_rt,
574                LowResolutionEICompound.retention_time <= max_rt,
575            )
576        )
577
578        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
579        # x = [self.row_to_dict(compound) for compound in compounds]
580
581        return [self.row_to_dict(compound) for compound in compounds]
582
583    def query_min_max_ri_and_rt(
584        self,
585        min_max_ri,
586        min_max_rt,
587    ):
588        """Queries compounds based on RI range and retention time range.
589
590        Parameters
591        -----------
592        min_max_ri : tuple
593            A tuple containing the minimum and maximum RI values.
594        min_max_rt : tuple
595            A tuple containing the minimum and maximum retention time values.
596
597        Returns
598        --------
599        list
600            A list of dictionaries representing the compounds.
601
602        """
603        min_ri, max_ri = min_max_ri
604
605        min_rt, max_rt = min_max_rt
606
607        compounds = self.session.query(LowResolutionEICompound).filter(
608            LowResolutionEICompound.ri <= max_ri,
609            LowResolutionEICompound.ri >= min_ri,
610            LowResolutionEICompound.ri >= min_rt,
611            LowResolutionEICompound.ri >= max_rt,
612        )
613
614        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
615
616        return [self.row_to_dict(compound) for compound in compounds]
617
618    def delete_compound(self, compound):
619        """Deletes a compound from the database.
620
621        Parameters
622        -----------
623        compound : LowResolutionEICompound
624            A compound object.
625
626        """
627        try:
628            self.session.delete(compound)
629            self.session.commit()
630
631        except SQLAlchemyError as e:
632            self.session.rollback()
633            print(str(e))
634
635    def purge(self):
636        """Deletes all compounds from the database table.
637
638        Notes
639        ------
640        Careful, this will delete the entire database table.
641        """
642        self.session.query(LowResolutionEICompound).delete()
643        self.session.commit()
644
645    def clear_data(self):
646        """Clears all tables in the database."""
647        meta = Base.metadata
648        for table in reversed(meta.sorted_tables):
649            print("Clear table %s" % table)
650            self.session.execute(table.delete())
651        self.session.commit()

A class for interacting with a SQLite database for low-resolution EI compounds.

Parameters
  • url (str, optional): The URL of the SQLite database. Default is 'sqlite://'.
Attributes
  • engine (sqlalchemy.engine.Engine): The SQLAlchemy engine for connecting to the database.
  • session (sqlalchemy.orm.Session): The SQLAlchemy session for executing database operations.
Methods
  • __init__(self, url='sqlite://'). Initializes the EI_LowRes_SQLite object.
  • __exit__(self, exc_type, exc_val, exc_tb). Closes the database connection.
  • init_engine(self, url). Initializes the SQLAlchemy engine.
  • __enter__(self). Returns the EI_LowRes_SQLite object.
  • add_compound_list(self, data_dict_list). Adds a list of compounds to the database.
  • add_compound(self, data_dict). Adds a single compound to the database.
  • commit(self). Commits the changes to the database.
  • row_to_dict(self, row). Converts a database row to a dictionary.
  • get_all(self). Retrieves all compounds from the database.
  • query_min_max_rt(self, min_max_rt). Queries compounds based on retention time range.
  • query_min_max_ri(self, min_max_ri). Queries compounds based on RI range.
  • query_names_and_rt(self, min_max_rt, compound_names). Queries compounds based on compound names and retention time range.
  • query_min_max_ri_and_rt(self, min_max_ri, min_max_rt). Queries compounds based on RI range and retention time range.
  • delete_compound(self, compound). Deletes a compound from the database.
  • purge(self). Deletes all compounds from the database table.
  • clear_data(self). Clears all tables in the database.
EI_LowRes_SQLite(url='sqlite://')
385    def __init__(self, url="sqlite://"):
386        self.engine = self.init_engine(url)
387
388        Base.metadata.create_all(self.engine)
389
390        Session = sessionmaker(bind=self.engine)
391
392        self.session = Session()
engine
session
def init_engine(self, url):
400    def init_engine(self, url):
401        """Initializes the SQLAlchemy engine.
402
403        Parameters
404        -----------
405        url : str
406            The URL of the SQLite database.
407
408        Returns
409        --------
410        sqlalchemy.engine.Engine
411            The SQLAlchemy engine for connecting to the database.
412        """
413        directory = os.getcwd()
414        if not url:
415            if not os.path.isdir(directory + "/db"):
416                os.mkdir(directory + "/db")
417            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
418                DB=directory
419            )
420        return create_engine(url, poolclass=QueuePool)

Initializes the SQLAlchemy engine.

Parameters
  • url (str): The URL of the SQLite database.
Returns
  • sqlalchemy.engine.Engine: The SQLAlchemy engine for connecting to the database.
def add_compound_list(self, data_dict_list):
426    def add_compound_list(self, data_dict_list):
427        """Adds a list of compounds to the database.
428
429        Parameters
430        -----------
431        data_dict_list : list of dict
432            A list of dictionaries representing the compounds.
433        """
434        for data_dict in data_dict_list:
435            # print(data_dict.get('NUM PEAKS'))
436            if not data_dict.get("NUM PEAKS"):
437                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
438            if not data_dict.get("CASNO"):
439                data_dict["CASNO"] = data_dict.get("CAS")
440
441        self.session.add_all(
442            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
443        )

Adds a list of compounds to the database.

Parameters
  • data_dict_list (list of dict): A list of dictionaries representing the compounds.
def add_compound(self, data_dict):
445    def add_compound(self, data_dict):
446        """Adds a single compound to the database.
447
448        Parameters
449        -----------
450        data_dict : dict
451            A dictionary representing the compound.
452
453        """
454        one_compound = LowResolutionEICompound(**data_dict)
455        self.session.add(one_compound)
456        self.commit()

Adds a single compound to the database.

Parameters
  • data_dict (dict): A dictionary representing the compound.
def commit(self):
458    def commit(self):
459        """Commits the changes to the database."""
460        try:
461            self.session.commit()
462        except SQLAlchemyError as e:
463            self.session.rollback()
464            print(str(e))

Commits the changes to the database.

def row_to_dict(self, row):
466    def row_to_dict(self, row):
467        """Converts a database row to a dictionary.
468
469        Parameters
470        -----------
471        row : sqlalchemy.engine.row.Row
472            A row from the database.
473
474        Returns
475        --------
476        dict
477            A dictionary representing the compound.
478        """
479        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
480
481        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
482        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
483
484        if row.metadatar:
485            data_dict["metadata"] = {
486                c.name: getattr(row.metadatar, c.name)
487                for c in row.metadatar.__table__.columns
488            }
489
490        else:
491            data_dict["metadata"] = None
492
493        return data_dict

Converts a database row to a dictionary.

Parameters
  • row (sqlalchemy.engine.row.Row): A row from the database.
Returns
  • dict: A dictionary representing the compound.
def get_all(self):
495    def get_all(
496        self,
497    ):
498        """Retrieves all compounds from the database.
499
500        Returns
501        --------
502        list
503            A list of dictionaries representing the compounds.
504        """
505        compounds = self.session.query(LowResolutionEICompound).all()
506
507        return [self.row_to_dict(compound) for compound in compounds]

Retrieves all compounds from the database.

Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_rt(self, min_max_rt):
509    def query_min_max_rt(
510        self,
511        min_max_rt,
512    ):
513        """Queries compounds based on retention time range.
514
515        Parameters
516        -----------
517        min_max_rt : tuple
518            A tuple containing the minimum and maximum retention time values.
519
520        Returns
521        --------
522        list
523            A list of dictionaries representing the compounds.
524        """
525        min_rt, max_rt = min_max_rt
526
527        compounds = self.session.query(LowResolutionEICompound).filter(
528            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
529        )
530
531        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on retention time range.

Parameters
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_ri(self, min_max_ri):
533    def query_min_max_ri(self, min_max_ri):
534        """Queries compounds based on RI range.
535
536        Parameters
537        -----------
538        min_max_ri : tuple
539            A tuple containing the minimum and maximum RI values.
540        """
541        min_ri, max_ri = min_max_ri
542
543        compounds = (
544            self.session.query(LowResolutionEICompound)
545            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
546            .all()
547        )
548
549        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on RI range.

Parameters
  • min_max_ri (tuple): A tuple containing the minimum and maximum RI values.
def query_names_and_rt(self, min_max_rt, compound_names):
551    def query_names_and_rt(self, min_max_rt, compound_names):
552        """Queries compounds based on compound names and retention time range.
553
554        Parameters
555        -----------
556        min_max_rt : tuple
557            A tuple containing the minimum and maximum retention time values.
558        compound_names : list
559            A list of compound names.
560
561        Returns
562        --------
563        list
564            A list of dictionaries representing the compounds.
565
566        """
567        min_rt, max_rt = min_max_rt
568
569        compounds = (
570            self.session.query(LowResolutionEICompound)
571            .filter(LowResolutionEICompound.name.in_(compound_names))
572            .filter(
573                LowResolutionEICompound.retention_time >= min_rt,
574                LowResolutionEICompound.retention_time <= max_rt,
575            )
576        )
577
578        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
579        # x = [self.row_to_dict(compound) for compound in compounds]
580
581        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on compound names and retention time range.

Parameters
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
  • compound_names (list): A list of compound names.
Returns
  • list: A list of dictionaries representing the compounds.
def query_min_max_ri_and_rt(self, min_max_ri, min_max_rt):
583    def query_min_max_ri_and_rt(
584        self,
585        min_max_ri,
586        min_max_rt,
587    ):
588        """Queries compounds based on RI range and retention time range.
589
590        Parameters
591        -----------
592        min_max_ri : tuple
593            A tuple containing the minimum and maximum RI values.
594        min_max_rt : tuple
595            A tuple containing the minimum and maximum retention time values.
596
597        Returns
598        --------
599        list
600            A list of dictionaries representing the compounds.
601
602        """
603        min_ri, max_ri = min_max_ri
604
605        min_rt, max_rt = min_max_rt
606
607        compounds = self.session.query(LowResolutionEICompound).filter(
608            LowResolutionEICompound.ri <= max_ri,
609            LowResolutionEICompound.ri >= min_ri,
610            LowResolutionEICompound.ri >= min_rt,
611            LowResolutionEICompound.ri >= max_rt,
612        )
613
614        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
615
616        return [self.row_to_dict(compound) for compound in compounds]

Queries compounds based on RI range and retention time range.

Parameters
  • min_max_ri (tuple): A tuple containing the minimum and maximum RI values.
  • min_max_rt (tuple): A tuple containing the minimum and maximum retention time values.
Returns
  • list: A list of dictionaries representing the compounds.
def delete_compound(self, compound):
618    def delete_compound(self, compound):
619        """Deletes a compound from the database.
620
621        Parameters
622        -----------
623        compound : LowResolutionEICompound
624            A compound object.
625
626        """
627        try:
628            self.session.delete(compound)
629            self.session.commit()
630
631        except SQLAlchemyError as e:
632            self.session.rollback()
633            print(str(e))

Deletes a compound from the database.

Parameters
  • compound (LowResolutionEICompound): A compound object.
def purge(self):
635    def purge(self):
636        """Deletes all compounds from the database table.
637
638        Notes
639        ------
640        Careful, this will delete the entire database table.
641        """
642        self.session.query(LowResolutionEICompound).delete()
643        self.session.commit()

Deletes all compounds from the database table.

Notes

Careful, this will delete the entire database table.

def clear_data(self):
645    def clear_data(self):
646        """Clears all tables in the database."""
647        meta = Base.metadata
648        for table in reversed(meta.sorted_tables):
649            print("Clear table %s" % table)
650            self.session.execute(table.delete())
651        self.session.commit()

Clears all tables in the database.