corems.molecular_id.factory.molecularSQL

  1import sys
  2
  3import json
  4import os
  5
  6import tqdm
  7from sqlalchemy import (
  8    Column,
  9    Float,
 10    ForeignKey,
 11    Integer,
 12    String,
 13    and_,
 14    create_engine,
 15    event,
 16    exc,
 17    func,
 18)
 19from sqlalchemy.exc import SQLAlchemyError
 20from sqlalchemy.ext.associationproxy import association_proxy
 21from sqlalchemy.ext.declarative import declarative_base
 22from sqlalchemy.ext.hybrid import hybrid_method, hybrid_property
 23from sqlalchemy.orm import backref, relationship
 24from sqlalchemy.orm.scoping import scoped_session
 25from sqlalchemy.orm.session import sessionmaker
 26from sqlalchemy.sql.operators import exists
 27from sqlalchemy.sql.schema import UniqueConstraint
 28
 29from corems.encapsulation.constant import Atoms, Labels
 30
 31Base = declarative_base()
 32
 33
 34class HeteroAtoms(Base):
 35    """HeteroAtoms class for the heteroAtoms table in the SQLite database.
 36
 37    Attributes
 38    ----------
 39    id : int
 40        The primary key for the table.
 41    name : str
 42        The name of the heteroAtoms class.
 43    halogensCount : int
 44        The number of halogens in the heteroAtoms class.
 45    carbonHydrogen : relationship
 46        The relationship to the carbonHydrogen table.
 47
 48    Methods
 49    -------
 50    * __repr__()
 51        Returns the string representation of the object.
 52    * to_dict()
 53        Returns the heteroAtoms class as a dictionary.
 54    * halogens_count()
 55        Returns the number of halogens as a float.
 56
 57
 58    """
 59
 60    __tablename__ = "heteroAtoms"
 61
 62    id = Column(Integer, primary_key=True, unique=True, nullable=False)
 63
 64    name = Column(String, unique=True, nullable=False)
 65
 66    halogensCount = Column(Integer, unique=False, nullable=False)
 67
 68    carbonHydrogen = relationship(
 69        "CarbonHydrogen", secondary="molecularformula", viewonly=True
 70    )
 71
 72    def __repr__(self):
 73        return "<HeteroAtoms Model {} class {}>".format(self.id, self.name)
 74
 75    @hybrid_property
 76    def halogens_count(cls):
 77        """Returns the number of halogens as a float."""
 78        return cls.halogensCount.cast(Float)
 79
 80    def to_dict(self):
 81        """Returns the heteroAtoms class as a dictionary."""
 82        return json.loads(self.name)
 83
 84
 85class CarbonHydrogen(Base):
 86    """CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
 87
 88    Attributes
 89    ----------
 90    id : int
 91        The primary key for the table.
 92    C : int
 93        The number of carbon atoms.
 94    H : int
 95        The number of hydrogen atoms.
 96    heteroAtoms : relationship
 97        The relationship to the heteroAtoms table.
 98
 99    Methods
100    -------
101    * __repr__()
102        Returns the string representation of the object.
103    * mass()
104        Returns the mass of the carbonHydrogen class as a float.
105    * c()
106        Returns the number of carbon atoms as a float.
107    * h()
108        Returns the number of hydrogen atoms as a float.
109    * dbe()
110        Returns the double bond equivalent as a float.
111
112    """
113
114    __tablename__ = "carbonHydrogen"
115    __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),)
116
117    id = Column(Integer, primary_key=True, unique=True, nullable=False)
118
119    C = Column(Integer, nullable=False)
120
121    H = Column(Integer, nullable=False)
122
123    heteroAtoms = relationship(
124        "HeteroAtoms", secondary="molecularformula", viewonly=True
125    )
126
127    def __repr__(self):
128        """Returns the string representation of the object."""
129        return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H)
130
131    @property
132    def mass(self):
133        """Returns the mass of the carbonHydrogen class as a float."""
134        return (self.C * Atoms.atomic_masses.get("C")) + (
135            self.H * Atoms.atomic_masses.get("H")
136        )
137
138    @hybrid_property
139    def c(cls):
140        """Returns the number of carbon atoms as a float."""
141        return cls.C.cast(Float)
142
143    @hybrid_property
144    def h(cls):
145        """Returns the number of hydrogen atoms as a float."""
146        return cls.H.cast(Float)
147
148    @hybrid_property
149    def dbe(cls):
150        """Returns the double bond equivalent as a float."""
151        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
152        return float(cls.C) - float(cls.H / 2) + 1
153
154
155# 264888.88 ms
156class MolecularFormulaLink(Base):
157    """MolecularFormulaLink class for the molecularformula table in the SQLite database.
158
159    Attributes
160    ----------
161    heteroAtoms_id : int
162        The foreign key for the heteroAtoms table.
163    carbonHydrogen_id : int
164        The foreign key for the carbonHydrogen table.
165    mass : float
166        The mass of the molecular formula.
167    DBE : float
168        The double bond equivalent of the molecular formula.
169    carbonHydrogen : relationship
170        The relationship to the carbonHydrogen table.
171    heteroAtoms : relationship
172        The relationship to the heteroAtoms table.
173    C : association_proxy
174        The association proxy for the carbonHydrogen table.
175    H : association_proxy
176        The association proxy for the carbonHydrogen table.
177    classe : association_proxy
178        The association proxy for the heteroAtoms table.
179
180    Methods
181    -------
182    * __repr__()
183        Returns the string representation of the object.
184    * to_dict()
185        Returns the molecular formula as a dictionary.
186    * formula_string()
187        Returns the molecular formula as a string.
188    * classe_string()
189        Returns the heteroAtoms class as a string.
190    * _adduct_mz(ion_charge, adduct_atom)
191        Returns the m/z of the adduct ion as a float.
192    * _protonated_mz(ion_charge)
193        Returns the m/z of the protonated ion as a float.
194    * _radical_mz(ion_charge)
195        Returns the m/z of the radical ion as a float.
196
197
198
199    """
200
201    __tablename__ = "molecularformula"
202    __table_args__ = (
203        UniqueConstraint("heteroAtoms_id", "carbonHydrogen_id", name="unique_molform"),
204    )
205
206    # id = Column(Integer, primary_key=True,
207    #                    unique=True,
208    #                    nullable=False)
209
210    heteroAtoms_id = Column(Integer, ForeignKey("heteroAtoms.id"), primary_key=True)
211
212    carbonHydrogen_id = Column(
213        Integer, ForeignKey("carbonHydrogen.id"), primary_key=True
214    )
215
216    mass = Column(Float)
217
218    DBE = Column(Float)
219
220    carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc"))
221
222    heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc"))
223
224    C = association_proxy("carbonHydrogen", "C")
225
226    H = association_proxy("carbonHydrogen", "H")
227
228    classe = association_proxy("heteroAtoms", "name")
229
230    def to_dict(self):
231        """Returns the molecular formula as a dictionary.
232
233        Returns
234        -------
235        dict
236            The molecular formula as a dictionary.
237        """
238        carbon = {"C": self.C, "H": self.H}
239        classe = json.loads(self.classe)
240        if self.classe == '{"HC": ""}':
241            return {**carbon}
242        else:
243            return {**carbon, **classe}
244
245    @property
246    def formula_string(self):
247        """Returns the molecular formula as a string."""
248        class_dict = self.to_dict()
249        class_str = " ".join(
250            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
251        )
252        return class_str.strip()
253
254    @property
255    def classe_string(self):
256        """Returns the heteroAtoms class as a string."""
257        class_dict = json.loads(self.classe)
258        class_str = " ".join(
259            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
260        )
261        return class_str.strip()
262
263    @hybrid_method
264    def _adduct_mz(self, ion_charge, adduct_atom):
265        """Returns the m/z of the adduct ion as a float."""
266        return (
267            self.mass
268            + (Atoms.atomic_masses.get(adduct_atom))
269            + (ion_charge * -1 * Atoms.electron_mass)
270        ) / abs(ion_charge)
271
272    @hybrid_method
273    def _protonated_mz(self, ion_charge):
274        """Returns the m/z of the protonated ion as a float."""
275        return (
276            self.mass
277            + (ion_charge * Atoms.atomic_masses.get("H"))
278            + (ion_charge * -1 * Atoms.electron_mass)
279        ) / abs(ion_charge)
280
281    @hybrid_method
282    def _radical_mz(self, ion_charge):
283        """Returns the m/z of the radical ion as a float."""
284        return (self.mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
285
286    def __repr__(self):
287        """Returns the string representation of the object."""
288        return "<MolecularFormulaLink Model {}>".format(self.formula_string)
289
290
291class MolForm_SQL:
292    """MolForm_SQL class for the SQLite database.
293
294    Attributes
295    ----------
296    engine : sqlalchemy.engine.base.Engine
297        The SQLAlchemy engine.
298    session : sqlalchemy.orm.session.Session
299        The SQLAlchemy session.
300    type : str
301        The type of database.
302    chunks_count : int
303        The number of chunks to use when querying the database.
304
305    Methods
306    -------
307    * __init__(url=None, echo=False)
308        Initializes the database.
309    * __exit__(exc_type, exc_val, exc_tb)
310        Closes the database.
311    * initiate_database(url, database_name)
312        Creates the database.
313    * commit()
314        Commits the session.
315    * init_engine(url)
316        Initializes the SQLAlchemy engine.
317    * __enter__()
318
319    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
320        Returns a dictionary of molecular formulas.
321    * check_entry(classe, ion_type, molecular_search_settings)
322        Checks if a molecular formula is in the database.
323    * get_all_classes()
324        Returns a list of all classes in the database.
325    * get_all()
326        Returns a list of all molecular formulas in the database.
327    * delete_entry(row)
328        Deletes a molecular formula from the database.
329    * purge(cls)
330        Deletes all molecular formulas from the database.
331    * clear_data()
332        Clears the database.
333    * close(commit=True)
334        Closes the database.
335    * add_engine_pidguard(engine)
336        Adds multiprocessing guards.
337
338    """
339
340    def __init__(self, url=None, echo=False):
341        self.engine = self.init_engine(url)
342
343        self.add_engine_pidguard(self.engine)
344
345        session_factory = sessionmaker(bind=self.engine)
346
347        Session = scoped_session(session_factory)
348
349        self.session = session_factory()
350
351        Base.metadata.create_all(self.engine)
352
353        self.session.commit()
354
355    def __exit__(self, exc_type, exc_val, exc_tb):
356        """Closes the database.
357
358        Parameters
359        ----------
360        exc_type : str
361            The exception type.
362        exc_val : str
363            The exception value.
364        exc_tb : str
365            The exception traceback.
366        """
367        # make sure the dbconnection gets closed
368
369        self.commit()
370        self.session.close()
371        self.engine.dispose()
372
373    def initiate_database(self, url, database_name):  # CREATION
374        """Creates the database.
375
376        Parameters
377        ----------
378        url : str
379            The URL for the database.
380        database_name : str
381            The name of the database.
382        """
383        engine = create_engine(url)
384        conn = engine.connect()
385        conn.execute("commit")
386        conn.execute("create database " + database_name)
387        conn.close()
388
389    def commit(self):
390        """Commits the session."""
391        try:
392            self.session.commit()
393        except SQLAlchemyError as e:
394            self.session.rollback()
395            print(str(e))
396
397    def init_engine(self, url):
398        """Initializes the SQLAlchemy engine.
399
400        Parameters
401        ----------
402        url : str
403            The URL for the database.
404
405        Returns
406        -------
407        sqlalchemy.engine.base.Engine
408            The SQLAlchemy engine.
409
410        """
411        if not url or url == "None" or url == "False":
412            directory = os.getcwd()
413
414            if not os.path.isdir(directory + "/db"):
415                os.mkdir(directory + "/db")
416
417            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
418
419        if url[0:6] == "sqlite":
420            self.type = "sqlite"
421        else:
422            self.type = "normal"
423
424        if url[0:6] == "sqlite":
425            engine = create_engine(url, echo=False)
426            self.chunks_count = 50
427
428        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
429            # postgresql
430            self.chunks_count = 50000
431            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
432
433        return engine  # poolclass=NullPool
434
435    def __enter__(self):
436        """Returns the object."""
437        return self
438
439    def get_dict_by_classes(
440        self,
441        classes,
442        ion_type,
443        nominal_mzs,
444        ion_charge,
445        molecular_search_settings,
446        adducts=None,
447    ):
448        """Returns a dictionary of molecular formulas.
449
450        Parameters
451        ----------
452        classes : list
453            The list of classes.
454        ion_type : str
455            The ion type.
456        nominal_mzs : list
457            The list of nominal m/z values.
458        ion_charge : int
459            The ion charge.
460        molecular_search_settings : MolecularFormulaSearchSettings
461            The molecular formula search settings.
462        adducts : list, optional
463            The list of adducts. Default is None.
464
465        Returns
466        -------
467        dict
468            The dictionary of molecular formulas.
469
470        Notes
471        -----
472        Known issue, when using SQLite:
473        if the number of classes and nominal_m/zs are higher than 999 the query will fail
474        Solution: use postgres or split query
475        """
476        verbose = molecular_search_settings.verbose_processing
477
478        def query_normal(class_list, len_adduct):
479            """query for normal database
480
481            Parameters
482            ----------
483            class_list : list
484                The list of classes.
485            len_adduct : int
486                The length of the adduct.
487
488            Returns
489            -------
490            sqlalchemy.orm.query.Query
491                The query.
492            """
493            base_query = (
494                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
495                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
496                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
497            )
498
499            return base_query.filter(
500                and_(
501                    HeteroAtoms.name.in_(class_list),
502                    and_(
503                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
504                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
505                        and_(
506                            (
507                                (
508                                    CarbonHydrogen.h
509                                    + HeteroAtoms.halogens_count
510                                    - len_adduct
511                                )
512                                / CarbonHydrogen.c
513                            )
514                            >= molecular_search_settings.min_hc_filter,
515                            (
516                                (
517                                    CarbonHydrogen.h
518                                    + HeteroAtoms.halogens_count
519                                    - len_adduct
520                                )
521                                / CarbonHydrogen.c
522                            )
523                            <= molecular_search_settings.max_hc_filter,
524                            CarbonHydrogen.C
525                            >= molecular_search_settings.usedAtoms.get("C")[0],
526                            CarbonHydrogen.c
527                            <= molecular_search_settings.usedAtoms.get("C")[1],
528                            CarbonHydrogen.h
529                            >= molecular_search_settings.usedAtoms.get("H")[0],
530                            CarbonHydrogen.h
531                            <= molecular_search_settings.usedAtoms.get("H")[1],
532                        ),
533                    ),
534                )
535            )
536
537        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
538            """add molecular formula to dict
539
540            Parameters
541            ----------
542            formulas : sqlalchemy.orm.query.Query
543                The query.
544            ion_type : str
545                The ion type.
546            ion_charge : int
547                The ion charge.
548            adduct_atom : str, optional
549                The adduct atom. Default is None.
550
551            Returns
552            -------
553            dict
554                The dictionary of molecular formulas.
555
556            """
557            "organize data by heteroatom classes"
558            dict_res = {}
559
560            def nominal_mass_by_ion_type(formula_obj):
561                if ion_type == Labels.protonated_de_ion:
562                    return int(formula_obj._protonated_mz(ion_charge))
563
564                elif ion_type == Labels.radical_ion:
565                    return int(formula_obj._radical_mz(ion_charge))
566
567                elif ion_type == Labels.adduct_ion and adduct_atom:
568                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
569            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose):
570                nominal_mz = nominal_mass_by_ion_type(formula_obj)
571
572                if self.type != "normal":
573                    if not nominal_mz in nominal_mzs:
574                        continue
575                classe = classe_obj.name
576
577                # classe_str = formula.classe_string
578
579                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
580
581                formula_dict = formula_obj.to_dict()
582
583                if formula_dict.get("O"):
584                    if (
585                        formula_dict.get("O") / formula_dict.get("C")
586                        >= molecular_search_settings.max_oc_filter
587                    ):
588                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
589                        continue
590                    elif (
591                        formula_dict.get("O") / formula_dict.get("C")
592                        <= molecular_search_settings.min_oc_filter
593                    ):
594                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
595                        continue
596                    # if formula_dict.get("P"):
597
598                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
599
600                    #        continue
601
602                if classe in dict_res.keys():
603                    if nominal_mz in dict_res[classe].keys():
604                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
605
606                    else:
607                        dict_res.get(classe)[nominal_mz] = [formula_obj]
608
609                else:
610                    dict_res[classe] = {nominal_mz: [formula_obj]}
611
612            return dict_res
613
614        len_adducts = 0
615        if ion_type == Labels.adduct_ion:
616            len_adducts = 1
617
618        query = query_normal(classes, len_adducts)
619
620        if ion_type == Labels.protonated_de_ion:
621            if self.type == "normal":
622                query = query.filter(
623                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
624                        nominal_mzs
625                    )
626                )
627
628            return add_dict_formula(query, ion_type, ion_charge)
629
630        if ion_type == Labels.radical_ion:
631            if self.type == "normal":
632                query = query.filter(
633                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
634                        nominal_mzs
635                    )
636                )
637            return add_dict_formula(query, ion_type, ion_charge)
638
639        if ion_type == Labels.adduct_ion:
640            dict_res = {}
641            if adducts:
642                for atom in adducts:
643                    if self.type == "normal":
644                        query = query.filter(
645                            func.floor(
646                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
647                            ).in_(nominal_mzs)
648                        )
649                    dict_res[atom] = add_dict_formula(
650                        query, ion_type, ion_charge, adduct_atom=atom
651                    )
652                return dict_res
653        # dump all objs to memory
654        self.session.expunge_all()
655
656    def check_entry(self, classe, ion_type, molecular_search_settings):
657        """Checks if a molecular formula is in the database.
658
659        Parameters
660        ----------
661        classe : str
662            The class of the molecular formula.
663        ion_type : str
664            The ion type.
665        molecular_search_settings : MolecularFormulaSearchSettings
666            The molecular formula search settings.
667
668        Returns
669        -------
670        sqlalchemy.orm.query.Query
671            The query.
672        """
673        #  get all classes, ion_type, ion charge as str add to a dict or list
674        #  then check if class in database
675        has_class = self.session.query(
676            exists().where((MolecularFormulaLink.classe == classe))
677        )
678
679        return has_class
680
681    def get_all_classes(self):
682        """Returns a list of all classes in the database."""
683        query = self.session.query(
684            MolecularFormulaLink.classe.distinct().label("classe")
685        )
686
687        return query.all()
688
689    def get_all(
690        self,
691    ):
692        """Returns a list of all molecular formulas in the database."""
693        mol_formulas = self.session.query(MolecularFormulaLink).all()
694
695        return mol_formulas
696
697    def delete_entry(self, row):
698        """Deletes a molecular formula from the database."""
699        try:
700            self.session.delete(row)
701            self.session.commit()
702
703        except SQLAlchemyError as e:
704            self.session.rollback()
705            print(str(e))
706
707    def purge(self, cls):
708        """Deletes all molecular formulas from the database.
709
710        Notes
711        -------
712        Careful, this will delete the entire database table
713
714        """
715        self.session.query(cls).delete()
716        self.session.commit()
717
718    def clear_data(self):
719        """Clears the database."""
720        meta = Base.metadata
721        for table in reversed(meta.sorted_tables):
722            print("Clear table %s" % table)
723            self.session.execute(table.delete())
724        self.session.commit()
725
726    def close(self, commit=True):
727        """Closes the database.
728
729        Parameters
730        ----------
731        commit : bool, optional
732            Whether to commit the session. Default is True.
733        """
734        # make sure the dbconnection gets closed
735
736        if commit:
737            self.commit()
738        self.session.close()
739        self.engine.dispose()
740
741    def add_engine_pidguard(self, engine):
742        """Adds multiprocessing guards.
743
744        Forces a connection to be reconnected if it is detected
745        as having been shared to a sub-process.
746
747        Parameters
748        ----------
749        engine : sqlalchemy.engine.base.Engine
750            The SQLAlchemy engine.
751
752        """
753        import os
754        import warnings
755
756        @event.listens_for(engine, "connect")
757        def connect(dbapi_connection, connection_record):
758            """Forces a connection to be reconnected if it is detected
759
760            Parameters
761            ----------
762            dbapi_connection : sqlalchemy.engine.base.Engine
763                The SQLAlchemy engine.
764            connection_record : sqlalchemy.engine.base.Engine
765                The SQLAlchemy engine.
766            """
767            connection_record.info["pid"] = os.getpid()
768
769        @event.listens_for(engine, "checkout")
770        def checkout(dbapi_connection, connection_record, connection_proxy):
771            """Forces a connection to be reconnected if it is detected
772
773            Parameters
774            ----------
775            dbapi_connection : sqlalchemy.engine.base.Engine
776                The SQLAlchemy engine.
777            connection_record : sqlalchemy.engine.base.Engine
778                The SQLAlchemy engine.
779            connection_proxy : sqlalchemy.engine.base.Engine
780                The SQLAlchemy engine.
781
782            Raises
783            ------
784            exc.DisconnectionError
785                If the connection record belongs to a different process.
786
787            """
788            pid = os.getpid()
789            if connection_record.info["pid"] != pid:
790                # substitute log.debug() or similar here as desired
791                warnings.warn(
792                    "Parent process %(orig)s forked (%(newproc)s) with an open "
793                    "database connection, "
794                    "which is being discarded and recreated."
795                    % {"newproc": pid, "orig": connection_record.info["pid"]}
796                )
797                connection_record.connection = connection_proxy.connection = None
798                raise exc.DisconnectionError(
799                    "Connection record belongs to pid %s, "
800                    "attempting to check out in pid %s"
801                    % (connection_record.info["pid"], pid)
802                )
803
804
805if __name__ == "__main__":
806    sql = MolForm_SQL(url="sqlite:///")
807
808    dict_data = {"name": '{"O": 12}'}
809    dict_data2 = {"name": '{"O": 13}'}
810    hetero_obj = HeteroAtoms(**dict_data)
811    hetero_obj2 = HeteroAtoms(**dict_data2)
812    sql.session.add(hetero_obj)
813    sql.session.add(hetero_obj2)
814
815    print(sql.session.query(HeteroAtoms).all())
816    # molecular_search_settings = MolecularFormulaSearchSettings()
817    # sql = MolForm_SQL()
818    # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
819    # query = sql.get_by_classe('{"O": 12}', molecular_search_settings).filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
820    # classes = ['{"O": 12}']*1
821    # for i, classe in enumerate(classes):
822    # query = sql.get_by_classe(classe, molecular_search_settings)
823    # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
824    # for i in query.filter(MolecularFormulaLink.mass < 250):
825
826    #    print(i._radical_mz(-1), i._protonated_mz(-1), i._adduct_mz(+2, "Na"), i.mass, i.to_dict(), i.formula_string)
827    #
class Base:

The base class of the class hierarchy.

When called, it accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any.

Base(**kwargs)
1185def _declarative_constructor(self, **kwargs):
1186    """A simple constructor that allows initialization from kwargs.
1187
1188    Sets attributes on the constructed instance using the names and
1189    values in ``kwargs``.
1190
1191    Only keys that are present as
1192    attributes of the instance's class are allowed. These could be,
1193    for example, any mapped columns or relationships.
1194    """
1195    cls_ = type(self)
1196    for k in kwargs:
1197        if not hasattr(cls_, k):
1198            raise TypeError(
1199                "%r is an invalid keyword argument for %s" % (k, cls_.__name__)
1200            )
1201        setattr(self, k, kwargs[k])

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

registry = <sqlalchemy.orm.decl_api.registry object>
metadata = MetaData()
class HeteroAtoms(corems.molecular_id.factory.molecularSQL.Base):
35class HeteroAtoms(Base):
36    """HeteroAtoms class for the heteroAtoms table in the SQLite database.
37
38    Attributes
39    ----------
40    id : int
41        The primary key for the table.
42    name : str
43        The name of the heteroAtoms class.
44    halogensCount : int
45        The number of halogens in the heteroAtoms class.
46    carbonHydrogen : relationship
47        The relationship to the carbonHydrogen table.
48
49    Methods
50    -------
51    * __repr__()
52        Returns the string representation of the object.
53    * to_dict()
54        Returns the heteroAtoms class as a dictionary.
55    * halogens_count()
56        Returns the number of halogens as a float.
57
58
59    """
60
61    __tablename__ = "heteroAtoms"
62
63    id = Column(Integer, primary_key=True, unique=True, nullable=False)
64
65    name = Column(String, unique=True, nullable=False)
66
67    halogensCount = Column(Integer, unique=False, nullable=False)
68
69    carbonHydrogen = relationship(
70        "CarbonHydrogen", secondary="molecularformula", viewonly=True
71    )
72
73    def __repr__(self):
74        return "<HeteroAtoms Model {} class {}>".format(self.id, self.name)
75
76    @hybrid_property
77    def halogens_count(cls):
78        """Returns the number of halogens as a float."""
79        return cls.halogensCount.cast(Float)
80
81    def to_dict(self):
82        """Returns the heteroAtoms class as a dictionary."""
83        return json.loads(self.name)

HeteroAtoms class for the heteroAtoms table in the SQLite database.

Attributes
  • id (int): The primary key for the table.
  • name (str): The name of the heteroAtoms class.
  • halogensCount (int): The number of halogens in the heteroAtoms class.
  • carbonHydrogen (relationship): The relationship to the carbonHydrogen table.
Methods
  • __repr__() Returns the string representation of the object.
  • to_dict() Returns the heteroAtoms class as a dictionary.
  • halogens_count() Returns the number of halogens as a float.
HeteroAtoms(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

id
name
halogensCount
carbonHydrogen
halogens_count

Returns the number of halogens as a float.

def to_dict(self):
81    def to_dict(self):
82        """Returns the heteroAtoms class as a dictionary."""
83        return json.loads(self.name)

Returns the heteroAtoms class as a dictionary.

Inherited Members
Base
registry
metadata
class CarbonHydrogen(corems.molecular_id.factory.molecularSQL.Base):
 86class CarbonHydrogen(Base):
 87    """CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
 88
 89    Attributes
 90    ----------
 91    id : int
 92        The primary key for the table.
 93    C : int
 94        The number of carbon atoms.
 95    H : int
 96        The number of hydrogen atoms.
 97    heteroAtoms : relationship
 98        The relationship to the heteroAtoms table.
 99
100    Methods
101    -------
102    * __repr__()
103        Returns the string representation of the object.
104    * mass()
105        Returns the mass of the carbonHydrogen class as a float.
106    * c()
107        Returns the number of carbon atoms as a float.
108    * h()
109        Returns the number of hydrogen atoms as a float.
110    * dbe()
111        Returns the double bond equivalent as a float.
112
113    """
114
115    __tablename__ = "carbonHydrogen"
116    __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),)
117
118    id = Column(Integer, primary_key=True, unique=True, nullable=False)
119
120    C = Column(Integer, nullable=False)
121
122    H = Column(Integer, nullable=False)
123
124    heteroAtoms = relationship(
125        "HeteroAtoms", secondary="molecularformula", viewonly=True
126    )
127
128    def __repr__(self):
129        """Returns the string representation of the object."""
130        return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H)
131
132    @property
133    def mass(self):
134        """Returns the mass of the carbonHydrogen class as a float."""
135        return (self.C * Atoms.atomic_masses.get("C")) + (
136            self.H * Atoms.atomic_masses.get("H")
137        )
138
139    @hybrid_property
140    def c(cls):
141        """Returns the number of carbon atoms as a float."""
142        return cls.C.cast(Float)
143
144    @hybrid_property
145    def h(cls):
146        """Returns the number of hydrogen atoms as a float."""
147        return cls.H.cast(Float)
148
149    @hybrid_property
150    def dbe(cls):
151        """Returns the double bond equivalent as a float."""
152        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
153        return float(cls.C) - float(cls.H / 2) + 1

CarbonHydrogen class for the carbonHydrogen table in the SQLite database.

Attributes
  • id (int): The primary key for the table.
  • C (int): The number of carbon atoms.
  • H (int): The number of hydrogen atoms.
  • heteroAtoms (relationship): The relationship to the heteroAtoms table.
Methods
  • __repr__() Returns the string representation of the object.
  • mass() Returns the mass of the carbonHydrogen class as a float.
  • c() Returns the number of carbon atoms as a float.
  • h() Returns the number of hydrogen atoms as a float.
  • dbe() Returns the double bond equivalent as a float.
CarbonHydrogen(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.

id
C
H
heteroAtoms
mass

Returns the mass of the carbonHydrogen class as a float.

c

Returns the number of carbon atoms as a float.

h

Returns the number of hydrogen atoms as a float.

dbe

Returns the double bond equivalent as a float.

Inherited Members
Base
registry
metadata
class MolForm_SQL:
292class MolForm_SQL:
293    """MolForm_SQL class for the SQLite database.
294
295    Attributes
296    ----------
297    engine : sqlalchemy.engine.base.Engine
298        The SQLAlchemy engine.
299    session : sqlalchemy.orm.session.Session
300        The SQLAlchemy session.
301    type : str
302        The type of database.
303    chunks_count : int
304        The number of chunks to use when querying the database.
305
306    Methods
307    -------
308    * __init__(url=None, echo=False)
309        Initializes the database.
310    * __exit__(exc_type, exc_val, exc_tb)
311        Closes the database.
312    * initiate_database(url, database_name)
313        Creates the database.
314    * commit()
315        Commits the session.
316    * init_engine(url)
317        Initializes the SQLAlchemy engine.
318    * __enter__()
319
320    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
321        Returns a dictionary of molecular formulas.
322    * check_entry(classe, ion_type, molecular_search_settings)
323        Checks if a molecular formula is in the database.
324    * get_all_classes()
325        Returns a list of all classes in the database.
326    * get_all()
327        Returns a list of all molecular formulas in the database.
328    * delete_entry(row)
329        Deletes a molecular formula from the database.
330    * purge(cls)
331        Deletes all molecular formulas from the database.
332    * clear_data()
333        Clears the database.
334    * close(commit=True)
335        Closes the database.
336    * add_engine_pidguard(engine)
337        Adds multiprocessing guards.
338
339    """
340
341    def __init__(self, url=None, echo=False):
342        self.engine = self.init_engine(url)
343
344        self.add_engine_pidguard(self.engine)
345
346        session_factory = sessionmaker(bind=self.engine)
347
348        Session = scoped_session(session_factory)
349
350        self.session = session_factory()
351
352        Base.metadata.create_all(self.engine)
353
354        self.session.commit()
355
356    def __exit__(self, exc_type, exc_val, exc_tb):
357        """Closes the database.
358
359        Parameters
360        ----------
361        exc_type : str
362            The exception type.
363        exc_val : str
364            The exception value.
365        exc_tb : str
366            The exception traceback.
367        """
368        # make sure the dbconnection gets closed
369
370        self.commit()
371        self.session.close()
372        self.engine.dispose()
373
374    def initiate_database(self, url, database_name):  # CREATION
375        """Creates the database.
376
377        Parameters
378        ----------
379        url : str
380            The URL for the database.
381        database_name : str
382            The name of the database.
383        """
384        engine = create_engine(url)
385        conn = engine.connect()
386        conn.execute("commit")
387        conn.execute("create database " + database_name)
388        conn.close()
389
390    def commit(self):
391        """Commits the session."""
392        try:
393            self.session.commit()
394        except SQLAlchemyError as e:
395            self.session.rollback()
396            print(str(e))
397
398    def init_engine(self, url):
399        """Initializes the SQLAlchemy engine.
400
401        Parameters
402        ----------
403        url : str
404            The URL for the database.
405
406        Returns
407        -------
408        sqlalchemy.engine.base.Engine
409            The SQLAlchemy engine.
410
411        """
412        if not url or url == "None" or url == "False":
413            directory = os.getcwd()
414
415            if not os.path.isdir(directory + "/db"):
416                os.mkdir(directory + "/db")
417
418            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
419
420        if url[0:6] == "sqlite":
421            self.type = "sqlite"
422        else:
423            self.type = "normal"
424
425        if url[0:6] == "sqlite":
426            engine = create_engine(url, echo=False)
427            self.chunks_count = 50
428
429        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
430            # postgresql
431            self.chunks_count = 50000
432            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
433
434        return engine  # poolclass=NullPool
435
436    def __enter__(self):
437        """Returns the object."""
438        return self
439
440    def get_dict_by_classes(
441        self,
442        classes,
443        ion_type,
444        nominal_mzs,
445        ion_charge,
446        molecular_search_settings,
447        adducts=None,
448    ):
449        """Returns a dictionary of molecular formulas.
450
451        Parameters
452        ----------
453        classes : list
454            The list of classes.
455        ion_type : str
456            The ion type.
457        nominal_mzs : list
458            The list of nominal m/z values.
459        ion_charge : int
460            The ion charge.
461        molecular_search_settings : MolecularFormulaSearchSettings
462            The molecular formula search settings.
463        adducts : list, optional
464            The list of adducts. Default is None.
465
466        Returns
467        -------
468        dict
469            The dictionary of molecular formulas.
470
471        Notes
472        -----
473        Known issue, when using SQLite:
474        if the number of classes and nominal_m/zs are higher than 999 the query will fail
475        Solution: use postgres or split query
476        """
477        verbose = molecular_search_settings.verbose_processing
478
479        def query_normal(class_list, len_adduct):
480            """query for normal database
481
482            Parameters
483            ----------
484            class_list : list
485                The list of classes.
486            len_adduct : int
487                The length of the adduct.
488
489            Returns
490            -------
491            sqlalchemy.orm.query.Query
492                The query.
493            """
494            base_query = (
495                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
496                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
497                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
498            )
499
500            return base_query.filter(
501                and_(
502                    HeteroAtoms.name.in_(class_list),
503                    and_(
504                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
505                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
506                        and_(
507                            (
508                                (
509                                    CarbonHydrogen.h
510                                    + HeteroAtoms.halogens_count
511                                    - len_adduct
512                                )
513                                / CarbonHydrogen.c
514                            )
515                            >= molecular_search_settings.min_hc_filter,
516                            (
517                                (
518                                    CarbonHydrogen.h
519                                    + HeteroAtoms.halogens_count
520                                    - len_adduct
521                                )
522                                / CarbonHydrogen.c
523                            )
524                            <= molecular_search_settings.max_hc_filter,
525                            CarbonHydrogen.C
526                            >= molecular_search_settings.usedAtoms.get("C")[0],
527                            CarbonHydrogen.c
528                            <= molecular_search_settings.usedAtoms.get("C")[1],
529                            CarbonHydrogen.h
530                            >= molecular_search_settings.usedAtoms.get("H")[0],
531                            CarbonHydrogen.h
532                            <= molecular_search_settings.usedAtoms.get("H")[1],
533                        ),
534                    ),
535                )
536            )
537
538        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
539            """add molecular formula to dict
540
541            Parameters
542            ----------
543            formulas : sqlalchemy.orm.query.Query
544                The query.
545            ion_type : str
546                The ion type.
547            ion_charge : int
548                The ion charge.
549            adduct_atom : str, optional
550                The adduct atom. Default is None.
551
552            Returns
553            -------
554            dict
555                The dictionary of molecular formulas.
556
557            """
558            "organize data by heteroatom classes"
559            dict_res = {}
560
561            def nominal_mass_by_ion_type(formula_obj):
562                if ion_type == Labels.protonated_de_ion:
563                    return int(formula_obj._protonated_mz(ion_charge))
564
565                elif ion_type == Labels.radical_ion:
566                    return int(formula_obj._radical_mz(ion_charge))
567
568                elif ion_type == Labels.adduct_ion and adduct_atom:
569                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
570            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose):
571                nominal_mz = nominal_mass_by_ion_type(formula_obj)
572
573                if self.type != "normal":
574                    if not nominal_mz in nominal_mzs:
575                        continue
576                classe = classe_obj.name
577
578                # classe_str = formula.classe_string
579
580                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
581
582                formula_dict = formula_obj.to_dict()
583
584                if formula_dict.get("O"):
585                    if (
586                        formula_dict.get("O") / formula_dict.get("C")
587                        >= molecular_search_settings.max_oc_filter
588                    ):
589                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
590                        continue
591                    elif (
592                        formula_dict.get("O") / formula_dict.get("C")
593                        <= molecular_search_settings.min_oc_filter
594                    ):
595                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
596                        continue
597                    # if formula_dict.get("P"):
598
599                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
600
601                    #        continue
602
603                if classe in dict_res.keys():
604                    if nominal_mz in dict_res[classe].keys():
605                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
606
607                    else:
608                        dict_res.get(classe)[nominal_mz] = [formula_obj]
609
610                else:
611                    dict_res[classe] = {nominal_mz: [formula_obj]}
612
613            return dict_res
614
615        len_adducts = 0
616        if ion_type == Labels.adduct_ion:
617            len_adducts = 1
618
619        query = query_normal(classes, len_adducts)
620
621        if ion_type == Labels.protonated_de_ion:
622            if self.type == "normal":
623                query = query.filter(
624                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
625                        nominal_mzs
626                    )
627                )
628
629            return add_dict_formula(query, ion_type, ion_charge)
630
631        if ion_type == Labels.radical_ion:
632            if self.type == "normal":
633                query = query.filter(
634                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
635                        nominal_mzs
636                    )
637                )
638            return add_dict_formula(query, ion_type, ion_charge)
639
640        if ion_type == Labels.adduct_ion:
641            dict_res = {}
642            if adducts:
643                for atom in adducts:
644                    if self.type == "normal":
645                        query = query.filter(
646                            func.floor(
647                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
648                            ).in_(nominal_mzs)
649                        )
650                    dict_res[atom] = add_dict_formula(
651                        query, ion_type, ion_charge, adduct_atom=atom
652                    )
653                return dict_res
654        # dump all objs to memory
655        self.session.expunge_all()
656
657    def check_entry(self, classe, ion_type, molecular_search_settings):
658        """Checks if a molecular formula is in the database.
659
660        Parameters
661        ----------
662        classe : str
663            The class of the molecular formula.
664        ion_type : str
665            The ion type.
666        molecular_search_settings : MolecularFormulaSearchSettings
667            The molecular formula search settings.
668
669        Returns
670        -------
671        sqlalchemy.orm.query.Query
672            The query.
673        """
674        #  get all classes, ion_type, ion charge as str add to a dict or list
675        #  then check if class in database
676        has_class = self.session.query(
677            exists().where((MolecularFormulaLink.classe == classe))
678        )
679
680        return has_class
681
682    def get_all_classes(self):
683        """Returns a list of all classes in the database."""
684        query = self.session.query(
685            MolecularFormulaLink.classe.distinct().label("classe")
686        )
687
688        return query.all()
689
690    def get_all(
691        self,
692    ):
693        """Returns a list of all molecular formulas in the database."""
694        mol_formulas = self.session.query(MolecularFormulaLink).all()
695
696        return mol_formulas
697
698    def delete_entry(self, row):
699        """Deletes a molecular formula from the database."""
700        try:
701            self.session.delete(row)
702            self.session.commit()
703
704        except SQLAlchemyError as e:
705            self.session.rollback()
706            print(str(e))
707
708    def purge(self, cls):
709        """Deletes all molecular formulas from the database.
710
711        Notes
712        -------
713        Careful, this will delete the entire database table
714
715        """
716        self.session.query(cls).delete()
717        self.session.commit()
718
719    def clear_data(self):
720        """Clears the database."""
721        meta = Base.metadata
722        for table in reversed(meta.sorted_tables):
723            print("Clear table %s" % table)
724            self.session.execute(table.delete())
725        self.session.commit()
726
727    def close(self, commit=True):
728        """Closes the database.
729
730        Parameters
731        ----------
732        commit : bool, optional
733            Whether to commit the session. Default is True.
734        """
735        # make sure the dbconnection gets closed
736
737        if commit:
738            self.commit()
739        self.session.close()
740        self.engine.dispose()
741
742    def add_engine_pidguard(self, engine):
743        """Adds multiprocessing guards.
744
745        Forces a connection to be reconnected if it is detected
746        as having been shared to a sub-process.
747
748        Parameters
749        ----------
750        engine : sqlalchemy.engine.base.Engine
751            The SQLAlchemy engine.
752
753        """
754        import os
755        import warnings
756
757        @event.listens_for(engine, "connect")
758        def connect(dbapi_connection, connection_record):
759            """Forces a connection to be reconnected if it is detected
760
761            Parameters
762            ----------
763            dbapi_connection : sqlalchemy.engine.base.Engine
764                The SQLAlchemy engine.
765            connection_record : sqlalchemy.engine.base.Engine
766                The SQLAlchemy engine.
767            """
768            connection_record.info["pid"] = os.getpid()
769
770        @event.listens_for(engine, "checkout")
771        def checkout(dbapi_connection, connection_record, connection_proxy):
772            """Forces a connection to be reconnected if it is detected
773
774            Parameters
775            ----------
776            dbapi_connection : sqlalchemy.engine.base.Engine
777                The SQLAlchemy engine.
778            connection_record : sqlalchemy.engine.base.Engine
779                The SQLAlchemy engine.
780            connection_proxy : sqlalchemy.engine.base.Engine
781                The SQLAlchemy engine.
782
783            Raises
784            ------
785            exc.DisconnectionError
786                If the connection record belongs to a different process.
787
788            """
789            pid = os.getpid()
790            if connection_record.info["pid"] != pid:
791                # substitute log.debug() or similar here as desired
792                warnings.warn(
793                    "Parent process %(orig)s forked (%(newproc)s) with an open "
794                    "database connection, "
795                    "which is being discarded and recreated."
796                    % {"newproc": pid, "orig": connection_record.info["pid"]}
797                )
798                connection_record.connection = connection_proxy.connection = None
799                raise exc.DisconnectionError(
800                    "Connection record belongs to pid %s, "
801                    "attempting to check out in pid %s"
802                    % (connection_record.info["pid"], pid)
803                )

MolForm_SQL class for the SQLite database.

Attributes
  • engine (sqlalchemy.engine.base.Engine): The SQLAlchemy engine.
  • session (sqlalchemy.orm.session.Session): The SQLAlchemy session.
  • type (str): The type of database.
  • chunks_count (int): The number of chunks to use when querying the database.
Methods
  • __init__(url=None, echo=False) Initializes the database.
  • __exit__(exc_type, exc_val, exc_tb) Closes the database.
  • initiate_database(url, database_name) Creates the database.
  • commit() Commits the session.
  • init_engine(url) Initializes the SQLAlchemy engine.
  • __enter__()

  • get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None) Returns a dictionary of molecular formulas.

  • check_entry(classe, ion_type, molecular_search_settings) Checks if a molecular formula is in the database.
  • get_all_classes() Returns a list of all classes in the database.
  • get_all() Returns a list of all molecular formulas in the database.
  • delete_entry(row) Deletes a molecular formula from the database.
  • purge(cls) Deletes all molecular formulas from the database.
  • clear_data() Clears the database.
  • close(commit=True) Closes the database.
  • add_engine_pidguard(engine) Adds multiprocessing guards.
MolForm_SQL(url=None, echo=False)
341    def __init__(self, url=None, echo=False):
342        self.engine = self.init_engine(url)
343
344        self.add_engine_pidguard(self.engine)
345
346        session_factory = sessionmaker(bind=self.engine)
347
348        Session = scoped_session(session_factory)
349
350        self.session = session_factory()
351
352        Base.metadata.create_all(self.engine)
353
354        self.session.commit()
engine
session
def initiate_database(self, url, database_name):
374    def initiate_database(self, url, database_name):  # CREATION
375        """Creates the database.
376
377        Parameters
378        ----------
379        url : str
380            The URL for the database.
381        database_name : str
382            The name of the database.
383        """
384        engine = create_engine(url)
385        conn = engine.connect()
386        conn.execute("commit")
387        conn.execute("create database " + database_name)
388        conn.close()

Creates the database.

Parameters
  • url (str): The URL for the database.
  • database_name (str): The name of the database.
def commit(self):
390    def commit(self):
391        """Commits the session."""
392        try:
393            self.session.commit()
394        except SQLAlchemyError as e:
395            self.session.rollback()
396            print(str(e))

Commits the session.

def init_engine(self, url):
398    def init_engine(self, url):
399        """Initializes the SQLAlchemy engine.
400
401        Parameters
402        ----------
403        url : str
404            The URL for the database.
405
406        Returns
407        -------
408        sqlalchemy.engine.base.Engine
409            The SQLAlchemy engine.
410
411        """
412        if not url or url == "None" or url == "False":
413            directory = os.getcwd()
414
415            if not os.path.isdir(directory + "/db"):
416                os.mkdir(directory + "/db")
417
418            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
419
420        if url[0:6] == "sqlite":
421            self.type = "sqlite"
422        else:
423            self.type = "normal"
424
425        if url[0:6] == "sqlite":
426            engine = create_engine(url, echo=False)
427            self.chunks_count = 50
428
429        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
430            # postgresql
431            self.chunks_count = 50000
432            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
433
434        return engine  # poolclass=NullPool

Initializes the SQLAlchemy engine.

Parameters
  • url (str): The URL for the database.
Returns
  • sqlalchemy.engine.base.Engine: The SQLAlchemy engine.
def get_dict_by_classes( self, classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None):
440    def get_dict_by_classes(
441        self,
442        classes,
443        ion_type,
444        nominal_mzs,
445        ion_charge,
446        molecular_search_settings,
447        adducts=None,
448    ):
449        """Returns a dictionary of molecular formulas.
450
451        Parameters
452        ----------
453        classes : list
454            The list of classes.
455        ion_type : str
456            The ion type.
457        nominal_mzs : list
458            The list of nominal m/z values.
459        ion_charge : int
460            The ion charge.
461        molecular_search_settings : MolecularFormulaSearchSettings
462            The molecular formula search settings.
463        adducts : list, optional
464            The list of adducts. Default is None.
465
466        Returns
467        -------
468        dict
469            The dictionary of molecular formulas.
470
471        Notes
472        -----
473        Known issue, when using SQLite:
474        if the number of classes and nominal_m/zs are higher than 999 the query will fail
475        Solution: use postgres or split query
476        """
477        verbose = molecular_search_settings.verbose_processing
478
479        def query_normal(class_list, len_adduct):
480            """query for normal database
481
482            Parameters
483            ----------
484            class_list : list
485                The list of classes.
486            len_adduct : int
487                The length of the adduct.
488
489            Returns
490            -------
491            sqlalchemy.orm.query.Query
492                The query.
493            """
494            base_query = (
495                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
496                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
497                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
498            )
499
500            return base_query.filter(
501                and_(
502                    HeteroAtoms.name.in_(class_list),
503                    and_(
504                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
505                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
506                        and_(
507                            (
508                                (
509                                    CarbonHydrogen.h
510                                    + HeteroAtoms.halogens_count
511                                    - len_adduct
512                                )
513                                / CarbonHydrogen.c
514                            )
515                            >= molecular_search_settings.min_hc_filter,
516                            (
517                                (
518                                    CarbonHydrogen.h
519                                    + HeteroAtoms.halogens_count
520                                    - len_adduct
521                                )
522                                / CarbonHydrogen.c
523                            )
524                            <= molecular_search_settings.max_hc_filter,
525                            CarbonHydrogen.C
526                            >= molecular_search_settings.usedAtoms.get("C")[0],
527                            CarbonHydrogen.c
528                            <= molecular_search_settings.usedAtoms.get("C")[1],
529                            CarbonHydrogen.h
530                            >= molecular_search_settings.usedAtoms.get("H")[0],
531                            CarbonHydrogen.h
532                            <= molecular_search_settings.usedAtoms.get("H")[1],
533                        ),
534                    ),
535                )
536            )
537
538        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
539            """add molecular formula to dict
540
541            Parameters
542            ----------
543            formulas : sqlalchemy.orm.query.Query
544                The query.
545            ion_type : str
546                The ion type.
547            ion_charge : int
548                The ion charge.
549            adduct_atom : str, optional
550                The adduct atom. Default is None.
551
552            Returns
553            -------
554            dict
555                The dictionary of molecular formulas.
556
557            """
558            "organize data by heteroatom classes"
559            dict_res = {}
560
561            def nominal_mass_by_ion_type(formula_obj):
562                if ion_type == Labels.protonated_de_ion:
563                    return int(formula_obj._protonated_mz(ion_charge))
564
565                elif ion_type == Labels.radical_ion:
566                    return int(formula_obj._radical_mz(ion_charge))
567
568                elif ion_type == Labels.adduct_ion and adduct_atom:
569                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
570            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose):
571                nominal_mz = nominal_mass_by_ion_type(formula_obj)
572
573                if self.type != "normal":
574                    if not nominal_mz in nominal_mzs:
575                        continue
576                classe = classe_obj.name
577
578                # classe_str = formula.classe_string
579
580                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
581
582                formula_dict = formula_obj.to_dict()
583
584                if formula_dict.get("O"):
585                    if (
586                        formula_dict.get("O") / formula_dict.get("C")
587                        >= molecular_search_settings.max_oc_filter
588                    ):
589                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
590                        continue
591                    elif (
592                        formula_dict.get("O") / formula_dict.get("C")
593                        <= molecular_search_settings.min_oc_filter
594                    ):
595                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
596                        continue
597                    # if formula_dict.get("P"):
598
599                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
600
601                    #        continue
602
603                if classe in dict_res.keys():
604                    if nominal_mz in dict_res[classe].keys():
605                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
606
607                    else:
608                        dict_res.get(classe)[nominal_mz] = [formula_obj]
609
610                else:
611                    dict_res[classe] = {nominal_mz: [formula_obj]}
612
613            return dict_res
614
615        len_adducts = 0
616        if ion_type == Labels.adduct_ion:
617            len_adducts = 1
618
619        query = query_normal(classes, len_adducts)
620
621        if ion_type == Labels.protonated_de_ion:
622            if self.type == "normal":
623                query = query.filter(
624                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
625                        nominal_mzs
626                    )
627                )
628
629            return add_dict_formula(query, ion_type, ion_charge)
630
631        if ion_type == Labels.radical_ion:
632            if self.type == "normal":
633                query = query.filter(
634                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
635                        nominal_mzs
636                    )
637                )
638            return add_dict_formula(query, ion_type, ion_charge)
639
640        if ion_type == Labels.adduct_ion:
641            dict_res = {}
642            if adducts:
643                for atom in adducts:
644                    if self.type == "normal":
645                        query = query.filter(
646                            func.floor(
647                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
648                            ).in_(nominal_mzs)
649                        )
650                    dict_res[atom] = add_dict_formula(
651                        query, ion_type, ion_charge, adduct_atom=atom
652                    )
653                return dict_res
654        # dump all objs to memory
655        self.session.expunge_all()

Returns a dictionary of molecular formulas.

Parameters
  • classes (list): The list of classes.
  • ion_type (str): The ion type.
  • nominal_mzs (list): The list of nominal m/z values.
  • ion_charge (int): The ion charge.
  • molecular_search_settings (MolecularFormulaSearchSettings): The molecular formula search settings.
  • adducts (list, optional): The list of adducts. Default is None.
Returns
  • dict: The dictionary of molecular formulas.
Notes

Known issue, when using SQLite: if the number of classes and nominal_m/zs are higher than 999 the query will fail Solution: use postgres or split query

def check_entry(self, classe, ion_type, molecular_search_settings):
657    def check_entry(self, classe, ion_type, molecular_search_settings):
658        """Checks if a molecular formula is in the database.
659
660        Parameters
661        ----------
662        classe : str
663            The class of the molecular formula.
664        ion_type : str
665            The ion type.
666        molecular_search_settings : MolecularFormulaSearchSettings
667            The molecular formula search settings.
668
669        Returns
670        -------
671        sqlalchemy.orm.query.Query
672            The query.
673        """
674        #  get all classes, ion_type, ion charge as str add to a dict or list
675        #  then check if class in database
676        has_class = self.session.query(
677            exists().where((MolecularFormulaLink.classe == classe))
678        )
679
680        return has_class

Checks if a molecular formula is in the database.

Parameters
  • classe (str): The class of the molecular formula.
  • ion_type (str): The ion type.
  • molecular_search_settings (MolecularFormulaSearchSettings): The molecular formula search settings.
Returns
  • sqlalchemy.orm.query.Query: The query.
def get_all_classes(self):
682    def get_all_classes(self):
683        """Returns a list of all classes in the database."""
684        query = self.session.query(
685            MolecularFormulaLink.classe.distinct().label("classe")
686        )
687
688        return query.all()

Returns a list of all classes in the database.

def get_all(self):
690    def get_all(
691        self,
692    ):
693        """Returns a list of all molecular formulas in the database."""
694        mol_formulas = self.session.query(MolecularFormulaLink).all()
695
696        return mol_formulas

Returns a list of all molecular formulas in the database.

def delete_entry(self, row):
698    def delete_entry(self, row):
699        """Deletes a molecular formula from the database."""
700        try:
701            self.session.delete(row)
702            self.session.commit()
703
704        except SQLAlchemyError as e:
705            self.session.rollback()
706            print(str(e))

Deletes a molecular formula from the database.

def purge(self, cls):
708    def purge(self, cls):
709        """Deletes all molecular formulas from the database.
710
711        Notes
712        -------
713        Careful, this will delete the entire database table
714
715        """
716        self.session.query(cls).delete()
717        self.session.commit()

Deletes all molecular formulas from the database.

Notes

Careful, this will delete the entire database table

def clear_data(self):
719    def clear_data(self):
720        """Clears the database."""
721        meta = Base.metadata
722        for table in reversed(meta.sorted_tables):
723            print("Clear table %s" % table)
724            self.session.execute(table.delete())
725        self.session.commit()

Clears the database.

def close(self, commit=True):
727    def close(self, commit=True):
728        """Closes the database.
729
730        Parameters
731        ----------
732        commit : bool, optional
733            Whether to commit the session. Default is True.
734        """
735        # make sure the dbconnection gets closed
736
737        if commit:
738            self.commit()
739        self.session.close()
740        self.engine.dispose()

Closes the database.

Parameters
  • commit (bool, optional): Whether to commit the session. Default is True.
def add_engine_pidguard(self, engine):
742    def add_engine_pidguard(self, engine):
743        """Adds multiprocessing guards.
744
745        Forces a connection to be reconnected if it is detected
746        as having been shared to a sub-process.
747
748        Parameters
749        ----------
750        engine : sqlalchemy.engine.base.Engine
751            The SQLAlchemy engine.
752
753        """
754        import os
755        import warnings
756
757        @event.listens_for(engine, "connect")
758        def connect(dbapi_connection, connection_record):
759            """Forces a connection to be reconnected if it is detected
760
761            Parameters
762            ----------
763            dbapi_connection : sqlalchemy.engine.base.Engine
764                The SQLAlchemy engine.
765            connection_record : sqlalchemy.engine.base.Engine
766                The SQLAlchemy engine.
767            """
768            connection_record.info["pid"] = os.getpid()
769
770        @event.listens_for(engine, "checkout")
771        def checkout(dbapi_connection, connection_record, connection_proxy):
772            """Forces a connection to be reconnected if it is detected
773
774            Parameters
775            ----------
776            dbapi_connection : sqlalchemy.engine.base.Engine
777                The SQLAlchemy engine.
778            connection_record : sqlalchemy.engine.base.Engine
779                The SQLAlchemy engine.
780            connection_proxy : sqlalchemy.engine.base.Engine
781                The SQLAlchemy engine.
782
783            Raises
784            ------
785            exc.DisconnectionError
786                If the connection record belongs to a different process.
787
788            """
789            pid = os.getpid()
790            if connection_record.info["pid"] != pid:
791                # substitute log.debug() or similar here as desired
792                warnings.warn(
793                    "Parent process %(orig)s forked (%(newproc)s) with an open "
794                    "database connection, "
795                    "which is being discarded and recreated."
796                    % {"newproc": pid, "orig": connection_record.info["pid"]}
797                )
798                connection_record.connection = connection_proxy.connection = None
799                raise exc.DisconnectionError(
800                    "Connection record belongs to pid %s, "
801                    "attempting to check out in pid %s"
802                    % (connection_record.info["pid"], pid)
803                )

Adds multiprocessing guards.

Forces a connection to be reconnected if it is detected as having been shared to a sub-process.

Parameters
  • engine (sqlalchemy.engine.base.Engine): The SQLAlchemy engine.