corems.molecular_id.factory.molecularSQL
1import sys 2 3import json 4import os 5 6import tqdm 7from sqlalchemy import ( 8 Column, 9 Float, 10 ForeignKey, 11 Integer, 12 String, 13 and_, 14 create_engine, 15 event, 16 exc, 17 func, 18) 19from sqlalchemy.exc import SQLAlchemyError 20from sqlalchemy.ext.associationproxy import association_proxy 21from sqlalchemy.ext.declarative import declarative_base 22from sqlalchemy.ext.hybrid import hybrid_method, hybrid_property 23from sqlalchemy.orm import backref, relationship 24from sqlalchemy.orm.scoping import scoped_session 25from sqlalchemy.orm.session import sessionmaker 26from sqlalchemy.sql.operators import exists 27from sqlalchemy.sql.schema import UniqueConstraint 28 29from corems.encapsulation.constant import Atoms, Labels 30 31Base = declarative_base() 32 33 34class HeteroAtoms(Base): 35 """HeteroAtoms class for the heteroAtoms table in the SQLite database. 36 37 Attributes 38 ---------- 39 id : int 40 The primary key for the table. 41 name : str 42 The name of the heteroAtoms class. 43 halogensCount : int 44 The number of halogens in the heteroAtoms class. 45 carbonHydrogen : relationship 46 The relationship to the carbonHydrogen table. 47 48 Methods 49 ------- 50 * __repr__() 51 Returns the string representation of the object. 52 * to_dict() 53 Returns the heteroAtoms class as a dictionary. 54 * halogens_count() 55 Returns the number of halogens as a float. 56 57 58 """ 59 60 __tablename__ = "heteroAtoms" 61 62 id = Column(Integer, primary_key=True, unique=True, nullable=False) 63 64 name = Column(String, unique=True, nullable=False) 65 66 halogensCount = Column(Integer, unique=False, nullable=False) 67 68 carbonHydrogen = relationship( 69 "CarbonHydrogen", secondary="molecularformula", viewonly=True 70 ) 71 72 def __repr__(self): 73 return "<HeteroAtoms Model {} class {}>".format(self.id, self.name) 74 75 @hybrid_property 76 def halogens_count(cls): 77 """Returns the number of halogens as a float.""" 78 return cls.halogensCount.cast(Float) 79 80 def to_dict(self): 81 """Returns the heteroAtoms class as a dictionary.""" 82 return json.loads(self.name) 83 84 85class CarbonHydrogen(Base): 86 """CarbonHydrogen class for the carbonHydrogen table in the SQLite database. 87 88 Attributes 89 ---------- 90 id : int 91 The primary key for the table. 92 C : int 93 The number of carbon atoms. 94 H : int 95 The number of hydrogen atoms. 96 heteroAtoms : relationship 97 The relationship to the heteroAtoms table. 98 99 Methods 100 ------- 101 * __repr__() 102 Returns the string representation of the object. 103 * mass() 104 Returns the mass of the carbonHydrogen class as a float. 105 * c() 106 Returns the number of carbon atoms as a float. 107 * h() 108 Returns the number of hydrogen atoms as a float. 109 * dbe() 110 Returns the double bond equivalent as a float. 111 112 """ 113 114 __tablename__ = "carbonHydrogen" 115 __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),) 116 117 id = Column(Integer, primary_key=True, unique=True, nullable=False) 118 119 C = Column(Integer, nullable=False) 120 121 H = Column(Integer, nullable=False) 122 123 heteroAtoms = relationship( 124 "HeteroAtoms", secondary="molecularformula", viewonly=True 125 ) 126 127 def __repr__(self): 128 """Returns the string representation of the object.""" 129 return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H) 130 131 @property 132 def mass(self): 133 """Returns the mass of the carbonHydrogen class as a float.""" 134 return (self.C * Atoms.atomic_masses.get("C")) + ( 135 self.H * Atoms.atomic_masses.get("H") 136 ) 137 138 @hybrid_property 139 def c(cls): 140 """Returns the number of carbon atoms as a float.""" 141 return cls.C.cast(Float) 142 143 @hybrid_property 144 def h(cls): 145 """Returns the number of hydrogen atoms as a float.""" 146 return cls.H.cast(Float) 147 148 @hybrid_property 149 def dbe(cls): 150 """Returns the double bond equivalent as a float.""" 151 # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1 152 return float(cls.C) - float(cls.H / 2) + 1 153 154 155# 264888.88 ms 156class MolecularFormulaLink(Base): 157 """MolecularFormulaLink class for the molecularformula table in the SQLite database. 158 159 Attributes 160 ---------- 161 heteroAtoms_id : int 162 The foreign key for the heteroAtoms table. 163 carbonHydrogen_id : int 164 The foreign key for the carbonHydrogen table. 165 mass : float 166 The mass of the molecular formula. 167 DBE : float 168 The double bond equivalent of the molecular formula. 169 carbonHydrogen : relationship 170 The relationship to the carbonHydrogen table. 171 heteroAtoms : relationship 172 The relationship to the heteroAtoms table. 173 C : association_proxy 174 The association proxy for the carbonHydrogen table. 175 H : association_proxy 176 The association proxy for the carbonHydrogen table. 177 classe : association_proxy 178 The association proxy for the heteroAtoms table. 179 180 Methods 181 ------- 182 * __repr__() 183 Returns the string representation of the object. 184 * to_dict() 185 Returns the molecular formula as a dictionary. 186 * formula_string() 187 Returns the molecular formula as a string. 188 * classe_string() 189 Returns the heteroAtoms class as a string. 190 * _adduct_mz(ion_charge, adduct_atom) 191 Returns the m/z of the adduct ion as a float. 192 * _protonated_mz(ion_charge) 193 Returns the m/z of the protonated ion as a float. 194 * _radical_mz(ion_charge) 195 Returns the m/z of the radical ion as a float. 196 197 198 199 """ 200 201 __tablename__ = "molecularformula" 202 __table_args__ = ( 203 UniqueConstraint("heteroAtoms_id", "carbonHydrogen_id", name="unique_molform"), 204 ) 205 206 # id = Column(Integer, primary_key=True, 207 # unique=True, 208 # nullable=False) 209 210 heteroAtoms_id = Column(Integer, ForeignKey("heteroAtoms.id"), primary_key=True) 211 212 carbonHydrogen_id = Column( 213 Integer, ForeignKey("carbonHydrogen.id"), primary_key=True 214 ) 215 216 mass = Column(Float) 217 218 DBE = Column(Float) 219 220 carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc")) 221 222 heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc")) 223 224 C = association_proxy("carbonHydrogen", "C") 225 226 H = association_proxy("carbonHydrogen", "H") 227 228 classe = association_proxy("heteroAtoms", "name") 229 230 def to_dict(self): 231 """Returns the molecular formula as a dictionary. 232 233 Returns 234 ------- 235 dict 236 The molecular formula as a dictionary. 237 """ 238 carbon = {"C": self.C, "H": self.H} 239 classe = json.loads(self.classe) 240 if self.classe == '{"HC": ""}': 241 return {**carbon} 242 else: 243 return {**carbon, **classe} 244 245 @property 246 def formula_string(self): 247 """Returns the molecular formula as a string.""" 248 class_dict = self.to_dict() 249 class_str = " ".join( 250 [atom + str(class_dict[atom]) for atom in class_dict.keys()] 251 ) 252 return class_str.strip() 253 254 @property 255 def classe_string(self): 256 """Returns the heteroAtoms class as a string.""" 257 class_dict = json.loads(self.classe) 258 class_str = " ".join( 259 [atom + str(class_dict[atom]) for atom in class_dict.keys()] 260 ) 261 return class_str.strip() 262 263 @hybrid_method 264 def _adduct_mz(self, ion_charge, adduct_atom): 265 """Returns the m/z of the adduct ion as a float.""" 266 return ( 267 self.mass 268 + (Atoms.atomic_masses.get(adduct_atom)) 269 + (ion_charge * -1 * Atoms.electron_mass) 270 ) / abs(ion_charge) 271 272 @hybrid_method 273 def _protonated_mz(self, ion_charge): 274 """Returns the m/z of the protonated ion as a float.""" 275 return ( 276 self.mass 277 + (ion_charge * Atoms.atomic_masses.get("H")) 278 + (ion_charge * -1 * Atoms.electron_mass) 279 ) / abs(ion_charge) 280 281 @hybrid_method 282 def _radical_mz(self, ion_charge): 283 """Returns the m/z of the radical ion as a float.""" 284 return (self.mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge) 285 286 def __repr__(self): 287 """Returns the string representation of the object.""" 288 return "<MolecularFormulaLink Model {}>".format(self.formula_string) 289 290 291class MolForm_SQL: 292 """MolForm_SQL class for the SQLite database. 293 294 Attributes 295 ---------- 296 engine : sqlalchemy.engine.base.Engine 297 The SQLAlchemy engine. 298 session : sqlalchemy.orm.session.Session 299 The SQLAlchemy session. 300 type : str 301 The type of database. 302 chunks_count : int 303 The number of chunks to use when querying the database. 304 305 Methods 306 ------- 307 * __init__(url=None, echo=False) 308 Initializes the database. 309 * __exit__(exc_type, exc_val, exc_tb) 310 Closes the database. 311 * initiate_database(url, database_name) 312 Creates the database. 313 * commit() 314 Commits the session. 315 * init_engine(url) 316 Initializes the SQLAlchemy engine. 317 * __enter__() 318 319 * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None) 320 Returns a dictionary of molecular formulas. 321 * check_entry(classe, ion_type, molecular_search_settings) 322 Checks if a molecular formula is in the database. 323 * get_all_classes() 324 Returns a list of all classes in the database. 325 * get_all() 326 Returns a list of all molecular formulas in the database. 327 * delete_entry(row) 328 Deletes a molecular formula from the database. 329 * purge(cls) 330 Deletes all molecular formulas from the database. 331 * clear_data() 332 Clears the database. 333 * close(commit=True) 334 Closes the database. 335 * add_engine_pidguard(engine) 336 Adds multiprocessing guards. 337 338 """ 339 340 def __init__(self, url=None, echo=False): 341 self.engine = self.init_engine(url) 342 343 self.add_engine_pidguard(self.engine) 344 345 session_factory = sessionmaker(bind=self.engine) 346 347 Session = scoped_session(session_factory) 348 349 self.session = session_factory() 350 351 Base.metadata.create_all(self.engine) 352 353 self.session.commit() 354 355 def __exit__(self, exc_type, exc_val, exc_tb): 356 """Closes the database. 357 358 Parameters 359 ---------- 360 exc_type : str 361 The exception type. 362 exc_val : str 363 The exception value. 364 exc_tb : str 365 The exception traceback. 366 """ 367 # make sure the dbconnection gets closed 368 369 self.commit() 370 self.session.close() 371 self.engine.dispose() 372 373 def initiate_database(self, url, database_name): # CREATION 374 """Creates the database. 375 376 Parameters 377 ---------- 378 url : str 379 The URL for the database. 380 database_name : str 381 The name of the database. 382 """ 383 engine = create_engine(url) 384 conn = engine.connect() 385 conn.execute("commit") 386 conn.execute("create database " + database_name) 387 conn.close() 388 389 def commit(self): 390 """Commits the session.""" 391 try: 392 self.session.commit() 393 except SQLAlchemyError as e: 394 self.session.rollback() 395 print(str(e)) 396 397 def init_engine(self, url): 398 """Initializes the SQLAlchemy engine. 399 400 Parameters 401 ---------- 402 url : str 403 The URL for the database. 404 405 Returns 406 ------- 407 sqlalchemy.engine.base.Engine 408 The SQLAlchemy engine. 409 410 """ 411 if not url or url == "None" or url == "False": 412 directory = os.getcwd() 413 414 if not os.path.isdir(directory + "/db"): 415 os.mkdir(directory + "/db") 416 417 url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory) 418 419 if url[0:6] == "sqlite": 420 self.type = "sqlite" 421 else: 422 self.type = "normal" 423 424 if url[0:6] == "sqlite": 425 engine = create_engine(url, echo=False) 426 self.chunks_count = 50 427 428 elif url[0:10] == "postgresql" or url[0:8] == "postgres": 429 # postgresql 430 self.chunks_count = 50000 431 engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT") 432 433 return engine # poolclass=NullPool 434 435 def __enter__(self): 436 """Returns the object.""" 437 return self 438 439 def get_dict_by_classes( 440 self, 441 classes, 442 ion_type, 443 nominal_mzs, 444 ion_charge, 445 molecular_search_settings, 446 adducts=None, 447 ): 448 """Returns a dictionary of molecular formulas. 449 450 Parameters 451 ---------- 452 classes : list 453 The list of classes. 454 ion_type : str 455 The ion type. 456 nominal_mzs : list 457 The list of nominal m/z values. 458 ion_charge : int 459 The ion charge. 460 molecular_search_settings : MolecularFormulaSearchSettings 461 The molecular formula search settings. 462 adducts : list, optional 463 The list of adducts. Default is None. 464 465 Returns 466 ------- 467 dict 468 The dictionary of molecular formulas. 469 470 Notes 471 ----- 472 Known issue, when using SQLite: 473 if the number of classes and nominal_m/zs are higher than 999 the query will fail 474 Solution: use postgres or split query 475 """ 476 verbose = molecular_search_settings.verbose_processing 477 478 def query_normal(class_list, len_adduct): 479 """query for normal database 480 481 Parameters 482 ---------- 483 class_list : list 484 The list of classes. 485 len_adduct : int 486 The length of the adduct. 487 488 Returns 489 ------- 490 sqlalchemy.orm.query.Query 491 The query. 492 """ 493 base_query = ( 494 self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms) 495 .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id) 496 .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id) 497 ) 498 499 return base_query.filter( 500 and_( 501 HeteroAtoms.name.in_(class_list), 502 and_( 503 MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 504 MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 505 and_( 506 ( 507 ( 508 CarbonHydrogen.h 509 + HeteroAtoms.halogens_count 510 - len_adduct 511 ) 512 / CarbonHydrogen.c 513 ) 514 >= molecular_search_settings.min_hc_filter, 515 ( 516 ( 517 CarbonHydrogen.h 518 + HeteroAtoms.halogens_count 519 - len_adduct 520 ) 521 / CarbonHydrogen.c 522 ) 523 <= molecular_search_settings.max_hc_filter, 524 CarbonHydrogen.C 525 >= molecular_search_settings.usedAtoms.get("C")[0], 526 CarbonHydrogen.c 527 <= molecular_search_settings.usedAtoms.get("C")[1], 528 CarbonHydrogen.h 529 >= molecular_search_settings.usedAtoms.get("H")[0], 530 CarbonHydrogen.h 531 <= molecular_search_settings.usedAtoms.get("H")[1], 532 ), 533 ), 534 ) 535 ) 536 537 def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None): 538 """add molecular formula to dict 539 540 Parameters 541 ---------- 542 formulas : sqlalchemy.orm.query.Query 543 The query. 544 ion_type : str 545 The ion type. 546 ion_charge : int 547 The ion charge. 548 adduct_atom : str, optional 549 The adduct atom. Default is None. 550 551 Returns 552 ------- 553 dict 554 The dictionary of molecular formulas. 555 556 """ 557 "organize data by heteroatom classes" 558 dict_res = {} 559 560 def nominal_mass_by_ion_type(formula_obj): 561 if ion_type == Labels.protonated_de_ion: 562 return int(formula_obj._protonated_mz(ion_charge)) 563 564 elif ion_type == Labels.radical_ion: 565 return int(formula_obj._radical_mz(ion_charge)) 566 567 elif ion_type == Labels.adduct_ion and adduct_atom: 568 return int(formula_obj._adduct_mz(ion_charge, adduct_atom)) 569 for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose): 570 nominal_mz = nominal_mass_by_ion_type(formula_obj) 571 572 if self.type != "normal": 573 if not nominal_mz in nominal_mzs: 574 continue 575 classe = classe_obj.name 576 577 # classe_str = formula.classe_string 578 579 # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str) 580 581 formula_dict = formula_obj.to_dict() 582 583 if formula_dict.get("O"): 584 if ( 585 formula_dict.get("O") / formula_dict.get("C") 586 >= molecular_search_settings.max_oc_filter 587 ): 588 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter) 589 continue 590 elif ( 591 formula_dict.get("O") / formula_dict.get("C") 592 <= molecular_search_settings.min_oc_filter 593 ): 594 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter) 595 continue 596 # if formula_dict.get("P"): 597 598 # if not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter: 599 600 # continue 601 602 if classe in dict_res.keys(): 603 if nominal_mz in dict_res[classe].keys(): 604 dict_res.get(classe).get(nominal_mz).append(formula_obj) 605 606 else: 607 dict_res.get(classe)[nominal_mz] = [formula_obj] 608 609 else: 610 dict_res[classe] = {nominal_mz: [formula_obj]} 611 612 return dict_res 613 614 len_adducts = 0 615 if ion_type == Labels.adduct_ion: 616 len_adducts = 1 617 618 query = query_normal(classes, len_adducts) 619 620 if ion_type == Labels.protonated_de_ion: 621 if self.type == "normal": 622 query = query.filter( 623 func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_( 624 nominal_mzs 625 ) 626 ) 627 628 return add_dict_formula(query, ion_type, ion_charge) 629 630 if ion_type == Labels.radical_ion: 631 if self.type == "normal": 632 query = query.filter( 633 func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_( 634 nominal_mzs 635 ) 636 ) 637 return add_dict_formula(query, ion_type, ion_charge) 638 639 if ion_type == Labels.adduct_ion: 640 dict_res = {} 641 if adducts: 642 for atom in adducts: 643 if self.type == "normal": 644 query = query.filter( 645 func.floor( 646 MolecularFormulaLink._adduct_mz(ion_charge, atom) 647 ).in_(nominal_mzs) 648 ) 649 dict_res[atom] = add_dict_formula( 650 query, ion_type, ion_charge, adduct_atom=atom 651 ) 652 return dict_res 653 # dump all objs to memory 654 self.session.expunge_all() 655 656 def check_entry(self, classe, ion_type, molecular_search_settings): 657 """Checks if a molecular formula is in the database. 658 659 Parameters 660 ---------- 661 classe : str 662 The class of the molecular formula. 663 ion_type : str 664 The ion type. 665 molecular_search_settings : MolecularFormulaSearchSettings 666 The molecular formula search settings. 667 668 Returns 669 ------- 670 sqlalchemy.orm.query.Query 671 The query. 672 """ 673 # get all classes, ion_type, ion charge as str add to a dict or list 674 # then check if class in database 675 has_class = self.session.query( 676 exists().where((MolecularFormulaLink.classe == classe)) 677 ) 678 679 return has_class 680 681 def get_all_classes(self): 682 """Returns a list of all classes in the database.""" 683 query = self.session.query( 684 MolecularFormulaLink.classe.distinct().label("classe") 685 ) 686 687 return query.all() 688 689 def get_all( 690 self, 691 ): 692 """Returns a list of all molecular formulas in the database.""" 693 mol_formulas = self.session.query(MolecularFormulaLink).all() 694 695 return mol_formulas 696 697 def delete_entry(self, row): 698 """Deletes a molecular formula from the database.""" 699 try: 700 self.session.delete(row) 701 self.session.commit() 702 703 except SQLAlchemyError as e: 704 self.session.rollback() 705 print(str(e)) 706 707 def purge(self, cls): 708 """Deletes all molecular formulas from the database. 709 710 Notes 711 ------- 712 Careful, this will delete the entire database table 713 714 """ 715 self.session.query(cls).delete() 716 self.session.commit() 717 718 def clear_data(self): 719 """Clears the database.""" 720 meta = Base.metadata 721 for table in reversed(meta.sorted_tables): 722 print("Clear table %s" % table) 723 self.session.execute(table.delete()) 724 self.session.commit() 725 726 def close(self, commit=True): 727 """Closes the database. 728 729 Parameters 730 ---------- 731 commit : bool, optional 732 Whether to commit the session. Default is True. 733 """ 734 # make sure the dbconnection gets closed 735 736 if commit: 737 self.commit() 738 self.session.close() 739 self.engine.dispose() 740 741 def add_engine_pidguard(self, engine): 742 """Adds multiprocessing guards. 743 744 Forces a connection to be reconnected if it is detected 745 as having been shared to a sub-process. 746 747 Parameters 748 ---------- 749 engine : sqlalchemy.engine.base.Engine 750 The SQLAlchemy engine. 751 752 """ 753 import os 754 import warnings 755 756 @event.listens_for(engine, "connect") 757 def connect(dbapi_connection, connection_record): 758 """Forces a connection to be reconnected if it is detected 759 760 Parameters 761 ---------- 762 dbapi_connection : sqlalchemy.engine.base.Engine 763 The SQLAlchemy engine. 764 connection_record : sqlalchemy.engine.base.Engine 765 The SQLAlchemy engine. 766 """ 767 connection_record.info["pid"] = os.getpid() 768 769 @event.listens_for(engine, "checkout") 770 def checkout(dbapi_connection, connection_record, connection_proxy): 771 """Forces a connection to be reconnected if it is detected 772 773 Parameters 774 ---------- 775 dbapi_connection : sqlalchemy.engine.base.Engine 776 The SQLAlchemy engine. 777 connection_record : sqlalchemy.engine.base.Engine 778 The SQLAlchemy engine. 779 connection_proxy : sqlalchemy.engine.base.Engine 780 The SQLAlchemy engine. 781 782 Raises 783 ------ 784 exc.DisconnectionError 785 If the connection record belongs to a different process. 786 787 """ 788 pid = os.getpid() 789 if connection_record.info["pid"] != pid: 790 # substitute log.debug() or similar here as desired 791 warnings.warn( 792 "Parent process %(orig)s forked (%(newproc)s) with an open " 793 "database connection, " 794 "which is being discarded and recreated." 795 % {"newproc": pid, "orig": connection_record.info["pid"]} 796 ) 797 connection_record.connection = connection_proxy.connection = None 798 raise exc.DisconnectionError( 799 "Connection record belongs to pid %s, " 800 "attempting to check out in pid %s" 801 % (connection_record.info["pid"], pid) 802 ) 803 804 805if __name__ == "__main__": 806 sql = MolForm_SQL(url="sqlite:///") 807 808 dict_data = {"name": '{"O": 12}'} 809 dict_data2 = {"name": '{"O": 13}'} 810 hetero_obj = HeteroAtoms(**dict_data) 811 hetero_obj2 = HeteroAtoms(**dict_data2) 812 sql.session.add(hetero_obj) 813 sql.session.add(hetero_obj2) 814 815 print(sql.session.query(HeteroAtoms).all()) 816 # molecular_search_settings = MolecularFormulaSearchSettings() 817 # sql = MolForm_SQL() 818 # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250) 819 # query = sql.get_by_classe('{"O": 12}', molecular_search_settings).filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250) 820 # classes = ['{"O": 12}']*1 821 # for i, classe in enumerate(classes): 822 # query = sql.get_by_classe(classe, molecular_search_settings) 823 # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250) 824 # for i in query.filter(MolecularFormulaLink.mass < 250): 825 826 # print(i._radical_mz(-1), i._protonated_mz(-1), i._adduct_mz(+2, "Na"), i.mass, i.to_dict(), i.formula_string) 827 #
The base class of the class hierarchy.
When called, it accepts no arguments and returns a new featureless instance that has no instance attributes and cannot be given any.
1185def _declarative_constructor(self, **kwargs): 1186 """A simple constructor that allows initialization from kwargs. 1187 1188 Sets attributes on the constructed instance using the names and 1189 values in ``kwargs``. 1190 1191 Only keys that are present as 1192 attributes of the instance's class are allowed. These could be, 1193 for example, any mapped columns or relationships. 1194 """ 1195 cls_ = type(self) 1196 for k in kwargs: 1197 if not hasattr(cls_, k): 1198 raise TypeError( 1199 "%r is an invalid keyword argument for %s" % (k, cls_.__name__) 1200 ) 1201 setattr(self, k, kwargs[k])
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and
values in kwargs
.
Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.
35class HeteroAtoms(Base): 36 """HeteroAtoms class for the heteroAtoms table in the SQLite database. 37 38 Attributes 39 ---------- 40 id : int 41 The primary key for the table. 42 name : str 43 The name of the heteroAtoms class. 44 halogensCount : int 45 The number of halogens in the heteroAtoms class. 46 carbonHydrogen : relationship 47 The relationship to the carbonHydrogen table. 48 49 Methods 50 ------- 51 * __repr__() 52 Returns the string representation of the object. 53 * to_dict() 54 Returns the heteroAtoms class as a dictionary. 55 * halogens_count() 56 Returns the number of halogens as a float. 57 58 59 """ 60 61 __tablename__ = "heteroAtoms" 62 63 id = Column(Integer, primary_key=True, unique=True, nullable=False) 64 65 name = Column(String, unique=True, nullable=False) 66 67 halogensCount = Column(Integer, unique=False, nullable=False) 68 69 carbonHydrogen = relationship( 70 "CarbonHydrogen", secondary="molecularformula", viewonly=True 71 ) 72 73 def __repr__(self): 74 return "<HeteroAtoms Model {} class {}>".format(self.id, self.name) 75 76 @hybrid_property 77 def halogens_count(cls): 78 """Returns the number of halogens as a float.""" 79 return cls.halogensCount.cast(Float) 80 81 def to_dict(self): 82 """Returns the heteroAtoms class as a dictionary.""" 83 return json.loads(self.name)
HeteroAtoms class for the heteroAtoms table in the SQLite database.
Attributes
- id (int): The primary key for the table.
- name (str): The name of the heteroAtoms class.
- halogensCount (int): The number of halogens in the heteroAtoms class.
- carbonHydrogen (relationship): The relationship to the carbonHydrogen table.
Methods
- __repr__() Returns the string representation of the object.
- to_dict() Returns the heteroAtoms class as a dictionary.
- halogens_count() Returns the number of halogens as a float.
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and
values in kwargs
.
Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.
86class CarbonHydrogen(Base): 87 """CarbonHydrogen class for the carbonHydrogen table in the SQLite database. 88 89 Attributes 90 ---------- 91 id : int 92 The primary key for the table. 93 C : int 94 The number of carbon atoms. 95 H : int 96 The number of hydrogen atoms. 97 heteroAtoms : relationship 98 The relationship to the heteroAtoms table. 99 100 Methods 101 ------- 102 * __repr__() 103 Returns the string representation of the object. 104 * mass() 105 Returns the mass of the carbonHydrogen class as a float. 106 * c() 107 Returns the number of carbon atoms as a float. 108 * h() 109 Returns the number of hydrogen atoms as a float. 110 * dbe() 111 Returns the double bond equivalent as a float. 112 113 """ 114 115 __tablename__ = "carbonHydrogen" 116 __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),) 117 118 id = Column(Integer, primary_key=True, unique=True, nullable=False) 119 120 C = Column(Integer, nullable=False) 121 122 H = Column(Integer, nullable=False) 123 124 heteroAtoms = relationship( 125 "HeteroAtoms", secondary="molecularformula", viewonly=True 126 ) 127 128 def __repr__(self): 129 """Returns the string representation of the object.""" 130 return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H) 131 132 @property 133 def mass(self): 134 """Returns the mass of the carbonHydrogen class as a float.""" 135 return (self.C * Atoms.atomic_masses.get("C")) + ( 136 self.H * Atoms.atomic_masses.get("H") 137 ) 138 139 @hybrid_property 140 def c(cls): 141 """Returns the number of carbon atoms as a float.""" 142 return cls.C.cast(Float) 143 144 @hybrid_property 145 def h(cls): 146 """Returns the number of hydrogen atoms as a float.""" 147 return cls.H.cast(Float) 148 149 @hybrid_property 150 def dbe(cls): 151 """Returns the double bond equivalent as a float.""" 152 # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1 153 return float(cls.C) - float(cls.H / 2) + 1
CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
Attributes
- id (int): The primary key for the table.
- C (int): The number of carbon atoms.
- H (int): The number of hydrogen atoms.
- heteroAtoms (relationship): The relationship to the heteroAtoms table.
Methods
- __repr__() Returns the string representation of the object.
- mass() Returns the mass of the carbonHydrogen class as a float.
- c() Returns the number of carbon atoms as a float.
- h() Returns the number of hydrogen atoms as a float.
- dbe() Returns the double bond equivalent as a float.
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and
values in kwargs
.
Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.
157class MolecularFormulaLink(Base): 158 """MolecularFormulaLink class for the molecularformula table in the SQLite database. 159 160 Attributes 161 ---------- 162 heteroAtoms_id : int 163 The foreign key for the heteroAtoms table. 164 carbonHydrogen_id : int 165 The foreign key for the carbonHydrogen table. 166 mass : float 167 The mass of the molecular formula. 168 DBE : float 169 The double bond equivalent of the molecular formula. 170 carbonHydrogen : relationship 171 The relationship to the carbonHydrogen table. 172 heteroAtoms : relationship 173 The relationship to the heteroAtoms table. 174 C : association_proxy 175 The association proxy for the carbonHydrogen table. 176 H : association_proxy 177 The association proxy for the carbonHydrogen table. 178 classe : association_proxy 179 The association proxy for the heteroAtoms table. 180 181 Methods 182 ------- 183 * __repr__() 184 Returns the string representation of the object. 185 * to_dict() 186 Returns the molecular formula as a dictionary. 187 * formula_string() 188 Returns the molecular formula as a string. 189 * classe_string() 190 Returns the heteroAtoms class as a string. 191 * _adduct_mz(ion_charge, adduct_atom) 192 Returns the m/z of the adduct ion as a float. 193 * _protonated_mz(ion_charge) 194 Returns the m/z of the protonated ion as a float. 195 * _radical_mz(ion_charge) 196 Returns the m/z of the radical ion as a float. 197 198 199 200 """ 201 202 __tablename__ = "molecularformula" 203 __table_args__ = ( 204 UniqueConstraint("heteroAtoms_id", "carbonHydrogen_id", name="unique_molform"), 205 ) 206 207 # id = Column(Integer, primary_key=True, 208 # unique=True, 209 # nullable=False) 210 211 heteroAtoms_id = Column(Integer, ForeignKey("heteroAtoms.id"), primary_key=True) 212 213 carbonHydrogen_id = Column( 214 Integer, ForeignKey("carbonHydrogen.id"), primary_key=True 215 ) 216 217 mass = Column(Float) 218 219 DBE = Column(Float) 220 221 carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc")) 222 223 heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc")) 224 225 C = association_proxy("carbonHydrogen", "C") 226 227 H = association_proxy("carbonHydrogen", "H") 228 229 classe = association_proxy("heteroAtoms", "name") 230 231 def to_dict(self): 232 """Returns the molecular formula as a dictionary. 233 234 Returns 235 ------- 236 dict 237 The molecular formula as a dictionary. 238 """ 239 carbon = {"C": self.C, "H": self.H} 240 classe = json.loads(self.classe) 241 if self.classe == '{"HC": ""}': 242 return {**carbon} 243 else: 244 return {**carbon, **classe} 245 246 @property 247 def formula_string(self): 248 """Returns the molecular formula as a string.""" 249 class_dict = self.to_dict() 250 class_str = " ".join( 251 [atom + str(class_dict[atom]) for atom in class_dict.keys()] 252 ) 253 return class_str.strip() 254 255 @property 256 def classe_string(self): 257 """Returns the heteroAtoms class as a string.""" 258 class_dict = json.loads(self.classe) 259 class_str = " ".join( 260 [atom + str(class_dict[atom]) for atom in class_dict.keys()] 261 ) 262 return class_str.strip() 263 264 @hybrid_method 265 def _adduct_mz(self, ion_charge, adduct_atom): 266 """Returns the m/z of the adduct ion as a float.""" 267 return ( 268 self.mass 269 + (Atoms.atomic_masses.get(adduct_atom)) 270 + (ion_charge * -1 * Atoms.electron_mass) 271 ) / abs(ion_charge) 272 273 @hybrid_method 274 def _protonated_mz(self, ion_charge): 275 """Returns the m/z of the protonated ion as a float.""" 276 return ( 277 self.mass 278 + (ion_charge * Atoms.atomic_masses.get("H")) 279 + (ion_charge * -1 * Atoms.electron_mass) 280 ) / abs(ion_charge) 281 282 @hybrid_method 283 def _radical_mz(self, ion_charge): 284 """Returns the m/z of the radical ion as a float.""" 285 return (self.mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge) 286 287 def __repr__(self): 288 """Returns the string representation of the object.""" 289 return "<MolecularFormulaLink Model {}>".format(self.formula_string)
MolecularFormulaLink class for the molecularformula table in the SQLite database.
Attributes
- heteroAtoms_id (int): The foreign key for the heteroAtoms table.
- carbonHydrogen_id (int): The foreign key for the carbonHydrogen table.
- mass (float): The mass of the molecular formula.
- DBE (float): The double bond equivalent of the molecular formula.
- carbonHydrogen (relationship): The relationship to the carbonHydrogen table.
- heteroAtoms (relationship): The relationship to the heteroAtoms table.
- C (association_proxy): The association proxy for the carbonHydrogen table.
- H (association_proxy): The association proxy for the carbonHydrogen table.
- classe (association_proxy): The association proxy for the heteroAtoms table.
Methods
- __repr__() Returns the string representation of the object.
- to_dict() Returns the molecular formula as a dictionary.
- formula_string() Returns the molecular formula as a string.
- classe_string() Returns the heteroAtoms class as a string.
- _adduct_mz(ion_charge, adduct_atom) Returns the m/z of the adduct ion as a float.
- _protonated_mz(ion_charge) Returns the m/z of the protonated ion as a float.
- _radical_mz(ion_charge) Returns the m/z of the radical ion as a float.
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and
values in kwargs
.
Only keys that are present as attributes of the instance's class are allowed. These could be, for example, any mapped columns or relationships.
231 def to_dict(self): 232 """Returns the molecular formula as a dictionary. 233 234 Returns 235 ------- 236 dict 237 The molecular formula as a dictionary. 238 """ 239 carbon = {"C": self.C, "H": self.H} 240 classe = json.loads(self.classe) 241 if self.classe == '{"HC": ""}': 242 return {**carbon} 243 else: 244 return {**carbon, **classe}
Returns the molecular formula as a dictionary.
Returns
- dict: The molecular formula as a dictionary.
292class MolForm_SQL: 293 """MolForm_SQL class for the SQLite database. 294 295 Attributes 296 ---------- 297 engine : sqlalchemy.engine.base.Engine 298 The SQLAlchemy engine. 299 session : sqlalchemy.orm.session.Session 300 The SQLAlchemy session. 301 type : str 302 The type of database. 303 chunks_count : int 304 The number of chunks to use when querying the database. 305 306 Methods 307 ------- 308 * __init__(url=None, echo=False) 309 Initializes the database. 310 * __exit__(exc_type, exc_val, exc_tb) 311 Closes the database. 312 * initiate_database(url, database_name) 313 Creates the database. 314 * commit() 315 Commits the session. 316 * init_engine(url) 317 Initializes the SQLAlchemy engine. 318 * __enter__() 319 320 * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None) 321 Returns a dictionary of molecular formulas. 322 * check_entry(classe, ion_type, molecular_search_settings) 323 Checks if a molecular formula is in the database. 324 * get_all_classes() 325 Returns a list of all classes in the database. 326 * get_all() 327 Returns a list of all molecular formulas in the database. 328 * delete_entry(row) 329 Deletes a molecular formula from the database. 330 * purge(cls) 331 Deletes all molecular formulas from the database. 332 * clear_data() 333 Clears the database. 334 * close(commit=True) 335 Closes the database. 336 * add_engine_pidguard(engine) 337 Adds multiprocessing guards. 338 339 """ 340 341 def __init__(self, url=None, echo=False): 342 self.engine = self.init_engine(url) 343 344 self.add_engine_pidguard(self.engine) 345 346 session_factory = sessionmaker(bind=self.engine) 347 348 Session = scoped_session(session_factory) 349 350 self.session = session_factory() 351 352 Base.metadata.create_all(self.engine) 353 354 self.session.commit() 355 356 def __exit__(self, exc_type, exc_val, exc_tb): 357 """Closes the database. 358 359 Parameters 360 ---------- 361 exc_type : str 362 The exception type. 363 exc_val : str 364 The exception value. 365 exc_tb : str 366 The exception traceback. 367 """ 368 # make sure the dbconnection gets closed 369 370 self.commit() 371 self.session.close() 372 self.engine.dispose() 373 374 def initiate_database(self, url, database_name): # CREATION 375 """Creates the database. 376 377 Parameters 378 ---------- 379 url : str 380 The URL for the database. 381 database_name : str 382 The name of the database. 383 """ 384 engine = create_engine(url) 385 conn = engine.connect() 386 conn.execute("commit") 387 conn.execute("create database " + database_name) 388 conn.close() 389 390 def commit(self): 391 """Commits the session.""" 392 try: 393 self.session.commit() 394 except SQLAlchemyError as e: 395 self.session.rollback() 396 print(str(e)) 397 398 def init_engine(self, url): 399 """Initializes the SQLAlchemy engine. 400 401 Parameters 402 ---------- 403 url : str 404 The URL for the database. 405 406 Returns 407 ------- 408 sqlalchemy.engine.base.Engine 409 The SQLAlchemy engine. 410 411 """ 412 if not url or url == "None" or url == "False": 413 directory = os.getcwd() 414 415 if not os.path.isdir(directory + "/db"): 416 os.mkdir(directory + "/db") 417 418 url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory) 419 420 if url[0:6] == "sqlite": 421 self.type = "sqlite" 422 else: 423 self.type = "normal" 424 425 if url[0:6] == "sqlite": 426 engine = create_engine(url, echo=False) 427 self.chunks_count = 50 428 429 elif url[0:10] == "postgresql" or url[0:8] == "postgres": 430 # postgresql 431 self.chunks_count = 50000 432 engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT") 433 434 return engine # poolclass=NullPool 435 436 def __enter__(self): 437 """Returns the object.""" 438 return self 439 440 def get_dict_by_classes( 441 self, 442 classes, 443 ion_type, 444 nominal_mzs, 445 ion_charge, 446 molecular_search_settings, 447 adducts=None, 448 ): 449 """Returns a dictionary of molecular formulas. 450 451 Parameters 452 ---------- 453 classes : list 454 The list of classes. 455 ion_type : str 456 The ion type. 457 nominal_mzs : list 458 The list of nominal m/z values. 459 ion_charge : int 460 The ion charge. 461 molecular_search_settings : MolecularFormulaSearchSettings 462 The molecular formula search settings. 463 adducts : list, optional 464 The list of adducts. Default is None. 465 466 Returns 467 ------- 468 dict 469 The dictionary of molecular formulas. 470 471 Notes 472 ----- 473 Known issue, when using SQLite: 474 if the number of classes and nominal_m/zs are higher than 999 the query will fail 475 Solution: use postgres or split query 476 """ 477 verbose = molecular_search_settings.verbose_processing 478 479 def query_normal(class_list, len_adduct): 480 """query for normal database 481 482 Parameters 483 ---------- 484 class_list : list 485 The list of classes. 486 len_adduct : int 487 The length of the adduct. 488 489 Returns 490 ------- 491 sqlalchemy.orm.query.Query 492 The query. 493 """ 494 base_query = ( 495 self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms) 496 .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id) 497 .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id) 498 ) 499 500 return base_query.filter( 501 and_( 502 HeteroAtoms.name.in_(class_list), 503 and_( 504 MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 505 MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 506 and_( 507 ( 508 ( 509 CarbonHydrogen.h 510 + HeteroAtoms.halogens_count 511 - len_adduct 512 ) 513 / CarbonHydrogen.c 514 ) 515 >= molecular_search_settings.min_hc_filter, 516 ( 517 ( 518 CarbonHydrogen.h 519 + HeteroAtoms.halogens_count 520 - len_adduct 521 ) 522 / CarbonHydrogen.c 523 ) 524 <= molecular_search_settings.max_hc_filter, 525 CarbonHydrogen.C 526 >= molecular_search_settings.usedAtoms.get("C")[0], 527 CarbonHydrogen.c 528 <= molecular_search_settings.usedAtoms.get("C")[1], 529 CarbonHydrogen.h 530 >= molecular_search_settings.usedAtoms.get("H")[0], 531 CarbonHydrogen.h 532 <= molecular_search_settings.usedAtoms.get("H")[1], 533 ), 534 ), 535 ) 536 ) 537 538 def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None): 539 """add molecular formula to dict 540 541 Parameters 542 ---------- 543 formulas : sqlalchemy.orm.query.Query 544 The query. 545 ion_type : str 546 The ion type. 547 ion_charge : int 548 The ion charge. 549 adduct_atom : str, optional 550 The adduct atom. Default is None. 551 552 Returns 553 ------- 554 dict 555 The dictionary of molecular formulas. 556 557 """ 558 "organize data by heteroatom classes" 559 dict_res = {} 560 561 def nominal_mass_by_ion_type(formula_obj): 562 if ion_type == Labels.protonated_de_ion: 563 return int(formula_obj._protonated_mz(ion_charge)) 564 565 elif ion_type == Labels.radical_ion: 566 return int(formula_obj._radical_mz(ion_charge)) 567 568 elif ion_type == Labels.adduct_ion and adduct_atom: 569 return int(formula_obj._adduct_mz(ion_charge, adduct_atom)) 570 for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose): 571 nominal_mz = nominal_mass_by_ion_type(formula_obj) 572 573 if self.type != "normal": 574 if not nominal_mz in nominal_mzs: 575 continue 576 classe = classe_obj.name 577 578 # classe_str = formula.classe_string 579 580 # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str) 581 582 formula_dict = formula_obj.to_dict() 583 584 if formula_dict.get("O"): 585 if ( 586 formula_dict.get("O") / formula_dict.get("C") 587 >= molecular_search_settings.max_oc_filter 588 ): 589 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter) 590 continue 591 elif ( 592 formula_dict.get("O") / formula_dict.get("C") 593 <= molecular_search_settings.min_oc_filter 594 ): 595 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter) 596 continue 597 # if formula_dict.get("P"): 598 599 # if not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter: 600 601 # continue 602 603 if classe in dict_res.keys(): 604 if nominal_mz in dict_res[classe].keys(): 605 dict_res.get(classe).get(nominal_mz).append(formula_obj) 606 607 else: 608 dict_res.get(classe)[nominal_mz] = [formula_obj] 609 610 else: 611 dict_res[classe] = {nominal_mz: [formula_obj]} 612 613 return dict_res 614 615 len_adducts = 0 616 if ion_type == Labels.adduct_ion: 617 len_adducts = 1 618 619 query = query_normal(classes, len_adducts) 620 621 if ion_type == Labels.protonated_de_ion: 622 if self.type == "normal": 623 query = query.filter( 624 func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_( 625 nominal_mzs 626 ) 627 ) 628 629 return add_dict_formula(query, ion_type, ion_charge) 630 631 if ion_type == Labels.radical_ion: 632 if self.type == "normal": 633 query = query.filter( 634 func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_( 635 nominal_mzs 636 ) 637 ) 638 return add_dict_formula(query, ion_type, ion_charge) 639 640 if ion_type == Labels.adduct_ion: 641 dict_res = {} 642 if adducts: 643 for atom in adducts: 644 if self.type == "normal": 645 query = query.filter( 646 func.floor( 647 MolecularFormulaLink._adduct_mz(ion_charge, atom) 648 ).in_(nominal_mzs) 649 ) 650 dict_res[atom] = add_dict_formula( 651 query, ion_type, ion_charge, adduct_atom=atom 652 ) 653 return dict_res 654 # dump all objs to memory 655 self.session.expunge_all() 656 657 def check_entry(self, classe, ion_type, molecular_search_settings): 658 """Checks if a molecular formula is in the database. 659 660 Parameters 661 ---------- 662 classe : str 663 The class of the molecular formula. 664 ion_type : str 665 The ion type. 666 molecular_search_settings : MolecularFormulaSearchSettings 667 The molecular formula search settings. 668 669 Returns 670 ------- 671 sqlalchemy.orm.query.Query 672 The query. 673 """ 674 # get all classes, ion_type, ion charge as str add to a dict or list 675 # then check if class in database 676 has_class = self.session.query( 677 exists().where((MolecularFormulaLink.classe == classe)) 678 ) 679 680 return has_class 681 682 def get_all_classes(self): 683 """Returns a list of all classes in the database.""" 684 query = self.session.query( 685 MolecularFormulaLink.classe.distinct().label("classe") 686 ) 687 688 return query.all() 689 690 def get_all( 691 self, 692 ): 693 """Returns a list of all molecular formulas in the database.""" 694 mol_formulas = self.session.query(MolecularFormulaLink).all() 695 696 return mol_formulas 697 698 def delete_entry(self, row): 699 """Deletes a molecular formula from the database.""" 700 try: 701 self.session.delete(row) 702 self.session.commit() 703 704 except SQLAlchemyError as e: 705 self.session.rollback() 706 print(str(e)) 707 708 def purge(self, cls): 709 """Deletes all molecular formulas from the database. 710 711 Notes 712 ------- 713 Careful, this will delete the entire database table 714 715 """ 716 self.session.query(cls).delete() 717 self.session.commit() 718 719 def clear_data(self): 720 """Clears the database.""" 721 meta = Base.metadata 722 for table in reversed(meta.sorted_tables): 723 print("Clear table %s" % table) 724 self.session.execute(table.delete()) 725 self.session.commit() 726 727 def close(self, commit=True): 728 """Closes the database. 729 730 Parameters 731 ---------- 732 commit : bool, optional 733 Whether to commit the session. Default is True. 734 """ 735 # make sure the dbconnection gets closed 736 737 if commit: 738 self.commit() 739 self.session.close() 740 self.engine.dispose() 741 742 def add_engine_pidguard(self, engine): 743 """Adds multiprocessing guards. 744 745 Forces a connection to be reconnected if it is detected 746 as having been shared to a sub-process. 747 748 Parameters 749 ---------- 750 engine : sqlalchemy.engine.base.Engine 751 The SQLAlchemy engine. 752 753 """ 754 import os 755 import warnings 756 757 @event.listens_for(engine, "connect") 758 def connect(dbapi_connection, connection_record): 759 """Forces a connection to be reconnected if it is detected 760 761 Parameters 762 ---------- 763 dbapi_connection : sqlalchemy.engine.base.Engine 764 The SQLAlchemy engine. 765 connection_record : sqlalchemy.engine.base.Engine 766 The SQLAlchemy engine. 767 """ 768 connection_record.info["pid"] = os.getpid() 769 770 @event.listens_for(engine, "checkout") 771 def checkout(dbapi_connection, connection_record, connection_proxy): 772 """Forces a connection to be reconnected if it is detected 773 774 Parameters 775 ---------- 776 dbapi_connection : sqlalchemy.engine.base.Engine 777 The SQLAlchemy engine. 778 connection_record : sqlalchemy.engine.base.Engine 779 The SQLAlchemy engine. 780 connection_proxy : sqlalchemy.engine.base.Engine 781 The SQLAlchemy engine. 782 783 Raises 784 ------ 785 exc.DisconnectionError 786 If the connection record belongs to a different process. 787 788 """ 789 pid = os.getpid() 790 if connection_record.info["pid"] != pid: 791 # substitute log.debug() or similar here as desired 792 warnings.warn( 793 "Parent process %(orig)s forked (%(newproc)s) with an open " 794 "database connection, " 795 "which is being discarded and recreated." 796 % {"newproc": pid, "orig": connection_record.info["pid"]} 797 ) 798 connection_record.connection = connection_proxy.connection = None 799 raise exc.DisconnectionError( 800 "Connection record belongs to pid %s, " 801 "attempting to check out in pid %s" 802 % (connection_record.info["pid"], pid) 803 )
MolForm_SQL class for the SQLite database.
Attributes
- engine (sqlalchemy.engine.base.Engine): The SQLAlchemy engine.
- session (sqlalchemy.orm.session.Session): The SQLAlchemy session.
- type (str): The type of database.
- chunks_count (int): The number of chunks to use when querying the database.
Methods
- __init__(url=None, echo=False) Initializes the database.
- __exit__(exc_type, exc_val, exc_tb) Closes the database.
- initiate_database(url, database_name) Creates the database.
- commit() Commits the session.
- init_engine(url) Initializes the SQLAlchemy engine.
__enter__()
get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None) Returns a dictionary of molecular formulas.
- check_entry(classe, ion_type, molecular_search_settings) Checks if a molecular formula is in the database.
- get_all_classes() Returns a list of all classes in the database.
- get_all() Returns a list of all molecular formulas in the database.
- delete_entry(row) Deletes a molecular formula from the database.
- purge(cls) Deletes all molecular formulas from the database.
- clear_data() Clears the database.
- close(commit=True) Closes the database.
- add_engine_pidguard(engine) Adds multiprocessing guards.
341 def __init__(self, url=None, echo=False): 342 self.engine = self.init_engine(url) 343 344 self.add_engine_pidguard(self.engine) 345 346 session_factory = sessionmaker(bind=self.engine) 347 348 Session = scoped_session(session_factory) 349 350 self.session = session_factory() 351 352 Base.metadata.create_all(self.engine) 353 354 self.session.commit()
374 def initiate_database(self, url, database_name): # CREATION 375 """Creates the database. 376 377 Parameters 378 ---------- 379 url : str 380 The URL for the database. 381 database_name : str 382 The name of the database. 383 """ 384 engine = create_engine(url) 385 conn = engine.connect() 386 conn.execute("commit") 387 conn.execute("create database " + database_name) 388 conn.close()
Creates the database.
Parameters
- url (str): The URL for the database.
- database_name (str): The name of the database.
390 def commit(self): 391 """Commits the session.""" 392 try: 393 self.session.commit() 394 except SQLAlchemyError as e: 395 self.session.rollback() 396 print(str(e))
Commits the session.
398 def init_engine(self, url): 399 """Initializes the SQLAlchemy engine. 400 401 Parameters 402 ---------- 403 url : str 404 The URL for the database. 405 406 Returns 407 ------- 408 sqlalchemy.engine.base.Engine 409 The SQLAlchemy engine. 410 411 """ 412 if not url or url == "None" or url == "False": 413 directory = os.getcwd() 414 415 if not os.path.isdir(directory + "/db"): 416 os.mkdir(directory + "/db") 417 418 url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory) 419 420 if url[0:6] == "sqlite": 421 self.type = "sqlite" 422 else: 423 self.type = "normal" 424 425 if url[0:6] == "sqlite": 426 engine = create_engine(url, echo=False) 427 self.chunks_count = 50 428 429 elif url[0:10] == "postgresql" or url[0:8] == "postgres": 430 # postgresql 431 self.chunks_count = 50000 432 engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT") 433 434 return engine # poolclass=NullPool
Initializes the SQLAlchemy engine.
Parameters
- url (str): The URL for the database.
Returns
- sqlalchemy.engine.base.Engine: The SQLAlchemy engine.
440 def get_dict_by_classes( 441 self, 442 classes, 443 ion_type, 444 nominal_mzs, 445 ion_charge, 446 molecular_search_settings, 447 adducts=None, 448 ): 449 """Returns a dictionary of molecular formulas. 450 451 Parameters 452 ---------- 453 classes : list 454 The list of classes. 455 ion_type : str 456 The ion type. 457 nominal_mzs : list 458 The list of nominal m/z values. 459 ion_charge : int 460 The ion charge. 461 molecular_search_settings : MolecularFormulaSearchSettings 462 The molecular formula search settings. 463 adducts : list, optional 464 The list of adducts. Default is None. 465 466 Returns 467 ------- 468 dict 469 The dictionary of molecular formulas. 470 471 Notes 472 ----- 473 Known issue, when using SQLite: 474 if the number of classes and nominal_m/zs are higher than 999 the query will fail 475 Solution: use postgres or split query 476 """ 477 verbose = molecular_search_settings.verbose_processing 478 479 def query_normal(class_list, len_adduct): 480 """query for normal database 481 482 Parameters 483 ---------- 484 class_list : list 485 The list of classes. 486 len_adduct : int 487 The length of the adduct. 488 489 Returns 490 ------- 491 sqlalchemy.orm.query.Query 492 The query. 493 """ 494 base_query = ( 495 self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms) 496 .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id) 497 .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id) 498 ) 499 500 return base_query.filter( 501 and_( 502 HeteroAtoms.name.in_(class_list), 503 and_( 504 MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 505 MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 506 and_( 507 ( 508 ( 509 CarbonHydrogen.h 510 + HeteroAtoms.halogens_count 511 - len_adduct 512 ) 513 / CarbonHydrogen.c 514 ) 515 >= molecular_search_settings.min_hc_filter, 516 ( 517 ( 518 CarbonHydrogen.h 519 + HeteroAtoms.halogens_count 520 - len_adduct 521 ) 522 / CarbonHydrogen.c 523 ) 524 <= molecular_search_settings.max_hc_filter, 525 CarbonHydrogen.C 526 >= molecular_search_settings.usedAtoms.get("C")[0], 527 CarbonHydrogen.c 528 <= molecular_search_settings.usedAtoms.get("C")[1], 529 CarbonHydrogen.h 530 >= molecular_search_settings.usedAtoms.get("H")[0], 531 CarbonHydrogen.h 532 <= molecular_search_settings.usedAtoms.get("H")[1], 533 ), 534 ), 535 ) 536 ) 537 538 def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None): 539 """add molecular formula to dict 540 541 Parameters 542 ---------- 543 formulas : sqlalchemy.orm.query.Query 544 The query. 545 ion_type : str 546 The ion type. 547 ion_charge : int 548 The ion charge. 549 adduct_atom : str, optional 550 The adduct atom. Default is None. 551 552 Returns 553 ------- 554 dict 555 The dictionary of molecular formulas. 556 557 """ 558 "organize data by heteroatom classes" 559 dict_res = {} 560 561 def nominal_mass_by_ion_type(formula_obj): 562 if ion_type == Labels.protonated_de_ion: 563 return int(formula_obj._protonated_mz(ion_charge)) 564 565 elif ion_type == Labels.radical_ion: 566 return int(formula_obj._radical_mz(ion_charge)) 567 568 elif ion_type == Labels.adduct_ion and adduct_atom: 569 return int(formula_obj._adduct_mz(ion_charge, adduct_atom)) 570 for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose): 571 nominal_mz = nominal_mass_by_ion_type(formula_obj) 572 573 if self.type != "normal": 574 if not nominal_mz in nominal_mzs: 575 continue 576 classe = classe_obj.name 577 578 # classe_str = formula.classe_string 579 580 # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str) 581 582 formula_dict = formula_obj.to_dict() 583 584 if formula_dict.get("O"): 585 if ( 586 formula_dict.get("O") / formula_dict.get("C") 587 >= molecular_search_settings.max_oc_filter 588 ): 589 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter) 590 continue 591 elif ( 592 formula_dict.get("O") / formula_dict.get("C") 593 <= molecular_search_settings.min_oc_filter 594 ): 595 # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter) 596 continue 597 # if formula_dict.get("P"): 598 599 # if not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter: 600 601 # continue 602 603 if classe in dict_res.keys(): 604 if nominal_mz in dict_res[classe].keys(): 605 dict_res.get(classe).get(nominal_mz).append(formula_obj) 606 607 else: 608 dict_res.get(classe)[nominal_mz] = [formula_obj] 609 610 else: 611 dict_res[classe] = {nominal_mz: [formula_obj]} 612 613 return dict_res 614 615 len_adducts = 0 616 if ion_type == Labels.adduct_ion: 617 len_adducts = 1 618 619 query = query_normal(classes, len_adducts) 620 621 if ion_type == Labels.protonated_de_ion: 622 if self.type == "normal": 623 query = query.filter( 624 func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_( 625 nominal_mzs 626 ) 627 ) 628 629 return add_dict_formula(query, ion_type, ion_charge) 630 631 if ion_type == Labels.radical_ion: 632 if self.type == "normal": 633 query = query.filter( 634 func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_( 635 nominal_mzs 636 ) 637 ) 638 return add_dict_formula(query, ion_type, ion_charge) 639 640 if ion_type == Labels.adduct_ion: 641 dict_res = {} 642 if adducts: 643 for atom in adducts: 644 if self.type == "normal": 645 query = query.filter( 646 func.floor( 647 MolecularFormulaLink._adduct_mz(ion_charge, atom) 648 ).in_(nominal_mzs) 649 ) 650 dict_res[atom] = add_dict_formula( 651 query, ion_type, ion_charge, adduct_atom=atom 652 ) 653 return dict_res 654 # dump all objs to memory 655 self.session.expunge_all()
Returns a dictionary of molecular formulas.
Parameters
- classes (list): The list of classes.
- ion_type (str): The ion type.
- nominal_mzs (list): The list of nominal m/z values.
- ion_charge (int): The ion charge.
- molecular_search_settings (MolecularFormulaSearchSettings): The molecular formula search settings.
- adducts (list, optional): The list of adducts. Default is None.
Returns
- dict: The dictionary of molecular formulas.
Notes
Known issue, when using SQLite: if the number of classes and nominal_m/zs are higher than 999 the query will fail Solution: use postgres or split query
657 def check_entry(self, classe, ion_type, molecular_search_settings): 658 """Checks if a molecular formula is in the database. 659 660 Parameters 661 ---------- 662 classe : str 663 The class of the molecular formula. 664 ion_type : str 665 The ion type. 666 molecular_search_settings : MolecularFormulaSearchSettings 667 The molecular formula search settings. 668 669 Returns 670 ------- 671 sqlalchemy.orm.query.Query 672 The query. 673 """ 674 # get all classes, ion_type, ion charge as str add to a dict or list 675 # then check if class in database 676 has_class = self.session.query( 677 exists().where((MolecularFormulaLink.classe == classe)) 678 ) 679 680 return has_class
Checks if a molecular formula is in the database.
Parameters
- classe (str): The class of the molecular formula.
- ion_type (str): The ion type.
- molecular_search_settings (MolecularFormulaSearchSettings): The molecular formula search settings.
Returns
- sqlalchemy.orm.query.Query: The query.
682 def get_all_classes(self): 683 """Returns a list of all classes in the database.""" 684 query = self.session.query( 685 MolecularFormulaLink.classe.distinct().label("classe") 686 ) 687 688 return query.all()
Returns a list of all classes in the database.
690 def get_all( 691 self, 692 ): 693 """Returns a list of all molecular formulas in the database.""" 694 mol_formulas = self.session.query(MolecularFormulaLink).all() 695 696 return mol_formulas
Returns a list of all molecular formulas in the database.
698 def delete_entry(self, row): 699 """Deletes a molecular formula from the database.""" 700 try: 701 self.session.delete(row) 702 self.session.commit() 703 704 except SQLAlchemyError as e: 705 self.session.rollback() 706 print(str(e))
Deletes a molecular formula from the database.
708 def purge(self, cls): 709 """Deletes all molecular formulas from the database. 710 711 Notes 712 ------- 713 Careful, this will delete the entire database table 714 715 """ 716 self.session.query(cls).delete() 717 self.session.commit()
Deletes all molecular formulas from the database.
Notes
Careful, this will delete the entire database table
719 def clear_data(self): 720 """Clears the database.""" 721 meta = Base.metadata 722 for table in reversed(meta.sorted_tables): 723 print("Clear table %s" % table) 724 self.session.execute(table.delete()) 725 self.session.commit()
Clears the database.
727 def close(self, commit=True): 728 """Closes the database. 729 730 Parameters 731 ---------- 732 commit : bool, optional 733 Whether to commit the session. Default is True. 734 """ 735 # make sure the dbconnection gets closed 736 737 if commit: 738 self.commit() 739 self.session.close() 740 self.engine.dispose()
Closes the database.
Parameters
- commit (bool, optional): Whether to commit the session. Default is True.
742 def add_engine_pidguard(self, engine): 743 """Adds multiprocessing guards. 744 745 Forces a connection to be reconnected if it is detected 746 as having been shared to a sub-process. 747 748 Parameters 749 ---------- 750 engine : sqlalchemy.engine.base.Engine 751 The SQLAlchemy engine. 752 753 """ 754 import os 755 import warnings 756 757 @event.listens_for(engine, "connect") 758 def connect(dbapi_connection, connection_record): 759 """Forces a connection to be reconnected if it is detected 760 761 Parameters 762 ---------- 763 dbapi_connection : sqlalchemy.engine.base.Engine 764 The SQLAlchemy engine. 765 connection_record : sqlalchemy.engine.base.Engine 766 The SQLAlchemy engine. 767 """ 768 connection_record.info["pid"] = os.getpid() 769 770 @event.listens_for(engine, "checkout") 771 def checkout(dbapi_connection, connection_record, connection_proxy): 772 """Forces a connection to be reconnected if it is detected 773 774 Parameters 775 ---------- 776 dbapi_connection : sqlalchemy.engine.base.Engine 777 The SQLAlchemy engine. 778 connection_record : sqlalchemy.engine.base.Engine 779 The SQLAlchemy engine. 780 connection_proxy : sqlalchemy.engine.base.Engine 781 The SQLAlchemy engine. 782 783 Raises 784 ------ 785 exc.DisconnectionError 786 If the connection record belongs to a different process. 787 788 """ 789 pid = os.getpid() 790 if connection_record.info["pid"] != pid: 791 # substitute log.debug() or similar here as desired 792 warnings.warn( 793 "Parent process %(orig)s forked (%(newproc)s) with an open " 794 "database connection, " 795 "which is being discarded and recreated." 796 % {"newproc": pid, "orig": connection_record.info["pid"]} 797 ) 798 connection_record.connection = connection_proxy.connection = None 799 raise exc.DisconnectionError( 800 "Connection record belongs to pid %s, " 801 "attempting to check out in pid %s" 802 % (connection_record.info["pid"], pid) 803 )
Adds multiprocessing guards.
Forces a connection to be reconnected if it is detected as having been shared to a sub-process.
Parameters
- engine (sqlalchemy.engine.base.Engine): The SQLAlchemy engine.