corems.mass_spectra.factory.GC_Class
__author__ = "Yuri E. Corilo"
__date__ = "Feb 13, 2020"


from collections.abc import Mapping
from pathlib import Path
import json

from numpy import array


from corems.mass_spectra.calc.GC_Calc import GC_Calculations
from corems.mass_spectra.calc.GC_Deconvolution import MassDeconvolution
from corems.mass_spectra.calc import SignalProcessing as sp

from corems.chroma_peak.factory.chroma_peak_classes import GCPeak
from corems.mass_spectra.output.export import LowResGCMSExport
from corems.encapsulation.factory.parameters import GCMSParameters


class GCMSBase(GC_Calculations, MassDeconvolution):
    """Base class for GC-MS data processing.

    Parameters
    ----------
    file_location : str, pathlib.Path, or s3path.S3Path
        Path object containing the file location.
    analyzer : str, optional
        Name of the analyzer. Defaults to 'Unknown'.
    instrument_label : str, optional
        Label of the instrument. Defaults to 'Unknown'.
    sample_name : str, optional
        Name of the sample. If not provided, it is derived from the file location.

    Attributes
    ----------
    file_location : pathlib.Path
        Path object containing the file location.
    sample_name : str
        Name of the sample.
    analyzer : str
        Name of the analyzer.
    instrument_label : str
        Label of the instrument.
    gcpeaks : list
        List of GCPeak objects.
    ri_pairs_ref : None
        Reference retention index pairs.
    cal_file_path : None
        Calibration file path.
    _parameters : GCMSParameters
        GC-MS parameters.
    _retention_time_list : list
        List of retention times.
    _scans_number_list : list
        List of scan numbers.
    _tic_list : list
        List of total ion chromatogram values.
    _ms : dict
        Dictionary containing all mass spectra, keyed by scan number.
    _processed_tic : list
        List of processed total ion chromatogram values.

    Methods
    -------
    * process_chromatogram(plot_res=False). Process the chromatogram.
    * plot_gc_peaks(ax=None, color='red'). Plot the GC peaks.
    """

    def __init__(
        self,
        file_location,
        analyzer="Unknown",
        instrument_label="Unknown",
        sample_name=None,
    ):
        if isinstance(file_location, str):
            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
            file_location = Path(file_location)

        if not file_location.exists():
            # NOTE(review): FileNotFoundError would describe this condition
            # better, but FileExistsError is kept because existing callers may
            # already catch it.
            raise FileExistsError("File does not exist: " + str(file_location))

        self.file_location = file_location

        # An empty/None sample name falls back to the file name without suffix.
        self.sample_name = sample_name if sample_name else file_location.stem

        self.analyzer = analyzer
        self.instrument_label = instrument_label
        self._init_settings()

        self._retention_time_list = []
        self._scans_number_list = []
        self._tic_list = []

        # all scans, keyed by scan number (see add_mass_spectrum)
        self._ms = {}

        # after peak detection
        self._processed_tic = []
        self.gcpeaks = []

        self.ri_pairs_ref = None
        self.cal_file_path = None

    def _init_settings(self):
        """Initialize the settings for GC_Class.

        This method initializes the settings for the GC_Class object using the GCMSParameters class.
        """
        self._parameters = GCMSParameters()

    def __len__(self):
        """Return the number of GC peaks in the GC_Class object."""
        return len(self.gcpeaks)

    def __getitem__(self, scan_number) -> GCPeak:
        """Return the GCPeak stored at the given position in `gcpeaks`.

        Despite its name, `scan_number` is used as a positional index into the
        `gcpeaks` list, not as a scan-number lookup. No `__iter__` is defined
        in this class, so `for peak in self` iterates through this method
        (unless a base class provides `__iter__`).
        """
        return self.gcpeaks[scan_number]

    # def __iter__(self):

    #     return iter(self.gcpeaks.values())

    def process_chromatogram(self, plot_res=False):
        """Process the chromatogram.

        Smooths the TIC, stores each smoothed value on the corresponding mass
        spectrum, and then either runs the deconvolution or detects peaks with
        the centroid detector, depending on
        `chromatogram_settings.use_deconvolution`. Peaks found by the centroid
        detector are appended to `gcpeaks`.

        Parameters
        ----------
        plot_res : bool, optional
            If True, plot the results. Forwarded to `run_deconvolution`; it is
            not used on the centroid-detection path. Defaults to False.
        """

        # tic = self.tic - self.baseline_detector(self.tic)

        self._processed_tic = self.smooth_tic(self.tic)

        # NOTE(review): `_ms` is keyed by scan number but is indexed here by the
        # position in the TIC array; this assumes scan numbers are 0..N-1 in
        # TIC order -- confirm against the data readers.
        for index, tic in enumerate(self._processed_tic):
            self._ms[index]._processed_tic = tic

        # self.second_derivative_threshold(self._processed_tic)

        if self.chromatogram_settings.use_deconvolution:
            # Honor the caller's choice instead of the previous hard-coded False.
            self.run_deconvolution(plot_res=plot_res)
            return

        peaks_index = self.centroid_detector(
            self._processed_tic, self.retention_time
        )

        for peak_indexes in peaks_index:
            # The second element of each detected index tuple is used as the apex.
            apex_index = peak_indexes[1]

            gc_peak = GCPeak(self, self._ms[apex_index], peak_indexes)
            gc_peak.calc_area(self._processed_tic, 1)

            self.gcpeaks.append(gc_peak)

            # self.gcpeaks[self.scans_number[apex_index]] = gc_peak

    def add_mass_spectrum(self, mass_spec):
        """Add a mass spectrum to the GC-MS object.

        The spectrum is stored under its `scan_number`; adding another
        spectrum with the same scan number replaces the previous one.

        Parameters
        ----------
        mass_spec : MassSpectrum
            Mass spectrum to be added.
        """

        self._ms[mass_spec.scan_number] = mass_spec

    def set_tic_list_from_data(self):
        """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.

        Uses the current `scans_number` list, so `set_scans_number_from_data`
        (or the `scans_number` setter) must have been called first.
        """

        self.tic = [self._ms.get(i).tic for i in self.scans_number]

        # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()])

    def set_retention_time_from_data(self):
        """Set the retention time list from the mass spectra data within the GC-MS data object.

        Retention times are collected in ascending scan-number order.
        """

        self.retention_time = [
            self._ms.get(key_ms).retention_time for key_ms in sorted(self._ms)
        ]

        # self.set_retention_time_list(sorted(self._ms.keys()))

    def set_scans_number_from_data(self):
        """Set the scan number list from the mass spectra data within the GC-MS data object."""

        self.scans_number = sorted(self._ms)

    @property
    def parameters(self):
        """GCMS Parameters"""
        return self._parameters

    @parameters.setter
    def parameters(self, gcms_parameters_instance):
        self._parameters = gcms_parameters_instance

    # Note: maintaining `parameter` for backwards compatibility,
    # but proper usage would reference `parameters` to conform
    # to other classes.
    @property
    def parameter(self):
        """GCMS Parameters"""
        return self._parameters

    @parameter.setter
    def parameter(self, gcms_parameters_instance):
        self._parameters = gcms_parameters_instance

    @property
    def molecular_search_settings(self):
        """Molecular Search Settings"""
        return self.parameters.molecular_search

    @molecular_search_settings.setter
    def molecular_search_settings(self, settings_class_instance):
        self.parameters.molecular_search = settings_class_instance

    @property
    def chromatogram_settings(self):
        """Chromatogram Settings"""
        return self.parameters.gc_ms

    @chromatogram_settings.setter
    def chromatogram_settings(self, settings_class_instance):
        self.parameters.gc_ms = settings_class_instance

    @property
    def scans_number(self):
        """Scans Number"""
        return self._scans_number_list

    @scans_number.setter
    def scans_number(self, alist):
        self._scans_number_list = alist

    @property
    def retention_time(self):
        """Retention Time"""
        return self._retention_time_list

    @retention_time.setter
    def retention_time(self, alist):
        # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list))
        self._retention_time_list = alist

    @property
    def processed_tic(self):
        """Processed Total Ion Current"""
        return self._processed_tic

    @property
    def tic(self):
        """Total Ion Current"""
        return self._tic_list

    @tic.setter
    def tic(self, alist):
        # Stored as a numpy array so the TIC supports element-wise arithmetic.
        self._tic_list = array(alist)

    @property
    def max_tic(self):
        """Maximum Total Ion Current among the detected GC peaks.

        Raises ValueError when no peaks have been detected.
        """
        return max(gc_peak.tic for gc_peak in self)

    @property
    def min_tic(self):
        """Minimum Total Ion Current among the detected GC peaks.

        Raises ValueError when no peaks have been detected.
        """
        return min(gc_peak.tic for gc_peak in self)

    @property
    def dynamic_range(self):
        """Dynamic Range of the Total Ion Current (max_tic / min_tic)"""
        return self.max_tic / self.min_tic

    @property
    def matched_peaks(self):
        """Matched Peaks (GC peaks that evaluate as truthy)"""
        return [gc_peak for gc_peak in self if gc_peak]

    @property
    def no_matched_peaks(self):
        """Peaks with no Matched Metabolites (GC peaks that evaluate as falsy)"""
        return [peak for peak in self if not peak]

    @property
    def sorted_gcpeaks(self):
        """Sorted GC Peaks, by retention time"""
        return sorted(self, key=lambda g: g.retention_time)

    @property
    def unique_metabolites(self):
        """Unique Metabolites: set of compound names found across matched peaks"""
        return {
            compound_obj.name
            for gc_peak in self
            if gc_peak
            for compound_obj in gc_peak
        }

    @property
    def metabolites_data(self):
        """Metabolites Data

        Returns a list with one dict per distinct compound name, in order of
        first appearance. Each dict has the keys ``name``,
        ``highest_similarity_score``, ``casno``, ``kegg``, ``inchi``,
        ``inchi_key``, ``chebi`` and ``smiles``. The identifier fields are
        empty strings when the compound carries no metadata.
        ``highest_similarity_score`` is the largest spectral similarity score
        seen for that name across all peaks.
        """
        metabolites = {}
        for gc_peak in self:
            if not gc_peak:
                continue

            for compound_obj in gc_peak:
                name = compound_obj.name
                score = compound_obj.spectral_similarity_score

                entry = metabolites.get(name)
                if entry is not None:
                    # Already recorded: only keep the best score.
                    if score > entry["highest_similarity_score"]:
                        entry["highest_similarity_score"] = score
                    continue

                metadata = compound_obj.metadata
                # The same key set is used with or without metadata; previously
                # the no-metadata branch emitted "inchikey" instead of
                # "inchi_key", giving consumers two different schemas.
                metabolites[name] = {
                    "name": name,
                    "highest_similarity_score": score,
                    "casno": metadata.cas if metadata else "",
                    "kegg": metadata.kegg if metadata else "",
                    "inchi": metadata.inchi if metadata else "",
                    "inchi_key": metadata.inchikey if metadata else "",
                    "chebi": metadata.chebi if metadata else "",
                    "smiles": metadata.smiles if metadata else "",
                }

        return list(metabolites.values())

    def plot_gc_peaks(self, ax=None, color="red"):  # pragma: no cover
        """Plot the GC peaks.

        Draws one marker per GC peak at the retention time and TIC of its apex
        mass spectrum and installs a hover callback that shows an annotation
        with retention time, retention index, similarity score and name of the
        highest scoring compound.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the GC peaks. Defaults to None, in which case
            the current axes are used.
        color : str, optional
            Color of the GC peaks. Defaults to 'red'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the peaks were drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()
        # Use the figure that owns the axes so the hover callback is attached
        # to the right canvas even when the caller passes axes that do not
        # belong to pyplot's current figure.
        fig = ax.figure

        max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self]
        max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self]

        # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self]
        # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self]
        # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v')

        sc = ax.scatter(max_rts, max_tics, color=color, marker="v")

        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        annot = ax.annotate(
            "",
            xy=(0, 0),
            xytext=(20, 20),
            textcoords="offset points",
            bbox=dict(boxstyle="round", fc="w"),
            arrowprops=dict(arrowstyle="->"),
        )
        annot.set_visible(False)
        annot.get_bbox_patch().set_facecolor(("lightblue"))
        annot.get_bbox_patch().set_alpha(0.8)

        def best_match_field(peak, attr, ndigits=None):
            # Text for one attribute of the peak's best compound, or "None"
            # when the peak has no matched compound.
            compound = peak.highest_score_compound
            if not compound:
                return str(None)
            value = getattr(compound, attr)
            return str(value if ndigits is None else round(value, ndigits))

        def update_annot(ind):
            hit_indexes = ind["ind"]
            # Anchor the annotation on the first marker under the cursor.
            annot.xy = sc.get_offsets()[hit_indexes[0]]

            peaks = [self[n] for n in hit_indexes]

            text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(
                " ".join(str(round(p.retention_time, 2)) for p in peaks),
                " ".join(best_match_field(p, "retention_time", 2) for p in peaks),
                " ".join(str(round(p.ri, 2) if p.ri else None) for p in peaks),
                " ".join(best_match_field(p, "ri", 2) for p in peaks),
                " ".join(best_match_field(p, "similarity_score", 4) for p in peaks),
                " ".join(best_match_field(p, "name") for p in peaks),
            )
            annot.set_text(text)

        def hover(event):
            vis = annot.get_visible()
            if event.inaxes == ax:
                cont, ind = sc.contains(event)
                if cont:
                    update_annot(ind)
                    annot.set_visible(True)
                    fig.canvas.draw_idle()
                elif vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

        fig.canvas.mpl_connect("motion_notify_event", hover)

        return ax

    def to_excel(
        self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:"
    ):
        """Export the GC-MS data to an Excel file.

        Parameters
        ----------
        out_file_path : str, pathlib.Path, or s3path.S3Path
            Path object containing the file location.
        write_mode : str, optional
            Write mode. Defaults to 'ab'.
        write_metadata : bool, optional
            If True, write the metadata. Defaults to True.
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        pathlib.Path
            `out_file_path` with its suffix replaced by '.xlsx'.
        """

        if isinstance(out_file_path, str):
            out_file_path = Path(out_file_path)

        exportMS = LowResGCMSExport(out_file_path, self)
        exportMS.to_excel(
            id_label=id_label, write_mode=write_mode, write_metadata=write_metadata
        )

        return out_file_path.with_suffix(".xlsx")

    def to_csv(
        self,
        out_file_path,
        separate_output=False,
        write_metadata=True,
        id_label="corems:",
    ):
        """Export the GC-MS data to a CSV file.

        Parameters
        ----------
        out_file_path : str, pathlib.Path, or s3path.S3Path
            Path object containing the file location.
        separate_output : bool, optional
            If True, separate the output. Defaults to False.
        write_metadata : bool, optional
            If True, write the metadata. Defaults to True.
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        pathlib.Path
            `out_file_path` with its suffix replaced by '.csv'.
        """

        if isinstance(out_file_path, str):
            out_file_path = Path(out_file_path)

        exportMS = LowResGCMSExport(out_file_path, self)
        exportMS.to_csv(
            id_label=id_label,
            separate_output=separate_output,
            write_metadata=write_metadata,
        )

        return out_file_path.with_suffix(".csv")

    def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"):
        """Export the GC-MS data to a pickled Pandas dataframe.

        Parameters
        ----------
        out_file_path : str, pathlib.Path, or s3path.S3Path
            Path object containing the file location.
        write_metadata : bool, optional
            If True, write the metadata. Defaults to True.
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        pathlib.Path
            `out_file_path` with its suffix replaced by '.pkl'.
        """

        if isinstance(out_file_path, str):
            out_file_path = Path(out_file_path)

        # pickle dataframe (pkl extension)
        exportMS = LowResGCMSExport(out_file_path, self)
        exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata)

        return out_file_path.with_suffix(".pkl")

    def to_dataframe(self, id_label="corems:"):
        """Export the GC-MS data to a Pandas dataframe.

        Parameters
        ----------
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        The result of `LowResGCMSExport.get_pandas_df`.
        """

        exportMS = LowResGCMSExport(self.sample_name, self)
        return exportMS.get_pandas_df(id_label=id_label)

    def processing_stats(self):
        """Return the processing statistics.

        Returns
        -------
        The result of `LowResGCMSExport.get_data_stats` for this object.
        """

        exportMS = LowResGCMSExport(self.sample_name, self)
        return exportMS.get_data_stats(self)

    def parameters_json(self, id_label="corems:", output_path=" "):
        """Return the parameters in JSON format.

        Parameters
        ----------
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.
        output_path : str, optional
            Path object containing the file location. Defaults to " ".

        Returns
        -------
        The result of `LowResGCMSExport.get_parameters_json`.
        """

        exportMS = LowResGCMSExport(self.sample_name, self)
        return exportMS.get_parameters_json(self, id_label, output_path)

    def to_json(self, id_label="corems:"):
        """Export the GC-MS data to JSON.

        Parameters
        ----------
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        The result of `LowResGCMSExport.get_json`.
        """

        exportMS = LowResGCMSExport(self.sample_name, self)
        return exportMS.get_json(id_label=id_label)

    def to_hdf(self, id_label="corems:"):
        """Export the GC-MS data to a HDF file.

        Parameters
        ----------
        id_label : str, optional
            Label of the ID. Defaults to 'corems:'.

        Returns
        -------
        The result of `LowResGCMSExport.to_hdf`.
        """

        exportMS = LowResGCMSExport(self.sample_name, self)
        return exportMS.to_hdf(id_label=id_label)

    def plot_chromatogram(self, ax=None, color="blue"):  # pragma: no cover
        """Plot the chromatogram (raw TIC against retention time).

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the chromatogram. Defaults to None, in which
            case the current axes are used.
        color : str, optional
            Color of the chromatogram. Defaults to 'blue'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the chromatogram was drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        ax.plot(self.retention_time, self.tic, color=color)
        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        return ax

    def plot_smoothed_chromatogram(self, ax=None, color="green"):  # pragma: no cover
        """Plot the smoothed chromatogram.

        The smoothed TIC is recomputed with `smooth_tic` on every call; the
        stored `processed_tic` is not used or modified.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the smoothed chromatogram. Defaults to None,
            in which case the current axes are used.
        color : str, optional
            Color of the smoothed chromatogram. Defaults to 'green'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the chromatogram was drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color)

        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        return ax

    def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
        """Plot the detected baseline.

        The baseline is computed with `SignalProcessing.baseline_detector`
        from the raw TIC, using the `peak_height_max_percent` and
        `peak_max_prominence_percent` chromatogram settings.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the detected baseline. Defaults to None, in
            which case the current axes are used.
        color : str, optional
            Color of the detected baseline. Defaults to 'blue'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the baseline was drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        max_height = self.chromatogram_settings.peak_height_max_percent
        max_prominence = self.chromatogram_settings.peak_max_prominence_percent

        baseline = sp.baseline_detector(
            self.tic, self.retention_time, max_height, max_prominence
        )
        # Plot the baseline against retention time. Previously `baseline` was
        # computed but never drawn, and the retention times themselves were
        # plotted against their index.
        ax.plot(self.retention_time, baseline, color=color)
        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        return ax

    def plot_baseline_subtraction(self, ax=None, color="black"):  # pragma: no cover
        """Plot the baseline subtraction.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the baseline subtraction. Defaults to None, in
            which case the current axes are used.
        color : str, optional
            Color of the baseline subtraction. Defaults to 'black'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the curve was drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        max_height = self.chromatogram_settings.peak_height_max_percent
        max_prominence = self.chromatogram_settings.peak_max_prominence_percent

        # NOTE(review): the detector output is *added* to the TIC although the
        # method is named "subtraction". Left unchanged because the sign
        # convention of sp.baseline_detector is not visible here -- confirm.
        x = self.tic + sp.baseline_detector(
            self.tic, self.retention_time, max_height, max_prominence
        )

        ax.plot(self.retention_time, x, color=color)

        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        return ax

    def peaks_rt_tic(self, json_string=False):
        """Return the peaks, retention time, and total ion chromatogram.

        Parameters
        ----------
        json_string : bool, optional
            If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.

        Returns
        -------
        dict or str
            A dict with two entries: ``peak_data`` maps each peak retention
            time to its ``rt``, ``tic``, ``mz``, ``abundance`` and
            ``candidate_names``; ``ref_data`` maps each candidate compound
            name to its ``mz`` and ``abundance`` lists (first occurrence
            wins). When `json_string` is True the dict is serialized with
            `json.dumps`.
        """

        peaks_list = {}
        all_candidates_data = {}

        for gcms_peak in self.sorted_gcpeaks:
            peaks_list[gcms_peak.retention_time] = {
                "rt": gcms_peak.rt_list,
                "tic": gcms_peak.tic_list,
                "mz": gcms_peak.mass_spectrum.mz_exp.tolist(),
                "abundance": gcms_peak.mass_spectrum.abundance.tolist(),
                "candidate_names": gcms_peak.compound_names,
            }

            for compound in gcms_peak:
                if compound.name not in all_candidates_data:
                    # Round-trip through numpy so the values are plain lists
                    # that json can serialize.
                    all_candidates_data[compound.name] = {
                        "mz": array(compound.mz).tolist(),
                        "abundance": array(compound.abundance).tolist(),
                    }

        all_peaks_data = {
            "peak_data": peaks_list,
            "ref_data": all_candidates_data,
        }

        if json_string:
            return json.dumps(all_peaks_data)

        return all_peaks_data

    def plot_processed_chromatogram(self, ax=None, color="black"):
        """Plot the processed chromatogram.

        Plots the stored `processed_tic` against retention time.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes object to plot the processed chromatogram. Defaults to None,
            in which case the current axes are used.
        color : str, optional
            Color of the processed chromatogram. Defaults to 'black'.

        Returns
        -------
        matplotlib.axes.Axes
            The axes the chromatogram was drawn on.
        """

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        ax.plot(self.retention_time, self.processed_tic, color=color)

        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

        return ax
22class GCMSBase(GC_Calculations, MassDeconvolution): 23 """Base class for GC-MS data processing. 24 25 Parameters 26 ---- 27 file_location : str, pathlib.Path, or s3path.S3Path 28 Path object containing the file location. 29 analyzer : str, optional 30 Name of the analyzer. Defaults to 'Unknown'. 31 instrument_label : str, optional 32 Label of the instrument. Defaults to 'Unknown'. 33 sample_name : str, optional 34 Name of the sample. If not provided, it is derived from the file location. 35 36 Attributes 37 ------------ 38 file_location : pathlib.Path 39 Path object containing the file location. 40 sample_name : str 41 Name of the sample. 42 analyzer : str 43 Name of the analyzer. 44 instrument_label : str 45 Label of the instrument. 46 gcpeaks : list 47 List of GCPeak objects. 48 ri_pairs_ref : None 49 Reference retention index pairs. 50 cal_file_path : None 51 Calibration file path. 52 _parameters : GCMSParameters 53 GC-MS parameters. 54 _retention_time_list : list 55 List of retention times. 56 _scans_number_list : list 57 List of scan numbers. 58 _tic_list : list 59 List of total ion chromatogram values. 60 _ms : dict 61 Dictionary containing all mass spectra. 62 _processed_tic : list 63 List of processed total ion chromatogram values. 64 65 Methods 66 ------- 67 * process_chromatogram(plot_res=False). Process the chromatogram. 68 * plot_gc_peaks(ax=None, color='red'). Plot the GC peaks. 
69 """ 70 71 def __init__( 72 self, 73 file_location, 74 analyzer="Unknown", 75 instrument_label="Unknown", 76 sample_name=None, 77 ): 78 if isinstance(file_location, str): 79 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed 80 file_location = Path(file_location) 81 82 if not file_location.exists(): 83 raise FileExistsError("File does not exist: " + str(file_location)) 84 85 self.file_location = file_location 86 87 if sample_name: 88 self.sample_name = sample_name 89 else: 90 self.sample_name = file_location.stem 91 92 self.analyzer = analyzer 93 self.instrument_label = instrument_label 94 self._init_settings() 95 96 self._retention_time_list = [] 97 self._scans_number_list = [] 98 self._tic_list = [] 99 100 # all scans 101 self._ms = {} 102 103 # after peak detection 104 self._processed_tic = [] 105 self.gcpeaks = [] 106 107 self.ri_pairs_ref = None 108 self.cal_file_path = None 109 110 def _init_settings(self): 111 """Initialize the settings for GC_Class. 112 113 This method initializes the settings for the GC_Class object using the GCMSParameters class. 114 """ 115 self._parameters = GCMSParameters() 116 117 def __len__(self): 118 """Return the number of GC peaks in the GC_Class object.""" 119 return len(self.gcpeaks) 120 121 def __getitem__(self, scan_number) -> GCPeak: 122 """Return the GCPeak with the given scan number.""" 123 return self.gcpeaks[scan_number] 124 125 # def __iter__(self): 126 127 # return iter(self.gcpeaks.values()) 128 129 def process_chromatogram(self, plot_res=False): 130 """Process the chromatogram. 131 132 This method processes the chromatogram. 133 134 Parameters 135 ---------- 136 plot_res : bool, optional 137 If True, plot the results. Defaults to False. 
138 """ 139 140 # tic = self.tic - self.baseline_detector(self.tic) 141 142 self._processed_tic = self.smooth_tic(self.tic) 143 144 for index, tic in enumerate(self._processed_tic): 145 self._ms[index]._processed_tic = tic 146 147 # self.second_derivative_threshold(self._processed_tic) 148 149 if self.chromatogram_settings.use_deconvolution: 150 self.run_deconvolution(plot_res=False) 151 152 else: 153 peaks_index = self.centroid_detector( 154 self._processed_tic, self.retention_time 155 ) 156 157 for i in peaks_index: 158 apex_index = i[1] 159 160 gc_peak = GCPeak(self, self._ms[apex_index], i) 161 162 gc_peak.calc_area(self._processed_tic, 1) 163 164 self.gcpeaks.append(gc_peak) 165 166 # self.gcpeaks[self.scans_number[apex_index]] = gc_peak 167 168 def add_mass_spectrum(self, mass_spec): 169 """Add a mass spectrum to the GC-MS object. 170 171 This method adds a mass spectrum to the GC-MS object. 172 173 Parameters 174 ---------- 175 mass_spec : MassSpectrum 176 Mass spectrum to be added. 
177 """ 178 179 self._ms[mass_spec.scan_number] = mass_spec 180 181 def set_tic_list_from_data(self): 182 """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.""" 183 184 self.tic = [self._ms.get(i).tic for i in self.scans_number] 185 186 # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()]) 187 188 def set_retention_time_from_data(self): 189 """Set the retention time list from the mass spectra data within the GC-MS data object.""" 190 191 retention_time_list = [] 192 193 for key_ms in sorted(self._ms.keys()): 194 retention_time_list.append(self._ms.get(key_ms).retention_time) 195 196 self.retention_time = retention_time_list 197 198 # self.set_retention_time_list(sorted(self._ms.keys())) 199 200 def set_scans_number_from_data(self): 201 """Set the scan number list from the mass spectra data within the GC-MS data object.""" 202 203 self.scans_number = sorted(self._ms.keys()) 204 205 @property 206 def parameters(self): 207 """GCMS Parameters""" 208 return self._parameters 209 210 @parameters.setter 211 def parameters(self, gcms_parameters_instance): 212 self._parameters = gcms_parameters_instance 213 214 # Note: maintaining `parameter` for backwards compatibility, 215 # but proper usage would reference `parameters` to conform 216 # to other classes. 
217 @property 218 def parameter(self): 219 """GCMS Parameters""" 220 return self._parameters 221 222 @parameter.setter 223 def parameter(self, gcms_parameters_instance): 224 self._parameters = gcms_parameters_instance 225 226 @property 227 def molecular_search_settings(self): 228 """Molecular Search Settings""" 229 return self.parameters.molecular_search 230 231 @molecular_search_settings.setter 232 def molecular_search_settings(self, settings_class_instance): 233 self.parameters.molecular_search = settings_class_instance 234 235 @property 236 def chromatogram_settings(self): 237 """Chromatogram Settings""" 238 return self.parameters.gc_ms 239 240 @chromatogram_settings.setter 241 def chromatogram_settings(self, settings_class_instance): 242 self.parameters.gc_ms = settings_class_instance 243 244 @property 245 def scans_number(self): 246 """Scans Number""" 247 return self._scans_number_list 248 249 @property 250 def retention_time(self): 251 """Retention Time""" 252 return self._retention_time_list 253 254 @property 255 def processed_tic(self): 256 """Processed Total Ion Current""" 257 return self._processed_tic 258 259 @property 260 def tic(self): 261 """Total Ion Current""" 262 return self._tic_list 263 264 @property 265 def max_tic(self): 266 """Maximum Total Ion Current""" 267 return max([gc_peak.tic for gc_peak in self]) 268 269 @property 270 def min_tic(self): 271 """Minimum Total Ion Current""" 272 return min([gc_peak.tic for gc_peak in self]) 273 274 @property 275 def dynamic_range(self): 276 """Dynamic Range of the Total Ion Current""" 277 return self.max_tic / self.min_tic 278 279 @property 280 def matched_peaks(self): 281 """Matched Peaks""" 282 return [gc_peak for gc_peak in self if gc_peak] 283 284 @property 285 def sorted_gcpeaks(self): 286 """Sorted GC Peaks, by retention time""" 287 return sorted(self, key=lambda g: g.retention_time) 288 289 @property 290 def unique_metabolites(self): 291 """Unique Metabolites""" 292 metabolites = set() 293 for 
gc_peak in self: 294 if gc_peak: 295 for compound_obj in gc_peak: 296 metabolites.add(compound_obj.name) 297 298 return metabolites 299 300 @property 301 def metabolites_data(self): 302 """Metabolites Data""" 303 metabolites = {} 304 for gc_peak in self: 305 if gc_peak: 306 for compound_obj in gc_peak: 307 if compound_obj.name in metabolites.keys(): 308 current_score = metabolites[compound_obj.name][ 309 "highest_similarity_score" 310 ] 311 compound_score = compound_obj.spectral_similarity_score 312 metabolites[compound_obj.name]["highest_similarity_score"] = ( 313 compound_score 314 if compound_score > current_score 315 else current_score 316 ) 317 318 else: 319 if compound_obj.metadata: 320 metabolites[compound_obj.name] = { 321 "name": compound_obj.name, 322 "highest_similarity_score": compound_obj.spectral_similarity_score, 323 "casno": compound_obj.metadata.cas, 324 "kegg": compound_obj.metadata.kegg, 325 "inchi": compound_obj.metadata.inchi, 326 "inchi_key": compound_obj.metadata.inchikey, 327 "chebi": compound_obj.metadata.chebi, 328 "smiles": compound_obj.metadata.smiles, 329 } 330 else: 331 metabolites[compound_obj.name] = { 332 "name": compound_obj.name, 333 "highest_similarity_score": compound_obj.spectral_similarity_score, 334 "casno": "", 335 "kegg": "", 336 "inchi": "", 337 "inchikey": "", 338 "chebi": "", 339 "smiles": "", 340 } 341 342 return list(metabolites.values()) 343 344 @property 345 def no_matched_peaks(self): 346 """Peaks with no Matched Metabolites""" 347 return [peak for peak in self if not peak] 348 349 @retention_time.setter 350 def retention_time(self, alist): 351 # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list)) 352 self._retention_time_list = alist 353 354 @scans_number.setter 355 def scans_number(self, alist): 356 self._scans_number_list = alist 357 358 @tic.setter 359 def tic(self, alist): 360 self._tic_list = array(alist) 361 362 def plot_gc_peaks(self, ax=None, color="red"): # pragma: no cover 363 
"""Plot the GC peaks. 364 365 This method plots the GC peaks. 366 367 Parameters 368 ---------- 369 ax : matplotlib.axes.Axes, optional 370 Axes object to plot the GC peaks. Defaults to None. 371 color : str, optional 372 Color of the GC peaks. Defaults to 'red'. 373 """ 374 375 import matplotlib.pyplot as plt 376 377 fig = plt.gcf() 378 if ax is None: 379 ax = plt.gca() 380 381 max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self] 382 max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self] 383 384 # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self] 385 # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self] 386 # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v') 387 388 sc = ax.scatter(max_rts, max_tics, color=color, marker="v") 389 390 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 391 392 annot = ax.annotate( 393 "", 394 xy=(0, 0), 395 xytext=(20, 20), 396 textcoords="offset points", 397 bbox=dict(boxstyle="round", fc="w"), 398 arrowprops=dict(arrowstyle="->"), 399 ) 400 annot.set_visible(False) 401 annot.get_bbox_patch().set_facecolor(("lightblue")) 402 annot.get_bbox_patch().set_alpha(0.8) 403 404 def update_annot(ind): 405 pos = sc.get_offsets()[ind["ind"][0]] 406 annot.xy = pos 407 408 text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format( 409 " ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]), 410 " ".join( 411 [ 412 str( 413 round(self[n].highest_score_compound.retention_time, 2) 414 if self[n].highest_score_compound 415 else None 416 ) 417 for n in ind["ind"] 418 ] 419 ), 420 " ".join( 421 [ 422 str(round(self[n].ri, 2) if self[n].ri else None) 423 for n in ind["ind"] 424 ] 425 ), 426 " ".join( 427 [ 428 str( 429 round(self[n].highest_score_compound.ri, 2) 430 if 
self[n].highest_score_compound 431 else None 432 ) 433 for n in ind["ind"] 434 ] 435 ), 436 " ".join( 437 [ 438 str( 439 round(self[n].highest_score_compound.similarity_score, 4) 440 if self[n].highest_score_compound 441 else None 442 ) 443 for n in ind["ind"] 444 ] 445 ), 446 " ".join( 447 [ 448 str( 449 self[n].highest_score_compound.name 450 if self[n].highest_score_compound 451 else None 452 ) 453 for n in ind["ind"] 454 ] 455 ), 456 ) 457 annot.set_text(text) 458 459 def hover(event): 460 vis = annot.get_visible() 461 if event.inaxes == ax: 462 cont, ind = sc.contains(event) 463 if cont: 464 update_annot(ind) 465 annot.set_visible(True) 466 fig.canvas.draw_idle() 467 else: 468 if vis: 469 annot.set_visible(False) 470 fig.canvas.draw_idle() 471 472 fig.canvas.mpl_connect("motion_notify_event", hover) 473 474 return ax 475 476 def to_excel( 477 self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:" 478 ): 479 """Export the GC-MS data to an Excel file. 480 481 This method exports the GC-MS data to an Excel file. 482 483 Parameters 484 ---------- 485 out_file_path : str, pathlib.Path, or s3path.S3Path 486 Path object containing the file location. 487 write_mode : str, optional 488 Write mode. Defaults to 'ab'. 489 write_metadata : bool, optional 490 If True, write the metadata. Defaults to True. 491 id_label : str, optional 492 Label of the ID. Defaults to 'corems:'. 493 494 """ 495 496 if isinstance(out_file_path, str): 497 out_file_path = Path(out_file_path) 498 499 exportMS = LowResGCMSExport(out_file_path, self) 500 exportMS.to_excel( 501 id_label=id_label, write_mode=write_mode, write_metadata=write_metadata 502 ) 503 504 return out_file_path.with_suffix(".xlsx") 505 506 def to_csv( 507 self, 508 out_file_path, 509 separate_output=False, 510 write_metadata=True, 511 id_label="corems:", 512 ): 513 """Export the GC-MS data to a CSV file. 
514 515 Parameters 516 ---------- 517 out_file_path : str, pathlib.Path, or s3path.S3Path 518 Path object containing the file location. 519 separate_output : bool, optional 520 If True, separate the output. Defaults to False. 521 write_metadata : bool, optional 522 If True, write the metadata. Defaults to True. 523 524 """ 525 526 if isinstance(out_file_path, str): 527 out_file_path = Path(out_file_path) 528 529 exportMS = LowResGCMSExport(out_file_path, self) 530 exportMS.to_csv( 531 id_label=id_label, 532 separate_output=separate_output, 533 write_metadata=write_metadata, 534 ) 535 536 return out_file_path.with_suffix(".csv") 537 538 def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"): 539 """Export the GC-MS data to a Pandas dataframe. 540 541 Parameters 542 ---------- 543 out_file_path : str, pathlib.Path, or s3path.S3Path 544 Path object containing the file location. 545 write_metadata : bool, optional 546 If True, write the metadata. Defaults to True. 547 id_label : str, optional 548 Label of the ID. Defaults to 'corems:'. 549 550 """ 551 552 if isinstance(out_file_path, str): 553 out_file_path = Path(out_file_path) 554 # pickle dataframe (pkl extension) 555 exportMS = LowResGCMSExport(out_file_path, self) 556 exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata) 557 558 return out_file_path.with_suffix(".pkl") 559 560 def to_dataframe(self, id_label="corems:"): 561 """Export the GC-MS data to a Pandas dataframe. 562 563 Parameters 564 ---------- 565 id_label : str, optional 566 Label of the ID. Defaults to 'corems:'. 
567 568 """ 569 570 # returns pandas dataframe 571 exportMS = LowResGCMSExport(self.sample_name, self) 572 return exportMS.get_pandas_df(id_label=id_label) 573 574 def processing_stats(self): 575 """Return the processing statistics.""" 576 577 # returns json string 578 exportMS = LowResGCMSExport(self.sample_name, self) 579 return exportMS.get_data_stats(self) 580 581 def parameters_json(self, id_label="corems:", output_path=" "): 582 """Return the parameters in JSON format. 583 584 Parameters 585 ---------- 586 id_label : str, optional 587 Label of the ID. Defaults to 'corems:'. 588 output_path : str, optional 589 Path object containing the file location. Defaults to " ". 590 """ 591 592 # returns json string 593 exportMS = LowResGCMSExport(self.sample_name, self) 594 return exportMS.get_parameters_json(self, id_label, output_path) 595 596 def to_json(self, id_label="corems:"): 597 """Export the GC-MS data to a JSON file. 598 599 Parameters 600 ---------- 601 id_label : str, optional 602 Label of the ID. Defaults to 'corems:'. 603 604 """ 605 606 # returns pandas dataframe 607 exportMS = LowResGCMSExport(self.sample_name, self) 608 return exportMS.get_json(id_label=id_label) 609 610 def to_hdf(self, id_label="corems:"): 611 """Export the GC-MS data to a HDF file. 612 613 Parameters 614 ---------- 615 id_label : str, optional 616 Label of the ID. Defaults to 'corems:'. 617 618 """ 619 620 # returns pandas dataframe 621 exportMS = LowResGCMSExport(self.sample_name, self) 622 return exportMS.to_hdf(id_label=id_label) 623 624 def plot_chromatogram(self, ax=None, color="blue"): # pragma: no cover 625 """Plot the chromatogram. 626 627 Parameters 628 ---------- 629 ax : matplotlib.axes.Axes, optional 630 Axes object to plot the chromatogram. Defaults to None. 631 color : str, optional 632 Color of the chromatogram. Defaults to 'blue'. 
633 634 """ 635 636 import matplotlib.pyplot as plt 637 638 if ax is None: 639 ax = plt.gca() 640 641 ax.plot(self.retention_time, self.tic, color=color) 642 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 643 644 return ax 645 646 def plot_smoothed_chromatogram(self, ax=None, color="green"): # pragma: no cover 647 """Plot the smoothed chromatogram. 648 649 Parameters 650 ---------- 651 ax : matplotlib.axes.Axes, optional 652 Axes object to plot the smoothed chromatogram. Defaults to None. 653 color : str, optional 654 Color of the smoothed chromatogram. Defaults to 'green'. 655 656 """ 657 658 import matplotlib.pyplot as plt 659 660 if ax is None: 661 ax = plt.gca() 662 663 ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color) 664 665 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 666 667 return ax 668 669 def plot_detected_baseline(self, ax=None, color="blue"): # pragma: no cover 670 """Plot the detected baseline. 671 672 Parameters 673 ---------- 674 ax : matplotlib.axes.Axes, optional 675 Axes object to plot the detected baseline. Defaults to None. 676 color : str, optional 677 Color of the detected baseline. Defaults to 'blue'. 678 679 """ 680 681 import matplotlib.pyplot as plt 682 683 if ax is None: 684 ax = plt.gca() 685 686 max_height = self.chromatogram_settings.peak_height_max_percent 687 max_prominence = self.chromatogram_settings.peak_max_prominence_percent 688 689 baseline = sp.baseline_detector( 690 self.tic, self.retention_time, max_height, max_prominence 691 ) 692 ax.plot(self.retention_time, color=color) 693 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 694 695 return ax 696 697 def plot_baseline_subtraction(self, ax=None, color="black"): # pragma: no cover 698 """Plot the baseline subtraction. 699 700 Parameters 701 ---------- 702 ax : matplotlib.axes.Axes, optional 703 Axes object to plot the baseline subtraction. Defaults to None. 
704 color : str, optional 705 Color of the baseline subtraction. Defaults to 'black'. 706 707 """ 708 709 import matplotlib.pyplot as plt 710 711 if ax is None: 712 ax = plt.gca() 713 714 max_height = self.chromatogram_settings.peak_height_max_percent 715 716 max_prominence = self.chromatogram_settings.peak_max_prominence_percent 717 718 x = self.tic + sp.baseline_detector( 719 self.tic, self.retention_time, max_height, max_prominence 720 ) 721 722 ax.plot(self.retention_time, x, color=color) 723 724 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 725 726 return ax 727 728 def peaks_rt_tic(self, json_string=False): 729 """Return the peaks, retention time, and total ion chromatogram. 730 731 Parameters 732 ---------- 733 json_string : bool, optional 734 If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False. 735 736 """ 737 738 peaks_list = dict() 739 740 all_candidates_data = {} 741 742 all_peaks_data = {} 743 744 for gcms_peak in self.sorted_gcpeaks: 745 dict_data = { 746 "rt": gcms_peak.rt_list, 747 "tic": gcms_peak.tic_list, 748 "mz": gcms_peak.mass_spectrum.mz_exp.tolist(), 749 "abundance": gcms_peak.mass_spectrum.abundance.tolist(), 750 "candidate_names": gcms_peak.compound_names, 751 } 752 753 peaks_list[gcms_peak.retention_time] = dict_data 754 755 for compound in gcms_peak: 756 if compound.name not in all_candidates_data.keys(): 757 mz = array(compound.mz).tolist() 758 abundance = array(compound.abundance).tolist() 759 data = {"mz": mz, "abundance": abundance} 760 all_candidates_data[compound.name] = data 761 762 all_peaks_data["peak_data"] = peaks_list 763 all_peaks_data["ref_data"] = all_candidates_data 764 765 if json_string: 766 return json.dumps(all_peaks_data) 767 768 else: 769 return all_peaks_data 770 771 def plot_processed_chromatogram(self, ax=None, color="black"): 772 """Plot the processed chromatogram. 
773 774 Parameters 775 ---------- 776 ax : matplotlib.axes.Axes, optional 777 Axes object to plot the processed chromatogram. Defaults to None. 778 color : str, optional 779 Color of the processed chromatogram. Defaults to 'black'. 780 781 """ 782 783 import matplotlib.pyplot as plt 784 785 if ax is None: 786 ax = plt.gca() 787 788 ax.plot(self.retention_time, self.processed_tic, color=color) 789 790 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 791 792 return ax
Base class for GC-MS data processing.
Parameters
- file_location (str, pathlib.Path, or s3path.S3Path): Path object containing the file location.
- analyzer (str, optional): Name of the analyzer. Defaults to 'Unknown'.
- instrument_label (str, optional): Label of the instrument. Defaults to 'Unknown'.
- sample_name (str, optional): Name of the sample. If not provided, it is derived from the file location.
Attributes
- file_location (pathlib.Path): Path object containing the file location.
- sample_name (str): Name of the sample.
- analyzer (str): Name of the analyzer.
- instrument_label (str): Label of the instrument.
- gcpeaks (list): List of GCPeak objects.
- ri_pairs_ref (None): Reference retention index pairs.
- cal_file_path (None): Calibration file path.
- _parameters (GCMSParameters): GC-MS parameters.
- _retention_time_list (list): List of retention times.
- _scans_number_list (list): List of scan numbers.
- _tic_list (list): List of total ion chromatogram values.
- _ms (dict): Dictionary containing all mass spectra.
- _processed_tic (list): List of processed total ion chromatogram values.
Methods
- process_chromatogram(plot_res=False). Process the chromatogram.
- plot_gc_peaks(ax=None, color='red'). Plot the GC peaks.
def __init__(
    self,
    file_location,
    analyzer="Unknown",
    instrument_label="Unknown",
    sample_name=None,
):
    """Create an empty GC-MS container bound to an existing data file.

    Parameters
    ----------
    file_location : str, pathlib.Path, or s3path.S3Path
        Location of the data file. A plain string is wrapped in a
        ``pathlib.Path``; any other object is used as given.
    analyzer : str, optional
        Name of the analyzer. Defaults to 'Unknown'.
    instrument_label : str, optional
        Label of the instrument. Defaults to 'Unknown'.
    sample_name : str, optional
        Name of the sample. When empty or None, the stem of
        ``file_location`` is used instead.

    Raises
    ------
    FileExistsError
        If ``file_location`` does not exist.
    """
    # A string always becomes a local Path; pass an S3Path object if needed.
    location = Path(file_location) if isinstance(file_location, str) else file_location

    if not location.exists():
        raise FileExistsError("File does not exist: " + str(location))

    self.file_location = location
    self.sample_name = sample_name if sample_name else location.stem
    self.analyzer = analyzer
    self.instrument_label = instrument_label
    self._init_settings()

    # Per-scan bookkeeping, filled while the data is loaded.
    self._retention_time_list = []
    self._scans_number_list = []
    self._tic_list = []

    # All scans, keyed by scan number (see add_mass_spectrum).
    self._ms = {}

    # Populated after peak detection.
    self._processed_tic = []
    self.gcpeaks = []

    self.ri_pairs_ref = None
    self.cal_file_path = None
def process_chromatogram(self, plot_res=False):
    """Smooth the TIC and detect the chromatographic peaks.

    The smoothed TIC is stored in ``self._processed_tic`` and each value is
    also copied onto the matching mass spectrum in ``self._ms``. Peaks are
    then found either by ``run_deconvolution`` (when
    ``chromatogram_settings.use_deconvolution`` is true) or by
    ``centroid_detector``, in which case one ``GCPeak`` per detected peak is
    appended to ``self.gcpeaks``.

    Parameters
    ----------
    plot_res : bool, optional
        Forwarded to ``run_deconvolution``; it is not used on the
        centroid-detection path. Defaults to False.
    """
    self._processed_tic = self.smooth_tic(self.tic)

    # NOTE(review): the enumeration index is used as the key into self._ms,
    # which presumes scan numbers are 0-based and contiguous — confirm
    # against the data loaders.
    for index, tic in enumerate(self._processed_tic):
        self._ms[index]._processed_tic = tic

    if self.chromatogram_settings.use_deconvolution:
        # Honour the caller's request instead of always disabling plotting.
        self.run_deconvolution(plot_res=plot_res)

    else:
        peaks_index = self.centroid_detector(
            self._processed_tic, self.retention_time
        )

        for i in peaks_index:
            # The second element of each detected triple is the apex index.
            apex_index = i[1]

            gc_peak = GCPeak(self, self._ms[apex_index], i)
            gc_peak.calc_area(self._processed_tic, 1)

            self.gcpeaks.append(gc_peak)
Process the chromatogram.
This method processes the chromatogram.
Parameters
- plot_res (bool, optional): If True, plot the results. Defaults to False.
def add_mass_spectrum(self, mass_spec):
    """Register a mass spectrum under its scan number.

    Parameters
    ----------
    mass_spec : MassSpectrum
        Mass spectrum to store. Its ``scan_number`` is used as the key in
        ``self._ms``, so a spectrum with the same scan number replaces the
        one stored earlier.
    """
    scan_key = mass_spec.scan_number
    self._ms[scan_key] = mass_spec
Add a mass spectrum to the GC-MS object.
This method adds a mass spectrum to the GC-MS object.
Parameters
- mass_spec (MassSpectrum): Mass spectrum to be added.
def set_tic_list_from_data(self):
    """Rebuild ``self.tic`` from the stored mass spectra.

    The TIC of each spectrum is collected in the order given by
    ``self.scans_number``.
    """
    tic_values = []
    for scan in self.scans_number:
        spectrum = self._ms.get(scan)
        tic_values.append(spectrum.tic)

    self.tic = tic_values
Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.
def set_retention_time_from_data(self):
    """Rebuild ``self.retention_time`` from the stored mass spectra.

    Retention times are collected in ascending order of the keys of
    ``self._ms``.
    """
    self.retention_time = [
        self._ms.get(scan).retention_time for scan in sorted(self._ms.keys())
    ]
Set the retention time list from the mass spectra data within the GC-MS data object.
def set_scans_number_from_data(self):
    """Rebuild ``self.scans_number`` as the sorted keys of ``self._ms``."""
    ordered_scans = list(self._ms.keys())
    ordered_scans.sort()
    self.scans_number = ordered_scans
Set the scan number list from the mass spectra data within the GC-MS data object.
def plot_gc_peaks(self, ax=None, color="red"):  # pragma: no cover
    """Mark every GC peak apex on a plot and attach a hover annotation.

    Each peak is drawn at the retention time and TIC of its apex mass
    spectrum. Hovering over a marker shows the retention time and retention
    index of the peak, plus the retention time, retention index, similarity
    score and name of its highest scoring compound ('None' where absent).

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the GC peaks. Defaults to None, in which case
        the current pyplot axes are used.
    color : str, optional
        Color of the GC peaks. Defaults to 'red'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the peaks were drawn on.
    """
    if ax is None:
        # pyplot is only needed to look up the current axes.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    # Register the hover callback on the figure that owns the axes; the
    # current pyplot figure may be a different one when ax is supplied.
    fig = ax.figure

    max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self]
    max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self]

    sc = ax.scatter(max_rts, max_tics, color=color, marker="v")

    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    annot = ax.annotate(
        "",
        xy=(0, 0),
        xytext=(20, 20),
        textcoords="offset points",
        bbox=dict(boxstyle="round", fc="w"),
        arrowprops=dict(arrowstyle="->"),
    )
    annot.set_visible(False)
    annot.get_bbox_patch().set_facecolor("lightblue")
    annot.get_bbox_patch().set_alpha(0.8)

    def best_match_field(indexes, getter):
        # One space-separated entry per hovered peak; 'None' when the peak
        # has no highest scoring compound.
        entries = []
        for n in indexes:
            compound = self[n].highest_score_compound
            entries.append(str(getter(compound) if compound else None))
        return " ".join(entries)

    def update_annot(ind):
        indexes = ind["ind"]
        # Anchor the annotation on the first marker under the cursor.
        annot.xy = sc.get_offsets()[indexes[0]]

        text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(
            " ".join(str(round(self[n].retention_time, 2)) for n in indexes),
            best_match_field(indexes, lambda c: round(c.retention_time, 2)),
            " ".join(
                str(round(self[n].ri, 2) if self[n].ri else None) for n in indexes
            ),
            best_match_field(indexes, lambda c: round(c.ri, 2)),
            best_match_field(indexes, lambda c: round(c.similarity_score, 4)),
            best_match_field(indexes, lambda c: c.name),
        )
        annot.set_text(text)

    def hover(event):
        vis = annot.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                # Cursor left the marker: hide the annotation if it is shown.
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", hover)

    return ax
Plot the GC peaks.
This method plots the GC peaks.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the GC peaks. Defaults to None.
- color (str, optional): Color of the GC peaks. Defaults to 'red'.
def to_excel(
    self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:"
):
    """Export the GC-MS data to an Excel file.

    The export itself is delegated to ``LowResGCMSExport.to_excel``.

    Parameters
    ----------
    out_file_path : str, pathlib.Path, or s3path.S3Path
        Output location. A plain string is wrapped in a ``pathlib.Path``.
    write_mode : str, optional
        Write mode passed to the exporter. Defaults to 'ab'.
    write_metadata : bool, optional
        If True, write the metadata. Defaults to True.
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    pathlib.Path or s3path.S3Path
        ``out_file_path`` with its suffix replaced by '.xlsx'.
    """
    target = Path(out_file_path) if isinstance(out_file_path, str) else out_file_path

    exporter = LowResGCMSExport(target, self)
    exporter.to_excel(
        id_label=id_label, write_mode=write_mode, write_metadata=write_metadata
    )

    return target.with_suffix(".xlsx")
Export the GC-MS data to an Excel file.
This method exports the GC-MS data to an Excel file.
Parameters
- out_file_path (str, pathlib.Path, or s3path.S3Path): Path object containing the file location.
- write_mode (str, optional): Write mode. Defaults to 'ab'.
- write_metadata (bool, optional): If True, write the metadata. Defaults to True.
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def to_csv(
    self,
    out_file_path,
    separate_output=False,
    write_metadata=True,
    id_label="corems:",
):
    """Export the GC-MS data to a CSV file.

    The export itself is delegated to ``LowResGCMSExport.to_csv``.

    Parameters
    ----------
    out_file_path : str, pathlib.Path, or s3path.S3Path
        Output location. A plain string is wrapped in a ``pathlib.Path``.
    separate_output : bool, optional
        If True, separate the output. Defaults to False.
    write_metadata : bool, optional
        If True, write the metadata. Defaults to True.
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    pathlib.Path or s3path.S3Path
        ``out_file_path`` with its suffix replaced by '.csv'.
    """
    target = Path(out_file_path) if isinstance(out_file_path, str) else out_file_path

    exporter = LowResGCMSExport(target, self)
    exporter.to_csv(
        id_label=id_label,
        separate_output=separate_output,
        write_metadata=write_metadata,
    )

    return target.with_suffix(".csv")
Export the GC-MS data to a CSV file.
Parameters
- out_file_path (str, pathlib.Path, or s3path.S3Path): Path object containing the file location.
- separate_output (bool, optional): If True, separate the output. Defaults to False.
- write_metadata (bool, optional): If True, write the metadata. Defaults to True.
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"):
    """Export the GC-MS data as a pickled Pandas dataframe.

    The export itself is delegated to ``LowResGCMSExport.to_pandas``.

    Parameters
    ----------
    out_file_path : str, pathlib.Path, or s3path.S3Path
        Output location. A plain string is wrapped in a ``pathlib.Path``.
    write_metadata : bool, optional
        If True, write the metadata. Defaults to True.
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    pathlib.Path or s3path.S3Path
        ``out_file_path`` with its suffix replaced by '.pkl'.
    """
    target = Path(out_file_path) if isinstance(out_file_path, str) else out_file_path

    # The exporter pickles the dataframe (pkl extension).
    exporter = LowResGCMSExport(target, self)
    exporter.to_pandas(id_label=id_label, write_metadata=write_metadata)

    return target.with_suffix(".pkl")
Export the GC-MS data to a pickled Pandas dataframe (.pkl file).
Parameters
- out_file_path (str, pathlib.Path, or s3path.S3Path): Path object containing the file location.
- write_metadata (bool, optional): If True, write the metadata. Defaults to True.
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def to_dataframe(self, id_label="corems:"):
    """Return the GC-MS data as produced by the exporter's ``get_pandas_df``.

    Parameters
    ----------
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    object
        Whatever ``LowResGCMSExport.get_pandas_df`` returns (presumably a
        pandas DataFrame — confirm against the exporter).
    """
    # The sample name stands in for the output location; nothing is written here.
    return LowResGCMSExport(self.sample_name, self).get_pandas_df(id_label=id_label)
Export the GC-MS data to a Pandas dataframe.
Parameters
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def processing_stats(self):
    """Return the processing statistics computed by the exporter.

    Returns
    -------
    object
        Whatever ``LowResGCMSExport.get_data_stats`` returns for this object.
    """
    # The sample name stands in for the output location.
    exporter = LowResGCMSExport(self.sample_name, self)
    stats = exporter.get_data_stats(self)
    return stats
Return the processing statistics.
def parameters_json(self, id_label="corems:", output_path=" "):
    """Return the parameters as produced by the exporter's ``get_parameters_json``.

    Parameters
    ----------
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.
    output_path : str, optional
        Output location forwarded to the exporter. Defaults to " ".

    Returns
    -------
    object
        Whatever ``LowResGCMSExport.get_parameters_json`` returns
        (presumably a JSON string — confirm against the exporter).
    """
    # The sample name stands in for the exporter's output location.
    exporter = LowResGCMSExport(self.sample_name, self)
    parameters = exporter.get_parameters_json(self, id_label, output_path)
    return parameters
Return the parameters in JSON format.
Parameters
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
- output_path (str, optional): Path object containing the file location. Defaults to " ".
def to_json(self, id_label="corems:"):
    """Return the GC-MS data as produced by the exporter's ``get_json``.

    Parameters
    ----------
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    object
        Whatever ``LowResGCMSExport.get_json`` returns (presumably a JSON
        string — confirm against the exporter).
    """
    # The sample name stands in for the output location; nothing is written here.
    return LowResGCMSExport(self.sample_name, self).get_json(id_label=id_label)
Export the GC-MS data to a JSON file.
Parameters
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def to_hdf(self, id_label="corems:"):
    """Export the GC-MS data through the exporter's ``to_hdf``.

    Parameters
    ----------
    id_label : str, optional
        Label of the ID. Defaults to 'corems:'.

    Returns
    -------
    object
        Whatever ``LowResGCMSExport.to_hdf`` returns.
    """
    # The exporter is created with the sample name as its output location.
    return LowResGCMSExport(self.sample_name, self).to_hdf(id_label=id_label)
Export the GC-MS data to a HDF file.
Parameters
- id_label (str, optional): Label of the ID. Defaults to 'corems:'.
def plot_chromatogram(self, ax=None, color="blue"):  # pragma: no cover
    """Plot the total ion chromatogram against retention time.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the chromatogram. Defaults to None, in which
        case the current pyplot axes are used.
    color : str, optional
        Color of the chromatogram. Defaults to 'blue'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the chromatogram was drawn on.
    """
    if ax is None:
        # pyplot is imported only when the current axes must be looked up.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    ax.plot(self.retention_time, self.tic, color=color)
    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    return ax
Plot the chromatogram.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the chromatogram. Defaults to None.
- color (str, optional): Color of the chromatogram. Defaults to 'blue'.
def plot_smoothed_chromatogram(self, ax=None, color="green"):  # pragma: no cover
    """Plot the smoothed total ion chromatogram against retention time.

    The curve is ``self.smooth_tic(self.tic)``, computed on each call.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the smoothed chromatogram. Defaults to None, in
        which case the current pyplot axes are used.
    color : str, optional
        Color of the smoothed chromatogram. Defaults to 'green'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the smoothed chromatogram was drawn on.
    """
    if ax is None:
        # pyplot is imported only when the current axes must be looked up.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color)
    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    return ax
Plot the smoothed chromatogram.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the smoothed chromatogram. Defaults to None.
- color (str, optional): Color of the smoothed chromatogram. Defaults to 'green'.
def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
    """Plot the baseline detected on the total ion chromatogram.

    The baseline is computed by ``sp.baseline_detector`` from the TIC, the
    retention times and the peak height / prominence limits stored in
    ``self.chromatogram_settings``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the detected baseline. Defaults to None, in
        which case the current pyplot axes are used.
    color : str, optional
        Color of the detected baseline. Defaults to 'blue'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the baseline was drawn on.
    """
    if ax is None:
        # pyplot is imported only when the current axes must be looked up.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    max_height = self.chromatogram_settings.peak_height_max_percent
    max_prominence = self.chromatogram_settings.peak_max_prominence_percent

    baseline = sp.baseline_detector(
        self.tic, self.retention_time, max_height, max_prominence
    )

    # Plot the baseline against retention time; previously the computed
    # baseline was discarded and only the retention times were drawn.
    ax.plot(self.retention_time, baseline, color=color)
    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    return ax
Plot the detected baseline.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the detected baseline. Defaults to None.
- color (str, optional): Color of the detected baseline. Defaults to 'blue'.
def plot_baseline_subtraction(self, ax=None, color="black"):  # pragma: no cover
    """Plot the total ion chromatogram with the detected baseline subtracted.

    The baseline is computed by ``sp.baseline_detector`` from the TIC, the
    retention times and the peak height / prominence limits stored in
    ``self.chromatogram_settings``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the baseline subtraction. Defaults to None, in
        which case the current pyplot axes are used.
    color : str, optional
        Color of the baseline subtraction. Defaults to 'black'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the corrected chromatogram was drawn on.
    """
    if ax is None:
        # pyplot is imported only when the current axes must be looked up.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    max_height = self.chromatogram_settings.peak_height_max_percent
    max_prominence = self.chromatogram_settings.peak_max_prominence_percent

    baseline = sp.baseline_detector(
        self.tic, self.retention_time, max_height, max_prominence
    )

    # Subtract (not add) the baseline, as the method name states.
    # NOTE(review): assumes baseline_detector returns the baseline values
    # on the same grid as self.tic — confirm against SignalProcessing.
    corrected_tic = self.tic - baseline

    ax.plot(self.retention_time, corrected_tic, color=color)
    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    return ax
Plot the baseline subtraction.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the baseline subtraction. Defaults to None.
- color (str, optional): Color of the baseline subtraction. Defaults to 'black'.
def peaks_rt_tic(self, json_string=False):
    """Collect per-peak traces and the reference spectra of all candidates.

    Parameters
    ----------
    json_string : bool, optional
        If True, return the result serialized with ``json.dumps``.
        Defaults to False.

    Returns
    -------
    dict or str
        A mapping with two entries. 'peak_data' maps each peak retention
        time to its 'rt', 'tic', 'mz', 'abundance' and 'candidate_names'.
        'ref_data' maps each candidate compound name to its 'mz' and
        'abundance' lists; the first occurrence of a name wins.
    """
    peak_data = {}
    ref_data = {}

    for peak in self.sorted_gcpeaks:
        peak_data[peak.retention_time] = {
            "rt": peak.rt_list,
            "tic": peak.tic_list,
            "mz": peak.mass_spectrum.mz_exp.tolist(),
            "abundance": peak.mass_spectrum.abundance.tolist(),
            "candidate_names": peak.compound_names,
        }

        for candidate in peak:
            # Keep only the first spectrum seen for a given compound name.
            if candidate.name in ref_data:
                continue
            ref_data[candidate.name] = {
                "mz": array(candidate.mz).tolist(),
                "abundance": array(candidate.abundance).tolist(),
            }

    result = {"peak_data": peak_data, "ref_data": ref_data}

    return json.dumps(result) if json_string else result
Return the peaks, retention time, and total ion chromatogram.
Parameters
- json_string (bool, optional): If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.
def plot_processed_chromatogram(self, ax=None, color="black"):
    """Plot the processed total ion chromatogram against retention time.

    The curve is ``self.processed_tic``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        Axes object to plot the processed chromatogram. Defaults to None,
        in which case the current pyplot axes are used.
    color : str, optional
        Color of the processed chromatogram. Defaults to 'black'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the processed chromatogram was drawn on.
    """
    if ax is None:
        # pyplot is imported only when the current axes must be looked up.
        import matplotlib.pyplot as plt

        ax = plt.gca()

    ax.plot(self.retention_time, self.processed_tic, color=color)
    ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")

    return ax
Plot the processed chromatogram.
Parameters
- ax (matplotlib.axes.Axes, optional): Axes object to plot the processed chromatogram. Defaults to None.
- color (str, optional): Color of the processed chromatogram. Defaults to 'black'.