corems.chroma_peak.factory.chroma_peak_classes
__author__ = "Yuri E. Corilo"
__date__ = "Jun 12, 2019"

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import copy

from corems.chroma_peak.calc.ChromaPeakCalc import (
    GCPeakCalculation,
    LCMSMassFeatureCalculation,
)
from corems.mass_spectra.factory.chromat_data import EIC_Data
from corems.molecular_id.factory.EI_SQL import LowResCompoundRef


class ChromaPeakBase:
    """Base class for chromatographic peak (ChromaPeak) objects.

    Parameters
    -------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum_obj : MassSpectrum
        The mass spectrum object.
    start_index : int
        The start index of the peak, stored as `start_scan`.
    index : int
        The apex index of the peak, cast to `int` and stored as `apex_scan`.
    final_index : int
        The final index of the peak, stored as `final_scan`.

    Attributes
    --------
    start_scan : int
        The start scan of the peak.
    final_scan : int
        The final scan of the peak.
    apex_scan : int
        The apex scan of the peak.
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum : MassSpectrum
        The mass spectrum object.
    _area : float
        The area of the peak. Initialised to None.

    Properties
    --------
    * retention_time : float.
        The retention time of the peak (read from the mass spectrum).
    * tic : float.
        The total ion current of the peak (read from the mass spectrum).
    * area : float.
        The area of the peak.
    * rt_list : list.
        The parent's retention times from `start_scan` to `final_scan`, inclusive.
    * tic_list : list.
        The parent's total ion currents from `start_scan` to `final_scan`, inclusive.

    Methods
    --------
    * None
    """

    def __init__(
        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    ):
        self.start_scan = start_index
        self.final_scan = final_index
        self.apex_scan = int(index)
        self.chromatogram_parent = chromatogram_parent
        self.mass_spectrum = mass_spectrum_obj
        self._area = None

    @property
    def retention_time(self):
        """Retention Time"""
        return self.mass_spectrum.retention_time

    @property
    def tic(self):
        """Total Ion Current"""
        return self.mass_spectrum.tic

    @property
    def area(self):
        """Peak Area"""
        return self._area

    @property
    def rt_list(self):
        """Retention Time List"""
        return [
            self.chromatogram_parent.retention_time[i]
            for i in range(self.start_scan, self.final_scan + 1)
        ]

    @property
    def tic_list(self):
        """Total Ion Current List"""
        return [
            self.chromatogram_parent.tic[i]
            for i in range(self.start_scan, self.final_scan + 1)
        ]


class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.

    Parameters
    -------
    lcms_parent : LCMS
        The parent LCMSBase object.
    mz : float
        The observed mass to charge ratio of the feature.
    retention_time : float
        The retention time of the feature (in minutes), at the apex.
    intensity : float
        The intensity of the feature.
    apex_scan : int
        The scan number of the apex of the feature.
    persistence : float, optional
        The persistence of the feature. Default is None.
    id : int, optional
        The ID of the feature. Default is None, in which case the ID is one
        more than the largest key of the parent's `mass_features` dictionary
        (or 0 if that dictionary is empty).

    Attributes
    --------
    _mz_exp : float
        The observed mass to charge ratio of the feature.
    _mz_cal : float
        The calibrated mass to charge ratio of the feature.
    _retention_time : float
        The retention time of the feature (in minutes), at the apex.
    _apex_scan : int
        The scan number of the apex of the feature.
    _intensity : float
        The intensity of the feature.
    _persistence : float
        The persistence of the feature.
    _eic_data : EIC_Data
        The EIC data object associated with the feature.
    _dispersity_index : float
        The dispersity index of the feature, in minutes.
    _normalized_dispersity_index : float
        The normalized dispersity index of the feature (unitless, fraction of total window used to calculate dispersity index).
    _half_height_width : numpy.ndarray
        The half height width of the feature (in minutes, as an array of min and max values).
    _tailing_factor : float
        The tailing factor of the feature.
        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    _noise_score : tuple
        The noise score of the feature, as a tuple of (left, right) scores.
        Each score is a float, with higher values indicating better signal to noise.
    _gaussian_similarity : float
        The Gaussian similarity of the feature, as a float between 0 and 1.
        1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
    _ms_deconvoluted_idx : [int]
        The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    is_calibrated : bool
        If True, the feature has been calibrated. Default is False.
        NOTE(review): this attribute is not set by `__init__` in this module;
        confirm where it is defined.
    monoisotopic_mf_id : int
        Mass feature id that is the monoisotopic version of self.
        If equal to self.id, then self is the monoisotopic feature. Default is None.
    isotopologue_type : str
        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc.
        Default is None.
    ms2_scan_numbers : list
        List of scan numbers of the MS2 spectra associated with the feature.
        Default is an empty list.
    ms2_mass_spectra : dict
        Dictionary of MS2 spectra associated with the feature (key = scan number for DDA).
        Default is an empty dictionary.
    ms2_similarity_results : list
        List of MS2 similarity results associated with the mass feature.
        Default is an empty list.
    id : int
        The ID of the feature, also the key in the parent LCMS object's
        `mass_features` dictionary.
    mass_spectrum_deconvoluted_parent : bool
        If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    associated_mass_features_deconvoluted : list
        List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.

    """

    def __init__(
        self,
        lcms_parent,
        mz: float,
        retention_time: float,
        intensity: float,
        apex_scan: int,
        persistence: float = None,
        id: int = None,
    ):
        super().__init__(
            chromatogram_parent=lcms_parent,
            mass_spectrum_obj=None,
            start_index=None,
            index=apex_scan,
            final_index=None,
        )
        # Core attributes, marked as private
        self._mz_exp: float = mz
        self._mz_cal: float = None
        self._retention_time: float = retention_time
        self._apex_scan: int = apex_scan
        self._intensity: float = intensity
        self._persistence: float = persistence
        self._eic_data: EIC_Data = None
        self._dispersity_index: float = None
        self._normalized_dispersity_index: float = None
        self._half_height_width: np.ndarray = None
        self._ms_deconvoluted_idx = None
        self._tailing_factor: float = None
        self._noise_score: tuple = None
        self._gaussian_similarity: float = None

        # Additional attributes
        self.monoisotopic_mf_id = None
        self.isotopologue_type = None
        self.ms2_scan_numbers = []
        self.ms2_mass_spectra = {}
        self.ms2_similarity_results = []
        self.mass_spectrum_deconvoluted_parent: bool = None
        self.associated_mass_features_deconvoluted = []

        # Compare against None rather than truthiness: 0 is a valid ID (it is
        # the first one generated below) and must not be replaced.
        if id is not None:
            self.id = id
        else:
            # get the parent's mass feature keys and add 1 to the max value to get the new key
            existing_ids = lcms_parent.mass_features.keys()
            self.id = max(existing_ids) + 1 if existing_ids else 0

    def update_mz(self):
        """Update the mass to charge ratio from the mass spectrum object.

        The observed m/z is replaced by the m/z of the closest MS1 peak, but
        only when the two differ by less than 0.01; otherwise it is left as is.

        Raises
        ------
        ValueError
            If no mass spectrum is set, or if it has no m/z values.
        """
        if self.mass_spectrum is None:
            raise ValueError(
                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
            )
        if len(self.mass_spectrum.mz_exp) == 0:
            raise ValueError(
                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
            )
        new_mz = self.ms1_peak.mz_exp

        # calculate the difference between the new and old m/z, only update if it is close
        mz_diff = new_mz - self.mz
        if abs(mz_diff) < 0.01:
            self._mz_exp = new_mz

    def plot(
        self,
        to_plot=None,
        return_fig=True,
        plot_smoothed_eic=False,
        plot_eic_datapoints=False,
    ):
        """Plot the mass feature.

        Parameters
        ----------
        to_plot : list, optional
            List of strings specifying what to plot, any iteration of
            "EIC", "MS2", and "MS1". Panels whose data is not available on
            the mass feature are dropped.
            Default is None, which means ["EIC", "MS1", "MS2"].
        return_fig : bool, optional
            If True, the figure is closed in pyplot and returned. Default is True.
        plot_smoothed_eic : bool, optional
            If True, the smoothed EIC is plotted. Default is False.
        plot_eic_datapoints : bool, optional
            If True, the EIC data points are plotted. Default is False.

        Returns
        -------
        matplotlib.figure.Figure or None
            The figure object if `return_fig` is True.
            Otherwise None, and the figure is left open in pyplot.

        Raises
        ------
        ValueError
            If nothing is left to plot after dropping unavailable panels.
        """
        if to_plot is None:
            to_plot = ["EIC", "MS1", "MS2"]

        # EIC plot preparation
        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time

        # Adjust to_plot list if there are not spectra added to the mass features
        unavailable = set()
        if self.mass_spectrum is None:
            unavailable.add("MS1")
        if len(self.ms2_mass_spectra) == 0:
            unavailable.add("MS2")
        if self._eic_data is None:
            unavailable.add("EIC")
        to_plot = [x for x in to_plot if x not in unavailable]
        if len(to_plot) == 0:
            # plt.subplots rejects zero rows; fail with an actionable message
            raise ValueError(
                "Nothing to plot for mass feature "
                + str(self.id)
                + ": no EIC, MS1 or MS2 data is available for the requested panels"
            )
        deconvoluted = self._ms_deconvoluted_idx is not None

        # best_ms2 runs a scoring routine, so resolve it once and before the
        # figure is created (a failure then leaves no dangling figure).
        best_ms2 = None
        if "MS2" in to_plot or ("EIC" in to_plot and len(self.ms2_scan_numbers) > 0):
            best_ms2 = self.best_ms2

        fig, axs = plt.subplots(
            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
        )
        fig.suptitle(
            f"Mass Feature {self.id}: m/z = {round(self.mz, ndigits=4)}"
            f"; time = {round(self.retention_time, ndigits=1)} minutes"
        )

        i = 0
        # EIC plot
        if "EIC" in to_plot:
            ax = axs[i][0]
            eic_data = self._eic_data
            ax.set_title("EIC", loc="left")
            ax.plot(eic_data.time, eic_data.eic, c="tab:blue", label="EIC")
            if plot_eic_datapoints:
                ax.scatter(
                    eic_data.time,
                    eic_data.eic,
                    c="tab:blue",
                    label="EIC Data Points",
                )
            if plot_smoothed_eic:
                ax.plot(
                    eic_data.time,
                    eic_data.eic_smoothed,
                    c="tab:red",
                    label="Smoothed EIC",
                )
            if self.start_scan is not None and self.final_scan is not None:
                peak_eic = self.eic_list
                ax.fill_between(self.eic_rt_list, peak_eic, color="b", alpha=0.2)
                y_max = np.max(peak_eic)
            else:
                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                    print(
                        "No start and final scan numbers were provided for mass feature "
                        + str(self.id)
                    )
                # Without integration bounds there is no peak slice to scale
                # to, so scale the axis to the whole EIC trace instead.
                y_max = np.max(eic_data.eic)
            ax.set_ylabel("Intensity")
            ax.set_xlabel("Time (minutes)")
            ax.set_ylim(0, y_max * 1.1)
            ax.set_xlim(
                self.retention_time - eic_buffer_time,
                self.retention_time + eic_buffer_time,
            )
            ax.axvline(x=self.retention_time, color="k", label="MS1 scan time (apex)")
            if len(self.ms2_scan_numbers) > 0 and best_ms2 is not None:
                ax.axvline(
                    x=self.chromatogram_parent.get_time_of_scan_id(
                        best_ms2.scan_number
                    ),
                    color="grey",
                    linestyle="--",
                    label="MS2 scan time",
                )
            ax.legend(loc="upper left")
            ax.yaxis.get_major_formatter().set_useOffset(False)
            i += 1

        # MS1 plot
        if "MS1" in to_plot:
            ax = axs[i][0]
            if deconvoluted:
                # The property deep-copies the spectrum; fetch it once.
                ms_deconvoluted = self.mass_spectrum_deconvoluted
                ax.set_title("MS1 (deconvoluted)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    alpha=0.2,
                    label="Raw MS1",
                )
                ax.vlines(
                    ms_deconvoluted.mz_exp,
                    0,
                    ms_deconvoluted.abundance,
                    color="k",
                    label="Deconvoluted MS1",
                )
                ax.set_xlim(
                    ms_deconvoluted.mz_exp.min() * 0.8,
                    ms_deconvoluted.mz_exp.max() * 1.1,
                )
                ax.set_ylim(0, ms_deconvoluted.abundance.max() * 1.1)
            else:
                ax.set_title("MS1 (raw)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    label="Raw MS1",
                )
                ax.set_xlim(
                    self.mass_spectrum.mz_exp.min() * 0.8,
                    self.mass_spectrum.mz_exp.max() * 1.1,
                )
                ax.set_ylim(bottom=0)

            ms1_peak = self.ms1_peak
            # Absolute difference: a signed test would accept any MS1 peak
            # lying below the feature m/z, however far away it is.
            if abs(ms1_peak.mz_exp - self.mz) < 0.01:
                ax.vlines(
                    ms1_peak.mz_exp,
                    0,
                    ms1_peak.abundance,
                    color="m",
                    label="Feature m/z",
                )

            else:
                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                    print(
                        "The m/z of the mass feature "
                        + str(self.id)
                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
                    )
            ax.legend(loc="upper left")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.yaxis.set_tick_params(labelleft=False)
            i += 1

        # MS2 plot
        if "MS2" in to_plot:
            ax = axs[i][0]
            ax.set_title("MS2", loc="left")
            ax.vlines(best_ms2.mz_exp, 0, best_ms2.abundance, color="k")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.set_ylim(bottom=0)
            ax.yaxis.get_major_formatter().set_scientific(False)
            ax.yaxis.get_major_formatter().set_useOffset(False)
            ax.set_xlim(best_ms2.mz_exp.min() * 0.8, best_ms2.mz_exp.max() * 1.1)
            ax.yaxis.set_tick_params(labelleft=False)

        # Add space between subplots
        fig.tight_layout()

        if return_fig:
            # Close figure
            plt.close(fig)
            return fig
        return None

    @property
    def mz(self):
        """Mass to charge ratio of the mass feature"""
        # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z
        if self._mz_cal is not None:
            return self._mz_cal
        else:
            return self._mz_exp

    @property
    def mass_spectrum_deconvoluted(self):
        """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.

        A deep copy of the mass spectrum is made on every access and restricted
        to the deconvoluted indexes; the stored mass spectrum is not modified.

        Raises
        ------
        ValueError
            If deconvolution has not been performed for this mass feature.
        """
        if self._ms_deconvoluted_idx is not None:
            ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
            ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
            return ms_deconvoluted
        else:
            raise ValueError(
                "Deconvolution has not been performed for mass feature " + str(self.id)
            )

    @property
    def retention_time(self):
        """Retention time of the mass feature"""
        return self._retention_time

    @retention_time.setter
    def retention_time(self, value):
        """Set the retention time of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The retention time of the mass feature must be a float")
        self._retention_time = value

    @property
    def apex_scan(self):
        """Apex scan of the mass feature"""
        return self._apex_scan

    @apex_scan.setter
    def apex_scan(self, value):
        """Set the apex scan of the mass feature"""
        if not isinstance(value, int):
            raise ValueError("The apex scan of the mass feature must be an integer")
        self._apex_scan = value

    @property
    def intensity(self):
        """Intensity of the mass feature"""
        return self._intensity

    @intensity.setter
    def intensity(self, value):
        """Set the intensity of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The intensity of the mass feature must be a float")
        self._intensity = value

    @property
    def persistence(self):
        """Persistence of the mass feature"""
        return self._persistence

    @persistence.setter
    def persistence(self, value):
        """Set the persistence of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The persistence of the mass feature must be a float")
        self._persistence = value

    def _eic_peak_slice(self):
        """Return the slice of the EIC arrays spanning start_scan to final_scan, inclusive."""
        scans = self._eic_data.scans.tolist()
        return slice(scans.index(self.start_scan), scans.index(self.final_scan) + 1)

    @property
    def eic_rt_list(self):
        """Retention time list between the beginning and end of the mass feature"""
        return self._eic_data.time[self._eic_peak_slice()]

    @property
    def eic_list(self):
        """EIC List between the beginning and end of the mass feature"""
        return self._eic_data.eic[self._eic_peak_slice()]

    @property
    def ms1_peak(self):
        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
        # Index of the value in self.mass_spectrum.mz_exp that is closest to
        # self.mz (the first one, if several are equally close)
        mz_values = np.asarray(self.mass_spectrum.mz_exp)
        closest_mz_index = int(np.argmin(np.abs(mz_values - self.mz)))

        return self.mass_spectrum._mspeaks[closest_mz_index]

    @property
    def tailing_factor(self):
        """Tailing factor of the mass feature"""
        return self._tailing_factor

    @tailing_factor.setter
    def tailing_factor(self, value):
        """Set the tailing factor of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The tailing factor of the mass feature must be a float")
        self._tailing_factor = value

    @property
    def dispersity_index(self):
        """Dispersity index of the mass feature"""
        return self._dispersity_index

    @dispersity_index.setter
    def dispersity_index(self, value):
        """Set the dispersity index of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The dispersity index of the mass feature must be a float")
        self._dispersity_index = value

    @property
    def normalized_dispersity_index(self):
        """Normalized dispersity index of the mass feature, unitless (fraction of total window used)"""
        return self._normalized_dispersity_index

    @property
    def half_height_width(self):
        """Half height width of the mass feature, average of min and max values, in minutes"""
        return np.mean(self._half_height_width)

    @property
    def noise_score(self):
        """Mean of left and right noise scores.

        Returns
        -------
        float or np.nan
            Mean noise score. If only one side is np.nan, the other side is
            returned; np.nan if both sides are np.nan or no score is set.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values
        if np.isnan(left) and np.isnan(right):
            return np.nan
        elif np.isnan(left):
            return right
        elif np.isnan(right):
            return left
        else:
            return (left + right) / 2.0

    @property
    def noise_score_min(self):
        """Minimum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Minimum noise score, or np.nan if both sides are np.nan or no
            score is set.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmin ignores NaN
        return np.nanmin([left, right])

    @property
    def noise_score_max(self):
        """Maximum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Maximum noise score, or np.nan if both sides are np.nan or no
            score is set.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmax ignores NaN
        return np.nanmax([left, right])

    @property
    def best_ms2(self):
        """Points to the best representative MS2 mass spectrum

        Notes
        -----
        If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.
        Otherwise, if there is only one MS2 mass spectrum, it will be returned.
        Otherwise, each non-empty MS2 mass spectrum is scored by the closeness of its scan time to the apex of the mass feature
        multiplied by its relative resolving power, and the highest score wins.
        Possible chimeric spectra (nearest peak between 0.1 and 0.8 m/z away from the feature m/z) are disqualified,
        unless every non-empty spectrum is a possible chimeric, in which case the best of those is returned.

        Returns
        -------
        MassSpectrum or None
            The best MS2 mass spectrum, or None if there are no MS2 mass spectra.

        Raises
        ------
        ValueError
            If there are several MS2 mass spectra and all of them are empty.
        """
        if len(self.ms2_similarity_results) > 0:
            # the scan number with the highest similarity score
            results_df = pd.concat(
                [x.to_dataframe() for x in self.ms2_similarity_results]
            )
            results_df = results_df.sort_values(
                by="entropy_similarity", ascending=False
            )
            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
            return self.ms2_mass_spectra[best_scan_number]

        ms2_scans = list(self.ms2_mass_spectra.keys())
        if len(ms2_scans) == 0:  # if no ms2 spectra, return None
            return None
        if len(ms2_scans) == 1:  # if only one ms2 spectra, return it
            return self.ms2_mass_spectra[ms2_scans[0]]

        # Per-scan metrics; entries for empty spectra stay NaN and can never win
        n_scans = len(ms2_scans)
        mz_diffs = np.full(n_scans, np.nan)  # distance of nearest peak to the feature m/z
        resolutions = np.full(n_scans, np.nan)  # maximum resolving power in the scan
        time_diffs = np.full(n_scans, np.nan)  # time between the scan and the apex
        for i, scan in enumerate(ms2_scans):
            spectrum = self.ms2_mass_spectra[scan]
            if len(spectrum.mspeaks) == 0:
                continue
            mz_diffs[i] = np.min(np.abs(np.asarray(spectrum.mz_exp) - self.mz))
            if np.all(np.isnan(spectrum.resolving_power)):
                # All NA for resolving power in peaks, not uncommon in CID spectra
                resolutions[i] = 2  # Assumes very low resolving power
            else:
                resolutions[i] = np.nanmax(spectrum.resolving_power)
            time_diffs[i] = np.abs(
                self.chromatogram_parent.get_time_of_scan_id(scan)
                - self.retention_time
            )

        if np.all(np.isnan(time_diffs)):
            raise ValueError(
                "No best MS2 mass spectrum could be found for mass feature "
                + str(self.id)
            )

        # Convert the metrics into scores (higher is better for each score).
        # A zero maximum means every usable scan is equally good; score those
        # 1 rather than dividing by zero.
        max_time_diff = np.nanmax(time_diffs)
        if max_time_diff > 0:
            time_score = 1 - time_diffs / max_time_diff
        else:
            time_score = np.where(np.isnan(time_diffs), np.nan, 1.0)
        max_resolution = np.nanmax(resolutions)
        if max_resolution > 0:
            res_score = resolutions / max_resolution
        else:
            res_score = np.where(np.isnan(resolutions), np.nan, 1.0)
        base_score = time_score * res_score

        # Possible chimerics (already within mass tolerance before assigning);
        # NaN compares False, so empty spectra are not flagged here.
        with np.errstate(invalid="ignore"):
            chimeric = (mz_diffs > 0.1) & (mz_diffs < 0.8)
        non_chimeric_score = np.where(chimeric, np.nan, base_score)

        # Prefer non-chimeric spectra; if none is left, drop that condition
        if np.all(np.isnan(non_chimeric_score)):
            ranking = base_score
        else:
            ranking = non_chimeric_score
        return self.ms2_mass_spectra[ms2_scans[int(np.nanargmax(ranking))]]


class GCPeak(ChromaPeakBase, GCPeakCalculation):
    """Class representing a peak in a gas chromatography (GC) chromatogram.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum_obj : MassSpectrum
        The mass spectrum object associated with the peak.
    indexes : tuple
        The (start, apex, final) indexes of the peak in the chromatogram.

    Attributes
    ----------
    _compounds : list
        List of compounds associated with the peak.
    _ri : float or None
        Retention index of the peak.

    Methods
    -------
    * __len__(). Returns the number of compounds associated with the peak.
    * __getitem__(position). Returns the compound at the specified position.
    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    * clear_compounds(). Removes all compounds from the peak.
    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.

    Properties
    ----------
    * ri. The retention index of the peak.
    * highest_ss_compound. The compound with the highest spectral similarity score.
    * highest_score_compound. The compound with the highest similarity score.
    * compound_names. The names of the compounds associated with the peak.
    """

    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
        self._compounds = []
        self._ri = None
        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)

    def __len__(self):
        return len(self._compounds)

    def __getitem__(self, position):
        return self._compounds[position]

    def remove_compound(self, compounds_obj):
        """Remove the specified compound from the peak."""
        self._compounds.remove(compounds_obj)

    def clear_compounds(self):
        """Remove all compounds from the peak."""
        self._compounds = []

    def add_compound(
        self,
        compounds_dict,
        spectral_similarity_scores,
        ri_score=None,
        similarity_score=None,
    ):
        """Adds a compound to the peak with the specified attributes.

        After the compound is appended, the compound list is re-sorted in
        descending order: by similarity score when `similarity_score` is
        truthy, otherwise by spectral similarity score.

        Parameters
        ----------
        compounds_dict : dict
            Dictionary containing the compound information.
        spectral_similarity_scores : dict
            Dictionary containing the spectral similarity scores. Its
            "cosine_correlation" entry is used as the spectral similarity score.
        ri_score : float or None, optional
            The retention index score of the compound. Default is None.
        similarity_score : float or None, optional
            The similarity score of the compound. Default is None.
        """
        compound_obj = LowResCompoundRef(compounds_dict)
        compound_obj.spectral_similarity_scores = spectral_similarity_scores
        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
            "cosine_correlation"
        )
        # TODO check is the above line correct?
        compound_obj.ri_score = ri_score
        compound_obj.similarity_score = similarity_score
        self._compounds.append(compound_obj)
        # NOTE(review): a similarity_score of 0 is falsy and takes the
        # spectral-similarity branch; either sort fails if a compound's sort
        # key is None. Confirm whether callers rely on this.
        if similarity_score:
            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
        else:
            self._compounds.sort(
                key=lambda c: c.spectral_similarity_score, reverse=True
            )

    @property
    def ri(self):
        """Returns the retention index of the peak.

        Returns
        -------
        float or None
            The retention index of the peak.
        """
        return self._ri

    @property
    def highest_ss_compound(self):
        """Returns the compound with the highest spectral similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest spectral similarity score,
            or None if the peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.spectral_similarity_score)
        else:
            return None

    @property
    def highest_score_compound(self):
        """Returns the compound with the highest similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest similarity score,
            or None if the peak has no compounds.
        """
        if self:
            return max(self, key=lambda c: c.similarity_score)
        else:
            return None

    @property
    def compound_names(self):
        """Returns a list of names of compounds associated with the peak.

        Returns
        -------
        list
            List of names of compounds associated with the peak.
        """
        if self:
            return [c.name for c in self]
        else:
            return []


class GCPeakDeconvolved(GCPeak):
    """Represents a deconvolved peak in a chromatogram.

    The peak's start, apex and final indexes are positions in `mass_spectra`
    (0, `apex_index` and the last position), not chromatogram scan indexes.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectra : list
        List of mass spectra associated with the peak.
    apex_index : int
        Index of the apex mass spectrum in the `mass_spectra` list.
    rt_list : list
        List of retention times.
    tic_list : list
        List of total ion currents.
    """

    def __init__(
        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    ):
        self._ri = None
        self._rt_list = list(rt_list)
        self._tic_list = list(tic_list)
        self.mass_spectra = list(mass_spectra)
        super().__init__(
            chromatogram_parent,
            self.mass_spectra[apex_index],
            (0, apex_index, len(self.mass_spectra) - 1),
        )

    @property
    def rt_list(self):
        """Get the list of retention times.

        Returns
        -------
        list
            The list of retention times.
        """
        return self._rt_list

    @property
    def tic_list(self):
        """Get the list of total ion currents.

        Returns
        -------
        list
            The list of total ion currents.
        """
        return self._tic_list
class ChromaPeakBase:
    """Common state and accessors shared by chromatographic peak objects.

    Parameters
    -------
    chromatogram_parent : Chromatogram
        Chromatogram the peak belongs to.
    mass_spectrum_obj : MassSpectrum
        Mass spectrum associated with the peak.
    start_index : int
        First index of the peak; kept as `start_scan`.
    index : int
        Apex index of the peak; converted with `int()` and kept as `apex_scan`.
    final_index : int
        Last index of the peak; kept as `final_scan`.

    Attributes
    --------
    start_scan : int
        First scan of the peak.
    final_scan : int
        Last scan of the peak.
    apex_scan : int
        Apex scan of the peak.
    chromatogram_parent : Chromatogram
        Chromatogram the peak belongs to.
    mass_spectrum : MassSpectrum
        Mass spectrum associated with the peak.
    _area : float
        Peak area; None until something assigns it.

    Properties
    --------
    * retention_time : float.
        Retention time reported by the mass spectrum.
    * tic : float.
        Total ion current reported by the mass spectrum.
    * area : float.
        Peak area.
    * rt_list : list.
        Parent retention times for start_scan..final_scan, inclusive.
    * tic_list : list.
        Parent total ion currents for start_scan..final_scan, inclusive.

    Methods
    --------
    * None
    """

    def __init__(
        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    ):
        # Peak boundaries first, then the apex, then the owning objects.
        self.start_scan, self.final_scan = start_index, final_index
        self.apex_scan = int(index)
        self.chromatogram_parent = chromatogram_parent
        self.mass_spectrum = mass_spectrum_obj
        self._area = None

    @property
    def retention_time(self):
        """Retention time, delegated to the mass spectrum."""
        spectrum = self.mass_spectrum
        return spectrum.retention_time

    @property
    def tic(self):
        """Total ion current, delegated to the mass spectrum."""
        spectrum = self.mass_spectrum
        return spectrum.tic

    @property
    def area(self):
        """Peak area (None if it has not been assigned)."""
        return self._area

    @property
    def rt_list(self):
        """Parent retention times from start_scan through final_scan, inclusive."""
        window = []
        for scan in range(self.start_scan, self.final_scan + 1):
            window.append(self.chromatogram_parent.retention_time[scan])
        return window

    @property
    def tic_list(self):
        """Parent total ion currents from start_scan through final_scan, inclusive."""
        window = []
        for scan in range(self.start_scan, self.final_scan + 1):
            window.append(self.chromatogram_parent.tic[scan])
        return window
Base class for chromatographic peak (ChromaPeak) objects.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum_obj (MassSpectrum): The mass spectrum object.
- start_index (int): The start index of the peak.
- index (int): The apex index of the peak; it is cast to `int` and stored as `apex_scan`.
- final_index (int): The final index of the peak.
Attributes
- start_scan (int): The start scan of the peak.
- final_scan (int): The final scan of the peak.
- apex_scan (int): The apex scan of the peak.
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum (MassSpectrum): The mass spectrum object.
- _area (float): The area of the peak.
Properties
- retention_time : float. The retention time of the peak.
- tic : float. The total ion current of the peak.
- area : float. The area of the peak.
- rt_list : list. The list of retention times within the peak.
- tic_list : list. The list of total ion currents within the peak.
Methods
- None
67 def __init__( 68 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index 69 ): 70 self.start_scan = start_index 71 self.final_scan = final_index 72 self.apex_scan = int(index) 73 self.chromatogram_parent = chromatogram_parent 74 self.mass_spectrum = mass_spectrum_obj 75 self._area = None
class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.

    Parameters
    -------
    lcms_parent : LCMS
        The parent LCMSBase object.
    mz : float
        The observed mass to charge ratio of the feature.
    retention_time : float
        The retention time of the feature (in minutes), at the apex.
    intensity : float
        The intensity of the feature.
    apex_scan : int
        The scan number of the apex of the feature.
    persistence : float, optional
        The persistence of the feature. Default is None.
    id : int, optional
        The ID of the feature. If None (default), the ID is one more than the
        largest key in the parent's `mass_features` dictionary, or 0 when that
        dictionary is empty.

    Attributes
    --------
    _mz_exp : float
        The observed mass to charge ratio of the feature.
    _mz_cal : float
        The calibrated mass to charge ratio of the feature.
    _retention_time : float
        The retention time of the feature (in minutes), at the apex.
    _apex_scan : int
        The scan number of the apex of the feature.
    _intensity : float
        The intensity of the feature.
    _persistence : float
        The persistence of the feature.
    _eic_data : EIC_Data
        The EIC data object associated with the feature.
    _dispersity_index : float
        The dispersity index of the feature, in minutes.
    _normalized_dispersity_index : float
        The normalized dispersity index of the feature (unitless, fraction of total window used to calculate dispersity index).
    _half_height_width : numpy.ndarray
        The half height width of the feature (in minutes, as an array of min and max values).
    _tailing_factor : float
        The tailing factor of the feature.
        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    _noise_score : tuple
        The noise score of the feature, as a tuple of (left, right) scores.
        Each score is a float, with higher values indicating better signal to noise.
    _gaussian_similarity : float
        The Gaussian similarity of the feature, as a float between 0 and 1.
        1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
    _ms_deconvoluted_idx : [int]
        The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    is_calibrated : bool
        If True, the feature has been calibrated. Default is False.
    monoisotopic_mf_id : int
        Mass feature id that is the monoisotopic version of self.
        If equal to self.id, then self is the monoisotopic feature. Default is None.
    isotopologue_type : str
        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc.
        Default is None.
    ms2_scan_numbers : list
        List of scan numbers of the MS2 spectra associated with the feature.
        Default is an empty list.
    ms2_mass_spectra : dict
        Dictionary of MS2 spectra associated with the feature (key = scan number for DDA).
        Default is an empty dictionary.
    ms2_similarity_results : list
        List of MS2 similarity results associated with the mass feature.
        Default is an empty list.
    id : int
        The ID of the feature, also the key in the parent LCMS object's
        `mass_features` dictionary.
    mass_spectrum_deconvoluted_parent : bool
        If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    associated_mass_features_deconvoluted : list
        List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.

    """

    def __init__(
        self,
        lcms_parent,
        mz: float,
        retention_time: float,
        intensity: float,
        apex_scan: int,
        persistence: float = None,
        id: int = None,
    ):
        super().__init__(
            chromatogram_parent=lcms_parent,
            mass_spectrum_obj=None,
            start_index=None,
            index=apex_scan,
            final_index=None,
        )
        # Core attributes, marked as private
        self._mz_exp: float = mz
        self._mz_cal: float = None
        self._retention_time: float = retention_time
        self._apex_scan: int = apex_scan
        self._intensity: float = intensity
        self._persistence: float = persistence
        self._eic_data: EIC_Data = None
        self._dispersity_index: float = None
        self._normalized_dispersity_index: float = None
        self._half_height_width: np.ndarray = None
        self._ms_deconvoluted_idx = None
        self._tailing_factor: float = None
        self._noise_score: tuple = None
        self._gaussian_similarity: float = None

        # Additional attributes
        self.monoisotopic_mf_id = None
        self.isotopologue_type = None
        self.ms2_scan_numbers = []
        self.ms2_mass_spectra = {}
        self.ms2_similarity_results = []
        self.mass_spectrum_deconvoluted_parent: bool = None
        self.associated_mass_features_deconvoluted = []

        # Compare against None (not truthiness) so that an explicit id of 0 is honoured.
        if id is not None:
            self.id = id
        else:
            # get the parent's mass feature keys and add 1 to the max value to get the new key
            existing_ids = lcms_parent.mass_features.keys()
            self.id = max(existing_ids) + 1 if existing_ids else 0

    def update_mz(self):
        """Update the mass to charge ratio from the mass spectrum object.

        The observed m/z is only replaced when the closest MS1 peak is within
        0.01 of the feature's current m/z; otherwise nothing changes.

        Raises
        ------
        ValueError
            If no mass spectrum is set, or the mass spectrum has no m/z values.
        """
        if self.mass_spectrum is None:
            raise ValueError(
                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
            )
        if len(self.mass_spectrum.mz_exp) == 0:
            raise ValueError(
                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
            )
        new_mz = self.ms1_peak.mz_exp

        # calculate the difference between the new and old m/z, only update if it is close
        mz_diff = new_mz - self.mz
        if abs(mz_diff) < 0.01:
            self._mz_exp = new_mz

    def plot(
        self,
        to_plot=None,
        return_fig=True,
        plot_smoothed_eic=False,
        plot_eic_datapoints=False,
    ):
        """Plot the mass feature.

        Parameters
        ----------
        to_plot : list, optional
            List of strings specifying what to plot, any combination of
            "EIC", "MS2", and "MS1".
            Default (None) is ["EIC", "MS1", "MS2"]. Panels whose data is not
            available on the mass feature are silently dropped.
        return_fig : bool, optional
            If True, the figure is closed in pyplot and returned. Default is True.
        plot_smoothed_eic : bool, optional
            If True, the smoothed EIC is plotted. Default is False.
        plot_eic_datapoints : bool, optional
            If True, the EIC data points are plotted. Default is False.

        Returns
        -------
        matplotlib.figure.Figure or None
            The figure object if `return_fig` is True.
            Otherwise None; the figure is left open in pyplot.

        Raises
        ------
        ValueError
            If none of the requested panels can be drawn.
        """
        to_plot = ["EIC", "MS1", "MS2"] if to_plot is None else list(to_plot)

        # EIC plot preparation
        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time

        # Adjust to_plot list if there are not spectra added to the mass features
        unavailable = set()
        if self.mass_spectrum is None:
            unavailable.add("MS1")
        if len(self.ms2_mass_spectra) == 0:
            unavailable.add("MS2")
        if self._eic_data is None:
            unavailable.add("EIC")
        to_plot = [x for x in to_plot if x not in unavailable]
        if not to_plot:
            # plt.subplots would fail with an unhelpful message for zero rows
            raise ValueError(
                "No data available to plot for mass feature " + str(self.id)
            )
        deconvoluted = self._ms_deconvoluted_idx is not None

        fig, axs = plt.subplots(
            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
        )
        fig.suptitle(
            f"Mass Feature {self.id}: m/z = {round(self.mz, ndigits=4)}"
            f"; time = {round(self.retention_time, ndigits=1)} minutes"
        )

        # best_ms2 is an expensive property; evaluate it at most once
        best_ms2 = None

        i = 0
        # EIC plot
        if "EIC" in to_plot:
            ax = axs[i][0]
            ax.set_title("EIC", loc="left")
            ax.plot(
                self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC"
            )
            if plot_eic_datapoints:
                ax.scatter(
                    self._eic_data.time,
                    self._eic_data.eic,
                    c="tab:blue",
                    label="EIC Data Points",
                )
            if plot_smoothed_eic:
                ax.plot(
                    self._eic_data.time,
                    self._eic_data.eic_smoothed,
                    c="tab:red",
                    label="Smoothed EIC",
                )
            if self.start_scan is not None and self.final_scan is not None:
                peak_eic = self.eic_list
                ax.fill_between(self.eic_rt_list, peak_eic, color="b", alpha=0.2)
                y_max = peak_eic.max()
            else:
                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                    print(
                        "No start and final scan numbers were provided for mass feature "
                        + str(self.id)
                    )
                # Without peak boundaries eic_list cannot be computed; scale
                # the y axis to the whole EIC instead.
                y_max = np.max(self._eic_data.eic)
            ax.set_ylabel("Intensity")
            ax.set_xlabel("Time (minutes)")
            ax.set_ylim(0, y_max * 1.1)
            ax.set_xlim(
                self.retention_time - eic_buffer_time,
                self.retention_time + eic_buffer_time,
            )
            ax.axvline(x=self.retention_time, color="k", label="MS1 scan time (apex)")
            if len(self.ms2_scan_numbers) > 0:
                best_ms2 = self.best_ms2
                # Scan numbers may be registered before the spectra are loaded
                if best_ms2 is not None:
                    ax.axvline(
                        x=self.chromatogram_parent.get_time_of_scan_id(
                            best_ms2.scan_number
                        ),
                        color="grey",
                        linestyle="--",
                        label="MS2 scan time",
                    )
            ax.legend(loc="upper left")
            ax.yaxis.get_major_formatter().set_useOffset(False)
            i += 1

        # MS1 plot
        if "MS1" in to_plot:
            ax = axs[i][0]
            if deconvoluted:
                # The property deep-copies the spectrum on every access
                ms_deconvoluted = self.mass_spectrum_deconvoluted
                ax.set_title("MS1 (deconvoluted)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    alpha=0.2,
                    label="Raw MS1",
                )
                ax.vlines(
                    ms_deconvoluted.mz_exp,
                    0,
                    ms_deconvoluted.abundance,
                    color="k",
                    label="Deconvoluted MS1",
                )
                ax.set_xlim(
                    ms_deconvoluted.mz_exp.min() * 0.8,
                    ms_deconvoluted.mz_exp.max() * 1.1,
                )
                ax.set_ylim(0, ms_deconvoluted.abundance.max() * 1.1)
            else:
                ax.set_title("MS1 (raw)", loc="left")
                ax.vlines(
                    self.mass_spectrum.mz_exp,
                    0,
                    self.mass_spectrum.abundance,
                    color="k",
                    label="Raw MS1",
                )
                ax.set_xlim(
                    self.mass_spectrum.mz_exp.min() * 0.8,
                    self.mass_spectrum.mz_exp.max() * 1.1,
                )
                ax.set_ylim(bottom=0)

            ms1_peak = self.ms1_peak
            # Absolute difference: a peak far *below* the feature m/z must not match
            if abs(ms1_peak.mz_exp - self.mz) < 0.01:
                ax.vlines(
                    ms1_peak.mz_exp,
                    0,
                    ms1_peak.abundance,
                    color="m",
                    label="Feature m/z",
                )
            else:
                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                    print(
                        "The m/z of the mass feature "
                        + str(self.id)
                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
                    )
            ax.legend(loc="upper left")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.yaxis.set_tick_params(labelleft=False)
            i += 1

        # MS2 plot
        if "MS2" in to_plot:
            ax = axs[i][0]
            if best_ms2 is None:
                best_ms2 = self.best_ms2
            ax.set_title("MS2", loc="left")
            ax.vlines(best_ms2.mz_exp, 0, best_ms2.abundance, color="k")
            ax.set_ylabel("Intensity")
            ax.set_xlabel("m/z")
            ax.set_ylim(bottom=0)
            ax.yaxis.get_major_formatter().set_scientific(False)
            ax.yaxis.get_major_formatter().set_useOffset(False)
            ax.set_xlim(best_ms2.mz_exp.min() * 0.8, best_ms2.mz_exp.max() * 1.1)
            ax.yaxis.set_tick_params(labelleft=False)

        # Add space between subplots
        fig.tight_layout()

        if return_fig:
            # Close figure
            plt.close(fig)
            return fig

    @property
    def mz(self):
        """Mass to charge ratio of the mass feature"""
        # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z
        if self._mz_cal is not None:
            return self._mz_cal
        return self._mz_exp

    @property
    def mass_spectrum_deconvoluted(self):
        """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.

        A deep copy of the mass spectrum is made on every access.

        Raises
        ------
        ValueError
            If deconvolution has not been performed.
        """
        if self._ms_deconvoluted_idx is None:
            raise ValueError(
                "Deconvolution has not been performed for mass feature " + str(self.id)
            )
        ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
        ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
        return ms_deconvoluted

    @property
    def retention_time(self):
        """Retention time of the mass feature"""
        return self._retention_time

    @retention_time.setter
    def retention_time(self, value):
        """Set the retention time of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The retention time of the mass feature must be a float")
        self._retention_time = value

    @property
    def apex_scan(self):
        """Apex scan of the mass feature"""
        return self._apex_scan

    @apex_scan.setter
    def apex_scan(self, value):
        """Set the apex scan of the mass feature"""
        if not isinstance(value, int):
            raise ValueError("The apex scan of the mass feature must be an integer")
        self._apex_scan = value

    @property
    def intensity(self):
        """Intensity of the mass feature"""
        return self._intensity

    @intensity.setter
    def intensity(self, value):
        """Set the intensity of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The intensity of the mass feature must be a float")
        self._intensity = value

    @property
    def persistence(self):
        """Persistence of the mass feature"""
        return self._persistence

    @persistence.setter
    def persistence(self, value):
        """Set the persistence of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The persistence of the mass feature must be a float")
        self._persistence = value

    def _eic_peak_slice(self):
        """Slice of the EIC arrays spanning start_scan to final_scan (inclusive)."""
        # Convert once; ValueError is raised if either scan is not in the EIC
        scans = self._eic_data.scans.tolist()
        return slice(scans.index(self.start_scan), scans.index(self.final_scan) + 1)

    @property
    def eic_rt_list(self):
        """Retention time list between the beginning and end of the mass feature"""
        return self._eic_data.time[self._eic_peak_slice()]

    @property
    def eic_list(self):
        """EIC List between the beginning and end of the mass feature"""
        return self._eic_data.eic[self._eic_peak_slice()]

    @property
    def ms1_peak(self):
        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
        # Index of the entry in self.mass_spectrum.mz_exp that is closest to self.mz
        # (first one on ties)
        mz_values = np.asarray(self.mass_spectrum.mz_exp)
        closest_mz_index = int(np.argmin(np.abs(mz_values - self.mz)))

        return self.mass_spectrum._mspeaks[closest_mz_index]

    @property
    def tailing_factor(self):
        """Tailing factor of the mass feature"""
        return self._tailing_factor

    @tailing_factor.setter
    def tailing_factor(self, value):
        """Set the tailing factor of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The tailing factor of the mass feature must be a float")
        self._tailing_factor = value

    @property
    def dispersity_index(self):
        """Dispersity index of the mass feature"""
        return self._dispersity_index

    @dispersity_index.setter
    def dispersity_index(self, value):
        """Set the dispersity index of the mass feature"""
        if not isinstance(value, float):
            raise ValueError("The dispersity index of the mass feature must be a float")
        self._dispersity_index = value

    @property
    def normalized_dispersity_index(self):
        """Normalized dispersity index of the mass feature, unitless (fraction of total window used)"""
        return self._normalized_dispersity_index

    @property
    def half_height_width(self):
        """Half height width of the mass feature, average of min and max values, in minutes"""
        return np.mean(self._half_height_width)

    @property
    def noise_score(self):
        """Mean of left and right noise scores.

        Returns
        -------
        float or np.nan
            Mean noise score, or np.nan if both sides are np.nan.
            If only one side is np.nan, the other side is returned.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values
        if np.isnan(left) and np.isnan(right):
            return np.nan
        elif np.isnan(left):
            return right
        elif np.isnan(right):
            return left
        else:
            return (left + right) / 2.0

    @property
    def noise_score_min(self):
        """Minimum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Minimum noise score, or np.nan if both sides are np.nan.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmin ignores NaN
        return np.nanmin([left, right])

    @property
    def noise_score_max(self):
        """Maximum of left and right noise scores.

        Returns
        -------
        float or np.nan
            Maximum noise score, or np.nan if both sides are np.nan.
        """
        if self._noise_score is None:
            return np.nan

        left, right = self._noise_score
        # Handle NaN values - nanmax ignores NaN
        return np.nanmax([left, right])

    @property
    def best_ms2(self):
        """Points to the best representative MS2 mass spectrum

        Notes
        -----
        If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.
        Otherwise, if there is only one MS2 mass spectrum, it will be returned.
        Otherwise, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Possible chimeric spectra are scored zero; if that leaves no spectrum with a positive score, the chimeric check is dropped and the best time/resolving-power score is used.

        Returns
        -------
        MassSpectrum or None
            The best MS2 mass spectrum, or None if there are no MS2 spectra.

        Raises
        ------
        ValueError
            If there are several MS2 spectra but none of them has any peaks.
        """
        if len(self.ms2_similarity_results) > 0:
            # the scan number with the highest similarity score
            results_df = pd.concat(
                [x.to_dataframe() for x in self.ms2_similarity_results]
            )
            results_df = results_df.sort_values(
                by="entropy_similarity", ascending=False
            )
            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
            return self.ms2_mass_spectra[best_scan_number]

        ms2_scans = list(self.ms2_mass_spectra.keys())
        if len(ms2_scans) == 0:  # if no ms2 spectra, return None
            return None
        if len(ms2_scans) == 1:  # if only one ms2 spectra, return it
            return self.ms2_mass_spectra[ms2_scans[0]]

        feature_mz = self.mz
        mz_diff_list = []  # mz difference between the mass feature and the nearest mz in each scan
        res_list = []  # maximum resolving power of peaks in each scan
        time_diff_list = []  # time difference between each scan and the apex
        for scan in ms2_scans:
            spectrum = self.ms2_mass_spectra[scan]
            if len(spectrum.mspeaks) > 0:
                # Find mz closest to mass feature mz
                closest_mz = min(spectrum.mz_exp, key=lambda x: abs(x - feature_mz))
                if all(np.isnan(spectrum.resolving_power)):
                    # All NA for resolving power in peaks, not uncommon in CID spectra
                    res_list.append(2)  # Assumes very low resolving power
                else:
                    res_list.append(np.nanmax(spectrum.resolving_power))
                mz_diff_list.append(np.abs(closest_mz - feature_mz))
                time_diff_list.append(
                    np.abs(
                        self.chromatogram_parent.get_time_of_scan_id(scan)
                        - self.retention_time
                    )
                )
            else:
                res_list.append(np.nan)
                mz_diff_list.append(np.nan)
                time_diff_list.append(np.nan)

        time_diffs = np.array(time_diff_list, dtype=float)
        resolutions = np.array(res_list, dtype=float)
        mz_diffs = np.array(mz_diff_list, dtype=float)

        # Every scan was empty: nothing can be ranked
        if np.all(np.isnan(time_diffs)):
            raise ValueError(
                "No best MS2 mass spectrum could be found for mass feature "
                + str(self.id)
            )

        # Convert diff_lists into logical scores (higher is better for each score)
        with np.errstate(invalid="ignore", divide="ignore"):
            max_time_diff = np.nanmax(time_diffs)
            if max_time_diff > 0:
                time_score = 1 - time_diffs / max_time_diff
            else:
                # All non-empty scans are equally close; avoid 0/0
                time_score = np.where(np.isnan(time_diffs), np.nan, 1.0)
            res_score = resolutions / np.nanmax(resolutions)
            # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning)
            chimeric = (mz_diffs > 0.1) & (mz_diffs < 0.8)
        mz_score = np.where(chimeric, 0.0, 1.0)

        base_score = time_score * res_score
        if np.all(np.isnan(base_score)):
            raise ValueError(
                "No best MS2 mass spectrum could be found for mass feature "
                + str(self.id)
            )
        filtered_score = base_score * mz_score

        # get the index of the best score and return the mass spectrum
        if np.nanmax(filtered_score) > 0:
            best_index = np.nanargmax(filtered_score)
        else:
            # remove the mz_score condition and try again
            best_index = np.nanargmax(base_score)
        return self.ms2_mass_spectra[ms2_scans[int(best_index)]]
Class representing a mass feature in a liquid chromatography (LC) chromatogram.
Parameters
- lcms_parent (LCMS): The parent LCMSBase object.
- mz (float): The observed mass to charge ratio of the feature.
- retention_time (float): The retention time of the feature (in minutes), at the apex.
- intensity (float): The intensity of the feature.
- apex_scan (int): The scan number of the apex of the feature.
- persistence (float, optional): The persistence of the feature. Default is None.
Attributes
- _mz_exp (float): The observed mass to charge ratio of the feature.
- _mz_cal (float): The calibrated mass to charge ratio of the feature.
- _retention_time (float): The retention time of the feature (in minutes), at the apex.
- _apex_scan (int): The scan number of the apex of the feature.
- _intensity (float): The intensity of the feature.
- _persistence (float): The persistence of the feature.
- _eic_data (EIC_Data): The EIC data object associated with the feature.
- _dispersity_index (float): The dispersity index of the feature, in minutes.
- _normalized_dispersity_index (float): The normalized dispersity index of the feature (unitless, fraction of total window used to calculate dispersity index).
- _half_height_width (numpy.ndarray): The half height width of the feature (in minutes, as an array of min and max values).
- _tailing_factor (float): The tailing factor of the feature. > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
- _noise_score (tuple): The noise score of the feature, as a tuple of (left, right) scores. Each score is a float, with higher values indicating better signal to noise.
- _gaussian_similarity (float): The Gaussian similarity of the feature, as a float between 0 and 1. 1 indicates a perfect Gaussian shape, 0 indicates a non-Gaussian shape.
- _ms_deconvoluted_idx ([int]): The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
- is_calibrated (bool): If True, the feature has been calibrated. Default is False.
- monoisotopic_mf_id (int): Mass feature id that is the monoisotopic version of self. If equal to self.id, then self is the monoisotopic feature. Default is None.
- isotopologue_type (str): The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. Default is None.
- ms2_scan_numbers (list): List of scan numbers of the MS2 spectra associated with the feature. Default is an empty list.
- ms2_mass_spectra (dict): Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). Default is an empty dictionary.
- ms2_similarity_results (list): List of MS2 similarity results associated with the mass feature. Default is an empty list.
- id (int): The ID of the feature, also the key in the parent LCMS object's `mass_features` dictionary.
- mass_spectrum_deconvoluted_parent (bool): If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
- associated_mass_features_deconvoluted (list): List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
def __init__(
    self,
    lcms_parent,
    mz: float,
    retention_time: float,
    intensity: float,
    apex_scan: int,
    persistence: float = None,
    id: int = None,
):
    """Create a mass feature attached to an LCMS parent object.

    Parameters
    ----------
    lcms_parent : LCMS
        The parent LCMSBase object; its `mass_features` keys are read when
        `id` is not given.
    mz : float
        The observed mass to charge ratio of the feature.
    retention_time : float
        The retention time of the feature (in minutes), at the apex.
    intensity : float
        The intensity of the feature.
    apex_scan : int
        The scan number of the apex of the feature.
    persistence : float, optional
        The persistence of the feature. Default is None.
    id : int, optional
        The ID of the feature. If None (default), the ID is one more than the
        largest key in `lcms_parent.mass_features`, or 0 when it is empty.
    """
    # The mass spectrum and the start/final scans are not known yet.
    super().__init__(
        chromatogram_parent=lcms_parent,
        mass_spectrum_obj=None,
        start_index=None,
        index=apex_scan,
        final_index=None,
    )
    # Core attributes, marked as private
    self._mz_exp: float = mz
    self._mz_cal: float = None
    self._retention_time: float = retention_time
    self._apex_scan: int = apex_scan
    self._intensity: float = intensity
    self._persistence: float = persistence
    self._eic_data: EIC_Data = None
    self._dispersity_index: float = None
    self._normalized_dispersity_index: float = None
    self._half_height_width: np.ndarray = None
    self._ms_deconvoluted_idx = None
    self._tailing_factor: float = None
    self._noise_score: tuple = None
    self._gaussian_similarity: float = None

    # Additional attributes
    self.monoisotopic_mf_id = None
    self.isotopologue_type = None
    self.ms2_scan_numbers = []
    self.ms2_mass_spectra = {}
    self.ms2_similarity_results = []
    self.mass_spectrum_deconvoluted_parent: bool = None
    self.associated_mass_features_deconvoluted = []

    # Compare against None (not truthiness) so that an explicit id of 0 is
    # honoured instead of being replaced by an auto-generated one.
    if id is not None:
        self.id = id
    else:
        # get the parent's mass feature keys and add 1 to the max value to get the new key
        existing_ids = lcms_parent.mass_features.keys()
        self.id = max(existing_ids) + 1 if existing_ids else 0
def update_mz(self):
    """Refresh the observed m/z from the closest peak of the attached mass spectrum.

    The stored observed m/z is replaced only when the closest MS1 peak lies
    within 0.01 of the feature's current m/z; otherwise nothing changes.

    Raises
    ------
    ValueError
        If no mass spectrum is set, or the mass spectrum has no m/z values.
    """
    spectrum = self.mass_spectrum
    if spectrum is None:
        raise ValueError(
            "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
        )
    if len(spectrum.mz_exp) == 0:
        raise ValueError(
            "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
        )

    candidate_mz = self.ms1_peak.mz_exp

    # Accept the candidate only if it is close to the current feature m/z
    is_close = abs(candidate_mz - self.mz) < 0.01
    if is_close:
        self._mz_exp = candidate_mz
Update the mass to charge ratio from the mass spectrum object.
def plot(
    self,
    to_plot=None,
    return_fig=True,
    plot_smoothed_eic=False,
    plot_eic_datapoints=False,
):
    """Plot the mass feature.

    Parameters
    ----------
    to_plot : list, optional
        List of strings specifying what to plot, any combination of
        "EIC", "MS2", and "MS1".
        Default (None) is ["EIC", "MS1", "MS2"]. Panels whose data is not
        available on the mass feature are silently dropped.
    return_fig : bool, optional
        If True, the figure is closed in pyplot and returned. Default is True.
    plot_smoothed_eic : bool, optional
        If True, the smoothed EIC is plotted. Default is False.
    plot_eic_datapoints : bool, optional
        If True, the EIC data points are plotted. Default is False.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure object if `return_fig` is True.
        Otherwise None; the figure is left open in pyplot.

    Raises
    ------
    ValueError
        If none of the requested panels can be drawn.
    """
    to_plot = ["EIC", "MS1", "MS2"] if to_plot is None else list(to_plot)

    # EIC plot preparation
    eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time

    # Adjust to_plot list if there are not spectra added to the mass features
    unavailable = set()
    if self.mass_spectrum is None:
        unavailable.add("MS1")
    if len(self.ms2_mass_spectra) == 0:
        unavailable.add("MS2")
    if self._eic_data is None:
        unavailable.add("EIC")
    to_plot = [x for x in to_plot if x not in unavailable]
    if not to_plot:
        # plt.subplots would fail with an unhelpful message for zero rows
        raise ValueError("No data available to plot for mass feature " + str(self.id))
    deconvoluted = self._ms_deconvoluted_idx is not None

    fig, axs = plt.subplots(
        len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
    )
    fig.suptitle(
        f"Mass Feature {self.id}: m/z = {round(self.mz, ndigits=4)}"
        f"; time = {round(self.retention_time, ndigits=1)} minutes"
    )

    # best_ms2 is an expensive property; evaluate it at most once
    best_ms2 = None

    i = 0
    # EIC plot
    if "EIC" in to_plot:
        ax = axs[i][0]
        ax.set_title("EIC", loc="left")
        ax.plot(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC")
        if plot_eic_datapoints:
            ax.scatter(
                self._eic_data.time,
                self._eic_data.eic,
                c="tab:blue",
                label="EIC Data Points",
            )
        if plot_smoothed_eic:
            ax.plot(
                self._eic_data.time,
                self._eic_data.eic_smoothed,
                c="tab:red",
                label="Smoothed EIC",
            )
        if self.start_scan is not None and self.final_scan is not None:
            peak_eic = self.eic_list
            ax.fill_between(self.eic_rt_list, peak_eic, color="b", alpha=0.2)
            y_max = peak_eic.max()
        else:
            if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                print(
                    "No start and final scan numbers were provided for mass feature "
                    + str(self.id)
                )
            # Without peak boundaries eic_list cannot be computed; scale the
            # y axis to the whole EIC instead.
            y_max = np.max(self._eic_data.eic)
        ax.set_ylabel("Intensity")
        ax.set_xlabel("Time (minutes)")
        ax.set_ylim(0, y_max * 1.1)
        ax.set_xlim(
            self.retention_time - eic_buffer_time,
            self.retention_time + eic_buffer_time,
        )
        ax.axvline(x=self.retention_time, color="k", label="MS1 scan time (apex)")
        if len(self.ms2_scan_numbers) > 0:
            best_ms2 = self.best_ms2
            # Scan numbers may be registered before the spectra are loaded
            if best_ms2 is not None:
                ax.axvline(
                    x=self.chromatogram_parent.get_time_of_scan_id(
                        best_ms2.scan_number
                    ),
                    color="grey",
                    linestyle="--",
                    label="MS2 scan time",
                )
        ax.legend(loc="upper left")
        ax.yaxis.get_major_formatter().set_useOffset(False)
        i += 1

    # MS1 plot
    if "MS1" in to_plot:
        ax = axs[i][0]
        if deconvoluted:
            # The property deep-copies the spectrum on every access
            ms_deconvoluted = self.mass_spectrum_deconvoluted
            ax.set_title("MS1 (deconvoluted)", loc="left")
            ax.vlines(
                self.mass_spectrum.mz_exp,
                0,
                self.mass_spectrum.abundance,
                color="k",
                alpha=0.2,
                label="Raw MS1",
            )
            ax.vlines(
                ms_deconvoluted.mz_exp,
                0,
                ms_deconvoluted.abundance,
                color="k",
                label="Deconvoluted MS1",
            )
            ax.set_xlim(
                ms_deconvoluted.mz_exp.min() * 0.8,
                ms_deconvoluted.mz_exp.max() * 1.1,
            )
            ax.set_ylim(0, ms_deconvoluted.abundance.max() * 1.1)
        else:
            ax.set_title("MS1 (raw)", loc="left")
            ax.vlines(
                self.mass_spectrum.mz_exp,
                0,
                self.mass_spectrum.abundance,
                color="k",
                label="Raw MS1",
            )
            ax.set_xlim(
                self.mass_spectrum.mz_exp.min() * 0.8,
                self.mass_spectrum.mz_exp.max() * 1.1,
            )
            ax.set_ylim(bottom=0)

        ms1_peak = self.ms1_peak
        # Absolute difference: a peak far *below* the feature m/z must not match
        if abs(ms1_peak.mz_exp - self.mz) < 0.01:
            ax.vlines(
                ms1_peak.mz_exp,
                0,
                ms1_peak.abundance,
                color="m",
                label="Feature m/z",
            )
        else:
            if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
                print(
                    "The m/z of the mass feature "
                    + str(self.id)
                    + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
                )
        ax.legend(loc="upper left")
        ax.set_ylabel("Intensity")
        ax.set_xlabel("m/z")
        ax.yaxis.set_tick_params(labelleft=False)
        i += 1

    # MS2 plot
    if "MS2" in to_plot:
        ax = axs[i][0]
        if best_ms2 is None:
            best_ms2 = self.best_ms2
        ax.set_title("MS2", loc="left")
        ax.vlines(best_ms2.mz_exp, 0, best_ms2.abundance, color="k")
        ax.set_ylabel("Intensity")
        ax.set_xlabel("m/z")
        ax.set_ylim(bottom=0)
        ax.yaxis.get_major_formatter().set_scientific(False)
        ax.yaxis.get_major_formatter().set_useOffset(False)
        ax.set_xlim(best_ms2.mz_exp.min() * 0.8, best_ms2.mz_exp.max() * 1.1)
        ax.yaxis.set_tick_params(labelleft=False)

    # Add space between subplots
    fig.tight_layout()

    if return_fig:
        # Close figure
        plt.close(fig)
        return fig
Plot the mass feature.
Parameters
- to_plot (list, optional): List of strings specifying what to plot, any combination of "EIC", "MS2", and "MS1". Default is ["EIC", "MS1", "MS2"].
- return_fig (bool, optional): If True, the figure is returned. Default is True.
- plot_smoothed_eic (bool, optional): If True, the smoothed EIC is plotted. Default is False.
- plot_eic_datapoints (bool, optional): If True, the EIC data points are plotted. Default is False.
Returns
- matplotlib.figure.Figure or None: The figure object if `return_fig` is True. Otherwise None and the figure is displayed.
Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.
Normalized dispersity index of the mass feature, unitless (fraction of total window used)
Mean of left and right noise scores.
Returns
- float or np.nan: Mean noise score, or np.nan if both sides are np.nan.
Minimum of left and right noise scores.
Returns
- float or np.nan: Minimum noise score, or np.nan if both sides are np.nan.
Maximum of left and right noise scores.
Returns
- float or np.nan: Maximum noise score, or np.nan if both sides are np.nan.
Points to the best representative MS2 mass spectrum
Notes
If there is only one MS2 mass spectrum, it will be returned. If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra.
Returns
- MassSpectrum or None: The best MS2 mass spectrum.
Inherited Members
class GCPeak(ChromaPeakBase, GCPeakCalculation):
    """Class representing a peak in a gas chromatography (GC) chromatogram.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectrum_obj : MassSpectrum
        The mass spectrum object associated with the peak.
    indexes : tuple
        The indexes of the peak in the chromatogram. The tuple is unpacked
        positionally into the base-class constructor, i.e. it is expected to
        be ``(start_index, index, final_index)``.

    Attributes
    ----------
    _compounds : list
        List of compounds associated with the peak.
    _ri : float or None
        Retention index of the peak.

    Properties
    ----------
    * ri : float or None.
        The retention index of the peak.
    * highest_ss_compound : LowResCompoundRef or None.
        The compound with the highest spectral similarity score.
    * highest_score_compound : LowResCompoundRef or None.
        The compound with the highest similarity score.
    * compound_names : list.
        The names of the compounds associated with the peak.

    Methods
    -------
    * __len__(). Returns the number of compounds associated with the peak.
    * __getitem__(position). Returns the compound at the specified position.
    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    * clear_compounds(). Removes all compounds from the peak.
    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    """

    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
        self._compounds = []
        self._ri = None
        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)

    def __len__(self):
        return len(self._compounds)

    def __getitem__(self, position):
        return self._compounds[position]

    @staticmethod
    def _score_sort_key(score):
        """Return an ordering key that ranks a missing (None) score lowest.

        Comparing ``None`` with a number raises ``TypeError``; the leading
        boolean decides the comparison before the values are ever compared.
        """
        return (score is not None, 0 if score is None else score)

    def remove_compound(self, compounds_obj):
        """Removes the specified compound from the peak.

        Parameters
        ----------
        compounds_obj : LowResCompoundRef
            The compound to remove.

        Raises
        ------
        ValueError
            If the compound is not associated with the peak.
        """
        self._compounds.remove(compounds_obj)

    def clear_compounds(self):
        """Removes all compounds from the peak."""
        # Rebind rather than clear in place, so a list handed out earlier is
        # left untouched.
        self._compounds = []

    def add_compound(
        self,
        compounds_dict,
        spectral_similarity_scores,
        ri_score=None,
        similarity_score=None,
    ):
        """Adds a compound to the peak with the specified attributes.

        After the compound is appended, the compound list is re-sorted in
        descending order: by similarity score when `similarity_score` is
        given, otherwise by spectral similarity score. Compounds whose
        ranking score is None are placed last.

        Parameters
        ----------
        compounds_dict : dict
            Dictionary containing the compound information.
        spectral_similarity_scores : dict
            Dictionary containing the spectral similarity scores.
        ri_score : float or None, optional
            The retention index score of the compound. Default is None.
        similarity_score : float or None, optional
            The similarity score of the compound. Default is None.
        """
        compound_obj = LowResCompoundRef(compounds_dict)
        compound_obj.spectral_similarity_scores = spectral_similarity_scores
        # TODO check that "cosine_correlation" is the correct headline score.
        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
            "cosine_correlation"
        )
        compound_obj.ri_score = ri_score
        compound_obj.similarity_score = similarity_score
        self._compounds.append(compound_obj)

        # `is not None` (not truthiness): a score of 0.0 is a real score.
        if similarity_score is not None:
            self._compounds.sort(
                key=lambda c: self._score_sort_key(c.similarity_score),
                reverse=True,
            )
        else:
            self._compounds.sort(
                key=lambda c: self._score_sort_key(c.spectral_similarity_score),
                reverse=True,
            )

    @property
    def ri(self):
        """Returns the retention index of the peak.

        Returns
        -------
        float or None
            The retention index of the peak.
        """
        return self._ri

    @property
    def highest_ss_compound(self):
        """Returns the compound with the highest spectral similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest spectral similarity score, or None
            if no compounds are associated with the peak.
        """
        if not self._compounds:
            return None
        return max(
            self._compounds,
            key=lambda c: self._score_sort_key(c.spectral_similarity_score),
        )

    @property
    def highest_score_compound(self):
        """Returns the compound with the highest similarity score.

        Returns
        -------
        LowResCompoundRef or None
            The compound with the highest similarity score, or None if no
            compounds are associated with the peak.
        """
        if not self._compounds:
            return None
        return max(
            self._compounds,
            key=lambda c: self._score_sort_key(c.similarity_score),
        )

    @property
    def compound_names(self):
        """Returns a list of names of compounds associated with the peak.

        Returns
        -------
        list
            List of names of compounds associated with the peak (empty if
            there are none).
        """
        return [c.name for c in self._compounds]
Class representing a peak in a gas chromatography (GC) chromatogram.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum_obj (MassSpectrum): The mass spectrum object associated with the peak.
- indexes (tuple): The indexes of the peak in the chromatogram.
Attributes
- _compounds (list): List of compounds associated with the peak.
- _ri (float or None): Retention index of the peak.
Methods
- __len__(). Returns the number of compounds associated with the peak.
- __getitem__(position). Returns the compound at the specified position.
- remove_compound(compounds_obj). Removes the specified compound from the peak.
- clear_compounds(). Removes all compounds from the peak.
- add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
- ri(). Returns the retention index of the peak.
- highest_ss_compound(). Returns the compound with the highest spectral similarity score.
- highest_score_compound(). Returns the compound with the highest similarity score.
- compound_names(). Returns a list of names of compounds associated with the peak.
def add_compound(
    self,
    compounds_dict,
    spectral_similarity_scores,
    ri_score=None,
    similarity_score=None,
):
    """Adds a compound to the peak with the specified attributes.

    After the compound is appended, the compound list is re-sorted in
    descending order: by similarity score when `similarity_score` is given,
    otherwise by spectral similarity score. Compounds whose ranking score is
    None are placed last.

    Parameters
    ----------
    compounds_dict : dict
        Dictionary containing the compound information.
    spectral_similarity_scores : dict
        Dictionary containing the spectral similarity scores.
    ri_score : float or None, optional
        The retention index score of the compound. Default is None.
    similarity_score : float or None, optional
        The similarity score of the compound. Default is None.
    """

    def none_safe(score):
        # Comparing None with a number raises TypeError; the leading boolean
        # settles the comparison first and ranks a missing score lowest.
        return (score is not None, 0 if score is None else score)

    compound_obj = LowResCompoundRef(compounds_dict)
    compound_obj.spectral_similarity_scores = spectral_similarity_scores
    # TODO check that "cosine_correlation" is the correct headline score.
    compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
        "cosine_correlation"
    )
    compound_obj.ri_score = ri_score
    compound_obj.similarity_score = similarity_score
    self._compounds.append(compound_obj)

    # `is not None` (not truthiness): a score of 0.0 is a real score.
    if similarity_score is not None:
        self._compounds.sort(
            key=lambda c: none_safe(c.similarity_score), reverse=True
        )
    else:
        self._compounds.sort(
            key=lambda c: none_safe(c.spectral_similarity_score), reverse=True
        )
Adds a compound to the peak with the specified attributes.
Parameters
- compounds_dict (dict): Dictionary containing the compound information.
- spectral_similarity_scores (dict): Dictionary containing the spectral similarity scores.
- ri_score (float or None, optional): The retention index score of the compound. Default is None.
- similarity_score (float or None, optional): The similarity score of the compound. Default is None.
Returns the compound with the highest spectral similarity score.
Returns
- LowResCompoundRef or None: The compound with the highest spectral similarity score.
Returns the compound with the highest similarity score.
Returns
- LowResCompoundRef or None: The compound with the highest similarity score.
class GCPeakDeconvolved(GCPeak):
    """A deconvolved chromatographic peak built from its own set of spectra.

    The retention times and total ion currents are supplied by the caller and
    stored on the instance; the `rt_list` and `tic_list` properties return
    those stored lists.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectra : list
        List of mass spectra associated with the peak.
    apex_index : int
        Index of the apex mass spectrum in the `mass_spectra` list.
    rt_list : list
        List of retention times.
    tic_list : list
        List of total ion currents.
    """

    def __init__(
        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    ):
        self._ri = None
        # Copy the inputs so the peak owns independent lists.
        self._rt_list = [*rt_list]
        self._tic_list = [*tic_list]
        self.mass_spectra = [*mass_spectra]

        apex_spectrum = self.mass_spectra[apex_index]
        # Peak bounds are positions within `mass_spectra`: first, apex, last.
        peak_indexes = (0, apex_index, len(self.mass_spectra) - 1)
        super().__init__(chromatogram_parent, apex_spectrum, peak_indexes)

    @property
    def rt_list(self):
        """Retention times stored for this peak.

        Returns
        -------
        list
            The list of retention times.
        """
        return self._rt_list

    @property
    def tic_list(self):
        """Total ion currents stored for this peak.

        Returns
        -------
        list
            The list of total ion currents.
        """
        return self._tic_list
Represents a deconvolved peak in a chromatogram.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectra (list): List of mass spectra associated with the peak.
- apex_index (int): Index of the apex mass spectrum in the `mass_spectra` list.
- rt_list (list): List of retention times.
- tic_list (list): List of total ion currents.
def __init__(
    self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
):
    """Store copies of the peak data and initialise the parent peak.

    Parameters
    ----------
    chromatogram_parent : Chromatogram
        The parent chromatogram object.
    mass_spectra : list
        List of mass spectra associated with the peak.
    apex_index : int
        Index of the apex mass spectrum in the `mass_spectra` list.
    rt_list : list
        List of retention times.
    tic_list : list
        List of total ion currents.
    """
    self._ri = None
    # Copy the inputs so the peak owns independent lists.
    self._rt_list = [*rt_list]
    self._tic_list = [*tic_list]
    self.mass_spectra = [*mass_spectra]

    apex_spectrum = self.mass_spectra[apex_index]
    # Peak bounds are positions within `mass_spectra`: first, apex, last.
    peak_indexes = (0, apex_index, len(self.mass_spectra) - 1)
    super().__init__(chromatogram_parent, apex_spectrum, peak_indexes)