corems.chroma_peak.factory.chroma_peak_classes
1__author__ = "Yuri E. Corilo" 2__date__ = "Jun 12, 2019" 3 4import matplotlib.pyplot as plt 5import numpy as np 6import pandas as pd 7import copy 8 9from corems.chroma_peak.calc.ChromaPeakCalc import ( 10 GCPeakCalculation, 11 LCMSMassFeatureCalculation, 12) 13from corems.mass_spectra.factory.chromat_data import EIC_Data 14from corems.molecular_id.factory.EI_SQL import LowResCompoundRef 15 16 17class ChromaPeakBase: 18 """Base class for chromatographic peak (ChromaPeak) objects. 19 20 Parameters 21 ------- 22 chromatogram_parent : Chromatogram 23 The parent chromatogram object. 24 mass_spectrum_obj : MassSpectrum 25 The mass spectrum object. 26 start_index : int 27 The start index of the peak. 28 index : int 29 The index of the peak. 30 final_index : int 31 The final index of the peak. 32 33 Attributes 34 -------- 35 start_scan : int 36 The start scan of the peak. 37 final_scan : int 38 The final scan of the peak. 39 apex_scan : int 40 The apex scan of the peak. 41 chromatogram_parent : Chromatogram 42 The parent chromatogram object. 43 mass_spectrum : MassSpectrum 44 The mass spectrum object. 45 _area : float 46 The area of the peak. 47 48 Properties 49 -------- 50 * retention_time : float. 51 The retention time of the peak. 52 * tic : float. 53 The total ion current of the peak. 54 * area : float. 55 The area of the peak. 56 * rt_list : list. 57 The list of retention times within the peak. 58 * tic_list : list. 59 The list of total ion currents within the peak. 60 61 Methods 62 -------- 63 * None 64 """ 65 66 def __init__( 67 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index 68 ): 69 self.start_scan = start_index 70 self.final_scan = final_index 71 self.apex_scan = int(index) 72 self.chromatogram_parent = chromatogram_parent 73 self.mass_spectrum = mass_spectrum_obj 74 self._area = None 75 76 @property 77 def retention_time(self): 78 """Retention Time""" 79 return self.mass_spectrum.retention_time 80 81 @property 82 def tic(self): 83 """Total Ion Current""" 84 return self.mass_spectrum.tic 85 86 @property 87 def area(self): 88 """Peak Area""" 89 return self._area 90 91 @property 92 def rt_list(self): 93 """Retention Time List""" 94 return [ 95 self.chromatogram_parent.retention_time[i] 96 for i in range(self.start_scan, self.final_scan + 1) 97 ] 98 99 @property 100 def tic_list(self): 101 """Total Ion Current List""" 102 return [ 103 self.chromatogram_parent.tic[i] 104 for i in range(self.start_scan, self.final_scan + 1) 105 ] 106 107 108class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation): 109 """Class representing a mass feature in a liquid chromatography (LC) chromatogram. 110 111 Parameters 112 ------- 113 lcms_parent : LCMS 114 The parent LCMSBase object. 115 mz : float 116 The observed mass to charge ratio of the feature. 117 retention_time : float 118 The retention time of the feature (in minutes), at the apex. 119 intensity : float 120 The intensity of the feature. 121 apex_scan : int 122 The scan number of the apex of the feature. 123 persistence : float, optional 124 The persistence of the feature. Default is None. 125 126 Attributes 127 -------- 128 _mz_exp : float 129 The observed mass to charge ratio of the feature. 130 _mz_cal : float 131 The calibrated mass to charge ratio of the feature. 132 _retention_time : float 133 The retention time of the feature (in minutes), at the apex. 134 _apex_scan : int 135 The scan number of the apex of the feature. 136 _intensity : float 137 The intensity of the feature. 138 _persistence : float 139 The persistence of the feature. 140 _eic_data : EIC_Data 141 The EIC data object associated with the feature. 142 _dispersity_index : float 143 The dispersity index of the feature. 144 _half_height_width : numpy.ndarray 145 The half height width of the feature (in minutes, as an array of min and max values). 146 _tailing_factor : float 147 The tailing factor of the feature. 148 > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak. 149 _ms_deconvoluted_idx : [int] 150 The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum. 151 is_calibrated : bool 152 If True, the feature has been calibrated. Default is False. 153 monoisotopic_mf_id : int 154 Mass feature id that is the monoisotopic version of self. 155 If self.id, then self is the monoisotopic feature). Default is None. 156 isotopologue_type : str 157 The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. 158 Default is None. 159 ms2_scan_numbers : list 160 List of scan numbers of the MS2 spectra associated with the feature. 161 Default is an empty list. 162 ms2_mass_spectra : dict 163 Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). 164 Default is an empty dictionary. 165 ms2_similarity_results : list 166 List of MS2 similarity results associated with the mass feature. 167 Default is an empty list. 168 id : int 169 The ID of the feature, also the key in the parent LCMS object's 170 `mass_features` dictionary. 171 mass_spectrum_deconvoluted_parent : bool 172 If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None. 173 associated_mass_features_deconvoluted : list 174 List of mass features associated with the deconvoluted mass spectrum. Default is an empty list. 175 176 """ 177 178 def __init__( 179 self, 180 lcms_parent, 181 mz: float, 182 retention_time: float, 183 intensity: float, 184 apex_scan: int, 185 persistence: float = None, 186 id: int = None, 187 ): 188 super().__init__( 189 chromatogram_parent=lcms_parent, 190 mass_spectrum_obj=None, 191 start_index=None, 192 index=apex_scan, 193 final_index=None, 194 ) 195 # Core attributes, marked as private 196 self._mz_exp: float = mz 197 self._mz_cal: float = None 198 self._retention_time: float = retention_time 199 self._apex_scan: int = apex_scan 200 self._intensity: float = intensity 201 self._persistence: float = persistence 202 self._eic_data: EIC_Data = None 203 self._dispersity_index: float = None 204 self._half_height_width: np.ndarray = None 205 self._ms_deconvoluted_idx = None 206 207 # Additional attributes 208 self.monoisotopic_mf_id = None 209 self.isotopologue_type = None 210 self.ms2_scan_numbers = [] 211 self.ms2_mass_spectra = {} 212 self.ms2_similarity_results = [] 213 self.mass_spectrum_deconvoluted_parent: bool = None 214 self.associated_mass_features_deconvoluted = [] 215 216 if id: 217 self.id = id 218 else: 219 # get the parent's mass feature keys and add 1 to the max value to get the new key 220 self.id = ( 221 max(lcms_parent.mass_features.keys()) + 1 222 if lcms_parent.mass_features.keys() 223 else 0 224 ) 225 226 def update_mz(self): 227 """Update the mass to charge ratio from the mass spectrum object.""" 228 if self.mass_spectrum is None: 229 raise ValueError( 230 "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object" 231 ) 232 if len(self.mass_spectrum.mz_exp) == 0: 233 raise ValueError( 234 "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed" 235 ) 236 new_mz = self.ms1_peak.mz_exp 237 238 # calculate the difference between the new and old m/z, only update if it is close 239 mz_diff = new_mz - self.mz 240 if abs(mz_diff) < 0.01: 241 self._mz_exp = new_mz 242 243 def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True, plot_smoothed_eic = False, plot_eic_datapoints = False): 244 """Plot the mass feature. 245 246 Parameters 247 ---------- 248 to_plot : list, optional 249 List of strings specifying what to plot, any iteration of 250 "EIC", "MS2", and "MS1". 251 Default is ["EIC", "MS1", "MS2"]. 252 return_fig : bool, optional 253 If True, the figure is returned. Default is True. 254 plot_smoothed_eic : bool, optional 255 If True, the smoothed EIC is plotted. Default is False. 256 plot_eic_datapoints : bool, optional 257 If True, the EIC data points are plotted. Default is False. 258 259 Returns 260 ------- 261 matplotlib.figure.Figure or None 262 The figure object if `return_fig` is True. 263 Otherwise None and the figure is displayed. 264 """ 265 266 # EIC plot preparation 267 eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time 268 269 # Adjust to_plot list if there are not spectra added to the mass features 270 if self.mass_spectrum is None: 271 to_plot = [x for x in to_plot if x != "MS1"] 272 if len(self.ms2_mass_spectra) == 0: 273 to_plot = [x for x in to_plot if x != "MS2"] 274 if self._eic_data is None: 275 to_plot = [x for x in to_plot if x != "EIC"] 276 if self._ms_deconvoluted_idx is not None: 277 deconvoluted = True 278 else: 279 deconvoluted = False 280 281 fig, axs = plt.subplots( 282 len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False 283 ) 284 fig.suptitle( 285 "Mass Feature " 286 + str(self.id) 287 + ": m/z = " 288 + str(round(self.mz, ndigits=4)) 289 + "; time = " 290 + str(round(self.retention_time, ndigits=1)) 291 + " minutes" 292 ) 293 294 i = 0 295 # EIC plot 296 if "EIC" in to_plot: 297 if self._eic_data is None: 298 raise ValueError( 299 "EIC data is not available, cannot plot the mass feature's EIC" 300 ) 301 axs[i][0].set_title("EIC", loc="left") 302 axs[i][0].plot(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC") 303 if plot_eic_datapoints: 304 axs[i][0].scatter(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC Data Points") 305 if plot_smoothed_eic: 306 axs[i][0].plot(self._eic_data.time, self._eic_data.eic_smoothed, c="tab:red", label="Smoothed EIC") 307 if self.start_scan is not None: 308 axs[i][0].fill_between( 309 self.eic_rt_list, self.eic_list, color="b", alpha=0.2 310 ) 311 else: 312 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 313 print( 314 "No start and final scan numbers were provided for mass feature " 315 + str(self.id) 316 ) 317 axs[i][0].set_ylabel("Intensity") 318 axs[i][0].set_xlabel("Time (minutes)") 319 axs[i][0].set_ylim(0, self.eic_list.max() * 1.1) 320 axs[i][0].set_xlim( 321 self.retention_time - eic_buffer_time, 322 self.retention_time + eic_buffer_time, 323 ) 324 axs[i][0].axvline( 325 x=self.retention_time, color="k", label="MS1 scan time (apex)" 326 ) 327 if len(self.ms2_scan_numbers) > 0: 328 axs[i][0].axvline( 329 x=self.chromatogram_parent.get_time_of_scan_id( 330 self.best_ms2.scan_number 331 ), 332 color="grey", 333 linestyle="--", 334 label="MS2 scan time", 335 ) 336 axs[i][0].legend(loc="upper left") 337 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 338 i += 1 339 340 # MS1 plot 341 if "MS1" in to_plot: 342 if deconvoluted: 343 axs[i][0].set_title("MS1 (deconvoluted)", loc="left") 344 axs[i][0].vlines( 345 self.mass_spectrum.mz_exp, 346 0, 347 self.mass_spectrum.abundance, 348 color="k", 349 alpha=0.2, 350 label="Raw MS1", 351 ) 352 axs[i][0].vlines( 353 self.mass_spectrum_deconvoluted.mz_exp, 354 0, 355 self.mass_spectrum_deconvoluted.abundance, 356 color="k", 357 label="Deconvoluted MS1", 358 ) 359 axs[i][0].set_xlim( 360 self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8, 361 self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1, 362 ) 363 axs[i][0].set_ylim( 364 0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1 365 ) 366 else: 367 axs[i][0].set_title("MS1 (raw)", loc="left") 368 axs[i][0].vlines( 369 self.mass_spectrum.mz_exp, 370 0, 371 self.mass_spectrum.abundance, 372 color="k", 373 label="Raw MS1", 374 ) 375 axs[i][0].set_xlim( 376 self.mass_spectrum.mz_exp.min() * 0.8, 377 self.mass_spectrum.mz_exp.max() * 1.1, 378 ) 379 axs[i][0].set_ylim(bottom=0) 380 381 if (self.ms1_peak.mz_exp - self.mz) < 0.01: 382 axs[i][0].vlines( 383 self.ms1_peak.mz_exp, 384 0, 385 self.ms1_peak.abundance, 386 color="m", 387 label="Feature m/z", 388 ) 389 390 else: 391 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 392 print( 393 "The m/z of the mass feature " 394 + str(self.id) 395 + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted" 396 ) 397 axs[i][0].legend(loc="upper left") 398 axs[i][0].set_ylabel("Intensity") 399 axs[i][0].set_xlabel("m/z") 400 axs[i][0].yaxis.set_tick_params(labelleft=False) 401 i += 1 402 403 # MS2 plot 404 if "MS2" in to_plot: 405 axs[i][0].set_title("MS2", loc="left") 406 axs[i][0].vlines( 407 self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k" 408 ) 409 axs[i][0].set_ylabel("Intensity") 410 axs[i][0].set_xlabel("m/z") 411 axs[i][0].set_ylim(bottom=0) 412 axs[i][0].yaxis.get_major_formatter().set_scientific(False) 413 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 414 axs[i][0].set_xlim( 415 self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1 416 ) 417 axs[i][0].yaxis.set_tick_params(labelleft=False) 418 419 # Add space between subplots 420 plt.tight_layout() 421 422 if return_fig: 423 # Close figure 424 plt.close(fig) 425 return fig 426 427 @property 428 def mz(self): 429 """Mass to charge ratio of the mass feature""" 430 # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z 431 if self._mz_cal is not None: 432 return self._mz_cal 433 else: 434 return self._mz_exp 435 436 @property 437 def mass_spectrum_deconvoluted(self): 438 """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.""" 439 if self._ms_deconvoluted_idx is not None: 440 ms_deconvoluted = copy.deepcopy(self.mass_spectrum) 441 ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx) 442 return ms_deconvoluted 443 else: 444 raise ValueError( 445 "Deconvolution has not been performed for mass feature " + str(self.id) 446 ) 447 448 @property 449 def retention_time(self): 450 """Retention time of the mass feature""" 451 return self._retention_time 452 453 @retention_time.setter 454 def retention_time(self, value): 455 """Set the retention time of the mass feature""" 456 if not isinstance(value, float): 457 raise ValueError("The retention time of the mass feature must be a float") 458 self._retention_time = value 459 460 @property 461 def apex_scan(self): 462 """Apex scan of the mass feature""" 463 return self._apex_scan 464 465 @apex_scan.setter 466 def apex_scan(self, value): 467 """Set the apex scan of the mass feature""" 468 if not isinstance(value, int): 469 raise ValueError("The apex scan of the mass feature must be an integer") 470 self._apex_scan = value 471 472 @property 473 def intensity(self): 474 """Intensity of the mass feature""" 475 return self._intensity 476 477 @intensity.setter 478 def intensity(self, value): 479 """Set the intensity of the mass feature""" 480 if not isinstance(value, float): 481 raise ValueError("The intensity of the mass feature must be a float") 482 self._intensity = value 483 484 @property 485 def persistence(self): 486 """Persistence of the mass feature""" 487 return self._persistence 488 489 @persistence.setter 490 def persistence(self, value): 491 """Set the persistence of the mass feature""" 492 if not isinstance(value, float): 493 raise ValueError("The persistence of the mass feature must be a float") 494 self._persistence = value 495 496 @property 497 def eic_rt_list(self): 498 """Retention time list between the beginning and end of the mass feature""" 499 # Find index of the start and final scans in the EIC data 500 start_index = self._eic_data.scans.tolist().index(self.start_scan) 501 final_index = self._eic_data.scans.tolist().index(self.final_scan) 502 503 # Get the retention time list 504 rt_list = self._eic_data.time[start_index : final_index + 1] 505 return rt_list 506 507 @property 508 def eic_list(self): 509 """EIC List between the beginning and end of the mass feature""" 510 # Find index of the start and final scans in the EIC data 511 start_index = self._eic_data.scans.tolist().index(self.start_scan) 512 final_index = self._eic_data.scans.tolist().index(self.final_scan) 513 514 # Get the retention time list 515 eic = self._eic_data.eic[start_index : final_index + 1] 516 return eic 517 518 @property 519 def ms1_peak(self): 520 """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z""" 521 # Find index array self.mass_spectrum.mz_exp that is closest to self.mz 522 closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz)) 523 closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz) 524 525 return self.mass_spectrum._mspeaks[closest_mz_index] 526 527 @property 528 def tailing_factor(self): 529 """Tailing factor of the mass feature""" 530 return self._tailing_factor 531 532 @tailing_factor.setter 533 def tailing_factor(self, value): 534 """Set the tailing factor of the mass feature""" 535 if not isinstance(value, float): 536 raise ValueError("The tailing factor of the mass feature must be a float") 537 self._tailing_factor = value 538 539 @property 540 def dispersity_index(self): 541 """Dispersity index of the mass feature""" 542 return self._dispersity_index 543 544 @dispersity_index.setter 545 def dispersity_index(self, value): 546 """Set the dispersity index of the mass feature""" 547 if not isinstance(value, float): 548 raise ValueError("The dispersity index of the mass feature must be a float") 549 self._dispersity_index = value 550 551 @property 552 def half_height_width(self): 553 """Half height width of the mass feature, average of min and max values, in minutes""" 554 return np.mean(self._half_height_width) 555 556 @property 557 def best_ms2(self): 558 """Points to the best representative MS2 mass spectrum 559 560 Notes 561 ----- 562 If there is only one MS2 mass spectrum, it will be returned 563 If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. 564 If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra. 565 566 Returns 567 ------- 568 MassSpectrum or None 569 The best MS2 mass spectrum. 570 """ 571 if len(self.ms2_similarity_results) > 0: 572 # the scan number with the highest similarity score 573 results_df = [x.to_dataframe() for x in self.ms2_similarity_results] 574 results_df = pd.concat(results_df) 575 results_df = results_df.sort_values( 576 by="entropy_similarity", ascending=False 577 ) 578 best_scan_number = results_df.iloc[0]["query_spectrum_id"] 579 return self.ms2_mass_spectra[best_scan_number] 580 581 ms2_scans = list(self.ms2_mass_spectra.keys()) 582 if len(ms2_scans) > 1: 583 mz_diff_list = [] # List of mz difference between mz of mass feature and mass of nearest mz in each scan 584 res_list = [] # List of maximum resolving power of peaks in each scan 585 time_diff_list = [] # List of time difference between scan and apex scan in each scan 586 for scan in ms2_scans: 587 if len(self.ms2_mass_spectra[scan].mspeaks) > 0: 588 # Find mz closest to mass feature mz, return both the difference in mass and its resolution 589 closest_mz = min( 590 self.ms2_mass_spectra[scan].mz_exp, 591 key=lambda x: abs(x - self.mz), 592 ) 593 if all( 594 np.isnan(self.ms2_mass_spectra[scan].resolving_power) 595 ): # All NA for resolving power in peaks, not uncommon in CID spectra 596 res_list.append(2) # Assumes very low resolving power 597 else: 598 res_list.append( 599 np.nanmax(self.ms2_mass_spectra[scan].resolving_power) 600 ) 601 mz_diff_list.append(np.abs(closest_mz - self.mz)) 602 time_diff_list.append( 603 np.abs( 604 self.chromatogram_parent.get_time_of_scan_id(scan) 605 - self.retention_time 606 ) 607 ) 608 else: 609 res_list.append(np.nan) 610 mz_diff_list.append(np.nan) 611 time_diff_list.append(np.nan) 612 # Convert diff_lists into logical scores (higher is better for each score) 613 time_score = 1 - np.array(time_diff_list) / np.nanmax( 614 np.array(time_diff_list) 615 ) 616 res_score = np.array(res_list) / np.nanmax(np.array(res_list)) 617 # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning) 618 mz_score = np.zeros(len(ms2_scans)) 619 for i in np.arange(0, len(ms2_scans)): 620 if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1: # Possible chimeric 621 mz_score[i] = 0 622 else: 623 mz_score[i] = 1 624 # get the index of the best score and return the mass spectrum 625 if len([np.nanargmax(time_score * res_score * mz_score)]) == 1: 626 return self.ms2_mass_spectra[ 627 ms2_scans[np.nanargmax(time_score * res_score * mz_score)] 628 ] 629 # remove the mz_score condition and try again 630 elif len(np.argmax(time_score * res_score)) == 1: 631 return self.ms2_mass_spectra[ 632 ms2_scans[np.nanargmax(time_score * res_score)] 633 ] 634 else: 635 raise ValueError( 636 "No best MS2 mass spectrum could be found for mass feature " 637 + str(self.id) 638 ) 639 elif len(ms2_scans) == 1: # if only one ms2 spectra, return it 640 return self.ms2_mass_spectra[ms2_scans[0]] 641 else: # if no ms2 spectra, return None 642 return None 643 644 645class GCPeak(ChromaPeakBase, GCPeakCalculation): 646 """Class representing a peak in a gas chromatography (GC) chromatogram. 647 648 Parameters 649 ---------- 650 chromatogram_parent : Chromatogram 651 The parent chromatogram object. 652 mass_spectrum_obj : MassSpectrum 653 The mass spectrum object associated with the peak. 654 indexes : tuple 655 The indexes of the peak in the chromatogram. 656 657 Attributes 658 ---------- 659 _compounds : list 660 List of compounds associated with the peak. 661 _ri : float or None 662 Retention index of the peak. 663 664 Methods 665 ------- 666 * __len__(). Returns the number of compounds associated with the peak. 667 * __getitem__(position). Returns the compound at the specified position. 668 * remove_compound(compounds_obj). Removes the specified compound from the peak. 669 * clear_compounds(). Removes all compounds from the peak. 670 * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes. 671 * ri(). Returns the retention index of the peak. 672 * highest_ss_compound(). Returns the compound with the highest spectral similarity score. 673 * highest_score_compound(). Returns the compound with the highest similarity score. 674 * compound_names(). Returns a list of names of compounds associated with the peak. 675 """ 676 677 def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes): 678 self._compounds = [] 679 self._ri = None 680 super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes) 681 682 def __len__(self): 683 return len(self._compounds) 684 685 def __getitem__(self, position): 686 return self._compounds[position] 687 688 def remove_compound(self, compounds_obj): 689 self._compounds.remove(compounds_obj) 690 691 def clear_compounds(self): 692 self._compounds = [] 693 694 def add_compound( 695 self, 696 compounds_dict, 697 spectral_similarity_scores, 698 ri_score=None, 699 similarity_score=None, 700 ): 701 """Adds a compound to the peak with the specified attributes. 702 703 Parameters 704 ---------- 705 compounds_dict : dict 706 Dictionary containing the compound information. 707 spectral_similarity_scores : dict 708 Dictionary containing the spectral similarity scores. 709 ri_score : float or None, optional 710 The retention index score of the compound. Default is None. 711 similarity_score : float or None, optional 712 The similarity score of the compound. Default is None. 713 """ 714 compound_obj = LowResCompoundRef(compounds_dict) 715 compound_obj.spectral_similarity_scores = spectral_similarity_scores 716 compound_obj.spectral_similarity_score = spectral_similarity_scores.get( 717 "cosine_correlation" 718 ) 719 # TODO check is the above line correct? 720 compound_obj.ri_score = ri_score 721 compound_obj.similarity_score = similarity_score 722 self._compounds.append(compound_obj) 723 if similarity_score: 724 self._compounds.sort(key=lambda c: c.similarity_score, reverse=True) 725 else: 726 self._compounds.sort( 727 key=lambda c: c.spectral_similarity_score, reverse=True 728 ) 729 730 @property 731 def ri(self): 732 """Returns the retention index of the peak. 733 734 Returns 735 ------- 736 float or None 737 The retention index of the peak. 738 """ 739 return self._ri 740 741 @property 742 def highest_ss_compound(self): 743 """Returns the compound with the highest spectral similarity score. 744 745 Returns 746 ------- 747 LowResCompoundRef or None 748 The compound with the highest spectral similarity score. 749 """ 750 if self: 751 return max(self, key=lambda c: c.spectral_similarity_score) 752 else: 753 return None 754 755 @property 756 def highest_score_compound(self): 757 """Returns the compound with the highest similarity score. 758 759 Returns 760 ------- 761 LowResCompoundRef or None 762 The compound with the highest similarity score. 763 """ 764 if self: 765 return max(self, key=lambda c: c.similarity_score) 766 else: 767 return None 768 769 @property 770 def compound_names(self): 771 """Returns a list of names of compounds associated with the peak. 772 773 Returns 774 ------- 775 list 776 List of names of compounds associated with the peak. 777 """ 778 if self: 779 return [c.name for c in self] 780 else: 781 return [] 782 783 784class GCPeakDeconvolved(GCPeak): 785 """Represents a deconvolved peak in a chromatogram. 786 787 Parameters 788 ---------- 789 chromatogram_parent : Chromatogram 790 The parent chromatogram object. 791 mass_spectra : list 792 List of mass spectra associated with the peak. 793 apex_index : int 794 Index of the apex mass spectrum in the `mass_spectra` list. 795 rt_list : list 796 List of retention times. 797 tic_list : list 798 List of total ion currents. 799 """ 800 801 def __init__( 802 self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list 803 ): 804 self._ri = None 805 self._rt_list = list(rt_list) 806 self._tic_list = list(tic_list) 807 self.mass_spectra = list(mass_spectra) 808 super().__init__( 809 chromatogram_parent, 810 self.mass_spectra[apex_index], 811 (0, apex_index, len(self.mass_spectra) - 1), 812 ) 813 814 @property 815 def rt_list(self): 816 """Get the list of retention times. 817 818 Returns 819 ------- 820 list 821 The list of retention times. 822 """ 823 return self._rt_list 824 825 @property 826 def tic_list(self): 827 """Get the list of total ion currents. 828 829 Returns 830 ------- 831 list 832 The list of total ion currents. 833 """ 834 return self._tic_list
18class ChromaPeakBase: 19 """Base class for chromatographic peak (ChromaPeak) objects. 20 21 Parameters 22 ------- 23 chromatogram_parent : Chromatogram 24 The parent chromatogram object. 25 mass_spectrum_obj : MassSpectrum 26 The mass spectrum object. 27 start_index : int 28 The start index of the peak. 29 index : int 30 The index of the peak. 31 final_index : int 32 The final index of the peak. 33 34 Attributes 35 -------- 36 start_scan : int 37 The start scan of the peak. 38 final_scan : int 39 The final scan of the peak. 40 apex_scan : int 41 The apex scan of the peak. 42 chromatogram_parent : Chromatogram 43 The parent chromatogram object. 44 mass_spectrum : MassSpectrum 45 The mass spectrum object. 46 _area : float 47 The area of the peak. 48 49 Properties 50 -------- 51 * retention_time : float. 52 The retention time of the peak. 53 * tic : float. 54 The total ion current of the peak. 55 * area : float. 56 The area of the peak. 57 * rt_list : list. 58 The list of retention times within the peak. 59 * tic_list : list. 60 The list of total ion currents within the peak. 61 62 Methods 63 -------- 64 * None 65 """ 66 67 def __init__( 68 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index 69 ): 70 self.start_scan = start_index 71 self.final_scan = final_index 72 self.apex_scan = int(index) 73 self.chromatogram_parent = chromatogram_parent 74 self.mass_spectrum = mass_spectrum_obj 75 self._area = None 76 77 @property 78 def retention_time(self): 79 """Retention Time""" 80 return self.mass_spectrum.retention_time 81 82 @property 83 def tic(self): 84 """Total Ion Current""" 85 return self.mass_spectrum.tic 86 87 @property 88 def area(self): 89 """Peak Area""" 90 return self._area 91 92 @property 93 def rt_list(self): 94 """Retention Time List""" 95 return [ 96 self.chromatogram_parent.retention_time[i] 97 for i in range(self.start_scan, self.final_scan + 1) 98 ] 99 100 @property 101 def tic_list(self): 102 """Total Ion Current List""" 103 return [ 104 self.chromatogram_parent.tic[i] 105 for i in range(self.start_scan, self.final_scan + 1) 106 ]
Base class for chromatographic peak (ChromaPeak) objects.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum_obj (MassSpectrum): The mass spectrum object.
- start_index (int): The start index of the peak.
- index (int): The index of the peak.
- final_index (int): The final index of the peak.
Attributes
- start_scan (int): The start scan of the peak.
- final_scan (int): The final scan of the peak.
- apex_scan (int): The apex scan of the peak.
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum (MassSpectrum): The mass spectrum object.
- _area (float): The area of the peak.
Properties
- retention_time : float. The retention time of the peak.
- tic : float. The total ion current of the peak.
- area : float. The area of the peak.
- rt_list : list. The list of retention times within the peak.
- tic_list : list. The list of total ion currents within the peak.
Methods
- None
67 def __init__( 68 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index 69 ): 70 self.start_scan = start_index 71 self.final_scan = final_index 72 self.apex_scan = int(index) 73 self.chromatogram_parent = chromatogram_parent 74 self.mass_spectrum = mass_spectrum_obj 75 self._area = None
109class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation): 110 """Class representing a mass feature in a liquid chromatography (LC) chromatogram. 111 112 Parameters 113 ------- 114 lcms_parent : LCMS 115 The parent LCMSBase object. 116 mz : float 117 The observed mass to charge ratio of the feature. 118 retention_time : float 119 The retention time of the feature (in minutes), at the apex. 120 intensity : float 121 The intensity of the feature. 122 apex_scan : int 123 The scan number of the apex of the feature. 124 persistence : float, optional 125 The persistence of the feature. Default is None. 126 127 Attributes 128 -------- 129 _mz_exp : float 130 The observed mass to charge ratio of the feature. 131 _mz_cal : float 132 The calibrated mass to charge ratio of the feature. 133 _retention_time : float 134 The retention time of the feature (in minutes), at the apex. 135 _apex_scan : int 136 The scan number of the apex of the feature. 137 _intensity : float 138 The intensity of the feature. 139 _persistence : float 140 The persistence of the feature. 141 _eic_data : EIC_Data 142 The EIC data object associated with the feature. 143 _dispersity_index : float 144 The dispersity index of the feature. 145 _half_height_width : numpy.ndarray 146 The half height width of the feature (in minutes, as an array of min and max values). 147 _tailing_factor : float 148 The tailing factor of the feature. 149 > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak. 150 _ms_deconvoluted_idx : [int] 151 The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum. 152 is_calibrated : bool 153 If True, the feature has been calibrated. Default is False. 154 monoisotopic_mf_id : int 155 Mass feature id that is the monoisotopic version of self. 156 If self.id, then self is the monoisotopic feature). Default is None. 157 isotopologue_type : str 158 The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. 159 Default is None. 160 ms2_scan_numbers : list 161 List of scan numbers of the MS2 spectra associated with the feature. 162 Default is an empty list. 163 ms2_mass_spectra : dict 164 Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). 165 Default is an empty dictionary. 166 ms2_similarity_results : list 167 List of MS2 similarity results associated with the mass feature. 168 Default is an empty list. 169 id : int 170 The ID of the feature, also the key in the parent LCMS object's 171 `mass_features` dictionary. 172 mass_spectrum_deconvoluted_parent : bool 173 If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None. 174 associated_mass_features_deconvoluted : list 175 List of mass features associated with the deconvoluted mass spectrum. Default is an empty list. 176 177 """ 178 179 def __init__( 180 self, 181 lcms_parent, 182 mz: float, 183 retention_time: float, 184 intensity: float, 185 apex_scan: int, 186 persistence: float = None, 187 id: int = None, 188 ): 189 super().__init__( 190 chromatogram_parent=lcms_parent, 191 mass_spectrum_obj=None, 192 start_index=None, 193 index=apex_scan, 194 final_index=None, 195 ) 196 # Core attributes, marked as private 197 self._mz_exp: float = mz 198 self._mz_cal: float = None 199 self._retention_time: float = retention_time 200 self._apex_scan: int = apex_scan 201 self._intensity: float = intensity 202 self._persistence: float = persistence 203 self._eic_data: EIC_Data = None 204 self._dispersity_index: float = None 205 self._half_height_width: np.ndarray = None 206 self._ms_deconvoluted_idx = None 207 208 # Additional attributes 209 self.monoisotopic_mf_id = None 210 self.isotopologue_type = None 211 self.ms2_scan_numbers = [] 212 self.ms2_mass_spectra = {} 213 self.ms2_similarity_results = [] 214 self.mass_spectrum_deconvoluted_parent: bool = None 215 self.associated_mass_features_deconvoluted = [] 216 217 if id: 218 self.id = id 219 else: 220 # get the parent's mass feature keys and add 1 to the max value to get the new key 221 self.id = ( 222 max(lcms_parent.mass_features.keys()) + 1 223 if lcms_parent.mass_features.keys() 224 else 0 225 ) 226 227 def update_mz(self): 228 """Update the mass to charge ratio from the mass spectrum object.""" 229 if self.mass_spectrum is None: 230 raise ValueError( 231 "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object" 232 ) 233 if len(self.mass_spectrum.mz_exp) == 0: 234 raise ValueError( 235 "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed" 236 ) 237 new_mz = self.ms1_peak.mz_exp 238 239 # calculate the difference between the new and old m/z, only update if it is close 240 mz_diff = new_mz - self.mz 241 if abs(mz_diff) < 0.01: 242 self._mz_exp = new_mz 243 244 def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True, plot_smoothed_eic = False, plot_eic_datapoints = False): 245 """Plot the mass feature. 246 247 Parameters 248 ---------- 249 to_plot : list, optional 250 List of strings specifying what to plot, any iteration of 251 "EIC", "MS2", and "MS1". 252 Default is ["EIC", "MS1", "MS2"]. 253 return_fig : bool, optional 254 If True, the figure is returned. Default is True. 255 plot_smoothed_eic : bool, optional 256 If True, the smoothed EIC is plotted. Default is False. 257 plot_eic_datapoints : bool, optional 258 If True, the EIC data points are plotted. Default is False. 259 260 Returns 261 ------- 262 matplotlib.figure.Figure or None 263 The figure object if `return_fig` is True. 264 Otherwise None and the figure is displayed. 265 """ 266 267 # EIC plot preparation 268 eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time 269 270 # Adjust to_plot list if there are not spectra added to the mass features 271 if self.mass_spectrum is None: 272 to_plot = [x for x in to_plot if x != "MS1"] 273 if len(self.ms2_mass_spectra) == 0: 274 to_plot = [x for x in to_plot if x != "MS2"] 275 if self._eic_data is None: 276 to_plot = [x for x in to_plot if x != "EIC"] 277 if self._ms_deconvoluted_idx is not None: 278 deconvoluted = True 279 else: 280 deconvoluted = False 281 282 fig, axs = plt.subplots( 283 len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False 284 ) 285 fig.suptitle( 286 "Mass Feature " 287 + str(self.id) 288 + ": m/z = " 289 + str(round(self.mz, ndigits=4)) 290 + "; time = " 291 + str(round(self.retention_time, ndigits=1)) 292 + " minutes" 293 ) 294 295 i = 0 296 # EIC plot 297 if "EIC" in to_plot: 298 if self._eic_data is None: 299 raise ValueError( 300 "EIC data is not available, cannot plot the mass feature's EIC" 301 ) 302 axs[i][0].set_title("EIC", loc="left") 303 axs[i][0].plot(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC") 304 if plot_eic_datapoints: 305 axs[i][0].scatter(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC Data Points") 306 if plot_smoothed_eic: 307 axs[i][0].plot(self._eic_data.time, self._eic_data.eic_smoothed, c="tab:red", label="Smoothed EIC") 308 if self.start_scan is not None: 309 axs[i][0].fill_between( 310 self.eic_rt_list, self.eic_list, color="b", alpha=0.2 311 ) 312 else: 313 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 314 print( 315 "No start and final scan numbers were provided for mass feature " 316 + str(self.id) 317 ) 318 axs[i][0].set_ylabel("Intensity") 319 axs[i][0].set_xlabel("Time (minutes)") 320 axs[i][0].set_ylim(0, self.eic_list.max() * 1.1) 321 axs[i][0].set_xlim( 322 self.retention_time - eic_buffer_time, 323 self.retention_time + eic_buffer_time, 324 ) 325 axs[i][0].axvline( 326 x=self.retention_time, color="k", label="MS1 scan time (apex)" 327 ) 328 if len(self.ms2_scan_numbers) > 0: 329 axs[i][0].axvline( 330 x=self.chromatogram_parent.get_time_of_scan_id( 331 self.best_ms2.scan_number 332 ), 333 color="grey", 334 linestyle="--", 335 label="MS2 scan time", 336 ) 337 axs[i][0].legend(loc="upper left") 338 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 339 i += 1 340 341 # MS1 plot 342 if "MS1" in to_plot: 343 if deconvoluted: 344 axs[i][0].set_title("MS1 (deconvoluted)", loc="left") 345 axs[i][0].vlines( 346 self.mass_spectrum.mz_exp, 347 0, 348 self.mass_spectrum.abundance, 349 color="k", 350 alpha=0.2, 351 label="Raw MS1", 352 ) 353 axs[i][0].vlines( 354 self.mass_spectrum_deconvoluted.mz_exp, 355 0, 356 self.mass_spectrum_deconvoluted.abundance, 357 color="k", 358 label="Deconvoluted MS1", 359 ) 360 axs[i][0].set_xlim( 361 self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8, 362 self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1, 363 ) 364 axs[i][0].set_ylim( 365 0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1 366 ) 367 else: 368 axs[i][0].set_title("MS1 (raw)", loc="left") 369 axs[i][0].vlines( 370 self.mass_spectrum.mz_exp, 371 0, 372 self.mass_spectrum.abundance, 373 color="k", 374 label="Raw MS1", 375 ) 376 axs[i][0].set_xlim( 377 self.mass_spectrum.mz_exp.min() * 0.8, 378 self.mass_spectrum.mz_exp.max() * 1.1, 379 ) 380 axs[i][0].set_ylim(bottom=0) 381 382 if (self.ms1_peak.mz_exp - self.mz) < 0.01: 383 axs[i][0].vlines( 384 self.ms1_peak.mz_exp, 385 0, 386 self.ms1_peak.abundance, 387 color="m", 388 label="Feature m/z", 389 ) 390 391 else: 392 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 393 print( 394 "The m/z of the mass feature " 395 + str(self.id) 396 + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted" 397 ) 398 axs[i][0].legend(loc="upper left") 399 axs[i][0].set_ylabel("Intensity") 400 axs[i][0].set_xlabel("m/z") 401 axs[i][0].yaxis.set_tick_params(labelleft=False) 402 i += 1 403 404 # MS2 plot 405 if "MS2" in to_plot: 406 axs[i][0].set_title("MS2", loc="left") 407 axs[i][0].vlines( 408 self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k" 409 ) 410 axs[i][0].set_ylabel("Intensity") 411 axs[i][0].set_xlabel("m/z") 412 axs[i][0].set_ylim(bottom=0) 413 axs[i][0].yaxis.get_major_formatter().set_scientific(False) 414 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 415 axs[i][0].set_xlim( 416 self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1 417 ) 418 axs[i][0].yaxis.set_tick_params(labelleft=False) 419 420 # Add space between subplots 421 plt.tight_layout() 422 423 if return_fig: 424 # Close figure 425 plt.close(fig) 426 return fig 427 428 @property 429 def mz(self): 430 """Mass to charge ratio of the mass feature""" 431 # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z 432 if self._mz_cal is not None: 433 return self._mz_cal 434 else: 435 return self._mz_exp 436 437 @property 438 def mass_spectrum_deconvoluted(self): 439 """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.""" 440 if self._ms_deconvoluted_idx is not None: 441 ms_deconvoluted = copy.deepcopy(self.mass_spectrum) 442 ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx) 443 return ms_deconvoluted 444 else: 445 raise ValueError( 446 "Deconvolution has not been performed for mass feature " + str(self.id) 447 ) 448 449 @property 450 def retention_time(self): 451 """Retention time of the mass feature""" 452 return self._retention_time 453 454 @retention_time.setter 455 def retention_time(self, value): 456 """Set the retention time of the mass feature""" 457 if not isinstance(value, float): 458 raise ValueError("The retention time of the mass feature must be a float") 459 self._retention_time = value 460 461 @property 462 def apex_scan(self): 463 """Apex scan of the mass feature""" 464 return self._apex_scan 465 466 @apex_scan.setter 467 def apex_scan(self, value): 468 """Set the apex scan of the mass feature""" 469 if not isinstance(value, int): 470 raise ValueError("The apex scan of the mass feature must be an integer") 471 self._apex_scan = value 472 473 @property 474 def intensity(self): 475 """Intensity of the mass feature""" 476 return self._intensity 477 478 @intensity.setter 479 def intensity(self, value): 480 """Set the intensity of the mass feature""" 481 if not isinstance(value, float): 482 raise ValueError("The intensity of the mass feature must be a float") 483 self._intensity = value 484 485 @property 486 def persistence(self): 487 """Persistence of the mass feature""" 488 return self._persistence 489 490 @persistence.setter 491 def persistence(self, value): 492 """Set the persistence of the mass feature""" 493 if not isinstance(value, float): 494 raise ValueError("The persistence of the mass feature must be a float") 495 self._persistence = value 496 497 @property 498 def eic_rt_list(self): 499 """Retention time list between the beginning and end of the mass feature""" 500 # Find index of the start and final scans in the EIC data 501 start_index = self._eic_data.scans.tolist().index(self.start_scan) 502 final_index = self._eic_data.scans.tolist().index(self.final_scan) 503 504 # Get the retention time list 505 rt_list = self._eic_data.time[start_index : final_index + 1] 506 return rt_list 507 508 @property 509 def eic_list(self): 510 """EIC List between the beginning and end of the mass feature""" 511 # Find index of the start and final scans in the EIC data 512 start_index = self._eic_data.scans.tolist().index(self.start_scan) 513 final_index = self._eic_data.scans.tolist().index(self.final_scan) 514 515 # Get the retention time list 516 eic = self._eic_data.eic[start_index : final_index + 1] 517 return eic 518 519 @property 520 def ms1_peak(self): 521 """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z""" 522 # Find index array self.mass_spectrum.mz_exp that is closest to self.mz 523 closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz)) 524 closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz) 525 526 return self.mass_spectrum._mspeaks[closest_mz_index] 527 528 @property 529 def tailing_factor(self): 530 """Tailing factor of the mass feature""" 531 return self._tailing_factor 532 533 @tailing_factor.setter 534 def tailing_factor(self, value): 535 """Set the tailing factor of the mass feature""" 536 if not isinstance(value, float): 537 raise ValueError("The tailing factor of the mass feature must be a float") 538 self._tailing_factor = value 539 540 @property 541 def dispersity_index(self): 542 """Dispersity index of the mass feature""" 543 return self._dispersity_index 544 545 @dispersity_index.setter 546 def dispersity_index(self, value): 547 """Set the dispersity index of the mass feature""" 548 if not isinstance(value, float): 549 raise ValueError("The dispersity index of the mass feature must be a float") 550 self._dispersity_index = value 551 552 @property 553 def half_height_width(self): 554 """Half height width of the mass feature, average of min and max values, in minutes""" 555 return np.mean(self._half_height_width) 556 557 @property 558 def best_ms2(self): 559 """Points to the best representative MS2 mass spectrum 560 561 Notes 562 ----- 563 If there is only one MS2 mass spectrum, it will be returned 564 If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. 565 If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra. 566 567 Returns 568 ------- 569 MassSpectrum or None 570 The best MS2 mass spectrum. 571 """ 572 if len(self.ms2_similarity_results) > 0: 573 # the scan number with the highest similarity score 574 results_df = [x.to_dataframe() for x in self.ms2_similarity_results] 575 results_df = pd.concat(results_df) 576 results_df = results_df.sort_values( 577 by="entropy_similarity", ascending=False 578 ) 579 best_scan_number = results_df.iloc[0]["query_spectrum_id"] 580 return self.ms2_mass_spectra[best_scan_number] 581 582 ms2_scans = list(self.ms2_mass_spectra.keys()) 583 if len(ms2_scans) > 1: 584 mz_diff_list = [] # List of mz difference between mz of mass feature and mass of nearest mz in each scan 585 res_list = [] # List of maximum resolving power of peaks in each scan 586 time_diff_list = [] # List of time difference between scan and apex scan in each scan 587 for scan in ms2_scans: 588 if len(self.ms2_mass_spectra[scan].mspeaks) > 0: 589 # Find mz closest to mass feature mz, return both the difference in mass and its resolution 590 closest_mz = min( 591 self.ms2_mass_spectra[scan].mz_exp, 592 key=lambda x: abs(x - self.mz), 593 ) 594 if all( 595 np.isnan(self.ms2_mass_spectra[scan].resolving_power) 596 ): # All NA for resolving power in peaks, not uncommon in CID spectra 597 res_list.append(2) # Assumes very low resolving power 598 else: 599 res_list.append( 600 np.nanmax(self.ms2_mass_spectra[scan].resolving_power) 601 ) 602 mz_diff_list.append(np.abs(closest_mz - self.mz)) 603 time_diff_list.append( 604 np.abs( 605 self.chromatogram_parent.get_time_of_scan_id(scan) 606 - self.retention_time 607 ) 608 ) 609 else: 610 res_list.append(np.nan) 611 mz_diff_list.append(np.nan) 612 time_diff_list.append(np.nan) 613 # Convert diff_lists into logical scores (higher is better for each score) 614 time_score = 1 - np.array(time_diff_list) / np.nanmax( 615 np.array(time_diff_list) 616 ) 617 res_score = np.array(res_list) / np.nanmax(np.array(res_list)) 618 # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning) 619 mz_score = np.zeros(len(ms2_scans)) 620 for i in np.arange(0, len(ms2_scans)): 621 if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1: # Possible chimeric 622 mz_score[i] = 0 623 else: 624 mz_score[i] = 1 625 # get the index of the best score and return the mass spectrum 626 if len([np.nanargmax(time_score * res_score * mz_score)]) == 1: 627 return self.ms2_mass_spectra[ 628 ms2_scans[np.nanargmax(time_score * res_score * mz_score)] 629 ] 630 # remove the mz_score condition and try again 631 elif len(np.argmax(time_score * res_score)) == 1: 632 return self.ms2_mass_spectra[ 633 ms2_scans[np.nanargmax(time_score * res_score)] 634 ] 635 else: 636 raise ValueError( 637 "No best MS2 mass spectrum could be found for mass feature " 638 + str(self.id) 639 ) 640 elif len(ms2_scans) == 1: # if only one ms2 spectra, return it 641 return self.ms2_mass_spectra[ms2_scans[0]] 642 else: # if no ms2 spectra, return None 643 return None
Class representing a mass feature in a liquid chromatography (LC) chromatogram.
Parameters
- lcms_parent (LCMS): The parent LCMSBase object.
- mz (float): The observed mass to charge ratio of the feature.
- retention_time (float): The retention time of the feature (in minutes), at the apex.
- intensity (float): The intensity of the feature.
- apex_scan (int): The scan number of the apex of the feature.
- persistence (float, optional): The persistence of the feature. Default is None.
Attributes
- _mz_exp (float): The observed mass to charge ratio of the feature.
- _mz_cal (float): The calibrated mass to charge ratio of the feature.
- _retention_time (float): The retention time of the feature (in minutes), at the apex.
- _apex_scan (int): The scan number of the apex of the feature.
- _intensity (float): The intensity of the feature.
- _persistence (float): The persistence of the feature.
- _eic_data (EIC_Data): The EIC data object associated with the feature.
- _dispersity_index (float): The dispersity index of the feature.
- _half_height_width (numpy.ndarray): The half height width of the feature (in minutes, as an array of min and max values).
- _tailing_factor (float): The tailing factor of the feature. > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
- _ms_deconvoluted_idx ([int]): The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
- is_calibrated (bool): If True, the feature has been calibrated. Default is False.
- monoisotopic_mf_id (int): Mass feature id that is the monoisotopic version of self. If self.id, then self is the monoisotopic feature). Default is None.
- isotopologue_type (str): The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. Default is None.
- ms2_scan_numbers (list): List of scan numbers of the MS2 spectra associated with the feature. Default is an empty list.
- ms2_mass_spectra (dict): Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). Default is an empty dictionary.
- ms2_similarity_results (list): List of MS2 similarity results associated with the mass feature. Default is an empty list.
- id (int):
The ID of the feature, also the key in the parent LCMS object's
mass_features
dictionary. - mass_spectrum_deconvoluted_parent (bool): If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
- associated_mass_features_deconvoluted (list): List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
179 def __init__( 180 self, 181 lcms_parent, 182 mz: float, 183 retention_time: float, 184 intensity: float, 185 apex_scan: int, 186 persistence: float = None, 187 id: int = None, 188 ): 189 super().__init__( 190 chromatogram_parent=lcms_parent, 191 mass_spectrum_obj=None, 192 start_index=None, 193 index=apex_scan, 194 final_index=None, 195 ) 196 # Core attributes, marked as private 197 self._mz_exp: float = mz 198 self._mz_cal: float = None 199 self._retention_time: float = retention_time 200 self._apex_scan: int = apex_scan 201 self._intensity: float = intensity 202 self._persistence: float = persistence 203 self._eic_data: EIC_Data = None 204 self._dispersity_index: float = None 205 self._half_height_width: np.ndarray = None 206 self._ms_deconvoluted_idx = None 207 208 # Additional attributes 209 self.monoisotopic_mf_id = None 210 self.isotopologue_type = None 211 self.ms2_scan_numbers = [] 212 self.ms2_mass_spectra = {} 213 self.ms2_similarity_results = [] 214 self.mass_spectrum_deconvoluted_parent: bool = None 215 self.associated_mass_features_deconvoluted = [] 216 217 if id: 218 self.id = id 219 else: 220 # get the parent's mass feature keys and add 1 to the max value to get the new key 221 self.id = ( 222 max(lcms_parent.mass_features.keys()) + 1 223 if lcms_parent.mass_features.keys() 224 else 0 225 )
227 def update_mz(self): 228 """Update the mass to charge ratio from the mass spectrum object.""" 229 if self.mass_spectrum is None: 230 raise ValueError( 231 "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object" 232 ) 233 if len(self.mass_spectrum.mz_exp) == 0: 234 raise ValueError( 235 "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed" 236 ) 237 new_mz = self.ms1_peak.mz_exp 238 239 # calculate the difference between the new and old m/z, only update if it is close 240 mz_diff = new_mz - self.mz 241 if abs(mz_diff) < 0.01: 242 self._mz_exp = new_mz
Update the mass to charge ratio from the mass spectrum object.
244 def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True, plot_smoothed_eic = False, plot_eic_datapoints = False): 245 """Plot the mass feature. 246 247 Parameters 248 ---------- 249 to_plot : list, optional 250 List of strings specifying what to plot, any iteration of 251 "EIC", "MS2", and "MS1". 252 Default is ["EIC", "MS1", "MS2"]. 253 return_fig : bool, optional 254 If True, the figure is returned. Default is True. 255 plot_smoothed_eic : bool, optional 256 If True, the smoothed EIC is plotted. Default is False. 257 plot_eic_datapoints : bool, optional 258 If True, the EIC data points are plotted. Default is False. 259 260 Returns 261 ------- 262 matplotlib.figure.Figure or None 263 The figure object if `return_fig` is True. 264 Otherwise None and the figure is displayed. 265 """ 266 267 # EIC plot preparation 268 eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time 269 270 # Adjust to_plot list if there are not spectra added to the mass features 271 if self.mass_spectrum is None: 272 to_plot = [x for x in to_plot if x != "MS1"] 273 if len(self.ms2_mass_spectra) == 0: 274 to_plot = [x for x in to_plot if x != "MS2"] 275 if self._eic_data is None: 276 to_plot = [x for x in to_plot if x != "EIC"] 277 if self._ms_deconvoluted_idx is not None: 278 deconvoluted = True 279 else: 280 deconvoluted = False 281 282 fig, axs = plt.subplots( 283 len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False 284 ) 285 fig.suptitle( 286 "Mass Feature " 287 + str(self.id) 288 + ": m/z = " 289 + str(round(self.mz, ndigits=4)) 290 + "; time = " 291 + str(round(self.retention_time, ndigits=1)) 292 + " minutes" 293 ) 294 295 i = 0 296 # EIC plot 297 if "EIC" in to_plot: 298 if self._eic_data is None: 299 raise ValueError( 300 "EIC data is not available, cannot plot the mass feature's EIC" 301 ) 302 axs[i][0].set_title("EIC", loc="left") 303 axs[i][0].plot(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC") 304 if plot_eic_datapoints: 305 axs[i][0].scatter(self._eic_data.time, self._eic_data.eic, c="tab:blue", label="EIC Data Points") 306 if plot_smoothed_eic: 307 axs[i][0].plot(self._eic_data.time, self._eic_data.eic_smoothed, c="tab:red", label="Smoothed EIC") 308 if self.start_scan is not None: 309 axs[i][0].fill_between( 310 self.eic_rt_list, self.eic_list, color="b", alpha=0.2 311 ) 312 else: 313 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 314 print( 315 "No start and final scan numbers were provided for mass feature " 316 + str(self.id) 317 ) 318 axs[i][0].set_ylabel("Intensity") 319 axs[i][0].set_xlabel("Time (minutes)") 320 axs[i][0].set_ylim(0, self.eic_list.max() * 1.1) 321 axs[i][0].set_xlim( 322 self.retention_time - eic_buffer_time, 323 self.retention_time + eic_buffer_time, 324 ) 325 axs[i][0].axvline( 326 x=self.retention_time, color="k", label="MS1 scan time (apex)" 327 ) 328 if len(self.ms2_scan_numbers) > 0: 329 axs[i][0].axvline( 330 x=self.chromatogram_parent.get_time_of_scan_id( 331 self.best_ms2.scan_number 332 ), 333 color="grey", 334 linestyle="--", 335 label="MS2 scan time", 336 ) 337 axs[i][0].legend(loc="upper left") 338 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 339 i += 1 340 341 # MS1 plot 342 if "MS1" in to_plot: 343 if deconvoluted: 344 axs[i][0].set_title("MS1 (deconvoluted)", loc="left") 345 axs[i][0].vlines( 346 self.mass_spectrum.mz_exp, 347 0, 348 self.mass_spectrum.abundance, 349 color="k", 350 alpha=0.2, 351 label="Raw MS1", 352 ) 353 axs[i][0].vlines( 354 self.mass_spectrum_deconvoluted.mz_exp, 355 0, 356 self.mass_spectrum_deconvoluted.abundance, 357 color="k", 358 label="Deconvoluted MS1", 359 ) 360 axs[i][0].set_xlim( 361 self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8, 362 self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1, 363 ) 364 axs[i][0].set_ylim( 365 0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1 366 ) 367 else: 368 axs[i][0].set_title("MS1 (raw)", loc="left") 369 axs[i][0].vlines( 370 self.mass_spectrum.mz_exp, 371 0, 372 self.mass_spectrum.abundance, 373 color="k", 374 label="Raw MS1", 375 ) 376 axs[i][0].set_xlim( 377 self.mass_spectrum.mz_exp.min() * 0.8, 378 self.mass_spectrum.mz_exp.max() * 1.1, 379 ) 380 axs[i][0].set_ylim(bottom=0) 381 382 if (self.ms1_peak.mz_exp - self.mz) < 0.01: 383 axs[i][0].vlines( 384 self.ms1_peak.mz_exp, 385 0, 386 self.ms1_peak.abundance, 387 color="m", 388 label="Feature m/z", 389 ) 390 391 else: 392 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: 393 print( 394 "The m/z of the mass feature " 395 + str(self.id) 396 + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted" 397 ) 398 axs[i][0].legend(loc="upper left") 399 axs[i][0].set_ylabel("Intensity") 400 axs[i][0].set_xlabel("m/z") 401 axs[i][0].yaxis.set_tick_params(labelleft=False) 402 i += 1 403 404 # MS2 plot 405 if "MS2" in to_plot: 406 axs[i][0].set_title("MS2", loc="left") 407 axs[i][0].vlines( 408 self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k" 409 ) 410 axs[i][0].set_ylabel("Intensity") 411 axs[i][0].set_xlabel("m/z") 412 axs[i][0].set_ylim(bottom=0) 413 axs[i][0].yaxis.get_major_formatter().set_scientific(False) 414 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) 415 axs[i][0].set_xlim( 416 self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1 417 ) 418 axs[i][0].yaxis.set_tick_params(labelleft=False) 419 420 # Add space between subplots 421 plt.tight_layout() 422 423 if return_fig: 424 # Close figure 425 plt.close(fig) 426 return fig
Plot the mass feature.
Parameters
- to_plot (list, optional): List of strings specifying what to plot, any iteration of "EIC", "MS2", and "MS1". Default is ["EIC", "MS1", "MS2"].
- return_fig (bool, optional): If True, the figure is returned. Default is True.
- plot_smoothed_eic (bool, optional): If True, the smoothed EIC is plotted. Default is False.
- plot_eic_datapoints (bool, optional): If True, the EIC data points are plotted. Default is False.
Returns
- matplotlib.figure.Figure or None: The figure object if
return_fig
is True. Otherwise None and the figure is displayed.
Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.
Points to the best representative MS2 mass spectrum
Notes
If there is only one MS2 mass spectrum, it will be returned If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra.
Returns
- MassSpectrum or None: The best MS2 mass spectrum.
646class GCPeak(ChromaPeakBase, GCPeakCalculation): 647 """Class representing a peak in a gas chromatography (GC) chromatogram. 648 649 Parameters 650 ---------- 651 chromatogram_parent : Chromatogram 652 The parent chromatogram object. 653 mass_spectrum_obj : MassSpectrum 654 The mass spectrum object associated with the peak. 655 indexes : tuple 656 The indexes of the peak in the chromatogram. 657 658 Attributes 659 ---------- 660 _compounds : list 661 List of compounds associated with the peak. 662 _ri : float or None 663 Retention index of the peak. 664 665 Methods 666 ------- 667 * __len__(). Returns the number of compounds associated with the peak. 668 * __getitem__(position). Returns the compound at the specified position. 669 * remove_compound(compounds_obj). Removes the specified compound from the peak. 670 * clear_compounds(). Removes all compounds from the peak. 671 * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes. 672 * ri(). Returns the retention index of the peak. 673 * highest_ss_compound(). Returns the compound with the highest spectral similarity score. 674 * highest_score_compound(). Returns the compound with the highest similarity score. 675 * compound_names(). Returns a list of names of compounds associated with the peak. 676 """ 677 678 def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes): 679 self._compounds = [] 680 self._ri = None 681 super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes) 682 683 def __len__(self): 684 return len(self._compounds) 685 686 def __getitem__(self, position): 687 return self._compounds[position] 688 689 def remove_compound(self, compounds_obj): 690 self._compounds.remove(compounds_obj) 691 692 def clear_compounds(self): 693 self._compounds = [] 694 695 def add_compound( 696 self, 697 compounds_dict, 698 spectral_similarity_scores, 699 ri_score=None, 700 similarity_score=None, 701 ): 702 """Adds a compound to the peak with the specified attributes. 703 704 Parameters 705 ---------- 706 compounds_dict : dict 707 Dictionary containing the compound information. 708 spectral_similarity_scores : dict 709 Dictionary containing the spectral similarity scores. 710 ri_score : float or None, optional 711 The retention index score of the compound. Default is None. 712 similarity_score : float or None, optional 713 The similarity score of the compound. Default is None. 714 """ 715 compound_obj = LowResCompoundRef(compounds_dict) 716 compound_obj.spectral_similarity_scores = spectral_similarity_scores 717 compound_obj.spectral_similarity_score = spectral_similarity_scores.get( 718 "cosine_correlation" 719 ) 720 # TODO check is the above line correct? 721 compound_obj.ri_score = ri_score 722 compound_obj.similarity_score = similarity_score 723 self._compounds.append(compound_obj) 724 if similarity_score: 725 self._compounds.sort(key=lambda c: c.similarity_score, reverse=True) 726 else: 727 self._compounds.sort( 728 key=lambda c: c.spectral_similarity_score, reverse=True 729 ) 730 731 @property 732 def ri(self): 733 """Returns the retention index of the peak. 734 735 Returns 736 ------- 737 float or None 738 The retention index of the peak. 739 """ 740 return self._ri 741 742 @property 743 def highest_ss_compound(self): 744 """Returns the compound with the highest spectral similarity score. 745 746 Returns 747 ------- 748 LowResCompoundRef or None 749 The compound with the highest spectral similarity score. 750 """ 751 if self: 752 return max(self, key=lambda c: c.spectral_similarity_score) 753 else: 754 return None 755 756 @property 757 def highest_score_compound(self): 758 """Returns the compound with the highest similarity score. 759 760 Returns 761 ------- 762 LowResCompoundRef or None 763 The compound with the highest similarity score. 764 """ 765 if self: 766 return max(self, key=lambda c: c.similarity_score) 767 else: 768 return None 769 770 @property 771 def compound_names(self): 772 """Returns a list of names of compounds associated with the peak. 773 774 Returns 775 ------- 776 list 777 List of names of compounds associated with the peak. 778 """ 779 if self: 780 return [c.name for c in self] 781 else: 782 return []
Class representing a peak in a gas chromatography (GC) chromatogram.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectrum_obj (MassSpectrum): The mass spectrum object associated with the peak.
- indexes (tuple): The indexes of the peak in the chromatogram.
Attributes
- _compounds (list): List of compounds associated with the peak.
- _ri (float or None): Retention index of the peak.
Methods
- __len__(). Returns the number of compounds associated with the peak.
- __getitem__(position). Returns the compound at the specified position.
- remove_compound(compounds_obj). Removes the specified compound from the peak.
- clear_compounds(). Removes all compounds from the peak.
- add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
- ri(). Returns the retention index of the peak.
- highest_ss_compound(). Returns the compound with the highest spectral similarity score.
- highest_score_compound(). Returns the compound with the highest similarity score.
- compound_names(). Returns a list of names of compounds associated with the peak.
695 def add_compound( 696 self, 697 compounds_dict, 698 spectral_similarity_scores, 699 ri_score=None, 700 similarity_score=None, 701 ): 702 """Adds a compound to the peak with the specified attributes. 703 704 Parameters 705 ---------- 706 compounds_dict : dict 707 Dictionary containing the compound information. 708 spectral_similarity_scores : dict 709 Dictionary containing the spectral similarity scores. 710 ri_score : float or None, optional 711 The retention index score of the compound. Default is None. 712 similarity_score : float or None, optional 713 The similarity score of the compound. Default is None. 714 """ 715 compound_obj = LowResCompoundRef(compounds_dict) 716 compound_obj.spectral_similarity_scores = spectral_similarity_scores 717 compound_obj.spectral_similarity_score = spectral_similarity_scores.get( 718 "cosine_correlation" 719 ) 720 # TODO check is the above line correct? 721 compound_obj.ri_score = ri_score 722 compound_obj.similarity_score = similarity_score 723 self._compounds.append(compound_obj) 724 if similarity_score: 725 self._compounds.sort(key=lambda c: c.similarity_score, reverse=True) 726 else: 727 self._compounds.sort( 728 key=lambda c: c.spectral_similarity_score, reverse=True 729 )
Adds a compound to the peak with the specified attributes.
Parameters
- compounds_dict (dict): Dictionary containing the compound information.
- spectral_similarity_scores (dict): Dictionary containing the spectral similarity scores.
- ri_score (float or None, optional): The retention index score of the compound. Default is None.
- similarity_score (float or None, optional): The similarity score of the compound. Default is None.
Returns the compound with the highest spectral similarity score.
Returns
- LowResCompoundRef or None: The compound with the highest spectral similarity score.
Returns the compound with the highest similarity score.
Returns
- LowResCompoundRef or None: The compound with the highest similarity score.
785class GCPeakDeconvolved(GCPeak): 786 """Represents a deconvolved peak in a chromatogram. 787 788 Parameters 789 ---------- 790 chromatogram_parent : Chromatogram 791 The parent chromatogram object. 792 mass_spectra : list 793 List of mass spectra associated with the peak. 794 apex_index : int 795 Index of the apex mass spectrum in the `mass_spectra` list. 796 rt_list : list 797 List of retention times. 798 tic_list : list 799 List of total ion currents. 800 """ 801 802 def __init__( 803 self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list 804 ): 805 self._ri = None 806 self._rt_list = list(rt_list) 807 self._tic_list = list(tic_list) 808 self.mass_spectra = list(mass_spectra) 809 super().__init__( 810 chromatogram_parent, 811 self.mass_spectra[apex_index], 812 (0, apex_index, len(self.mass_spectra) - 1), 813 ) 814 815 @property 816 def rt_list(self): 817 """Get the list of retention times. 818 819 Returns 820 ------- 821 list 822 The list of retention times. 823 """ 824 return self._rt_list 825 826 @property 827 def tic_list(self): 828 """Get the list of total ion currents. 829 830 Returns 831 ------- 832 list 833 The list of total ion currents. 834 """ 835 return self._tic_list
Represents a deconvolved peak in a chromatogram.
Parameters
- chromatogram_parent (Chromatogram): The parent chromatogram object.
- mass_spectra (list): List of mass spectra associated with the peak.
- apex_index (int):
Index of the apex mass spectrum in the
mass_spectra
list. - rt_list (list): List of retention times.
- tic_list (list): List of total ion currents.
802 def __init__( 803 self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list 804 ): 805 self._ri = None 806 self._rt_list = list(rt_list) 807 self._tic_list = list(tic_list) 808 self.mass_spectra = list(mass_spectra) 809 super().__init__( 810 chromatogram_parent, 811 self.mass_spectra[apex_index], 812 (0, apex_index, len(self.mass_spectra) - 1), 813 )