corems.encapsulation.factory.processingSetting
__author__ = "Yuri E. Corilo"
__date__ = "Jul 02, 2019"

import dataclasses
import os
import types
from typing import Dict, List, Union, get_args, get_origin

from corems.encapsulation.constant import Atoms, Labels


def _coercion_target(annotation):
    """Return a callable class able to build a value for a field annotation.

    Plain classes (``int``, ``float``, ``tuple`` ...) are returned unchanged.
    ``typing`` aliases such as ``Dict``/``List`` cannot be instantiated, so their
    runtime origin (``dict``/``list``) is returned instead. For a union such as
    ``list | tuple`` the first listed alternative is used.
    """
    origin = get_origin(annotation)
    if origin is Union or origin is types.UnionType:
        return _coercion_target(get_args(annotation)[0])
    return annotation if origin is None else origin


def _enforce_field_types(settings) -> None:
    """Coerce every dataclass field of *settings* to its annotated type.

    Values that already satisfy ``isinstance(value, field.type)`` are left
    untouched; any other value is passed through the annotated type's
    constructor (e.g. ``5`` becomes ``5.0`` for a ``float`` field). Whatever
    exception the constructor raises for an unconvertible value propagates.
    """
    for field in dataclasses.fields(settings):
        value = getattr(settings, field.name)
        if isinstance(value, field.type):
            continue
        setattr(settings, field.name, _coercion_target(field.type)(value))


@dataclasses.dataclass
class TransientSetting:
    """Transient processing settings class

    Attributes
    ----------
    implemented_apodization_function : tuple
        Available apodization functions
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming,
        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)

    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        # enforce datatype
        _enforce_field_types(self)


@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
        Entries passed to the constructor are merged on top of the built-in
        translations set up in ``__post_init__``.
    """

    # add to this dict the VALUES to match your labels, THE ORDER WON'T MATTER
    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        default_translations = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # Keep caller-supplied translations instead of discarding them; on a key
        # clash the caller's entry wins over the built-in one.
        default_translations.update(self.header_translate)
        self.header_translate = default_translations

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""

        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp


@dataclasses.dataclass
class LiquidChromatographSetting:
    """Liquid chromatograph processing settings class

    Attributes
    ----------
    scans : list or tuple, optional
        List of select scan to average or a tuple containing the range to average. Default is (-1, -1).
    eic_tolerance_ppm : float, optional
        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    correct_eic_baseline : bool, optional
        If True, correct the baseline of the extracted ion chromatogram. Default is True.
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_height_max_percent : float, optional
        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
    peak_max_prominence_percent : float, optional
        1-100 % used for baseline detection. Default is 1.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Default is 0.0005.
    min_peak_datapoints : float, optional
        minimum data point to define a chromatographic peak. Default is 5.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    eic_buffer_time : float, optional
        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes.
        Default is 1.5.
    peak_picking_method : str, optional
        Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
    implemented_peak_picking_methods : tuple, optional
        Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
    ph_smooth_it : int, optional
        Number of iterations to use for smoothing prior to finding mass features.
        Used only for "persistent homology" peak picking method.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
    ph_smooth_radius_mz : int, optional
        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
        Used only for "persistent homology" peak picking method.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
    ph_smooth_radius_scan : int, optional
        Radius in scan steps for smoothing prior to finding mass features.
        Used only for "persistent homology" peak picking method.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
    ph_inten_min_rel : float, optional
        Relative minimum intensity to use for finding mass features for persistent homology.
        Used only for "persistent homology" peak picking method.
        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    ph_persis_min_rel : float, optional
        Relative minimum persistence for retaining mass features.
        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
        Should be greater to or equal to ph_inten_min_rel.
        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    mass_feature_cluster_mz_tolerance_rel : float, optional
        Relative m/z tolerance to use for clustering mass features.
        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
        Default is 5E-6 (5 ppm).
    mass_feature_cluster_rt_tolerance : float, optional
        Retention time tolerance to use for clustering mass features, in minutes.
        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
        Default is 0.3.
    ms1_scans_to_average : int, optional
        Number of MS1 scans to average for mass-feature associated m/zs.
        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
    ms1_deconvolution_corr_min : float, optional
        Minimum correlation to use for deconvoluting MS1 mass features.
        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
        Default is 0.8.
    ms2_dda_rt_tolerance : float, optional
        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    ms2_dda_mz_tolerance : float, optional
        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    ms2_min_fe_score : float, optional
        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    search_as_lipids : bool, optional
        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    include_fragment_types : bool, optional
        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    export_profile_spectra : bool, optional
        Data-saving option. Default is False.
    export_eics : bool, optional
        Data-saving option. Default is True.
    export_unprocessed_ms1 : bool, optional
        Data-saving option. Default is False.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.

    Notes
    -----
    correct_eic_baseline, eic_buffer_time and the ph_* parameters carry no type
    annotation, so they are plain class attributes rather than dataclass fields:
    they are not accepted by the constructor, not type-enforced in
    ``__post_init__`` and not returned by ``dataclasses.fields``/``asdict``.
    """

    scans: list | tuple = (-1, -1)

    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
    eic_tolerance_ppm: float = 5
    # NOTE(review): unannotated on purpose? Adding an annotation would turn this
    # into a dataclass field and change the constructor signature - confirm.
    correct_eic_baseline = True
    smooth_window: int = 5
    smooth_method: str = "savgol"
    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )
    savgol_pol_order: int = 2
    peak_height_max_percent: float = 10
    peak_max_prominence_percent: float = 1
    peak_derivative_threshold: float = 0.0005
    min_peak_datapoints: float = 5
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )
    peak_height_min_percent: float = 0.1
    eic_signal_threshold: float = 0.01
    eic_buffer_time = 1.5  # class attribute, not a dataclass field (no annotation)

    # Parameters used for 2D peak picking
    peak_picking_method: str = "persistent homology"
    implemented_peak_picking_methods: tuple = (
        "persistent homology",
        "centroided_persistent_homology",
    )

    # Parameters used in persistent homology calculations
    # (class attributes, not dataclass fields - see the Notes in the docstring)
    ph_smooth_it = 1
    ph_smooth_radius_mz = 0
    ph_smooth_radius_scan = 1
    ph_inten_min_rel = 0.001
    ph_persis_min_rel = 0.001

    # Parameters used to cluster mass features
    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
    mass_feature_cluster_rt_tolerance: float = 0.3

    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
    ms1_scans_to_average: int = 1
    ms1_deconvolution_corr_min: float = 0.8
    ms2_dda_rt_tolerance: float = 0.15
    ms2_dda_mz_tolerance: float = 0.05

    # Parameters used for flash entropy searching and database preparation
    ms2_min_fe_score: float = 0.2
    search_as_lipids: bool = False
    include_fragment_types: bool = False

    # Parameters used for saving the data
    export_profile_spectra: bool = False
    export_eics: bool = True
    export_unprocessed_ms1: bool = False

    # Parameters used for verbose processing
    verbose_processing: bool = True

    def __post_init__(self):
        # enforce datatype
        _enforce_field_types(self)


@dataclasses.dataclass
class MassSpectrumSetting:
    """Mass spectrum processing settings class

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method: string, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Available reference mass matching methods. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance: float, optional
        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"

    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    noise_threshold_min_std: int = 6  # when using 'minima' method

    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method

    noise_threshold_min_relative_abundance: float = (
        6  # from 0-100, when using 'relative_abundance' method
    )

    noise_threshold_absolute_abundance: float = (
        1_000_000  # when using 'absolute_abundance' method
    )

    noise_threshold_log_nsigma: int = 6  # when using 'log' method
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
    # This will fix peak picking at spectrum limit issues
    #  0 to keep normal behaviour, typical value 3 to fix
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # enforce datatype
        _enforce_field_types(self)


@dataclasses.dataclass
class MassSpecPeakSetting:
    """Mass spectrum peak processing settings class

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        Defaults to {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit centroid
        Default false.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1

    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection

    min_peak_datapoints: float = 5  # 0-inf used for peak detection

    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection

    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection

    legacy_resolving_power: bool = (
        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
    )

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        # default to CH2
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}
        # enforce datatype
        _enforce_field_types(self)


@dataclasses.dataclass
class GasChromatographSetting:
    """Gas chromatograph processing settings class

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
        Defaults to 0.1.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods

    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection

    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection

    eic_signal_threshold: float = (
        0.01  # 0-100 % used for extracted ion chromatogram peak detection
    )

    max_rt_distance: float = (
        0.025  # minutes, max distance allowance hierarchical cluster
    )

    verbose_processing: bool = True

    def __post_init__(self):
        # enforce datatype
        _enforce_field_types(self)


@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
        When the SPECTRAL_GCMS_DATABASE_URL environment variable is set, its value
        replaces whatever was passed to the constructor.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
        The default list is applied only when no (or an empty) list is given.
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Default is 'All'.

    """

    # The effective default has always been the SQLite URL below (the previous
    # hard-coded PostgreSQL default was unconditionally overwritten in
    # __post_init__), so it is now declared directly.
    url_database: str = "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # The environment variable keeps its precedence; without it, a URL
        # passed to the constructor is now honoured instead of being discarded.
        env_url = os.getenv("SPECTRAL_GCMS_DATABASE_URL")
        if env_url is not None:
            self.url_database = env_url

        # enforce datatype
        _enforce_field_types(self)

        # Fill in the standard calibration compounds only when the caller did
        # not supply their own list.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]


class MolecularLookupDictSettings:
    """Settings for molecular searching

    These are used to generate the database entries, do not change.

    Attributes
    ----------
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.0.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.0.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    ### DO NOT CHANGE IT! These are used to generate the database entries

    ### DO change when creating a new application database

    ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below

    ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms
    ### if you don't want to include one of those atoms set the max and min at 0
    ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module
    ### make sure to include the selected covalence at the used_atoms_valences when adding new atoms
    ### NOTE : Adducts atoms have zero covalence
    ### NOTE : Not using static variable because this class is distributed using multiprocessing
    def __init__(self):
        self.usedAtoms = {
            "C": (1, 90),
            "H": (4, 200),
            "O": (0, 12),
            "N": (0, 0),
            "S": (0, 0),
            "P": (0, 0),
            "Cl": (0, 0),
        }

        self.min_mz = 50

        self.max_mz = 1200

        self.min_dbe = 0

        self.max_dbe = 50

        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = True

        self.isProtonated = True

        self.url_database = None

        self.db_jobs = 1

        self.used_atom_valences = {
            "C": 4,
            "13C": 4,
            "H": 1,
            "O": 2,
            "18O": 2,
            "N": 3,
            "S": 2,
            "34S": 2,
            "P": 3,
            "Cl": 1,
            "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }


@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Settings for molecular searching

    Attributes
    ----------
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight for m/z error score to contribute to composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Tuple of score method that can be implemented.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    ion_types_excluded : list, optional
        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error for isotolopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error for isotolopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    verbose_processing: bool, optional
        If True, print verbose processing information. Default is True.
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # query setting========
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # depending on the polarity mode it looks for [M].+ , [M].-
    # query and automatically compile add entry if it doesn't exist

isRadical: bool = False 909 910 # depending on the polarity mode it looks for [M + H]+ , [M - H]+ 911 # query and automatically compile and push options if it doesn't exist 912 isProtonated: bool = True 913 914 isAdduct: bool = False 915 916 usedAtoms: dict = dataclasses.field(default_factory=dict) 917 ion_types_excluded: list = dataclasses.field(default_factory=list) 918 919 # search setting ======== 920 921 ionization_type: str = "ESI" 922 923 # empirically set / needs optimization 924 min_ppm_error: float = -10.0 # ppm 925 926 # empirically set / needs optimization 927 max_ppm_error: float = 10.0 # ppm 928 929 # empirically set / needs optimization set for isotopologue search 930 min_abun_error: float = -100.0 # percentage 931 932 # empirically set / needs optimization set for isotopologue search 933 max_abun_error: float = 100.0 # percentage 934 935 # empirically set / needs optimization 936 mz_error_range: float = 1.5 937 938 # 'distance', 'lowest', 'symmetrical','average' 'None' 939 error_method: str = "None" 940 941 mz_error_average: float = 0.0 942 943 # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict) 944 used_atom_valences: dict = dataclasses.field(default_factory=dict) 945 946 def __post_init__(self): 947 if not self.url_database or self.url_database == "": 948 self.url_database = os.getenv( 949 "COREMS_DATABASE_URL", "sqlite:///db/molformula.db" 950 ) 951 # enforce datatype 952 for field in dataclasses.fields(self): 953 value = getattr(self, field.name) 954 if not isinstance(value, field.type): 955 value = field.type(value) 956 setattr(self, field.name, value) 957 958 # enforce C and H if either do not exists 959 if "C" not in self.usedAtoms.keys(): 960 self.usedAtoms["C"] = (1, 100) 961 if "H" not in self.usedAtoms.keys(): 962 self.usedAtoms["H"] = (1, 200) 963 964 # add cummon values 965 current_used_atoms = self.used_atom_valences.keys() 966 967 for atom in Atoms.atoms_covalence.keys(): 968 if atom not in 
current_used_atoms: 969 covalence = Atoms.atoms_covalence.get(atom) 970 971 if isinstance(covalence, int): 972 self.used_atom_valences[atom] = covalence 973 974 else: 975 # will get the first number of all possible covalances, which should be the most commum 976 self.used_atom_valences[atom] = covalence[0]
@dataclasses.dataclass
class TransientSetting:
    """Settings applied to a transient before it is Fourier transformed.

    Attributes
    ----------
    implemented_apodization_function : tuple
        Names of the apodization functions that are available.
    apodization_method : str
        Name of the apodization function to apply. Hanning is a good default for
        Fourier transform magnitude mode; for absorption mode processing,
        Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        Number of times the transient is truncated prior to the Fourier transform.
    number_of_zero_fills : int
        Number of times the transient is zero filled prior to the Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function.
        0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning,
        and 8.6 is similar to Blackman (from numpy docs).
    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        # Cast every attribute that does not already match its annotated type,
        # by calling that type on the current value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Transient processing settings class
Attributes
- implemented_apodization_function (tuple): Available apodization functions
- apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
- number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
- number_of_zero_fills (int): How many times to zero fill the transient prior to Fourier transform.
- next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
- kaiser_beta (float): Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}.
        Entries passed to the constructor are kept and take precedence over the
        built-in translations that are added in ``__post_init__``.
    """

    # add to this dict the VALUES to match your labels, THE ORDER WON'T MATTER
    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Populate ``header_translate`` with the built-in label translations."""
        default_translations = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # The defaults used to replace whatever was passed to the constructor,
        # silently dropping caller-supplied labels. Merge instead: with no
        # argument the result is exactly the default table, otherwise the
        # caller's entries are layered on top of it.
        self.header_translate = {
            **default_translations,
            **(self.header_translate or {}),
        }

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp
Data input settings class
Attributes
- header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
def add_mz_label(self, label):
    """Register one more header label that maps to the corems m/z label.

    Parameters
    ----------
    label
        Header label to store as a key in ``header_translate``.
    """
    self.header_translate.update({label: Labels.mz})
Add a label to the header_translate dictionary to be translated to the corems label for mz.
def add_peak_height_label(self, label):
    """Register one more header label that maps to the corems abundance (peak height) label.

    Parameters
    ----------
    label
        Header label to store as a key in ``header_translate``.
    """
    self.header_translate.update({label: Labels.abundance})
Add a label to the header_translate dictionary to be translated to the corems label for peak height.
def add_sn_label(self, label):
    """Register one more header label that maps to the corems signal-to-noise label.

    Parameters
    ----------
    label
        Header label to store as a key in ``header_translate``.
    """
    self.header_translate.update({label: Labels.s2n})
Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.
def add_resolving_power_label(self, label):
    """Register one more header label that maps to the corems resolving power label.

    Parameters
    ----------
    label
        Header label to store as a key in ``header_translate``.
    """
    self.header_translate.update({label: Labels.rp})
Add a label to the header_translate dictionary to be translated to the corems label for resolving power.
110@dataclasses.dataclass 111class LiquidChromatographSetting: 112 """Liquid chromatograph processing settings class 113 114 Attributes 115 ---------- 116 scans : list or tuple, optional 117 List of select scan to average or a tuple containing the range to average. Default is (0, 1). 118 eic_tolerance_ppm : float, optional 119 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. 120 correct_eic_baseline : bool, optional 121 If True, correct the baseline of the extracted ion chromatogram. Default is True. 122 smooth_window : int, optional 123 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. 124 smooth_method : str, optional 125 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. 126 implemented_smooth_method : tuple, optional 127 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). 128 savgol_pol_order : int, optional 129 Polynomial order for Savitzky-Golay smoothing. Default is 2. 130 peak_height_max_percent : float, optional 131 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. 132 peak_max_prominence_percent : float, optional 133 1-100 % used for baseline detection. Default is 1. 134 peak_derivative_threshold : float, optional 135 Threshold for defining derivative crossing. Default is 0.0005. 136 min_peak_datapoints : float, optional 137 minimum data point to define a chromatografic peak. Default is 5. 138 noise_threshold_method : str, optional 139 Method for detecting noise threshold. Default is 'manual_relative_abundance'. 140 noise_threshold_methods_implemented : tuple, optional 141 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). 142 peak_height_min_percent : float, optional 143 0-100 % used for peak detection. 
Default is 0.1. 144 eic_signal_threshold : float, optional 145 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. 146 eic_buffer_time : float, optional 147 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. 148 peak_picking_method : str, optional 149 Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'. 150 implemented_peak_picking_methods : tuple, optional 151 Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology'). 152 ph_smooth_it : int, optional 153 Number of iterations to use for smoothing prior to finding mass features. 154 Used only for "persistent homology" peak picking method. 155 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. 156 ph_smooth_radius_mz : int, optional 157 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. 158 Used only for "persistent homology" peak picking method. 159 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. 160 ph_smooth_radius_scan : int, optional 161 Radius in scan steps for smoothing prior to finding mass features. 162 Used only for "persistent homology" peak picking method. 163 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. 164 ph_inten_min_rel : int, optional 165 Relative minimum intensity to use for finding mass features for persistent homology. 166 Used only for "persistent homology" peak picking method. 167 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 168 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 169 ph_persis_min_rel : int, optional 170 Relative minimum persistence for retaining mass features. 171 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 
172 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 173 Should be greater to or equal to ph_inten_min_rel. 174 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 175 mass_feature_cluster_mz_tolerance_rel : float, optional 176 Relative m/z tolerance to use for clustering mass features. 177 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 178 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 179 Default is 5E-6 (5 ppm). 180 mass_feature_cluster_rt_tolerance : float, optional 181 Retention time tolerance to use for clustering mass features, in minutes. 182 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 183 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 184 Default is 0.2. 185 ms1_scans_to_average : int, optional 186 Number of MS1 scans to average for mass-feature associated m/zs. 187 Called within the LCMSBase.add_associated_ms1() method. Default is 1. 188 ms1_deconvolution_corr_min : float, optional 189 Minimum correlation to use for deconvoluting MS1 mass features. 190 Called within the LCCalculations.deconvolute_ms1_mass_features() method. 191 Default is 0.8. 192 ms2_dda_rt_tolerance : float, optional 193 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. 194 ms2_dda_mz_tolerance : float, optional 195 Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. 196 ms2_min_fe_score : float, optional 197 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. 
198 search_as_lipids : bool, optional 199 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. 200 include_fragment_types : bool, optional 201 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. 202 verbose_processing : bool, optional 203 If True, print verbose processing information. Default is True. 204 """ 205 206 scans: list | tuple = (-1, -1) 207 208 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing 209 eic_tolerance_ppm: float = 5 210 correct_eic_baseline = True 211 smooth_window: int = 5 212 smooth_method: str = "savgol" 213 implemented_smooth_method: tuple = ( 214 "savgol", 215 "hanning", 216 "blackman", 217 "bartlett", 218 "flat", 219 "boxcar", 220 ) 221 savgol_pol_order: int = 2 222 peak_height_max_percent: float = 10 223 peak_max_prominence_percent: float = 1 224 peak_derivative_threshold: float = 0.0005 225 min_peak_datapoints: float = 5 226 noise_threshold_method: str = "manual_relative_abundance" 227 noise_threshold_methods_implemented: tuple = ( 228 "auto_relative_abundance", 229 "manual_relative_abundance", 230 "second_derivative", 231 ) 232 peak_height_min_percent: float = 0.1 233 eic_signal_threshold: float = 0.01 234 eic_buffer_time = 1.5 235 236 # Parameters used for 2D peak picking 237 peak_picking_method: str = "persistent homology" 238 implemented_peak_picking_methods: tuple = ( 239 "persistent homology", 240 "centroided_persistent_homology", 241 ) 242 243 # Parameters used in persistent homology calculations 244 ph_smooth_it = 1 245 ph_smooth_radius_mz = 0 246 ph_smooth_radius_scan = 1 247 ph_inten_min_rel = 0.001 248 ph_persis_min_rel = 0.001 249 250 # Parameters used to cluster mass features 251 mass_feature_cluster_mz_tolerance_rel: float = 5e-6 252 mass_feature_cluster_rt_tolerance: float = 0.3 253 254 # Parameters used in associating 
MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features 255 ms1_scans_to_average: int = 1 256 ms1_deconvolution_corr_min: float = 0.8 257 ms2_dda_rt_tolerance: float = 0.15 258 ms2_dda_mz_tolerance: float = 0.05 259 260 # Parameters used for flash entropy searching and database preparation 261 ms2_min_fe_score: float = 0.2 262 search_as_lipids: bool = False 263 include_fragment_types: bool = False 264 265 # Parameters used for saving the data 266 export_profile_spectra: bool = False 267 export_eics: bool = True 268 export_unprocessed_ms1: bool = False 269 270 # Parameters used for verbose processing 271 verbose_processing: bool = True 272 273 def __post_init__(self): 274 # enforce datatype 275 for field in dataclasses.fields(self): 276 value = getattr(self, field.name) 277 if not isinstance(value, field.type): 278 value = field.type(value) 279 setattr(self, field.name, value)
Liquid chromatograph processing settings class
Attributes
- scans (list or tuple, optional): List of selected scans to average or a tuple containing the range to average. Default is (-1, -1).
- eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
- correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
- smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- peak_height_max_percent (float, optional): 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
- peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
- min_peak_datapoints (float, optional): Minimum number of data points to define a chromatographic peak. Default is 5.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detecting noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
- peak_picking_method (str, optional): Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
- implemented_peak_picking_methods (tuple, optional): Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
- ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
- ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features for persistent homology. Used only for "persistent homology" peak picking method. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
- mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
- ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
- ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
- ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
- ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
- ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
- search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
- include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings used when processing a mass spectrum.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting the noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that can be selected.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using the 'minima' method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using the 'signal_noise' method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using the 'relative_abundance' method.
        Note that this is a percentage value. Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using the 'absolute_abundance' method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use with the 'log' method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for the 'log' method. Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of histogram bins to use with the 'log' method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad
        the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults.
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration.
        Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that can be selected. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance
        is the initial matching tolerance. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5. NOTE(review): meaning is not documented in this class; presumably
        a limit on the standard deviation of the raw error during reference matching.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    # one threshold per noise method
    noise_threshold_min_std: int = 6  # 'minima'
    noise_threshold_min_s2n: float = 4  # 'signal_noise'
    noise_threshold_min_relative_abundance: float = 6  # 'relative_abundance', from 0-100
    noise_threshold_absolute_abundance: float = 1_000_000  # 'absolute_abundance'

    # 'log' method
    noise_threshold_log_nsigma: int = 6
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # Number of data points (in each direction) by which the mz axis is extrapolated
    # and the abundance axis is 0 padded; fixes peak picking at the spectrum limits.
    # 0 keeps the normal behaviour, 3 is the typical value for the fix.
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # Cast every attribute that does not already match its annotated type,
        # by calling that type on the current value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Mass spectrum processing settings class
Attributes
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detecting the noise threshold that are implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
- noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
- noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
- noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
- noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
- noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
- noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
- noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
- noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
- noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
- min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
- max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
- picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
- calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
- calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
- max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
- min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
- calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
- calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
- calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
- do_calibration (bool, optional): If True, perform calibration. Default is True.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Settings controlling mass spectrum peak detection and Kendrick mass handling.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Element symbols mapped to their counts in the Kendrick base.
        An empty (or omitted) value is replaced by {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Rounding used for the nominal Kendrick mass: 'floor', 'ceil' or 'round'.
        Default is 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Rounding methods that are valid for `kendrick_rounding_method`.
        Default is ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold defining a derivative crossing (0-1). Default is 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence, in percent, used for peak detection. Default is 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection (0-inf). Default is 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence, in percent, used for baseline detection. Default is 0.1.
    peak_height_max_percent : float, optional
        Maximum height, in percent, used for baseline detection. Default is 10.
    legacy_resolving_power : bool, optional
        If True, use the legacy (CoreMS v1) resolving power calculation. Default is True.
    legacy_centroid_polyfit : bool, optional
        If True, use the legacy (numpy polyfit) centroid fit. Default is False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    # valid choices are listed in implemented_kendrick_rounding_methods
    kendrick_rounding_method: str = "floor"

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    # derivative crossing threshold, 0-1
    peak_derivative_threshold: float = 0.0

    # percent, used for peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-inf, used for peak detection
    min_peak_datapoints: float = 5

    # percent, used for baseline detection
    peak_max_prominence_percent: float = 0.1

    # percent, used for baseline detection
    peak_height_max_percent: float = 10

    # True selects the legacy (CoreMS v1) resolving power calculation
    legacy_resolving_power: bool = True

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        """Apply the CH2 default Kendrick base and coerce fields to their annotated types."""
        # an empty Kendrick base means "use CH2"
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}

        # cast any value whose type differs from the field annotation
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Mass spectrum peak processing settings class
Attributes
- kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
- kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
- implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
- peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. Defaults to 0.1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 0.1.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
- legacy_centroid_polyfit (bool, optional): Use legacy (numpy polyfit) to fit centroid. Default is False.
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings controlling gas chromatogram smoothing, peak picking and noise thresholds.

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Available smoothing methods.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use; one of `implemented_smooth_method`. Default is 'savgol'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold defining a derivative crossing (0-1). Default is 0.0005.
    peak_height_max_percent : float, optional
        Maximum height, in percent, used for baseline detection. Default is 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence, in percent, used for baseline detection. Default is 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Default is 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Default is 0.1.
    noise_threshold_method : str, optional
        Method for detecting the noise threshold; one of
        `noise_threshold_methods_implemented`. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Available noise threshold methods. Default is
        ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical clustering, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    # percent, baseline detection; use 0.1 for second_derivative and 10 for other methods
    peak_height_max_percent: float = 10

    # percent, baseline detection
    peak_max_prominence_percent: float = 1

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1

    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01

    # minutes, maximum distance allowance for hierarchical clustering
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        """Coerce every field to its annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Gas chromatograph processing settings class
Attributes
- use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detecting the noise threshold that are implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- std_noise_threshold (int, optional): Default is 3.
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When no value (or an empty value) is given, the
        SPECTRAL_GCMS_DATABASE_URL environment variable is used, falling back to
        'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'. An explicitly passed
        URL is kept as-is.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration.
        When empty or omitted, defaults to ['Methyl Caprylate', 'Methyl Caprate',
        'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate',
        'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate',
        'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Available score methods. Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Score method to use for output. Default is 'All'.
    """

    # NOTE(review): this declared default is only a placeholder; __post_init__
    # replaces it with the environment/sqlite URL. It embeds credentials and is
    # kept solely so the declared interface does not change.
    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        """Resolve defaults for the database URL and calibration compounds, then enforce types.

        Values passed explicitly to the constructor are preserved; only an
        omitted/empty `url_database` or `ri_calibration_compound_names` is
        replaced by its default.
        """
        # The dataclass keeps the declared default as a class attribute, so a
        # value equal to it means the caller did not supply a URL.
        placeholder_url = type(self).url_database
        if not self.url_database or self.url_database == placeholder_url:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # only fall back to the built-in calibration compounds when none were given
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                value = field.type(value)
                setattr(self, field.name, value)
Settings for compound search
Attributes
- url_database (str, optional): URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
- ri_search_range (float, optional): Retention index search range. Default is 35.
- rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
- correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
- score_threshold (float, optional): Threshold for composite score. Default is 0.0.
- ri_spacing (float, optional): Retention index spacing. Default is 200.
- ri_std (float, optional): Retention index standard deviation. Default is 3.
- ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
class MolecularLookupDictSettings:
    """Settings used to generate the molecular formula database entries.

    Do not change these values for an existing database; change them only when
    creating a new application database. For runtime search settings and
    database queries use the MolecularFormulaSearchSettings class instead.

    Attributes
    ----------
    usedAtoms : dict, optional
        Atoms mapped to (min, max) count ranges. Default is
        {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Atoms mapped to valences. Default is
        {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    """

    # C, H, N, O, S and P are ALWAYS needed in usedAtoms; to leave one of them
    # out, set both its min and max to 0.
    # Any atom listed in the Atoms class (encapsulation constants module) may be
    # added; add its selected covalence to used_atom_valences as well.
    # NOTE: adduct atoms have zero covalence.
    # NOTE: values are set per instance (not as static class variables) because
    # this class is distributed using multiprocessing.
    def __init__(self):
        self.usedAtoms = dict(
            C=(1, 90),
            H=(4, 200),
            O=(0, 12),
            N=(0, 0),
            S=(0, 0),
            P=(0, 0),
            Cl=(0, 0),
        )

        self.min_mz, self.max_mz = 50, 1200

        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = self.isProtonated = True

        self.url_database = None

        self.db_jobs = 1

        self.used_atom_valences = {
            "C": 4, "13C": 4,
            "H": 1,
            "O": 2, "18O": 2,
            "N": 3,
            "S": 2, "34S": 2,
            "P": 3,
            "Cl": 1, "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }
Settings for molecular searching
These are used to generate the database entries, do not change.
Attributes
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
- min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
- max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- isRadical (bool, optional): If True, search for radical ions. Default is True.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- url_database (str, optional): URL for the database. Default is None.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Runtime settings for molecular formula searching and database queries.

    Attributes
    ----------
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is
        'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        An empty value is replaced by the COREMS_DATABASE_URL environment
        variable, falling back to 'sqlite:///db/molformula.db'.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight of the m/z error score in the composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight of the isotopologue score in the composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Adduct atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Adduct atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Available score methods. Default is
        ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score',
        'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use; one of `score_methods`. Default is 'prob_score'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Atoms mapped to (min, max) count ranges. Default is an empty dict;
        'C': (1, 100) and 'H': (1, 200) are added when those keys are missing.
    ion_types_excluded : list, optional
        Ion types to exclude from molecular id search, commonly ['[M+CH3COO]-']
        or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error (percent) for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error (percent) for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method: 'distance', 'lowest', 'symmetrical', 'average' or 'None'.
        Default is 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Atoms mapped to valences. Default is an empty dict; every atom of
        Atoms.atoms_covalence that is missing is filled in from that table
        (using the first listed value when several are available).
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"

    db_jobs: int = 3

    db_chunk_size: int = 300

    # ---- query settings ----
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # closed-shell adduct ions [M + Adduct] are only searched for the atoms listed here
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # radical ions ([M].+ / [M].-, depending on polarity mode)
    isRadical: bool = False

    # protonated / de-protonated ions ([M + H]+ / [M - H]-, depending on polarity mode)
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # ---- search settings ----
    ionization_type: str = "ESI"

    # ppm; empirically set / needs optimization
    min_ppm_error: float = -10.0

    # ppm; empirically set / needs optimization
    max_ppm_error: float = 10.0

    # percentage; empirically set for isotopologue search / needs optimization
    min_abun_error: float = -100.0

    # percentage; empirically set for isotopologue search / needs optimization
    max_abun_error: float = 100.0

    # empirically set / needs optimization
    mz_error_range: float = 1.5

    # 'distance', 'lowest', 'symmetrical', 'average' or 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # e.g. {'C': 4, 'H': 1, ...}
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Resolve the database URL, coerce field types and complete the atom tables."""
        # an empty URL falls back to the environment, then to a local sqlite file
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # cast any value whose type differs from the field annotation
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

        # C and H are always required
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # add common valences for every atom the caller did not specify
        for atom, covalence in Atoms.atoms_covalence.items():
            if atom in self.used_atom_valences:
                continue
            if isinstance(covalence, int):
                self.used_atom_valences[atom] = covalence
            else:
                # first entry of the possible covalences, which should be the most common
                self.used_atom_valences[atom] = covalence[0]
Settings for molecular searching
Attributes
- use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
- isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
- isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
- use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
- use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
- min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
- url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
- db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
- ion_charge (int, optional): Ion charge. Default is -1.
- min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
- max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
- min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
- max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
- min_op_filter (float, optional): Minimum oxygen to phosphorous ratio. Default is 2.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
- mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
- isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
- adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
- adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
- score_methods (tuple, optional): Tuple of the score methods that are implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
- score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
- output_min_score (float, optional): Minimum score for output. Default is 0.1.
- output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
- isRadical (bool, optional): If True, search for radical ions. Default is False.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- isAdduct (bool, optional): If True, search for adduct ions. Default is False.
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
- ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
- ionization_type (str, optional): Ionization type. Default is 'ESI'.
- min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
- max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
- min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
- max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
- mz_error_range (float, optional): m/z error range. Default is 1.5.
- error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
- mz_error_average (float, optional): m/z error average. Default is 0.0.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.