corems.encapsulation.factory.processingSetting
@dataclasses.dataclass
class TransientSetting:
    """Settings controlling how a transient is processed.

    Attributes
    ----------
    implemented_apodization_function : tuple
        Names of the apodization functions that are available.
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier
        transform magnitude mode. For absorption mode processing, Half-Sine
        or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform.
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function.
        0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning,
        and 8.6 is similar to Blackman (from the numpy docs).
    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        # Cast every attribute whose value is not already an instance of
        # its annotated type by calling that type on the value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}.
        Entries passed to the constructor are merged on top of the built-in
        translations, so a caller can add new headers or remap existing ones.
    """

    # Add to this dict the VALUES that match your labels; the order does not matter.
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        built_in = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # Previously a mapping passed to the constructor was silently thrown
        # away here. Keep it, letting caller-supplied entries win over the
        # built-in ones; ``or {}`` tolerates an explicit None.
        self.header_translate = {**built_in, **(self.header_translate or {})}

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp
117 eic_tolerance_ppm : float, optional 118 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. 119 correct_eic_baseline : bool, optional 120 If True, correct the baseline of the extracted ion chromatogram. Default is True. 121 smooth_window : int, optional 122 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. 123 smooth_method : str, optional 124 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. 125 implemented_smooth_method : tuple, optional 126 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). 127 savgol_pol_order : int, optional 128 Polynomial order for Savitzky-Golay smoothing. Default is 2. 129 consecutive_scan_min : int, optional 130 Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended. 131 peak_height_max_percent : float, optional 132 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. 133 peak_max_prominence_percent : float, optional 134 1-100 % used for baseline detection. Default is 1. 135 peak_derivative_threshold : float, optional 136 Threshold for defining derivative crossing. Default is 0.0005. 137 min_peak_datapoints : float, optional 138 minimum data point to define a chromatografic peak. Default is 5. 139 noise_threshold_method : str, optional 140 Method for detecting noise threshold. Default is 'manual_relative_abundance'. 141 noise_threshold_methods_implemented : tuple, optional 142 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). 143 peak_height_min_percent : float, optional 144 0-100 % used for peak detection. Default is 0.1. 
145 eic_signal_threshold : float, optional 146 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. 147 eic_buffer_time : float, optional 148 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. 149 peak_picking_method : str, optional 150 Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'. 151 implemented_peak_picking_methods : tuple, optional 152 Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology'). 153 ph_smooth_it : int, optional 154 Number of iterations to use for smoothing prior to finding mass features. 155 Used only for "persistent homology" peak picking method. 156 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. 157 ph_smooth_radius_mz : int, optional 158 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. 159 Used only for "persistent homology" peak picking method. 160 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. 161 ph_smooth_radius_scan : int, optional 162 Radius in scan steps for smoothing prior to finding mass features. 163 Used only for "persistent homology" peak picking method. 164 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. 165 ph_inten_min_rel : int, optional 166 Relative minimum intensity to use for finding mass features for persistent homology. 167 Used only for "persistent homology" peak picking method. 168 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 169 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 170 ph_persis_min_rel : int, optional 171 Relative minimum persistence for retaining mass features. 172 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 
173 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 174 Should be greater to or equal to ph_inten_min_rel. 175 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 176 mass_feature_cluster_mz_tolerance_rel : float, optional 177 Relative m/z tolerance to use for clustering mass features. 178 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 179 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 180 Default is 5E-6 (5 ppm). 181 mass_feature_cluster_rt_tolerance : float, optional 182 Retention time tolerance to use for clustering mass features, in minutes. 183 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 184 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 185 Default is 0.2. 186 ms1_scans_to_average : int, optional 187 Number of MS1 scans to average for mass-feature associated m/zs. 188 Called within the LCMSBase.add_associated_ms1() method. Default is 1. 189 ms1_deconvolution_corr_min : float, optional 190 Minimum correlation to use for deconvoluting MS1 mass features. 191 Called within the LCCalculations.deconvolute_ms1_mass_features() method. 192 Default is 0.8. 193 ms2_dda_rt_tolerance : float, optional 194 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. 195 ms2_dda_mz_tolerance : float, optional 196 Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. 197 ms2_min_fe_score : float, optional 198 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. 
199 search_as_lipids : bool, optional 200 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. 201 include_fragment_types : bool, optional 202 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. 203 verbose_processing : bool, optional 204 If True, print verbose processing information. Default is True. 205 """ 206 207 scans: list | tuple = (-1, -1) 208 209 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing 210 eic_tolerance_ppm: float = 5 211 correct_eic_baseline = True 212 smooth_window: int = 5 213 smooth_method: str = "savgol" 214 implemented_smooth_method: tuple = ( 215 "savgol", 216 "hanning", 217 "blackman", 218 "bartlett", 219 "flat", 220 "boxcar", 221 ) 222 savgol_pol_order: int = 2 223 consecutive_scan_min: int = 0 224 peak_height_max_percent: float = 10 225 peak_max_prominence_percent: float = 1 226 peak_derivative_threshold: float = 0.0005 227 min_peak_datapoints: float = 5 228 noise_threshold_method: str = "manual_relative_abundance" 229 noise_threshold_methods_implemented: tuple = ( 230 "auto_relative_abundance", 231 "manual_relative_abundance", 232 "second_derivative", 233 ) 234 peak_height_min_percent: float = 0.1 235 eic_signal_threshold: float = 0.01 236 eic_buffer_time = 1.5 237 238 # Parameters used for 2D peak picking 239 peak_picking_method: str = "persistent homology" 240 implemented_peak_picking_methods: tuple = ( 241 "persistent homology", 242 "centroided_persistent_homology", 243 ) 244 245 # Parameters used in persistent homology calculations 246 ph_smooth_it = 1 247 ph_smooth_radius_mz = 0 248 ph_smooth_radius_scan = 1 249 ph_inten_min_rel = 0.001 250 ph_persis_min_rel = 0.001 251 252 # Parameters used to cluster mass features 253 mass_feature_cluster_mz_tolerance_rel: float = 5e-6 254 mass_feature_cluster_rt_tolerance: float = 0.3 255 
256 # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features 257 ms1_scans_to_average: int = 1 258 ms1_deconvolution_corr_min: float = 0.8 259 ms2_dda_rt_tolerance: float = 0.15 260 ms2_dda_mz_tolerance: float = 0.05 261 262 # Parameters used for flash entropy searching and database preparation 263 ms2_min_fe_score: float = 0.2 264 search_as_lipids: bool = False 265 include_fragment_types: bool = False 266 267 # Parameters used for saving the data 268 export_profile_spectra: bool = False 269 export_eics: bool = True 270 export_unprocessed_ms1: bool = False 271 272 # Parameters used for verbose processing 273 verbose_processing: bool = True 274 275 def __post_init__(self): 276 # enforce datatype 277 for field in dataclasses.fields(self): 278 value = getattr(self, field.name) 279 if not isinstance(value, field.type): 280 value = field.type(value) 281 setattr(self, field.name, value) 282 283 284@dataclasses.dataclass 285class MassSpectrumSetting: 286 """Mass spectrum processing settings class 287 288 Attributes 289 ---------- 290 noise_threshold_method : str, optional 291 Method for detecting noise threshold. Default is 'log'. 292 noise_threshold_methods_implemented : tuple, optional 293 Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'). 294 noise_threshold_min_std : int, optional 295 Minumum value for noise thresholding when using 'minima' noise threshold method. Default is 6. 296 noise_threshold_min_s2n : float, optional 297 Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4. 298 noise_threshold_min_relative_abundance : float, optional 299 Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%). 
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings for mass spectrum processing.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method.
        Note that this is a percentage value (0-100). Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463 (mFT is 0.463, aFT is 1.0).
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis.
        Default is 3. Recommend 3 for reduced profile data or if peak picking faults; 0 keeps the normal behaviour.
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that can be selected. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance is the
        initial matching tolerance. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5. (Not described in the original documentation; presumably a limit on the
        standard deviation of the raw error during reference matching. TODO confirm.)
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    # --- noise thresholding -------------------------------------------
    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )
    noise_threshold_min_std: int = 6  # 'minima' method
    noise_threshold_min_s2n: float = 4  # 'signal_noise' method
    noise_threshold_min_relative_abundance: float = 6  # 'relative_abundance' method, 0-100
    noise_threshold_absolute_abundance: float = 1_000_000  # 'absolute_abundance' method
    noise_threshold_log_nsigma: int = 6  # 'log' method
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # histogram bins for the noise
    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    # --- peak picking -------------------------------------------------
    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0
    # Number of points added on each side of the m/z axis (abundance is
    # zero padded) to fix peak picking at the spectrum limits; 0 disables.
    picking_point_extrapolate: int = 3

    # --- calibration --------------------------------------------------
    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # Cast every attribute whose value is not already an instance of
        # its annotated type by calling that type on the value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Settings for mass spectrum peak processing.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        Defaults to {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit centroid. Defaults to False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)
    # nominal Kendrick mass rounding: 'floor', 'ceil' or 'round'
    kendrick_rounding_method: str = "floor"
    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")
    # derivative crossing threshold, 0-1
    peak_derivative_threshold: float = 0.0
    # 1-100 %, peak detection
    peak_min_prominence_percent: float = 0.1
    # 0-inf, peak detection
    min_peak_datapoints: float = 5
    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 0.1
    # 1-100 %, baseline detection
    peak_height_max_percent: float = 10
    # True selects the legacy (CoreMS v1) resolving power calculation
    legacy_resolving_power: bool = True
    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        # An empty (or otherwise falsy) base falls back to CH2.
        self.kendrick_base = self.kendrick_base or {"C": 1, "H": 2}
        # Cast every attribute whose value is not already an instance of
        # its annotated type by calling that type on the value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings for gas chromatograph processing.

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'.
        Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100;
        use 0.1 for second_derivative and 10 for other methods. Defaults to 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
        Defaults to 0.1.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented.
        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    # --- smoothing ----------------------------------------------------
    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )
    smooth_window: int = 5
    smooth_method: str = "savgol"
    savgol_pol_order: int = 2

    # --- peak / baseline detection ------------------------------------
    peak_derivative_threshold: float = 0.0005
    # 1-100 %, baseline detection: 0.1 for second_derivative, 10 otherwise
    peak_height_max_percent: float = 10
    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 1
    min_peak_datapoints: float = 5
    max_peak_width: float = 0.1

    # --- noise --------------------------------------------------------
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )
    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1
    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1
    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01
    # minutes, maximum distance allowance for hierarchical clustering
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        # Cast every attribute whose value is not already an instance of
        # its annotated type by calling that type on the value.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
        When the SPECTRAL_GCMS_DATABASE_URL environment variable is set, its value
        replaces whatever was passed in.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Used for retention index calibration. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity (used for calibration). Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When empty or
        omitted, defaults to ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate',
        'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate',
        'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate',
        'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Default is 'All'.
    """

    # No credentials are kept in source: point SPECTRAL_GCMS_DATABASE_URL at a
    # server database instead of hard-coding a connection string here.
    url_database: str = "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # The environment variable keeps its precedence, but an explicitly
        # passed URL is no longer discarded when the variable is not set.
        env_url = os.getenv("SPECTRAL_GCMS_DATABASE_URL")
        if env_url is not None:
            self.url_database = env_url

        # Only fall back to the standard calibration compounds when the caller
        # did not supply any; list() yields a fresh list per instance and
        # accepts any iterable, so the type enforcement below cannot fail on it.
        self.ri_calibration_compound_names = list(
            self.ri_calibration_compound_names
            or (
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            )
        )

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                setattr(self, field.name, field.type(value))
class MolecularLookupDictSettings:
    """Settings for molecular searching

    These values are used to generate the database entries; do not change
    them for an existing database.

    Attributes
    ----------
    usedAtoms : dict, optional
        Dictionary of atoms and ranges.
        Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences.
        Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3,
        'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    """

    # DO NOT CHANGE these values for an existing database: they are used to
    # generate the database entries. DO change them when creating a new
    # application database.
    #
    # For runtime search settings and database query checks use the
    # MolecularFormulaSearchSettings class instead.
    #
    # C, H, N, O, S and P are ALWAYS needed in usedAtoms; to leave one of
    # them out set both its min and max to 0. Any atom listed in the Atoms
    # class of the encapsulation constants module may be added; when adding
    # one, also add its covalence to used_atom_valences.
    # NOTE: adduct atoms have zero covalence.
    # NOTE: instance attributes are used instead of static variables because
    # this class is distributed using multiprocessing.
    def __init__(self):
        atom_ranges = [
            ("C", (1, 90)),
            ("H", (4, 200)),
            ("O", (0, 12)),
            ("N", (0, 0)),
            ("S", (0, 0)),
            ("P", (0, 0)),
            ("Cl", (0, 0)),
        ]
        self.usedAtoms = dict(atom_ranges)

        self.min_mz, self.max_mz = 50, 1200
        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, overrides the dbe limits above with
        # DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = True
        self.isProtonated = True

        self.url_database = None
        self.db_jobs = 1

        valences = [
            ("C", 4),
            ("13C", 4),
            ("H", 1),
            ("O", 2),
            ("18O", 2),
            ("N", 3),
            ("S", 2),
            ("34S", 2),
            ("P", 3),
            ("Cl", 1),
            ("37Cl", 1),
            ("Br", 1),
            ("Na", 1),
            ("F", 1),
            ("K", 0),
        ]
        self.used_atom_valences = dict(valences)
763 use_runtime_kendrick_filter : bool, optional 764 If True, use runtime Kendrick filter. Default is False. 765 use_min_peaks_filter : bool, optional 766 If True, use minimum peaks filter. Default is True. 767 min_peaks_per_class : int, optional 768 Minimum number of peaks per class. Default is 15. 769 url_database : str, optional 770 URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'. 771 db_jobs : int, optional 772 Number of jobs to use for database queries. Default is 3. 773 db_chunk_size : int, optional 774 Chunk size to use for database queries. Default is 300. 775 ion_charge : int, optional 776 Ion charge. Default is -1. 777 min_hc_filter : float, optional 778 Minimum hydrogen to carbon ratio. Default is 0.3. 779 max_hc_filter : float, optional 780 Maximum hydrogen to carbon ratio. Default is 3. 781 min_oc_filter : float, optional 782 Minimum oxygen to carbon ratio. Default is 0.0. 783 max_oc_filter : float, optional 784 Maximum oxygen to carbon ratio. Default is 1.2. 785 min_op_filter : float, optional 786 Minimum oxygen to phosphorous ratio. Default is 2. 787 use_pah_line_rule : bool, optional 788 If True, use the PAH line rule. Default is False. 789 min_dbe : float, optional 790 Minimum double bond equivalent to use for searching. Default is 0. 791 max_dbe : float, optional 792 Maximum double bond equivalent to use for searching. Default is 40. 793 mz_error_score_weight : float, optional 794 Weight for m/z error score to contribute to composite score. Default is 0.6. 795 isotopologue_score_weight : float, optional 796 Weight for isotopologue score to contribute to composite score. Default is 0.4. 797 adduct_atoms_neg : tuple, optional 798 Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br'). 799 adduct_atoms_pos : tuple, optional 800 Tuple of atoms to use in positive polarity. Default is ('Na', 'K'). 
801 score_methods : tuple, optional 802 Tuple of score method that can be implemented. 803 Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'). 804 score_method : str, optional 805 Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'. 806 output_min_score : float, optional 807 Minimum score for output. Default is 0.1. 808 output_score_method : str, optional 809 Score method to use for output. Default is 'All Candidates'. 810 isRadical : bool, optional 811 If True, search for radical ions. Default is False. 812 isProtonated : bool, optional 813 If True, search for protonated ions. Default is True. 814 isAdduct : bool, optional 815 If True, search for adduct ions. Default is False. 816 usedAtoms : dict, optional 817 Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}. 818 ion_types_excluded : list, optional 819 List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is []. 820 ionization_type : str, optional 821 Ionization type. Default is 'ESI'. 822 min_ppm_error : float, optional 823 Minimum ppm error. Default is -10.0. 824 max_ppm_error : float, optional 825 Maximum ppm error. Default is 10.0. 826 min_abun_error : float, optional 827 Minimum abundance error for isotolopologue search. Default is -100.0. 828 max_abun_error : float, optional 829 Maximum abundance error for isotolopologue search. Default is 100.0. 830 mz_error_range : float, optional 831 m/z error range. Default is 1.5. 832 error_method : str, optional 833 Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'. 
834 mz_error_average : float, optional 835 m/z error average. Default is 0.0. 836 used_atom_valences : dict, optional 837 Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}. 838 verbose_processing: bool, optional 839 If True, print verbose processing information. Default is True. 840 """ 841 842 verbose_processing: bool = True 843 844 use_isotopologue_filter: bool = False 845 846 isotopologue_filter_threshold: float = 33 847 848 isotopologue_filter_atoms: tuple = ("Cl", "Br") 849 850 use_runtime_kendrick_filter: bool = False 851 852 use_min_peaks_filter: bool = True 853 854 min_peaks_per_class: int = 15 855 856 url_database: str = ( 857 "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp" 858 ) 859 860 db_jobs: int = 3 861 862 db_chunk_size: int = 300 863 864 # query setting======== 865 ion_charge: int = -1 866 867 min_hc_filter: float = 0.3 868 869 max_hc_filter: float = 3 870 871 min_oc_filter: float = 0.0 872 873 max_oc_filter: float = 1.2 874 875 min_op_filter: float = 2 876 877 use_pah_line_rule: bool = False 878 879 min_dbe: float = 0 880 881 max_dbe: float = 40 882 883 mz_error_score_weight: float = 0.6 884 885 isotopologue_score_weight: float = 0.4 886 887 # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms 888 adduct_atoms_neg: tuple = ("Cl", "Br") 889 890 adduct_atoms_pos: tuple = ("Na", "K") 891 892 score_methods: tuple = ( 893 "S_P_lowest_error", 894 "N_S_P_lowest_error", 895 "lowest_error", 896 "prob_score", 897 "air_filter_error", 898 "water_filter_error", 899 "earth_filter_error", 900 ) 901 902 score_method: str = "prob_score" 903 904 output_min_score: float = 0.1 905 906 output_score_method: str = "All Candidates" 907 908 # depending on the polarity mode it looks for [M].+ , [M].- 909 # query and automatically compile add entry if it doesn't exist 910 911 
isRadical: bool = False 912 913 # depending on the polarity mode it looks for [M + H]+ , [M - H]+ 914 # query and automatically compile and push options if it doesn't exist 915 isProtonated: bool = True 916 917 isAdduct: bool = False 918 919 usedAtoms: dict = dataclasses.field(default_factory=dict) 920 ion_types_excluded: list = dataclasses.field(default_factory=list) 921 922 # search setting ======== 923 924 ionization_type: str = "ESI" 925 926 # empirically set / needs optimization 927 min_ppm_error: float = -10.0 # ppm 928 929 # empirically set / needs optimization 930 max_ppm_error: float = 10.0 # ppm 931 932 # empirically set / needs optimization set for isotopologue search 933 min_abun_error: float = -100.0 # percentage 934 935 # empirically set / needs optimization set for isotopologue search 936 max_abun_error: float = 100.0 # percentage 937 938 # empirically set / needs optimization 939 mz_error_range: float = 1.5 940 941 # 'distance', 'lowest', 'symmetrical','average' 'None' 942 error_method: str = "None" 943 944 mz_error_average: float = 0.0 945 946 # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict) 947 used_atom_valences: dict = dataclasses.field(default_factory=dict) 948 949 def __post_init__(self): 950 if not self.url_database or self.url_database == "": 951 self.url_database = os.getenv( 952 "COREMS_DATABASE_URL", "sqlite:///db/molformula.db" 953 ) 954 # enforce datatype 955 for field in dataclasses.fields(self): 956 value = getattr(self, field.name) 957 if not isinstance(value, field.type): 958 value = field.type(value) 959 setattr(self, field.name, value) 960 961 # enforce C and H if either do not exists 962 if "C" not in self.usedAtoms.keys(): 963 self.usedAtoms["C"] = (1, 100) 964 if "H" not in self.usedAtoms.keys(): 965 self.usedAtoms["H"] = (1, 200) 966 967 # add cummon values 968 current_used_atoms = self.used_atom_valences.keys() 969 970 for atom in Atoms.atoms_covalence.keys(): 971 if atom not in 
current_used_atoms: 972 covalence = Atoms.atoms_covalence.get(atom) 973 974 if isinstance(covalence, int): 975 self.used_atom_valences[atom] = covalence 976 977 else: 978 # will get the first number of all possible covalances, which should be the most commum 979 self.used_atom_valences[atom] = covalence[0]
@dataclasses.dataclass
class TransientSetting:
    """Settings that control how a transient is processed.

    Attributes
    ----------
    implemented_apodization_function : tuple
        Available apodization functions.
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier
        transform magnitude mode. For absorption mode processing, Half-Sine or
        Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform.
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function.
        0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning,
        and 8.6 is similar to Blackman (from numpy docs).
    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        # Coerce every attribute to its annotated type; values that already
        # have the right type are left untouched.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Transient processing settings class
Attributes
- implemented_apodization_function (tuple): Available apodization functions
- apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
- number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
- number_of_zero_fills (int): How many times to zero fill the transient prior to Fourier transform.
- next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
- kaiser_beta (float): Beta parameter for the Kaiser or Half-Kaiser apodization function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning, and 8.6 is similar to Blackman (from numpy docs).
@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}.
        Entries passed to the constructor are merged on top of the built-in
        translations, so they add to or override the defaults.
    """

    # Keys are the column headers found in the input data; values are the
    # corems labels they map to. The order of the entries does not matter.
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        builtin_translations = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # Built-in translations first, caller-supplied entries last, so a
        # dictionary given to the constructor is kept instead of being
        # silently discarded. ``or {}`` tolerates an explicit None.
        self.header_translate = {
            **builtin_translations,
            **(self.header_translate or {}),
        }

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp
Data input settings class
Attributes
- header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
def add_mz_label(self, label):
    """Register ``label`` in header_translate so it is translated to the corems label for mz."""
    self.header_translate.update({label: Labels.mz})
Add a label to the header_translate dictionary to be translated to the corems label for mz.
def add_peak_height_label(self, label):
    """Register ``label`` in header_translate so it is translated to the corems label for peak height."""
    self.header_translate.update({label: Labels.abundance})
Add a label to the header_translate dictionary to be translated to the corems label for peak height.
def add_sn_label(self, label):
    """Register ``label`` in header_translate so it is translated to the corems label for signal to noise."""
    self.header_translate.update({label: Labels.s2n})
Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.
def add_resolving_power_label(self, label):
    """Register ``label`` in header_translate so it is translated to the corems label for resolving power."""
    self.header_translate.update({label: Labels.rp})
Add a label to the header_translate dictionary to be translated to the corems label for resolving power.
110@dataclasses.dataclass 111class LiquidChromatographSetting: 112 """Liquid chromatograph processing settings class 113 114 Attributes 115 ---------- 116 scans : list or tuple, optional 117 List of select scan to average or a tuple containing the range to average. Default is (0, 1). 118 eic_tolerance_ppm : float, optional 119 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. 120 correct_eic_baseline : bool, optional 121 If True, correct the baseline of the extracted ion chromatogram. Default is True. 122 smooth_window : int, optional 123 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. 124 smooth_method : str, optional 125 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. 126 implemented_smooth_method : tuple, optional 127 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). 128 savgol_pol_order : int, optional 129 Polynomial order for Savitzky-Golay smoothing. Default is 2. 130 consecutive_scan_min : int, optional 131 Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended. 132 peak_height_max_percent : float, optional 133 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. 134 peak_max_prominence_percent : float, optional 135 1-100 % used for baseline detection. Default is 1. 136 peak_derivative_threshold : float, optional 137 Threshold for defining derivative crossing. Default is 0.0005. 138 min_peak_datapoints : float, optional 139 minimum data point to define a chromatografic peak. Default is 5. 140 noise_threshold_method : str, optional 141 Method for detecting noise threshold. Default is 'manual_relative_abundance'. 
142 noise_threshold_methods_implemented : tuple, optional 143 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). 144 peak_height_min_percent : float, optional 145 0-100 % used for peak detection. Default is 0.1. 146 eic_signal_threshold : float, optional 147 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. 148 eic_buffer_time : float, optional 149 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. 150 peak_picking_method : str, optional 151 Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'. 152 implemented_peak_picking_methods : tuple, optional 153 Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology'). 154 ph_smooth_it : int, optional 155 Number of iterations to use for smoothing prior to finding mass features. 156 Used only for "persistent homology" peak picking method. 157 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. 158 ph_smooth_radius_mz : int, optional 159 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. 160 Used only for "persistent homology" peak picking method. 161 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. 162 ph_smooth_radius_scan : int, optional 163 Radius in scan steps for smoothing prior to finding mass features. 164 Used only for "persistent homology" peak picking method. 165 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. 166 ph_inten_min_rel : int, optional 167 Relative minimum intensity to use for finding mass features for persistent homology. 168 Used only for "persistent homology" peak picking method. 169 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 
170 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 171 ph_persis_min_rel : int, optional 172 Relative minimum persistence for retaining mass features. 173 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 174 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 175 Should be greater to or equal to ph_inten_min_rel. 176 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 177 mass_feature_cluster_mz_tolerance_rel : float, optional 178 Relative m/z tolerance to use for clustering mass features. 179 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 180 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 181 Default is 5E-6 (5 ppm). 182 mass_feature_cluster_rt_tolerance : float, optional 183 Retention time tolerance to use for clustering mass features, in minutes. 184 Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. 185 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 186 Default is 0.2. 187 ms1_scans_to_average : int, optional 188 Number of MS1 scans to average for mass-feature associated m/zs. 189 Called within the LCMSBase.add_associated_ms1() method. Default is 1. 190 ms1_deconvolution_corr_min : float, optional 191 Minimum correlation to use for deconvoluting MS1 mass features. 192 Called within the LCCalculations.deconvolute_ms1_mass_features() method. 193 Default is 0.8. 194 ms2_dda_rt_tolerance : float, optional 195 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. 196 ms2_dda_mz_tolerance : float, optional 197 Mass tolerance to use for associating MS2 spectra to mass features. 
Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. 198 ms2_min_fe_score : float, optional 199 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. 200 search_as_lipids : bool, optional 201 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. 202 include_fragment_types : bool, optional 203 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. 204 verbose_processing : bool, optional 205 If True, print verbose processing information. Default is True. 206 """ 207 208 scans: list | tuple = (-1, -1) 209 210 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing 211 eic_tolerance_ppm: float = 5 212 correct_eic_baseline = True 213 smooth_window: int = 5 214 smooth_method: str = "savgol" 215 implemented_smooth_method: tuple = ( 216 "savgol", 217 "hanning", 218 "blackman", 219 "bartlett", 220 "flat", 221 "boxcar", 222 ) 223 savgol_pol_order: int = 2 224 consecutive_scan_min: int = 0 225 peak_height_max_percent: float = 10 226 peak_max_prominence_percent: float = 1 227 peak_derivative_threshold: float = 0.0005 228 min_peak_datapoints: float = 5 229 noise_threshold_method: str = "manual_relative_abundance" 230 noise_threshold_methods_implemented: tuple = ( 231 "auto_relative_abundance", 232 "manual_relative_abundance", 233 "second_derivative", 234 ) 235 peak_height_min_percent: float = 0.1 236 eic_signal_threshold: float = 0.01 237 eic_buffer_time = 1.5 238 239 # Parameters used for 2D peak picking 240 peak_picking_method: str = "persistent homology" 241 implemented_peak_picking_methods: tuple = ( 242 "persistent homology", 243 "centroided_persistent_homology", 244 ) 245 246 # Parameters used in persistent homology calculations 247 ph_smooth_it = 1 248 ph_smooth_radius_mz = 0 249 
ph_smooth_radius_scan = 1 250 ph_inten_min_rel = 0.001 251 ph_persis_min_rel = 0.001 252 253 # Parameters used to cluster mass features 254 mass_feature_cluster_mz_tolerance_rel: float = 5e-6 255 mass_feature_cluster_rt_tolerance: float = 0.3 256 257 # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features 258 ms1_scans_to_average: int = 1 259 ms1_deconvolution_corr_min: float = 0.8 260 ms2_dda_rt_tolerance: float = 0.15 261 ms2_dda_mz_tolerance: float = 0.05 262 263 # Parameters used for flash entropy searching and database preparation 264 ms2_min_fe_score: float = 0.2 265 search_as_lipids: bool = False 266 include_fragment_types: bool = False 267 268 # Parameters used for saving the data 269 export_profile_spectra: bool = False 270 export_eics: bool = True 271 export_unprocessed_ms1: bool = False 272 273 # Parameters used for verbose processing 274 verbose_processing: bool = True 275 276 def __post_init__(self): 277 # enforce datatype 278 for field in dataclasses.fields(self): 279 value = getattr(self, field.name) 280 if not isinstance(value, field.type): 281 value = field.type(value) 282 setattr(self, field.name, value)
Liquid chromatograph processing settings class
Attributes
- scans (list or tuple, optional): List of selected scans to average or a tuple containing the range to average. Default is (-1, -1).
- eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
- correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
- smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- consecutive_scan_min (int, optional): Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended.
- peak_height_max_percent (float, optional): 1-100 % used for baseline detection; use 0.1 for second_derivative and 10 for other methods. Default is 10.
- peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
- min_peak_datapoints (float, optional): Minimum number of data points to define a chromatographic peak. Default is 5.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detecting noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
- peak_picking_method (str, optional): Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
- implemented_peak_picking_methods (tuple, optional): Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
- ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
- ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features for persistent homology. Used only for "persistent homology" peak picking method. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
- mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
- ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
- ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
- ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
- ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
- ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
- search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
- include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings that control mass spectrum processing.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detecting noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults.
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that can be selected. Values are ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Limit used by the calibration reference matching; its exact meaning is not visible here (TODO confirm against the calibration code). Default is 1.5.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    # Thresholds, one per noise threshold method
    noise_threshold_min_std: int = 6  # 'minima' method
    noise_threshold_min_s2n: float = 4  # 'signal_noise' method
    noise_threshold_min_relative_abundance: float = 6  # 'relative_abundance' method, from 0-100
    noise_threshold_absolute_abundance: float = 1_000_000  # 'absolute_abundance' method
    noise_threshold_log_nsigma: int = 6  # 'log' method
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # Number of data points (in each direction) by which the mz axis is
    # extrapolated and the abundance axis is zero padded. This fixes peak
    # picking issues at the spectrum limits: 0 keeps the normal behaviour,
    # 3 is the typical value to fix them.
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # Coerce every attribute to its annotated type; values that already
        # have the right type are left untouched.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Mass spectrum processing settings class
Attributes
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
- noise_threshold_methods_implemented (tuple, optional): Available methods for detecting the noise threshold. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
- noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
- noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
- noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
- noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
- noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
- noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
- noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
- noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
- noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
- min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
- max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
- picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
- calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
- calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
- max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
- min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
- calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
- calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
- calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
- do_calibration (bool, optional): If True, perform calibration. Default is True.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Settings that control mass spectrum peak processing.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Element symbols mapped to their counts in the Kendrick base.
        An empty (falsy) value is replaced by {'C': 1, 'H': 2} on construction.
    kendrick_rounding_method : str, optional
        Rounding used to obtain the nominal Kendrick mass: 'floor', 'ceil' or 'round'.
        Default is 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Rounding methods that are available. Default is ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold that defines a derivative crossing (0-1). Default is 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence, in percent, used for peak detection. Default is 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Default is 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence, in percent, used for baseline detection. Default is 0.1.
    peak_height_max_percent : float, optional
        Maximum height, in percent, used for baseline detection. Default is 10.
    legacy_resolving_power : bool, optional
        Use the legacy (CoreMS v1) resolving power calculation. Default is True.
    legacy_centroid_polyfit : bool, optional
        Use the legacy (numpy polyfit) centroid fit. Default is False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    # valid methods for the nominal Kendrick mass: 'floor', 'ceil' or 'round'
    kendrick_rounding_method: str = "floor"

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    # derivative crossing threshold, 0-1
    peak_derivative_threshold: float = 0.0

    # percent, used for peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-inf, used for peak detection
    min_peak_datapoints: float = 5

    # percent, used for baseline detection
    peak_max_prominence_percent: float = 0.1

    # percent, used for baseline detection
    peak_height_max_percent: float = 10

    # True selects the legacy (CoreMS v1) resolving power calculation
    legacy_resolving_power: bool = True

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        # Fall back to CH2 when no Kendrick base was supplied.
        self.kendrick_base = self.kendrick_base or {"C": 1, "H": 2}

        # Coerce every attribute to its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Mass spectrum peak processing settings class
Attributes
- kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
- kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
- implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
- peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. Defaults to 0.1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 0.1.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
- legacy_centroid_polyfit (bool, optional): Use the legacy (numpy polyfit) method to fit the centroid. Default is False.
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings that control gas chromatogram processing.

    Attributes
    ----------
    use_deconvolution : bool, optional
        Enable deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that are available.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use; one of `implemented_smooth_method`. Default is 'savgol'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold that defines a derivative crossing (0-1). Default is 0.0005.
    peak_height_max_percent : float, optional
        Maximum height, in percent, used for baseline detection. Default is 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence, in percent, used for baseline detection. Default is 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Default is 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Default is 0.1.
    noise_threshold_method : str, optional
        Method for detecting the noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that are available.
        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 %, used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 %, used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 %, used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for the hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        Print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    # percent, used for baseline detection;
    # use 0.1 for second_derivative and 10 for other methods
    peak_height_max_percent: float = 10

    # percent, used for baseline detection
    peak_max_prominence_percent: float = 1

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    # 0-100 %, used for peak detection
    peak_height_min_percent: float = 0.1

    # 0-100 %, used for peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-100 %, used for extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01

    # minutes, maximum distance allowance for the hierarchical cluster
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        # Coerce every attribute to its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Gas chromatograph processing settings class
Attributes
- use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Available methods for detecting the noise threshold. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- std_noise_threshold (int, optional): Default is 3.
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When it is not supplied (or is empty) the value of the
        SPECTRAL_GCMS_DATABASE_URL environment variable is used, falling back to
        'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'. An explicitly supplied URL is kept.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When it is not
        supplied (or is empty) it defaults to ['Methyl Caprylate', 'Methyl Caprate',
        'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate',
        'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate',
        'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Default is 'All'.
    """

    # Legacy class-level default of ``url_database``. It has never been used as an actual
    # connection string: __post_init__ treats it as "not set" and replaces it.
    # NOTE(review): this placeholder embeds credentials; consider replacing it with "".
    _LEGACY_URL_PLACEHOLDER = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"
    )

    url_database: str = _LEGACY_URL_PLACEHOLDER

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # Resolve the database URL from the environment only when the caller did not
        # provide one. Previously the constructor argument was always overwritten.
        if not self.url_database or self.url_database == self._LEGACY_URL_PLACEHOLDER:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                setattr(self, field.name, field.type(value))

        # Populate the default calibration compounds only when none were supplied, so a
        # caller-provided list is no longer discarded. A new list is built per instance.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]
Settings for compound search
Attributes
- url_database (str, optional): URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
- ri_search_range (float, optional): Retention index search range. Default is 35.
- rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
- correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
- score_threshold (float, optional): Threshold for composite score. Default is 0.0.
- ri_spacing (float, optional): Retention index spacing. Default is 200.
- ri_std (float, optional): Retention index standard deviation. Default is 3.
- ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
class MolecularLookupDictSettings:
    """Settings used when generating the molecular lookup database entries.

    These values define the database content; do not change them for an existing
    database. Change them only when creating a new application database. For runtime
    search settings and database queries use MolecularFormulaSearchSettings instead.

    Attributes
    ----------
    usedAtoms : dict, optional
        Atoms mapped to their (min, max) ranges. Default is
        {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Atoms mapped to their valences. Default is
        {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3,
        'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    """

    # C, H, N, O, S and P are ALWAYS required in usedAtoms; to leave one of them out,
    # set both its min and max to 0. Any atom listed in the Atoms class of the
    # encapsulation constants module may be added, provided its covalence is also
    # added to used_atom_valences. Adduct atoms have zero covalence.
    #
    # NOTE: the values are instance attributes rather than static (class-level)
    # variables because this class is distributed using multiprocessing.
    def __init__(self):
        self.usedAtoms = dict(
            C=(1, 90),
            H=(4, 200),
            O=(0, 12),
            N=(0, 0),
            S=(0, 0),
            P=(0, 0),
            Cl=(0, 0),
        )

        self.min_mz = 50
        self.max_mz = 1200

        self.min_dbe = 0
        self.max_dbe = 50

        # when enabled, overrides the DBE limits above with DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = True
        self.isProtonated = True

        self.url_database = None
        self.db_jobs = 1

        self.used_atom_valences = dict(
            [
                ("C", 4),
                ("13C", 4),
                ("H", 1),
                ("O", 2),
                ("18O", 2),
                ("N", 3),
                ("S", 2),
                ("34S", 2),
                ("P", 3),
                ("Cl", 1),
                ("37Cl", 1),
                ("Br", 1),
                ("Na", 1),
                ("F", 1),
                ("K", 0),
            ]
        )
Settings for molecular searching
These are used to generate the database entries, do not change.
Attributes
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
- min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
- max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- isRadical (bool, optional): If True, search for radical ions. Default is True.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- url_database (str, optional): URL for the database. Default is None.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Runtime settings for molecular formula searching.

    Attributes
    ----------
    verbose_processing : bool, optional
        Print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        Enable the isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for the isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Atoms used by the isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        Enable the runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        Enable the minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is
        'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        An empty value is replaced by the COREMS_DATABASE_URL environment variable,
        falling back to 'sqlite:///db/molformula.db'.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorus ratio. Default is 2.
    use_pah_line_rule : bool, optional
        Use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight of the m/z error score in the composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight of the isotopologue score in the composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Adduct atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Adduct atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Score methods that are available. Default is
        ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score',
        'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use; one of `score_methods`. Default is 'prob_score'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        Search for radical ions. Default is False.
    isProtonated : bool, optional
        Search for protonated ions. Default is True.
    isAdduct : bool, optional
        Search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Atoms mapped to their (min, max) ranges. Default is an empty dict; 'C': (1, 100)
        and 'H': (1, 200) are added on construction when missing.
    ion_types_excluded : list, optional
        Ion types to exclude from the molecular id search, commonly ['[M+CH3COO]-'] or
        ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error, in percent, for the isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error, in percent, for the isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method: 'distance', 'lowest', 'symmetrical', 'average' or 'None'.
        Default is 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Atoms mapped to their valences. Default is an empty dict; on construction every
        atom of Atoms.atoms_covalence that is missing is added with its covalence
        (the first one when several are listed).
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # ---- query settings ----
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # closed shell ions [M + Adduct]+ are only searched for the atoms listed here
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # depending on the polarity mode it looks for [M].+ , [M].-
    # query and automatically compile add entry if it doesn't exist
    isRadical: bool = False

    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
    # query and automatically compile and push options if it doesn't exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # ---- search settings ----
    ionization_type: str = "ESI"

    # ppm; empirically set / needs optimization
    min_ppm_error: float = -10.0

    # ppm; empirically set / needs optimization
    max_ppm_error: float = 10.0

    # percentage; empirically set / needs optimization, used for the isotopologue search
    min_abun_error: float = -100.0

    # percentage; empirically set / needs optimization, used for the isotopologue search
    max_abun_error: float = 100.0

    # empirically set / needs optimization
    mz_error_range: float = 1.5

    # 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # e.g. {'C': 4, 'H': 1, ...}; missing atoms are filled in by __post_init__
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        # An empty database URL falls back to the environment, then to a local sqlite file.
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # Coerce every attribute to its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

        # C and H must always be present.
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # Fill in the common valences for every atom the caller did not specify.
        for symbol, covalence in Atoms.atoms_covalence.items():
            if symbol in self.used_atom_valences:
                continue
            # several covalences may be listed; the first should be the most common
            self.used_atom_valences[symbol] = (
                covalence if isinstance(covalence, int) else covalence[0]
            )
Settings for molecular searching
Attributes
- use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
- isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
- isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
- use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
- use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
- min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
- url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
- db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
- ion_charge (int, optional): Ion charge. Default is -1.
- min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
- max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
- min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
- max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
- min_op_filter (float, optional): Minimum oxygen to phosphorus ratio. Default is 2.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
- mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
- isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
- adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
- adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
- score_methods (tuple, optional): Tuple of score methods that can be implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
- score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
- output_min_score (float, optional): Minimum score for output. Default is 0.1.
- output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
- isRadical (bool, optional): If True, search for radical ions. Default is False.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- isAdduct (bool, optional): If True, search for adduct ions. Default is False.
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
- ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
- ionization_type (str, optional): Ionization type. Default is 'ESI'.
- min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
- max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
- min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
- max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
- mz_error_range (float, optional): m/z error range. Default is 1.5.
- error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
- mz_error_average (float, optional): m/z error average. Default is 0.0.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.