corems.encapsulation.factory.processingSetting
__author__ = "Yuri E. Corilo"
__date__ = "Jul 02, 2019"

import dataclasses
import os
import types
from typing import Dict, List, Union, get_args, get_origin

from corems.encapsulation.constant import Atoms, Labels


def _coerce_to_declared_type(value, declared):
    """Convert ``value`` to the type a dataclass field was declared with.

    Parameters
    ----------
    value : object
        The value currently stored on the settings instance.
    declared : type or typing alias
        The field annotation. Plain classes (``float``), unsubscripted
        ``typing`` aliases (``typing.Dict``) and unions (``list | tuple``)
        are supported.

    Returns
    -------
    object
        ``value`` converted by the first constructor that accepts it.

    Raises
    ------
    TypeError, ValueError
        Whatever the last candidate constructor raises when no candidate
        can convert ``value``.
    """
    origin = get_origin(declared)
    if origin is Union or origin is types.UnionType:
        # A union is not callable; try its member types in declaration order.
        constructors = tuple(
            get_origin(member) or member for member in get_args(declared)
        )
    else:
        # typing.Dict / typing.List cannot be instantiated; use their runtime
        # origin (dict / list). Plain classes have no origin and are used as is.
        constructors = (origin or declared,)

    *fallbacks, last = constructors
    for constructor in fallbacks:
        try:
            return constructor(value)
        except (TypeError, ValueError):
            continue
    # Let the final constructor raise its own exception so callers see the
    # same error type a direct ``declared(value)`` call would produce.
    return last(value)


def _enforce_field_types(settings) -> None:
    """Coerce every dataclass field of ``settings`` to its annotated type.

    Only annotated attributes are dataclass fields, so un-annotated class
    attributes are not touched. Values that already satisfy ``isinstance``
    are left alone; everything else is passed through the declared type's
    constructor and stored back on the instance.
    """
    for field in dataclasses.fields(settings):
        value = getattr(settings, field.name)
        if not isinstance(value, field.type):
            setattr(
                settings, field.name, _coerce_to_declared_type(value, field.type)
            )


@dataclasses.dataclass
class TransientSetting:
    """Transient processing settings class

    Attributes
    ----------
    implemented_apodization_function : tuple
        Available apodization functions
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming,
        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)

    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        """Coerce every field to its annotated type."""
        _enforce_field_types(self)


@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}.
        Entries passed to the constructor are merged on top of the built-in
        translations, so they extend or override the defaults.
    """

    # add to this dict the VALUES to match your labels, THE ORDER WON'T MATTER
    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Install the built-in header translations, keeping caller entries."""
        default_translations = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # Build a new dict (the caller's mapping is not mutated); entries the
        # caller supplied take precedence over the defaults.
        self.header_translate = {
            **default_translations,
            **(self.header_translate or {}),
        }

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp


@dataclasses.dataclass
class LiquidChromatographSetting:
    """Liquid chromatograph processing settings class

    Attributes
    ----------
    scans : list or tuple, optional
        List of select scan to average or a tuple containing the range to average. Default is (-1, -1).
    eic_tolerance_ppm : float, optional
        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    correct_eic_baseline : bool, optional
        If True, correct the baseline of the extracted ion chromatogram. Default is True.
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_height_max_percent : float, optional
        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
    peak_max_prominence_percent : float, optional
        1-100 % used for baseline detection. Default is 1.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Default is 0.0005.
    min_peak_datapoints : float, optional
        Minimum data points to define a chromatographic peak. Default is 5.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented.
        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    eic_buffer_time : float, optional
        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes.
        Default is 1.5.
    peak_picking_method : str, optional
        Method used for 2D peak picking. Default is 'persistent homology'.
    implemented_peak_picking_methods : tuple, optional
        2D peak picking methods that can be implemented. Default is ('persistent homology',).
    ph_smooth_it : int, optional
        Number of iterations to use for smoothing prior to finding mass features.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
    ph_smooth_radius_mz : int, optional
        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
    ph_smooth_radius_scan : int, optional
        Radius in scan steps for smoothing prior to finding mass features.
        Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
    ph_inten_min_rel : float, optional
        Relative minimum intensity to use for finding mass features.
        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    ph_persis_min_rel : float, optional
        Relative minimum persistence for retaining mass features.
        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
        Should be greater than or equal to ph_inten_min_rel.
        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    mass_feature_cluster_mz_tolerance_rel : float, optional
        Relative m/z tolerance to use for clustering mass features.
        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
        Default is 5E-6 (5 ppm).
    mass_feature_cluster_rt_tolerance : float, optional
        Retention time tolerance to use for clustering mass features, in minutes.
        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
        Default is 0.3.
    ms1_scans_to_average : int, optional
        Number of MS1 scans to average for mass-feature associated m/zs.
        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
    ms1_deconvolution_corr_min : float, optional
        Minimum correlation to use for deconvoluting MS1 mass features.
        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
        Default is 0.8.
    ms2_dda_rt_tolerance : float, optional
        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes.
        Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    ms2_dda_mz_tolerance : float, optional
        Mass tolerance to use for associating MS2 spectra to mass features.
        Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    ms2_min_fe_score : float, optional
        Minimum flash entropy for retaining MS2 annotations.
        Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    search_as_lipids : bool, optional
        If True, prepare the database for lipid searching.
        Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    include_fragment_types : bool, optional
        If True, include fragment types in the database.
        Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    export_profile_spectra : bool, optional
        Data-saving option. Default is False.
    export_eics : bool, optional
        Data-saving option. Default is True.
    export_unprocessed_ms1 : bool, optional
        Data-saving option. Default is False.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.

    Notes
    -----
    ``correct_eic_baseline``, ``eic_buffer_time`` and the ``ph_*`` attributes
    carry no type annotation, so they are plain class attributes rather than
    dataclass fields: they are not constructor arguments and are skipped by
    ``dataclasses.fields``/``asdict`` and by the type enforcement in
    ``__post_init__``. Set them by attribute assignment.
    """

    scans: list | tuple = (-1, -1)

    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
    eic_tolerance_ppm: float = 5
    # NOTE(review): un-annotated on purpose? Not a dataclass field (see Notes).
    correct_eic_baseline = True
    smooth_window: int = 5
    smooth_method: str = "savgol"
    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )
    savgol_pol_order: int = 2
    peak_height_max_percent: float = 10
    peak_max_prominence_percent: float = 1
    peak_derivative_threshold: float = 0.0005
    min_peak_datapoints: float = 5
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )
    peak_height_min_percent: float = 0.1
    eic_signal_threshold: float = 0.01
    # Plain class attribute, not a dataclass field (see Notes).
    eic_buffer_time = 1.5

    # Parameters used for 2D peak picking
    peak_picking_method: str = "persistent homology"
    implemented_peak_picking_methods: tuple = ("persistent homology",)

    # Parameters used in persistent homology calculations
    # (plain class attributes, not dataclass fields; see Notes)
    ph_smooth_it = 1
    ph_smooth_radius_mz = 0
    ph_smooth_radius_scan = 1
    ph_inten_min_rel = 0.001
    ph_persis_min_rel = 0.001

    # Parameters used to cluster mass features
    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
    mass_feature_cluster_rt_tolerance: float = 0.3

    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
    ms1_scans_to_average: int = 1
    ms1_deconvolution_corr_min: float = 0.8
    ms2_dda_rt_tolerance: float = 0.15
    ms2_dda_mz_tolerance: float = 0.05

    # Parameters used for flash entropy searching and database preparation
    ms2_min_fe_score: float = 0.2
    search_as_lipids: bool = False
    include_fragment_types: bool = False

    # Parameters used for saving the data
    export_profile_spectra: bool = False
    export_eics: bool = True
    export_unprocessed_ms1: bool = False

    # Parameters used for verbose processing
    verbose_processing: bool = True

    def __post_init__(self):
        """Coerce every field to its annotated type."""
        _enforce_field_types(self)


@dataclasses.dataclass
class MassSpectrumSetting:
    """Mass spectrum processing settings class

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method.
        Note that this is a percentage value. Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that can be selected. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance.
        Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Calibration reference matching setting. Default is 1.5.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"

    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    noise_threshold_min_std: int = 6  # when using 'minima' method

    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method

    noise_threshold_min_relative_abundance: float = (
        6  # from 0-100, when using 'relative_abundance' method
    )

    noise_threshold_absolute_abundance: float = (
        1_000_000  # when using 'absolute_abundance' method
    )

    noise_threshold_log_nsigma: int = 6  # when using 'log' method
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
    # This will fix peak picking at spectrum limit issues
    #  0 to keep normal behaviour, typical value 3 to fix
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5
    # calib_ref_mzs: list = [0]

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        """Coerce every field to its annotated type."""
        _enforce_field_types(self)


@dataclasses.dataclass
class MassSpecPeakSetting:
    """Mass spectrum peak processing settings class

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        Defaults to {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit centroid.
        Defaults to False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1

    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection

    min_peak_datapoints: float = 5  # 0-inf used for peak detection

    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection

    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection

    legacy_resolving_power: bool = (
        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
    )

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        """Default the Kendrick base to CH2, then coerce field types."""
        # default to CH2
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}
        _enforce_field_types(self)


@dataclasses.dataclass
class GasChromatographSetting:
    """Gas chromatograph processing settings class

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
        Defaults to 0.1.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detected noise threshold that can be implemented.
        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods

    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection

    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection

    eic_signal_threshold: float = (
        0.01  # 0-100 % used for extracted ion chromatogram peak detection
    )

    max_rt_distance: float = (
        0.025  # minutes, max distance allowance hierarchical cluster
    )

    verbose_processing: bool = True

    def __post_init__(self):
        """Coerce every field to its annotated type."""
        _enforce_field_types(self)


@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When left at its declared default (or empty), it is
        resolved from the SPECTRAL_GCMS_DATABASE_URL environment variable, falling
        back to 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'. An explicitly
        supplied URL is kept.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When empty, defaults to
        ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate',
        'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate',
        'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Available score methods. Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Score method to use for output. Default is 'All'.

    """

    # NOTE(review): this placeholder embeds credentials and is never used as-is:
    # __post_init__ replaces it with the environment/sqlite URL.
    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"  # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        """Resolve the database URL, coerce types, fill calibration names."""
        # The class-level attribute still holds the declared default; treat it
        # (and an empty value) as "not supplied" so an explicit URL survives.
        placeholder_url = type(self).url_database
        if not self.url_database or self.url_database == placeholder_url:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # enforce datatype
        _enforce_field_types(self)

        # Only install the standard calibration compounds when the caller did
        # not provide their own list.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]


class MolecularLookupDictSettings:
    """Settings for molecular searching

    These are used to generate the database entries, do not change.

    Attributes
    ----------
    usedAtoms : dict, optional
        Dictionary of atoms and ranges.
        Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences.
        Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1,
        '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    ### DO NOT CHANGE IT! These are used to generate the database entries

    ### DO change when creating a new application database

    ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below

    ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms
    ### if you don't want to include one of those atoms set the max and min at 0
    ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module
    ### make sure to include the selected covalence at the used_atoms_valences when adding new atoms
    ### NOTE : Adducts atoms have zero covalence
    ### NOTE : Not using static variable because this class is distributed using multiprocessing
    def __init__(self):
        """Populate every setting with its database-generation default."""
        self.usedAtoms = {
            "C": (1, 90),
            "H": (4, 200),
            "O": (0, 12),
            "N": (0, 0),
            "S": (0, 0),
            "P": (0, 0),
            "Cl": (0, 0),
        }

        self.min_mz = 50

        self.max_mz = 1200

        self.min_dbe = 0

        self.max_dbe = 50

        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = True

        self.isProtonated = True

        self.url_database = None

        self.db_jobs = 1

        self.used_atom_valences = {
            "C": 4,
            "13C": 4,
            "H": 1,
            "O": 2,
            "18O": 2,
            "N": 3,
            "S": 2,
            "34S": 2,
            "P": 3,
            "Cl": 1,
            "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }


@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Settings for molecular searching

    Attributes
    ----------
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        When empty, it is resolved from the COREMS_DATABASE_URL environment variable,
        falling back to 'sqlite:///db/molformula.db'.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight for m/z error score to contribute to composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Tuple of score method that can be implemented.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score',
        'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error',
        'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is an empty dict; 'C': (1, 100) and 'H': (1, 200)
        are added when missing.
    ion_types_excluded : list, optional
        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-']
        depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is an empty dict; every atom in
        Atoms.atoms_covalence that is missing is added with its (first) covalence.
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    # NOTE(review): default URL embeds credentials; consider supplying the URL
    # explicitly or via configuration instead of relying on this default.
    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # query setting========
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # depending on the polarity mode it looks for [M].+ , [M].-
    # query and automatically compile add entry if it doesn't exist

    isRadical: bool = False

    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
    # query and automatically compile and push options if it doesn't exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # search setting ========

    ionization_type: str = "ESI"

    # empirically set / needs optimization
    min_ppm_error: float = -10.0  # ppm

    # empirically set / needs optimization
    max_ppm_error: float = 10.0  # ppm

    # empirically set / needs optimization set for isotopologue search
    min_abun_error: float = -100.0  # percentage

    # empirically set / needs optimization set for isotopologue search
    max_abun_error: float = 100.0  # percentage

    # empirically set / needs optimization
    mz_error_range: float = 1.5

    # 'distance', 'lowest', 'symmetrical','average' 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Resolve the database URL, coerce types, and complete atom tables."""
        # An empty/None URL means "take it from the environment".
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # enforce datatype
        _enforce_field_types(self)

        # enforce C and H if either does not exist
        if "C" not in self.usedAtoms:
            self.usedAtoms["C"] = (1, 100)
        if "H" not in self.usedAtoms:
            self.usedAtoms["H"] = (1, 200)

        # add common values: every known atom the caller did not configure
        for atom, covalence in Atoms.atoms_covalence.items():
            if atom in self.used_atom_valences:
                continue
            if isinstance(covalence, int):
                self.used_atom_valences[atom] = covalence
            else:
                # take the first of all possible covalences, which should be
                # the most common one
                self.used_atom_valences[atom] = covalence[0]
@dataclasses.dataclass
class TransientSetting:
    """Transient processing settings class

    Attributes
    ----------
    implemented_apodization_function : tuple
        Available apodization functions
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
        Default is 'Hanning'.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform. Default is 0.
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform. Default is 1.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)). Default is False.
    kaiser_beta : float
        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming,
        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs). Default is 8.6.

    Notes
    -----
    After initialisation every value that is not already an instance of its
    annotated type is converted to that type (for example ``"2"`` becomes ``2``
    for an ``int`` field).
    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        """Enforce the annotated datatype of every field."""
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if isinstance(value, field.type):
                continue
            if field.type is bool and isinstance(value, str):
                # bool("False") is True, so textual flags must be parsed
                # explicitly instead of being passed to bool().
                value = value.strip().lower() not in ("", "0", "false", "no", "off")
            else:
                value = field.type(value)
            setattr(self, field.name, value)
Transient processing settings class
Attributes
- implemented_apodization_function (tuple): Available apodization functions
- apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
- number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
- number_of_zero_fills (int): How many times to zero fill the transient prior to Fourier transform.
- next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
- kaiser_beta (float): Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
@dataclasses.dataclass
class DataInputSetting:
    """Data input settings class

    Attributes
    ----------
    header_translate : dict
        Dictionary with the header labels to be translated to the corems labels.
        For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}.
        Entries passed to the constructor are merged on top of the built-in
        translations, so a caller-supplied label overrides a built-in one with
        the same key.
    """

    # add to this dict the VALUES to match your labels, THE ORDER WON'T MATTER
    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    header_translate: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Install the built-in header translations, keeping caller-supplied entries."""
        built_in = {
            "m/z": Labels.mz,
            "mOz": Labels.mz,
            "Mass": Labels.mz,
            "Resolving Power": Labels.rp,
            "Res.": Labels.rp,
            "resolution": Labels.rp,
            "Intensity": Labels.abundance,
            "Peak Height": Labels.abundance,
            "I": Labels.abundance,
            "Abundance": Labels.abundance,
            "abs_abu": Labels.abundance,
            "Signal/Noise": Labels.s2n,
            "S/N": Labels.s2n,
            "sn": Labels.s2n,
        }
        # Previously the constructor argument was overwritten and silently
        # lost; merge it instead so user labels survive initialisation.
        supplied = dict(self.header_translate or {})
        self.header_translate = {**built_in, **supplied}

    def add_mz_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
        self.header_translate[label] = Labels.mz

    def add_peak_height_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
        self.header_translate[label] = Labels.abundance

    def add_sn_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
        self.header_translate[label] = Labels.s2n

    def add_resolving_power_label(self, label):
        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
        self.header_translate[label] = Labels.rp
Data input settings class
Attributes
- header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
def add_mz_label(self, label):
    """Register an extra header label for the m/z column.

    Parameters
    ----------
    label : str
        Header text, as it appears in the input data, to translate to the
        corems m/z label.
    """
    translations = self.header_translate
    translations[label] = Labels.mz
Add a label to the header_translate dictionary to be translated to the corems label for mz.
def add_peak_height_label(self, label):
    """Register an extra header label for the peak height (abundance) column.

    Parameters
    ----------
    label : str
        Header text, as it appears in the input data, to translate to the
        corems abundance label.
    """
    translations = self.header_translate
    translations[label] = Labels.abundance
Add a label to the header_translate dictionary to be translated to the corems label for peak height.
def add_sn_label(self, label):
    """Register an extra header label for the signal-to-noise column.

    Parameters
    ----------
    label : str
        Header text, as it appears in the input data, to translate to the
        corems signal-to-noise label.
    """
    translations = self.header_translate
    translations[label] = Labels.s2n
Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.
def add_resolving_power_label(self, label):
    """Register an extra header label for the resolving power column.

    Parameters
    ----------
    label : str
        Header text, as it appears in the input data, to translate to the
        corems resolving power label.
    """
    translations = self.header_translate
    translations[label] = Labels.rp
Add a label to the header_translate dictionary to be translated to the corems label for resolving power.
108@dataclasses.dataclass 109class LiquidChromatographSetting: 110 """Liquid chromatograph processing settings class 111 112 Attributes 113 ---------- 114 scans : list or tuple, optional 115 List of select scan to average or a tuple containing the range to average. Default is (0, 1). 116 eic_tolerance_ppm : float, optional 117 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. 118 correct_eic_baseline : bool, optional 119 If True, correct the baseline of the extracted ion chromatogram. Default is True. 120 smooth_window : int, optional 121 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. 122 smooth_method : str, optional 123 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. 124 implemented_smooth_method : tuple, optional 125 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). 126 savgol_pol_order : int, optional 127 Polynomial order for Savitzky-Golay smoothing. Default is 2. 128 peak_height_max_percent : float, optional 129 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. 130 peak_max_prominence_percent : float, optional 131 1-100 % used for baseline detection. Default is 1. 132 peak_derivative_threshold : float, optional 133 Threshold for defining derivative crossing. Default is 0.0005. 134 min_peak_datapoints : float, optional 135 minimum data point to define a chromatografic peak. Default is 5. 136 noise_threshold_method : str, optional 137 Method for detecting noise threshold. Default is 'manual_relative_abundance'. 138 noise_threshold_methods_implemented : tuple, optional 139 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). 140 peak_height_min_percent : float, optional 141 0-100 % used for peak detection. 
Default is 0.1. 142 eic_signal_threshold : float, optional 143 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. 144 eic_buffer_time : float, optional 145 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. 146 ph_smooth_it : int, optional 147 Number of iterations to use for smoothing prior to finding mass features. 148 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. 149 ph_smooth_radius_mz : int, optional 150 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. 151 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. 152 ph_smooth_radius_scan : int, optional 153 Radius in scan steps for smoothing prior to finding mass features. 154 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. 155 ph_inten_min_rel : int, optional 156 Relative minimum intensity to use for finding mass features. 157 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 158 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 159 ph_persis_min_rel : int, optional 160 Relative minimum persistence for retaining mass features. 161 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). 162 Should be greater to or equal to ph_inten_min_rel. 163 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. 164 mass_feature_cluster_mz_tolerance_rel : float, optional 165 Relative m/z tolerance to use for clustering mass features. 166 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 167 Default is 5E-6 (5 ppm). 168 mass_feature_cluster_rt_tolerance : float, optional 169 Retention time tolerance to use for clustering mass features, in minutes. 
170 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 171 Default is 0.2. 172 ms1_scans_to_average : int, optional 173 Number of MS1 scans to average for mass-feature associated m/zs. 174 Called within the LCMSBase.add_associated_ms1() method. Default is 1. 175 ms1_deconvolution_corr_min : float, optional 176 Minimum correlation to use for deconvoluting MS1 mass features. 177 Called within the LCCalculations.deconvolute_ms1_mass_features() method. 178 Default is 0.8. 179 ms2_dda_rt_tolerance : float, optional 180 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. 181 ms2_dda_mz_tolerance : float, optional 182 Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. 183 ms2_min_fe_score : float, optional 184 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. 185 search_as_lipids : bool, optional 186 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. 187 include_fragment_types : bool, optional 188 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. 189 verbose_processing : bool, optional 190 If True, print verbose processing information. Default is True. 
191 """ 192 193 scans: list | tuple = (-1, -1) 194 195 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing 196 eic_tolerance_ppm: float = 5 197 correct_eic_baseline = True 198 smooth_window: int = 5 199 smooth_method: str = "savgol" 200 implemented_smooth_method: tuple = ( 201 "savgol", 202 "hanning", 203 "blackman", 204 "bartlett", 205 "flat", 206 "boxcar", 207 ) 208 savgol_pol_order: int = 2 209 peak_height_max_percent: float = 10 210 peak_max_prominence_percent: float = 1 211 peak_derivative_threshold: float = 0.0005 212 min_peak_datapoints: float = 5 213 noise_threshold_method: str = "manual_relative_abundance" 214 noise_threshold_methods_implemented: tuple = ( 215 "auto_relative_abundance", 216 "manual_relative_abundance", 217 "second_derivative", 218 ) 219 peak_height_min_percent: float = 0.1 220 eic_signal_threshold: float = 0.01 221 eic_buffer_time = 1.5 222 223 # Parameters used for 2D peak picking 224 peak_picking_method: str = "persistent homology" 225 implemented_peak_picking_methods: tuple = ("persistent homology",) 226 227 # Parameters used in persistent homology calculations 228 ph_smooth_it = 1 229 ph_smooth_radius_mz = 0 230 ph_smooth_radius_scan = 1 231 ph_inten_min_rel = 0.001 232 ph_persis_min_rel = 0.001 233 234 # Parameters used to cluster mass features 235 mass_feature_cluster_mz_tolerance_rel: float = 5e-6 236 mass_feature_cluster_rt_tolerance: float = 0.3 237 238 # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features 239 ms1_scans_to_average: int = 1 240 ms1_deconvolution_corr_min: float = 0.8 241 ms2_dda_rt_tolerance: float = 0.15 242 ms2_dda_mz_tolerance: float = 0.05 243 244 # Parameters used for flash entropy searching and database preparation 245 ms2_min_fe_score: float = 0.2 246 search_as_lipids: bool = False 247 include_fragment_types: bool = False 248 249 # Parameters used for saving the data 250 export_profile_spectra: bool = False 251 
export_eics: bool = True 252 export_unprocessed_ms1: bool = False 253 254 # Parameters used for verbose processing 255 verbose_processing: bool = True 256 257 def __post_init__(self): 258 # enforce datatype 259 for field in dataclasses.fields(self): 260 value = getattr(self, field.name) 261 if not isinstance(value, field.type): 262 value = field.type(value) 263 setattr(self, field.name, value)
Liquid chromatograph processing settings class
Attributes
- scans (list or tuple, optional): List of selected scans to average, or a tuple containing the scan range to average. Default is (-1, -1).
- eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
- correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
- smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- peak_height_max_percent (float, optional): 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
- peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
- min_peak_datapoints (float, optional): Minimum number of data points to define a chromatographic peak. Default is 5.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
- ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
- ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
- ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
- mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
- mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
- ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
- ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
- ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
- ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
- ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
- search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
- include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpectrumSetting:
    """Mass spectrum processing settings class

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Methods for detecting noise threshold that can be implemented.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method.
        Note that this is a percentage value. Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference mass matching methods that are implemented. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance.
        Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5. Its meaning is not described in this class; see the calibration code that reads it.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.

    Notes
    -----
    After initialisation every value that is not already an instance of its
    annotated type is converted to that type.
    """

    noise_threshold_method: str = "log"

    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    noise_threshold_min_std: int = 6  # when using 'minima' method

    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method

    noise_threshold_min_relative_abundance: float = (
        6  # from 0-100, when using 'relative_abundance' method
    )

    noise_threshold_absolute_abundance: float = (
        1_000_000  # when using 'absolute_abundance' method
    )

    noise_threshold_log_nsigma: int = 6  # when using 'log' method
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
    # This will fix peak picking at spectrum limit issues
    #  0 to keep normal behaviour, typical value 3 to fix
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5
    # calib_ref_mzs: list = [0]

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        """Enforce the annotated datatype of every field."""
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if isinstance(value, field.type):
                continue
            if field.type is bool and isinstance(value, str):
                # bool("False") is True, so textual flags must be parsed
                # explicitly instead of being passed to bool().
                value = value.strip().lower() not in ("", "0", "false", "no", "off")
            else:
                value = field.type(value)
            setattr(self, field.name, value)
Mass spectrum processing settings class
Attributes
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
- noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
- noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
- noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
- noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
- noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
- noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
- noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
- noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
- noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
- min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
- max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
- picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
- calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
- calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
- max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
- min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
- calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
- calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
- calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
- do_calibration (bool, optional): If True, perform calibration. Default is True.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Mass spectrum peak processing settings class

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        Defaults to {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit centroid.
        Defaults to False.

    Notes
    -----
    An empty ``kendrick_base`` is replaced by the CH2 base. After that, every
    value that is not already an instance of its annotated type is converted
    to that type.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1

    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection

    min_peak_datapoints: float = 5  # 0-inf used for peak detection

    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection

    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection

    legacy_resolving_power: bool = (
        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
    )

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        """Apply the default Kendrick base and enforce the annotated datatypes."""
        # Local import: the module only imports List and Dict from typing.
        from typing import get_origin

        # default to CH2
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}

        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if isinstance(value, field.type):
                continue
            if field.type is bool and isinstance(value, str):
                # bool("False") is True, so textual flags must be parsed
                # explicitly instead of being passed to bool().
                value = value.strip().lower() not in ("", "0", "false", "no", "off")
            else:
                # typing aliases such as Dict cannot be instantiated; use the
                # runtime class behind them (dict) for the conversion.
                converter = get_origin(field.type) or field.type
                value = converter(value)
            setattr(self, field.name, value)
Mass spectrum peak processing settings class
Attributes
- kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
- kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
- implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
- peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. Defaults to 0.1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 0.1.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
- legacy_centroid_polyfit (bool, optional): Use the legacy (numpy polyfit) method to fit the centroid. Defaults to False.
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings for gas chromatogram smoothing, peak picking and noise handling.

    Attributes
    ----------
    use_deconvolution : bool, optional
        Whether to use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Names of the available smoothing methods.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size used to smooth the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to apply. Default is 'savgol'; the other listed
        options are 'hanning', 'blackman', 'bartlett', 'flat' and 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Derivative-crossing threshold, expected in the 0-1 range.
        Default is 0.0005.
    peak_height_max_percent : float, optional
        Maximum height, in percent (1-100), used for baseline detection.
        Default is 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence, in percent (1-100), used for baseline detection.
        Default is 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection (0 to infinity).
        Default is 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection (0 to infinity).
        Default is 0.1.
    noise_threshold_method : str, optional
        Method used to determine the noise threshold.
        Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Names of the available noise-threshold methods. Default is
        ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        Percent (0-100) used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        Percent (0-100) used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        Percent (0-100) used for extracted ion chromatogram peak detection.
        Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowed for hierarchical clustering, in minutes.
        Default is 0.025.
    verbose_processing : bool, optional
        Whether to print verbose processing information. Default is True.

    Notes
    -----
    After initialisation, every value that is not already an instance of its
    annotated type is converted by calling that type on it.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    # 1-100 %, baseline detection; use 0.1 for second_derivative and 10 for other methods
    peak_height_max_percent: float = 10

    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 1

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1

    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01

    # minutes, maximum distance allowed for hierarchical clustering
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        # Coerce each attribute to its annotated type when it does not match.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
Gas chromatograph processing settings class
Attributes
- use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
- implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
- smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
- smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
- savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
- peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
- peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
- peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
- min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
- max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
- noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
- noise_threshold_methods_implemented (tuple, optional): Methods for detecting the noise threshold that are implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
- std_noise_threshold (int, optional): Default is 3.
- peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
- eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
- max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When no (or an empty) value is given, the value of
        the SPECTRAL_GCMS_DATABASE_URL environment variable is used, falling back
        to 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes; used for retention index
        calibration. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity; used for calibration.
        Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When no
        (or an empty) list is given, the default is ['Methyl Caprylate',
        'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate',
        'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate',
        'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate',
        'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods.
        Default is False.
    score_methods : tuple, optional
        Names of the score methods. Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Score method used for output. Default is 'All'.

    Notes
    -----
    Values passed to the constructor for `url_database` and
    `ri_calibration_compound_names` are kept; the defaults described above are
    only applied when those values are empty. After that, every value that is
    not already an instance of its annotated type is converted by calling that
    type on it.
    """

    # Empty means "not provided": __post_init__ then resolves the URL from the
    # environment. No connection string with credentials is kept in the source.
    url_database: str = ""

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and exports all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # Only fall back to the environment / bundled sqlite database when the
        # caller did not supply a URL, so an explicit argument is not discarded.
        if not self.url_database:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # Same rule for the calibration compounds: keep a caller-supplied list.
        # NOTE(review): names are kept byte-for-byte (including 'Linocerate');
        # presumably they are matched against database entries - verify before
        # correcting the spelling.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                setattr(self, field.name, field.type(value))
Settings for compound search
Attributes
- url_database (str, optional): URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
- ri_search_range (float, optional): Retention index search range. Default is 35.
- rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
- correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
- score_threshold (float, optional): Threshold for composite score. Default is 0.0.
- ri_spacing (float, optional): Retention index spacing. Default is 200.
- ri_std (float, optional): Retention index standard deviation. Default is 3.
- ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
class MolecularLookupDictSettings:
    """Settings used to generate the molecular formula database entries.

    These values drive database generation; do not change them unless a new
    application database is being created. For runtime search and database
    query settings use the MolecularFormulaSearchSettings class instead.

    Attributes
    ----------
    usedAtoms : dict, optional
        Atom symbols mapped to (min, max) ranges. Default is
        {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Atom symbols mapped to valences. Default is
        {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    # Guidance for editing the values below:
    #   * C, H, N, O, S and P must ALWAYS be present in usedAtoms; to leave one
    #     of them out of the search, set both its min and max to 0.
    #   * Any atom listed in the Atoms class (corems.encapsulation.constant)
    #     may be added; when adding one, also add its selected covalence to
    #     used_atom_valences.
    #   * NOTE: adduct atoms have zero covalence.
    #   * NOTE: values are set per instance rather than as static (class)
    #     variables because this class is distributed using multiprocessing.
    def __init__(self):
        atom_ranges = {
            "C": (1, 90),
            "H": (4, 200),
            "O": (0, 12),
            "N": (0, 0),
            "S": (0, 0),
            "P": (0, 0),
            "Cl": (0, 0),
        }
        valences = {
            "C": 4,
            "13C": 4,
            "H": 1,
            "O": 2,
            "18O": 2,
            "N": 3,
            "S": 2,
            "34S": 2,
            "P": 3,
            "Cl": 1,
            "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }

        self.usedAtoms = atom_ranges

        self.min_mz, self.max_mz = 50, 1200

        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, replaces the DBE limits above with DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = self.isProtonated = True

        self.url_database = None

        self.db_jobs = 1

        self.used_atom_valences = valences
Settings for molecular searching
These are used to generate the database entries, do not change.
Attributes
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
- min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
- max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- isRadical (bool, optional): If True, search for radical ions. Default is True.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- url_database (str, optional): URL for the database. Default is None.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Runtime settings for molecular formula searching and database queries.

    Attributes
    ----------
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is
        'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        An empty value is replaced by the COREMS_DATABASE_URL environment
        variable, or 'sqlite:///db/molformula.db' when that is not set.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorus ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight of the m/z error score in the composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight of the isotopologue score in the composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Adduct atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Adduct atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Names of the implemented score methods. Default is
        ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score',
        'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use, one of `score_methods`. Default is 'prob_score'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Atom symbols mapped to (min, max) ranges. Starts empty by default;
        'C': (1, 100) and 'H': (1, 200) are added when they are missing.
    ion_types_excluded : list, optional
        Ion types to exclude from molecular id search, commonly
        ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content.
        Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error (percent) for isotopologue search.
        Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error (percent) for isotopologue search.
        Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest',
        'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Atom symbols mapped to valences. Starts empty by default; every atom of
        Atoms.atoms_covalence that is not already present is added with its
        covalence (the first listed value when several are given).

    Notes
    -----
    After initialisation, every value that is not already an instance of its
    annotated type is converted by calling that type on it.
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # ---- query settings ----
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # closed-shell adduct ions [M + Adduct]+: only the atoms listed in the
    # adduct_atoms tuples below are considered
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # radical ions, [M].+ or [M].- depending on the polarity mode;
    # queried, and compiled/added automatically if the entry does not exist
    isRadical: bool = False

    # (de)protonated ions, [M + H]+ or [M - H]+ depending on the polarity mode;
    # queried, and compiled/pushed automatically if the entry does not exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # ---- search settings ----
    ionization_type: str = "ESI"

    # ppm; empirically set, needs optimization
    min_ppm_error: float = -10.0

    # ppm; empirically set, needs optimization
    max_ppm_error: float = 10.0

    # percentage; empirically set for isotopologue search, needs optimization
    min_abun_error: float = -100.0

    # percentage; empirically set for isotopologue search, needs optimization
    max_abun_error: float = 100.0

    # empirically set, needs optimization
    mz_error_range: float = 1.5

    # one of 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # atom symbol -> valence, e.g. {'C': 4, 'H': 1}
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        # An empty database URL falls back to the environment, then to sqlite.
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # Coerce each attribute to its annotated type when it does not match.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

        # C and H are always required; add them when the caller left them out.
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # Complete the valence table with the common values for every known atom
        # the caller did not specify.
        for symbol, covalence in Atoms.atoms_covalence.items():
            if symbol in self.used_atom_valences:
                continue
            if isinstance(covalence, int):
                self.used_atom_valences[symbol] = covalence
            else:
                # several covalences are possible: take the first one, which
                # should be the most common
                self.used_atom_valences[symbol] = covalence[0]
Settings for molecular searching
Attributes
- use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
- isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
- isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
- use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
- use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
- min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
- url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
- db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
- db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
- ion_charge (int, optional): Ion charge. Default is -1.
- min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
- max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
- min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
- max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
- min_op_filter (float, optional): Minimum oxygen to phosphorus ratio. Default is 2.
- use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
- min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
- max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
- mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
- isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
- adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
- adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
- score_methods (tuple, optional): Tuple of the score methods that are implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
- score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
- output_min_score (float, optional): Minimum score for output. Default is 0.1.
- output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
- isRadical (bool, optional): If True, search for radical ions. Default is False.
- isProtonated (bool, optional): If True, search for protonated ions. Default is True.
- isAdduct (bool, optional): If True, search for adduct ions. Default is False.
- usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is an empty dictionary, to which 'C': (1, 100) and 'H': (1, 200) are added when they are missing.
- ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
- ionization_type (str, optional): Ionization type. Default is 'ESI'.
- min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
- max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
- min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
- max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
- mz_error_range (float, optional): m/z error range. Default is 1.5.
- error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
- mz_error_average (float, optional): m/z error average. Default is 0.0.
- used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is an empty dictionary, which is then filled with one valence for every atom in Atoms.atoms_covalence that is not already present (the first listed value when an atom has several).
- verbose_processing (bool, optional): If True, print verbose processing information. Default is True.