corems.encapsulation.factory.processingSetting

   1__author__ = "Yuri E. Corilo"
   2__date__ = "Jul 02, 2019"
   3
   4import dataclasses
   5import os
   6from typing import List, Dict
   7
   8from corems.encapsulation.constant import Atoms, Labels
   9
  10
  11@dataclasses.dataclass
  12class TransientSetting:
  13    """Transient processing settings class
  14
  15    Attributes
  16    ----------
  17    implemented_apodization_function : tuple
  18        Available apodization functions
  19    apodization_method : str
  20        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
  21        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
  22    number_of_truncations : int
  23        How many times to truncate the transient prior to Fourier transform
  24    number_of_zero_fills : int
  25        How many times to zero fille the transient prior to Fourier transform.
  26    next_power_of_two : bool
  27        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
  28    kaiser_beta : float
  29        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular,  5 is similar to Hamming,
  30        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
  31
  32    """
  33
  34    implemented_apodization_function: tuple = (
  35        "Hamming",
  36        "Hanning",
  37        "Blackman",
  38        "Full-Sine",
  39        "Half-Sine",
  40        "Kaiser",
  41        "Half-Kaiser",
  42        "Rectangle",
  43    )
  44    apodization_method: str = "Hanning"
  45    number_of_truncations: int = 0
  46    number_of_zero_fills: int = 1
  47    next_power_of_two: bool = False
  48    kaiser_beta: float = 8.6
  49
  50    def __post_init__(self):
  51        # enforce datatype
  52        for field in dataclasses.fields(self):
  53            value = getattr(self, field.name)
  54            if not isinstance(value, field.type):
  55                value = field.type(value)
  56                setattr(self, field.name, value)
  57
  58
  59@dataclasses.dataclass
  60class DataInputSetting:
  61    """Data input settings class
  62
  63    Attributes
  64    ----------
  65    header_translate : dict
  66        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
  67    """
  68
  69    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
  70    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
  71    header_translate: dict = dataclasses.field(default_factory=dict)
  72
  73    def __post_init__(self):
  74        self.header_translate = {
  75            "m/z": Labels.mz,
  76            "mOz": Labels.mz,
  77            "Mass": Labels.mz,
  78            "Resolving Power": Labels.rp,
  79            "Res.": Labels.rp,
  80            "resolution": Labels.rp,
  81            "Intensity": Labels.abundance,
  82            "Peak Height": Labels.abundance,
  83            "I": Labels.abundance,
  84            "Abundance": Labels.abundance,
  85            "abs_abu": Labels.abundance,
  86            "Signal/Noise": Labels.s2n,
  87            "S/N": Labels.s2n,
  88            "sn": Labels.s2n,
  89        }
  90
  91    def add_mz_label(self, label):
  92        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
  93        self.header_translate[label] = Labels.mz
  94
  95    def add_peak_height_label(self, label):
  96        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
  97
  98        self.header_translate[label] = Labels.abundance
  99
 100    def add_sn_label(self, label):
 101        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
 102        self.header_translate[label] = Labels.s2n
 103
 104    def add_resolving_power_label(self, label):
 105        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
 106        self.header_translate[label] = Labels.rp
 107
 108
 109@dataclasses.dataclass
 110class LiquidChromatographSetting:
 111    """Liquid chromatograph processing settings class
 112
 113    Attributes
 114    ----------
 115    scans : list or tuple, optional
 116        List of select scan to average or a tuple containing the range to average.
 117        Default is (-1, -1).
 118    eic_tolerance_ppm : float, optional
 119        Mass tolerance in ppm for extracted ion chromatogram peak detection.
 120        Default is 5.
 121    correct_eic_baseline : bool, optional
 122        If True, correct the baseline of the extracted ion chromatogram.
 123        Default is True.
 124    smooth_window : int, optional
 125        Window size for smoothing the ion chromatogram (extracted or total).
 126        Default is 5.
 127    smooth_method : str, optional
 128        Smoothing method to use. See implemented_smooth_method for options.
 129        Default is 'savgol'.
 130    implemented_smooth_method : tuple, optional
 131        Smoothing methods that can be implemented.
 132        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
 133    savgol_pol_order : int, optional
 134        Polynomial order for Savitzky-Golay smoothing.
 135        Default is 2.
 136    consecutive_scan_min : int, optional
 137        Minimum number of consecutive scans to consider for peak detection.
 138        Default is 0 for backwards compatibility, but a value of 3 is recommended.
 139    peak_height_max_percent : float, optional
 140        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods.
 141        Default is 10.
 142    peak_max_prominence_percent : float, optional
 143        1-100 % used for baseline detection.
 144        Default is 1.
 145    peak_derivative_threshold : float, optional
 146        Threshold for defining derivative crossing.
 147        Default is 0.0005.
 148    min_peak_datapoints : float, optional
 149        minimum data point to define a chromatografic peak.
 150        Default is 5.
 151    noise_threshold_method : str, optional
 152        Method for detecting noise threshold.
 153        Default is 'manual_relative_abundance'.
 154    noise_threshold_methods_implemented : tuple, optional
 155        Methods for detected noise threshold that can be implemented.
 156        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
 157    peak_height_min_percent : float, optional
 158        0-100 % used for peak detection.
 159        Default is 0.1.
 160    eic_signal_threshold : float, optional
 161        0-100 % used for extracted ion chromatogram peak detection.
 162        Default is 0.01.
 163    eic_buffer_time : float, optional
 164        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes.
 165        Default is 1.5.
 166    dispersity_index_window : float, optional
 167        Dispersity index window size, in minutes.
 168        Default is 3.0.
 169    noise_window_factor : float, optional
 170        Factor to determine noise estimation window size relative to peak width.
 171        Larger values use wider windows for noise estimation.
 172        For example, a value of 2.0 uses a window size equal to twice the peak width
 173        (depending on it's start and end scans) on each side.
 174        Called within the LCMSMassFeature.calc_noise_score() method.
 175        Default is 2.0.
 176    remove_redundant_mass_features : bool, optional
 177        If True, remove redundant mass features that are likely contaminants based on
 178        their m/z values and scan frequency.
 179        Especially useful for HILIC data where signals do not return to baseline between peaks
 180        or for data with significant background noise.
 181        Called within the LC_Calculations.find_mass_features() method.
 182        Default is False.
 183    redundant_scan_frequency_min : float, optional
 184        Minimum fraction of scans that must contain the m/z to be considered a likely
 185        noise/contaminant when using remove_redundant_mass_features.
 186        Default is 0.1 (10% of scans).
 187    redundant_feature_retain_n : int, optional
 188        Number of features to retain in each group when using remove_redundant_mass_features.
 189        Default is 3.
 190    remove_mass_features_by_peak_metrics : bool, optional
 191        If True, remove mass features based on their peak metrics such as S/N, Gaussian similarity,
 192        dispersity index, and noise score.
 193        Called within the LC_Calculations.add_peak_metrics() method.
 194        Default is False.
 195    mass_feature_attribute_filter_dict : dict, optional
 196        Dictionary specifying filtering criteria for mass feature attributes.
 197        Each key is an attribute name, and each value is a dict with 'value' and 'operator' keys.
 198        
 199        Structure: {attribute_name: {'value': threshold, 'operator': comparison}}
 200        
 201        Available operators:
 202        - '>' or 'greater': Keep features where attribute > threshold
 203        - '<' or 'less': Keep features where attribute < threshold  
 204        - '>=' or 'greater_equal': Keep features where attribute >= threshold
 205        - '<=' or 'less_equal': Keep features where attribute <= threshold
 206        
 207        Examples: 
 208        {
 209            'noise_score_max': {'value': 0.5, 'operator': '>'},  # Keep if noise_score_max > 0.5
 210            'dispersity_index': {'value': 0.1, 'operator': '<'},  # Keep if dispersity_index < 0.1
 211            'gaussian_similarity': {'value': 0.7, 'operator': '>='}  # Keep if gaussian_similarity >= 0.7
 212        }
 213        
 214        Available attributes include: 'noise_score', 'noise_score_min', 'noise_score_max', 
 215        'gaussian_similarity', 'tailing_factor', 'dispersity_index', 'half_height_width', 'intensity'.
 216        Default is {"noise_score_max": {"value": 0.8, "operator": ">="},"noise_score_min": {"value": 0.5, "operator": ">="}},
 217    peak_picking_method : str, optional
 218        Peak picking method to use. See implemented_peak_picking_methods for options.
 219        Default is 'persistent homology'.
 220    implemented_peak_picking_methods : tuple, optional
 221        Peak picking methods that can be implemented.
 222        Default is ('persistent homology', 'centroided_persistent_homology').
 223    ph_smooth_it : int, optional
 224        Number of iterations to use for smoothing prior to finding mass features.
 225        Used only for "persistent homology" peak picking method.
 226        Called within the PHCalculations.find_mass_features_ph() method.
 227        Default is 1.
 228    ph_smooth_radius_mz : int, optional
 229        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
 230        Used only for "persistent homology" peak picking method.
 231        Called within the PHCalculations.find_mass_features_ph() method.
 232        Default is 0.
 233    ph_smooth_radius_scan : int, optional
 234        Radius in scan steps for smoothing prior to finding mass features.
 235        Used only for "persistent homology" peak picking method.
 236        Called within the PHCalculations.find_mass_features_ph() method.
 237        Default is 1.
 238    ph_inten_min_rel : float, optional
 239        Relative minimum intensity to use for finding mass features for persistent homology.
 240        Used only for "persistent homology" peak picking method.
 241        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
 242        Called within the PH_Calculations.find_mass_features() method.
 243        Default is 0.001.
 244    ph_persis_min_rel : float, optional
 245        Relative minimum persistence for retaining mass features.
 246        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
 247        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
 248        Should be greater to or equal to ph_inten_min_rel.
 249        Called within the PH_Calculations.find_mass_features() method.
 250        Default is 0.001.
 251    mass_feature_cluster_mz_tolerance_rel : float, optional
 252        Relative m/z tolerance to use for clustering mass features.
 253        Used for both "persistent homology" and "centroided_persistent_homology"
 254        peak picking methods.
 255        Called with the PHCalculations.cluster_mass_features() and the
 256        LCCalculations.deconvolute_ms1_mass_features() methods.
 257        Default is 5e-6 (5 ppm).
 258    mass_feature_cluster_rt_tolerance : float, optional
 259        Retention time tolerance to use for clustering mass features, in minutes.
 260        Used for both "persistent homology" and "centroided_persistent_homology"
 261        peak picking methods.
 262        Called with the PHCalculations.cluster_mass_features() and the
 263        LCCalculations.deconvolute_ms1_mass_features() methods.
 264        Default is 0.3.
 265    ms1_scans_to_average : int, optional
 266        Number of MS1 scans to average for mass-feature associated m/zs.
 267        Called within the LCMSBase.add_associated_ms1() method.
 268        Default is 1.
 269    ms1_deconvolution_corr_min : float, optional
 270        Minimum correlation to use for deconvoluting MS1 mass features.
 271        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
 272        Default is 0.8.
 273    ms2_dda_rt_tolerance : float, optional
 274        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes.
 275        Called within the LCMSBase.add_associated_ms2_dda() method.
 276        Default is 0.15.
 277    ms2_dda_mz_tolerance : float, optional
 278        Mass tolerance to use for associating MS2 spectra to mass features.
 279        Called within the LCMSBase.add_associated_ms2_dda() method.
 280        Default is 0.05.
 281    ms2_min_fe_score : float, optional
 282        Minimum flash entropy for retaining MS2 annotations.
 283        Called within the LCMSSpectralSearch.fe_search() method.
 284        Default is 0.2.
 285    search_as_lipids : bool, optional
 286        If True, prepare the database for lipid searching.
 287        Called within the LCMSSpectralSearch.fe_prep_search_db() method.
 288        Default is False.
 289    include_fragment_types : bool, optional
 290        If True, include fragment types in the database.
 291        Called within the LCMSSpectralSearch.fe_search() and related methods.
 292        Default is False.
 293    export_profile_spectra : bool, optional
 294        If True, export profile spectra data.
 295        Default is False.
 296    export_eics : bool, optional
 297        If True, export extracted ion chromatograms.
 298        Default is True.
 299    export_unprocessed_ms1 : bool, optional
 300        If True, export unprocessed MS1 data.
 301        Default is False.
 302    export_only_relevant_mass_spectra : bool, optional
 303        If True, export only mass spectra associated with detected mass features:
 304        specifically the apex MS1 scan for each mass feature and the best MS2 scan
 305        for each mass feature (if available). If False, export all mass spectra.
 306        This parameter reduces HDF5 file size by excluding non-feature spectra.
 307        Default is False (backwards compatible - exports all spectra).
 308    verbose_processing : bool, optional
 309        If True, print verbose processing information.
 310        Default is True.
 311    """
 312
 313    scans: list | tuple = (-1, -1)
 314
 315    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
 316    eic_tolerance_ppm: float = 5
 317    correct_eic_baseline = True
 318    smooth_window: int = 5
 319    smooth_method: str = "savgol"
 320    implemented_smooth_method: tuple = (
 321        "savgol",
 322        "hanning",
 323        "blackman",
 324        "bartlett",
 325        "flat",
 326        "boxcar",
 327    )
 328    savgol_pol_order: int = 2
 329    consecutive_scan_min: int = 0
 330    peak_height_max_percent: float = 10
 331    peak_max_prominence_percent: float = 1
 332    peak_derivative_threshold: float = 0.0005
 333    min_peak_datapoints: float = 5
 334    noise_threshold_method: str = "manual_relative_abundance"
 335    noise_threshold_methods_implemented: tuple = (
 336        "auto_relative_abundance",
 337        "manual_relative_abundance",
 338        "second_derivative",
 339    )
 340    peak_height_min_percent: float = 0.1
 341    eic_signal_threshold: float = 0.01
 342    eic_buffer_time = 1.5
 343    dispersity_index_window: float = 3.0  # minutes
 344    noise_window_factor: float = 2.0  # times the peak width for detemining SN for EIC
 345
 346    # Parameters used for filtering mass features after peak picking
 347    remove_redundant_mass_features: bool = False
 348    redundant_scan_frequency_min: float = 0.1
 349    redundant_feature_retain_n: int = 3
 350    remove_mass_features_by_peak_metrics: bool = False
 351    # note that this is a dictionary of dictionaries and set in __post_init__ instead of here
 352    mass_feature_attribute_filter_dict: Dict = dataclasses.field(default_factory=dict)
 353
 354    # Parameters used for 2D peak picking
 355    peak_picking_method: str = "persistent homology"
 356    implemented_peak_picking_methods: tuple = (
 357        "persistent homology",
 358        "centroided_persistent_homology",
 359    )
 360
 361    # Parameters used in persistent homology calculations
 362    ph_smooth_it = 1
 363    ph_smooth_radius_mz = 0
 364    ph_smooth_radius_scan = 1
 365    ph_inten_min_rel = 0.001
 366    ph_persis_min_rel = 0.001
 367
 368    # Parameters used to cluster mass features
 369    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
 370    mass_feature_cluster_rt_tolerance: float = 0.3
 371
 372    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
 373    ms1_scans_to_average: int = 1
 374    ms1_deconvolution_corr_min: float = 0.8
 375    ms2_dda_rt_tolerance: float = 0.15
 376    ms2_dda_mz_tolerance: float = 0.05
 377
 378    # Parameters used for flash entropy searching and database preparation
 379    ms2_min_fe_score: float = 0.2
 380    search_as_lipids: bool = False
 381    include_fragment_types: bool = False
 382
 383    # Parameters used for saving the data
 384    export_profile_spectra: bool = False
 385    export_eics: bool = True
 386    export_unprocessed_ms1: bool = False
 387    export_only_relevant_mass_spectra: bool = False
 388
 389    # Parameters used for verbose processing
 390    verbose_processing: bool = True
 391
 392    def __post_init__(self):
 393        # Set default values for mass_feature_attribute_filter_dict if empty
 394        if not self.mass_feature_attribute_filter_dict:
 395            self.mass_feature_attribute_filter_dict = {
 396                "noise_score_max": {"value": 0.8, "operator": ">="},
 397                "noise_score_min": {"value": 0.5, "operator": ">="},
 398            }
 399        
 400        # enforce datatype
 401        for field in dataclasses.fields(self):
 402            value = getattr(self, field.name)
 403            if not isinstance(value, field.type):
 404                value = field.type(value)
 405                setattr(self, field.name, value)
 406
 407
 408@dataclasses.dataclass
 409class MassSpectrumSetting:
 410    """Mass spectrum processing settings class
 411
 412    Attributes
 413    ----------
 414    noise_threshold_method : str, optional
 415        Method for detecting noise threshold. Default is 'log'.
 416    noise_threshold_methods_implemented : tuple, optional
 417        Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
 418    noise_threshold_min_std : int, optional
 419        Minumum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
 420    noise_threshold_min_s2n : float, optional
 421        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
 422    noise_threshold_min_relative_abundance : float, optional
 423        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
 424    noise_threshold_absolute_abundance : float, optional
 425        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
 426    noise_threshold_log_nsigma : int, optional
 427        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
 428    noise_threshold_log_nsigma_corr_factor : float, optional
 429        Correction factor for log noise threshold method. Default is 0.463.
 430    noise_threshold_log_nsigma_bins : int, optional
 431        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
 432    noise_min_mz : float, optional
 433        Minimum m/z to use for noise thresholding. Default is 50.0.
 434    noise_max_mz : float, optional
 435        Maximum m/z to use for noise thresholding. Default is 1200.0.
 436    min_picking_mz : float, optional
 437        Minimum m/z to use for peak picking. Default is 50.0.
 438    max_picking_mz : float, optional
 439        Maximum m/z to use for peak picking. Default is 1200.0.
 440    picking_point_extrapolate : int, optional
 441        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
 442        Recommend 3 for reduced profile data or if peak picking faults
 443    calib_minimize_method : str, optional
 444        Minimization method to use for calibration. Default is 'Powell'.
 445    calib_pol_order : int, optional
 446        Polynomial order to use for calibration. Default is 2.
 447    max_calib_ppm_error : float, optional
 448        Maximum ppm error to use for calibration. Default is 1.0.
 449    min_calib_ppm_error : float, optional
 450        Minimum ppm error to use for calibration. Default is -1.0.
 451    calib_sn_threshold : float, optional
 452        Signal to noise threshold to use for calibration. Default is 2.0.
 453    calibration_ref_match_method: string, optional
 454        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
 455    calibration_ref_match_tolerance: float, optional
 456        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
 457    do_calibration : bool, optional
 458        If True, perform calibration. Default is True.
 459    verbose_processing : bool, optional
 460        If True, print verbose processing information. Default is True.
 461    """
 462
 463    noise_threshold_method: str = "log"
 464
 465    noise_threshold_methods_implemented: tuple = (
 466        "minima",
 467        "signal_noise",
 468        "relative_abundance",
 469        "absolute_abundance",
 470        "log",
 471    )
 472
 473    noise_threshold_min_std: int = 6  # when using 'minima' method
 474
 475    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method
 476
 477    noise_threshold_min_relative_abundance: float = (
 478        6  # from 0-100, when using 'relative_abundance' method
 479    )
 480
 481    noise_threshold_absolute_abundance: float = (
 482        1_000_000  # when using 'absolute_abundance' method
 483    )
 484
 485    noise_threshold_log_nsigma: int = 6  # when using 'log' method
 486    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
 487    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise
 488
 489    noise_min_mz: float = 50.0
 490    noise_max_mz: float = 1200.0
 491
 492    min_picking_mz: float = 50.0
 493    max_picking_mz: float = 1200.0
 494
 495    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
 496    # This will fix peak picking at spectrum limit issues
 497    #  0 to keep normal behaviour, typical value 3 to fix
 498    picking_point_extrapolate: int = 3
 499
 500    calib_minimize_method: str = "Powell"
 501    calib_pol_order: int = 2
 502    max_calib_ppm_error: float = 1.0
 503    min_calib_ppm_error: float = -1.0
 504    calib_sn_threshold: float = 2.0
 505    calibration_ref_match_method: str = "legacy"
 506    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
 507    calibration_ref_match_tolerance: float = 0.003
 508    calibration_ref_match_std_raw_error_limit: float = 1.5
 509    # calib_ref_mzs: list = [0]
 510
 511    do_calibration: bool = True
 512    verbose_processing: bool = True
 513
 514    def __post_init__(self):
 515        # enforce datatype
 516        for field in dataclasses.fields(self):
 517            value = getattr(self, field.name)
 518            if not isinstance(value, field.type):
 519                value = field.type(value)
 520                setattr(self, field.name, value)
 521
 522
 523@dataclasses.dataclass
 524class MassSpecPeakSetting:
 525    """Mass spectrum peak processing settings class
 526
 527    Attributes
 528    ----------
 529    kendrick_base : Dict, optional
 530        Dictionary specifying the elements and their counts in the Kendrick base.
 531        Defaults to {'C': 1, 'H': 2}.
 532    kendrick_rounding_method : str, optional
 533        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
 534        Defaults to 'floor'.
 535    implemented_kendrick_rounding_methods : tuple
 536        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
 537        Defaults to ('floor', 'ceil', 'round').
 538    peak_derivative_threshold : float, optional
 539        Threshold for defining derivative crossing. Should be a value between 0 and 1.
 540        Defaults to 0.0.
 541    peak_min_prominence_percent : float, optional
 542        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
 543        Defaults to 0.1.
 544    min_peak_datapoints : float, optional
 545        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
 546        Defaults to 5.
 547    peak_max_prominence_percent : float, optional
 548        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
 549        Defaults to 0.1.
 550    peak_height_max_percent : float, optional
 551        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
 552        Defaults to 10.
 553    legacy_resolving_power : bool, optional
 554        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
 555        Defaults to True.
 556    legacy_centroid_polyfit : bool, optional
 557        Use legacy (numpy polyfit) to fit centroid
 558        Default false.
 559    """
 560
 561    kendrick_base: Dict = dataclasses.field(default_factory=dict)
 562
 563    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
 564
 565    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")
 566
 567    peak_derivative_threshold: float = 0.0  # define derivative crossing threshould 0-1
 568
 569    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection
 570
 571    min_peak_datapoints: float = 5  # 0-inf used for peak detection
 572
 573    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection
 574
 575    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection
 576
 577    legacy_resolving_power: bool = (
 578        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
 579    )
 580
 581    legacy_centroid_polyfit: bool = False
 582
 583    def __post_init__(self):
 584        # default to CH2
 585        if not self.kendrick_base:
 586            self.kendrick_base = {"C": 1, "H": 2}
 587        # enforce datatype
 588        for field in dataclasses.fields(self):
 589            value = getattr(self, field.name)
 590            if not isinstance(value, field.type):
 591                value = field.type(value)
 592                setattr(self, field.name, value)
 593
 594
 595@dataclasses.dataclass
 596class GasChromatographSetting:
 597    """Gas chromatograph processing settings class
 598
 599    Attributes
 600    ----------
 601    use_deconvolution : bool, optional
 602        If True, use deconvolution. Default is False.
 603    implemented_smooth_method : tuple, optional
 604        Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
 605    smooth_window : int, optional
 606        Window size for smoothing the ion chromatogram. Default is 5.
 607    smooth_method : str, optional
 608        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
 609    savgol_pol_order : int, optional
 610        Polynomial order for Savitzky-Golay smoothing. Default is 2.
 611    peak_derivative_threshold : float, optional
 612        Threshold for defining derivative crossing. Should be a value between 0 and 1.
 613        Defaults to 0.0005.
 614    peak_height_max_percent : float, optional
 615        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
 616        Defaults to 10.
 617    peak_max_prominence_percent : float, optional
 618        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
 619        Defaults to 1.
 620    min_peak_datapoints : float, optional
 621        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
 622        Defaults to 5.
 623    max_peak_width : float, optional
 624        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
 625        Defaults to 0.1.
 626    noise_threshold_method : str, optional
 627        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
 628    noise_threshold_methods_implemented : tuple, optional
 629        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
 630    std_noise_threshold : int, optional
 631        Default is 3.
 632    peak_height_min_percent : float, optional
 633        0-100 % used for peak detection. Default is 0.1.
 634    peak_min_prominence_percent : float, optional
 635        0-100 % used for peak detection. Default is 0.1.
 636    eic_signal_threshold : float, optional
 637        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
 638    max_rt_distance : float, optional
 639        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
 640    verbose_processing : bool, optional
 641        If True, print verbose processing information. Default is True.
 642    """
 643
 644    use_deconvolution: bool = False
 645
 646    implemented_smooth_method: tuple = (
 647        "savgol",
 648        "hanning",
 649        "blackman",
 650        "bartlett",
 651        "flat",
 652        "boxcar",
 653    )
 654
 655    smooth_window: int = 5
 656
 657    smooth_method: str = "savgol"
 658
 659    savgol_pol_order: int = 2
 660
 661    peak_derivative_threshold: float = 0.0005
 662
 663    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods
 664
 665    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection
 666
 667    min_peak_datapoints: float = 5
 668
 669    max_peak_width: float = 0.1
 670
 671    noise_threshold_method: str = "manual_relative_abundance"
 672
 673    noise_threshold_methods_implemented: tuple = (
 674        "auto_relative_abundance",
 675        "manual_relative_abundance",
 676        "second_derivative",
 677    )
 678
 679    std_noise_threshold: int = 3
 680
 681    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection
 682
 683    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection
 684
 685    eic_signal_threshold: float = (
 686        0.01  # 0-100 % used for extracted ion chromatogram peak detection
 687    )
 688
 689    max_rt_distance: float = (
 690        0.025  # minutes, max distance allowance hierarchical clutter
 691    )
 692
 693    verbose_processing: bool = True
 694
 695    def __post_init__(self):
 696        # enforce datatype
 697        for field in dataclasses.fields(self):
 698            value = getattr(self, field.name)
 699            if not isinstance(value, field.type):
 700                value = field.type(value)
 701                setattr(self, field.name, value)
 702
 703
 704@dataclasses.dataclass
 705class CompoundSearchSettings:
 706    """Settings for compound search
 707
 708    Attributes
 709    ----------
 710    url_database : str, optional
 711        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
 712    ri_search_range : float, optional
 713        Retention index search range. Default is 35.
 714    rt_search_range : float, optional
 715        Retention time search range, in minutes. Default is 1.0.
 716    correlation_threshold : float, optional
 717        Threshold for correlation for spectral similarity. Default is 0.5.
 718    score_threshold : float, optional
 719        Threshold for compsite score. Default is 0.0.
 720    ri_spacing : float, optional
 721        Retention index spacing. Default is 200.
 722    ri_std : float, optional
 723        Retention index standard deviation. Default is 3.
 724    ri_calibration_compound_names : list, optional
 725        List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
 726
 727    """
 728
 729    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"  # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'
 730
 731    ri_search_range: float = 35
 732
 733    rt_search_range: float = 1.0  # used for retention index calibration
 734
 735    correlation_threshold: float = 0.5  # used for calibration, spectral similarity
 736
 737    score_threshold: float = 0.0
 738
 739    ri_spacing: float = 200
 740
 741    ri_std: float = 3  # in standard deviation
 742
 743    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)
 744
 745    # calculates and export all spectral similarity methods
 746    exploratory_mode: bool = False
 747
 748    score_methods: tuple = ("highest_sim_score", "highest_ss")
 749
 750    output_score_method: str = "All"
 751
 752    def __post_init__(self):
 753        # enforce datatype
 754        self.url_database = os.getenv(
 755            "SPECTRAL_GCMS_DATABASE_URL",
 756            "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
 757        )
 758
 759        for field in dataclasses.fields(self):
 760            value = getattr(self, field.name)
 761            if not isinstance(value, field.type):
 762                value = field.type(value)
 763                setattr(self, field.name, value)
 764
 765        self.ri_calibration_compound_names = [
 766            "Methyl Caprylate",
 767            "Methyl Caprate",
 768            "Methyl Pelargonate",
 769            "Methyl Laurate",
 770            "Methyl Myristate",
 771            "Methyl Palmitate",
 772            "Methyl Stearate",
 773            "Methyl Eicosanoate",
 774            "Methyl Docosanoate",
 775            "Methyl Linocerate",
 776            "Methyl Hexacosanoate",
 777            "Methyl Octacosanoate",
 778            "Methyl Triacontanoate",
 779        ]
 780
 781
 782class MolecularLookupDictSettings:
 783    """Settings for molecular searching
 784
 785    These are used to generate the database entries, do not change.
 786
 787    Attributes
 788    ----------
 789    usedAtoms : dict, optional
 790        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
 791    min_mz : float, optional
 792        Minimum m/z to use for searching. Default is 50.0.
 793    max_mz : float, optional
 794        Maximum m/z to use for searching. Default is 1200.0.
 795    min_dbe : float, optional
 796        Minimum double bond equivalent to use for searching. Default is 0.
 797    max_dbe : float, optional
 798        Maximum double bond equivalent to use for searching. Default is 50.
 799    use_pah_line_rule : bool, optional
 800        If True, use the PAH line rule. Default is False.
 801    isRadical : bool, optional
 802        If True, search for radical ions. Default is True.
 803    isProtonated : bool, optional
 804        If True, search for protonated ions. Default is True.
 805    url_database : str, optional
 806        URL for the database. Default is None.
 807    db_jobs : int, optional
 808        Number of jobs to use for database queries. Default is 1.
 809    used_atom_valences : dict, optional
 810        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
 811
 812    """
 813
 814    ### DO NOT CHANGE IT! These are used to generate the database entries
 815
 816    ### DO change when creating a new application database
 817
 818    ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below
 819
 820    ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms
 821    ### if you don't want to include one of those atoms set the max and min at 0
 822    ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module
 823    ### make sure to include the selected covalence at the used_atoms_valences when adding new atoms
 824    ### NOTE : Adducts atoms have zero covalence
 825    ### NOTE : Not using static variable because this class is distributed using multiprocessing
 826    def __init__(self):
 827        self.usedAtoms = {
 828            "C": (1, 90),
 829            "H": (4, 200),
 830            "O": (0, 12),
 831            "N": (0, 0),
 832            "S": (0, 0),
 833            "P": (0, 0),
 834            "Cl": (0, 0),
 835        }
 836
 837        self.min_mz = 50
 838
 839        self.max_mz = 1200
 840
 841        self.min_dbe = 0
 842
 843        self.max_dbe = 50
 844
 845        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
 846        self.use_pah_line_rule = False
 847
 848        self.isRadical = True
 849
 850        self.isProtonated = True
 851
 852        self.url_database = None
 853
 854        self.db_jobs = 1
 855
 856        self.used_atom_valences = {
 857            "C": 4,
 858            "13C": 4,
 859            "H": 1,
 860            "O": 2,
 861            "18O": 2,
 862            "N": 3,
 863            "S": 2,
 864            "34S": 2,
 865            "P": 3,
 866            "Cl": 1,
 867            "37Cl": 1,
 868            "Br": 1,
 869            "Na": 1,
 870            "F": 1,
 871            "K": 0,
 872        }
 873
 874
 875@dataclasses.dataclass
 876class MolecularFormulaSearchSettings:
 877    """Settings for molecular searching
 878
 879    Attributes
 880    ----------
 881    use_isotopologue_filter : bool, optional
 882        If True, use isotopologue filter. Default is False.
 883    isotopologue_filter_threshold : float, optional
 884        Threshold for isotopologue filter. Default is 33.
 885    isotopologue_filter_atoms : tuple, optional
 886        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
 887    use_runtime_kendrick_filter : bool, optional
 888        If True, use runtime Kendrick filter. Default is False.
 889    use_min_peaks_filter : bool, optional
 890        If True, use minimum peaks filter. Default is True.
 891    min_peaks_per_class : int, optional
 892        Minimum number of peaks per class. Default is 15.
 893    url_database : str, optional
 894        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
 895    db_jobs : int, optional
 896        Number of jobs to use for database queries. Default is 3.
 897    db_chunk_size : int, optional
 898        Chunk size to use for database queries. Default is 300.
 899    ion_charge : int, optional
 900        Ion charge. Default is -1.
 901    min_hc_filter : float, optional
 902        Minimum hydrogen to carbon ratio. Default is 0.3.
 903    max_hc_filter : float, optional
 904        Maximum hydrogen to carbon ratio. Default is 3.
 905    min_oc_filter : float, optional
 906        Minimum oxygen to carbon ratio. Default is 0.0.
 907    max_oc_filter : float, optional
 908        Maximum oxygen to carbon ratio. Default is 1.2.
 909    min_op_filter : float, optional
 910        Minimum oxygen to phosphorous ratio. Default is 2.
 911    use_pah_line_rule : bool, optional
 912        If True, use the PAH line rule. Default is False.
 913    min_dbe : float, optional
 914        Minimum double bond equivalent to use for searching. Default is 0.
 915    max_dbe : float, optional
 916        Maximum double bond equivalent to use for searching. Default is 40.
 917    mz_error_score_weight : float, optional
 918        Weight for m/z error score to contribute to composite score. Default is 0.6.
 919    isotopologue_score_weight : float, optional
 920        Weight for isotopologue score to contribute to composite score. Default is 0.4.
 921    adduct_atoms_neg : tuple, optional
 922        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
 923    adduct_atoms_pos : tuple, optional
 924        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
 925    score_methods : tuple, optional
 926        Tuple of score method that can be implemented.
 927        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
 928    score_method : str, optional
 929        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
 930    output_min_score : float, optional
 931        Minimum score for output. Default is 0.1.
 932    output_score_method : str, optional
 933        Score method to use for output. Default is 'All Candidates'.
 934    isRadical : bool, optional
 935        If True, search for radical ions. Default is False.
 936    isProtonated : bool, optional
 937        If True, search for protonated ions. Default is True.
 938    isAdduct : bool, optional
 939        If True, search for adduct ions. Default is False.
 940    usedAtoms : dict, optional
 941        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
 942    ion_types_excluded : list, optional
 943        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
 944    ionization_type : str, optional
 945        Ionization type. Default is 'ESI'.
 946    min_ppm_error : float, optional
 947        Minimum ppm error. Default is -10.0.
 948    max_ppm_error : float, optional
 949        Maximum ppm error. Default is 10.0.
 950    min_abun_error : float, optional
 951        Minimum abundance error for isotolopologue search. Default is -100.0.
 952    max_abun_error : float, optional
 953        Maximum abundance error for isotolopologue search. Default is 100.0.
 954    mz_error_range : float, optional
 955        m/z error range. Default is 1.5.
 956    error_method : str, optional
 957        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'.
 958    mz_error_average : float, optional
 959        m/z error average. Default is 0.0.
 960    used_atom_valences : dict, optional
 961        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
 962    verbose_processing: bool, optional
 963        If True, print verbose processing information. Default is True.
 964    """
 965
 966    verbose_processing: bool = True
 967
 968    use_isotopologue_filter: bool = False
 969
 970    isotopologue_filter_threshold: float = 33
 971
 972    isotopologue_filter_atoms: tuple = ("Cl", "Br")
 973
 974    use_runtime_kendrick_filter: bool = False
 975
 976    use_min_peaks_filter: bool = True
 977
 978    min_peaks_per_class: int = 15
 979
 980    url_database: str = (
 981        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
 982    )
 983
 984    db_jobs: int = 3
 985
 986    db_chunk_size: int = 300
 987
 988    # query setting========
 989    ion_charge: int = -1
 990
 991    min_hc_filter: float = 0.3
 992
 993    max_hc_filter: float = 3
 994
 995    min_oc_filter: float = 0.0
 996
 997    max_oc_filter: float = 1.2
 998
 999    min_op_filter: float = 2
1000
1001    use_pah_line_rule: bool = False
1002
1003    min_dbe: float = 0
1004
1005    max_dbe: float = 40
1006
1007    mz_error_score_weight: float = 0.6
1008
1009    isotopologue_score_weight: float = 0.4
1010
1011    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms
1012    adduct_atoms_neg: tuple = ("Cl", "Br")
1013
1014    adduct_atoms_pos: tuple = ("Na", "K")
1015
1016    score_methods: tuple = (
1017        "S_P_lowest_error",
1018        "N_S_P_lowest_error",
1019        "lowest_error",
1020        "prob_score",
1021        "air_filter_error",
1022        "water_filter_error",
1023        "earth_filter_error",
1024    )
1025
1026    score_method: str = "prob_score"
1027
1028    output_min_score: float = 0.1
1029
1030    output_score_method: str = "All Candidates"
1031
1032    # depending on the polarity mode it looks for [M].+ , [M].-
1033    # query and automatically compile add entry if it doesn't exist
1034
1035    isRadical: bool = False
1036
1037    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
1038    # query and automatically compile and push options if it doesn't exist
1039    isProtonated: bool = True
1040
1041    isAdduct: bool = False
1042
1043    usedAtoms: dict = dataclasses.field(default_factory=dict)
1044    ion_types_excluded: list = dataclasses.field(default_factory=list)
1045
1046    # search setting ========
1047
1048    ionization_type: str = "ESI"
1049
1050    # empirically set / needs optimization
1051    min_ppm_error: float = -10.0  # ppm
1052
1053    # empirically set / needs optimization
1054    max_ppm_error: float = 10.0  # ppm
1055
1056    # empirically set / needs optimization set for isotopologue search
1057    min_abun_error: float = -100.0  # percentage
1058
1059    # empirically set / needs optimization set for isotopologue search
1060    max_abun_error: float = 100.0  # percentage
1061
1062    # empirically set / needs optimization
1063    mz_error_range: float = 1.5
1064
1065    # 'distance', 'lowest', 'symmetrical','average' 'None'
1066    error_method: str = "None"
1067
1068    mz_error_average: float = 0.0
1069
1070    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
1071    used_atom_valences: dict = dataclasses.field(default_factory=dict)
1072
1073    def __post_init__(self):
1074        if not self.url_database or self.url_database == "":
1075            self.url_database = os.getenv(
1076                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
1077            )
1078        # enforce datatype
1079        for field in dataclasses.fields(self):
1080            value = getattr(self, field.name)
1081            if not isinstance(value, field.type):
1082                value = field.type(value)
1083                setattr(self, field.name, value)
1084
1085        # enforce C and H if either do not exists
1086        if "C" not in self.usedAtoms.keys():
1087            self.usedAtoms["C"] = (1, 100)
1088        if "H" not in self.usedAtoms.keys():
1089            self.usedAtoms["H"] = (1, 200)
1090
1091        # add cummon values
1092        current_used_atoms = self.used_atom_valences.keys()
1093
1094        for atom in Atoms.atoms_covalence.keys():
1095            if atom not in current_used_atoms:
1096                covalence = Atoms.atoms_covalence.get(atom)
1097
1098                if isinstance(covalence, int):
1099                    self.used_atom_valences[atom] = covalence
1100
1101                else:
1102                    # will get the first number of all possible covalances, which should be the most commum
1103                    self.used_atom_valences[atom] = covalence[0]
@dataclasses.dataclass
class TransientSetting:
@dataclasses.dataclass
class TransientSetting:
    """Transient processing settings class

    Attributes
    ----------
    implemented_apodization_function : tuple
        Names of the available apodization functions.
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform.
    number_of_zero_fills : int
        How many times to zero-fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function. 0 is rectangular,
        5 is similar to Hamming, 6 is similar to Hanning, and 8.6 is similar to Blackman
        (from numpy docs).

    """

    implemented_apodization_function: tuple = (
        "Hamming", "Hanning", "Blackman", "Full-Sine",
        "Half-Sine", "Kaiser", "Half-Kaiser", "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        """Coerce every field to the type named in its annotation."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

Transient processing settings class

Attributes
  • implemented_apodization_function (tuple): Available apodization functions
  • apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
  • number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
  • number_of_zero_fills (int): How many times to zero-fill the transient prior to Fourier transform.
  • next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
  • kaiser_beta (float): Beta parameter for the Kaiser or Half-Kaiser apodization function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning, and 8.6 is similar to Blackman (from numpy docs)
TransientSetting( implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle'), apodization_method: str = 'Hanning', number_of_truncations: int = 0, number_of_zero_fills: int = 1, next_power_of_two: bool = False, kaiser_beta: float = 8.6)
implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle')
apodization_method: str = 'Hanning'
number_of_truncations: int = 0
number_of_zero_fills: int = 1
next_power_of_two: bool = False
kaiser_beta: float = 8.6
@dataclasses.dataclass
class DataInputSetting:
 60@dataclasses.dataclass
 61class DataInputSetting:
 62    """Data input settings class
 63
 64    Attributes
 65    ----------
 66    header_translate : dict
 67        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
 68    """
 69
 70    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
 71    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
 72    header_translate: dict = dataclasses.field(default_factory=dict)
 73
 74    def __post_init__(self):
 75        self.header_translate = {
 76            "m/z": Labels.mz,
 77            "mOz": Labels.mz,
 78            "Mass": Labels.mz,
 79            "Resolving Power": Labels.rp,
 80            "Res.": Labels.rp,
 81            "resolution": Labels.rp,
 82            "Intensity": Labels.abundance,
 83            "Peak Height": Labels.abundance,
 84            "I": Labels.abundance,
 85            "Abundance": Labels.abundance,
 86            "abs_abu": Labels.abundance,
 87            "Signal/Noise": Labels.s2n,
 88            "S/N": Labels.s2n,
 89            "sn": Labels.s2n,
 90        }
 91
 92    def add_mz_label(self, label):
 93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
 94        self.header_translate[label] = Labels.mz
 95
 96    def add_peak_height_label(self, label):
 97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
 98
 99        self.header_translate[label] = Labels.abundance
100
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n
104
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Data input settings class

Attributes
  • header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
DataInputSetting(header_translate: dict = <factory>)
header_translate: dict
def add_mz_label(self, label):
92    def add_mz_label(self, label):
93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
94        self.header_translate[label] = Labels.mz

Add a label to the header_translate dictionary to be translated to the corems label for mz.

def add_peak_height_label(self, label):
96    def add_peak_height_label(self, label):
97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
98
99        self.header_translate[label] = Labels.abundance

Add a label to the header_translate dictionary to be translated to the corems label for peak height.

def add_sn_label(self, label):
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n

Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.

def add_resolving_power_label(self, label):
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Add a label to the header_translate dictionary to be translated to the corems label for resolving power.

@dataclasses.dataclass
class LiquidChromatographSetting:
110@dataclasses.dataclass
111class LiquidChromatographSetting:
112    """Liquid chromatograph processing settings class
113
114    Attributes
115    ----------
116    scans : list or tuple, optional
117        List of select scan to average or a tuple containing the range to average.
118        Default is (-1, -1).
119    eic_tolerance_ppm : float, optional
120        Mass tolerance in ppm for extracted ion chromatogram peak detection.
121        Default is 5.
122    correct_eic_baseline : bool, optional
123        If True, correct the baseline of the extracted ion chromatogram.
124        Default is True.
125    smooth_window : int, optional
126        Window size for smoothing the ion chromatogram (extracted or total).
127        Default is 5.
128    smooth_method : str, optional
129        Smoothing method to use. See implemented_smooth_method for options.
130        Default is 'savgol'.
131    implemented_smooth_method : tuple, optional
132        Smoothing methods that can be implemented.
133        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
134    savgol_pol_order : int, optional
135        Polynomial order for Savitzky-Golay smoothing.
136        Default is 2.
137    consecutive_scan_min : int, optional
138        Minimum number of consecutive scans to consider for peak detection.
139        Default is 0 for backwards compatibility, but a value of 3 is recommended.
140    peak_height_max_percent : float, optional
141        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods.
142        Default is 10.
143    peak_max_prominence_percent : float, optional
144        1-100 % used for baseline detection.
145        Default is 1.
146    peak_derivative_threshold : float, optional
147        Threshold for defining derivative crossing.
148        Default is 0.0005.
149    min_peak_datapoints : float, optional
150        minimum data point to define a chromatografic peak.
151        Default is 5.
152    noise_threshold_method : str, optional
153        Method for detecting noise threshold.
154        Default is 'manual_relative_abundance'.
155    noise_threshold_methods_implemented : tuple, optional
156        Methods for detected noise threshold that can be implemented.
157        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
158    peak_height_min_percent : float, optional
159        0-100 % used for peak detection.
160        Default is 0.1.
161    eic_signal_threshold : float, optional
162        0-100 % used for extracted ion chromatogram peak detection.
163        Default is 0.01.
164    eic_buffer_time : float, optional
165        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes.
166        Default is 1.5.
167    dispersity_index_window : float, optional
168        Dispersity index window size, in minutes.
169        Default is 3.0.
170    noise_window_factor : float, optional
171        Factor to determine noise estimation window size relative to peak width.
172        Larger values use wider windows for noise estimation.
173        For example, a value of 2.0 uses a window size equal to twice the peak width
174        (depending on it's start and end scans) on each side.
175        Called within the LCMSMassFeature.calc_noise_score() method.
176        Default is 2.0.
177    remove_redundant_mass_features : bool, optional
178        If True, remove redundant mass features that are likely contaminants based on
179        their m/z values and scan frequency.
180        Especially useful for HILIC data where signals do not return to baseline between peaks
181        or for data with significant background noise.
182        Called within the LC_Calculations.find_mass_features() method.
183        Default is False.
184    redundant_scan_frequency_min : float, optional
185        Minimum fraction of scans that must contain the m/z to be considered a likely
186        noise/contaminant when using remove_redundant_mass_features.
187        Default is 0.1 (10% of scans).
188    redundant_feature_retain_n : int, optional
189        Number of features to retain in each group when using remove_redundant_mass_features.
190        Default is 3.
191    remove_mass_features_by_peak_metrics : bool, optional
192        If True, remove mass features based on their peak metrics such as S/N, Gaussian similarity,
193        dispersity index, and noise score.
194        Called within the LC_Calculations.add_peak_metrics() method.
195        Default is False.
196    mass_feature_attribute_filter_dict : dict, optional
197        Dictionary specifying filtering criteria for mass feature attributes.
198        Each key is an attribute name, and each value is a dict with 'value' and 'operator' keys.
199        
200        Structure: {attribute_name: {'value': threshold, 'operator': comparison}}
201        
202        Available operators:
203        - '>' or 'greater': Keep features where attribute > threshold
204        - '<' or 'less': Keep features where attribute < threshold  
205        - '>=' or 'greater_equal': Keep features where attribute >= threshold
206        - '<=' or 'less_equal': Keep features where attribute <= threshold
207        
208        Examples: 
209        {
210            'noise_score_max': {'value': 0.5, 'operator': '>'},  # Keep if noise_score_max > 0.5
211            'dispersity_index': {'value': 0.1, 'operator': '<'},  # Keep if dispersity_index < 0.1
212            'gaussian_similarity': {'value': 0.7, 'operator': '>='}  # Keep if gaussian_similarity >= 0.7
213        }
214        
215        Available attributes include: 'noise_score', 'noise_score_min', 'noise_score_max', 
216        'gaussian_similarity', 'tailing_factor', 'dispersity_index', 'half_height_width', 'intensity'.
217        Default is {"noise_score_max": {"value": 0.8, "operator": ">="},"noise_score_min": {"value": 0.5, "operator": ">="}},
218    peak_picking_method : str, optional
219        Peak picking method to use. See implemented_peak_picking_methods for options.
220        Default is 'persistent homology'.
221    implemented_peak_picking_methods : tuple, optional
222        Peak picking methods that can be implemented.
223        Default is ('persistent homology', 'centroided_persistent_homology').
224    ph_smooth_it : int, optional
225        Number of iterations to use for smoothing prior to finding mass features.
226        Used only for "persistent homology" peak picking method.
227        Called within the PHCalculations.find_mass_features_ph() method.
228        Default is 1.
229    ph_smooth_radius_mz : int, optional
230        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
231        Used only for "persistent homology" peak picking method.
232        Called within the PHCalculations.find_mass_features_ph() method.
233        Default is 0.
234    ph_smooth_radius_scan : int, optional
235        Radius in scan steps for smoothing prior to finding mass features.
236        Used only for "persistent homology" peak picking method.
237        Called within the PHCalculations.find_mass_features_ph() method.
238        Default is 1.
239    ph_inten_min_rel : float, optional
240        Relative minimum intensity to use for finding mass features for persistent homology.
241        Used only for "persistent homology" peak picking method.
242        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
243        Called within the PH_Calculations.find_mass_features() method.
244        Default is 0.001.
245    ph_persis_min_rel : float, optional
246        Relative minimum persistence for retaining mass features.
247        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
248        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
249        Should be greater to or equal to ph_inten_min_rel.
250        Called within the PH_Calculations.find_mass_features() method.
251        Default is 0.001.
252    mass_feature_cluster_mz_tolerance_rel : float, optional
253        Relative m/z tolerance to use for clustering mass features.
254        Used for both "persistent homology" and "centroided_persistent_homology"
255        peak picking methods.
256        Called with the PHCalculations.cluster_mass_features() and the
257        LCCalculations.deconvolute_ms1_mass_features() methods.
258        Default is 5e-6 (5 ppm).
259    mass_feature_cluster_rt_tolerance : float, optional
260        Retention time tolerance to use for clustering mass features, in minutes.
261        Used for both "persistent homology" and "centroided_persistent_homology"
262        peak picking methods.
263        Called with the PHCalculations.cluster_mass_features() and the
264        LCCalculations.deconvolute_ms1_mass_features() methods.
265        Default is 0.3.
266    ms1_scans_to_average : int, optional
267        Number of MS1 scans to average for mass-feature associated m/zs.
268        Called within the LCMSBase.add_associated_ms1() method.
269        Default is 1.
270    ms1_deconvolution_corr_min : float, optional
271        Minimum correlation to use for deconvoluting MS1 mass features.
272        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
273        Default is 0.8.
274    ms2_dda_rt_tolerance : float, optional
275        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes.
276        Called within the LCMSBase.add_associated_ms2_dda() method.
277        Default is 0.15.
278    ms2_dda_mz_tolerance : float, optional
279        Mass tolerance to use for associating MS2 spectra to mass features.
280        Called within the LCMSBase.add_associated_ms2_dda() method.
281        Default is 0.05.
282    ms2_min_fe_score : float, optional
283        Minimum flash entropy for retaining MS2 annotations.
284        Called within the LCMSSpectralSearch.fe_search() method.
285        Default is 0.2.
286    search_as_lipids : bool, optional
287        If True, prepare the database for lipid searching.
288        Called within the LCMSSpectralSearch.fe_prep_search_db() method.
289        Default is False.
290    include_fragment_types : bool, optional
291        If True, include fragment types in the database.
292        Called within the LCMSSpectralSearch.fe_search() and related methods.
293        Default is False.
294    export_profile_spectra : bool, optional
295        If True, export profile spectra data.
296        Default is False.
297    export_eics : bool, optional
298        If True, export extracted ion chromatograms.
299        Default is True.
300    export_unprocessed_ms1 : bool, optional
301        If True, export unprocessed MS1 data.
302        Default is False.
303    export_only_relevant_mass_spectra : bool, optional
304        If True, export only mass spectra associated with detected mass features:
305        specifically the apex MS1 scan for each mass feature and the best MS2 scan
306        for each mass feature (if available). If False, export all mass spectra.
307        This parameter reduces HDF5 file size by excluding non-feature spectra.
308        Default is False (backwards compatible - exports all spectra).
309    verbose_processing : bool, optional
310        If True, print verbose processing information.
311        Default is True.
312    """
313
314    scans: list | tuple = (-1, -1)
315
316    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
317    eic_tolerance_ppm: float = 5
318    correct_eic_baseline = True
319    smooth_window: int = 5
320    smooth_method: str = "savgol"
321    implemented_smooth_method: tuple = (
322        "savgol",
323        "hanning",
324        "blackman",
325        "bartlett",
326        "flat",
327        "boxcar",
328    )
329    savgol_pol_order: int = 2
330    consecutive_scan_min: int = 0
331    peak_height_max_percent: float = 10
332    peak_max_prominence_percent: float = 1
333    peak_derivative_threshold: float = 0.0005
334    min_peak_datapoints: float = 5
335    noise_threshold_method: str = "manual_relative_abundance"
336    noise_threshold_methods_implemented: tuple = (
337        "auto_relative_abundance",
338        "manual_relative_abundance",
339        "second_derivative",
340    )
341    peak_height_min_percent: float = 0.1
342    eic_signal_threshold: float = 0.01
343    eic_buffer_time = 1.5
344    dispersity_index_window: float = 3.0  # minutes
345    noise_window_factor: float = 2.0  # times the peak width for detemining SN for EIC
346
347    # Parameters used for filtering mass features after peak picking
348    remove_redundant_mass_features: bool = False
349    redundant_scan_frequency_min: float = 0.1
350    redundant_feature_retain_n: int = 3
351    remove_mass_features_by_peak_metrics: bool = False
352    # note that this is a dictionary of dictionaries and set in __post_init__ instead of here
353    mass_feature_attribute_filter_dict: Dict = dataclasses.field(default_factory=dict)
354
355    # Parameters used for 2D peak picking
356    peak_picking_method: str = "persistent homology"
357    implemented_peak_picking_methods: tuple = (
358        "persistent homology",
359        "centroided_persistent_homology",
360    )
361
362    # Parameters used in persistent homology calculations
363    ph_smooth_it = 1
364    ph_smooth_radius_mz = 0
365    ph_smooth_radius_scan = 1
366    ph_inten_min_rel = 0.001
367    ph_persis_min_rel = 0.001
368
369    # Parameters used to cluster mass features
370    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
371    mass_feature_cluster_rt_tolerance: float = 0.3
372
373    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
374    ms1_scans_to_average: int = 1
375    ms1_deconvolution_corr_min: float = 0.8
376    ms2_dda_rt_tolerance: float = 0.15
377    ms2_dda_mz_tolerance: float = 0.05
378
379    # Parameters used for flash entropy searching and database preparation
380    ms2_min_fe_score: float = 0.2
381    search_as_lipids: bool = False
382    include_fragment_types: bool = False
383
384    # Parameters used for saving the data
385    export_profile_spectra: bool = False
386    export_eics: bool = True
387    export_unprocessed_ms1: bool = False
388    export_only_relevant_mass_spectra: bool = False
389
390    # Parameters used for verbose processing
391    verbose_processing: bool = True
392
393    def __post_init__(self):
394        # Set default values for mass_feature_attribute_filter_dict if empty
395        if not self.mass_feature_attribute_filter_dict:
396            self.mass_feature_attribute_filter_dict = {
397                "noise_score_max": {"value": 0.8, "operator": ">="},
398                "noise_score_min": {"value": 0.5, "operator": ">="},
399            }
400        
401        # enforce datatype
402        for field in dataclasses.fields(self):
403            value = getattr(self, field.name)
404            if not isinstance(value, field.type):
405                value = field.type(value)
406                setattr(self, field.name, value)

Liquid chromatograph processing settings class

Attributes
  • scans (list or tuple, optional): List of select scan to average or a tuple containing the range to average. Default is (-1, -1).
  • eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
  • correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
  • smooth_method (str, optional): Smoothing method to use. See implemented_smooth_method for options. Default is 'savgol'.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • consecutive_scan_min (int, optional): Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended.
  • peak_height_max_percent (float, optional): 1-100 % used for baseline detection; use 0.1 for second_derivative and 10 for other methods. Default is 10.
  • peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
  • min_peak_datapoints (float, optional): Minimum number of data points to define a chromatographic peak. Default is 5.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Noise threshold detection methods that can be used. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
  • dispersity_index_window (float, optional): Dispersity index window size, in minutes. Default is 3.0.
  • noise_window_factor (float, optional): Factor to determine noise estimation window size relative to peak width. Larger values use wider windows for noise estimation. For example, a value of 2.0 uses a window size equal to twice the peak width (depending on its start and end scans) on each side. Called within the LCMSMassFeature.calc_noise_score() method. Default is 2.0.
  • remove_redundant_mass_features (bool, optional): If True, remove redundant mass features that are likely contaminants based on their m/z values and scan frequency. Especially useful for HILIC data where signals do not return to baseline between peaks or for data with significant background noise. Called within the LC_Calculations.find_mass_features() method. Default is False.
  • redundant_scan_frequency_min (float, optional): Minimum fraction of scans that must contain the m/z to be considered a likely noise/contaminant when using remove_redundant_mass_features. Default is 0.1 (10% of scans).
  • redundant_feature_retain_n (int, optional): Number of features to retain in each group when using remove_redundant_mass_features. Default is 3.
  • remove_mass_features_by_peak_metrics (bool, optional): If True, remove mass features based on their peak metrics such as S/N, Gaussian similarity, dispersity index, and noise score. Called within the LC_Calculations.add_peak_metrics() method. Default is False.
  • mass_feature_attribute_filter_dict (dict, optional): Dictionary specifying filtering criteria for mass feature attributes. Each key is an attribute name, and each value is a dict with 'value' and 'operator' keys.

    Structure: {attribute_name: {'value': threshold, 'operator': comparison}}

    Available operators:

    • '>' or 'greater': Keep features where attribute > threshold
    • '<' or 'less': Keep features where attribute < threshold
    • '>=' or 'greater_equal': Keep features where attribute >= threshold
    • '<=' or 'less_equal': Keep features where attribute <= threshold

    Examples: { 'noise_score_max': {'value': 0.5, 'operator': '>'}, # Keep if noise_score_max > 0.5 'dispersity_index': {'value': 0.1, 'operator': '<'}, # Keep if dispersity_index < 0.1 'gaussian_similarity': {'value': 0.7, 'operator': '>='} # Keep if gaussian_similarity >= 0.7 }

    Available attributes include: 'noise_score', 'noise_score_min', 'noise_score_max', 'gaussian_similarity', 'tailing_factor', 'dispersity_index', 'half_height_width', 'intensity'. Default is {"noise_score_max": {"value": 0.8, "operator": ">="},"noise_score_min": {"value": 0.5, "operator": ">="}},

  • peak_picking_method (str, optional): Peak picking method to use. See implemented_peak_picking_methods for options. Default is 'persistent homology'.
  • implemented_peak_picking_methods (tuple, optional): Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
  • ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
  • ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features for persistent homology. Used only for "persistent homology" peak picking method. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5e-6 (5 ppm).
  • mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
  • ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
  • ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
  • ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
  • ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
  • ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
  • search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
  • include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
  • export_profile_spectra (bool, optional): If True, export profile spectra data. Default is False.
  • export_eics (bool, optional): If True, export extracted ion chromatograms. Default is True.
  • export_unprocessed_ms1 (bool, optional): If True, export unprocessed MS1 data. Default is False.
  • export_only_relevant_mass_spectra (bool, optional): If True, export only mass spectra associated with detected mass features: specifically the apex MS1 scan for each mass feature and the best MS2 scan for each mass feature (if available). If False, export all mass spectra. This parameter reduces HDF5 file size by excluding non-feature spectra. Default is False (backwards compatible - exports all spectra).
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
LiquidChromatographSetting( scans: list | tuple = (-1, -1), eic_tolerance_ppm: float = 5, smooth_window: int = 5, smooth_method: str = 'savgol', implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), savgol_pol_order: int = 2, consecutive_scan_min: int = 0, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, peak_derivative_threshold: float = 0.0005, min_peak_datapoints: float = 5, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), peak_height_min_percent: float = 0.1, eic_signal_threshold: float = 0.01, dispersity_index_window: float = 3.0, noise_window_factor: float = 2.0, remove_redundant_mass_features: bool = False, redundant_scan_frequency_min: float = 0.1, redundant_feature_retain_n: int = 3, remove_mass_features_by_peak_metrics: bool = False, mass_feature_attribute_filter_dict: Dict = <factory>, peak_picking_method: str = 'persistent homology', implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology'), mass_feature_cluster_mz_tolerance_rel: float = 5e-06, mass_feature_cluster_rt_tolerance: float = 0.3, ms1_scans_to_average: int = 1, ms1_deconvolution_corr_min: float = 0.8, ms2_dda_rt_tolerance: float = 0.15, ms2_dda_mz_tolerance: float = 0.05, ms2_min_fe_score: float = 0.2, search_as_lipids: bool = False, include_fragment_types: bool = False, export_profile_spectra: bool = False, export_eics: bool = True, export_unprocessed_ms1: bool = False, export_only_relevant_mass_spectra: bool = False, verbose_processing: bool = True)
scans: list | tuple = (-1, -1)
eic_tolerance_ppm: float = 5
correct_eic_baseline = True
smooth_window: int = 5
smooth_method: str = 'savgol'
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
savgol_pol_order: int = 2
consecutive_scan_min: int = 0
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
peak_derivative_threshold: float = 0.0005
min_peak_datapoints: float = 5
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
peak_height_min_percent: float = 0.1
eic_signal_threshold: float = 0.01
eic_buffer_time = 1.5
dispersity_index_window: float = 3.0
noise_window_factor: float = 2.0
remove_redundant_mass_features: bool = False
redundant_scan_frequency_min: float = 0.1
redundant_feature_retain_n: int = 3
remove_mass_features_by_peak_metrics: bool = False
mass_feature_attribute_filter_dict: Dict
peak_picking_method: str = 'persistent homology'
implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology')
ph_smooth_it = 1
ph_smooth_radius_mz = 0
ph_smooth_radius_scan = 1
ph_inten_min_rel = 0.001
ph_persis_min_rel = 0.001
mass_feature_cluster_mz_tolerance_rel: float = 5e-06
mass_feature_cluster_rt_tolerance: float = 0.3
ms1_scans_to_average: int = 1
ms1_deconvolution_corr_min: float = 0.8
ms2_dda_rt_tolerance: float = 0.15
ms2_dda_mz_tolerance: float = 0.05
ms2_min_fe_score: float = 0.2
search_as_lipids: bool = False
include_fragment_types: bool = False
export_profile_spectra: bool = False
export_eics: bool = True
export_unprocessed_ms1: bool = False
export_only_relevant_mass_spectra: bool = False
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpectrumSetting:
@dataclasses.dataclass
class MassSpectrumSetting:
    """Mass spectrum processing settings class

    Attributes
    ----------
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that are implemented.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method.
        Note that this is a percentage value (0-100). Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for log noise threshold method. Default is 0.463
        (0.463 for mFT, 1.0 for aFT).
    noise_threshold_log_nsigma_bins : int, optional
        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis.
        Default is 3. Recommend 3 for reduced profile data or if peak picking faults;
        0 keeps the normal behaviour.
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that are implemented. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this tolerance is the
        initial matching tolerance. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5. (Exact semantics are not visible in this class; see the calibration code.)
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    # --- noise thresholding -------------------------------------------------
    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )
    # used by the 'minima' method
    noise_threshold_min_std: int = 6
    # used by the 'signal_noise' method
    noise_threshold_min_s2n: float = 4
    # used by the 'relative_abundance' method, expressed on a 0-100 scale
    noise_threshold_min_relative_abundance: float = 6
    # used by the 'absolute_abundance' method
    noise_threshold_absolute_abundance: float = 1_000_000
    # used by the 'log' method
    noise_threshold_log_nsigma: int = 6
    # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_corr_factor: float = 0.463
    # number of histogram bins used for the noise estimate
    noise_threshold_log_nsigma_bins: int = 500

    # --- m/z windows ----------------------------------------------------------
    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0
    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # Number of data points (per direction) by which the mz axis is extrapolated
    # and the abundance axis is zero padded; fixes peak picking at the spectrum
    # limits. 0 keeps the normal behaviour, 3 is the typical value for the fix.
    picking_point_extrapolate: int = 3

    # --- calibration ----------------------------------------------------------
    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # Enforce the annotated datatype: any value that is not already an
        # instance of its field's type is converted by calling that type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

Mass spectrum processing settings class

Attributes
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
  • noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
  • noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
  • noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
  • noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
  • noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
  • noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
  • noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
  • noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
  • noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
  • min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
  • max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
  • picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
  • calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
  • calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
  • max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
  • min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
  • calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
  • calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
  • calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
  • do_calibration (bool, optional): If True, perform calibration. Default is True.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MassSpectrumSetting( noise_threshold_method: str = 'log', noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'), noise_threshold_min_std: int = 6, noise_threshold_min_s2n: float = 4, noise_threshold_min_relative_abundance: float = 6, noise_threshold_absolute_abundance: float = 1000000, noise_threshold_log_nsigma: int = 6, noise_threshold_log_nsigma_corr_factor: float = 0.463, noise_threshold_log_nsigma_bins: int = 500, noise_min_mz: float = 50.0, noise_max_mz: float = 1200.0, min_picking_mz: float = 50.0, max_picking_mz: float = 1200.0, picking_point_extrapolate: int = 3, calib_minimize_method: str = 'Powell', calib_pol_order: int = 2, max_calib_ppm_error: float = 1.0, min_calib_ppm_error: float = -1.0, calib_sn_threshold: float = 2.0, calibration_ref_match_method: str = 'legacy', calibration_ref_match_method_implemented: tuple = ('legacy', 'merged'), calibration_ref_match_tolerance: float = 0.003, calibration_ref_match_std_raw_error_limit: float = 1.5, do_calibration: bool = True, verbose_processing: bool = True)
noise_threshold_method: str = 'log'
noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')
noise_threshold_min_std: int = 6
noise_threshold_min_s2n: float = 4
noise_threshold_min_relative_abundance: float = 6
noise_threshold_absolute_abundance: float = 1000000
noise_threshold_log_nsigma: int = 6
noise_threshold_log_nsigma_corr_factor: float = 0.463
noise_threshold_log_nsigma_bins: int = 500
noise_min_mz: float = 50.0
noise_max_mz: float = 1200.0
min_picking_mz: float = 50.0
max_picking_mz: float = 1200.0
picking_point_extrapolate: int = 3
calib_minimize_method: str = 'Powell'
calib_pol_order: int = 2
max_calib_ppm_error: float = 1.0
min_calib_ppm_error: float = -1.0
calib_sn_threshold: float = 2.0
calibration_ref_match_method: str = 'legacy'
calibration_ref_match_method_implemented: tuple = ('legacy', 'merged')
calibration_ref_match_tolerance: float = 0.003
calibration_ref_match_std_raw_error_limit: float = 1.5
do_calibration: bool = True
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpecPeakSetting:
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Mass spectrum peak processing settings class

    Values whose type does not match the field annotation are converted in
    ``__post_init__``; no range validation is performed by this class.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        An empty (or omitted) value is replaced by {'C': 1, 'H': 2}.
        Anything accepted by ``dict()`` (e.g. a list of pairs) is converted to a dict.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Should be a value between 0 and 100.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 0 and 100.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 0 and 100.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit centroid.
        Defaults to False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1

    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection

    min_peak_datapoints: float = 5  # 0-inf used for peak detection

    peak_max_prominence_percent: float = 0.1  # 0-100 % used for baseline detection

    peak_height_max_percent: float = 10  # 0-100 % used for baseline detection

    legacy_resolving_power: bool = (
        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
    )

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        # Local import: the module only imports List and Dict from typing.
        from typing import get_origin

        # default to CH2
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}
        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            # typing aliases such as Dict cannot be instantiated
            # ("Type Dict cannot be instantiated"), so resolve them to the
            # runtime class (dict) before checking and converting.
            target = get_origin(field.type) or field.type
            if not isinstance(value, target):
                setattr(self, field.name, target(value))

Mass spectrum peak processing settings class

Attributes
  • kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
  • kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
  • implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
  • peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 0 and 100. Defaults to 0.1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 0 and 100. Defaults to 0.1.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
  • legacy_centroid_polyfit (bool, optional): Use legacy (numpy polyfit) to fit centroid. Defaults to False.
MassSpecPeakSetting( kendrick_base: Dict = <factory>, kendrick_rounding_method: str = 'floor', implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round'), peak_derivative_threshold: float = 0.0, peak_min_prominence_percent: float = 0.1, min_peak_datapoints: float = 5, peak_max_prominence_percent: float = 0.1, peak_height_max_percent: float = 10, legacy_resolving_power: bool = True, legacy_centroid_polyfit: bool = False)
kendrick_base: Dict
kendrick_rounding_method: str = 'floor'
implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round')
peak_derivative_threshold: float = 0.0
peak_min_prominence_percent: float = 0.1
min_peak_datapoints: float = 5
peak_max_prominence_percent: float = 0.1
peak_height_max_percent: float = 10
legacy_resolving_power: bool = True
legacy_centroid_polyfit: bool = False
@dataclasses.dataclass
class GasChromatographSetting:
@dataclasses.dataclass
class GasChromatographSetting:
    """Gas chromatograph processing settings class

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that are implemented.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'.
        Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
        Use 0.1 for 'second_derivative' and 10 for the other noise threshold methods.
        Defaults to 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
        Defaults to 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
        Defaults to 0.1.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that are implemented.
        Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    # --- smoothing ------------------------------------------------------------
    implemented_smooth_method: tuple = (
        "savgol", "hanning", "blackman", "bartlett", "flat", "boxcar",
    )
    smooth_window: int = 5
    smooth_method: str = "savgol"
    savgol_pol_order: int = 2

    # --- peak / baseline detection --------------------------------------------
    peak_derivative_threshold: float = 0.0005
    # 1-100 %, baseline detection; use 0.1 for second_derivative and 10 for other methods
    peak_height_max_percent: float = 10
    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 1
    min_peak_datapoints: float = 5
    max_peak_width: float = 0.1

    # --- noise thresholding ---------------------------------------------------
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance", "manual_relative_abundance", "second_derivative",
    )
    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1
    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1
    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01
    # minutes, maximum distance allowance for the hierarchical cluster
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        # Enforce the annotated datatype: any value that is not already an
        # instance of its field's type is converted by calling that type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

Gas chromatograph processing settings class

Attributes
  • use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
  • smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • std_noise_threshold (int, optional): Default is 3.
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
GasChromatographSetting( use_deconvolution: bool = False, implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), smooth_window: int = 5, smooth_method: str = 'savgol', savgol_pol_order: int = 2, peak_derivative_threshold: float = 0.0005, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, min_peak_datapoints: float = 5, max_peak_width: float = 0.1, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), std_noise_threshold: int = 3, peak_height_min_percent: float = 0.1, peak_min_prominence_percent: float = 0.1, eic_signal_threshold: float = 0.01, max_rt_distance: float = 0.025, verbose_processing: bool = True)
use_deconvolution: bool = False
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
smooth_window: int = 5
smooth_method: str = 'savgol'
savgol_pol_order: int = 2
peak_derivative_threshold: float = 0.0005
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
min_peak_datapoints: float = 5
max_peak_width: float = 0.1
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
std_noise_threshold: int = 3
peak_height_min_percent: float = 0.1
peak_min_prominence_percent: float = 0.1
eic_signal_threshold: float = 0.01
max_rt_distance: float = 0.025
verbose_processing: bool = True
@dataclasses.dataclass
class CompoundSearchSettings:
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
        When the SPECTRAL_GCMS_DATABASE_URL environment variable is set, its value
        replaces whatever was passed here.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. An empty (or omitted) value is
        replaced by ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculates and exports all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Default is 'All'.

    """

    # No credentials belong in source code: point at a real server through the
    # SPECTRAL_GCMS_DATABASE_URL environment variable (or pass a URL explicitly).
    url_database: str = "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # Local import: the module only imports List and Dict from typing.
        from typing import get_origin

        # The environment variable keeps its precedence over the constructor
        # argument; when it is unset, the caller's value (or the sqlite
        # default) is kept instead of being overwritten.
        env_url = os.getenv("SPECTRAL_GCMS_DATABASE_URL")
        if env_url is not None:
            self.url_database = env_url

        # Fill in the default calibration compounds only when the caller did
        # not supply any; a fresh list is built for every instance.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            # typing aliases such as List cannot be instantiated
            # ("Type List cannot be instantiated"), so resolve them to the
            # runtime class (list) before checking and converting.
            target = get_origin(field.type) or field.type
            if not isinstance(value, target):
                setattr(self, field.name, target(value))

Settings for compound search

Attributes
  • url_database (str, optional): URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
  • ri_search_range (float, optional): Retention index search range. Default is 35.
  • rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
  • correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
  • score_threshold (float, optional): Threshold for composite score. Default is 0.0.
  • ri_spacing (float, optional): Retention index spacing. Default is 200.
  • ri_std (float, optional): Retention index standard deviation. Default is 3.
  • ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
CompoundSearchSettings( url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres', ri_search_range: float = 35, rt_search_range: float = 1.0, correlation_threshold: float = 0.5, score_threshold: float = 0.0, ri_spacing: float = 200, ri_std: float = 3, ri_calibration_compound_names: List = <factory>, exploratory_mode: bool = False, score_methods: tuple = ('highest_sim_score', 'highest_ss'), output_score_method: str = 'All')
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres'
ri_search_range: float = 35
rt_search_range: float = 1.0
correlation_threshold: float = 0.5
score_threshold: float = 0.0
ri_spacing: float = 200
ri_std: float = 3
ri_calibration_compound_names: List
exploratory_mode: bool = False
score_methods: tuple = ('highest_sim_score', 'highest_ss')
output_score_method: str = 'All'
class MolecularLookupDictSettings:
class MolecularLookupDictSettings:
    """Parameters used to build the molecular formula lookup database.

    The values below drive the generation of the database entries, so they
    should stay untouched unless a new application database is being created.
    For runtime search and database query options use
    MolecularFormulaSearchSettings instead.

    Attributes
    ----------
    usedAtoms : dict, optional
        Mapping of atom symbol to a (min, max) count range. Default is
        {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Lower m/z bound. Default is 50.
    max_mz : float, optional
        Upper m/z bound. Default is 1200.
    min_dbe : float, optional
        Lower double bond equivalent bound. Default is 0.
    max_dbe : float, optional
        Upper double bond equivalent bound. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, include radical ions. Default is True.
    isProtonated : bool, optional
        If True, include protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Mapping of atom symbol to valence. Default is
        {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    # Guidance for editing this class:
    #   * Leave the values alone for an existing database; they are what the
    #     database entries were generated from.
    #   * Do change them when creating a new application database.
    #   * C, H, N, O, S and P must ALWAYS be present in usedAtoms; to leave one
    #     of them out, set both its min and max to 0.
    #   * Any atom listed in the Atoms class (encapsulation.settings.constants)
    #     may be added; remember to add its covalence to used_atom_valences.
    #   * Adduct atoms have zero covalence.
    #   * Everything is assigned per instance (no static variables) because
    #     this class is distributed using multiprocessing.
    def __init__(self):
        element_ranges = (
            ("C", (1, 90)),
            ("H", (4, 200)),
            ("O", (0, 12)),
            ("N", (0, 0)),
            ("S", (0, 0)),
            ("P", (0, 0)),
            ("Cl", (0, 0)),
        )
        self.usedAtoms = dict(element_ranges)

        self.min_mz, self.max_mz = 50, 1200

        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, replaces the dbe limits above with
        # DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = True
        self.isProtonated = True

        self.url_database = None
        self.db_jobs = 1

        valence_table = (
            ("C", 4),
            ("13C", 4),
            ("H", 1),
            ("O", 2),
            ("18O", 2),
            ("N", 3),
            ("S", 2),
            ("34S", 2),
            ("P", 3),
            ("Cl", 1),
            ("37Cl", 1),
            ("Br", 1),
            ("Na", 1),
            ("F", 1),
            ("K", 0),
        )
        self.used_atom_valences = dict(valence_table)

Settings for molecular searching

These are used to generate the database entries, do not change.

Attributes
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
  • min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
  • max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • isRadical (bool, optional): If True, search for radical ions. Default is True.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • url_database (str, optional): URL for the database. Default is None.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
usedAtoms
min_mz
max_mz
min_dbe
max_dbe
use_pah_line_rule
isRadical
isProtonated
url_database
db_jobs
used_atom_valences
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
 876@dataclasses.dataclass
 877class MolecularFormulaSearchSettings:
 878    """Settings for molecular searching
 879
 880    Attributes
 881    ----------
 882    use_isotopologue_filter : bool, optional
 883        If True, use isotopologue filter. Default is False.
 884    isotopologue_filter_threshold : float, optional
 885        Threshold for isotopologue filter. Default is 33.
 886    isotopologue_filter_atoms : tuple, optional
 887        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
 888    use_runtime_kendrick_filter : bool, optional
 889        If True, use runtime Kendrick filter. Default is False.
 890    use_min_peaks_filter : bool, optional
 891        If True, use minimum peaks filter. Default is True.
 892    min_peaks_per_class : int, optional
 893        Minimum number of peaks per class. Default is 15.
 894    url_database : str, optional
 895        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
 896    db_jobs : int, optional
 897        Number of jobs to use for database queries. Default is 3.
 898    db_chunk_size : int, optional
 899        Chunk size to use for database queries. Default is 300.
 900    ion_charge : int, optional
 901        Ion charge. Default is -1.
 902    min_hc_filter : float, optional
 903        Minimum hydrogen to carbon ratio. Default is 0.3.
 904    max_hc_filter : float, optional
 905        Maximum hydrogen to carbon ratio. Default is 3.
 906    min_oc_filter : float, optional
 907        Minimum oxygen to carbon ratio. Default is 0.0.
 908    max_oc_filter : float, optional
 909        Maximum oxygen to carbon ratio. Default is 1.2.
 910    min_op_filter : float, optional
 911        Minimum oxygen to phosphorous ratio. Default is 2.
 912    use_pah_line_rule : bool, optional
 913        If True, use the PAH line rule. Default is False.
 914    min_dbe : float, optional
 915        Minimum double bond equivalent to use for searching. Default is 0.
 916    max_dbe : float, optional
 917        Maximum double bond equivalent to use for searching. Default is 40.
 918    mz_error_score_weight : float, optional
 919        Weight for m/z error score to contribute to composite score. Default is 0.6.
 920    isotopologue_score_weight : float, optional
 921        Weight for isotopologue score to contribute to composite score. Default is 0.4.
 922    adduct_atoms_neg : tuple, optional
 923        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
 924    adduct_atoms_pos : tuple, optional
 925        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
 926    score_methods : tuple, optional
 927        Tuple of score method that can be implemented.
 928        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
 929    score_method : str, optional
 930        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
 931    output_min_score : float, optional
 932        Minimum score for output. Default is 0.1.
 933    output_score_method : str, optional
 934        Score method to use for output. Default is 'All Candidates'.
 935    isRadical : bool, optional
 936        If True, search for radical ions. Default is False.
 937    isProtonated : bool, optional
 938        If True, search for protonated ions. Default is True.
 939    isAdduct : bool, optional
 940        If True, search for adduct ions. Default is False.
 941    usedAtoms : dict, optional
 942        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
 943    ion_types_excluded : list, optional
 944        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
 945    ionization_type : str, optional
 946        Ionization type. Default is 'ESI'.
 947    min_ppm_error : float, optional
 948        Minimum ppm error. Default is -10.0.
 949    max_ppm_error : float, optional
 950        Maximum ppm error. Default is 10.0.
 951    min_abun_error : float, optional
 952        Minimum abundance error for isotolopologue search. Default is -100.0.
 953    max_abun_error : float, optional
 954        Maximum abundance error for isotolopologue search. Default is 100.0.
 955    mz_error_range : float, optional
 956        m/z error range. Default is 1.5.
 957    error_method : str, optional
 958        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'.
 959    mz_error_average : float, optional
 960        m/z error average. Default is 0.0.
 961    used_atom_valences : dict, optional
 962        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
 963    verbose_processing: bool, optional
 964        If True, print verbose processing information. Default is True.
 965    """
 966
 967    verbose_processing: bool = True
 968
 969    use_isotopologue_filter: bool = False
 970
 971    isotopologue_filter_threshold: float = 33
 972
 973    isotopologue_filter_atoms: tuple = ("Cl", "Br")
 974
 975    use_runtime_kendrick_filter: bool = False
 976
 977    use_min_peaks_filter: bool = True
 978
 979    min_peaks_per_class: int = 15
 980
 981    url_database: str = (
 982        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
 983    )
 984
 985    db_jobs: int = 3
 986
 987    db_chunk_size: int = 300
 988
 989    # query setting========
 990    ion_charge: int = -1
 991
 992    min_hc_filter: float = 0.3
 993
 994    max_hc_filter: float = 3
 995
 996    min_oc_filter: float = 0.0
 997
 998    max_oc_filter: float = 1.2
 999
1000    min_op_filter: float = 2
1001
1002    use_pah_line_rule: bool = False
1003
1004    min_dbe: float = 0
1005
1006    max_dbe: float = 40
1007
1008    mz_error_score_weight: float = 0.6
1009
1010    isotopologue_score_weight: float = 0.4
1011
1012    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms
1013    adduct_atoms_neg: tuple = ("Cl", "Br")
1014
1015    adduct_atoms_pos: tuple = ("Na", "K")
1016
1017    score_methods: tuple = (
1018        "S_P_lowest_error",
1019        "N_S_P_lowest_error",
1020        "lowest_error",
1021        "prob_score",
1022        "air_filter_error",
1023        "water_filter_error",
1024        "earth_filter_error",
1025    )
1026
1027    score_method: str = "prob_score"
1028
1029    output_min_score: float = 0.1
1030
1031    output_score_method: str = "All Candidates"
1032
1033    # depending on the polarity mode it looks for [M].+ , [M].-
1034    # query and automatically compile add entry if it doesn't exist
1035
1036    isRadical: bool = False
1037
1038    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
1039    # query and automatically compile and push options if it doesn't exist
1040    isProtonated: bool = True
1041
1042    isAdduct: bool = False
1043
1044    usedAtoms: dict = dataclasses.field(default_factory=dict)
1045    ion_types_excluded: list = dataclasses.field(default_factory=list)
1046
1047    # search setting ========
1048
1049    ionization_type: str = "ESI"
1050
1051    # empirically set / needs optimization
1052    min_ppm_error: float = -10.0  # ppm
1053
1054    # empirically set / needs optimization
1055    max_ppm_error: float = 10.0  # ppm
1056
1057    # empirically set / needs optimization set for isotopologue search
1058    min_abun_error: float = -100.0  # percentage
1059
1060    # empirically set / needs optimization set for isotopologue search
1061    max_abun_error: float = 100.0  # percentage
1062
1063    # empirically set / needs optimization
1064    mz_error_range: float = 1.5
1065
1066    # 'distance', 'lowest', 'symmetrical','average' 'None'
1067    error_method: str = "None"
1068
1069    mz_error_average: float = 0.0
1070
1071    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
1072    used_atom_valences: dict = dataclasses.field(default_factory=dict)
1073
1074    def __post_init__(self):
1075        if not self.url_database or self.url_database == "":
1076            self.url_database = os.getenv(
1077                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
1078            )
1079        # enforce datatype
1080        for field in dataclasses.fields(self):
1081            value = getattr(self, field.name)
1082            if not isinstance(value, field.type):
1083                value = field.type(value)
1084                setattr(self, field.name, value)
1085
1086        # enforce C and H if either do not exists
1087        if "C" not in self.usedAtoms.keys():
1088            self.usedAtoms["C"] = (1, 100)
1089        if "H" not in self.usedAtoms.keys():
1090            self.usedAtoms["H"] = (1, 200)
1091
1092        # add cummon values
1093        current_used_atoms = self.used_atom_valences.keys()
1094
1095        for atom in Atoms.atoms_covalence.keys():
1096            if atom not in current_used_atoms:
1097                covalence = Atoms.atoms_covalence.get(atom)
1098
1099                if isinstance(covalence, int):
1100                    self.used_atom_valences[atom] = covalence
1101
1102                else:
1103                    # will get the first number of all possible covalances, which should be the most commum
1104                    self.used_atom_valences[atom] = covalence[0]

Settings for molecular searching

Attributes
  • use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
  • isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
  • isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
  • use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
  • use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
  • min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
  • url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
  • db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
  • ion_charge (int, optional): Ion charge. Default is -1.
  • min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
  • max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
  • min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
  • max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
  • min_op_filter (float, optional): Minimum oxygen to phosphorous ratio. Default is 2.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
  • mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
  • isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
  • adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
  • adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
  • score_methods (tuple, optional): Tuple of available score methods. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
  • score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
  • output_min_score (float, optional): Minimum score for output. Default is 0.1.
  • output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
  • isRadical (bool, optional): If True, search for radical ions. Default is False.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • isAdduct (bool, optional): If True, search for adduct ions. Default is False.
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is an empty dict; 'C': (1, 100) and 'H': (1, 200) are added on initialization when missing.
  • ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
  • ionization_type (str, optional): Ionization type. Default is 'ESI'.
  • min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
  • max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
  • min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
  • max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
  • mz_error_range (float, optional): m/z error range. Default is 1.5.
  • error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
  • mz_error_average (float, optional): m/z error average. Default is 0.0.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is an empty dict; on initialization every atom in Atoms.atoms_covalence that is not already present is added with its covalence (the first value when several are listed).
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MolecularFormulaSearchSettings( verbose_processing: bool = True, use_isotopologue_filter: bool = False, isotopologue_filter_threshold: float = 33, isotopologue_filter_atoms: tuple = ('Cl', 'Br'), use_runtime_kendrick_filter: bool = False, use_min_peaks_filter: bool = True, min_peaks_per_class: int = 15, url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp', db_jobs: int = 3, db_chunk_size: int = 300, ion_charge: int = -1, min_hc_filter: float = 0.3, max_hc_filter: float = 3, min_oc_filter: float = 0.0, max_oc_filter: float = 1.2, min_op_filter: float = 2, use_pah_line_rule: bool = False, min_dbe: float = 0, max_dbe: float = 40, mz_error_score_weight: float = 0.6, isotopologue_score_weight: float = 0.4, adduct_atoms_neg: tuple = ('Cl', 'Br'), adduct_atoms_pos: tuple = ('Na', 'K'), score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'), score_method: str = 'prob_score', output_min_score: float = 0.1, output_score_method: str = 'All Candidates', isRadical: bool = False, isProtonated: bool = True, isAdduct: bool = False, usedAtoms: dict = <factory>, ion_types_excluded: list = <factory>, ionization_type: str = 'ESI', min_ppm_error: float = -10.0, max_ppm_error: float = 10.0, min_abun_error: float = -100.0, max_abun_error: float = 100.0, mz_error_range: float = 1.5, error_method: str = 'None', mz_error_average: float = 0.0, used_atom_valences: dict = <factory>)
verbose_processing: bool = True
use_isotopologue_filter: bool = False
isotopologue_filter_threshold: float = 33
isotopologue_filter_atoms: tuple = ('Cl', 'Br')
use_runtime_kendrick_filter: bool = False
use_min_peaks_filter: bool = True
min_peaks_per_class: int = 15
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'
db_jobs: int = 3
db_chunk_size: int = 300
ion_charge: int = -1
min_hc_filter: float = 0.3
max_hc_filter: float = 3
min_oc_filter: float = 0.0
max_oc_filter: float = 1.2
min_op_filter: float = 2
use_pah_line_rule: bool = False
min_dbe: float = 0
max_dbe: float = 40
mz_error_score_weight: float = 0.6
isotopologue_score_weight: float = 0.4
adduct_atoms_neg: tuple = ('Cl', 'Br')
adduct_atoms_pos: tuple = ('Na', 'K')
score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error')
score_method: str = 'prob_score'
output_min_score: float = 0.1
output_score_method: str = 'All Candidates'
isRadical: bool = False
isProtonated: bool = True
isAdduct: bool = False
usedAtoms: dict
ion_types_excluded: list
ionization_type: str = 'ESI'
min_ppm_error: float = -10.0
max_ppm_error: float = 10.0
min_abun_error: float = -100.0
max_abun_error: float = 100.0
mz_error_range: float = 1.5
error_method: str = 'None'
mz_error_average: float = 0.0
used_atom_valences: dict