corems.encapsulation.factory.processingSetting

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jul 02, 2019"
  3
  4import dataclasses
  5import os
  6from typing import List, Dict
  7
  8from corems.encapsulation.constant import Atoms, Labels
  9
 10
 11@dataclasses.dataclass
 12class TransientSetting:
 13    """Transient processing settings class
 14
 15    Attributes
 16    ----------
 17    implemented_apodization_function : tuple
 18        Available apodization functions
 19    apodization_method : str
 20        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
 21        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
 22    number_of_truncations : int
 23        How many times to truncate the transient prior to Fourier transform
 24    number_of_zero_fills : int
 25        How many times to zero fille the transient prior to Fourier transform.
 26    next_power_of_two : bool
 27        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
 28    kaiser_beta : float
 29        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular,  5 is similar to Hamming,
 30        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
 31
 32    """
 33
 34    implemented_apodization_function: tuple = (
 35        "Hamming",
 36        "Hanning",
 37        "Blackman",
 38        "Full-Sine",
 39        "Half-Sine",
 40        "Kaiser",
 41        "Half-Kaiser",
 42        "Rectangle",
 43    )
 44    apodization_method: str = "Hanning"
 45    number_of_truncations: int = 0
 46    number_of_zero_fills: int = 1
 47    next_power_of_two: bool = False
 48    kaiser_beta: float = 8.6
 49
 50    def __post_init__(self):
 51        # enforce datatype
 52        for field in dataclasses.fields(self):
 53            value = getattr(self, field.name)
 54            if not isinstance(value, field.type):
 55                value = field.type(value)
 56                setattr(self, field.name, value)
 57
 58
 59@dataclasses.dataclass
 60class DataInputSetting:
 61    """Data input settings class
 62
 63    Attributes
 64    ----------
 65    header_translate : dict
 66        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
 67    """
 68
 69    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
 70    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
 71    header_translate: dict = dataclasses.field(default_factory=dict)
 72
 73    def __post_init__(self):
 74        self.header_translate = {
 75            "m/z": Labels.mz,
 76            "mOz": Labels.mz,
 77            "Mass": Labels.mz,
 78            "Resolving Power": Labels.rp,
 79            "Res.": Labels.rp,
 80            "resolution": Labels.rp,
 81            "Intensity": Labels.abundance,
 82            "Peak Height": Labels.abundance,
 83            "I": Labels.abundance,
 84            "Abundance": Labels.abundance,
 85            "abs_abu": Labels.abundance,
 86            "Signal/Noise": Labels.s2n,
 87            "S/N": Labels.s2n,
 88            "sn": Labels.s2n,
 89        }
 90
 91    def add_mz_label(self, label):
 92        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
 93        self.header_translate[label] = Labels.mz
 94
 95    def add_peak_height_label(self, label):
 96        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
 97
 98        self.header_translate[label] = Labels.abundance
 99
100    def add_sn_label(self, label):
101        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
102        self.header_translate[label] = Labels.s2n
103
104    def add_resolving_power_label(self, label):
105        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
106        self.header_translate[label] = Labels.rp
107
108
109@dataclasses.dataclass
110class LiquidChromatographSetting:
111    """Liquid chromatograph processing settings class
112
113    Attributes
114    ----------
115    scans : list or tuple, optional
116        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
117    eic_tolerance_ppm : float, optional
118        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
119    correct_eic_baseline : bool, optional
120        If True, correct the baseline of the extracted ion chromatogram. Default is True.
121    smooth_window : int, optional
122        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
123    smooth_method : str, optional
124        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
125    implemented_smooth_method : tuple, optional
126        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
127    savgol_pol_order : int, optional
128        Polynomial order for Savitzky-Golay smoothing. Default is 2.
129    peak_height_max_percent : float, optional
130        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
131    peak_max_prominence_percent : float, optional
132        1-100 % used for baseline detection. Default is 1.
133    peak_derivative_threshold : float, optional
134        Threshold for defining derivative crossing. Default is 0.0005.
135    min_peak_datapoints : float, optional
136        minimum data point to define a chromatografic peak. Default is 5.
137    noise_threshold_method : str, optional
138        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
139    noise_threshold_methods_implemented : tuple, optional
140        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
141    peak_height_min_percent : float, optional
142        0-100 % used for peak detection. Default is 0.1.
143    eic_signal_threshold : float, optional
144        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
145    eic_buffer_time : float, optional
146        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
147    peak_picking_method : str, optional
148        Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
149    implemented_peak_picking_methods : tuple, optional
150        Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
151    ph_smooth_it : int, optional
152        Number of iterations to use for smoothing prior to finding mass features.
153        Used only for "persistent homology" peak picking method.
154        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
155    ph_smooth_radius_mz : int, optional
156        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
157        Used only for "persistent homology" peak picking method.
158        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
159    ph_smooth_radius_scan : int, optional
160        Radius in scan steps for smoothing prior to finding mass features.
161        Used only for "persistent homology" peak picking method.
162        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
163    ph_inten_min_rel : int, optional
164        Relative minimum intensity to use for finding mass features for persistent homology.
165        Used only for "persistent homology" peak picking method.
166        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
167        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
168    ph_persis_min_rel : int, optional
169        Relative minimum persistence for retaining mass features.
170        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
171        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
172        Should be greater to or equal to ph_inten_min_rel.
173        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
174    mass_feature_cluster_mz_tolerance_rel : float, optional
175        Relative m/z tolerance to use for clustering mass features.
176        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
177        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
178        Default is 5E-6 (5 ppm).
179    mass_feature_cluster_rt_tolerance : float, optional
180        Retention time tolerance to use for clustering mass features, in minutes.
181        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
182        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
183        Default is 0.2.
184    ms1_scans_to_average : int, optional
185        Number of MS1 scans to average for mass-feature associated m/zs.
186        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
187    ms1_deconvolution_corr_min : float, optional
188        Minimum correlation to use for deconvoluting MS1 mass features.
189        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
190        Default is 0.8.
191    ms2_dda_rt_tolerance : float, optional
192        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
193    ms2_dda_mz_tolerance : float, optional
194        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
195    ms2_min_fe_score : float, optional
196        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
197    search_as_lipids : bool, optional
198        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
199    include_fragment_types : bool, optional
200        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
201    verbose_processing : bool, optional
202        If True, print verbose processing information. Default is True.
203    """
204
205    scans: list | tuple = (-1, -1)
206
207    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
208    eic_tolerance_ppm: float = 5
209    correct_eic_baseline = True
210    smooth_window: int = 5
211    smooth_method: str = "savgol"
212    implemented_smooth_method: tuple = (
213        "savgol",
214        "hanning",
215        "blackman",
216        "bartlett",
217        "flat",
218        "boxcar",
219    )
220    savgol_pol_order: int = 2
221    peak_height_max_percent: float = 10
222    peak_max_prominence_percent: float = 1
223    peak_derivative_threshold: float = 0.0005
224    min_peak_datapoints: float = 5
225    noise_threshold_method: str = "manual_relative_abundance"
226    noise_threshold_methods_implemented: tuple = (
227        "auto_relative_abundance",
228        "manual_relative_abundance",
229        "second_derivative",
230    )
231    peak_height_min_percent: float = 0.1
232    eic_signal_threshold: float = 0.01
233    eic_buffer_time = 1.5
234
235    # Parameters used for 2D peak picking
236    peak_picking_method: str = "persistent homology"
237    implemented_peak_picking_methods: tuple = (
238        "persistent homology",
239        "centroided_persistent_homology",
240    )
241
242    # Parameters used in persistent homology calculations
243    ph_smooth_it = 1
244    ph_smooth_radius_mz = 0
245    ph_smooth_radius_scan = 1
246    ph_inten_min_rel = 0.001
247    ph_persis_min_rel = 0.001
248
249    # Parameters used to cluster mass features
250    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
251    mass_feature_cluster_rt_tolerance: float = 0.3
252
253    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
254    ms1_scans_to_average: int = 1
255    ms1_deconvolution_corr_min: float = 0.8
256    ms2_dda_rt_tolerance: float = 0.15
257    ms2_dda_mz_tolerance: float = 0.05
258
259    # Parameters used for flash entropy searching and database preparation
260    ms2_min_fe_score: float = 0.2
261    search_as_lipids: bool = False
262    include_fragment_types: bool = False
263
264    # Parameters used for saving the data
265    export_profile_spectra: bool = False
266    export_eics: bool = True
267    export_unprocessed_ms1: bool = False
268
269    # Parameters used for verbose processing
270    verbose_processing: bool = True
271
272    def __post_init__(self):
273        # enforce datatype
274        for field in dataclasses.fields(self):
275            value = getattr(self, field.name)
276            if not isinstance(value, field.type):
277                value = field.type(value)
278                setattr(self, field.name, value)
279
280
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings used when processing a mass spectrum.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Noise threshold detection method. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that can be implemented. Default is
        ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding with the 'minima' method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding with the 'signal_noise' method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding with the 'relative_abundance' method.
        This is a percentage (0-100). Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding with the 'absolute_abundance' method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations used by the 'log' method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for the 'log' method (mFT is 0.463, aFT is 1.0). Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of histogram bins used by the 'log' method. Default is 500.
    noise_min_mz : float, optional
        Minimum m/z to use for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Maximum m/z to use for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Minimum m/z to use for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Maximum m/z to use for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        How many data points (in each direction) to extrapolate the mz axis and
        0 pad the abundance axis. Default is 3.
        Recommend 3 for reduced profile data or if peak picking faults.
    calib_minimize_method : str, optional
        Minimization method to use for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order to use for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error to use for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error to use for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold to use for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for
        recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that can be selected. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        If using the new method for calibration reference mass matching, this
        tolerance is the initial matching tolerance. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5. Presumably a limit on the standard deviation of the raw
        error during reference matching; confirm against the calibration code.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    # One threshold per noise threshold method
    noise_threshold_min_std: int = 6  # 'minima' method
    noise_threshold_min_s2n: float = 4  # 'signal_noise' method
    noise_threshold_min_relative_abundance: float = 6  # 'relative_abundance' method, from 0-100
    noise_threshold_absolute_abundance: float = 1_000_000  # 'absolute_abundance' method

    # Parameters of the 'log' method
    noise_threshold_log_nsigma: int = 6
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # Number of data points (in each direction) used to extrapolate the mz axis
    # and 0 pad the abundance axis; this fixes peak picking issues at the
    # spectrum limits. Use 0 to keep normal behaviour, typical value 3 to fix.
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5
    # calib_ref_mzs: list = [0]

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        # Cast any value that is not already of its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
394
395
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Settings used when processing mass spectrum peaks.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Elements and their counts in the Kendrick base.
        Defaults to {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass; valid values are
        'floor', 'ceil' or 'round'. Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing, between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage (1-100) used for peak detection.
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points (0 to infinity) used for peak detection.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage (1-100) used for baseline detection.
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage (1-100) used for baseline detection.
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit the centroid. Defaults to False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
    kendrick_rounding_method: str = "floor"
    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    # derivative crossing threshold, 0-1
    peak_derivative_threshold: float = 0.0

    # peak detection: 1-100 % prominence and 0-inf data points
    peak_min_prominence_percent: float = 0.1
    min_peak_datapoints: float = 5

    # baseline detection, both 1-100 %
    peak_max_prominence_percent: float = 0.1
    peak_height_max_percent: float = 10

    # True selects the legacy (CoreMS v1) resolving power calculation
    legacy_resolving_power: bool = True
    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        # An empty Kendrick base falls back to CH2.
        self.kendrick_base = self.kendrick_base or {"C": 1, "H": 2}

        # Cast any value that is not already of its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
466
467
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings used when processing gas chromatography data.

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that can be implemented. Default is
        ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'. Other options are
        'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing, between 0 and 1.
        Defaults to 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage (1-100) used for baseline detection; use 0.1
        for second_derivative and 10 for other methods. Defaults to 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage (1-100) used for baseline detection.
        Defaults to 1.
    min_peak_datapoints : float, optional
        Minimum number of data points (0 to infinity) used for peak detection.
        Defaults to 5.
    max_peak_width : float, optional
        Maximum peak width (0 to infinity) used for peak detection.
        Defaults to 0.1.
    noise_threshold_method : str, optional
        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that can be implemented. Default is
        ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    # Smoothing
    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )
    smooth_window: int = 5
    smooth_method: str = "savgol"
    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    # Baseline detection, both 1-100 %; for peak_height_max_percent use 0.1
    # for second_derivative and 10 for other methods
    peak_height_max_percent: float = 10
    peak_max_prominence_percent: float = 1

    min_peak_datapoints: float = 5
    max_peak_width: float = 0.1

    # Noise threshold
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )
    std_noise_threshold: int = 3

    # Peak detection, both 0-100 %
    peak_height_min_percent: float = 0.1
    peak_min_prominence_percent: float = 0.1

    # 0-100 % used for extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01

    # minutes, max distance allowance for hierarchical cluster
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        # Cast any value that is not already of its annotated type.
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
575
576
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
        When the default is in use, the SPECTRAL_GCMS_DATABASE_URL environment
        variable (if set) replaces it. A different URL passed explicitly is kept.
        Database credentials must be supplied this way and never hard-coded here.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Used for retention index
        calibration. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Used for calibration.
        Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When
        left empty it defaults to ['Methyl Caprylate', 'Methyl Caprate',
        'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate',
        'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate',
        'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate',
        'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculates and exports all spectral similarity methods.
        Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss'). Presumably the
        available scoring methods; confirm against the search code.
    output_score_method : str, optional
        Default is 'All'. Presumably selects which score method is output;
        confirm against the search code.
    """

    url_database: str = "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        """Resolve the database URL, coerce field types and fill default names."""
        # Must stay identical to the ``url_database`` field default above.
        fallback_url = "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite"
        # Only consult the environment when the caller kept the default, so an
        # explicitly passed URL is no longer silently replaced.
        if self.url_database == fallback_url:
            self.url_database = os.getenv("SPECTRAL_GCMS_DATABASE_URL", fallback_url)

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            # typing aliases such as ``List`` cannot be instantiated; use the
            # runtime class they wrap (``list``) for the check and the cast.
            expected = getattr(field.type, "__origin__", field.type)
            if not isinstance(value, expected):
                setattr(self, field.name, expected(value))

        # Fall back to the standard calibration compounds only when the caller
        # did not provide any names.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]
653
654
class MolecularLookupDictSettings:
    """Settings used to generate the molecular lookup database entries.

    These values define the database entries and should not be changed,
    except when creating a new application database.

    Attributes
    ----------
    usedAtoms : dict, optional
        Atoms and their (min, max) ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Atoms and their valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    """

    # DO NOT CHANGE these values: they are used to generate the database entries.
    # DO change them when creating a new application database.
    # For runtime search settings and database query checks, use the
    # MolecularFormulaSearchSettings class below.
    #
    # C, H, N, O, S and P atoms are ALWAYS needed in usedAtoms; to leave one of
    # them out, set its min and max to 0. Any atom listed in the Atoms class
    # inside the encapsulation.settings.constants module can be included; make
    # sure to add the selected covalence to used_atom_valences when adding new
    # atoms.
    # NOTE: adduct atoms have zero covalence.
    # NOTE: static variables are not used because this class is distributed
    # using multiprocessing.
    def __init__(self):
        atom_ranges = {
            "C": (1, 90),
            "H": (4, 200),
            "O": (0, 12),
            "N": (0, 0),
            "S": (0, 0),
            "P": (0, 0),
            "Cl": (0, 0),
        }
        valences = {
            "C": 4,
            "13C": 4,
            "H": 1,
            "O": 2,
            "18O": 2,
            "N": 3,
            "S": 2,
            "34S": 2,
            "P": 3,
            "Cl": 1,
            "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }

        self.usedAtoms = atom_ranges
        self.min_mz, self.max_mz = 50, 1200
        self.min_dbe, self.max_dbe = 0, 50
        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False
        self.isRadical, self.isProtonated = True, True
        self.url_database = None
        self.db_jobs = 1
        self.used_atom_valences = valences
746
747
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Runtime settings for the molecular formula search and database query.

    After construction, ``__post_init__`` (in this order) replaces an empty
    ``url_database``, coerces every field to its annotated type, makes sure
    ``usedAtoms`` contains 'C' and 'H', and completes ``used_atom_valences``
    from ``Atoms.atoms_covalence``.

    Attributes
    ----------
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is
        'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        A falsy value (None or '') is replaced by the COREMS_DATABASE_URL
        environment variable, or 'sqlite:///db/molformula.db' when that is unset.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight of the m/z error score in the composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight of the isotopologue score in the composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Adduct atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Adduct atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Score methods that can be implemented.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use, one of ``score_methods``. Default is 'prob_score'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Atom symbol mapped to its (min, max) range. Default is an empty dict;
        'C': (1, 100) and 'H': (1, 200) are added whenever those keys are missing.
    ion_types_excluded : list, optional
        Ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or
        ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error (percentage) for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error (percentage) for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Atom symbol mapped to its valence, e.g. {'C': 4, 'H': 1}. Default is an
        empty dict; every atom of ``Atoms.atoms_covalence`` that is not already
        a key is added (the first entry is used when the covalence is not an int).
    """

    verbose_processing: bool = True

    # ---- filters ----
    use_isotopologue_filter: bool = False
    isotopologue_filter_threshold: float = 33
    isotopologue_filter_atoms: tuple = ("Cl", "Br")
    use_runtime_kendrick_filter: bool = False
    use_min_peaks_filter: bool = True
    min_peaks_per_class: int = 15

    # ---- database access ----
    # NOTE(review): this default URL embeds a user name and password.
    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )
    db_jobs: int = 3
    db_chunk_size: int = 300

    # ---- query settings ----
    ion_charge: int = -1
    min_hc_filter: float = 0.3
    max_hc_filter: float = 3
    min_oc_filter: float = 0.0
    max_oc_filter: float = 1.2
    min_op_filter: float = 2
    use_pah_line_rule: bool = False
    min_dbe: float = 0
    max_dbe: float = 40
    mz_error_score_weight: float = 0.6
    isotopologue_score_weight: float = 0.4

    # closed-shell ions [M + Adduct]+ are only searched for the atoms listed
    # in the adduct tuples below
    adduct_atoms_neg: tuple = ("Cl", "Br")
    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )
    score_method: str = "prob_score"
    output_min_score: float = 0.1
    output_score_method: str = "All Candidates"

    # [M].+ / [M].- depending on the polarity mode;
    # queried, and compiled/added automatically when the entry does not exist
    isRadical: bool = False

    # [M + H]+ / [M - H]+ depending on the polarity mode;
    # queried, and compiled/pushed automatically when it does not exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # ---- search settings ----
    ionization_type: str = "ESI"

    # the error windows below were set empirically and still need optimization
    min_ppm_error: float = -10.0  # ppm
    max_ppm_error: float = 10.0  # ppm
    min_abun_error: float = -100.0  # percentage, isotopologue search
    max_abun_error: float = 100.0  # percentage, isotopologue search
    mz_error_range: float = 1.5

    # one of 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # e.g. {'C': 4, 'H': 1, ...}
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Normalise the field values right after the generated ``__init__``."""
        # an unset URL falls back to the environment, then to a local SQLite file;
        # this has to happen before the type coercion below (str(None) == "None")
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # enforce the annotated datatype of every field
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

        # C and H are mandatory: add them when absent
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # complete the valence table with the common values
        valences = self.used_atom_valences
        for atom, covalence in Atoms.atoms_covalence.items():
            if atom in valences:
                continue
            # a non-int covalence lists every possible value; the first one
            # should be the most common
            valences[atom] = covalence if isinstance(covalence, int) else covalence[0]
@dataclasses.dataclass
class TransientSetting:
@dataclasses.dataclass
class TransientSetting:
    """Settings that control how a transient is processed.

    Attributes
    ----------
    implemented_apodization_function : tuple
        Names of the apodization functions that are available.
    apodization_method : str
        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        How many times to truncate the transient prior to Fourier transform.
    number_of_zero_fills : int
        How many times to zero fill the transient prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function. 0 is rectangular,
        5 is similar to Hamming, 6 is similar to Hanning, and 8.6 is similar to Blackman
        (from numpy docs).

    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        """Convert every field whose value is not of the annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            # the annotation doubles as the converter, e.g. int("2") or tuple([...])
            setattr(self, spec.name, spec.type(current))

Transient processing settings class

Attributes
  • implemented_apodization_function (tuple): Available apodization functions
  • apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
  • number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
  • number_of_zero_fills (int): How many times to zero fill the transient prior to Fourier transform.
  • next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
  • kaiser_beta (float): Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
TransientSetting( implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle'), apodization_method: str = 'Hanning', number_of_truncations: int = 0, number_of_zero_fills: int = 1, next_power_of_two: bool = False, kaiser_beta: float = 8.6)
implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle')
apodization_method: str = 'Hanning'
number_of_truncations: int = 0
number_of_zero_fills: int = 1
next_power_of_two: bool = False
kaiser_beta: float = 8.6
@dataclasses.dataclass
class DataInputSetting:
 60@dataclasses.dataclass
 61class DataInputSetting:
 62    """Data input settings class
 63
 64    Attributes
 65    ----------
 66    header_translate : dict
 67        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
 68    """
 69
 70    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
 71    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
 72    header_translate: dict = dataclasses.field(default_factory=dict)
 73
 74    def __post_init__(self):
 75        self.header_translate = {
 76            "m/z": Labels.mz,
 77            "mOz": Labels.mz,
 78            "Mass": Labels.mz,
 79            "Resolving Power": Labels.rp,
 80            "Res.": Labels.rp,
 81            "resolution": Labels.rp,
 82            "Intensity": Labels.abundance,
 83            "Peak Height": Labels.abundance,
 84            "I": Labels.abundance,
 85            "Abundance": Labels.abundance,
 86            "abs_abu": Labels.abundance,
 87            "Signal/Noise": Labels.s2n,
 88            "S/N": Labels.s2n,
 89            "sn": Labels.s2n,
 90        }
 91
 92    def add_mz_label(self, label):
 93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
 94        self.header_translate[label] = Labels.mz
 95
 96    def add_peak_height_label(self, label):
 97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
 98
 99        self.header_translate[label] = Labels.abundance
100
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n
104
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Data input settings class

Attributes
  • header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
DataInputSetting(header_translate: dict = <factory>)
header_translate: dict
def add_mz_label(self, label):
92    def add_mz_label(self, label):
93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
94        self.header_translate[label] = Labels.mz

Add a label to the header_translate dictionary to be translated to the corems label for mz.

def add_peak_height_label(self, label):
96    def add_peak_height_label(self, label):
97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
98
99        self.header_translate[label] = Labels.abundance

Add a label to the header_translate dictionary to be translated to the corems label for peak height.

def add_sn_label(self, label):
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n

Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.

def add_resolving_power_label(self, label):
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Add a label to the header_translate dictionary to be translated to the corems label for resolving power.

@dataclasses.dataclass
class LiquidChromatographSetting:
110@dataclasses.dataclass
111class LiquidChromatographSetting:
112    """Liquid chromatograph processing settings class
113
114    Attributes
115    ----------
116    scans : list or tuple, optional
117        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
118    eic_tolerance_ppm : float, optional
119        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
120    correct_eic_baseline : bool, optional
121        If True, correct the baseline of the extracted ion chromatogram. Default is True.
122    smooth_window : int, optional
123        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
124    smooth_method : str, optional
125        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
126    implemented_smooth_method : tuple, optional
127        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
128    savgol_pol_order : int, optional
129        Polynomial order for Savitzky-Golay smoothing. Default is 2.
130    peak_height_max_percent : float, optional
131        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
132    peak_max_prominence_percent : float, optional
133        1-100 % used for baseline detection. Default is 1.
134    peak_derivative_threshold : float, optional
135        Threshold for defining derivative crossing. Default is 0.0005.
136    min_peak_datapoints : float, optional
137        minimum data point to define a chromatografic peak. Default is 5.
138    noise_threshold_method : str, optional
139        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
140    noise_threshold_methods_implemented : tuple, optional
141        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
142    peak_height_min_percent : float, optional
143        0-100 % used for peak detection. Default is 0.1.
144    eic_signal_threshold : float, optional
145        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
146    eic_buffer_time : float, optional
147        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
148    peak_picking_method : str, optional
149        Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
150    implemented_peak_picking_methods : tuple, optional
151        Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
152    ph_smooth_it : int, optional
153        Number of iterations to use for smoothing prior to finding mass features.
154        Used only for "persistent homology" peak picking method.
155        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
156    ph_smooth_radius_mz : int, optional
157        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
158        Used only for "persistent homology" peak picking method.
159        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
160    ph_smooth_radius_scan : int, optional
161        Radius in scan steps for smoothing prior to finding mass features.
162        Used only for "persistent homology" peak picking method.
163        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
164    ph_inten_min_rel : int, optional
165        Relative minimum intensity to use for finding mass features for persistent homology.
166        Used only for "persistent homology" peak picking method.
167        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
168        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
169    ph_persis_min_rel : int, optional
170        Relative minimum persistence for retaining mass features.
171        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
172        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
173        Should be greater to or equal to ph_inten_min_rel.
174        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
175    mass_feature_cluster_mz_tolerance_rel : float, optional
176        Relative m/z tolerance to use for clustering mass features.
177        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
178        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
179        Default is 5E-6 (5 ppm).
180    mass_feature_cluster_rt_tolerance : float, optional
181        Retention time tolerance to use for clustering mass features, in minutes.
182        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
183        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
184        Default is 0.2.
185    ms1_scans_to_average : int, optional
186        Number of MS1 scans to average for mass-feature associated m/zs.
187        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
188    ms1_deconvolution_corr_min : float, optional
189        Minimum correlation to use for deconvoluting MS1 mass features.
190        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
191        Default is 0.8.
192    ms2_dda_rt_tolerance : float, optional
193        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
194    ms2_dda_mz_tolerance : float, optional
195        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
196    ms2_min_fe_score : float, optional
197        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
198    search_as_lipids : bool, optional
199        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
200    include_fragment_types : bool, optional
201        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
202    verbose_processing : bool, optional
203        If True, print verbose processing information. Default is True.
204    """
205
206    scans: list | tuple = (-1, -1)
207
208    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
209    eic_tolerance_ppm: float = 5
210    correct_eic_baseline = True
211    smooth_window: int = 5
212    smooth_method: str = "savgol"
213    implemented_smooth_method: tuple = (
214        "savgol",
215        "hanning",
216        "blackman",
217        "bartlett",
218        "flat",
219        "boxcar",
220    )
221    savgol_pol_order: int = 2
222    peak_height_max_percent: float = 10
223    peak_max_prominence_percent: float = 1
224    peak_derivative_threshold: float = 0.0005
225    min_peak_datapoints: float = 5
226    noise_threshold_method: str = "manual_relative_abundance"
227    noise_threshold_methods_implemented: tuple = (
228        "auto_relative_abundance",
229        "manual_relative_abundance",
230        "second_derivative",
231    )
232    peak_height_min_percent: float = 0.1
233    eic_signal_threshold: float = 0.01
234    eic_buffer_time = 1.5
235
236    # Parameters used for 2D peak picking
237    peak_picking_method: str = "persistent homology"
238    implemented_peak_picking_methods: tuple = (
239        "persistent homology",
240        "centroided_persistent_homology",
241    )
242
243    # Parameters used in persistent homology calculations
244    ph_smooth_it = 1
245    ph_smooth_radius_mz = 0
246    ph_smooth_radius_scan = 1
247    ph_inten_min_rel = 0.001
248    ph_persis_min_rel = 0.001
249
250    # Parameters used to cluster mass features
251    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
252    mass_feature_cluster_rt_tolerance: float = 0.3
253
254    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
255    ms1_scans_to_average: int = 1
256    ms1_deconvolution_corr_min: float = 0.8
257    ms2_dda_rt_tolerance: float = 0.15
258    ms2_dda_mz_tolerance: float = 0.05
259
260    # Parameters used for flash entropy searching and database preparation
261    ms2_min_fe_score: float = 0.2
262    search_as_lipids: bool = False
263    include_fragment_types: bool = False
264
265    # Parameters used for saving the data
266    export_profile_spectra: bool = False
267    export_eics: bool = True
268    export_unprocessed_ms1: bool = False
269
270    # Parameters used for verbose processing
271    verbose_processing: bool = True
272
273    def __post_init__(self):
274        # enforce datatype
275        for field in dataclasses.fields(self):
276            value = getattr(self, field.name)
277            if not isinstance(value, field.type):
278                value = field.type(value)
279                setattr(self, field.name, value)

Liquid chromatograph processing settings class

Attributes
  • scans (list or tuple, optional): List of selected scans to average, or a tuple containing the range to average. Default is (-1, -1).
  • eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
  • correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
  • smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • peak_height_max_percent (float, optional): 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
  • peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
  • min_peak_datapoints (float, optional): Minimum number of data points to define a chromatographic peak. Default is 5.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
  • peak_picking_method (str, optional): Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
  • implemented_peak_picking_methods (tuple, optional): Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
  • ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
  • ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features for persistent homology. Used only for "persistent homology" peak picking method. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called within the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
  • mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called within the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
  • ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
  • ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
  • ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
  • ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
  • ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
  • search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
  • include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
LiquidChromatographSetting( scans: list | tuple = (-1, -1), eic_tolerance_ppm: float = 5, smooth_window: int = 5, smooth_method: str = 'savgol', implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), savgol_pol_order: int = 2, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, peak_derivative_threshold: float = 0.0005, min_peak_datapoints: float = 5, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), peak_height_min_percent: float = 0.1, eic_signal_threshold: float = 0.01, peak_picking_method: str = 'persistent homology', implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology'), mass_feature_cluster_mz_tolerance_rel: float = 5e-06, mass_feature_cluster_rt_tolerance: float = 0.3, ms1_scans_to_average: int = 1, ms1_deconvolution_corr_min: float = 0.8, ms2_dda_rt_tolerance: float = 0.15, ms2_dda_mz_tolerance: float = 0.05, ms2_min_fe_score: float = 0.2, search_as_lipids: bool = False, include_fragment_types: bool = False, export_profile_spectra: bool = False, export_eics: bool = True, export_unprocessed_ms1: bool = False, verbose_processing: bool = True)
scans: list | tuple = (-1, -1)
eic_tolerance_ppm: float = 5
correct_eic_baseline = True
smooth_window: int = 5
smooth_method: str = 'savgol'
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
savgol_pol_order: int = 2
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
peak_derivative_threshold: float = 0.0005
min_peak_datapoints: float = 5
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
peak_height_min_percent: float = 0.1
eic_signal_threshold: float = 0.01
eic_buffer_time = 1.5
peak_picking_method: str = 'persistent homology'
implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology')
ph_smooth_it = 1
ph_smooth_radius_mz = 0
ph_smooth_radius_scan = 1
ph_inten_min_rel = 0.001
ph_persis_min_rel = 0.001
mass_feature_cluster_mz_tolerance_rel: float = 5e-06
mass_feature_cluster_rt_tolerance: float = 0.3
ms1_scans_to_average: int = 1
ms1_deconvolution_corr_min: float = 0.8
ms2_dda_rt_tolerance: float = 0.15
ms2_dda_mz_tolerance: float = 0.05
ms2_min_fe_score: float = 0.2
search_as_lipids: bool = False
include_fragment_types: bool = False
export_profile_spectra: bool = False
export_eics: bool = True
export_unprocessed_ms1: bool = False
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpectrumSetting:
282@dataclasses.dataclass
283class MassSpectrumSetting:
284    """Mass spectrum processing settings class
285
286    Attributes
287    ----------
288    noise_threshold_method : str, optional
289        Method for detecting noise threshold. Default is 'log'.
290    noise_threshold_methods_implemented : tuple, optional
291        Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
292    noise_threshold_min_std : int, optional
293        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
294    noise_threshold_min_s2n : float, optional
295        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
296    noise_threshold_min_relative_abundance : float, optional
297        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
298    noise_threshold_absolute_abundance : float, optional
299        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
300    noise_threshold_log_nsigma : int, optional
301        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
302    noise_threshold_log_nsigma_corr_factor : float, optional
303        Correction factor for log noise threshold method. Default is 0.463.
304    noise_threshold_log_nsigma_bins : int, optional
305        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
306    noise_min_mz : float, optional
307        Minimum m/z to use for noise thresholding. Default is 50.0.
308    noise_max_mz : float, optional
309        Maximum m/z to use for noise thresholding. Default is 1200.0.
310    min_picking_mz : float, optional
311        Minimum m/z to use for peak picking. Default is 50.0.
312    max_picking_mz : float, optional
313        Maximum m/z to use for peak picking. Default is 1200.0.
314    picking_point_extrapolate : int, optional
315        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
316        Recommend 3 for reduced profile data or if peak picking faults
317    calib_minimize_method : str, optional
318        Minimization method to use for calibration. Default is 'Powell'.
319    calib_pol_order : int, optional
320        Polynomial order to use for calibration. Default is 2.
321    max_calib_ppm_error : float, optional
322        Maximum ppm error to use for calibration. Default is 1.0.
323    min_calib_ppm_error : float, optional
324        Minimum ppm error to use for calibration. Default is -1.0.
325    calib_sn_threshold : float, optional
326        Signal to noise threshold to use for calibration. Default is 2.0.
327    calibration_ref_match_method: string, optional
328        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
329    calibration_ref_match_tolerance: float, optional
330        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
331    do_calibration : bool, optional
332        If True, perform calibration. Default is True.
333    verbose_processing : bool, optional
334        If True, print verbose processing information. Default is True.
335    """
336
337    noise_threshold_method: str = "log"
338
339    noise_threshold_methods_implemented: tuple = (
340        "minima",
341        "signal_noise",
342        "relative_abundance",
343        "absolute_abundance",
344        "log",
345    )
346
347    noise_threshold_min_std: int = 6  # when using 'minima' method
348
349    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method
350
351    noise_threshold_min_relative_abundance: float = (
352        6  # from 0-100, when using 'relative_abundance' method
353    )
354
355    noise_threshold_absolute_abundance: float = (
356        1_000_000  # when using 'absolute_abundance' method
357    )
358
359    noise_threshold_log_nsigma: int = 6  # when using 'log' method
360    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
361    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise
362
363    noise_min_mz: float = 50.0
364    noise_max_mz: float = 1200.0
365
366    min_picking_mz: float = 50.0
367    max_picking_mz: float = 1200.0
368
369    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
370    # This will fix peak picking at spectrum limit issues
371    #  0 to keep normal behaviour, typical value 3 to fix
372    picking_point_extrapolate: int = 3
373
374    calib_minimize_method: str = "Powell"
375    calib_pol_order: int = 2
376    max_calib_ppm_error: float = 1.0
377    min_calib_ppm_error: float = -1.0
378    calib_sn_threshold: float = 2.0
379    calibration_ref_match_method: str = "legacy"
380    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
381    calibration_ref_match_tolerance: float = 0.003
382    calibration_ref_match_std_raw_error_limit: float = 1.5
383    # calib_ref_mzs: list = [0]
384
385    do_calibration: bool = True
386    verbose_processing: bool = True
387
388    def __post_init__(self):
389        # enforce datatype
390        for field in dataclasses.fields(self):
391            value = getattr(self, field.name)
392            if not isinstance(value, field.type):
393                value = field.type(value)
394                setattr(self, field.name, value)

Mass spectrum processing settings class

Attributes
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
  • noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
  • noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
  • noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
  • noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
  • noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
  • noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
  • noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
  • noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
  • noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
  • min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
  • max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
  • picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
  • calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
  • calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
  • max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
  • min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
  • calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
  • calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
  • calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
  • do_calibration (bool, optional): If True, perform calibration. Default is True.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MassSpectrumSetting( noise_threshold_method: str = 'log', noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'), noise_threshold_min_std: int = 6, noise_threshold_min_s2n: float = 4, noise_threshold_min_relative_abundance: float = 6, noise_threshold_absolute_abundance: float = 1000000, noise_threshold_log_nsigma: int = 6, noise_threshold_log_nsigma_corr_factor: float = 0.463, noise_threshold_log_nsigma_bins: int = 500, noise_min_mz: float = 50.0, noise_max_mz: float = 1200.0, min_picking_mz: float = 50.0, max_picking_mz: float = 1200.0, picking_point_extrapolate: int = 3, calib_minimize_method: str = 'Powell', calib_pol_order: int = 2, max_calib_ppm_error: float = 1.0, min_calib_ppm_error: float = -1.0, calib_sn_threshold: float = 2.0, calibration_ref_match_method: str = 'legacy', calibration_ref_match_method_implemented: tuple = ('legacy', 'merged'), calibration_ref_match_tolerance: float = 0.003, calibration_ref_match_std_raw_error_limit: float = 1.5, do_calibration: bool = True, verbose_processing: bool = True)
noise_threshold_method: str = 'log'
noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')
noise_threshold_min_std: int = 6
noise_threshold_min_s2n: float = 4
noise_threshold_min_relative_abundance: float = 6
noise_threshold_absolute_abundance: float = 1000000
noise_threshold_log_nsigma: int = 6
noise_threshold_log_nsigma_corr_factor: float = 0.463
noise_threshold_log_nsigma_bins: int = 500
noise_min_mz: float = 50.0
noise_max_mz: float = 1200.0
min_picking_mz: float = 50.0
max_picking_mz: float = 1200.0
picking_point_extrapolate: int = 3
calib_minimize_method: str = 'Powell'
calib_pol_order: int = 2
max_calib_ppm_error: float = 1.0
min_calib_ppm_error: float = -1.0
calib_sn_threshold: float = 2.0
calibration_ref_match_method: str = 'legacy'
calibration_ref_match_method_implemented: tuple = ('legacy', 'merged')
calibration_ref_match_tolerance: float = 0.003
calibration_ref_match_std_raw_error_limit: float = 1.5
do_calibration: bool = True
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpecPeakSetting:
397@dataclasses.dataclass
398class MassSpecPeakSetting:
399    """Mass spectrum peak processing settings class
400
401    Attributes
402    ----------
403    kendrick_base : Dict, optional
404        Dictionary specifying the elements and their counts in the Kendrick base.
405        Defaults to {'C': 1, 'H': 2}.
406    kendrick_rounding_method : str, optional
407        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
408        Defaults to 'floor'.
409    implemented_kendrick_rounding_methods : tuple
410        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
411        Defaults to ('floor', 'ceil', 'round').
412    peak_derivative_threshold : float, optional
413        Threshold for defining derivative crossing. Should be a value between 0 and 1.
414        Defaults to 0.0.
415    peak_min_prominence_percent : float, optional
416        Minimum prominence percentage used for peak detection. Should be a value between 0 and 100.
417        Defaults to 0.1.
418    min_peak_datapoints : float, optional
419        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
420        Defaults to 5.
421    peak_max_prominence_percent : float, optional
422        Maximum prominence percentage used for baseline detection. Should be a value between 0 and 100.
423        Defaults to 0.1.
424    peak_height_max_percent : float, optional
425        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
426        Defaults to 10.
427    legacy_resolving_power : bool, optional
428        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
429        Defaults to True.
430    legacy_centroid_polyfit : bool, optional
431        Use legacy (numpy polyfit) to fit the centroid.
432        Default is False.
433    """
434
435    kendrick_base: Dict = dataclasses.field(default_factory=dict)
436
437    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
438
439    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")
440
441    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1
442
443    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection
444
445    min_peak_datapoints: float = 5  # 0-inf used for peak detection
446
447    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection
448
449    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection
450
451    legacy_resolving_power: bool = (
452        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
453    )
454
455    legacy_centroid_polyfit: bool = False
456
457    def __post_init__(self):
458        # default to CH2
459        if not self.kendrick_base:
460            self.kendrick_base = {"C": 1, "H": 2}
461        # enforce datatype
462        for field in dataclasses.fields(self):
463            value = getattr(self, field.name)
464            if not isinstance(value, field.type):
465                value = field.type(value)
466                setattr(self, field.name, value)

Mass spectrum peak processing settings class

Attributes
  • kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
  • kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
  • implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
  • peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 0 and 100. Defaults to 0.1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 0 and 100. Defaults to 0.1.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
  • legacy_centroid_polyfit (bool, optional): Use legacy (numpy polyfit) to fit the centroid. Default is False.
MassSpecPeakSetting( kendrick_base: Dict = <factory>, kendrick_rounding_method: str = 'floor', implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round'), peak_derivative_threshold: float = 0.0, peak_min_prominence_percent: float = 0.1, min_peak_datapoints: float = 5, peak_max_prominence_percent: float = 0.1, peak_height_max_percent: float = 10, legacy_resolving_power: bool = True, legacy_centroid_polyfit: bool = False)
kendrick_base: Dict
kendrick_rounding_method: str = 'floor'
implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round')
peak_derivative_threshold: float = 0.0
peak_min_prominence_percent: float = 0.1
min_peak_datapoints: float = 5
peak_max_prominence_percent: float = 0.1
peak_height_max_percent: float = 10
legacy_resolving_power: bool = True
legacy_centroid_polyfit: bool = False
@dataclasses.dataclass
class GasChromatographSetting:
469@dataclasses.dataclass
470class GasChromatographSetting:
471    """Gas chromatograph processing settings class
472
473    Attributes
474    ----------
475    use_deconvolution : bool, optional
476        If True, use deconvolution. Default is False.
477    implemented_smooth_method : tuple, optional
478        Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
479    smooth_window : int, optional
480        Window size for smoothing the ion chromatogram. Default is 5.
481    smooth_method : str, optional
482        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
483    savgol_pol_order : int, optional
484        Polynomial order for Savitzky-Golay smoothing. Default is 2.
485    peak_derivative_threshold : float, optional
486        Threshold for defining derivative crossing. Should be a value between 0 and 1.
487        Defaults to 0.0005.
488    peak_height_max_percent : float, optional
489        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
490        Defaults to 10.
491    peak_max_prominence_percent : float, optional
492        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
493        Defaults to 1.
494    min_peak_datapoints : float, optional
495        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
496        Defaults to 5.
497    max_peak_width : float, optional
498        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
499        Defaults to 0.1.
500    noise_threshold_method : str, optional
501        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
502    noise_threshold_methods_implemented : tuple, optional
503        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
504    std_noise_threshold : int, optional
505        Default is 3.
506    peak_height_min_percent : float, optional
507        0-100 % used for peak detection. Default is 0.1.
508    peak_min_prominence_percent : float, optional
509        0-100 % used for peak detection. Default is 0.1.
510    eic_signal_threshold : float, optional
511        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
512    max_rt_distance : float, optional
513        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
514    verbose_processing : bool, optional
515        If True, print verbose processing information. Default is True.
516    """
517
518    use_deconvolution: bool = False
519
520    implemented_smooth_method: tuple = (
521        "savgol",
522        "hanning",
523        "blackman",
524        "bartlett",
525        "flat",
526        "boxcar",
527    )
528
529    smooth_window: int = 5
530
531    smooth_method: str = "savgol"
532
533    savgol_pol_order: int = 2
534
535    peak_derivative_threshold: float = 0.0005
536
537    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods
538
539    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection
540
541    min_peak_datapoints: float = 5
542
543    max_peak_width: float = 0.1
544
545    noise_threshold_method: str = "manual_relative_abundance"
546
547    noise_threshold_methods_implemented: tuple = (
548        "auto_relative_abundance",
549        "manual_relative_abundance",
550        "second_derivative",
551    )
552
553    std_noise_threshold: int = 3
554
555    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection
556
557    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection
558
559    eic_signal_threshold: float = (
560        0.01  # 0-100 % used for extracted ion chromatogram peak detection
561    )
562
563    max_rt_distance: float = (
564        0.025  # minutes, max distance allowance hierarchical cluster
565    )
566
567    verbose_processing: bool = True
568
569    def __post_init__(self):
570        # enforce datatype
571        for field in dataclasses.fields(self):
572            value = getattr(self, field.name)
573            if not isinstance(value, field.type):
574                value = field.type(value)
575                setattr(self, field.name, value)

Gas chromatograph processing settings class

Attributes
  • use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
  • smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • std_noise_threshold (int, optional): Default is 3.
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
GasChromatographSetting( use_deconvolution: bool = False, implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), smooth_window: int = 5, smooth_method: str = 'savgol', savgol_pol_order: int = 2, peak_derivative_threshold: float = 0.0005, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, min_peak_datapoints: float = 5, max_peak_width: float = 0.1, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), std_noise_threshold: int = 3, peak_height_min_percent: float = 0.1, peak_min_prominence_percent: float = 0.1, eic_signal_threshold: float = 0.01, max_rt_distance: float = 0.025, verbose_processing: bool = True)
use_deconvolution: bool = False
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
smooth_window: int = 5
smooth_method: str = 'savgol'
savgol_pol_order: int = 2
peak_derivative_threshold: float = 0.0005
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
min_peak_datapoints: float = 5
max_peak_width: float = 0.1
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
std_noise_threshold: int = 3
peak_height_min_percent: float = 0.1
peak_min_prominence_percent: float = 0.1
eic_signal_threshold: float = 0.01
max_rt_distance: float = 0.025
verbose_processing: bool = True
@dataclasses.dataclass
class CompoundSearchSettings:
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When left at its declared default (or given as an
        empty value) it is resolved from the SPECTRAL_GCMS_DATABASE_URL environment
        variable, falling back to 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
        Any other URL supplied by the caller is kept as given.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When empty or not
        supplied, it is set to ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate',
        'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate',
        'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate',
        'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculates and exports all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Tuple of score methods. Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Score method to use for output. Default is 'All'.

    """

    # Declared default kept for interface compatibility; __post_init__ treats it
    # as a placeholder and resolves the effective URL from the environment.
    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        declared_fields = dataclasses.fields(self)

        # Only fall back to the environment / bundled SQLite database when the
        # caller did not provide a URL of their own; an explicit URL must win.
        placeholder_url = next(
            f.default for f in declared_fields if f.name == "url_database"
        )
        if not self.url_database or self.url_database == placeholder_url:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # enforce datatype
        for field in declared_fields:
            # typing aliases such as ``List`` cannot be instantiated, so coerce
            # with the runtime class they stand for (``list``) instead.
            target_type = getattr(field.type, "__origin__", None) or field.type
            value = getattr(self, field.name)
            if not isinstance(value, target_type):
                setattr(self, field.name, target_type(value))

        # Fill in the standard calibration compounds only when none were given,
        # so a caller-supplied list is not discarded.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

Settings for compound search

Attributes
  • url_database (str, optional): URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
  • ri_search_range (float, optional): Retention index search range. Default is 35.
  • rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
  • correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
  • score_threshold (float, optional): Threshold for composite score. Default is 0.0.
  • ri_spacing (float, optional): Retention index spacing. Default is 200.
  • ri_std (float, optional): Retention index standard deviation. Default is 3.
  • ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
CompoundSearchSettings( url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres', ri_search_range: float = 35, rt_search_range: float = 1.0, correlation_threshold: float = 0.5, score_threshold: float = 0.0, ri_spacing: float = 200, ri_std: float = 3, ri_calibration_compound_names: List = <factory>, exploratory_mode: bool = False, score_methods: tuple = ('highest_sim_score', 'highest_ss'), output_score_method: str = 'All')
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres'
ri_search_range: float = 35
rt_search_range: float = 1.0
correlation_threshold: float = 0.5
score_threshold: float = 0.0
ri_spacing: float = 200
ri_std: float = 3
ri_calibration_compound_names: List
exploratory_mode: bool = False
score_methods: tuple = ('highest_sim_score', 'highest_ss')
output_score_method: str = 'All'
class MolecularLookupDictSettings:
class MolecularLookupDictSettings:
    """Settings used to generate the molecular formula database entries

    These values define the database content; do not change them unless a new
    application database is being created. For runtime search settings and
    database query checks use the MolecularFormulaSearchSettings class instead.

    Attributes
    ----------
    usedAtoms : dict, optional
        Dictionary of atoms and (min, max) ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    # Usage notes:
    #   * C, H, N, O, S and P atoms are ALWAYS needed in usedAtoms; to leave one
    #     of them out of the search set both its min and max to 0.
    #   * Any atom listed in the Atoms class of the encapsulation constants
    #     module may be included.
    #   * When adding a new atom, also add its covalence to used_atom_valences.
    #   * Adduct atoms have zero covalence.
    #   * Values are set per instance (no static/class variables) because this
    #     class is distributed using multiprocessing.
    def __init__(self):
        # element symbol -> (min count, max count)
        self.usedAtoms = dict(
            C=(1, 90),
            H=(4, 200),
            O=(0, 12),
            N=(0, 0),
            S=(0, 0),
            P=(0, 0),
            Cl=(0, 0),
        )

        # m/z window
        self.min_mz, self.max_mz = 50, 1200

        # double bond equivalent window
        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        # ion types to generate
        self.isRadical = True
        self.isProtonated = True

        # database access
        self.url_database = None
        self.db_jobs = 1

        # atom / isotope symbol -> covalence
        self.used_atom_valences = {
            "C": 4, "13C": 4,
            "H": 1,
            "O": 2, "18O": 2,
            "N": 3,
            "S": 2, "34S": 2,
            "P": 3,
            "Cl": 1, "37Cl": 1,
            "Br": 1,
            "Na": 1,
            "F": 1,
            "K": 0,
        }

Settings for molecular searching

These are used to generate the database entries, do not change.

Attributes
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
  • min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
  • max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • isRadical (bool, optional): If True, search for radical ions. Default is True.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • url_database (str, optional): URL for the database. Default is None.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
usedAtoms
min_mz
max_mz
min_dbe
max_dbe
use_pah_line_rule
isRadical
isProtonated
url_database
db_jobs
used_atom_valences
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Runtime settings for molecular formula searching

    On initialization an empty `url_database` is resolved from the environment,
    every field is coerced to its annotated type, and `usedAtoms` and
    `used_atom_valences` are completed with required/common entries.

    Attributes
    ----------
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        An empty value is replaced by the COREMS_DATABASE_URL environment variable,
        or by 'sqlite:///db/molformula.db' when that variable is not set.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight for m/z error score to contribute to composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Tuple of score methods that can be used.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is an empty dict; on initialization
        'C': (1, 100) and 'H': (1, 200) are added when those keys are missing.
    ion_types_excluded : list, optional
        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is an empty dict; on initialization
        every atom of Atoms.atoms_covalence not already present is added with its
        covalence (the first value when several are listed).
    """

    verbose_processing: bool = True

    # ---- filters ----
    use_isotopologue_filter: bool = False
    isotopologue_filter_threshold: float = 33
    isotopologue_filter_atoms: tuple = ("Cl", "Br")
    use_runtime_kendrick_filter: bool = False
    use_min_peaks_filter: bool = True
    min_peaks_per_class: int = 15

    # ---- database access ----
    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    db_jobs: int = 3
    db_chunk_size: int = 300

    # ---- query settings ----
    ion_charge: int = -1
    min_hc_filter: float = 0.3
    max_hc_filter: float = 3
    min_oc_filter: float = 0.0
    max_oc_filter: float = 1.2
    min_op_filter: float = 2
    use_pah_line_rule: bool = False
    min_dbe: float = 0
    max_dbe: float = 40
    mz_error_score_weight: float = 0.6
    isotopologue_score_weight: float = 0.4

    # closed-shell adduct ions [M + Adduct]: only the atoms listed for the
    # matching polarity are considered
    adduct_atoms_neg: tuple = ("Cl", "Br")
    adduct_atoms_pos: tuple = ("Na", "K")

    # ---- scoring ----
    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )
    score_method: str = "prob_score"
    output_min_score: float = 0.1
    output_score_method: str = "All Candidates"

    # ---- ion types ----
    # radical ions, [M].+ or [M].- depending on the polarity mode;
    # query and automatically compile and add entries if they don't exist
    isRadical: bool = False
    # (de)protonated ions, [M + H]+ or [M - H]- depending on the polarity mode;
    # query and automatically compile and push options if they don't exist
    isProtonated: bool = True
    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # ---- search settings ----
    ionization_type: str = "ESI"
    # the error windows below were set empirically and need optimization
    min_ppm_error: float = -10.0  # ppm
    max_ppm_error: float = 10.0  # ppm
    min_abun_error: float = -100.0  # percentage, used for isotopologue search
    max_abun_error: float = 100.0  # percentage, used for isotopologue search
    mz_error_range: float = 1.5
    # one of 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"
    mz_error_average: float = 0.0

    # atom symbol -> covalence, e.g. {'C': 4, 'H': 1}
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        # an empty URL falls back to the environment, then to the local SQLite file
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # coerce each field to its annotated type
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

        # carbon and hydrogen must always be searchable
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # complete the valence table with the common values, keeping any
        # entry that was supplied explicitly
        valences = self.used_atom_valences
        for symbol, covalence in Atoms.atoms_covalence.items():
            if symbol in valences:
                continue
            # several covalences may be listed; the first one should be the most common
            valences[symbol] = covalence if isinstance(covalence, int) else covalence[0]

Settings for molecular searching

Attributes
  • use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
  • isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
  • isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
  • use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
  • use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
  • min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
  • url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
  • db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
  • ion_charge (int, optional): Ion charge. Default is -1.
  • min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
  • max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
  • min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
  • max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
  • min_op_filter (float, optional): Minimum oxygen to phosphorous ratio. Default is 2.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
  • mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
  • isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
  • adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
  • adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
  • score_methods (tuple, optional): Tuple of score method that can be implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
  • score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
  • output_min_score (float, optional): Minimum score for output. Default is 0.1.
  • output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
  • isRadical (bool, optional): If True, search for radical ions. Default is False.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • isAdduct (bool, optional): If True, search for adduct ions. Default is False.
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is an empty dict; on initialization 'C': (1, 100) and 'H': (1, 200) are added when those keys are missing.
  • ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
  • ionization_type (str, optional): Ionization type. Default is 'ESI'.
  • min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
  • max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
  • min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
  • max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
  • mz_error_range (float, optional): m/z error range. Default is 1.5.
  • error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
  • mz_error_average (float, optional): m/z error average. Default is 0.0.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is an empty dict; on initialization every atom of Atoms.atoms_covalence that is not already a key is added with its covalence (the first value when several are listed).
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MolecularFormulaSearchSettings( verbose_processing: bool = True, use_isotopologue_filter: bool = False, isotopologue_filter_threshold: float = 33, isotopologue_filter_atoms: tuple = ('Cl', 'Br'), use_runtime_kendrick_filter: bool = False, use_min_peaks_filter: bool = True, min_peaks_per_class: int = 15, url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp', db_jobs: int = 3, db_chunk_size: int = 300, ion_charge: int = -1, min_hc_filter: float = 0.3, max_hc_filter: float = 3, min_oc_filter: float = 0.0, max_oc_filter: float = 1.2, min_op_filter: float = 2, use_pah_line_rule: bool = False, min_dbe: float = 0, max_dbe: float = 40, mz_error_score_weight: float = 0.6, isotopologue_score_weight: float = 0.4, adduct_atoms_neg: tuple = ('Cl', 'Br'), adduct_atoms_pos: tuple = ('Na', 'K'), score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'), score_method: str = 'prob_score', output_min_score: float = 0.1, output_score_method: str = 'All Candidates', isRadical: bool = False, isProtonated: bool = True, isAdduct: bool = False, usedAtoms: dict = <factory>, ion_types_excluded: list = <factory>, ionization_type: str = 'ESI', min_ppm_error: float = -10.0, max_ppm_error: float = 10.0, min_abun_error: float = -100.0, max_abun_error: float = 100.0, mz_error_range: float = 1.5, error_method: str = 'None', mz_error_average: float = 0.0, used_atom_valences: dict = <factory>)
verbose_processing: bool = True
use_isotopologue_filter: bool = False
isotopologue_filter_threshold: float = 33
isotopologue_filter_atoms: tuple = ('Cl', 'Br')
use_runtime_kendrick_filter: bool = False
use_min_peaks_filter: bool = True
min_peaks_per_class: int = 15
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'
db_jobs: int = 3
db_chunk_size: int = 300
ion_charge: int = -1
min_hc_filter: float = 0.3
max_hc_filter: float = 3
min_oc_filter: float = 0.0
max_oc_filter: float = 1.2
min_op_filter: float = 2
use_pah_line_rule: bool = False
min_dbe: float = 0
max_dbe: float = 40
mz_error_score_weight: float = 0.6
isotopologue_score_weight: float = 0.4
adduct_atoms_neg: tuple = ('Cl', 'Br')
adduct_atoms_pos: tuple = ('Na', 'K')
score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error')
score_method: str = 'prob_score'
output_min_score: float = 0.1
output_score_method: str = 'All Candidates'
isRadical: bool = False
isProtonated: bool = True
isAdduct: bool = False
usedAtoms: dict
ion_types_excluded: list
ionization_type: str = 'ESI'
min_ppm_error: float = -10.0
max_ppm_error: float = 10.0
min_abun_error: float = -100.0
max_abun_error: float = 100.0
mz_error_range: float = 1.5
error_method: str = 'None'
mz_error_average: float = 0.0
used_atom_valences: dict