corems.encapsulation.factory.processingSetting

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jul 02, 2019"
  3
  4import dataclasses
  5import os
  6from typing import List, Dict
  7
  8from corems.encapsulation.constant import Atoms, Labels
  9
 10
 11@dataclasses.dataclass
 12class TransientSetting:
 13    """Transient processing settings class
 14
 15    Attributes
 16    ----------
 17    implemented_apodization_function : tuple
 18        Available apodization functions
 19    apodization_method : str
 20        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode.
 21        For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
 22    number_of_truncations : int
 23        How many times to truncate the transient prior to Fourier transform
 24    number_of_zero_fills : int
 25        How many times to zero fille the transient prior to Fourier transform.
 26    next_power_of_two : bool
 27        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
 28    kaiser_beta : float
 29        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular,  5 is similar to Hamming,
 30        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
 31
 32    """
 33
 34    implemented_apodization_function: tuple = (
 35        "Hamming",
 36        "Hanning",
 37        "Blackman",
 38        "Full-Sine",
 39        "Half-Sine",
 40        "Kaiser",
 41        "Half-Kaiser",
 42        "Rectangle",
 43    )
 44    apodization_method: str = "Hanning"
 45    number_of_truncations: int = 0
 46    number_of_zero_fills: int = 1
 47    next_power_of_two: bool = False
 48    kaiser_beta: float = 8.6
 49
 50    def __post_init__(self):
 51        # enforce datatype
 52        for field in dataclasses.fields(self):
 53            value = getattr(self, field.name)
 54            if not isinstance(value, field.type):
 55                value = field.type(value)
 56                setattr(self, field.name, value)
 57
 58
 59@dataclasses.dataclass
 60class DataInputSetting:
 61    """Data input settings class
 62
 63    Attributes
 64    ----------
 65    header_translate : dict
 66        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
 67    """
 68
 69    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
 70    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
 71    header_translate: dict = dataclasses.field(default_factory=dict)
 72
 73    def __post_init__(self):
 74        self.header_translate = {
 75            "m/z": Labels.mz,
 76            "mOz": Labels.mz,
 77            "Mass": Labels.mz,
 78            "Resolving Power": Labels.rp,
 79            "Res.": Labels.rp,
 80            "resolution": Labels.rp,
 81            "Intensity": Labels.abundance,
 82            "Peak Height": Labels.abundance,
 83            "I": Labels.abundance,
 84            "Abundance": Labels.abundance,
 85            "abs_abu": Labels.abundance,
 86            "Signal/Noise": Labels.s2n,
 87            "S/N": Labels.s2n,
 88            "sn": Labels.s2n,
 89        }
 90
 91    def add_mz_label(self, label):
 92        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
 93        self.header_translate[label] = Labels.mz
 94
 95    def add_peak_height_label(self, label):
 96        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
 97
 98        self.header_translate[label] = Labels.abundance
 99
100    def add_sn_label(self, label):
101        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
102        self.header_translate[label] = Labels.s2n
103
104    def add_resolving_power_label(self, label):
105        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
106        self.header_translate[label] = Labels.rp
107
108
109@dataclasses.dataclass
110class LiquidChromatographSetting:
111    """Liquid chromatograph processing settings class
112
113    Attributes
114    ----------
115    scans : list or tuple, optional
116        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
117    eic_tolerance_ppm : float, optional
118        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
119    correct_eic_baseline : bool, optional
120        If True, correct the baseline of the extracted ion chromatogram. Default is True.
121    smooth_window : int, optional
122        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
123    smooth_method : str, optional
124        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
125    implemented_smooth_method : tuple, optional
126        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
127    savgol_pol_order : int, optional
128        Polynomial order for Savitzky-Golay smoothing. Default is 2.
129    consecutive_scan_min : int, optional
130        Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended.
131    peak_height_max_percent : float, optional
132        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
133    peak_max_prominence_percent : float, optional
134        1-100 % used for baseline detection. Default is 1.
135    peak_derivative_threshold : float, optional
136        Threshold for defining derivative crossing. Default is 0.0005.
137    min_peak_datapoints : float, optional
138        minimum data point to define a chromatografic peak. Default is 5.
139    noise_threshold_method : str, optional
140        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
141    noise_threshold_methods_implemented : tuple, optional
142        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
143    peak_height_min_percent : float, optional
144        0-100 % used for peak detection. Default is 0.1.
145    eic_signal_threshold : float, optional
146        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
147    eic_buffer_time : float, optional
148        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
149    peak_picking_method : str, optional
150        Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
151    implemented_peak_picking_methods : tuple, optional
152        Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
153    ph_smooth_it : int, optional
154        Number of iterations to use for smoothing prior to finding mass features.
155        Used only for "persistent homology" peak picking method.
156        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
157    ph_smooth_radius_mz : int, optional
158        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
159        Used only for "persistent homology" peak picking method.
160        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
161    ph_smooth_radius_scan : int, optional
162        Radius in scan steps for smoothing prior to finding mass features.
163        Used only for "persistent homology" peak picking method.
164        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
165    ph_inten_min_rel : int, optional
166        Relative minimum intensity to use for finding mass features for persistent homology.
167        Used only for "persistent homology" peak picking method.
168        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
169        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
170    ph_persis_min_rel : int, optional
171        Relative minimum persistence for retaining mass features.
172        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
173        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
174        Should be greater to or equal to ph_inten_min_rel.
175        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
176    mass_feature_cluster_mz_tolerance_rel : float, optional
177        Relative m/z tolerance to use for clustering mass features.
178        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
179        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
180        Default is 5E-6 (5 ppm).
181    mass_feature_cluster_rt_tolerance : float, optional
182        Retention time tolerance to use for clustering mass features, in minutes.
183        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
184        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
185        Default is 0.2.
186    ms1_scans_to_average : int, optional
187        Number of MS1 scans to average for mass-feature associated m/zs.
188        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
189    ms1_deconvolution_corr_min : float, optional
190        Minimum correlation to use for deconvoluting MS1 mass features.
191        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
192        Default is 0.8.
193    ms2_dda_rt_tolerance : float, optional
194        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
195    ms2_dda_mz_tolerance : float, optional
196        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
197    ms2_min_fe_score : float, optional
198        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
199    search_as_lipids : bool, optional
200        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
201    include_fragment_types : bool, optional
202        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
203    verbose_processing : bool, optional
204        If True, print verbose processing information. Default is True.
205    """
206
207    scans: list | tuple = (-1, -1)
208
209    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
210    eic_tolerance_ppm: float = 5
211    correct_eic_baseline = True
212    smooth_window: int = 5
213    smooth_method: str = "savgol"
214    implemented_smooth_method: tuple = (
215        "savgol",
216        "hanning",
217        "blackman",
218        "bartlett",
219        "flat",
220        "boxcar",
221    )
222    savgol_pol_order: int = 2
223    consecutive_scan_min: int = 0
224    peak_height_max_percent: float = 10
225    peak_max_prominence_percent: float = 1
226    peak_derivative_threshold: float = 0.0005
227    min_peak_datapoints: float = 5
228    noise_threshold_method: str = "manual_relative_abundance"
229    noise_threshold_methods_implemented: tuple = (
230        "auto_relative_abundance",
231        "manual_relative_abundance",
232        "second_derivative",
233    )
234    peak_height_min_percent: float = 0.1
235    eic_signal_threshold: float = 0.01
236    eic_buffer_time = 1.5
237
238    # Parameters used for 2D peak picking
239    peak_picking_method: str = "persistent homology"
240    implemented_peak_picking_methods: tuple = (
241        "persistent homology",
242        "centroided_persistent_homology",
243    )
244
245    # Parameters used in persistent homology calculations
246    ph_smooth_it = 1
247    ph_smooth_radius_mz = 0
248    ph_smooth_radius_scan = 1
249    ph_inten_min_rel = 0.001
250    ph_persis_min_rel = 0.001
251
252    # Parameters used to cluster mass features
253    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
254    mass_feature_cluster_rt_tolerance: float = 0.3
255
256    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
257    ms1_scans_to_average: int = 1
258    ms1_deconvolution_corr_min: float = 0.8
259    ms2_dda_rt_tolerance: float = 0.15
260    ms2_dda_mz_tolerance: float = 0.05
261
262    # Parameters used for flash entropy searching and database preparation
263    ms2_min_fe_score: float = 0.2
264    search_as_lipids: bool = False
265    include_fragment_types: bool = False
266
267    # Parameters used for saving the data
268    export_profile_spectra: bool = False
269    export_eics: bool = True
270    export_unprocessed_ms1: bool = False
271
272    # Parameters used for verbose processing
273    verbose_processing: bool = True
274
275    def __post_init__(self):
276        # enforce datatype
277        for field in dataclasses.fields(self):
278            value = getattr(self, field.name)
279            if not isinstance(value, field.type):
280                value = field.type(value)
281                setattr(self, field.name, value)
282
283
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings used when processing a mass spectrum.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Noise threshold detection method. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that are available. Default is
        ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for the 'minima' noise threshold method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for the 'signal_noise' noise threshold method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for the 'relative_abundance' noise threshold method,
        given as a percentage (0-100). Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for the 'absolute_abundance' noise threshold method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations for the 'log' noise threshold method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor for the 'log' noise threshold method
        (mFT is 0.463, aFT is 1.0). Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of histogram bins for the 'log' noise threshold method. Default is 500.
    noise_min_mz : float, optional
        Lowest m/z considered for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Highest m/z considered for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Lowest m/z considered for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Highest m/z considered for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        Number of data points (in each direction) by which the mz axis is
        extrapolated and the abundance axis is zero padded. Default is 3.
        A value of 3 is recommended for reduced profile data or if peak picking
        faults; 0 keeps the normal behaviour.
    calib_minimize_method : str, optional
        Minimization method used for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order used for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error used for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error used for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold used for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for
        recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Reference matching methods that are available. Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        Initial matching tolerance when the new reference mass matching method
        is used. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    noise_threshold_method: str = "log"

    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )

    # threshold for the 'minima' method
    noise_threshold_min_std: int = 6

    # threshold for the 'signal_noise' method
    noise_threshold_min_s2n: float = 4

    # threshold for the 'relative_abundance' method, from 0-100
    noise_threshold_min_relative_abundance: float = 6

    # threshold for the 'absolute_abundance' method
    noise_threshold_absolute_abundance: float = 1_000_000

    # parameters for the 'log' method
    noise_threshold_log_nsigma: int = 6
    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    noise_threshold_log_nsigma_bins: int = 500  # histogram bins for the noise

    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0

    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # Number of data points (in each direction) used to extrapolate the mz axis
    # and zero pad the abundance axis; fixes peak picking issues at the spectrum
    # limits. Use 0 to keep the normal behaviour, 3 is a typical value to fix.
    picking_point_extrapolate: int = 3

    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        """Cast each field whose value is not an instance of its annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
397
398
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Settings used when processing mass spectrum peaks.

    Attributes
    ----------
    kendrick_base : Dict, optional
        Elements and their counts making up the Kendrick base.
        An empty value is replaced by {'C': 1, 'H': 2}.
    kendrick_rounding_method : str, optional
        How the nominal Kendrick mass is calculated: 'floor', 'ceil' or 'round'.
        Default is 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Valid rounding methods for the nominal Kendrick mass.
        Default is ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold that defines a derivative crossing, a value between 0 and 1.
        Default is 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence percentage used for peak detection. Default is 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection (0 to infinity).
        Default is 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage used for baseline detection. Default is 0.1.
    peak_height_max_percent : float, optional
        Maximum height percentage used for baseline detection. Default is 10.
    legacy_resolving_power : bool, optional
        Whether to use the legacy (CoreMS v1) resolving power calculation.
        Default is True.
    legacy_centroid_polyfit : bool, optional
        Use legacy (numpy polyfit) to fit the centroid. Default is False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    # valid methods for the nominal kendrick mass: 'floor', 'ceil' or 'round'
    kendrick_rounding_method: str = "floor"

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    # derivative crossing threshold, 0-1
    peak_derivative_threshold: float = 0.0

    # 1-100 %, used for peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-inf, used for peak detection
    min_peak_datapoints: float = 5

    # 1-100 %, used for baseline detection
    peak_max_prominence_percent: float = 0.1

    # 1-100 %, used for baseline detection
    peak_height_max_percent: float = 10

    # True selects the legacy (CoreMS v1) resolving power calculation
    legacy_resolving_power: bool = True

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        """Apply the CH2 default base, then cast fields to their annotated types."""
        # an empty Kendrick base falls back to CH2
        self.kendrick_base = self.kendrick_base or {"C": 1, "H": 2}

        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
469
470
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings used when processing gas chromatography data.

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Smoothing methods that are available. Default is
        ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size used to smooth the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'; the other options are
        'hanning', 'blackman', 'bartlett', 'flat' and 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold that defines a derivative crossing, a value between 0 and 1.
        Default is 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage (1-100) used for baseline detection; use 0.1
        for second_derivative and 10 for other methods. Default is 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage (1-100) used for baseline detection.
        Default is 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection (0 to infinity).
        Default is 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection (0 to infinity).
        Default is 0.1.
    noise_threshold_method : str, optional
        Noise threshold detection method. Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Noise threshold methods that are available. Default is
        ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection.
        Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for hierarchical cluster, in minutes.
        Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )

    smooth_window: int = 5

    smooth_method: str = "savgol"

    savgol_pol_order: int = 2

    peak_derivative_threshold: float = 0.0005

    # 1-100 %, baseline detection: 0.1 for second_derivative, 10 for other methods
    peak_height_max_percent: float = 10

    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 1

    min_peak_datapoints: float = 5

    max_peak_width: float = 0.1

    noise_threshold_method: str = "manual_relative_abundance"

    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )

    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1

    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1

    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01

    # minutes, maximum distance allowance for the hierarchical cluster
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        """Cast each field whose value is not an instance of its annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))
578
579
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. When not given, the value of the
        SPECTRAL_GCMS_DATABASE_URL environment variable is used, falling back to
        'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When empty, defaults to ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculates and exports all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Default is 'All'.
    """

    # Empty means "not provided": __post_init__ then resolves the URL from the
    # environment. No credentials are kept in the source.
    url_database: str = ""

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        """Fill in defaults for omitted values, then cast fields to their annotated types."""
        from typing import get_origin

        # An explicit url_database is honoured; otherwise use the environment
        # variable, and finally the bundled sqlite database.
        if not self.url_database:
            self.url_database = os.getenv(
                "SPECTRAL_GCMS_DATABASE_URL",
                "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
            )

        # Only fall back to the standard calibration compounds when the caller
        # did not provide any (same pattern as the Kendrick base default).
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if isinstance(value, field.type):
                continue
            # typing aliases such as List cannot be instantiated; cast with the
            # runtime class they stand for (list) instead.
            caster = get_origin(field.type) or field.type
            setattr(self, field.name, caster(value))
656
657
class MolecularLookupDictSettings:
    """Settings that drive generation of the molecular formula database entries.

    Leave these values alone when working against an existing database; change
    them only when creating a new application database. For runtime search
    settings and database query checks use the MolecularFormulaSearchSettings
    class instead.

    Notes
    -----
    - C, H, N, O, S and P atoms are ALWAYS needed in ``usedAtoms``; to leave one
      of them out, set both its min and max to 0.
    - Any atom listed in the Atoms class inside the
      encapsulation.settings.constants module may be included. When adding a
      new atom, also add its selected covalence to ``used_atom_valences``.
    - Adduct atoms have zero covalence.
    - The values are instance attributes rather than static variables because
      this class is distributed using multiprocessing.

    Attributes
    ----------
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : float, optional
        Minimum m/z to use for searching. Default is 50.0.
    max_mz : float, optional
        Maximum m/z to use for searching. Default is 1200.0.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 50.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    isRadical : bool, optional
        If True, search for radical ions. Default is True.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    url_database : str, optional
        URL for the database. Default is None.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 1.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    def __init__(self):
        # Allowed count range for every element considered.
        self.usedAtoms = dict(
            C=(1, 90),
            H=(4, 200),
            O=(0, 12),
            N=(0, 0),
            S=(0, 0),
            P=(0, 0),
            Cl=(0, 0),
        )

        # m/z window
        self.min_mz, self.max_mz = 50, 1200

        # double bond equivalent window
        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, overwrites the dbe limits above to
        # DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        # ion types to generate
        self.isRadical = self.isProtonated = True

        self.url_database = None
        self.db_jobs = 1

        # Covalence used for each atom (isotopes included).
        self.used_atom_valences = dict(
            (
                ("C", 4),
                ("13C", 4),
                ("H", 1),
                ("O", 2),
                ("18O", 2),
                ("N", 3),
                ("S", 2),
                ("34S", 2),
                ("P", 3),
                ("Cl", 1),
                ("37Cl", 1),
                ("Br", 1),
                ("Na", 1),
                ("F", 1),
                ("K", 0),
            )
        )
749
750
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Settings for molecular searching

    Values that are not instances of their annotated type are converted by
    calling that type on them when the instance is created. The dictionaries
    passed as ``usedAtoms`` and ``used_atom_valences`` are copied, so the
    caller's objects are never modified.

    Attributes
    ----------
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        If an empty value (None or '') is given, the COREMS_DATABASE_URL environment variable is used,
        falling back to 'sqlite:///db/molformula.db'.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorous ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight for m/z error score to contribute to composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Tuple of score method that can be implemented.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is an empty dictionary; 'C': (1, 100) and 'H': (1, 200)
        are added whenever those keys are missing, so the effective default is {'C': (1, 100), 'H': (1, 200)}.
    ion_types_excluded : list, optional
        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is an empty dictionary; every atom of
        Atoms.atoms_covalence that is missing from it is added with its covalence
        (the first listed value when several are available).
    verbose_processing: bool, optional
        If True, print verbose processing information. Default is True.
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # query setting========
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # look for closed shell ions [M + Adduct]+; only the atoms listed in the
    # adduct_atoms tuples are considered
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # depending on the polarity mode it looks for [M].+ , [M].-
    # query and automatically compile and add the entry if it doesn't exist

    isRadical: bool = False

    # depending on the polarity mode it looks for [M + H]+ , [M - H]-
    # query and automatically compile and push options if it doesn't exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # search setting ========

    ionization_type: str = "ESI"

    # empirically set / needs optimization
    min_ppm_error: float = -10.0  # ppm

    # empirically set / needs optimization
    max_ppm_error: float = 10.0  # ppm

    # empirically set / needs optimization set for isotopologue search
    min_abun_error: float = -100.0  # percentage

    # empirically set / needs optimization set for isotopologue search
    max_abun_error: float = 100.0  # percentage

    # empirically set / needs optimization
    mz_error_range: float = 1.5

    # 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        """Normalise the settings right after dataclass initialisation.

        Resolves an empty database URL, enforces the annotated datatypes,
        guarantees that C and H are searchable and fills in the covalence of
        every atom known to ``Atoms.atoms_covalence``.
        """
        # An empty URL (None or "") falls back to the environment, then to a
        # local SQLite file.
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                setattr(self, field.name, field.type(value))

        # Work on private copies so the dictionaries handed in by the caller
        # are not modified by the additions below.
        self.usedAtoms = dict(self.usedAtoms)
        self.used_atom_valences = dict(self.used_atom_valences)

        # enforce C and H if either does not exist
        self.usedAtoms.setdefault("C", (1, 100))
        self.usedAtoms.setdefault("H", (1, 200))

        # add common values: every known atom without a user-defined valence
        # gets its covalence from the Atoms table
        for atom, covalence in Atoms.atoms_covalence.items():
            if atom in self.used_atom_valences:
                continue

            if isinstance(covalence, int):
                self.used_atom_valences[atom] = covalence
            else:
                # take the first of all possible covalences, which should be
                # the most common one
                self.used_atom_valences[atom] = covalence[0]
@dataclasses.dataclass
class TransientSetting:
@dataclasses.dataclass
class TransientSetting:
    """Settings controlling how a transient is processed.

    Any value that is not an instance of its annotated type is converted by
    calling that type on it when the instance is created.

    Attributes
    ----------
    implemented_apodization_function : tuple
        Names of the apodization functions that are available.
    apodization_method : str
        Which apodization function to apply. Hanning is a good default for
        Fourier transform magnitude mode; for absorption mode processing,
        Half-Sine or Half-Kaiser may be more appropriate.
    number_of_truncations : int
        Number of times the transient is truncated prior to Fourier transform.
    number_of_zero_fills : int
        Number of times the transient is zero filled prior to Fourier transform.
    next_power_of_two : bool
        If True, zero fill to the next power of two after the new length of
        len(transient)+(number_of_zero_fills*len(transient)).
    kaiser_beta : float
        Beta parameter for the Kaiser or Half-Kaiser apodization function.
        0 is rectangular, 5 is similar to Hamming, 6 is similar to Hanning,
        and 8.6 is similar to Blackman (from numpy docs).

    """

    implemented_apodization_function: tuple = (
        "Hamming",
        "Hanning",
        "Blackman",
        "Full-Sine",
        "Half-Sine",
        "Kaiser",
        "Half-Kaiser",
        "Rectangle",
    )
    apodization_method: str = "Hanning"
    number_of_truncations: int = 0
    number_of_zero_fills: int = 1
    next_power_of_two: bool = False
    kaiser_beta: float = 8.6

    def __post_init__(self):
        """Convert every field value to its annotated type when it differs."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            setattr(self, spec.name, spec.type(current))

Transient processing settings class

Attributes
  • implemented_apodization_function (tuple): Available apodization functions
  • apodization_method (str): Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
  • number_of_truncations (int): How many times to truncate the transient prior to Fourier transform
  • number_of_zero_fills (int): How many times to zero fill the transient prior to Fourier transform.
  • next_power_of_two (bool): If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
  • kaiser_beta (float): Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
TransientSetting( implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle'), apodization_method: str = 'Hanning', number_of_truncations: int = 0, number_of_zero_fills: int = 1, next_power_of_two: bool = False, kaiser_beta: float = 8.6)
implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser', 'Rectangle')
apodization_method: str = 'Hanning'
number_of_truncations: int = 0
number_of_zero_fills: int = 1
next_power_of_two: bool = False
kaiser_beta: float = 8.6
@dataclasses.dataclass
class DataInputSetting:
 60@dataclasses.dataclass
 61class DataInputSetting:
 62    """Data input settings class
 63
 64    Attributes
 65    ----------
 66    header_translate : dict
 67        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
 68    """
 69
 70    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
 71    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
 72    header_translate: dict = dataclasses.field(default_factory=dict)
 73
 74    def __post_init__(self):
 75        self.header_translate = {
 76            "m/z": Labels.mz,
 77            "mOz": Labels.mz,
 78            "Mass": Labels.mz,
 79            "Resolving Power": Labels.rp,
 80            "Res.": Labels.rp,
 81            "resolution": Labels.rp,
 82            "Intensity": Labels.abundance,
 83            "Peak Height": Labels.abundance,
 84            "I": Labels.abundance,
 85            "Abundance": Labels.abundance,
 86            "abs_abu": Labels.abundance,
 87            "Signal/Noise": Labels.s2n,
 88            "S/N": Labels.s2n,
 89            "sn": Labels.s2n,
 90        }
 91
 92    def add_mz_label(self, label):
 93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
 94        self.header_translate[label] = Labels.mz
 95
 96    def add_peak_height_label(self, label):
 97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
 98
 99        self.header_translate[label] = Labels.abundance
100
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n
104
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Data input settings class

Attributes
  • header_translate (dict): Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
DataInputSetting(header_translate: dict = <factory>)
header_translate: dict
def add_mz_label(self, label):
92    def add_mz_label(self, label):
93        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
94        self.header_translate[label] = Labels.mz

Add a label to the header_translate dictionary to be translated to the corems label for mz.

def add_peak_height_label(self, label):
96    def add_peak_height_label(self, label):
97        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
98
99        self.header_translate[label] = Labels.abundance

Add a label to the header_translate dictionary to be translated to the corems label for peak height.

def add_sn_label(self, label):
101    def add_sn_label(self, label):
102        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
103        self.header_translate[label] = Labels.s2n

Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.

def add_resolving_power_label(self, label):
105    def add_resolving_power_label(self, label):
106        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
107        self.header_translate[label] = Labels.rp

Add a label to the header_translate dictionary to be translated to the corems label for resolving power.

@dataclasses.dataclass
class LiquidChromatographSetting:
110@dataclasses.dataclass
111class LiquidChromatographSetting:
112    """Liquid chromatograph processing settings class
113
114    Attributes
115    ----------
116    scans : list or tuple, optional
117        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
118    eic_tolerance_ppm : float, optional
119        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
120    correct_eic_baseline : bool, optional
121        If True, correct the baseline of the extracted ion chromatogram. Default is True.
122    smooth_window : int, optional
123        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
124    smooth_method : str, optional
125        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
126    implemented_smooth_method : tuple, optional
127        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
128    savgol_pol_order : int, optional
129        Polynomial order for Savitzky-Golay smoothing. Default is 2.
130    consecutive_scan_min : int, optional
131        Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended.
132    peak_height_max_percent : float, optional
133        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
134    peak_max_prominence_percent : float, optional
135        1-100 % used for baseline detection. Default is 1.
136    peak_derivative_threshold : float, optional
137        Threshold for defining derivative crossing. Default is 0.0005.
138    min_peak_datapoints : float, optional
139        minimum data point to define a chromatografic peak. Default is 5.
140    noise_threshold_method : str, optional
141        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
142    noise_threshold_methods_implemented : tuple, optional
143        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
144    peak_height_min_percent : float, optional
145        0-100 % used for peak detection. Default is 0.1.
146    eic_signal_threshold : float, optional
147        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
148    eic_buffer_time : float, optional
149        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
150    peak_picking_method : str, optional
151        Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
152    implemented_peak_picking_methods : tuple, optional
153        Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
154    ph_smooth_it : int, optional
155        Number of iterations to use for smoothing prior to finding mass features.
156        Used only for "persistent homology" peak picking method.
157        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
158    ph_smooth_radius_mz : int, optional
159        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
160        Used only for "persistent homology" peak picking method.
161        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
162    ph_smooth_radius_scan : int, optional
163        Radius in scan steps for smoothing prior to finding mass features.
164        Used only for "persistent homology" peak picking method.
165        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
166    ph_inten_min_rel : int, optional
167        Relative minimum intensity to use for finding mass features for persistent homology.
168        Used only for "persistent homology" peak picking method.
169        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
170        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
171    ph_persis_min_rel : int, optional
172        Relative minimum persistence for retaining mass features.
173        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
174        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
175        Should be greater to or equal to ph_inten_min_rel.
176        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
177    mass_feature_cluster_mz_tolerance_rel : float, optional
178        Relative m/z tolerance to use for clustering mass features.
179        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
180        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
181        Default is 5E-6 (5 ppm).
182    mass_feature_cluster_rt_tolerance : float, optional
183        Retention time tolerance to use for clustering mass features, in minutes.
184        Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods.
185        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
186        Default is 0.2.
187    ms1_scans_to_average : int, optional
188        Number of MS1 scans to average for mass-feature associated m/zs.
189        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
190    ms1_deconvolution_corr_min : float, optional
191        Minimum correlation to use for deconvoluting MS1 mass features.
192        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
193        Default is 0.8.
194    ms2_dda_rt_tolerance : float, optional
195        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
196    ms2_dda_mz_tolerance : float, optional
197        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
198    ms2_min_fe_score : float, optional
199        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
200    search_as_lipids : bool, optional
201        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
202    include_fragment_types : bool, optional
203        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
204    verbose_processing : bool, optional
205        If True, print verbose processing information. Default is True.
206    """
207
208    scans: list | tuple = (-1, -1)
209
210    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
211    eic_tolerance_ppm: float = 5
212    correct_eic_baseline = True
213    smooth_window: int = 5
214    smooth_method: str = "savgol"
215    implemented_smooth_method: tuple = (
216        "savgol",
217        "hanning",
218        "blackman",
219        "bartlett",
220        "flat",
221        "boxcar",
222    )
223    savgol_pol_order: int = 2
224    consecutive_scan_min: int = 0
225    peak_height_max_percent: float = 10
226    peak_max_prominence_percent: float = 1
227    peak_derivative_threshold: float = 0.0005
228    min_peak_datapoints: float = 5
229    noise_threshold_method: str = "manual_relative_abundance"
230    noise_threshold_methods_implemented: tuple = (
231        "auto_relative_abundance",
232        "manual_relative_abundance",
233        "second_derivative",
234    )
235    peak_height_min_percent: float = 0.1
236    eic_signal_threshold: float = 0.01
237    eic_buffer_time = 1.5
238
239    # Parameters used for 2D peak picking
240    peak_picking_method: str = "persistent homology"
241    implemented_peak_picking_methods: tuple = (
242        "persistent homology",
243        "centroided_persistent_homology",
244    )
245
246    # Parameters used in persistent homology calculations
247    ph_smooth_it = 1
248    ph_smooth_radius_mz = 0
249    ph_smooth_radius_scan = 1
250    ph_inten_min_rel = 0.001
251    ph_persis_min_rel = 0.001
252
253    # Parameters used to cluster mass features
254    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
255    mass_feature_cluster_rt_tolerance: float = 0.3
256
257    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
258    ms1_scans_to_average: int = 1
259    ms1_deconvolution_corr_min: float = 0.8
260    ms2_dda_rt_tolerance: float = 0.15
261    ms2_dda_mz_tolerance: float = 0.05
262
263    # Parameters used for flash entropy searching and database preparation
264    ms2_min_fe_score: float = 0.2
265    search_as_lipids: bool = False
266    include_fragment_types: bool = False
267
268    # Parameters used for saving the data
269    export_profile_spectra: bool = False
270    export_eics: bool = True
271    export_unprocessed_ms1: bool = False
272
273    # Parameters used for verbose processing
274    verbose_processing: bool = True
275
276    def __post_init__(self):
277        # enforce datatype
278        for field in dataclasses.fields(self):
279            value = getattr(self, field.name)
280            if not isinstance(value, field.type):
281                value = field.type(value)
282                setattr(self, field.name, value)

Liquid chromatograph processing settings class

Attributes
  • scans (list or tuple, optional): List of select scan to average or a tuple containing the range to average. Default is (-1, -1).
  • eic_tolerance_ppm (float, optional): Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
  • correct_eic_baseline (bool, optional): If True, correct the baseline of the extracted ion chromatogram. Default is True.
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
  • smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • consecutive_scan_min (int, optional): Minimum number of consecutive scans to consider for peak detection. Default is 0 for backwards compatibility, but a value of 3 is recommended.
  • peak_height_max_percent (float, optional): 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
  • peak_max_prominence_percent (float, optional): 1-100 % used for baseline detection. Default is 1.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Default is 0.0005.
  • min_peak_datapoints (float, optional): Minimum data point to define a chromatographic peak. Default is 5.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
  • peak_picking_method (str, optional): Peak picking method to use. Default is 'persistent homology'. Other options are 'centroided_persistent_homology'.
  • implemented_peak_picking_methods (tuple, optional): Peak picking methods that can be implemented. Default is ('persistent homology', 'centroided_persistent_homology').
  • ph_smooth_it (int, optional): Number of iterations to use for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_smooth_radius_mz (int, optional): Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
  • ph_smooth_radius_scan (int, optional): Radius in scan steps for smoothing prior to finding mass features. Used only for "persistent homology" peak picking method. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_inten_min_rel (float, optional): Relative minimum intensity to use for finding mass features for persistent homology. Used only for "persistent homology" peak picking method. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • ph_persis_min_rel (float, optional): Relative minimum persistence for retaining mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • mass_feature_cluster_mz_tolerance_rel (float, optional): Relative m/z tolerance to use for clustering mass features. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
  • mass_feature_cluster_rt_tolerance (float, optional): Retention time tolerance to use for clustering mass features, in minutes. Used for both "persistent homology" and "centroided_persistent_homology" peak picking methods. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
  • ms1_scans_to_average (int, optional): Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
  • ms1_deconvolution_corr_min (float, optional): Minimum correlation to use for deconvoluting MS1 mass features. Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
  • ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
  • ms2_dda_mz_tolerance (float, optional): Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
  • ms2_min_fe_score (float, optional): Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
  • search_as_lipids (bool, optional): If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
  • include_fragment_types (bool, optional): If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
LiquidChromatographSetting( scans: list | tuple = (-1, -1), eic_tolerance_ppm: float = 5, smooth_window: int = 5, smooth_method: str = 'savgol', implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), savgol_pol_order: int = 2, consecutive_scan_min: int = 0, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, peak_derivative_threshold: float = 0.0005, min_peak_datapoints: float = 5, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), peak_height_min_percent: float = 0.1, eic_signal_threshold: float = 0.01, peak_picking_method: str = 'persistent homology', implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology'), mass_feature_cluster_mz_tolerance_rel: float = 5e-06, mass_feature_cluster_rt_tolerance: float = 0.3, ms1_scans_to_average: int = 1, ms1_deconvolution_corr_min: float = 0.8, ms2_dda_rt_tolerance: float = 0.15, ms2_dda_mz_tolerance: float = 0.05, ms2_min_fe_score: float = 0.2, search_as_lipids: bool = False, include_fragment_types: bool = False, export_profile_spectra: bool = False, export_eics: bool = True, export_unprocessed_ms1: bool = False, verbose_processing: bool = True)
scans: list | tuple = (-1, -1)
eic_tolerance_ppm: float = 5
correct_eic_baseline = True
smooth_window: int = 5
smooth_method: str = 'savgol'
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
savgol_pol_order: int = 2
consecutive_scan_min: int = 0
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
peak_derivative_threshold: float = 0.0005
min_peak_datapoints: float = 5
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
peak_height_min_percent: float = 0.1
eic_signal_threshold: float = 0.01
eic_buffer_time = 1.5
peak_picking_method: str = 'persistent homology'
implemented_peak_picking_methods: tuple = ('persistent homology', 'centroided_persistent_homology')
ph_smooth_it = 1
ph_smooth_radius_mz = 0
ph_smooth_radius_scan = 1
ph_inten_min_rel = 0.001
ph_persis_min_rel = 0.001
mass_feature_cluster_mz_tolerance_rel: float = 5e-06
mass_feature_cluster_rt_tolerance: float = 0.3
ms1_scans_to_average: int = 1
ms1_deconvolution_corr_min: float = 0.8
ms2_dda_rt_tolerance: float = 0.15
ms2_dda_mz_tolerance: float = 0.05
ms2_min_fe_score: float = 0.2
search_as_lipids: bool = False
include_fragment_types: bool = False
export_profile_spectra: bool = False
export_eics: bool = True
export_unprocessed_ms1: bool = False
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpectrumSetting:
@dataclasses.dataclass
class MassSpectrumSetting:
    """Settings that control mass spectrum processing.

    Attributes
    ----------
    noise_threshold_method : str, optional
        Name of the noise threshold detection method. Default is 'log'.
    noise_threshold_methods_implemented : tuple, optional
        Names of the noise threshold methods that are implemented.
        Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    noise_threshold_min_std : int, optional
        Minimum value for noise thresholding with the 'minima' method. Default is 6.
    noise_threshold_min_s2n : float, optional
        Minimum value for noise thresholding with the 'signal_noise' method. Default is 4.
    noise_threshold_min_relative_abundance : float, optional
        Minimum value for noise thresholding with the 'relative_abundance' method.
        This is a percentage (0-100). Default is 6 (6%).
    noise_threshold_absolute_abundance : float, optional
        Minimum value for noise thresholding with the 'absolute_abundance' method.
        Default is 1_000_000.
    noise_threshold_log_nsigma : int, optional
        Number of standard deviations used by the 'log' method. Default is 6.
    noise_threshold_log_nsigma_corr_factor : float, optional
        Correction factor used by the 'log' method (mFT is 0.463, aFT is 1.0).
        Default is 0.463.
    noise_threshold_log_nsigma_bins : int, optional
        Number of histogram bins used by the 'log' method. Default is 500.
    noise_min_mz : float, optional
        Lowest m/z considered for noise thresholding. Default is 50.0.
    noise_max_mz : float, optional
        Highest m/z considered for noise thresholding. Default is 1200.0.
    min_picking_mz : float, optional
        Lowest m/z considered for peak picking. Default is 50.0.
    max_picking_mz : float, optional
        Highest m/z considered for peak picking. Default is 1200.0.
    picking_point_extrapolate : int, optional
        Number of data points (in each direction) by which the m/z axis is
        extrapolated and the abundance axis is zero padded, to fix peak picking
        at the spectrum limits. Use 0 to keep the normal behaviour; 3 is the
        typical value and is recommended for reduced profile data or if peak
        picking faults. Default is 3.
    calib_minimize_method : str, optional
        Minimization method used for calibration. Default is 'Powell'.
    calib_pol_order : int, optional
        Polynomial order used for calibration. Default is 2.
    max_calib_ppm_error : float, optional
        Maximum ppm error used for calibration. Default is 1.0.
    min_calib_ppm_error : float, optional
        Minimum ppm error used for calibration. Default is -1.0.
    calib_sn_threshold : float, optional
        Signal to noise threshold used for calibration. Default is 2.0.
    calibration_ref_match_method : str, optional
        Method for matching reference masses with measured masses for
        recalibration. Default is 'legacy'.
    calibration_ref_match_method_implemented : tuple, optional
        Names of the reference matching methods that are implemented.
        Default is ('legacy', 'merged').
    calibration_ref_match_tolerance : float, optional
        Initial matching tolerance when the new method for calibration
        reference mass matching is used. Default is 0.003.
    calibration_ref_match_std_raw_error_limit : float, optional
        Default is 1.5.
        NOTE(review): presumably a limit on the standard deviation of the raw
        matching error - confirm against the calibration code.
    do_calibration : bool, optional
        If True, perform calibration. Default is True.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    # --- noise thresholding -------------------------------------------------
    noise_threshold_method: str = "log"
    noise_threshold_methods_implemented: tuple = (
        "minima",
        "signal_noise",
        "relative_abundance",
        "absolute_abundance",
        "log",
    )
    # used by the 'minima' method
    noise_threshold_min_std: int = 6
    # used by the 'signal_noise' method
    noise_threshold_min_s2n: float = 4
    # used by the 'relative_abundance' method, from 0-100
    noise_threshold_min_relative_abundance: float = 6
    # used by the 'absolute_abundance' method
    noise_threshold_absolute_abundance: float = 1_000_000
    # used by the 'log' method
    noise_threshold_log_nsigma: int = 6
    noise_threshold_log_nsigma_corr_factor: float = 0.463
    noise_threshold_log_nsigma_bins: int = 500

    # --- m/z windows ----------------------------------------------------------
    noise_min_mz: float = 50.0
    noise_max_mz: float = 1200.0
    min_picking_mz: float = 50.0
    max_picking_mz: float = 1200.0

    # data points added on each side of the spectrum before peak picking
    picking_point_extrapolate: int = 3

    # --- calibration ----------------------------------------------------------
    calib_minimize_method: str = "Powell"
    calib_pol_order: int = 2
    max_calib_ppm_error: float = 1.0
    min_calib_ppm_error: float = -1.0
    calib_sn_threshold: float = 2.0
    calibration_ref_match_method: str = "legacy"
    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    calibration_ref_match_tolerance: float = 0.003
    calibration_ref_match_std_raw_error_limit: float = 1.5

    do_calibration: bool = True
    verbose_processing: bool = True

    def __post_init__(self):
        """Cast every field whose value is not an instance of its annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            # the annotation doubles as the converter, e.g. int("7") -> 7
            setattr(self, spec.name, spec.type(current))

Mass spectrum processing settings class

Attributes
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'log'.
  • noise_threshold_methods_implemented (tuple, optional): Noise threshold detection methods that are implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
  • noise_threshold_min_std (int, optional): Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
  • noise_threshold_min_s2n (float, optional): Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
  • noise_threshold_min_relative_abundance (float, optional): Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
  • noise_threshold_absolute_abundance (float, optional): Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
  • noise_threshold_log_nsigma (int, optional): Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
  • noise_threshold_log_nsigma_corr_factor (float, optional): Correction factor for log noise threshold method. Default is 0.463.
  • noise_threshold_log_nsigma_bins (int, optional): Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
  • noise_min_mz (float, optional): Minimum m/z to use for noise thresholding. Default is 50.0.
  • noise_max_mz (float, optional): Maximum m/z to use for noise thresholding. Default is 1200.0.
  • min_picking_mz (float, optional): Minimum m/z to use for peak picking. Default is 50.0.
  • max_picking_mz (float, optional): Maximum m/z to use for peak picking. Default is 1200.0.
  • picking_point_extrapolate (int, optional): How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. Recommend 3 for reduced profile data or if peak picking faults
  • calib_minimize_method (str, optional): Minimization method to use for calibration. Default is 'Powell'.
  • calib_pol_order (int, optional): Polynomial order to use for calibration. Default is 2.
  • max_calib_ppm_error (float, optional): Maximum ppm error to use for calibration. Default is 1.0.
  • min_calib_ppm_error (float, optional): Minimum ppm error to use for calibration. Default is -1.0.
  • calib_sn_threshold (float, optional): Signal to noise threshold to use for calibration. Default is 2.0.
  • calibration_ref_match_method (string, optional): Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
  • calibration_ref_match_tolerance (float, optional): If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
  • do_calibration (bool, optional): If True, perform calibration. Default is True.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MassSpectrumSetting( noise_threshold_method: str = 'log', noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'), noise_threshold_min_std: int = 6, noise_threshold_min_s2n: float = 4, noise_threshold_min_relative_abundance: float = 6, noise_threshold_absolute_abundance: float = 1000000, noise_threshold_log_nsigma: int = 6, noise_threshold_log_nsigma_corr_factor: float = 0.463, noise_threshold_log_nsigma_bins: int = 500, noise_min_mz: float = 50.0, noise_max_mz: float = 1200.0, min_picking_mz: float = 50.0, max_picking_mz: float = 1200.0, picking_point_extrapolate: int = 3, calib_minimize_method: str = 'Powell', calib_pol_order: int = 2, max_calib_ppm_error: float = 1.0, min_calib_ppm_error: float = -1.0, calib_sn_threshold: float = 2.0, calibration_ref_match_method: str = 'legacy', calibration_ref_match_method_implemented: tuple = ('legacy', 'merged'), calibration_ref_match_tolerance: float = 0.003, calibration_ref_match_std_raw_error_limit: float = 1.5, do_calibration: bool = True, verbose_processing: bool = True)
noise_threshold_method: str = 'log'
noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')
noise_threshold_min_std: int = 6
noise_threshold_min_s2n: float = 4
noise_threshold_min_relative_abundance: float = 6
noise_threshold_absolute_abundance: float = 1000000
noise_threshold_log_nsigma: int = 6
noise_threshold_log_nsigma_corr_factor: float = 0.463
noise_threshold_log_nsigma_bins: int = 500
noise_min_mz: float = 50.0
noise_max_mz: float = 1200.0
min_picking_mz: float = 50.0
max_picking_mz: float = 1200.0
picking_point_extrapolate: int = 3
calib_minimize_method: str = 'Powell'
calib_pol_order: int = 2
max_calib_ppm_error: float = 1.0
min_calib_ppm_error: float = -1.0
calib_sn_threshold: float = 2.0
calibration_ref_match_method: str = 'legacy'
calibration_ref_match_method_implemented: tuple = ('legacy', 'merged')
calibration_ref_match_tolerance: float = 0.003
calibration_ref_match_std_raw_error_limit: float = 1.5
do_calibration: bool = True
verbose_processing: bool = True
@dataclasses.dataclass
class MassSpecPeakSetting:
@dataclasses.dataclass
class MassSpecPeakSetting:
    """Mass spectrum peak processing settings class

    Attributes
    ----------
    kendrick_base : Dict, optional
        Dictionary specifying the elements and their counts in the Kendrick base.
        An empty or missing value is replaced by {'C': 1, 'H': 2}.
        Any other non-dict value is converted with ``dict(value)``.
    kendrick_rounding_method : str, optional
        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
        Defaults to 'floor'.
    implemented_kendrick_rounding_methods : tuple
        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
        Defaults to ('floor', 'ceil', 'round').
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing. Should be a value between 0 and 1.
        Defaults to 0.0.
    peak_min_prominence_percent : float, optional
        Minimum prominence used for peak detection, given as a percentage (0-100).
        Defaults to 0.1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
        Defaults to 5.
    peak_max_prominence_percent : float, optional
        Maximum prominence used for baseline detection, given as a percentage (0-100).
        Defaults to 0.1.
    peak_height_max_percent : float, optional
        Maximum height used for baseline detection, given as a percentage (0-100).
        Defaults to 10.
    legacy_resolving_power : bool, optional
        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
        Defaults to True.
    legacy_centroid_polyfit : bool, optional
        If True, use the legacy (numpy polyfit) method to fit the centroid.
        Defaults to False.
    """

    kendrick_base: Dict = dataclasses.field(default_factory=dict)

    # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
    kendrick_rounding_method: str = "floor"

    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")

    peak_derivative_threshold: float = 0.0  # derivative crossing threshold, 0-1

    peak_min_prominence_percent: float = 0.1  # percentage, used for peak detection

    min_peak_datapoints: float = 5  # 0-inf, used for peak detection

    peak_max_prominence_percent: float = 0.1  # percentage, used for baseline detection

    peak_height_max_percent: float = 10  # percentage, used for baseline detection

    # Use the legacy (CoreMS v1) resolving power calculation (True)
    legacy_resolving_power: bool = True

    legacy_centroid_polyfit: bool = False

    def __post_init__(self):
        """Apply the default Kendrick base and coerce fields to their annotated types.

        Raises
        ------
        TypeError, ValueError
            Propagated from the type constructor when a value cannot be converted.
        """
        # default to CH2
        if not self.kendrick_base:
            self.kendrick_base = {"C": 1, "H": 2}
        # enforce datatype
        for field in dataclasses.fields(self):
            # typing aliases such as ``Dict`` cannot be called to build a value
            # ("Type Dict cannot be instantiated"); their ``__origin__`` is the
            # runtime class (``dict``). Plain classes have no ``__origin__`` and
            # are used as they are.
            expected_type = getattr(field.type, "__origin__", field.type)
            value = getattr(self, field.name)
            if not isinstance(value, expected_type):
                setattr(self, field.name, expected_type(value))

Mass spectrum peak processing settings class

Attributes
  • kendrick_base (Dict, optional): Dictionary specifying the elements and their counts in the Kendrick base. Defaults to {'C': 1, 'H': 2}.
  • kendrick_rounding_method (str, optional): Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. Defaults to 'floor'.
  • implemented_kendrick_rounding_methods (tuple): Tuple of valid rounding methods for calculating the nominal Kendrick mass. Defaults to ('floor', 'ceil', 'round').
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0.
  • peak_min_prominence_percent (float, optional): Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. Defaults to 0.1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 0.1.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • legacy_resolving_power (bool, optional): Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. Defaults to True.
  • legacy_centroid_polyfit (bool, optional): Use the legacy (numpy polyfit) method to fit the centroid. Default is False.
MassSpecPeakSetting( kendrick_base: Dict = <factory>, kendrick_rounding_method: str = 'floor', implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round'), peak_derivative_threshold: float = 0.0, peak_min_prominence_percent: float = 0.1, min_peak_datapoints: float = 5, peak_max_prominence_percent: float = 0.1, peak_height_max_percent: float = 10, legacy_resolving_power: bool = True, legacy_centroid_polyfit: bool = False)
kendrick_base: Dict
kendrick_rounding_method: str = 'floor'
implemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round')
peak_derivative_threshold: float = 0.0
peak_min_prominence_percent: float = 0.1
min_peak_datapoints: float = 5
peak_max_prominence_percent: float = 0.1
peak_height_max_percent: float = 10
legacy_resolving_power: bool = True
legacy_centroid_polyfit: bool = False
@dataclasses.dataclass
class GasChromatographSetting:
@dataclasses.dataclass
class GasChromatographSetting:
    """Settings that control gas chromatograph processing.

    Attributes
    ----------
    use_deconvolution : bool, optional
        If True, use deconvolution. Default is False.
    implemented_smooth_method : tuple, optional
        Names of the smoothing methods that are implemented.
        Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    smooth_window : int, optional
        Window size for smoothing the ion chromatogram. Default is 5.
    smooth_method : str, optional
        Smoothing method to use. Default is 'savgol'.
        Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    savgol_pol_order : int, optional
        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    peak_derivative_threshold : float, optional
        Threshold for defining derivative crossing, between 0 and 1.
        Default is 0.0005.
    peak_height_max_percent : float, optional
        Maximum height percentage (1-100) used for baseline detection.
        Use 0.1 for second_derivative and 10 for other methods. Default is 10.
    peak_max_prominence_percent : float, optional
        Maximum prominence percentage (1-100) used for baseline detection.
        Default is 1.
    min_peak_datapoints : float, optional
        Minimum number of data points used for peak detection, between 0 and
        infinity. Default is 5.
    max_peak_width : float, optional
        Maximum peak width used for peak detection, between 0 and infinity.
        Default is 0.1.
    noise_threshold_method : str, optional
        Name of the noise threshold detection method.
        Default is 'manual_relative_abundance'.
    noise_threshold_methods_implemented : tuple, optional
        Names of the noise threshold methods that are implemented. Default is
        ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    std_noise_threshold : int, optional
        Default is 3.
        NOTE(review): no description was provided for this setting - confirm
        its meaning against the noise threshold code.
    peak_height_min_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    peak_min_prominence_percent : float, optional
        0-100 % used for peak detection. Default is 0.1.
    eic_signal_threshold : float, optional
        0-100 % used for extracted ion chromatogram peak detection.
        Default is 0.01.
    max_rt_distance : float, optional
        Maximum distance allowance for the hierarchical cluster, in minutes.
        Default is 0.025.
    verbose_processing : bool, optional
        If True, print verbose processing information. Default is True.
    """

    use_deconvolution: bool = False

    # --- smoothing --------------------------------------------------------------
    implemented_smooth_method: tuple = (
        "savgol",
        "hanning",
        "blackman",
        "bartlett",
        "flat",
        "boxcar",
    )
    smooth_window: int = 5
    smooth_method: str = "savgol"
    savgol_pol_order: int = 2

    # --- peak and baseline detection --------------------------------------------
    peak_derivative_threshold: float = 0.0005
    # 1-100 %, baseline detection; 0.1 for second_derivative, 10 for other methods
    peak_height_max_percent: float = 10
    # 1-100 %, baseline detection
    peak_max_prominence_percent: float = 1
    min_peak_datapoints: float = 5
    max_peak_width: float = 0.1

    # --- noise thresholding -----------------------------------------------------
    noise_threshold_method: str = "manual_relative_abundance"
    noise_threshold_methods_implemented: tuple = (
        "auto_relative_abundance",
        "manual_relative_abundance",
        "second_derivative",
    )
    std_noise_threshold: int = 3

    # 0-100 %, peak detection
    peak_height_min_percent: float = 0.1
    # 0-100 %, peak detection
    peak_min_prominence_percent: float = 0.1
    # 0-100 %, extracted ion chromatogram peak detection
    eic_signal_threshold: float = 0.01
    # minutes, maximum distance allowance for the hierarchical cluster
    max_rt_distance: float = 0.025

    verbose_processing: bool = True

    def __post_init__(self):
        """Cast every field whose value is not an instance of its annotated type."""
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            if isinstance(current, spec.type):
                continue
            # the annotation doubles as the converter, e.g. float(10) -> 10.0
            setattr(self, spec.name, spec.type(current))

Gas chromatograph processing settings class

Attributes
  • use_deconvolution (bool, optional): If True, use deconvolution. Default is False.
  • implemented_smooth_method (tuple, optional): Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
  • smooth_window (int, optional): Window size for smoothing the ion chromatogram. Default is 5.
  • smooth_method (str, optional): Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
  • savgol_pol_order (int, optional): Polynomial order for Savitzky-Golay smoothing. Default is 2.
  • peak_derivative_threshold (float, optional): Threshold for defining derivative crossing. Should be a value between 0 and 1. Defaults to 0.0005.
  • peak_height_max_percent (float, optional): Maximum height percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 10.
  • peak_max_prominence_percent (float, optional): Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. Defaults to 1.
  • min_peak_datapoints (float, optional): Minimum number of data points used for peak detection. Should be a value between 0 and infinity. Defaults to 5.
  • max_peak_width (float, optional): Maximum peak width used for peak detection. Should be a value between 0 and infinity. Defaults to 0.1.
  • noise_threshold_method (str, optional): Method for detecting noise threshold. Default is 'manual_relative_abundance'.
  • noise_threshold_methods_implemented (tuple, optional): Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
  • std_noise_threshold (int, optional): Default is 3.
  • peak_height_min_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • peak_min_prominence_percent (float, optional): 0-100 % used for peak detection. Default is 0.1.
  • eic_signal_threshold (float, optional): 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
  • max_rt_distance (float, optional): Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
GasChromatographSetting( use_deconvolution: bool = False, implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), smooth_window: int = 5, smooth_method: str = 'savgol', savgol_pol_order: int = 2, peak_derivative_threshold: float = 0.0005, peak_height_max_percent: float = 10, peak_max_prominence_percent: float = 1, min_peak_datapoints: float = 5, max_peak_width: float = 0.1, noise_threshold_method: str = 'manual_relative_abundance', noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), std_noise_threshold: int = 3, peak_height_min_percent: float = 0.1, peak_min_prominence_percent: float = 0.1, eic_signal_threshold: float = 0.01, max_rt_distance: float = 0.025, verbose_processing: bool = True)
use_deconvolution: bool = False
implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
smooth_window: int = 5
smooth_method: str = 'savgol'
savgol_pol_order: int = 2
peak_derivative_threshold: float = 0.0005
peak_height_max_percent: float = 10
peak_max_prominence_percent: float = 1
min_peak_datapoints: float = 5
max_peak_width: float = 0.1
noise_threshold_method: str = 'manual_relative_abundance'
noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
std_noise_threshold: int = 3
peak_height_min_percent: float = 0.1
peak_min_prominence_percent: float = 0.1
eic_signal_threshold: float = 0.01
max_rt_distance: float = 0.025
verbose_processing: bool = True
@dataclasses.dataclass
class CompoundSearchSettings:
@dataclasses.dataclass
class CompoundSearchSettings:
    """Settings for compound search

    Attributes
    ----------
    url_database : str, optional
        URL for the database. The value is always replaced at construction by the
        SPECTRAL_GCMS_DATABASE_URL environment variable, falling back to
        'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite' when the variable is unset.
    ri_search_range : float, optional
        Retention index search range. Default is 35.
    rt_search_range : float, optional
        Retention time search range, in minutes. Default is 1.0.
    correlation_threshold : float, optional
        Threshold for correlation for spectral similarity. Default is 0.5.
    score_threshold : float, optional
        Threshold for composite score. Default is 0.0.
    ri_spacing : float, optional
        Retention index spacing. Default is 200.
    ri_std : float, optional
        Retention index standard deviation. Default is 3.
    ri_calibration_compound_names : list, optional
        List of compound names to use for retention index calibration. When omitted or empty,
        defaults to ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate',
        'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate',
        'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate',
        'Methyl Triacontanoate'].
    exploratory_mode : bool, optional
        If True, calculate and export all spectral similarity methods. Default is False.
    score_methods : tuple, optional
        Names of the score methods. Default is ('highest_sim_score', 'highest_ss').
    output_score_method : str, optional
        Score method to use for output. Default is 'All'.

    """

    # NOTE(review): this default embeds database credentials and is never effective,
    # because __post_init__ always replaces url_database. Consider removing it.
    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"

    ri_search_range: float = 35

    rt_search_range: float = 1.0  # used for retention index calibration

    correlation_threshold: float = 0.5  # used for calibration, spectral similarity

    score_threshold: float = 0.0

    ri_spacing: float = 200

    ri_std: float = 3  # in standard deviation

    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)

    # calculates and export all spectral similarity methods
    exploratory_mode: bool = False

    score_methods: tuple = ("highest_sim_score", "highest_ss")

    output_score_method: str = "All"

    def __post_init__(self):
        # The database URL always comes from the environment (or the bundled
        # sqlite file); any value given to the constructor is superseded.
        self.url_database = os.getenv(
            "SPECTRAL_GCMS_DATABASE_URL",
            "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
        )

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                value = field.type(value)
                setattr(self, field.name, value)

        # Fill in the standard calibration compounds only when the caller did
        # not provide any, so a user-supplied list is no longer discarded.
        if not self.ri_calibration_compound_names:
            self.ri_calibration_compound_names = [
                "Methyl Caprylate",
                "Methyl Caprate",
                "Methyl Pelargonate",
                "Methyl Laurate",
                "Methyl Myristate",
                "Methyl Palmitate",
                "Methyl Stearate",
                "Methyl Eicosanoate",
                "Methyl Docosanoate",
                "Methyl Linocerate",
                "Methyl Hexacosanoate",
                "Methyl Octacosanoate",
                "Methyl Triacontanoate",
            ]

Settings for compound search

Attributes
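  • url_database (str, optional): URL for the database. Always read at construction from the SPECTRAL_GCMS_DATABASE_URL environment variable, falling back to 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite' when the variable is unset.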
  • ri_search_range (float, optional): Retention index search range. Default is 35.
  • rt_search_range (float, optional): Retention time search range, in minutes. Default is 1.0.
  • correlation_threshold (float, optional): Threshold for correlation for spectral similarity. Default is 0.5.
  • score_threshold (float, optional): Threshold for composite score. Default is 0.0.
  • ri_spacing (float, optional): Retention index spacing. Default is 200.
  • ri_std (float, optional): Retention index standard deviation. Default is 3.
  • ri_calibration_compound_names (list, optional): List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
CompoundSearchSettings( url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres', ri_search_range: float = 35, rt_search_range: float = 1.0, correlation_threshold: float = 0.5, score_threshold: float = 0.0, ri_spacing: float = 200, ri_std: float = 3, ri_calibration_compound_names: List = <factory>, exploratory_mode: bool = False, score_methods: tuple = ('highest_sim_score', 'highest_ss'), output_score_method: str = 'All')
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres'
ri_search_range: float = 35
rt_search_range: float = 1.0
correlation_threshold: float = 0.5
score_threshold: float = 0.0
ri_spacing: float = 200
ri_std: float = 3
ri_calibration_compound_names: List
exploratory_mode: bool = False
score_methods: tuple = ('highest_sim_score', 'highest_ss')
output_score_method: str = 'All'
class MolecularLookupDictSettings:
class MolecularLookupDictSettings:
    """Settings used to generate the molecular formula database entries

    Do not change these values for an existing database; change them only
    when creating a new application database.

    Attributes
    ----------
    usedAtoms : dict
        Dictionary of atoms and (min, max) ranges. Set to {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    min_mz : int
        Minimum m/z. Set to 50.
    max_mz : int
        Maximum m/z. Set to 1200.
    min_dbe : int
        Minimum double bond equivalent. Set to 0.
    max_dbe : int
        Maximum double bond equivalent. Set to 50.
    use_pah_line_rule : bool
        If True, use the PAH line rule. Set to False.
    isRadical : bool
        Radical ions flag. Set to True.
    isProtonated : bool
        Protonated ions flag. Set to True.
    url_database : str or None
        URL for the database. Set to None.
    db_jobs : int
        Number of jobs to use for database queries. Set to 1.
    used_atom_valences : dict
        Dictionary of atoms and valences. Set to {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.

    """

    # For runtime search settings and database query checks, use the
    # MolecularFormulaSearchSettings class instead.
    #
    # C, H, N, O, S and P must ALWAYS be present in usedAtoms; to leave one of
    # them out of the search, set both its min and max to 0.
    # Any atom listed in the Atoms class of the encapsulation.settings.constants
    # module may be added; when adding one, also add its selected covalence to
    # used_atom_valences.
    # NOTE: adduct atoms have zero covalence.
    # NOTE: values are instance attributes rather than static variables because
    # this class is distributed using multiprocessing.
    def __init__(self):
        atom_ranges = (
            ("C", (1, 90)),
            ("H", (4, 200)),
            ("O", (0, 12)),
            ("N", (0, 0)),
            ("S", (0, 0)),
            ("P", (0, 0)),
            ("Cl", (0, 0)),
        )
        self.usedAtoms = dict(atom_ranges)

        self.min_mz, self.max_mz = 50, 1200
        self.min_dbe, self.max_dbe = 0, 50

        # when enabled, the dbe limits above are overridden by
        # DBE = (C + heteroatoms) * 0.9
        self.use_pah_line_rule = False

        self.isRadical = self.isProtonated = True

        self.url_database = None
        self.db_jobs = 1

        atom_valences = (
            ("C", 4),
            ("13C", 4),
            ("H", 1),
            ("O", 2),
            ("18O", 2),
            ("N", 3),
            ("S", 2),
            ("34S", 2),
            ("P", 3),
            ("Cl", 1),
            ("37Cl", 1),
            ("Br", 1),
            ("Na", 1),
            ("F", 1),
            ("K", 0),
        )
        self.used_atom_valences = dict(atom_valences)

Settings for molecular searching

These are used to generate the database entries, do not change.

Attributes
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
  • min_mz (float, optional): Minimum m/z to use for searching. Default is 50.0.
  • max_mz (float, optional): Maximum m/z to use for searching. Default is 1200.0.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 50.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • isRadical (bool, optional): If True, search for radical ions. Default is True.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • url_database (str, optional): URL for the database. Default is None.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 1.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
usedAtoms
min_mz
max_mz
min_dbe
max_dbe
use_pah_line_rule
isRadical
isProtonated
url_database
db_jobs
used_atom_valences
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
@dataclasses.dataclass
class MolecularFormulaSearchSettings:
    """Settings for molecular searching

    Attributes
    ----------
    use_isotopologue_filter : bool, optional
        If True, use isotopologue filter. Default is False.
    isotopologue_filter_threshold : float, optional
        Threshold for isotopologue filter. Default is 33.
    isotopologue_filter_atoms : tuple, optional
        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    use_runtime_kendrick_filter : bool, optional
        If True, use runtime Kendrick filter. Default is False.
    use_min_peaks_filter : bool, optional
        If True, use minimum peaks filter. Default is True.
    min_peaks_per_class : int, optional
        Minimum number of peaks per class. Default is 15.
    url_database : str, optional
        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
        An empty value is replaced by the COREMS_DATABASE_URL environment variable,
        or by 'sqlite:///db/molformula.db' when that variable is unset.
    db_jobs : int, optional
        Number of jobs to use for database queries. Default is 3.
    db_chunk_size : int, optional
        Chunk size to use for database queries. Default is 300.
    ion_charge : int, optional
        Ion charge. Default is -1.
    min_hc_filter : float, optional
        Minimum hydrogen to carbon ratio. Default is 0.3.
    max_hc_filter : float, optional
        Maximum hydrogen to carbon ratio. Default is 3.
    min_oc_filter : float, optional
        Minimum oxygen to carbon ratio. Default is 0.0.
    max_oc_filter : float, optional
        Maximum oxygen to carbon ratio. Default is 1.2.
    min_op_filter : float, optional
        Minimum oxygen to phosphorus ratio. Default is 2.
    use_pah_line_rule : bool, optional
        If True, use the PAH line rule. Default is False.
    min_dbe : float, optional
        Minimum double bond equivalent to use for searching. Default is 0.
    max_dbe : float, optional
        Maximum double bond equivalent to use for searching. Default is 40.
    mz_error_score_weight : float, optional
        Weight for m/z error score to contribute to composite score. Default is 0.6.
    isotopologue_score_weight : float, optional
        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    adduct_atoms_neg : tuple, optional
        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    adduct_atoms_pos : tuple, optional
        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    score_methods : tuple, optional
        Tuple of the score methods that are implemented.
        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    score_method : str, optional
        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    output_min_score : float, optional
        Minimum score for output. Default is 0.1.
    output_score_method : str, optional
        Score method to use for output. Default is 'All Candidates'.
    isRadical : bool, optional
        If True, search for radical ions. Default is False.
    isProtonated : bool, optional
        If True, search for protonated ions. Default is True.
    isAdduct : bool, optional
        If True, search for adduct ions. Default is False.
    usedAtoms : dict, optional
        Dictionary of atoms and ranges. Default is an empty dictionary;
        'C': (1, 100) and 'H': (1, 200) are added at construction when missing.
    ion_types_excluded : list, optional
        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    ionization_type : str, optional
        Ionization type. Default is 'ESI'.
    min_ppm_error : float, optional
        Minimum ppm error. Default is -10.0.
    max_ppm_error : float, optional
        Maximum ppm error. Default is 10.0.
    min_abun_error : float, optional
        Minimum abundance error for isotopologue search. Default is -100.0.
    max_abun_error : float, optional
        Maximum abundance error for isotopologue search. Default is 100.0.
    mz_error_range : float, optional
        m/z error range. Default is 1.5.
    error_method : str, optional
        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    mz_error_average : float, optional
        m/z error average. Default is 0.0.
    used_atom_valences : dict, optional
        Dictionary of atoms and valences. Default is an empty dictionary; at construction every
        atom of Atoms.atoms_covalence that is not already present is added with its covalence
        (the first value when several are listed).
    verbose_processing: bool, optional
        If True, print verbose processing information. Default is True.
    """

    verbose_processing: bool = True

    use_isotopologue_filter: bool = False

    isotopologue_filter_threshold: float = 33

    isotopologue_filter_atoms: tuple = ("Cl", "Br")

    use_runtime_kendrick_filter: bool = False

    use_min_peaks_filter: bool = True

    min_peaks_per_class: int = 15

    # NOTE(review): this default embeds database credentials; consider sourcing
    # it from the environment instead.
    url_database: str = (
        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    )

    db_jobs: int = 3

    db_chunk_size: int = 300

    # query setting========
    ion_charge: int = -1

    min_hc_filter: float = 0.3

    max_hc_filter: float = 3

    min_oc_filter: float = 0.0

    max_oc_filter: float = 1.2

    min_op_filter: float = 2

    use_pah_line_rule: bool = False

    min_dbe: float = 0

    max_dbe: float = 40

    mz_error_score_weight: float = 0.6

    isotopologue_score_weight: float = 0.4

    # look for closed-shell ions [M + Adduct]+; only considers the atoms set in
    # the adduct_atoms tuples below
    adduct_atoms_neg: tuple = ("Cl", "Br")

    adduct_atoms_pos: tuple = ("Na", "K")

    score_methods: tuple = (
        "S_P_lowest_error",
        "N_S_P_lowest_error",
        "lowest_error",
        "prob_score",
        "air_filter_error",
        "water_filter_error",
        "earth_filter_error",
    )

    score_method: str = "prob_score"

    output_min_score: float = 0.1

    output_score_method: str = "All Candidates"

    # depending on the polarity mode it looks for [M].+ , [M].-
    # query and automatically compile add entry if it doesn't exist

    isRadical: bool = False

    # depending on the polarity mode it looks for [M + H]+ , [M - H]-
    # query and automatically compile and push options if it doesn't exist
    isProtonated: bool = True

    isAdduct: bool = False

    usedAtoms: dict = dataclasses.field(default_factory=dict)
    ion_types_excluded: list = dataclasses.field(default_factory=list)

    # search setting ========

    ionization_type: str = "ESI"

    # empirically set / needs optimization
    min_ppm_error: float = -10.0  # ppm

    # empirically set / needs optimization
    max_ppm_error: float = 10.0  # ppm

    # empirically set / needs optimization set for isotopologue search
    min_abun_error: float = -100.0  # percentage

    # empirically set / needs optimization set for isotopologue search
    max_abun_error: float = 100.0  # percentage

    # empirically set / needs optimization
    mz_error_range: float = 1.5

    # 'distance', 'lowest', 'symmetrical', 'average', 'None'
    error_method: str = "None"

    mz_error_average: float = 0.0

    # maps atom symbol to valence, e.g. {'C': 4, 'H': 1}
    used_atom_valences: dict = dataclasses.field(default_factory=dict)

    def __post_init__(self):
        # An empty (or None) URL falls back to the environment, then to sqlite.
        if not self.url_database:
            self.url_database = os.getenv(
                "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
            )

        # enforce datatype
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            if not isinstance(value, field.type):
                value = field.type(value)
                setattr(self, field.name, value)

        # enforce C and H if either does not exist
        if "C" not in self.usedAtoms:
            self.usedAtoms["C"] = (1, 100)
        if "H" not in self.usedAtoms:
            self.usedAtoms["H"] = (1, 200)

        # add common values: every known atom the caller did not configure
        for atom, covalence in Atoms.atoms_covalence.items():
            if atom in self.used_atom_valences:
                continue

            if isinstance(covalence, int):
                self.used_atom_valences[atom] = covalence
            else:
                # take the first of all possible covalences, which should be
                # the most common one
                self.used_atom_valences[atom] = covalence[0]

Settings for molecular searching

Attributes
  • use_isotopologue_filter (bool, optional): If True, use isotopologue filter. Default is False.
  • isotopologue_filter_threshold (float, optional): Threshold for isotopologue filter. Default is 33.
  • isotopologue_filter_atoms (tuple, optional): Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
  • use_runtime_kendrick_filter (bool, optional): If True, use runtime Kendrick filter. Default is False.
  • use_min_peaks_filter (bool, optional): If True, use minimum peaks filter. Default is True.
  • min_peaks_per_class (int, optional): Minimum number of peaks per class. Default is 15.
  • url_database (str, optional): URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
  • db_jobs (int, optional): Number of jobs to use for database queries. Default is 3.
  • db_chunk_size (int, optional): Chunk size to use for database queries. Default is 300.
  • ion_charge (int, optional): Ion charge. Default is -1.
  • min_hc_filter (float, optional): Minimum hydrogen to carbon ratio. Default is 0.3.
  • max_hc_filter (float, optional): Maximum hydrogen to carbon ratio. Default is 3.
  • min_oc_filter (float, optional): Minimum oxygen to carbon ratio. Default is 0.0.
  • max_oc_filter (float, optional): Maximum oxygen to carbon ratio. Default is 1.2.
  • min_op_filter (float, optional): Minimum oxygen to phosphorus ratio. Default is 2.
  • use_pah_line_rule (bool, optional): If True, use the PAH line rule. Default is False.
  • min_dbe (float, optional): Minimum double bond equivalent to use for searching. Default is 0.
  • max_dbe (float, optional): Maximum double bond equivalent to use for searching. Default is 40.
  • mz_error_score_weight (float, optional): Weight for m/z error score to contribute to composite score. Default is 0.6.
  • isotopologue_score_weight (float, optional): Weight for isotopologue score to contribute to composite score. Default is 0.4.
  • adduct_atoms_neg (tuple, optional): Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
  • adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
  • score_methods (tuple, optional): Tuple of the score methods that are implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
  • score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
  • output_min_score (float, optional): Minimum score for output. Default is 0.1.
  • output_score_method (str, optional): Score method to use for output. Default is 'All Candidates'.
  • isRadical (bool, optional): If True, search for radical ions. Default is False.
  • isProtonated (bool, optional): If True, search for protonated ions. Default is True.
  • isAdduct (bool, optional): If True, search for adduct ions. Default is False.
  • usedAtoms (dict, optional): Dictionary of atoms and ranges. Default is an empty dictionary; 'C': (1, 100) and 'H': (1, 200) are added at construction when missing.
  • ion_types_excluded (list, optional): List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
  • ionization_type (str, optional): Ionization type. Default is 'ESI'.
  • min_ppm_error (float, optional): Minimum ppm error. Default is -10.0.
  • max_ppm_error (float, optional): Maximum ppm error. Default is 10.0.
  • min_abun_error (float, optional): Minimum abundance error for isotopologue search. Default is -100.0.
  • max_abun_error (float, optional): Maximum abundance error for isotopologue search. Default is 100.0.
  • mz_error_range (float, optional): m/z error range. Default is 1.5.
  • error_method (str, optional): Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
  • mz_error_average (float, optional): m/z error average. Default is 0.0.
  • used_atom_valences (dict, optional): Dictionary of atoms and valences. Default is an empty dictionary; at construction every atom of Atoms.atoms_covalence that is not already present is added with its covalence (the first value when several are listed).
  • verbose_processing (bool, optional): If True, print verbose processing information. Default is True.
MolecularFormulaSearchSettings( verbose_processing: bool = True, use_isotopologue_filter: bool = False, isotopologue_filter_threshold: float = 33, isotopologue_filter_atoms: tuple = ('Cl', 'Br'), use_runtime_kendrick_filter: bool = False, use_min_peaks_filter: bool = True, min_peaks_per_class: int = 15, url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp', db_jobs: int = 3, db_chunk_size: int = 300, ion_charge: int = -1, min_hc_filter: float = 0.3, max_hc_filter: float = 3, min_oc_filter: float = 0.0, max_oc_filter: float = 1.2, min_op_filter: float = 2, use_pah_line_rule: bool = False, min_dbe: float = 0, max_dbe: float = 40, mz_error_score_weight: float = 0.6, isotopologue_score_weight: float = 0.4, adduct_atoms_neg: tuple = ('Cl', 'Br'), adduct_atoms_pos: tuple = ('Na', 'K'), score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'), score_method: str = 'prob_score', output_min_score: float = 0.1, output_score_method: str = 'All Candidates', isRadical: bool = False, isProtonated: bool = True, isAdduct: bool = False, usedAtoms: dict = <factory>, ion_types_excluded: list = <factory>, ionization_type: str = 'ESI', min_ppm_error: float = -10.0, max_ppm_error: float = 10.0, min_abun_error: float = -100.0, max_abun_error: float = 100.0, mz_error_range: float = 1.5, error_method: str = 'None', mz_error_average: float = 0.0, used_atom_valences: dict = <factory>)
verbose_processing: bool = True
use_isotopologue_filter: bool = False
isotopologue_filter_threshold: float = 33
isotopologue_filter_atoms: tuple = ('Cl', 'Br')
use_runtime_kendrick_filter: bool = False
use_min_peaks_filter: bool = True
min_peaks_per_class: int = 15
url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'
db_jobs: int = 3
db_chunk_size: int = 300
ion_charge: int = -1
min_hc_filter: float = 0.3
max_hc_filter: float = 3
min_oc_filter: float = 0.0
max_oc_filter: float = 1.2
min_op_filter: float = 2
use_pah_line_rule: bool = False
min_dbe: float = 0
max_dbe: float = 40
mz_error_score_weight: float = 0.6
isotopologue_score_weight: float = 0.4
adduct_atoms_neg: tuple = ('Cl', 'Br')
adduct_atoms_pos: tuple = ('Na', 'K')
score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error')
score_method: str = 'prob_score'
output_min_score: float = 0.1
output_score_method: str = 'All Candidates'
isRadical: bool = False
isProtonated: bool = True
isAdduct: bool = False
usedAtoms: dict
ion_types_excluded: list
ionization_type: str = 'ESI'
min_ppm_error: float = -10.0
max_ppm_error: float = 10.0
min_abun_error: float = -100.0
max_abun_error: float = 100.0
mz_error_range: float = 1.5
error_method: str = 'None'
mz_error_average: float = 0.0
used_atom_valences: dict