corems.molecular_id.factory.classification

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jan 31, 2020"
  3
  4from collections.abc import Mapping
  5
  6from matplotlib import pyplot as plt
  7from numpy import linspace
  8
  9from corems.encapsulation.constant import Atoms, Labels
 10
 11flatten_list = lambda l: [item for sublist in l for item in sublist]
 12
 13
class HeteroatomsClassification(Mapping):
    """Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)

    The object behaves as a read-only mapping whose keys are class labels and
    whose values are lists of ms_peak objects. Peaks without any molecular
    formula are stored under ``Labels.unassigned``.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum object.
    choose_molecular_formula : bool, optional
        If True, only the ``best_molecular_formula_candidate`` of each peak is considered.
        If False, all molecular formulas of each peak are considered. Default is True.

    Raises
    ------
    Exception
        If no molecular formula is associated with any mspeak objects.

    Attributes
    ----------
    _ms_grouped_class : dict
        A dictionary of classes and a list of ms_peak objects.
    choose_mf : bool
        The value passed as ``choose_molecular_formula``.
    total_peaks : int
        The total number of peaks (assigned and unassigned).
    sum_abundance : float
        The sum of the abundance of all peaks (assigned and unassigned).
    min_max_mz : tuple
        The minimum and maximum experimental mz values of the mass spectrum.
    min_max_abundance : tuple
        The minimum and maximum abundance values of the mass spectrum.
    min_ppm_error : float
        The minimum ppm error, read from the molecular search settings.
    max_ppm_error : float
        The maximum ppm error, read from the molecular search settings.
    all_identified_atoms : list
        A list of all identified atoms, sorted by ``Atoms.atoms_order``.

    Methods
    -------
    * __len__().
        Returns the number of classes.
    * __getitem__(classe)
        Returns the ms_peak list for specified class.
    * __iter__().
        Returns an iterator over the keys of the dictionary.
    * get_classes(threshold_perc=1, isotopologue=True).
        Returns a list of classes with abundance percentage above threshold.
    * molecular_formula_string(classe).
        Returns a list of molecular formula for specified class.
    * molecular_formula(classe).
        Returns a list of molecular formula for specified class.
    * carbon_number(classe).
        Returns a list of carbon number for specified class.
    * atom_count(atom, classe).
        Returns a list of atom count for specified class.
    * dbe(classe).
        Returns a list of DBE for specified class.
    * atoms_ratio(classe, numerator, denominator).
        Returns a list of atoms ratio for specified class.
    * mz_exp(classe).
        Returns a list of experimental mz for specified class.
    * abundance(classe).
        Returns a list of abundance for specified class.
    * mz_error(classe).
        Returns a list of mz error for specified class.
    * mz_calc(classe).
        Returns a list of calculated mz for specified class.
    * peaks_count_percentile(classe).
        Returns the peaks count percentage of a specific class.
    * abundance_count_percentile(classe).
        Returns the abundance percentage of a specific class.
    * mz_exp_assigned().
        Returns a list of experimental mz for all assigned classes.
    * abundance_assigned().
        Returns a list of abundance for all assigned classes.
    * mz_exp_all().
        Returns a list of mz for all assigned classes.
    * mz_error_all().
        Returns a list of mz error for all assigned classes.
    * carbon_number_all().
        Returns a list of carbon number for all assigned classes.
    * dbe_all().
        Returns a list of DBE for all assigned classes.
    * atoms_ratio_all(numerator, denominator).
        Returns a list of atoms ratio for all assigned classes.
    * to_dataframe(include_isotopologue=False, abundance_perc_threshold=5, include_unassigned=False).
        Returns a pandas dataframe built from the grouped peaks.
    * plot_ms_assigned_unassigned(), plot_mz_error(), plot_mz_error_class(classe),
      plot_ms_class(classe), plot_van_krevelen(classe), plot_dbe_vs_carbon_number(classe).
        Draw on the current matplotlib axes and return it.

    """

    # Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)

    #   class obj behaves as a dictionary of classes and return a list of ms_peak obj

 97    def __init__(self, mass_spectrum, choose_molecular_formula=True):
 98        def sort_atoms_method(atom):
 99            """Sort atoms by order of appearance in the Atoms class"""
100            return [Atoms.atoms_order.index(atom)]
101
102        self._ms_grouped_class = dict()
103
104        self.choose_mf = choose_molecular_formula
105
106        # mapping for ms peaks without any molecular formula associated
107        self._ms_grouped_class[Labels.unassigned] = list()
108
109        self.total_peaks = 0
110
111        self.sum_abundance = 0
112
113        self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp)
114
115        self.min_max_abundance = (
116            mass_spectrum.min_abundance,
117            mass_spectrum.max_abundance,
118        )
119
120        self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error
121
122        self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error
123
124        check_assign = False
125
126        all_used_atoms = set()
127
128        for ms_peak in mass_spectrum:
129            self.total_peaks += 1
130
131            self.sum_abundance += ms_peak.abundance
132
133            if not ms_peak.is_assigned:
134                self._ms_grouped_class.get(Labels.unassigned).append(ms_peak)
135
136            else:
137                check_assign = True
138
139                if choose_molecular_formula:
140                    mf = ms_peak.best_molecular_formula_candidate
141
142                    classes = [mf.class_label]
143
144                    for atom in mf.atoms:
145                        all_used_atoms.add(atom)
146
147                else:
148                    classes = []
149
150                    for mf in ms_peak:
151                        classes.append(mf.class_label)
152
153                        for atom in mf.atoms:
154                            all_used_atoms.add(atom)
155
156                for classe in classes:
157                    if classe in self._ms_grouped_class.keys():
158                        self._ms_grouped_class.get(classe).append(ms_peak)
159
160                    else:
161                        self._ms_grouped_class[classe] = [ms_peak]
162
163        self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method)
164
165        if not check_assign:
166            raise Exception("No molecular formula associated with any mspeak objects")
167
168    def __len__(self):
169        """Return the number of classes"""
170        return len(self._ms_grouped_class)
171
172    def __getitem__(self, classe):
173        """Return the molecular formula list for specified class"""
174        return self._ms_grouped_class.get(classe)
175
176    def __iter__(self):
177        """Return an iterator over the keys of the dictionary."""
178        return iter(self._ms_grouped_class)
179
180    def get_classes(self, threshold_perc=1, isotopologue=True):
181        """Return a list of classes with abundance percentile above threshold"""
182        classes = list()
183        for classe in self.keys():
184            if classe != Labels.unassigned:
185                if self.abundance_count_percentile(classe) > threshold_perc:
186                    if classe != Labels.unassigned:
187                        # access first molecular formula inside the first ms peak and check isotopologue
188                        if not isotopologue and self.get(classe)[0][0].is_isotopologue:
189                            continue
190
191                    classes.append(classe)
192        # TODO sort classes chemically here too
193        return classes
194
195    def molecular_formula_string(
196        self,
197        classe,
198    ):
199        """Return a list of molecular formula string for specified class"""
200        if self.choose_mf:
201            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
202        else:
203            return [
204                mf
205                for mspeak in self[classe]
206                for mf in mspeak
207                if mf.class_label == classe
208            ]
209
210    def molecular_formula(
211        self,
212        classe,
213    ):
214        """Return a list of molecular formula for specified class"""
215        if self.choose_mf:
216            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
217        else:
218            return [
219                mf
220                for mspeak in self[classe]
221                for mf in mspeak
222                if mf.class_label == classe
223            ]
224
225    def carbon_number(self, classe):
226        """Return a list of carbon number for specified class"""
227        if self.choose_mf:
228            return [
229                mspeak.best_molecular_formula_candidate.get("C")
230                for mspeak in self[classe]
231            ]
232        else:
233            return [
234                mf.get("C")
235                for mspeak in self[classe]
236                for mf in mspeak
237                if mf.class_label == classe
238            ]
239
240    def atom_count(self, atom, classe):
241        """Return a list of atom count for specified class"""
242
243        if self.choose_mf:
244            return [
245                mspeak.best_molecular_formula_candidate.get(atom)
246                for mspeak in self[classe]
247            ]
248        else:
249            return [
250                mf.get(atom)
251                for mspeak in self[classe]
252                for mf in mspeak
253                if mf.class_label == classe
254            ]
255
256    def dbe(self, classe):
257        """Return a list of DBE for specified class"""
258        if self.choose_mf:
259            return [
260                mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]
261            ]
262        else:
263            return [
264                mf.dbe
265                for mspeak in self[classe]
266                for mf in mspeak
267                if mf.class_label == classe
268            ]
269
270    def atoms_ratio(self, classe, numerator, denominator):
271        """Return a list of atoms ratio for specified class"""
272        return [
273            mf.get(numerator) / mf.get(denominator)
274            for mf in self.molecular_formula(classe)
275        ]
276
277    def mz_exp(self, classe):
278        """Return a list of experimental mz for specified class"""
279        if self.choose_mf or classe == Labels.unassigned:
280            return [mspeak.mz_exp for mspeak in self[classe]]
281
282        else:
283            return [
284                mspeak.mz_exp
285                for mspeak in self[classe]
286                for mf in mspeak
287                if mf.class_label == classe
288            ]
289
290    def abundance(self, classe):
291        """Return a list of abundance for specified class"""
292        if self.choose_mf or classe == Labels.unassigned:
293            return [mspeak.abundance for mspeak in self[classe]]
294
295        else:
296            return [
297                mspeak.abundance
298                for mspeak in self[classe]
299                for mf in mspeak
300                if mf.class_label == classe
301            ]
302
303    def mz_error(self, classe):
304        """Return a list of mz error for specified class"""
305        if classe != Labels.unassigned:
306            if self.choose_mf:
307                return [
308                    mspeak.best_molecular_formula_candidate.mz_error
309                    for mspeak in self[classe]
310                ]
311
312            else:
313                return [
314                    mf.mz_error
315                    for mspeak in self[classe]
316                    for mf in mspeak
317                    if mf.class_label == classe
318                ]
319
320    def mz_calc(self, classe):
321        """Return a list of calculated mz for specified class"""
322        if self.choose_mf:
323            return [
324                mspeak.best_molecular_formula_candidate.mz_calc
325                for mspeak in self[classe]
326            ]
327
328        else:
329            return [
330                mf.mz_calc
331                for mspeak in self[classe]
332                for mf in mspeak
333                if mf.class_label == classe
334            ]
335
336    def peaks_count_percentile(self, classe):
337        """Return the peaks count percentile of a specific class"""
338        return (len(self[classe]) / self.total_peaks) * 100
339
340    def abundance_count_percentile(self, classe):
341        """Return the abundance percentile of a specific class"""
342        return (
343            sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance
344        ) * 100
345
346    def mz_exp_assigned(self):
347        """Return a list of experimental mz for all assigned classes"""
348        classes = self.keys()
349
350        return [
351            mspeak.mz_exp
352            for classe in classes
353            for mspeak in self[classe]
354            if classe != Labels.unassigned
355        ]
356
357    def abundance_assigned(self):
358        """Return a list of abundance for all classes"""
359        classes = self.keys()
360
361        return [
362            mspeak.abundance
363            for classe in classes
364            for mspeak in self[classe]
365            if classe != Labels.unassigned
366        ]
367
368    def mz_exp_all(self):
369        """Return a list of mz for all classes"""
370        classes = self.keys()
371
372        return flatten_list(
373            [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]
374        )
375
376    def mz_error_all(self):
377        """Return a list of mz error for all classes"""
378        classes = self.keys()
379
380        return flatten_list(
381            [self.mz_error(classe) for classe in classes if classe != Labels.unassigned]
382        )
383
384    def carbon_number_all(self):
385        """Return a list of carbon number for all classes"""
386        classes = self.keys()
387
388        return flatten_list(
389            [
390                self.carbon_number(classe)
391                for classe in classes
392                if classe != Labels.unassigned
393            ]
394        )
395
396    def dbe_all(self):
397        """Return a list of DBE for all classes"""
398        classes = self.keys()
399
400        return flatten_list(
401            [self.dbe(classe) for classe in classes if classe != Labels.unassigned]
402        )
403
404    def atoms_ratio_all(self, numerator, denominator):
405        """Return a list of atoms ratio for all classes"""
406        classes = self.keys()
407
408        return flatten_list(
409            [
410                self.atoms_ratio(classe, numerator, denominator)
411                for classe in classes
412                if classe != Labels.unassigned
413            ]
414        )
415
416    def to_dataframe(
417        self,
418        include_isotopologue=False,
419        abundance_perc_threshold=5,
420        include_unassigned=False,
421    ):
422        """Return a pandas dataframe with all the data from the class
423
424        Parameters
425        ----------
426        include_isotopologue : bool, optional
427            Include isotopologues, by default False
428        abundance_perc_threshold : int, optional
429            Abundance percentile threshold, by default 5
430        include_unassigned : bool, optional
431            Include unassigned peaks, by default False
432
433        Returns
434        -------
435        DataFrame
436            Pandas dataframe with all the data from the class
437        """
438        from pandas import DataFrame
439
440        columns_labels = [
441            "mz",
442            "calibrated_mz",
443            "calculated_m_z",
444            "abundance",
445            "resolving_power",
446            "sn",
447            "ion_charge",
448            "mass_error",
449            "DBE",
450            "class",
451            "HC",
452            "OC",
453            "ion_type",
454            "is_isotopologue",
455            "class_abundance",
456            "class_count",
457        ]
458
459        dict_data_list = []
460
461        for classe, list_mspeaks in self.items():
462            percent_abundance = self.abundance_count_percentile(classe)
463
464            # ignores low abundant classes
465            if abundance_perc_threshold < abundance_perc_threshold:
466                continue
467
468            peaks_count_percentile = self.peaks_count_percentile(classe)
469
470            for ms_peak in list_mspeaks:
471                if ms_peak.is_assigned:
472                    for m_formula in ms_peak:
473                        # ignores isotopologues
474                        if not include_isotopologue and m_formula.is_isotopologue:
475                            continue
476
477                        formula_dict = m_formula.to_dict()
478
479                        dict_result = {
480                            "mz": ms_peak._mz_exp,
481                            "calibrated_mz": ms_peak.mz_exp,
482                            "calculated_mz": m_formula.mz_calc,
483                            "abundance": ms_peak.abundance,
484                            "resolving_power": ms_peak.resolving_power,
485                            "sn": ms_peak.signal_to_noise,
486                            "ion_charge": ms_peak.ion_charge,
487                            "mass_error": m_formula.mz_error,
488                            "DBE": m_formula.dbe,
489                            "class": classe,
490                            "HC": m_formula.H_C,
491                            "OC": m_formula.O_C,
492                            "ion_type": str(m_formula.ion_type.lower().encode("utf-8")),
493                            "is_isotopologue": int(m_formula.is_isotopologue),
494                            "class_abundance": percent_abundance,
495                            "class_count": peaks_count_percentile,
496                        }
497
498                        for atom in formula_dict.keys():
499                            dict_result[atom] = formula_dict.get(atom)
500
501                    dict_data_list.append(dict_result)
502
503                else:
504                    if not include_unassigned:
505                        continue
506
507                    dict_result = {
508                        "mz": ms_peak._mz_exp,
509                        "calibrated_mz": ms_peak.mz_exp,
510                        "abundance": ms_peak.abundance,
511                        "resolving_power": ms_peak.resolving_power,
512                        "sn": ms_peak.signal_to_noise,
513                        "ion_charge": ms_peak.ion_charge,
514                        "class": classe,
515                        "class_abundance": percent_abundance,
516                        "class_count": percent_abundance,
517                    }
518
519                    dict_data_list.append(dict_result)
520
521        columns = columns_labels + self.all_identified_atoms
522
523        return DataFrame(dict_data_list, columns=columns)
524
525    def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"):
526        """Plot stick mass spectrum of all classes
527
528        Parameters
529        ----------
530        assigned_color : str, optional
531            Matplotlib color for the assigned peaks, by default "b"
532        unassigned_color : str, optional
533            Matplotlib color for the unassigned peaks, by default "r"
534
535        Returns
536        -------
537        ax : matplotlib.axes
538            Matplotlib axes object
539        """
540        mz_assigned = self.mz_exp_assigned()
541        abundance_assigned = self.abundance_assigned()
542
543        mz_not_assigned = self.mz_exp(Labels.unassigned)
544        abundance_not_assigned = self.abundance(Labels.unassigned)
545
546        ax = plt.gca()
547
548        for plot_obj in ax.stem(
549            mz_assigned,
550            abundance_assigned,
551            linefmt="-",
552            markerfmt=" ",
553            label="Assigned",
554        ):
555            plt.setp(plot_obj, "color", assigned_color, "linewidth", 2)
556
557        for plot_obj in ax.stem(
558            mz_not_assigned,
559            abundance_not_assigned,
560            linefmt="-",
561            markerfmt=" ",
562            label="Unassigned",
563        ):
564            plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2)
565
566        ax.set_xlabel("$\t{m/z}$", fontsize=12)
567        ax.set_ylabel("Abundance", fontsize=12)
568        ax.tick_params(axis="both", which="major", labelsize=12)
569
570        ax.axes.spines["top"].set_visible(False)
571        ax.axes.spines["right"].set_visible(False)
572
573        ax.get_yaxis().set_visible(False)
574        ax.spines["left"].set_visible(False)
575        plt.legend()
576
577        return ax
578
579    def plot_mz_error(self, color="g"):
580        """Plot mz error scatter plot of all classes
581
582        Parameters
583        ----------
584        color : str, optional
585            Matplotlib color, by default "g"
586
587        Returns
588        -------
589        ax : matplotlib.axes
590            Matplotlib axes object
591        """
592        ax = plt.gca()
593
594        mz_assigned = self.mz_exp_all()
595        mz_error = self.mz_error_all()
596
597        ax.scatter(mz_assigned, mz_error, c=color)
598
599        ax.set_xlabel("$\t{m/z}$", fontsize=12)
600        ax.set_ylabel("Error (ppm)", fontsize=12)
601        ax.tick_params(axis="both", which="major", labelsize=12)
602
603        ax.axes.spines["top"].set_visible(True)
604        ax.axes.spines["right"].set_visible(True)
605
606        ax.get_yaxis().set_visible(True)
607        ax.spines["left"].set_visible(True)
608
609        ax.set_xlim(self.min_max_mz)
610        ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
611
612        return ax
613
614    def plot_mz_error_class(self, classe, color="g"):
615        """Plot mz error scatter plot of a specific class
616
617        Parameters
618        ----------
619        classe : str
620            Class name
621        color : str, optional
622            Matplotlib color, by default "g"
623
624        Returns
625        -------
626        ax : matplotlib.axes
627            Matplotlib axes object
628
629        """
630        if classe != Labels.unassigned:
631            ax = plt.gca()
632
633            abun_perc = self.abundance_count_percentile(classe)
634            mz_assigned = self.mz_exp(classe)
635            mz_error = self.mz_error(classe)
636
637            ax.scatter(mz_assigned, mz_error, c=color)
638
639            title = "%s, %.2f %%" % (classe, abun_perc)
640            ax.set_title(title)
641            ax.set_xlabel("$\t{m/z}$", fontsize=12)
642            ax.set_ylabel("Error (ppm)", fontsize=12)
643            ax.tick_params(axis="both", which="major", labelsize=12)
644
645            ax.axes.spines["top"].set_visible(True)
646            ax.axes.spines["right"].set_visible(True)
647
648            ax.get_yaxis().set_visible(True)
649            ax.spines["left"].set_visible(True)
650
651            ax.set_xlim(self.min_max_mz)
652            ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
653
654            return ax
655
656    def plot_ms_class(self, classe, color="g"):
657        """Plot stick mass spectrum of a specific class
658
659        Parameters
660        ----------
661        classe : str
662            Class name
663        color : str, optional
664            Matplotlib color, by default "g"
665
666        Returns
667        -------
668        ax : matplotlib.axes
669            Matplotlib axes object
670
671        """
672        if classe != Labels.unassigned:
673            ax = plt.gca()
674
675            abun_perc = self.abundance_count_percentile(classe)
676            mz_assigned = self.mz_exp(classe)
677            abundance_assigned = self.abundance(classe)
678
679            for plot_obj in ax.stem(
680                mz_assigned, abundance_assigned, linefmt="-", markerfmt=" "
681            ):
682                plt.setp(plot_obj, "color", color, "linewidth", 2)
683
684            title = "%s, %.2f %%" % (classe, abun_perc)
685            ax.set_title(title)
686            ax.set_xlabel("$\t{m/z}$", fontsize=12)
687            ax.set_ylabel("Abundance", fontsize=12)
688            ax.tick_params(axis="both", which="major", labelsize=12)
689
690            ax.axes.spines["top"].set_visible(False)
691            ax.axes.spines["right"].set_visible(False)
692
693            ax.get_yaxis().set_visible(False)
694            ax.spines["left"].set_visible(False)
695
696            ax.set_xlim(self.min_max_mz)
697            ax.set_ylim(self.min_max_abundance)
698
699            return ax
700
701    def plot_van_krevelen(
702        self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"
703    ):
704        """Plot Van Krevelen Diagram
705
706        Parameters
707        ----------
708        classe : str
709            Class name
710        max_hc : float, optional
711            Max H/C ratio, by default 2.5
712        max_oc : float, optional
713            Max O/C ratio, by default 2
714        ticks_number : int, optional
715            Number of ticks, by default 5
716        color : str, optional
717            Matplotlib color, by default "viridis"
718
719        Returns
720        -------
721        ax : matplotlib.axes
722            Matplotlib axes object
723        abun_perc : float
724            Class percentile of the relative abundance
725        """
726        if classe != Labels.unassigned:
727            # get data
728            abun_perc = self.abundance_count_percentile(classe)
729            hc = self.atoms_ratio(classe, "H", "C")
730            oc = self.atoms_ratio(classe, "O", "C")
731            abundance = self.abundance(classe)
732
733            # plot data
734            ax = plt.gca()
735
736            ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
737
738            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
739
740            title = "%s, %.2f %%" % (classe, abun_perc)
741            ax.set_title(title)
742            ax.set_xlabel("O/C", fontsize=16)
743            ax.set_ylabel("H/C", fontsize=16)
744            ax.tick_params(axis="both", which="major", labelsize=18)
745            ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
746            ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
747
748            # returns matplot axes obj and the class percentile of the relative abundance
749
750            return ax, abun_perc
751
752    def plot_dbe_vs_carbon_number(
753        self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"
754    ):
755        """Plot DBE vs Carbon Number
756
757        Parameters
758        ----------
759        classe : str
760            Class name
761        max_c : int, optional
762            Max Carbon Number, by default 50
763        max_dbe : int, optional
764            Max DBE, by default 40
765        dbe_incr : int, optional
766            DBE increment, by default 5
767        c_incr : int, optional
768            Carbon Number increment, by default 10
769        color : str, optional
770            Matplotlib color, by default "viridis"
771
772        Returns
773        -------
774        ax : matplotlib.axes
775            Matplotlib axes object
776        abun_perc : float
777            Class percentile of the relative abundance
778        """
779        if classe != Labels.unassigned:
780            # get data
781            abun_perc = self.abundance_count_percentile(classe)
782            carbon_number = self.carbon_number(classe)
783            dbe = self.dbe(classe)
784            abundance = self.abundance(classe)
785
786            # plot data
787            ax = plt.gca()
788
789            ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
790
791            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
792
793            title = "%s, %.2f %%" % (classe, abun_perc)
794            ax.set_title(title)
795            ax.set_xlabel("Carbon number", fontsize=16)
796            ax.set_ylabel("DBE", fontsize=16)
797            ax.tick_params(axis="both", which="major", labelsize=18)
798            ax.set_xticks(range(0, max_c, c_incr))
799            ax.set_yticks(range(0, max_dbe, dbe_incr))
800
801            # returns matplot axes obj and the class percentile of the relative abundance
802
803            return ax, abun_perc
def flatten_list(l):
12flatten_list = lambda l: [item for sublist in l for item in sublist]
class HeteroatomsClassification(collections.abc.Mapping):
 15class HeteroatomsClassification(Mapping):
 16    """Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)
 17
 18    Parameters
 19    ----------
 20    mass_spectrum : MassSpectrum
 21        The mass spectrum object.
 22    choose_molecular_formula : bool, optional
 23        If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True.
 24
 25    Raises
 26    ------
 27    Exception
 28        If no molecular formula is associated with any mspeak objects.
 29
 30    Attributes
 31    ----------
 32    _ms_grouped_class : dict
 33        A dictionary of classes and a list of ms_peak objects.
 34    choose_mf : bool
 35        If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered.
 36    total_peaks : int
 37        The total number of peaks.
 38    sum_abundance : float
 39        The sum of the abundance of all peaks.
 40    min_max_mz : tuple
 41        The minimum and maximum mz values.
 42    min_max_abundance : tuple
 43        The minimum and maximum abundance values.
 44    min_ppm_error : float
 45        The minimum ppm error.
 46    max_ppm_error : float
 47        The maximum ppm error.
 48    all_identified_atoms : list
 49        A list of all identified atoms.
 50
 51    Methods
 52    -------
 53    * __len__().
 54        Returns the number of classes.
 55    * __getitem__(classe)
 56        Returns the molecular formula list for specified class.
 57    * __iter__().
 58        Returns an iterator over the keys of the dictionary.
 59    * get_classes(threshold_perc=1, isotopologue=True).
 60        Returns a list of classes with abundance percentile above threshold.
 61    * molecular_formula_string(classe).
 62        Returns a list of molecular formula string for specified class.
 63    * molecular_formula(classe).
 64        Returns a list of molecular formula for specified class.
 65    * carbon_number(classe).
 66        Returns a list of carbon number for specified class.
 67    * atom_count(atom, classe).
 68        Returns a list of atom count for specified class.
 69    * dbe(classe).
 70        Returns a list of DBE for specified class.
 71    * atoms_ratio(classe, numerator, denominator).
 72        Returns a list of atoms ratio for specified class.
 73    * mz_exp(classe).
 74        Returns a list of experimental mz for specified class.
 75    * abundance(classe).
 76        Returns a list of abundance for specified class.
 77    * mz_error(classe).
 78        Returns a list of mz error for specified class.
 79    * mz_calc(classe).
 80        Returns a list of calculated mz for specified class.
 81    * peaks_count_percentile(classe).
 82        Returns the peaks count percentile of a specific class.
 83    * abundance_count_percentile(classe).
 84        Returns the abundance percentile of a specific class.
 85    * mz_exp_assigned().
 86        Returns a list of experimental mz for all assigned classes.
 87    * abundance_assigned().
 88        Returns a list of abundance for all classes.
 89    * mz_exp_all().
 90        Returns a list of mz for all classes.
 91
 92    """
 93
 94    # Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)
 95
 96    #   class obj behaves as a dictionary of classes and return a list of ms_peak obj
 97
 98    def __init__(self, mass_spectrum, choose_molecular_formula=True):
 99        def sort_atoms_method(atom):
100            """Sort atoms by order of appearance in the Atoms class"""
101            return [Atoms.atoms_order.index(atom)]
102
103        self._ms_grouped_class = dict()
104
105        self.choose_mf = choose_molecular_formula
106
107        # mapping for ms peaks without any molecular formula associated
108        self._ms_grouped_class[Labels.unassigned] = list()
109
110        self.total_peaks = 0
111
112        self.sum_abundance = 0
113
114        self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp)
115
116        self.min_max_abundance = (
117            mass_spectrum.min_abundance,
118            mass_spectrum.max_abundance,
119        )
120
121        self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error
122
123        self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error
124
125        check_assign = False
126
127        all_used_atoms = set()
128
129        for ms_peak in mass_spectrum:
130            self.total_peaks += 1
131
132            self.sum_abundance += ms_peak.abundance
133
134            if not ms_peak.is_assigned:
135                self._ms_grouped_class.get(Labels.unassigned).append(ms_peak)
136
137            else:
138                check_assign = True
139
140                if choose_molecular_formula:
141                    mf = ms_peak.best_molecular_formula_candidate
142
143                    classes = [mf.class_label]
144
145                    for atom in mf.atoms:
146                        all_used_atoms.add(atom)
147
148                else:
149                    classes = []
150
151                    for mf in ms_peak:
152                        classes.append(mf.class_label)
153
154                        for atom in mf.atoms:
155                            all_used_atoms.add(atom)
156
157                for classe in classes:
158                    if classe in self._ms_grouped_class.keys():
159                        self._ms_grouped_class.get(classe).append(ms_peak)
160
161                    else:
162                        self._ms_grouped_class[classe] = [ms_peak]
163
164        self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method)
165
166        if not check_assign:
167            raise Exception("No molecular formula associated with any mspeak objects")
168
169    def __len__(self):
170        """Return the number of classes"""
171        return len(self._ms_grouped_class)
172
173    def __getitem__(self, classe):
174        """Return the molecular formula list for specified class"""
175        return self._ms_grouped_class.get(classe)
176
177    def __iter__(self):
178        """Return an iterator over the keys of the dictionary."""
179        return iter(self._ms_grouped_class)
180
181    def get_classes(self, threshold_perc=1, isotopologue=True):
182        """Return a list of classes with abundance percentile above threshold"""
183        classes = list()
184        for classe in self.keys():
185            if classe != Labels.unassigned:
186                if self.abundance_count_percentile(classe) > threshold_perc:
187                    if classe != Labels.unassigned:
188                        # access first molecular formula inside the first ms peak and check isotopologue
189                        if not isotopologue and self.get(classe)[0][0].is_isotopologue:
190                            continue
191
192                    classes.append(classe)
193        # TODO sort classes chemically here too
194        return classes
195
196    def molecular_formula_string(
197        self,
198        classe,
199    ):
200        """Return a list of molecular formula string for specified class"""
201        if self.choose_mf:
202            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
203        else:
204            return [
205                mf
206                for mspeak in self[classe]
207                for mf in mspeak
208                if mf.class_label == classe
209            ]
210
211    def molecular_formula(
212        self,
213        classe,
214    ):
215        """Return a list of molecular formula for specified class"""
216        if self.choose_mf:
217            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
218        else:
219            return [
220                mf
221                for mspeak in self[classe]
222                for mf in mspeak
223                if mf.class_label == classe
224            ]
225
226    def carbon_number(self, classe):
227        """Return a list of carbon number for specified class"""
228        if self.choose_mf:
229            return [
230                mspeak.best_molecular_formula_candidate.get("C")
231                for mspeak in self[classe]
232            ]
233        else:
234            return [
235                mf.get("C")
236                for mspeak in self[classe]
237                for mf in mspeak
238                if mf.class_label == classe
239            ]
240
241    def atom_count(self, atom, classe):
242        """Return a list of atom count for specified class"""
243
244        if self.choose_mf:
245            return [
246                mspeak.best_molecular_formula_candidate.get(atom)
247                for mspeak in self[classe]
248            ]
249        else:
250            return [
251                mf.get(atom)
252                for mspeak in self[classe]
253                for mf in mspeak
254                if mf.class_label == classe
255            ]
256
257    def dbe(self, classe):
258        """Return a list of DBE for specified class"""
259        if self.choose_mf:
260            return [
261                mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]
262            ]
263        else:
264            return [
265                mf.dbe
266                for mspeak in self[classe]
267                for mf in mspeak
268                if mf.class_label == classe
269            ]
270
271    def atoms_ratio(self, classe, numerator, denominator):
272        """Return a list of atoms ratio for specified class"""
273        return [
274            mf.get(numerator) / mf.get(denominator)
275            for mf in self.molecular_formula(classe)
276        ]
277
278    def mz_exp(self, classe):
279        """Return a list of experimental mz for specified class"""
280        if self.choose_mf or classe == Labels.unassigned:
281            return [mspeak.mz_exp for mspeak in self[classe]]
282
283        else:
284            return [
285                mspeak.mz_exp
286                for mspeak in self[classe]
287                for mf in mspeak
288                if mf.class_label == classe
289            ]
290
291    def abundance(self, classe):
292        """Return a list of abundance for specified class"""
293        if self.choose_mf or classe == Labels.unassigned:
294            return [mspeak.abundance for mspeak in self[classe]]
295
296        else:
297            return [
298                mspeak.abundance
299                for mspeak in self[classe]
300                for mf in mspeak
301                if mf.class_label == classe
302            ]
303
304    def mz_error(self, classe):
305        """Return a list of mz error for specified class"""
306        if classe != Labels.unassigned:
307            if self.choose_mf:
308                return [
309                    mspeak.best_molecular_formula_candidate.mz_error
310                    for mspeak in self[classe]
311                ]
312
313            else:
314                return [
315                    mf.mz_error
316                    for mspeak in self[classe]
317                    for mf in mspeak
318                    if mf.class_label == classe
319                ]
320
321    def mz_calc(self, classe):
322        """Return a list of calculated mz for specified class"""
323        if self.choose_mf:
324            return [
325                mspeak.best_molecular_formula_candidate.mz_calc
326                for mspeak in self[classe]
327            ]
328
329        else:
330            return [
331                mf.mz_calc
332                for mspeak in self[classe]
333                for mf in mspeak
334                if mf.class_label == classe
335            ]
336
337    def peaks_count_percentile(self, classe):
338        """Return the peaks count percentile of a specific class"""
339        return (len(self[classe]) / self.total_peaks) * 100
340
341    def abundance_count_percentile(self, classe):
342        """Return the abundance percentile of a specific class"""
343        return (
344            sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance
345        ) * 100
346
347    def mz_exp_assigned(self):
348        """Return a list of experimental mz for all assigned classes"""
349        classes = self.keys()
350
351        return [
352            mspeak.mz_exp
353            for classe in classes
354            for mspeak in self[classe]
355            if classe != Labels.unassigned
356        ]
357
358    def abundance_assigned(self):
359        """Return a list of abundance for all classes"""
360        classes = self.keys()
361
362        return [
363            mspeak.abundance
364            for classe in classes
365            for mspeak in self[classe]
366            if classe != Labels.unassigned
367        ]
368
369    def mz_exp_all(self):
370        """Return a list of mz for all classes"""
371        classes = self.keys()
372
373        return flatten_list(
374            [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]
375        )
376
377    def mz_error_all(self):
378        """Return a list of mz error for all classes"""
379        classes = self.keys()
380
381        return flatten_list(
382            [self.mz_error(classe) for classe in classes if classe != Labels.unassigned]
383        )
384
385    def carbon_number_all(self):
386        """Return a list of carbon number for all classes"""
387        classes = self.keys()
388
389        return flatten_list(
390            [
391                self.carbon_number(classe)
392                for classe in classes
393                if classe != Labels.unassigned
394            ]
395        )
396
397    def dbe_all(self):
398        """Return a list of DBE for all classes"""
399        classes = self.keys()
400
401        return flatten_list(
402            [self.dbe(classe) for classe in classes if classe != Labels.unassigned]
403        )
404
405    def atoms_ratio_all(self, numerator, denominator):
406        """Return a list of atoms ratio for all classes"""
407        classes = self.keys()
408
409        return flatten_list(
410            [
411                self.atoms_ratio(classe, numerator, denominator)
412                for classe in classes
413                if classe != Labels.unassigned
414            ]
415        )
416
417    def to_dataframe(
418        self,
419        include_isotopologue=False,
420        abundance_perc_threshold=5,
421        include_unassigned=False,
422    ):
423        """Return a pandas dataframe with all the data from the class
424
425        Parameters
426        ----------
427        include_isotopologue : bool, optional
428            Include isotopologues, by default False
429        abundance_perc_threshold : int, optional
430            Abundance percentile threshold, by default 5
431        include_unassigned : bool, optional
432            Include unassigned peaks, by default False
433
434        Returns
435        -------
436        DataFrame
437            Pandas dataframe with all the data from the class
438        """
439        from pandas import DataFrame
440
441        columns_labels = [
442            "mz",
443            "calibrated_mz",
444            "calculated_m_z",
445            "abundance",
446            "resolving_power",
447            "sn",
448            "ion_charge",
449            "mass_error",
450            "DBE",
451            "class",
452            "HC",
453            "OC",
454            "ion_type",
455            "is_isotopologue",
456            "class_abundance",
457            "class_count",
458        ]
459
460        dict_data_list = []
461
462        for classe, list_mspeaks in self.items():
463            percent_abundance = self.abundance_count_percentile(classe)
464
465            # ignores low abundant classes
466            if abundance_perc_threshold < abundance_perc_threshold:
467                continue
468
469            peaks_count_percentile = self.peaks_count_percentile(classe)
470
471            for ms_peak in list_mspeaks:
472                if ms_peak.is_assigned:
473                    for m_formula in ms_peak:
474                        # ignores isotopologues
475                        if not include_isotopologue and m_formula.is_isotopologue:
476                            continue
477
478                        formula_dict = m_formula.to_dict()
479
480                        dict_result = {
481                            "mz": ms_peak._mz_exp,
482                            "calibrated_mz": ms_peak.mz_exp,
483                            "calculated_mz": m_formula.mz_calc,
484                            "abundance": ms_peak.abundance,
485                            "resolving_power": ms_peak.resolving_power,
486                            "sn": ms_peak.signal_to_noise,
487                            "ion_charge": ms_peak.ion_charge,
488                            "mass_error": m_formula.mz_error,
489                            "DBE": m_formula.dbe,
490                            "class": classe,
491                            "HC": m_formula.H_C,
492                            "OC": m_formula.O_C,
493                            "ion_type": str(m_formula.ion_type.lower().encode("utf-8")),
494                            "is_isotopologue": int(m_formula.is_isotopologue),
495                            "class_abundance": percent_abundance,
496                            "class_count": peaks_count_percentile,
497                        }
498
499                        for atom in formula_dict.keys():
500                            dict_result[atom] = formula_dict.get(atom)
501
502                    dict_data_list.append(dict_result)
503
504                else:
505                    if not include_unassigned:
506                        continue
507
508                    dict_result = {
509                        "mz": ms_peak._mz_exp,
510                        "calibrated_mz": ms_peak.mz_exp,
511                        "abundance": ms_peak.abundance,
512                        "resolving_power": ms_peak.resolving_power,
513                        "sn": ms_peak.signal_to_noise,
514                        "ion_charge": ms_peak.ion_charge,
515                        "class": classe,
516                        "class_abundance": percent_abundance,
517                        "class_count": percent_abundance,
518                    }
519
520                    dict_data_list.append(dict_result)
521
522        columns = columns_labels + self.all_identified_atoms
523
524        return DataFrame(dict_data_list, columns=columns)
525
526    def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"):
527        """Plot stick mass spectrum of all classes
528
529        Parameters
530        ----------
531        assigned_color : str, optional
532            Matplotlib color for the assigned peaks, by default "b"
533        unassigned_color : str, optional
534            Matplotlib color for the unassigned peaks, by default "r"
535
536        Returns
537        -------
538        ax : matplotlib.axes
539            Matplotlib axes object
540        """
541        mz_assigned = self.mz_exp_assigned()
542        abundance_assigned = self.abundance_assigned()
543
544        mz_not_assigned = self.mz_exp(Labels.unassigned)
545        abundance_not_assigned = self.abundance(Labels.unassigned)
546
547        ax = plt.gca()
548
549        for plot_obj in ax.stem(
550            mz_assigned,
551            abundance_assigned,
552            linefmt="-",
553            markerfmt=" ",
554            label="Assigned",
555        ):
556            plt.setp(plot_obj, "color", assigned_color, "linewidth", 2)
557
558        for plot_obj in ax.stem(
559            mz_not_assigned,
560            abundance_not_assigned,
561            linefmt="-",
562            markerfmt=" ",
563            label="Unassigned",
564        ):
565            plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2)
566
567        ax.set_xlabel("$\t{m/z}$", fontsize=12)
568        ax.set_ylabel("Abundance", fontsize=12)
569        ax.tick_params(axis="both", which="major", labelsize=12)
570
571        ax.axes.spines["top"].set_visible(False)
572        ax.axes.spines["right"].set_visible(False)
573
574        ax.get_yaxis().set_visible(False)
575        ax.spines["left"].set_visible(False)
576        plt.legend()
577
578        return ax
579
580    def plot_mz_error(self, color="g"):
581        """Plot mz error scatter plot of all classes
582
583        Parameters
584        ----------
585        color : str, optional
586            Matplotlib color, by default "g"
587
588        Returns
589        -------
590        ax : matplotlib.axes
591            Matplotlib axes object
592        """
593        ax = plt.gca()
594
595        mz_assigned = self.mz_exp_all()
596        mz_error = self.mz_error_all()
597
598        ax.scatter(mz_assigned, mz_error, c=color)
599
600        ax.set_xlabel("$\t{m/z}$", fontsize=12)
601        ax.set_ylabel("Error (ppm)", fontsize=12)
602        ax.tick_params(axis="both", which="major", labelsize=12)
603
604        ax.axes.spines["top"].set_visible(True)
605        ax.axes.spines["right"].set_visible(True)
606
607        ax.get_yaxis().set_visible(True)
608        ax.spines["left"].set_visible(True)
609
610        ax.set_xlim(self.min_max_mz)
611        ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
612
613        return ax
614
615    def plot_mz_error_class(self, classe, color="g"):
616        """Plot mz error scatter plot of a specific class
617
618        Parameters
619        ----------
620        classe : str
621            Class name
622        color : str, optional
623            Matplotlib color, by default "g"
624
625        Returns
626        -------
627        ax : matplotlib.axes
628            Matplotlib axes object
629
630        """
631        if classe != Labels.unassigned:
632            ax = plt.gca()
633
634            abun_perc = self.abundance_count_percentile(classe)
635            mz_assigned = self.mz_exp(classe)
636            mz_error = self.mz_error(classe)
637
638            ax.scatter(mz_assigned, mz_error, c=color)
639
640            title = "%s, %.2f %%" % (classe, abun_perc)
641            ax.set_title(title)
642            ax.set_xlabel("$\t{m/z}$", fontsize=12)
643            ax.set_ylabel("Error (ppm)", fontsize=12)
644            ax.tick_params(axis="both", which="major", labelsize=12)
645
646            ax.axes.spines["top"].set_visible(True)
647            ax.axes.spines["right"].set_visible(True)
648
649            ax.get_yaxis().set_visible(True)
650            ax.spines["left"].set_visible(True)
651
652            ax.set_xlim(self.min_max_mz)
653            ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
654
655            return ax
656
657    def plot_ms_class(self, classe, color="g"):
658        """Plot stick mass spectrum of a specific class
659
660        Parameters
661        ----------
662        classe : str
663            Class name
664        color : str, optional
665            Matplotlib color, by default "g"
666
667        Returns
668        -------
669        ax : matplotlib.axes
670            Matplotlib axes object
671
672        """
673        if classe != Labels.unassigned:
674            ax = plt.gca()
675
676            abun_perc = self.abundance_count_percentile(classe)
677            mz_assigned = self.mz_exp(classe)
678            abundance_assigned = self.abundance(classe)
679
680            for plot_obj in ax.stem(
681                mz_assigned, abundance_assigned, linefmt="-", markerfmt=" "
682            ):
683                plt.setp(plot_obj, "color", color, "linewidth", 2)
684
685            title = "%s, %.2f %%" % (classe, abun_perc)
686            ax.set_title(title)
687            ax.set_xlabel("$\t{m/z}$", fontsize=12)
688            ax.set_ylabel("Abundance", fontsize=12)
689            ax.tick_params(axis="both", which="major", labelsize=12)
690
691            ax.axes.spines["top"].set_visible(False)
692            ax.axes.spines["right"].set_visible(False)
693
694            ax.get_yaxis().set_visible(False)
695            ax.spines["left"].set_visible(False)
696
697            ax.set_xlim(self.min_max_mz)
698            ax.set_ylim(self.min_max_abundance)
699
700            return ax
701
702    def plot_van_krevelen(
703        self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"
704    ):
705        """Plot Van Krevelen Diagram
706
707        Parameters
708        ----------
709        classe : str
710            Class name
711        max_hc : float, optional
712            Max H/C ratio, by default 2.5
713        max_oc : float, optional
714            Max O/C ratio, by default 2
715        ticks_number : int, optional
716            Number of ticks, by default 5
717        color : str, optional
718            Matplotlib color, by default "viridis"
719
720        Returns
721        -------
722        ax : matplotlib.axes
723            Matplotlib axes object
724        abun_perc : float
725            Class percentile of the relative abundance
726        """
727        if classe != Labels.unassigned:
728            # get data
729            abun_perc = self.abundance_count_percentile(classe)
730            hc = self.atoms_ratio(classe, "H", "C")
731            oc = self.atoms_ratio(classe, "O", "C")
732            abundance = self.abundance(classe)
733
734            # plot data
735            ax = plt.gca()
736
737            ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
738
739            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
740
741            title = "%s, %.2f %%" % (classe, abun_perc)
742            ax.set_title(title)
743            ax.set_xlabel("O/C", fontsize=16)
744            ax.set_ylabel("H/C", fontsize=16)
745            ax.tick_params(axis="both", which="major", labelsize=18)
746            ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
747            ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
748
749            # returns matplot axes obj and the class percentile of the relative abundance
750
751            return ax, abun_perc
752
753    def plot_dbe_vs_carbon_number(
754        self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"
755    ):
756        """Plot DBE vs Carbon Number
757
758        Parameters
759        ----------
760        classe : str
761            Class name
762        max_c : int, optional
763            Max Carbon Number, by default 50
764        max_dbe : int, optional
765            Max DBE, by default 40
766        dbe_incr : int, optional
767            DBE increment, by default 5
768        c_incr : int, optional
769            Carbon Number increment, by default 10
770        color : str, optional
771            Matplotlib color, by default "viridis"
772
773        Returns
774        -------
775        ax : matplotlib.axes
776            Matplotlib axes object
777        abun_perc : float
778            Class percentile of the relative abundance
779        """
780        if classe != Labels.unassigned:
781            # get data
782            abun_perc = self.abundance_count_percentile(classe)
783            carbon_number = self.carbon_number(classe)
784            dbe = self.dbe(classe)
785            abundance = self.abundance(classe)
786
787            # plot data
788            ax = plt.gca()
789
790            ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
791
792            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
793
794            title = "%s, %.2f %%" % (classe, abun_perc)
795            ax.set_title(title)
796            ax.set_xlabel("Carbon number", fontsize=16)
797            ax.set_ylabel("DBE", fontsize=16)
798            ax.tick_params(axis="both", which="major", labelsize=18)
799            ax.set_xticks(range(0, max_c, c_incr))
800            ax.set_yticks(range(0, max_dbe, dbe_incr))
801
802            # returns matplot axes obj and the class percentile of the relative abundance
803
804            return ax, abun_perc

Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)

Parameters
  • mass_spectrum (MassSpectrum): The mass spectrum object.
  • choose_molecular_formula (bool, optional): If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True.
Raises
  • Exception: If no molecular formula is associated with any mspeak objects.
Attributes
  • _ms_grouped_class (dict): A dictionary of classes and a list of ms_peak objects.
  • choose_mf (bool): If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered.
  • total_peaks (int): The total number of peaks.
  • sum_abundance (float): The sum of the abundance of all peaks.
  • min_max_mz (tuple): The minimum and maximum mz values.
  • min_max_abundance (tuple): The minimum and maximum abundance values.
  • min_ppm_error (float): The minimum ppm error.
  • max_ppm_error (float): The maximum ppm error.
  • all_identified_atoms (list): A list of all identified atoms.
Methods
  • __len__(). Returns the number of classes.
  • __getitem__(classe). Returns the list of ms_peak objects stored for the specified class.
  • __iter__(). Returns an iterator over the keys of the dictionary.
  • get_classes(threshold_perc=1, isotopologue=True). Returns a list of classes with abundance percentile above threshold.
  • molecular_formula_string(classe). Returns a list of molecular formula string for specified class.
  • molecular_formula(classe). Returns a list of molecular formula for specified class.
  • carbon_number(classe). Returns a list of carbon number for specified class.
  • atom_count(atom, classe). Returns a list of atom count for specified class.
  • dbe(classe). Returns a list of DBE for specified class.
  • atoms_ratio(classe, numerator, denominator). Returns a list of atoms ratio for specified class.
  • mz_exp(classe). Returns a list of experimental mz for specified class.
  • abundance(classe). Returns a list of abundance for specified class.
  • mz_error(classe). Returns a list of mz error for specified class.
  • mz_calc(classe). Returns a list of calculated mz for specified class.
  • peaks_count_percentile(classe). Returns the peaks count percentile of a specific class.
  • abundance_count_percentile(classe). Returns the abundance percentile of a specific class.
  • mz_exp_assigned(). Returns a list of experimental mz for all assigned classes.
  • abundance_assigned(). Returns a list of abundance for all assigned classes (unassigned peaks are excluded).
  • mz_exp_all(). Returns a list of experimental mz for all assigned classes (unassigned peaks are excluded).
HeteroatomsClassification(mass_spectrum, choose_molecular_formula=True)
 98    def __init__(self, mass_spectrum, choose_molecular_formula=True):
 99        def sort_atoms_method(atom):
100            """Sort atoms by order of appearance in the Atoms class"""
101            return [Atoms.atoms_order.index(atom)]
102
103        self._ms_grouped_class = dict()
104
105        self.choose_mf = choose_molecular_formula
106
107        # mapping for ms peaks without any molecular formula associated
108        self._ms_grouped_class[Labels.unassigned] = list()
109
110        self.total_peaks = 0
111
112        self.sum_abundance = 0
113
114        self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp)
115
116        self.min_max_abundance = (
117            mass_spectrum.min_abundance,
118            mass_spectrum.max_abundance,
119        )
120
121        self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error
122
123        self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error
124
125        check_assign = False
126
127        all_used_atoms = set()
128
129        for ms_peak in mass_spectrum:
130            self.total_peaks += 1
131
132            self.sum_abundance += ms_peak.abundance
133
134            if not ms_peak.is_assigned:
135                self._ms_grouped_class.get(Labels.unassigned).append(ms_peak)
136
137            else:
138                check_assign = True
139
140                if choose_molecular_formula:
141                    mf = ms_peak.best_molecular_formula_candidate
142
143                    classes = [mf.class_label]
144
145                    for atom in mf.atoms:
146                        all_used_atoms.add(atom)
147
148                else:
149                    classes = []
150
151                    for mf in ms_peak:
152                        classes.append(mf.class_label)
153
154                        for atom in mf.atoms:
155                            all_used_atoms.add(atom)
156
157                for classe in classes:
158                    if classe in self._ms_grouped_class.keys():
159                        self._ms_grouped_class.get(classe).append(ms_peak)
160
161                    else:
162                        self._ms_grouped_class[classe] = [ms_peak]
163
164        self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method)
165
166        if not check_assign:
167            raise Exception("No molecular formula associated with any mspeak objects")
choose_mf
total_peaks
sum_abundance
min_max_mz
min_max_abundance
min_ppm_error
max_ppm_error
all_identified_atoms
def get_classes(self, threshold_perc=1, isotopologue=True):
181    def get_classes(self, threshold_perc=1, isotopologue=True):
182        """Return a list of classes with abundance percentile above threshold"""
183        classes = list()
184        for classe in self.keys():
185            if classe != Labels.unassigned:
186                if self.abundance_count_percentile(classe) > threshold_perc:
187                    if classe != Labels.unassigned:
188                        # access first molecular formula inside the first ms peak and check isotopologue
189                        if not isotopologue and self.get(classe)[0][0].is_isotopologue:
190                            continue
191
192                    classes.append(classe)
193        # TODO sort classes chemically here too
194        return classes

Return a list of classes with abundance percentile above threshold

def molecular_formula_string(self, classe):
196    def molecular_formula_string(
197        self,
198        classe,
199    ):
200        """Return a list of molecular formula string for specified class"""
201        if self.choose_mf:
202            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
203        else:
204            return [
205                mf
206                for mspeak in self[classe]
207                for mf in mspeak
208                if mf.class_label == classe
209            ]

Return a list of molecular formulas for the specified class (despite the method name, the elements are the molecular formula objects, not strings)

def molecular_formula(self, classe):
211    def molecular_formula(
212        self,
213        classe,
214    ):
215        """Return a list of molecular formula for specified class"""
216        if self.choose_mf:
217            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
218        else:
219            return [
220                mf
221                for mspeak in self[classe]
222                for mf in mspeak
223                if mf.class_label == classe
224            ]

Return a list of molecular formula for specified class

def carbon_number(self, classe):
226    def carbon_number(self, classe):
227        """Return a list of carbon number for specified class"""
228        if self.choose_mf:
229            return [
230                mspeak.best_molecular_formula_candidate.get("C")
231                for mspeak in self[classe]
232            ]
233        else:
234            return [
235                mf.get("C")
236                for mspeak in self[classe]
237                for mf in mspeak
238                if mf.class_label == classe
239            ]

Return a list of carbon number for specified class

def atom_count(self, atom, classe):
241    def atom_count(self, atom, classe):
242        """Return a list of atom count for specified class"""
243
244        if self.choose_mf:
245            return [
246                mspeak.best_molecular_formula_candidate.get(atom)
247                for mspeak in self[classe]
248            ]
249        else:
250            return [
251                mf.get(atom)
252                for mspeak in self[classe]
253                for mf in mspeak
254                if mf.class_label == classe
255            ]

Return a list of atom count for specified class

def dbe(self, classe):
257    def dbe(self, classe):
258        """Return a list of DBE for specified class"""
259        if self.choose_mf:
260            return [
261                mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]
262            ]
263        else:
264            return [
265                mf.dbe
266                for mspeak in self[classe]
267                for mf in mspeak
268                if mf.class_label == classe
269            ]

Return a list of DBE for specified class

def atoms_ratio(self, classe, numerator, denominator):
271    def atoms_ratio(self, classe, numerator, denominator):
272        """Return a list of atoms ratio for specified class"""
273        return [
274            mf.get(numerator) / mf.get(denominator)
275            for mf in self.molecular_formula(classe)
276        ]

Return a list of atoms ratio for specified class

def mz_exp(self, classe):
278    def mz_exp(self, classe):
279        """Return a list of experimental mz for specified class"""
280        if self.choose_mf or classe == Labels.unassigned:
281            return [mspeak.mz_exp for mspeak in self[classe]]
282
283        else:
284            return [
285                mspeak.mz_exp
286                for mspeak in self[classe]
287                for mf in mspeak
288                if mf.class_label == classe
289            ]

Return a list of experimental mz for specified class

def abundance(self, classe):
291    def abundance(self, classe):
292        """Return a list of abundance for specified class"""
293        if self.choose_mf or classe == Labels.unassigned:
294            return [mspeak.abundance for mspeak in self[classe]]
295
296        else:
297            return [
298                mspeak.abundance
299                for mspeak in self[classe]
300                for mf in mspeak
301                if mf.class_label == classe
302            ]

Return a list of abundance for specified class

def mz_error(self, classe):
304    def mz_error(self, classe):
305        """Return a list of mz error for specified class"""
306        if classe != Labels.unassigned:
307            if self.choose_mf:
308                return [
309                    mspeak.best_molecular_formula_candidate.mz_error
310                    for mspeak in self[classe]
311                ]
312
313            else:
314                return [
315                    mf.mz_error
316                    for mspeak in self[classe]
317                    for mf in mspeak
318                    if mf.class_label == classe
319                ]

Return a list of mz error for specified class

def mz_calc(self, classe):
321    def mz_calc(self, classe):
322        """Return a list of calculated mz for specified class"""
323        if self.choose_mf:
324            return [
325                mspeak.best_molecular_formula_candidate.mz_calc
326                for mspeak in self[classe]
327            ]
328
329        else:
330            return [
331                mf.mz_calc
332                for mspeak in self[classe]
333                for mf in mspeak
334                if mf.class_label == classe
335            ]

Return a list of calculated mz for specified class

def peaks_count_percentile(self, classe):
337    def peaks_count_percentile(self, classe):
338        """Return the peaks count percentile of a specific class"""
339        return (len(self[classe]) / self.total_peaks) * 100

Return the peaks count percentile of a specific class

def abundance_count_percentile(self, classe):
341    def abundance_count_percentile(self, classe):
342        """Return the abundance percentile of a specific class"""
343        return (
344            sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance
345        ) * 100

Return the abundance percentile of a specific class

def mz_exp_assigned(self):
347    def mz_exp_assigned(self):
348        """Return a list of experimental mz for all assigned classes"""
349        classes = self.keys()
350
351        return [
352            mspeak.mz_exp
353            for classe in classes
354            for mspeak in self[classe]
355            if classe != Labels.unassigned
356        ]

Return a list of experimental mz for all assigned classes

def abundance_assigned(self):
358    def abundance_assigned(self):
359        """Return a list of abundance for all classes"""
360        classes = self.keys()
361
362        return [
363            mspeak.abundance
364            for classe in classes
365            for mspeak in self[classe]
366            if classe != Labels.unassigned
367        ]

Return a list of abundances for all assigned classes

def mz_exp_all(self):
369    def mz_exp_all(self):
370        """Return a list of mz for all classes"""
371        classes = self.keys()
372
373        return flatten_list(
374            [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]
375        )

Return a list of experimental mz values for all assigned classes

def mz_error_all(self):
377    def mz_error_all(self):
378        """Return a list of mz error for all classes"""
379        classes = self.keys()
380
381        return flatten_list(
382            [self.mz_error(classe) for classe in classes if classe != Labels.unassigned]
383        )

Return a list of mz error for all classes

def carbon_number_all(self):
385    def carbon_number_all(self):
386        """Return a list of carbon number for all classes"""
387        classes = self.keys()
388
389        return flatten_list(
390            [
391                self.carbon_number(classe)
392                for classe in classes
393                if classe != Labels.unassigned
394            ]
395        )

Return a list of carbon number for all classes

def dbe_all(self):
397    def dbe_all(self):
398        """Return a list of DBE for all classes"""
399        classes = self.keys()
400
401        return flatten_list(
402            [self.dbe(classe) for classe in classes if classe != Labels.unassigned]
403        )

Return a list of DBE for all classes

def atoms_ratio_all(self, numerator, denominator):
405    def atoms_ratio_all(self, numerator, denominator):
406        """Return a list of atoms ratio for all classes"""
407        classes = self.keys()
408
409        return flatten_list(
410            [
411                self.atoms_ratio(classe, numerator, denominator)
412                for classe in classes
413                if classe != Labels.unassigned
414            ]
415        )

Return a list of atoms ratio for all classes

def to_dataframe( self, include_isotopologue=False, abundance_perc_threshold=5, include_unassigned=False):
417    def to_dataframe(
418        self,
419        include_isotopologue=False,
420        abundance_perc_threshold=5,
421        include_unassigned=False,
422    ):
423        """Return a pandas dataframe with all the data from the class
424
425        Parameters
426        ----------
427        include_isotopologue : bool, optional
428            Include isotopologues, by default False
429        abundance_perc_threshold : int, optional
430            Abundance percentile threshold, by default 5
431        include_unassigned : bool, optional
432            Include unassigned peaks, by default False
433
434        Returns
435        -------
436        DataFrame
437            Pandas dataframe with all the data from the class
438        """
439        from pandas import DataFrame
440
441        columns_labels = [
442            "mz",
443            "calibrated_mz",
444            "calculated_m_z",
445            "abundance",
446            "resolving_power",
447            "sn",
448            "ion_charge",
449            "mass_error",
450            "DBE",
451            "class",
452            "HC",
453            "OC",
454            "ion_type",
455            "is_isotopologue",
456            "class_abundance",
457            "class_count",
458        ]
459
460        dict_data_list = []
461
462        for classe, list_mspeaks in self.items():
463            percent_abundance = self.abundance_count_percentile(classe)
464
465            # ignores low abundant classes
466            if abundance_perc_threshold < abundance_perc_threshold:
467                continue
468
469            peaks_count_percentile = self.peaks_count_percentile(classe)
470
471            for ms_peak in list_mspeaks:
472                if ms_peak.is_assigned:
473                    for m_formula in ms_peak:
474                        # ignores isotopologues
475                        if not include_isotopologue and m_formula.is_isotopologue:
476                            continue
477
478                        formula_dict = m_formula.to_dict()
479
480                        dict_result = {
481                            "mz": ms_peak._mz_exp,
482                            "calibrated_mz": ms_peak.mz_exp,
483                            "calculated_mz": m_formula.mz_calc,
484                            "abundance": ms_peak.abundance,
485                            "resolving_power": ms_peak.resolving_power,
486                            "sn": ms_peak.signal_to_noise,
487                            "ion_charge": ms_peak.ion_charge,
488                            "mass_error": m_formula.mz_error,
489                            "DBE": m_formula.dbe,
490                            "class": classe,
491                            "HC": m_formula.H_C,
492                            "OC": m_formula.O_C,
493                            "ion_type": str(m_formula.ion_type.lower().encode("utf-8")),
494                            "is_isotopologue": int(m_formula.is_isotopologue),
495                            "class_abundance": percent_abundance,
496                            "class_count": peaks_count_percentile,
497                        }
498
499                        for atom in formula_dict.keys():
500                            dict_result[atom] = formula_dict.get(atom)
501
502                    dict_data_list.append(dict_result)
503
504                else:
505                    if not include_unassigned:
506                        continue
507
508                    dict_result = {
509                        "mz": ms_peak._mz_exp,
510                        "calibrated_mz": ms_peak.mz_exp,
511                        "abundance": ms_peak.abundance,
512                        "resolving_power": ms_peak.resolving_power,
513                        "sn": ms_peak.signal_to_noise,
514                        "ion_charge": ms_peak.ion_charge,
515                        "class": classe,
516                        "class_abundance": percent_abundance,
517                        "class_count": percent_abundance,
518                    }
519
520                    dict_data_list.append(dict_result)
521
522        columns = columns_labels + self.all_identified_atoms
523
524        return DataFrame(dict_data_list, columns=columns)

Return a pandas dataframe with all the data from the class

Parameters
  • include_isotopologue (bool, optional): Include isotopologues, by default False
  • abundance_perc_threshold (int, optional): Abundance percentile threshold, by default 5
  • include_unassigned (bool, optional): Include unassigned peaks, by default False
Returns
  • DataFrame: Pandas dataframe with all the data from the class
def plot_ms_assigned_unassigned(self, assigned_color='b', unassigned_color='r'):
526    def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"):
527        """Plot stick mass spectrum of all classes
528
529        Parameters
530        ----------
531        assigned_color : str, optional
532            Matplotlib color for the assigned peaks, by default "b"
533        unassigned_color : str, optional
534            Matplotlib color for the unassigned peaks, by default "r"
535
536        Returns
537        -------
538        ax : matplotlib.axes
539            Matplotlib axes object
540        """
541        mz_assigned = self.mz_exp_assigned()
542        abundance_assigned = self.abundance_assigned()
543
544        mz_not_assigned = self.mz_exp(Labels.unassigned)
545        abundance_not_assigned = self.abundance(Labels.unassigned)
546
547        ax = plt.gca()
548
549        for plot_obj in ax.stem(
550            mz_assigned,
551            abundance_assigned,
552            linefmt="-",
553            markerfmt=" ",
554            label="Assigned",
555        ):
556            plt.setp(plot_obj, "color", assigned_color, "linewidth", 2)
557
558        for plot_obj in ax.stem(
559            mz_not_assigned,
560            abundance_not_assigned,
561            linefmt="-",
562            markerfmt=" ",
563            label="Unassigned",
564        ):
565            plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2)
566
567        ax.set_xlabel("$\t{m/z}$", fontsize=12)
568        ax.set_ylabel("Abundance", fontsize=12)
569        ax.tick_params(axis="both", which="major", labelsize=12)
570
571        ax.axes.spines["top"].set_visible(False)
572        ax.axes.spines["right"].set_visible(False)
573
574        ax.get_yaxis().set_visible(False)
575        ax.spines["left"].set_visible(False)
576        plt.legend()
577
578        return ax

Plot stick mass spectrum of all classes

Parameters
  • assigned_color (str, optional): Matplotlib color for the assigned peaks, by default "b"
  • unassigned_color (str, optional): Matplotlib color for the unassigned peaks, by default "r"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
def plot_mz_error(self, color='g'):
580    def plot_mz_error(self, color="g"):
581        """Plot mz error scatter plot of all classes
582
583        Parameters
584        ----------
585        color : str, optional
586            Matplotlib color, by default "g"
587
588        Returns
589        -------
590        ax : matplotlib.axes
591            Matplotlib axes object
592        """
593        ax = plt.gca()
594
595        mz_assigned = self.mz_exp_all()
596        mz_error = self.mz_error_all()
597
598        ax.scatter(mz_assigned, mz_error, c=color)
599
600        ax.set_xlabel("$\t{m/z}$", fontsize=12)
601        ax.set_ylabel("Error (ppm)", fontsize=12)
602        ax.tick_params(axis="both", which="major", labelsize=12)
603
604        ax.axes.spines["top"].set_visible(True)
605        ax.axes.spines["right"].set_visible(True)
606
607        ax.get_yaxis().set_visible(True)
608        ax.spines["left"].set_visible(True)
609
610        ax.set_xlim(self.min_max_mz)
611        ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
612
613        return ax

Plot mz error scatter plot of all classes

Parameters
  • color (str, optional): Matplotlib color, by default "g"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
def plot_mz_error_class(self, classe, color='g'):
615    def plot_mz_error_class(self, classe, color="g"):
616        """Plot mz error scatter plot of a specific class
617
618        Parameters
619        ----------
620        classe : str
621            Class name
622        color : str, optional
623            Matplotlib color, by default "g"
624
625        Returns
626        -------
627        ax : matplotlib.axes
628            Matplotlib axes object
629
630        """
631        if classe != Labels.unassigned:
632            ax = plt.gca()
633
634            abun_perc = self.abundance_count_percentile(classe)
635            mz_assigned = self.mz_exp(classe)
636            mz_error = self.mz_error(classe)
637
638            ax.scatter(mz_assigned, mz_error, c=color)
639
640            title = "%s, %.2f %%" % (classe, abun_perc)
641            ax.set_title(title)
642            ax.set_xlabel("$\t{m/z}$", fontsize=12)
643            ax.set_ylabel("Error (ppm)", fontsize=12)
644            ax.tick_params(axis="both", which="major", labelsize=12)
645
646            ax.axes.spines["top"].set_visible(True)
647            ax.axes.spines["right"].set_visible(True)
648
649            ax.get_yaxis().set_visible(True)
650            ax.spines["left"].set_visible(True)
651
652            ax.set_xlim(self.min_max_mz)
653            ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
654
655            return ax

Plot mz error scatter plot of a specific class

Parameters
  • classe (str): Class name
  • color (str, optional): Matplotlib color, by default "g"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
def plot_ms_class(self, classe, color='g'):
657    def plot_ms_class(self, classe, color="g"):
658        """Plot stick mass spectrum of a specific class
659
660        Parameters
661        ----------
662        classe : str
663            Class name
664        color : str, optional
665            Matplotlib color, by default "g"
666
667        Returns
668        -------
669        ax : matplotlib.axes
670            Matplotlib axes object
671
672        """
673        if classe != Labels.unassigned:
674            ax = plt.gca()
675
676            abun_perc = self.abundance_count_percentile(classe)
677            mz_assigned = self.mz_exp(classe)
678            abundance_assigned = self.abundance(classe)
679
680            for plot_obj in ax.stem(
681                mz_assigned, abundance_assigned, linefmt="-", markerfmt=" "
682            ):
683                plt.setp(plot_obj, "color", color, "linewidth", 2)
684
685            title = "%s, %.2f %%" % (classe, abun_perc)
686            ax.set_title(title)
687            ax.set_xlabel("$\t{m/z}$", fontsize=12)
688            ax.set_ylabel("Abundance", fontsize=12)
689            ax.tick_params(axis="both", which="major", labelsize=12)
690
691            ax.axes.spines["top"].set_visible(False)
692            ax.axes.spines["right"].set_visible(False)
693
694            ax.get_yaxis().set_visible(False)
695            ax.spines["left"].set_visible(False)
696
697            ax.set_xlim(self.min_max_mz)
698            ax.set_ylim(self.min_max_abundance)
699
700            return ax

Plot stick mass spectrum of a specific class

Parameters
  • classe (str): Class name
  • color (str, optional): Matplotlib color, by default "g"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
def plot_van_krevelen(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color='viridis'):
702    def plot_van_krevelen(
703        self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"
704    ):
705        """Plot Van Krevelen Diagram
706
707        Parameters
708        ----------
709        classe : str
710            Class name
711        max_hc : float, optional
712            Max H/C ratio, by default 2.5
713        max_oc : float, optional
714            Max O/C ratio, by default 2
715        ticks_number : int, optional
716            Number of ticks, by default 5
717        color : str, optional
718            Matplotlib color, by default "viridis"
719
720        Returns
721        -------
722        ax : matplotlib.axes
723            Matplotlib axes object
724        abun_perc : float
725            Class percentile of the relative abundance
726        """
727        if classe != Labels.unassigned:
728            # get data
729            abun_perc = self.abundance_count_percentile(classe)
730            hc = self.atoms_ratio(classe, "H", "C")
731            oc = self.atoms_ratio(classe, "O", "C")
732            abundance = self.abundance(classe)
733
734            # plot data
735            ax = plt.gca()
736
737            ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
738
739            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
740
741            title = "%s, %.2f %%" % (classe, abun_perc)
742            ax.set_title(title)
743            ax.set_xlabel("O/C", fontsize=16)
744            ax.set_ylabel("H/C", fontsize=16)
745            ax.tick_params(axis="both", which="major", labelsize=18)
746            ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
747            ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
748
749            # returns matplot axes obj and the class percentile of the relative abundance
750
751            return ax, abun_perc

Plot Van Krevelen Diagram

Parameters
  • classe (str): Class name
  • max_hc (float, optional): Max H/C ratio, by default 2.5
  • max_oc (float, optional): Max O/C ratio, by default 2
  • ticks_number (int, optional): Number of ticks, by default 5
  • color (str, optional): Matplotlib color, by default "viridis"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
  • abun_perc (float): Class percentile of the relative abundance
def plot_dbe_vs_carbon_number( self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color='viridis'):
753    def plot_dbe_vs_carbon_number(
754        self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"
755    ):
756        """Plot DBE vs Carbon Number
757
758        Parameters
759        ----------
760        classe : str
761            Class name
762        max_c : int, optional
763            Max Carbon Number, by default 50
764        max_dbe : int, optional
765            Max DBE, by default 40
766        dbe_incr : int, optional
767            DBE increment, by default 5
768        c_incr : int, optional
769            Carbon Number increment, by default 10
770        color : str, optional
771            Matplotlib color, by default "viridis"
772
773        Returns
774        -------
775        ax : matplotlib.axes
776            Matplotlib axes object
777        abun_perc : float
778            Class percentile of the relative abundance
779        """
780        if classe != Labels.unassigned:
781            # get data
782            abun_perc = self.abundance_count_percentile(classe)
783            carbon_number = self.carbon_number(classe)
784            dbe = self.dbe(classe)
785            abundance = self.abundance(classe)
786
787            # plot data
788            ax = plt.gca()
789
790            ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
791
792            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
793
794            title = "%s, %.2f %%" % (classe, abun_perc)
795            ax.set_title(title)
796            ax.set_xlabel("Carbon number", fontsize=16)
797            ax.set_ylabel("DBE", fontsize=16)
798            ax.tick_params(axis="both", which="major", labelsize=18)
799            ax.set_xticks(range(0, max_c, c_incr))
800            ax.set_yticks(range(0, max_dbe, dbe_incr))
801
802            # returns matplot axes obj and the class percentile of the relative abundance
803
804            return ax, abun_perc

Plot DBE vs Carbon Number

Parameters
  • classe (str): Class name
  • max_c (int, optional): Max Carbon Number, by default 50
  • max_dbe (int, optional): Max DBE, by default 40
  • dbe_incr (int, optional): DBE increment, by default 5
  • c_incr (int, optional): Carbon Number increment, by default 10
  • color (str, optional): Matplotlib color, by default "viridis"
Returns
  • ax (matplotlib.axes): Matplotlib axes object
  • abun_perc (float): Class percentile of the relative abundance
Inherited Members
collections.abc.Mapping
get
keys
items
values