corems.mass_spectrum.input.massList

View Source

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 12, 2019"
  3
  4import warnings
  5
  6from corems.encapsulation.constant import Atoms, Labels
  7from corems.mass_spectrum.factory.MassSpectrumClasses import (
  8    MassSpecCentroid,
  9    MassSpecProfile,
 10)
 11from corems.mass_spectrum.input.baseClass import MassListBaseClass
 12from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula
 13
 14
 15class ReadCoremsMasslist(MassListBaseClass):
 16    """
 17    The ReadCoremsMasslist object reads processed mass list data types
 18    and returns the mass spectrum obj with the molecular formula obj
 19
 20    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
 21    Please see MassListBaseClass for more details
 22
 23    """
 24
 25    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
 26        """
 27        Get the mass spectrum object from the processed mass list data.
 28
 29        Parameters
 30        ----------
 31        loadSettings : bool, optional
 32            Whether to load the settings for the mass spectrum. Default is True.
 33
 34        Returns
 35        -------
 36        MassSpecCentroid
 37            The mass spectrum object.
 38
 39        Raises
 40        ------
 41        ValueError
 42            If the input file is not a valid CoreMS file.
 43        """
 44
 45        dataframe = self.get_dataframe()
 46
 47        if not set(
 48            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
 49        ).issubset(dataframe.columns):
 50            raise ValueError(
 51                "%s it is not a valid CoreMS file" % str(self.file_location)
 52            )
 53
 54        self.check_columns(dataframe.columns)
 55
 56        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
 57
 58        polarity = dataframe["Ion Charge"].values[0]
 59
 60        output_parameters = self.get_output_parameters(polarity)
 61
 62        mass_spec_obj = MassSpecCentroid(
 63            dataframe.to_dict(orient="list"), output_parameters
 64        )
 65
 66        if loadSettings is True:
 67            self.load_settings(mass_spec_obj, output_parameters)
 68
 69        self.add_molecular_formula(mass_spec_obj, dataframe)
 70
 71        return mass_spec_obj
 72
 73    def add_molecular_formula(self, mass_spec_obj, dataframe):
 74        """
 75        Add molecular formula information to the mass spectrum object.
 76
 77        Parameters
 78        ----------
 79        mass_spec_obj : MassSpecCentroid
 80            The mass spectrum object to add the molecular formula to.
 81        dataframe : pandas.DataFrame
 82            The processed mass list data.
 83
 84        """
 85
 86        # check if is coreMS file
 87        if "Is Isotopologue" in dataframe:
 88            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
 89            dataframe = dataframe.reset_index(drop=True)
 90
 91            mz_exp_df = dataframe[Labels.mz].astype(float)
 92            formula_df = dataframe[
 93                dataframe.columns.intersection(Atoms.atoms_order)
 94            ].copy()
 95            formula_df.fillna(0, inplace=True)
 96            formula_df.replace(b"nan", 0, inplace=True)
 97
 98            ion_type_df = dataframe["Ion Type"]
 99            ion_charge_df = dataframe["Ion Charge"]
100            is_isotopologue_df = dataframe["Is Isotopologue"]
101            if "Adduct" in dataframe:
102                adduct_df = dataframe["Adduct"]
103            else:
104                adduct_df = None
105
106        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
107
108        for df_index, mz_exp in enumerate(mz_exp_df):
109            bad_mf = False
110            counts = 0
111
112            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
113
114            if "Is Isotopologue" in dataframe:
115                atoms = list(formula_df.columns.astype(str))
116                counts = list(formula_df.iloc[df_index].astype(int))
117
118                formula_dict = dict(zip(atoms, counts))
119
120                # Drop any atoms with 0 counts
121                formula_dict = {
122                    atom: formula_dict[atom]
123                    for atom in formula_dict
124                    if formula_dict[atom] > 0
125                }
126
127            if sum(counts) > 0:
128                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
129                if adduct_df is not None:
130                    adduct_atom = str(adduct_df[df_index])
131                    if adduct_atom == "None":
132                        adduct_atom = None
133                else:
134                    adduct_atom = None
135
136                # If not isotopologue, cast as MolecularFormula
137                if not bool(int(is_isotopologue_df[df_index])):
138                    mfobj = MolecularFormula(
139                        formula_dict,
140                        int(ion_charge_df[df_index]),
141                        mspeak_parent=mass_spec_obj[ms_peak_index],
142                        ion_type=ion_type,
143                        adduct_atom=adduct_atom,
144                    )
145
146                # if is isotopologue, recast as MolecularFormulaIsotopologue
147                if bool(int(is_isotopologue_df[df_index])):
148                    # First make a MolecularFormula object for the parent so we can get probabilities etc
149                    formula_list_parent = {}
150                    for atom in formula_dict:
151                        if atom in Atoms.isotopes.keys():
152                            formula_list_parent[atom] = formula_dict[atom]
153                        else:
154                            # remove any numbers from the atom name to cast as a mono-isotopic atom
155                            atom_mono = atom.strip("0123456789")
156                            if (
157                                atom_mono in Atoms.isotopes.keys()
158                                and atom_mono in formula_list_parent.keys()
159                            ):
160                                formula_list_parent[atom_mono] = (
161                                    formula_list_parent[atom_mono] + formula_dict[atom]
162                                )
163                            elif atom_mono in Atoms.isotopes.keys():
164                                formula_list_parent[atom_mono] = formula_dict[atom]
165                            else:
166                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
167                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
168                    mono_mfobj = MolecularFormula(
169                        formula_list_parent,
170                        int(ion_charge_df[df_index]),
171                        mspeak_parent=mass_spec_obj[mono_index],
172                        ion_type=ion_type,
173                        adduct_atom=adduct_atom,
174                    )
175
176                    # Next, generate isotopologues from the parent
177                    isos = list(
178                        mono_mfobj.isotopologues(
179                            min_abundance=mass_spec_obj.abundance.min()*0.01,
180                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
181                            dynamic_range=mass_spec_obj.dynamic_range,
182                        )
183                    )
184
185                    # Finally, find the isotopologue that matches the formula_dict
186                    matched_isos = []
187                    for iso in isos:
188                        # If match was already found, exit the loop
189                        if len(matched_isos) > 0:
190                            break
191                        else:
192                            # Check the atoms match
193                            if set(iso.atoms) == set(formula_dict.keys()):
194                                # Check the values of the atoms match
195                                if all(
196                                    [
197                                        iso[atom] == formula_dict[atom]
198                                        for atom in formula_dict
199                                    ]
200                                ):
201                                    matched_isos.append(iso)
202
203                    if len(matched_isos) == 0:
204                        #FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
205                        warnings.warn(f"No isotopologue matched the formula_dict: {formula_dict}")
206                        bad_mf = True
207                    else:
208                        bad_mf = False                   
209                        mfobj = matched_isos[0]
210
211                        # Add the mono isotopic index, confidence score and isotopologue similarity
212                        mfobj.mspeak_index_mono_isotopic = int(
213                            dataframe.iloc[df_index]["Mono Isotopic Index"]
214                        )
215                if not bad_mf:
216                    # Add the confidence score and isotopologue similarity and average MZ error score
217                    if "m/z Error Score" in dataframe:
218                        mfobj._mass_error_average_score = float(
219                            dataframe.iloc[df_index]["m/z Error Score"]
220                        )
221                    if "Confidence Score" in dataframe:
222                        mfobj._confidence_score = float(
223                            dataframe.iloc[df_index]["Confidence Score"]
224                        )
225                    if "Isotopologue Similarity" in dataframe:
226                        mfobj._isotopologue_similarity = float(
227                            dataframe.iloc[df_index]["Isotopologue Similarity"]
228                        )
229                    mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
230
231
232class ReadMassList(MassListBaseClass):
233    """
234    The ReadMassList object reads unprocessed mass list data types
235    and returns the mass spectrum object.
236
237    Parameters
238    ----------
239    MassListBaseClass : class
240        The base class for reading mass list data types.
241
242    Methods
243    -------
244    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
245
246    """
247
248    def get_mass_spectrum(
249        self,
250        polarity: int,
251        scan: int = 0,
252        auto_process: bool = True,
253        loadSettings: bool = True,
254    ):
255        """
256        Reads mass list data types and returns the mass spectrum object.
257
258        Parameters
259        ----------
260        polarity : int
261            The polarity of the mass spectrum (+1 or -1).
262        scan : int, optional
263            The scan number of the mass spectrum (default is 0).
264        auto_process : bool, optional
265            Flag indicating whether to automatically process the mass spectrum (default is True).
266        loadSettings : bool, optional
267            Flag indicating whether to load settings for the mass spectrum (default is True).
268
269        Returns
270        -------
271        mass_spec : MassSpecCentroid or MassSpecProfile
272            The mass spectrum object.
273
274        """
275
276        # delimiter = "  " or " " or  "," or "\t" etc
277
278        if self.isCentroid:
279            dataframe = self.get_dataframe()
280
281            self.check_columns(dataframe.columns)
282
283            self.clean_data_frame(dataframe)
284
285            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
286
287            output_parameters = self.get_output_parameters(polarity)
288
289            mass_spec = MassSpecCentroid(
290                dataframe.to_dict(orient="list"),
291                output_parameters,
292                auto_process=auto_process,
293            )
294
295            if loadSettings:
296                self.load_settings(mass_spec, output_parameters)
297
298            return mass_spec
299
300        else:
301            dataframe = self.get_dataframe()
302
303            self.check_columns(dataframe.columns)
304
305            output_parameters = self.get_output_parameters(polarity)
306
307            self.clean_data_frame(dataframe)
308
309            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
310
311            mass_spec = MassSpecProfile(
312                dataframe.to_dict(orient="list"),
313                output_parameters,
314                auto_process=auto_process,
315            )
316
317            if loadSettings:
318                self.load_settings(mass_spec, output_parameters)
319
320            return mass_spec
321
322
323class ReadBrukerXMLList(MassListBaseClass):
324    """
325    The ReadBrukerXMLList object reads Bruker XML objects
326    and returns the mass spectrum object.
327    See MassListBaseClass for details
328
329    Parameters
330    ----------
331    MassListBaseClass : class
332        The base class for reading mass list data types and returning the mass spectrum object.
333
334    Methods
335    -------
336    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
337
338    """
339
340    def get_mass_spectrum(
341        self,
342        polarity: bool = None,
343        scan: int = 0,
344        auto_process: bool = True,
345        loadSettings: bool = True,
346    ):
347        """
348        Reads mass list data types and returns the mass spectrum object.
349
350        Parameters
351        ----------
352        polarity : bool, optional
353            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
354        scan : int, optional
355            The scan number of the mass spectrum. Default is 0.
356        auto_process : bool, optional
357            Whether to automatically process the mass spectrum. Default is True.
358        loadSettings : bool, optional
359            Whether to load the settings for the mass spectrum. Default is True.
360
361        Returns
362        -------
363        mass_spec : MassSpecCentroid
364            The mass spectrum object representing the centroided mass spectrum.
365        """
366        # delimiter = "  " or " " or  "," or "\t" etc
367
368        if polarity == None:
369            polarity = self.get_xml_polarity()
370        dataframe = self.get_dataframe()
371
372        self.check_columns(dataframe.columns)
373
374        self.clean_data_frame(dataframe)
375
376        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
377
378        output_parameters = self.get_output_parameters(polarity)
379
380        mass_spec = MassSpecCentroid(
381            dataframe.to_dict(orient="list"),
382            output_parameters,
383            auto_process=auto_process,
384        )
385
386        if loadSettings:
387            self.load_settings(mass_spec, output_parameters)
388
389        return mass_spec

class ReadCoremsMasslist(corems.mass_spectrum.input.baseClass.MassListBaseClass): View Source

 16class ReadCoremsMasslist(MassListBaseClass):
 17    """
 18    The ReadCoremsMasslist object reads processed mass list data types
 19    and returns the mass spectrum obj with the molecular formula obj
 20
 21    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
 22    Please see MassListBaseClass for more details
 23
 24    """
 25
 26    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
 27        """
 28        Get the mass spectrum object from the processed mass list data.
 29
 30        Parameters
 31        ----------
 32        loadSettings : bool, optional
 33            Whether to load the settings for the mass spectrum. Default is True.
 34
 35        Returns
 36        -------
 37        MassSpecCentroid
 38            The mass spectrum object.
 39
 40        Raises
 41        ------
 42        ValueError
 43            If the input file is not a valid CoreMS file.
 44        """
 45
 46        dataframe = self.get_dataframe()
 47
 48        if not set(
 49            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
 50        ).issubset(dataframe.columns):
 51            raise ValueError(
 52                "%s it is not a valid CoreMS file" % str(self.file_location)
 53            )
 54
 55        self.check_columns(dataframe.columns)
 56
 57        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
 58
 59        polarity = dataframe["Ion Charge"].values[0]
 60
 61        output_parameters = self.get_output_parameters(polarity)
 62
 63        mass_spec_obj = MassSpecCentroid(
 64            dataframe.to_dict(orient="list"), output_parameters
 65        )
 66
 67        if loadSettings is True:
 68            self.load_settings(mass_spec_obj, output_parameters)
 69
 70        self.add_molecular_formula(mass_spec_obj, dataframe)
 71
 72        return mass_spec_obj
 73
 74    def add_molecular_formula(self, mass_spec_obj, dataframe):
 75        """
 76        Add molecular formula information to the mass spectrum object.
 77
 78        Parameters
 79        ----------
 80        mass_spec_obj : MassSpecCentroid
 81            The mass spectrum object to add the molecular formula to.
 82        dataframe : pandas.DataFrame
 83            The processed mass list data.
 84
 85        """
 86
 87        # check if is coreMS file
 88        if "Is Isotopologue" in dataframe:
 89            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
 90            dataframe = dataframe.reset_index(drop=True)
 91
 92            mz_exp_df = dataframe[Labels.mz].astype(float)
 93            formula_df = dataframe[
 94                dataframe.columns.intersection(Atoms.atoms_order)
 95            ].copy()
 96            formula_df.fillna(0, inplace=True)
 97            formula_df.replace(b"nan", 0, inplace=True)
 98
 99            ion_type_df = dataframe["Ion Type"]
100            ion_charge_df = dataframe["Ion Charge"]
101            is_isotopologue_df = dataframe["Is Isotopologue"]
102            if "Adduct" in dataframe:
103                adduct_df = dataframe["Adduct"]
104            else:
105                adduct_df = None
106
107        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
108
109        for df_index, mz_exp in enumerate(mz_exp_df):
110            bad_mf = False
111            counts = 0
112
113            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
114
115            if "Is Isotopologue" in dataframe:
116                atoms = list(formula_df.columns.astype(str))
117                counts = list(formula_df.iloc[df_index].astype(int))
118
119                formula_dict = dict(zip(atoms, counts))
120
121                # Drop any atoms with 0 counts
122                formula_dict = {
123                    atom: formula_dict[atom]
124                    for atom in formula_dict
125                    if formula_dict[atom] > 0
126                }
127
128            if sum(counts) > 0:
129                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
130                if adduct_df is not None:
131                    adduct_atom = str(adduct_df[df_index])
132                    if adduct_atom == "None":
133                        adduct_atom = None
134                else:
135                    adduct_atom = None
136
137                # If not isotopologue, cast as MolecularFormula
138                if not bool(int(is_isotopologue_df[df_index])):
139                    mfobj = MolecularFormula(
140                        formula_dict,
141                        int(ion_charge_df[df_index]),
142                        mspeak_parent=mass_spec_obj[ms_peak_index],
143                        ion_type=ion_type,
144                        adduct_atom=adduct_atom,
145                    )
146
147                # if is isotopologue, recast as MolecularFormulaIsotopologue
148                if bool(int(is_isotopologue_df[df_index])):
149                    # First make a MolecularFormula object for the parent so we can get probabilities etc
150                    formula_list_parent = {}
151                    for atom in formula_dict:
152                        if atom in Atoms.isotopes.keys():
153                            formula_list_parent[atom] = formula_dict[atom]
154                        else:
155                            # remove any numbers from the atom name to cast as a mono-isotopic atom
156                            atom_mono = atom.strip("0123456789")
157                            if (
158                                atom_mono in Atoms.isotopes.keys()
159                                and atom_mono in formula_list_parent.keys()
160                            ):
161                                formula_list_parent[atom_mono] = (
162                                    formula_list_parent[atom_mono] + formula_dict[atom]
163                                )
164                            elif atom_mono in Atoms.isotopes.keys():
165                                formula_list_parent[atom_mono] = formula_dict[atom]
166                            else:
167                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
168                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
169                    mono_mfobj = MolecularFormula(
170                        formula_list_parent,
171                        int(ion_charge_df[df_index]),
172                        mspeak_parent=mass_spec_obj[mono_index],
173                        ion_type=ion_type,
174                        adduct_atom=adduct_atom,
175                    )
176
177                    # Next, generate isotopologues from the parent
178                    isos = list(
179                        mono_mfobj.isotopologues(
180                            min_abundance=mass_spec_obj.abundance.min()*0.01,
181                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
182                            dynamic_range=mass_spec_obj.dynamic_range,
183                        )
184                    )
185
186                    # Finally, find the isotopologue that matches the formula_dict
187                    matched_isos = []
188                    for iso in isos:
189                        # If match was already found, exit the loop
190                        if len(matched_isos) > 0:
191                            break
192                        else:
193                            # Check the atoms match
194                            if set(iso.atoms) == set(formula_dict.keys()):
195                                # Check the values of the atoms match
196                                if all(
197                                    [
198                                        iso[atom] == formula_dict[atom]
199                                        for atom in formula_dict
200                                    ]
201                                ):
202                                    matched_isos.append(iso)
203
204                    if len(matched_isos) == 0:
205                        #FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
206                        warnings.warn(f"No isotopologue matched the formula_dict: {formula_dict}")
207                        bad_mf = True
208                    else:
209                        bad_mf = False                   
210                        mfobj = matched_isos[0]
211
212                        # Add the mono isotopic index, confidence score and isotopologue similarity
213                        mfobj.mspeak_index_mono_isotopic = int(
214                            dataframe.iloc[df_index]["Mono Isotopic Index"]
215                        )
216                if not bad_mf:
217                    # Add the confidence score and isotopologue similarity and average MZ error score
218                    if "m/z Error Score" in dataframe:
219                        mfobj._mass_error_average_score = float(
220                            dataframe.iloc[df_index]["m/z Error Score"]
221                        )
222                    if "Confidence Score" in dataframe:
223                        mfobj._confidence_score = float(
224                            dataframe.iloc[df_index]["Confidence Score"]
225                        )
226                    if "Isotopologue Similarity" in dataframe:
227                        mfobj._isotopologue_similarity = float(
228                            dataframe.iloc[df_index]["Isotopologue Similarity"]
229                        )
230                    mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)

The ReadCoremsMasslist object reads processed mass list data types and returns the mass spectrum obj with the molecular formula obj

Only available for centroid mass spectrum type: it will ignore the parameter isCentroid Please see MassListBaseClass for more details

def get_mass_spectrum( self, loadSettings: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid: View Source

26    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
27        """
28        Get the mass spectrum object from the processed mass list data.
29
30        Parameters
31        ----------
32        loadSettings : bool, optional
33            Whether to load the settings for the mass spectrum. Default is True.
34
35        Returns
36        -------
37        MassSpecCentroid
38            The mass spectrum object.
39
40        Raises
41        ------
42        ValueError
43            If the input file is not a valid CoreMS file.
44        """
45
46        dataframe = self.get_dataframe()
47
48        if not set(
49            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
50        ).issubset(dataframe.columns):
51            raise ValueError(
52                "%s it is not a valid CoreMS file" % str(self.file_location)
53            )
54
55        self.check_columns(dataframe.columns)
56
57        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
58
59        polarity = dataframe["Ion Charge"].values[0]
60
61        output_parameters = self.get_output_parameters(polarity)
62
63        mass_spec_obj = MassSpecCentroid(
64            dataframe.to_dict(orient="list"), output_parameters
65        )
66
67        if loadSettings is True:
68            self.load_settings(mass_spec_obj, output_parameters)
69
70        self.add_molecular_formula(mass_spec_obj, dataframe)
71
72        return mass_spec_obj

Get the mass spectrum object from the processed mass list data.

Parameters

loadSettings (bool, optional): Whether to load the settings for the mass spectrum. Default is True.

Returns

MassSpecCentroid: The mass spectrum object.

Raises

ValueError: If the input file is not a valid CoreMS file.

def add_molecular_formula(self, mass_spec_obj, dataframe): View Source

 74    def add_molecular_formula(self, mass_spec_obj, dataframe):
 75        """
 76        Add molecular formula information to the mass spectrum object.
 77
 78        Parameters
 79        ----------
 80        mass_spec_obj : MassSpecCentroid
 81            The mass spectrum object to add the molecular formula to.
 82        dataframe : pandas.DataFrame
 83            The processed mass list data.
 84
 85        """
 86
 87        # check if is coreMS file
 88        if "Is Isotopologue" in dataframe:
 89            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
 90            dataframe = dataframe.reset_index(drop=True)
 91
 92            mz_exp_df = dataframe[Labels.mz].astype(float)
 93            formula_df = dataframe[
 94                dataframe.columns.intersection(Atoms.atoms_order)
 95            ].copy()
 96            formula_df.fillna(0, inplace=True)
 97            formula_df.replace(b"nan", 0, inplace=True)
 98
 99            ion_type_df = dataframe["Ion Type"]
100            ion_charge_df = dataframe["Ion Charge"]
101            is_isotopologue_df = dataframe["Is Isotopologue"]
102            if "Adduct" in dataframe:
103                adduct_df = dataframe["Adduct"]
104            else:
105                adduct_df = None
106
107        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
108
109        for df_index, mz_exp in enumerate(mz_exp_df):
110            bad_mf = False
111            counts = 0
112
113            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
114
115            if "Is Isotopologue" in dataframe:
116                atoms = list(formula_df.columns.astype(str))
117                counts = list(formula_df.iloc[df_index].astype(int))
118
119                formula_dict = dict(zip(atoms, counts))
120
121                # Drop any atoms with 0 counts
122                formula_dict = {
123                    atom: formula_dict[atom]
124                    for atom in formula_dict
125                    if formula_dict[atom] > 0
126                }
127
128            if sum(counts) > 0:
129                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
130                if adduct_df is not None:
131                    adduct_atom = str(adduct_df[df_index])
132                    if adduct_atom == "None":
133                        adduct_atom = None
134                else:
135                    adduct_atom = None
136
137                # If not isotopologue, cast as MolecularFormula
138                if not bool(int(is_isotopologue_df[df_index])):
139                    mfobj = MolecularFormula(
140                        formula_dict,
141                        int(ion_charge_df[df_index]),
142                        mspeak_parent=mass_spec_obj[ms_peak_index],
143                        ion_type=ion_type,
144                        adduct_atom=adduct_atom,
145                    )
146
147                # if is isotopologue, recast as MolecularFormulaIsotopologue
148                if bool(int(is_isotopologue_df[df_index])):
149                    # First make a MolecularFormula object for the parent so we can get probabilities etc
150                    formula_list_parent = {}
151                    for atom in formula_dict:
152                        if atom in Atoms.isotopes.keys():
153                            formula_list_parent[atom] = formula_dict[atom]
154                        else:
155                            # remove any numbers from the atom name to cast as a mono-isotopic atom
156                            atom_mono = atom.strip("0123456789")
157                            if (
158                                atom_mono in Atoms.isotopes.keys()
159                                and atom_mono in formula_list_parent.keys()
160                            ):
161                                formula_list_parent[atom_mono] = (
162                                    formula_list_parent[atom_mono] + formula_dict[atom]
163                                )
164                            elif atom_mono in Atoms.isotopes.keys():
165                                formula_list_parent[atom_mono] = formula_dict[atom]
166                            else:
167                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
168                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
169                    mono_mfobj = MolecularFormula(
170                        formula_list_parent,
171                        int(ion_charge_df[df_index]),
172                        mspeak_parent=mass_spec_obj[mono_index],
173                        ion_type=ion_type,
174                        adduct_atom=adduct_atom,
175                    )
176
177                    # Next, generate isotopologues from the parent
178                    isos = list(
179                        mono_mfobj.isotopologues(
180                            min_abundance=mass_spec_obj.abundance.min()*0.01,
181                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
182                            dynamic_range=mass_spec_obj.dynamic_range,
183                        )
184                    )
185
186                    # Finally, find the isotopologue that matches the formula_dict
187                    matched_isos = []
188                    for iso in isos:
189                        # If match was already found, exit the loop
190                        if len(matched_isos) > 0:
191                            break
192                        else:
193                            # Check the atoms match
194                            if set(iso.atoms) == set(formula_dict.keys()):
195                                # Check the values of the atoms match
196                                if all(
197                                    [
198                                        iso[atom] == formula_dict[atom]
199                                        for atom in formula_dict
200                                    ]
201                                ):
202                                    matched_isos.append(iso)
203
204                    if len(matched_isos) == 0:
205                        #FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
206                        warnings.warn(f"No isotopologue matched the formula_dict: {formula_dict}")
207                        bad_mf = True
208                    else:
209                        bad_mf = False                   
210                        mfobj = matched_isos[0]
211
212                        # Add the mono isotopic index, confidence score and isotopologue similarity
213                        mfobj.mspeak_index_mono_isotopic = int(
214                            dataframe.iloc[df_index]["Mono Isotopic Index"]
215                        )
216                if not bad_mf:
217                    # Add the confidence score and isotopologue similarity and average MZ error score
218                    if "m/z Error Score" in dataframe:
219                        mfobj._mass_error_average_score = float(
220                            dataframe.iloc[df_index]["m/z Error Score"]
221                        )
222                    if "Confidence Score" in dataframe:
223                        mfobj._confidence_score = float(
224                            dataframe.iloc[df_index]["Confidence Score"]
225                        )
226                    if "Isotopologue Similarity" in dataframe:
227                        mfobj._isotopologue_similarity = float(
228                            dataframe.iloc[df_index]["Isotopologue Similarity"]
229                        )
230                    mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)

Add molecular formula information to the mass spectrum object.

Parameters

mass_spec_obj (MassSpecCentroid): The mass spectrum object to add the molecular formula to.
dataframe (pandas.DataFrame): The processed mass list data.

Inherited Members

corems.mass_spectrum.input.baseClass.MassListBaseClass: MassListBaseClass; file_location; header_lines; isCentroid; isThermoProfile; headerless; analyzer; instrument_label; sample_name; parameters; set_parameter_from_toml; set_parameter_from_json; data_type; delimiter; encoding_detector; set_data_type; get_dataframe; load_settings; get_output_parameters; clean_data_frame; check_columns; read_xml_peaks; get_xml_polarity

class ReadMassList(corems.mass_spectrum.input.baseClass.MassListBaseClass): View Source

233class ReadMassList(MassListBaseClass):
234    """
235    The ReadMassList object reads unprocessed mass list data types
236    and returns the mass spectrum object.
237
238    Parameters
239    ----------
240    MassListBaseClass : class
241        The base class for reading mass list data types.
242
243    Methods
244    -------
245    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
246
247    """
248
249    def get_mass_spectrum(
250        self,
251        polarity: int,
252        scan: int = 0,
253        auto_process: bool = True,
254        loadSettings: bool = True,
255    ):
256        """
257        Reads mass list data types and returns the mass spectrum object.
258
259        Parameters
260        ----------
261        polarity : int
262            The polarity of the mass spectrum (+1 or -1).
263        scan : int, optional
264            The scan number of the mass spectrum (default is 0).
265        auto_process : bool, optional
266            Flag indicating whether to automatically process the mass spectrum (default is True).
267        loadSettings : bool, optional
268            Flag indicating whether to load settings for the mass spectrum (default is True).
269
270        Returns
271        -------
272        mass_spec : MassSpecCentroid or MassSpecProfile
273            The mass spectrum object.
274
275        """
276
277        # delimiter = "  " or " " or  "," or "\t" etc
278
279        if self.isCentroid:
280            dataframe = self.get_dataframe()
281
282            self.check_columns(dataframe.columns)
283
284            self.clean_data_frame(dataframe)
285
286            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
287
288            output_parameters = self.get_output_parameters(polarity)
289
290            mass_spec = MassSpecCentroid(
291                dataframe.to_dict(orient="list"),
292                output_parameters,
293                auto_process=auto_process,
294            )
295
296            if loadSettings:
297                self.load_settings(mass_spec, output_parameters)
298
299            return mass_spec
300
301        else:
302            dataframe = self.get_dataframe()
303
304            self.check_columns(dataframe.columns)
305
306            output_parameters = self.get_output_parameters(polarity)
307
308            self.clean_data_frame(dataframe)
309
310            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
311
312            mass_spec = MassSpecProfile(
313                dataframe.to_dict(orient="list"),
314                output_parameters,
315                auto_process=auto_process,
316            )
317
318            if loadSettings:
319                self.load_settings(mass_spec, output_parameters)
320
321            return mass_spec

The ReadMassList object reads unprocessed mass list data types and returns the mass spectrum object.

Parameters

MassListBaseClass (class): The base class for reading mass list data types.

Methods

get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.

def get_mass_spectrum( self, polarity: int, scan: int = 0, auto_process: bool = True, loadSettings: bool = True): View Source

249    def get_mass_spectrum(
250        self,
251        polarity: int,
252        scan: int = 0,
253        auto_process: bool = True,
254        loadSettings: bool = True,
255    ):
256        """
257        Reads mass list data types and returns the mass spectrum object.
258
259        Parameters
260        ----------
261        polarity : int
262            The polarity of the mass spectrum (+1 or -1).
263        scan : int, optional
264            The scan number of the mass spectrum (default is 0).
265        auto_process : bool, optional
266            Flag indicating whether to automatically process the mass spectrum (default is True).
267        loadSettings : bool, optional
268            Flag indicating whether to load settings for the mass spectrum (default is True).
269
270        Returns
271        -------
272        mass_spec : MassSpecCentroid or MassSpecProfile
273            The mass spectrum object.
274
275        """
276
277        # delimiter = "  " or " " or  "," or "\t" etc
278
279        if self.isCentroid:
280            dataframe = self.get_dataframe()
281
282            self.check_columns(dataframe.columns)
283
284            self.clean_data_frame(dataframe)
285
286            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
287
288            output_parameters = self.get_output_parameters(polarity)
289
290            mass_spec = MassSpecCentroid(
291                dataframe.to_dict(orient="list"),
292                output_parameters,
293                auto_process=auto_process,
294            )
295
296            if loadSettings:
297                self.load_settings(mass_spec, output_parameters)
298
299            return mass_spec
300
301        else:
302            dataframe = self.get_dataframe()
303
304            self.check_columns(dataframe.columns)
305
306            output_parameters = self.get_output_parameters(polarity)
307
308            self.clean_data_frame(dataframe)
309
310            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
311
312            mass_spec = MassSpecProfile(
313                dataframe.to_dict(orient="list"),
314                output_parameters,
315                auto_process=auto_process,
316            )
317
318            if loadSettings:
319                self.load_settings(mass_spec, output_parameters)
320
321            return mass_spec

Reads mass list data types and returns the mass spectrum object.

Parameters

polarity (int): The polarity of the mass spectrum (+1 or -1).
scan (int, optional): The scan number of the mass spectrum (default is 0).
auto_process (bool, optional): Flag indicating whether to automatically process the mass spectrum (default is True).
loadSettings (bool, optional): Flag indicating whether to load settings for the mass spectrum (default is True).

Returns

mass_spec (MassSpecCentroid or MassSpecProfile): The mass spectrum object.

Inherited Members

corems.mass_spectrum.input.baseClass.MassListBaseClass: MassListBaseClass; file_location; header_lines; isCentroid; isThermoProfile; headerless; analyzer; instrument_label; sample_name; parameters; set_parameter_from_toml; set_parameter_from_json; data_type; delimiter; encoding_detector; set_data_type; get_dataframe; load_settings; get_output_parameters; clean_data_frame; check_columns; read_xml_peaks; get_xml_polarity

class ReadBrukerXMLList(corems.mass_spectrum.input.baseClass.MassListBaseClass): View Source

324class ReadBrukerXMLList(MassListBaseClass):
325    """
326    The ReadBrukerXMLList object reads Bruker XML objects
327    and returns the mass spectrum object.
328    See MassListBaseClass for details
329
330    Parameters
331    ----------
332    MassListBaseClass : class
333        The base class for reading mass list data types and returning the mass spectrum object.
334
335    Methods
336    -------
337    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
338
339    """
340
341    def get_mass_spectrum(
342        self,
343        polarity: bool = None,
344        scan: int = 0,
345        auto_process: bool = True,
346        loadSettings: bool = True,
347    ):
348        """
349        Reads mass list data types and returns the mass spectrum object.
350
351        Parameters
352        ----------
353        polarity : bool, optional
354            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
355        scan : int, optional
356            The scan number of the mass spectrum. Default is 0.
357        auto_process : bool, optional
358            Whether to automatically process the mass spectrum. Default is True.
359        loadSettings : bool, optional
360            Whether to load the settings for the mass spectrum. Default is True.
361
362        Returns
363        -------
364        mass_spec : MassSpecCentroid
365            The mass spectrum object representing the centroided mass spectrum.
366        """
367        # delimiter = "  " or " " or  "," or "\t" etc
368
369        if polarity == None:
370            polarity = self.get_xml_polarity()
371        dataframe = self.get_dataframe()
372
373        self.check_columns(dataframe.columns)
374
375        self.clean_data_frame(dataframe)
376
377        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
378
379        output_parameters = self.get_output_parameters(polarity)
380
381        mass_spec = MassSpecCentroid(
382            dataframe.to_dict(orient="list"),
383            output_parameters,
384            auto_process=auto_process,
385        )
386
387        if loadSettings:
388            self.load_settings(mass_spec, output_parameters)
389
390        return mass_spec

The ReadBrukerXMLList object reads Bruker XML objects and returns the mass spectrum object. See MassListBaseClass for details

Parameters

MassListBaseClass (class): The base class for reading mass list data types and returning the mass spectrum object.

Methods

get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.

def get_mass_spectrum( self, polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True): View Source

341    def get_mass_spectrum(
342        self,
343        polarity: bool = None,
344        scan: int = 0,
345        auto_process: bool = True,
346        loadSettings: bool = True,
347    ):
348        """
349        Reads mass list data types and returns the mass spectrum object.
350
351        Parameters
352        ----------
353        polarity : bool, optional
354            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
355        scan : int, optional
356            The scan number of the mass spectrum. Default is 0.
357        auto_process : bool, optional
358            Whether to automatically process the mass spectrum. Default is True.
359        loadSettings : bool, optional
360            Whether to load the settings for the mass spectrum. Default is True.
361
362        Returns
363        -------
364        mass_spec : MassSpecCentroid
365            The mass spectrum object representing the centroided mass spectrum.
366        """
367        # delimiter = "  " or " " or  "," or "\t" etc
368
369        if polarity == None:
370            polarity = self.get_xml_polarity()
371        dataframe = self.get_dataframe()
372
373        self.check_columns(dataframe.columns)
374
375        self.clean_data_frame(dataframe)
376
377        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
378
379        output_parameters = self.get_output_parameters(polarity)
380
381        mass_spec = MassSpecCentroid(
382            dataframe.to_dict(orient="list"),
383            output_parameters,
384            auto_process=auto_process,
385        )
386
387        if loadSettings:
388            self.load_settings(mass_spec, output_parameters)
389
390        return mass_spec

Reads mass list data types and returns the mass spectrum object.

Parameters

polarity (bool, optional): The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
scan (int, optional): The scan number of the mass spectrum. Default is 0.
auto_process (bool, optional): Whether to automatically process the mass spectrum. Default is True.
loadSettings (bool, optional): Whether to load the settings for the mass spectrum. Default is True.

Returns

mass_spec (MassSpecCentroid): The mass spectrum object representing the centroided mass spectrum.

Inherited Members

corems.mass_spectrum.input.baseClass.MassListBaseClass: MassListBaseClass; file_location; header_lines; isCentroid; isThermoProfile; headerless; analyzer; instrument_label; sample_name; parameters; set_parameter_from_toml; set_parameter_from_json; data_type; delimiter; encoding_detector; set_data_type; get_dataframe; load_settings; get_output_parameters; clean_data_frame; check_columns; read_xml_peaks; get_xml_polarity