corems.mass_spectrum.output.export

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Nov 11, 2019"
  3
  4import json
  5from datetime import datetime, timezone
  6from pathlib import Path
  7from threading import Thread
  8
  9import h5py
 10import toml
 11from numpy import NaN, empty
 12from pandas import DataFrame
 13
 14from corems.encapsulation.constant import Atoms, Labels #Labels is accessed in the eval() function
 15from corems.encapsulation.output import parameter_to_dict
 16from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecfromFreq
 17
 18
 19class HighResMassSpecExport(Thread):
 20    """A class for exporting high-resolution mass spectra.
 21
 22    Parameters
 23    ----------
 24    out_file_path : str
 25        The output file path.
 26    mass_spectrum : MassSpectrum
 27        The mass spectrum to export.
 28    output_type : str, optional
 29        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
 30
 31    Attributes
 32    ----------
 33    output_file : Path
 34        The output file path.
 35    output_type : str
 36        The type of output file.
 37    mass_spectrum : MassSpectrum
 38        The mass spectrum to export.
 39    atoms_order_list : list
 40        The list of assigned atoms in the order specified by Atoms.atoms_order list.
 41    columns_label : list
 42        The column labels in order.
 43
 44    Methods
 45    -------
 46    * save().
 47        Save the mass spectrum data to the output file.
 48    * run().
 49        Run the export process.
 50    * get_pandas_df().
 51        Returns the mass spectrum data as a pandas DataFrame.
 52    * write_settings(output_path, mass_spectrum).
 53        Writes the settings of the mass spectrum to a JSON file.
 54    * to_pandas(write_metadata=True).
 55        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
 56    * to_excel(write_metadata=True).
 57        Exports the mass spectrum data to an Excel file.
 58    * to_csv(write_metadata=True).
 59        Exports the mass spectrum data to a CSV file.
 60    * to_json().
 61        Exports the mass spectrum data to a JSON string.
 62    * to_hdf().
 63        Exports the mass spectrum data to an HDF5 file.
 64    * parameters_to_toml().
 65        Converts the mass spectrum parameters to a TOML string.
 66    * parameters_to_json().
 67        Converts the mass spectrum parameters to a JSON string.
 68    * get_mass_spec_attrs(mass_spectrum).
 69        Returns the mass spectrum attributes as a dictionary.
 70    * get_all_used_atoms_in_order(mass_spectrum).
 71        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
 72    * list_dict_to_list(mass_spectrum, is_hdf5=False).
 73        Returns the mass spectrum data as a list of dictionaries.
 74    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).
 75        Returns the mass spectrum data as a list of dictionaries.
 76
 77    """
 78
 79    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
 80        Thread.__init__(self)
 81
 82        self.output_file = Path(out_file_path)
 83
 84        # 'excel', 'csv' or 'pandas'
 85        self.output_type = output_type
 86
 87        self.mass_spectrum = mass_spectrum
 88
 89        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
 90        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
 91
 92        self._init_columns()
 93
 94    def _init_columns(self):
 95        """Initialize the columns for the mass spectrum output."""
 96        # column labels in order
 97        self.columns_label = [
 98            "Index",
 99            "m/z",
100            "Calibrated m/z",
101            "Calculated m/z",
102            "Peak Height",
103            "Peak Area",
104            "Resolving Power",
105            "S/N",
106            "Ion Charge",
107            "m/z Error (ppm)",
108            "m/z Error Score",
109            "Isotopologue Similarity",
110            "Confidence Score",
111            "DBE",
112            "O/C",
113            "H/C",
114            "Heteroatom Class",
115            "Ion Type",
116            "Adduct",
117            "Is Isotopologue",
118            "Mono Isotopic Index",
119            "Molecular Formula",
120        ]
121
122    @property
123    def output_type(self):
124        """Returns the output type of the mass spectrum."""
125        return self._output_type
126
127    @output_type.setter
128    def output_type(self, output_type):
129        output_types = ["excel", "csv", "pandas", "hdf5"]
130        if output_type in output_types:
131            self._output_type = output_type
132        else:
133            raise TypeError(
134                'Supported types are "excel", "csv" or "pandas", %s entered'
135                % output_type
136            )
137
138    def save(self):
139        """Save the mass spectrum data to the output file.
140
141        Raises
142        ------
143        ValueError
144            If the output type is not supported.
145        """
146
147        if self.output_type == "excel":
148            self.to_excel()
149        elif self.output_type == "csv":
150            self.to_csv()
151        elif self.output_type == "pandas":
152            self.to_pandas()
153        elif self.output_type == "hdf5":
154            self.to_hdf()
155        else:
156            raise ValueError(
157                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'"
158                % self.output_type
159            )
160
161    def run(self):
162        """Run the export process.
163
164        This method is called when the thread starts.
165        It calls the save method to perform the export."""
166        self.save()
167
168    def get_pandas_df(self, additional_columns=None):
169        """Returns the mass spectrum data as a pandas DataFrame.
170
171        Parameters
172        ----------
173        additional_columns : list, optional
174            Additional columns to include in the DataFrame. Defaults to None.
175            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
176
177        Returns
178        -------
179        DataFrame
180            The mass spectrum data as a pandas DataFrame.
181        """
182        if additional_columns is not None:
183            possible_additional_columns = [
184                "Aromaticity Index",
185                "NOSC",
186                "Aromaticity Index (modified)",
187            ]
188            if additional_columns:
189                for column in additional_columns:
190                    if column not in possible_additional_columns:
191                        raise ValueError("Invalid additional column: %s" % column)
192            columns = (
193                self.columns_label
194                + additional_columns
195                + self.get_all_used_atoms_in_order(self.mass_spectrum)
196            )
197        else:
198            columns = self.columns_label + self.get_all_used_atoms_in_order(
199                self.mass_spectrum
200            )
201        dict_data_list = self.get_list_dict_data(
202            self.mass_spectrum, additional_columns=additional_columns
203        )
204        df = DataFrame(dict_data_list, columns=columns)
205        df.name = self.output_file
206        return df
207
208    def write_settings(self, output_path, mass_spectrum):
209        """Writes the settings of the mass spectrum to a JSON file.
210
211        Parameters
212        ----------
213        output_path : str
214            The output file path.
215        mass_spectrum : MassSpectrum
216            The mass spectrum to export.
217        """
218
219        import json
220
221        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
222
223        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
224        dict_setting["analyzer"] = mass_spectrum.analyzer
225        dict_setting["instrument_label"] = mass_spectrum.instrument_label
226        dict_setting["sample_name"] = mass_spectrum.sample_name
227
228        with open(
229            output_path.with_suffix(".json"),
230            "w",
231            encoding="utf8",
232        ) as outfile:
233            output = json.dumps(
234                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
235            )
236            outfile.write(output)
237
238    def to_pandas(self, write_metadata=True):
239        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
240
241        Parameters
242        ----------
243        write_metadata : bool, optional
244            Whether to write the metadata to a JSON file. Defaults to True.
245        """
246
247        columns = self.columns_label + self.get_all_used_atoms_in_order(
248            self.mass_spectrum
249        )
250
251        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
252
253        df = DataFrame(dict_data_list, columns=columns)
254
255        df.to_pickle(self.output_file.with_suffix(".pkl"))
256
257        if write_metadata:
258            self.write_settings(self.output_file, self.mass_spectrum)
259
260    def to_excel(self, write_metadata=True):
261        """Exports the mass spectrum data to an Excel file.
262
263        Parameters
264        ----------
265        write_metadata : bool, optional
266            Whether to write the metadata to a JSON file. Defaults to True.
267        """
268
269        columns = self.columns_label + self.get_all_used_atoms_in_order(
270            self.mass_spectrum
271        )
272
273        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
274
275        df = DataFrame(dict_data_list, columns=columns)
276
277        df.to_excel(self.output_file.with_suffix(".xlsx"))
278
279        if write_metadata:
280            self.write_settings(self.output_file, self.mass_spectrum)
281
282    def to_csv(self, write_metadata=True):
283        """Exports the mass spectrum data to a CSV file.
284
285        Parameters
286        ----------
287        write_metadata : bool, optional
288            Whether to write the metadata to a JSON file. Defaults to True.
289        """
290
291        columns = self.columns_label + self.get_all_used_atoms_in_order(
292            self.mass_spectrum
293        )
294
295        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
296
297        import csv
298
299        try:
300            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
301                writer = csv.DictWriter(csvfile, fieldnames=columns)
302                writer.writeheader()
303                for data in dict_data_list:
304                    writer.writerow(data)
305            if write_metadata:
306                self.write_settings(self.output_file, self.mass_spectrum)
307
308        except IOError as ioerror:
309            print(ioerror)
310
311    def to_json(self):
312        """Exports the mass spectrum data to a JSON string."""
313
314        columns = self.columns_label + self.get_all_used_atoms_in_order(
315            self.mass_spectrum
316        )
317
318        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
319
320        df = DataFrame(dict_data_list, columns=columns)
321
322        # for key, values in dict_data.items():
323        #    if not values: dict_data[key] = NaN
324
325        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
326        return df.to_json(orient="records")
327
    def add_mass_spectrum_to_hdf5(
        self,
        hdf_handle,
        mass_spectrum,
        group_key,
        mass_spectra_group=None,
        export_raw=True,
    ):
        """Adds the mass spectrum data to an HDF5 file.

        A group named `group_key` is created if it does not exist yet, holding a
        ``raw_ms`` dataset (profile data, or an empty dataset). The processed
        results are then stored in a new dataset inside that group, named after
        the number of datasets the group already contains, with the column
        labels and the settings attached as JSON-encoded attributes.

        Parameters
        ----------
        hdf_handle : h5py.File
            The HDF5 file handle.
        mass_spectrum : MassSpectrum
            The mass spectrum to add to the HDF5 file.
        group_key : str
            The group key (where to add the mass spectrum data within the HDF5 file).
        mass_spectra_group : h5py.Group, optional
            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
        export_raw : bool, optional
            Whether to export the raw data. Defaults to True.
            If False, only the processed data (peaks) is exported (essentially centroided data).
        """
        if mass_spectra_group is None:
            # Check if the file has the necessary attributes and add them if not
            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
            if not hdf_handle.attrs.get("date_utc"):
                timenow = str(
                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
                )
                hdf_handle.attrs["date_utc"] = timenow
                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
                hdf_handle.attrs["data_structure"] = "mass_spectrum"
                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name

        # processed results as rows of values (text values encoded to bytes for HDF5)
        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)

        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)

        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)

        # column order matching the rows in list_results, stored as a JSON list
        columns_labels = json.dumps(
            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
            sort_keys=False,
            indent=4,
            separators=(",", ": "),
        )

        # NOTE(review): no-op self-assignment, kept as-is
        group_key = group_key

        # from here on, write into the mass spectra group instead of the file root
        if mass_spectra_group is not None:
            hdf_handle = mass_spectra_group

        if group_key not in hdf_handle.keys():
            scan_group = hdf_handle.create_group(group_key)

            # If there is raw data (from profile data) save it
            if not mass_spectrum.is_centroid and export_raw:
                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))

                # row 0 holds the abundances, row 1 the m/z values
                mz_abun_array[0] = mass_spectrum.abundance_profile
                mz_abun_array[1] = mass_spectrum.mz_exp_profile

                raw_ms_dataset = scan_group.create_dataset(
                    "raw_ms", data=mz_abun_array, dtype="f8"
                )

            else:
                # create an empty dataset for missing raw data
                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")

            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)

            # transient settings are only stored for spectra built from frequency data
            if isinstance(mass_spectrum, MassSpecfromFreq):
                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
                    setting_dicts.get("TransientSetting"),
                    sort_keys=False,
                    indent=4,
                    separators=(",", ": "),
                )

        else:
            # group already exists: append another processed dataset to it
            scan_group = hdf_handle.get(group_key)

        # the dataset name is the current number of entries in the group; a group
        # created above already contains "raw_ms", so its first processed dataset is "1"
        index_processed_data = str(len(scan_group.keys()))

        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))

        processed_dset = scan_group.create_dataset(
            index_processed_data, data=list_results
        )

        processed_dset.attrs["date_utc"] = timenow

        processed_dset.attrs["ColumnsLabels"] = columns_labels

        # settings used to produce this processed dataset, each as a JSON string
        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
            setting_dicts.get("MoleculaSearch"),
            sort_keys=False,
            indent=4,
            separators=(",", ": "),
        )

        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
            setting_dicts.get("MassSpecPeak"),
            sort_keys=False,
            indent=4,
            separators=(",", ": "),
        )

        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
            setting_dicts.get("MassSpectrum"),
            sort_keys=False,
            indent=4,
            separators=(",", ": "),
        )
448
449    def to_hdf(self):
450        """Exports the mass spectrum data to an HDF5 file."""
451
452        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
453            self.add_mass_spectrum_to_hdf5(
454                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
455            )
456
457    def parameters_to_toml(self):
458        """Converts the mass spectrum parameters to a TOML string.
459
460        Returns
461        -------
462        str
463            The TOML string of the mass spectrum parameters.
464        """
465
466        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
467
468        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
469        dict_setting["analyzer"] = self.mass_spectrum.analyzer
470        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
471        dict_setting["sample_name"] = self.mass_spectrum.sample_name
472
473        output = toml.dumps(dict_setting)
474
475        return output
476
477    def parameters_to_json(self):
478        """Converts the mass spectrum parameters to a JSON string.
479
480        Returns
481        -------
482        str
483            The JSON string of the mass spectrum parameters.
484        """
485
486        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
487
488        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
489        dict_setting["analyzer"] = self.mass_spectrum.analyzer
490        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
491        dict_setting["sample_name"] = self.mass_spectrum.sample_name
492
493        output = json.dumps(dict_setting)
494
495        return output
496
497    def get_mass_spec_attrs(self, mass_spectrum):
498        """Returns the mass spectrum attributes as a dictionary.
499
500        Parameters
501        ----------
502        mass_spectrum : MassSpectrum
503            The mass spectrum to export.
504
505        Returns
506        -------
507        dict
508            The mass spectrum attributes.
509        """
510
511        dict_ms_attrs = {}
512        dict_ms_attrs["polarity"] = mass_spectrum.polarity
513        dict_ms_attrs["rt"] = mass_spectrum.retention_time
514        dict_ms_attrs["tic"] = mass_spectrum.tic
515        dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan
516        dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt
517        dict_ms_attrs["Aterm"] = mass_spectrum.Aterm
518        dict_ms_attrs["Bterm"] = mass_spectrum.Bterm
519        dict_ms_attrs["Cterm"] = mass_spectrum.Cterm
520        dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise
521        dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std
522
523        return dict_ms_attrs
524
525    def get_all_used_atoms_in_order(self, mass_spectrum):
526        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
527
528        Parameters
529        ----------
530        mass_spectrum : MassSpectrum
531            The mass spectrum to export.
532
533        Returns
534        -------
535        list
536            The list of assigned atoms in the order specified by Atoms.atoms_order list.
537        """
538
539        atoms_in_order = Atoms.atoms_order
540        all_used_atoms = set()
541        if mass_spectrum:
542            for ms_peak in mass_spectrum:
543                if ms_peak:
544                    for m_formula in ms_peak:
545                        for atom in m_formula.atoms:
546                            all_used_atoms.add(atom)
547
548        def sort_method(atom):
549            return [atoms_in_order.index(atom)]
550
551        return sorted(all_used_atoms, key=sort_method)
552
553    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
554        """Returns the mass spectrum data as a list of dictionaries.
555
556        Parameters
557        ----------
558        mass_spectrum : MassSpectrum
559            The mass spectrum to export.
560        is_hdf5 : bool, optional
561            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
562
563        Returns
564        -------
565        list
566            The mass spectrum data as a list of dictionaries.
567        """
568
569        column_labels = self.columns_label + self.get_all_used_atoms_in_order(
570            mass_spectrum
571        )
572
573        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
574
575        all_lines = []
576        for dict_res in dict_list:
577            result_line = [NaN] * len(column_labels)
578
579            for label, value in dict_res.items():
580                label_index = column_labels.index(label)
581                result_line[label_index] = value
582
583            all_lines.append(result_line)
584
585        return all_lines
586
587    def get_list_dict_data(
588        self,
589        mass_spectrum,
590        include_no_match=True,
591        include_isotopologues=True,
592        isotopologue_inline=True,
593        no_match_inline=False,
594        is_hdf5=False,
595        additional_columns=None,
596    ):
597        """Returns the mass spectrum data as a list of dictionaries.
598
599        Parameters
600        ----------
601        mass_spectrum : MassSpectrum
602            The mass spectrum to export.
603        include_no_match : bool, optional
604            Whether to include unassigned (no match) data. Defaults to True.
605        include_isotopologues : bool, optional
606            Whether to include isotopologues. Defaults to True.
607        isotopologue_inline : bool, optional
608            Whether to include isotopologues inline. Defaults to True.
609        no_match_inline : bool, optional
610            Whether to include unassigned (no match) data inline. Defaults to False.
611        is_hdf5 : bool, optional
612            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
613
614        Returns
615        -------
616        list
617            The mass spectrum data as a list of dictionaries.
618        """
619
620        dict_data_list = []
621
622        if is_hdf5:
623            encode = ".encode('utf-8')"
624        else:
625            encode = ""
626
627        def add_no_match_dict_data(index, ms_peak):
628            """
629            Export dictionary of mspeak info for unassigned (no match) data
630            """
631            dict_result = {
632                "Index": index,
633                "m/z": ms_peak._mz_exp,
634                "Calibrated m/z": ms_peak.mz_exp,
635                "Peak Height": ms_peak.abundance,
636                "Peak Area": ms_peak.area,
637                "Resolving Power": ms_peak.resolving_power,
638                "S/N": ms_peak.signal_to_noise,
639                "Ion Charge": ms_peak.ion_charge,
640                "Heteroatom Class": eval("Labels.unassigned{}".format(encode)),
641            }
642
643            dict_data_list.append(dict_result)
644
645        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
646            """
647            Export dictionary of mspeak info for assigned (match) data
648            """
649            formula_dict = mformula.to_dict()
650
651            dict_result = {
652                "Index": index,
653                "m/z": ms_peak._mz_exp,
654                "Calibrated m/z": ms_peak.mz_exp,
655                "Calculated m/z": mformula.mz_calc,
656                "Peak Height": ms_peak.abundance,
657                "Peak Area": ms_peak.area,
658                "Resolving Power": ms_peak.resolving_power,
659                "S/N": ms_peak.signal_to_noise,
660                "Ion Charge": ms_peak.ion_charge,
661                "m/z Error (ppm)": mformula.mz_error,
662                "Confidence Score": mformula.confidence_score,
663                "Isotopologue Similarity": mformula.isotopologue_similarity,
664                "m/z Error Score": mformula.average_mz_error_score,
665                "DBE": mformula.dbe,
666                "Heteroatom Class": eval("mformula.class_label{}".format(encode)),
667                "H/C": mformula.H_C,
668                "O/C": mformula.O_C,
669                "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)),
670                "Is Isotopologue": int(mformula.is_isotopologue),
671                "Molecular Formula": eval("mformula.string{}".format(encode)),
672            }
673            if additional_columns is not None:
674                possible_dict = {
675                    "Aromaticity Index": mformula.A_I,
676                    "NOSC": mformula.nosc,
677                    "Aromaticity Index (modified)": mformula.A_I_mod,
678                }
679                for column in additional_columns:
680                    dict_result[column] = possible_dict.get(column)
681
682            if mformula.adduct_atom:
683                dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode))
684
685            if mformula.is_isotopologue:
686                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic
687
688            if self.atoms_order_list is None:
689                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
690            else:
691                atoms_order_list = self.atoms_order_list
692
693            for atom in atoms_order_list:
694                if atom in formula_dict.keys():
695                    dict_result[atom] = formula_dict.get(atom)
696
697            dict_data_list.append(dict_result)
698
699        score_methods = mass_spectrum.molecular_search_settings.score_methods
700        selected_score_method = (
701            mass_spectrum.molecular_search_settings.output_score_method
702        )
703
704        if selected_score_method in score_methods:
705            # temp set score method as the one chosen in the output
706            current_method = mass_spectrum.molecular_search_settings.score_method
707            mass_spectrum.molecular_search_settings.score_method = selected_score_method
708
709            for index, ms_peak in enumerate(mass_spectrum):
710                # print(ms_peak.mz_exp)
711
712                if ms_peak:
713                    m_formula = ms_peak.best_molecular_formula_candidate
714
715                    if m_formula:
716                        if not m_formula.is_isotopologue:
717                            add_match_dict_data(
718                                index,
719                                ms_peak,
720                                m_formula,
721                                additional_columns=additional_columns,
722                            )
723
724                            for (
725                                iso_mspeak_index,
726                                iso_mf_formula,
727                            ) in m_formula.mspeak_mf_isotopologues_indexes:
728                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
729                                add_match_dict_data(
730                                    iso_mspeak_index,
731                                    iso_ms_peak,
732                                    iso_mf_formula,
733                                    additional_columns=additional_columns,
734                                )
735                else:
736                    if include_no_match and no_match_inline:
737                        add_no_match_dict_data(index, ms_peak)
738
739            if include_no_match and not no_match_inline:
740                for index, ms_peak in enumerate(mass_spectrum):
741                    if not ms_peak:
742                        add_no_match_dict_data(index, ms_peak)
743            # reset score method as the one chosen in the output
744            mass_spectrum.molecular_search_settings.score_method = current_method
745
746        else:
747            for index, ms_peak in enumerate(mass_spectrum):
748                # check if there is a molecular formula candidate for the msPeak
749
750                if ms_peak:
751                    # m_formula = ms_peak.molecular_formula_lowest_error
752                    for m_formula in ms_peak:
753                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
754                            if (
755                                m_formula.confidence_score
756                                >= mass_spectrum.molecular_search_settings.output_min_score
757                            ):
758                                if m_formula.is_isotopologue:  # isotopologues inline
759                                    if include_isotopologues and isotopologue_inline:
760                                        add_match_dict_data(
761                                            index,
762                                            ms_peak,
763                                            m_formula,
764                                            additional_columns=additional_columns,
765                                        )
766                                else:
767                                    add_match_dict_data(
768                                        index,
769                                        ms_peak,
770                                        m_formula,
771                                        additional_columns=additional_columns,
772                                    )  # add monoisotopic peak
773
774                            # cutoff because of low score
775                            else:
776                                add_no_match_dict_data(index, ms_peak)
777
778                        else:
779                            if m_formula.is_isotopologue:  # isotopologues inline
780                                if include_isotopologues and isotopologue_inline:
781                                    add_match_dict_data(
782                                        index,
783                                        ms_peak,
784                                        m_formula,
785                                        additional_columns=additional_columns,
786                                    )
787                            else:
788                                add_match_dict_data(
789                                    index,
790                                    ms_peak,
791                                    m_formula,
792                                    additional_columns=additional_columns,
793                                )  # add monoisotopic peak
794                else:
795                    # include not_match
796                    if include_no_match and no_match_inline:
797                        add_no_match_dict_data(index, ms_peak)
798
799            if include_isotopologues and not isotopologue_inline:
800                for index, ms_peak in enumerate(mass_spectrum):
801                    for m_formula in ms_peak:
802                        if m_formula.is_isotopologue:
803                            if (
804                                m_formula.confidence_score
805                                >= mass_spectrum.molecular_search_settings.output_min_score
806                            ):
807                                add_match_dict_data(
808                                    index,
809                                    ms_peak,
810                                    m_formula,
811                                    additional_columns=additional_columns,
812                                )
813
814            if include_no_match and not no_match_inline:
815                for index, ms_peak in enumerate(mass_spectrum):
816                    if not ms_peak:
817                        add_no_match_dict_data(index, ms_peak)
818
819        # remove duplicated add_match data possibly introduced on the output_score_filter step
820        res = []
821        [res.append(x) for x in dict_data_list if x not in res]
822
823        return res
class HighResMassSpecExport(threading.Thread):
 20class HighResMassSpecExport(Thread):
 21    """A class for exporting high-resolution mass spectra.
 22
 23    Parameters
 24    ----------
 25    out_file_path : str
 26        The output file path.
 27    mass_spectrum : MassSpectrum
 28        The mass spectrum to export.
 29    output_type : str, optional
 30        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
 31
 32    Attributes
 33    ----------
 34    output_file : Path
 35        The output file path.
 36    output_type : str
 37        The type of output file.
 38    mass_spectrum : MassSpectrum
 39        The mass spectrum to export.
 40    atoms_order_list : list
 41        The list of assigned atoms in the order specified by Atoms.atoms_order list.
 42    columns_label : list
 43        The column labels in order.
 44
 45    Methods
 46    -------
 47    * save().
 48        Save the mass spectrum data to the output file.
 49    * run().
 50        Run the export process.
 51    * get_pandas_df().
 52        Returns the mass spectrum data as a pandas DataFrame.
 53    * write_settings(output_path, mass_spectrum).
 54        Writes the settings of the mass spectrum to a JSON file.
 55    * to_pandas(write_metadata=True).
 56        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
 57    * to_excel(write_metadata=True).
 58        Exports the mass spectrum data to an Excel file.
 59    * to_csv(write_metadata=True).
 60        Exports the mass spectrum data to a CSV file.
 61    * to_json().
 62        Exports the mass spectrum data to a JSON string.
 63    * to_hdf().
 64        Exports the mass spectrum data to an HDF5 file.
 65    * parameters_to_toml().
 66        Converts the mass spectrum parameters to a TOML string.
 67    * parameters_to_json().
 68        Converts the mass spectrum parameters to a JSON string.
 69    * get_mass_spec_attrs(mass_spectrum).
 70        Returns the mass spectrum attributes as a dictionary.
 71    * get_all_used_atoms_in_order(mass_spectrum).
 72        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
 73    * list_dict_to_list(mass_spectrum, is_hdf5=False).
 74        Returns the mass spectrum data as a list of dictionaries.
 75    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).
 76        Returns the mass spectrum data as a list of dictionaries.
 77
 78    """
 79
 80    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
 81        Thread.__init__(self)
 82
 83        self.output_file = Path(out_file_path)
 84
 85        # 'excel', 'csv' or 'pandas'
 86        self.output_type = output_type
 87
 88        self.mass_spectrum = mass_spectrum
 89
 90        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
 91        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
 92
 93        self._init_columns()
 94
 95    def _init_columns(self):
 96        """Initialize the columns for the mass spectrum output."""
 97        # column labels in order
 98        self.columns_label = [
 99            "Index",
100            "m/z",
101            "Calibrated m/z",
102            "Calculated m/z",
103            "Peak Height",
104            "Peak Area",
105            "Resolving Power",
106            "S/N",
107            "Ion Charge",
108            "m/z Error (ppm)",
109            "m/z Error Score",
110            "Isotopologue Similarity",
111            "Confidence Score",
112            "DBE",
113            "O/C",
114            "H/C",
115            "Heteroatom Class",
116            "Ion Type",
117            "Adduct",
118            "Is Isotopologue",
119            "Mono Isotopic Index",
120            "Molecular Formula",
121        ]
122
123    @property
124    def output_type(self):
125        """Returns the output type of the mass spectrum."""
126        return self._output_type
127
128    @output_type.setter
129    def output_type(self, output_type):
130        output_types = ["excel", "csv", "pandas", "hdf5"]
131        if output_type in output_types:
132            self._output_type = output_type
133        else:
134            raise TypeError(
135                'Supported types are "excel", "csv" or "pandas", %s entered'
136                % output_type
137            )
138
139    def save(self):
140        """Save the mass spectrum data to the output file.
141
142        Raises
143        ------
144        ValueError
145            If the output type is not supported.
146        """
147
148        if self.output_type == "excel":
149            self.to_excel()
150        elif self.output_type == "csv":
151            self.to_csv()
152        elif self.output_type == "pandas":
153            self.to_pandas()
154        elif self.output_type == "hdf5":
155            self.to_hdf()
156        else:
157            raise ValueError(
158                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'"
159                % self.output_type
160            )
161
162    def run(self):
163        """Run the export process.
164
165        This method is called when the thread starts.
166        It calls the save method to perform the export."""
167        self.save()
168
169    def get_pandas_df(self, additional_columns=None):
170        """Returns the mass spectrum data as a pandas DataFrame.
171
172        Parameters
173        ----------
174        additional_columns : list, optional
175            Additional columns to include in the DataFrame. Defaults to None.
176            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
177
178        Returns
179        -------
180        DataFrame
181            The mass spectrum data as a pandas DataFrame.
182        """
183        if additional_columns is not None:
184            possible_additional_columns = [
185                "Aromaticity Index",
186                "NOSC",
187                "Aromaticity Index (modified)",
188            ]
189            if additional_columns:
190                for column in additional_columns:
191                    if column not in possible_additional_columns:
192                        raise ValueError("Invalid additional column: %s" % column)
193            columns = (
194                self.columns_label
195                + additional_columns
196                + self.get_all_used_atoms_in_order(self.mass_spectrum)
197            )
198        else:
199            columns = self.columns_label + self.get_all_used_atoms_in_order(
200                self.mass_spectrum
201            )
202        dict_data_list = self.get_list_dict_data(
203            self.mass_spectrum, additional_columns=additional_columns
204        )
205        df = DataFrame(dict_data_list, columns=columns)
206        df.name = self.output_file
207        return df
208
209    def write_settings(self, output_path, mass_spectrum):
210        """Writes the settings of the mass spectrum to a JSON file.
211
212        Parameters
213        ----------
214        output_path : str
215            The output file path.
216        mass_spectrum : MassSpectrum
217            The mass spectrum to export.
218        """
219
220        import json
221
222        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
223
224        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
225        dict_setting["analyzer"] = mass_spectrum.analyzer
226        dict_setting["instrument_label"] = mass_spectrum.instrument_label
227        dict_setting["sample_name"] = mass_spectrum.sample_name
228
229        with open(
230            output_path.with_suffix(".json"),
231            "w",
232            encoding="utf8",
233        ) as outfile:
234            output = json.dumps(
235                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
236            )
237            outfile.write(output)
238
239    def to_pandas(self, write_metadata=True):
240        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
241
242        Parameters
243        ----------
244        write_metadata : bool, optional
245            Whether to write the metadata to a JSON file. Defaults to True.
246        """
247
248        columns = self.columns_label + self.get_all_used_atoms_in_order(
249            self.mass_spectrum
250        )
251
252        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
253
254        df = DataFrame(dict_data_list, columns=columns)
255
256        df.to_pickle(self.output_file.with_suffix(".pkl"))
257
258        if write_metadata:
259            self.write_settings(self.output_file, self.mass_spectrum)
260
261    def to_excel(self, write_metadata=True):
262        """Exports the mass spectrum data to an Excel file.
263
264        Parameters
265        ----------
266        write_metadata : bool, optional
267            Whether to write the metadata to a JSON file. Defaults to True.
268        """
269
270        columns = self.columns_label + self.get_all_used_atoms_in_order(
271            self.mass_spectrum
272        )
273
274        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
275
276        df = DataFrame(dict_data_list, columns=columns)
277
278        df.to_excel(self.output_file.with_suffix(".xlsx"))
279
280        if write_metadata:
281            self.write_settings(self.output_file, self.mass_spectrum)
282
283    def to_csv(self, write_metadata=True):
284        """Exports the mass spectrum data to a CSV file.
285
286        Parameters
287        ----------
288        write_metadata : bool, optional
289            Whether to write the metadata to a JSON file. Defaults to True.
290        """
291
292        columns = self.columns_label + self.get_all_used_atoms_in_order(
293            self.mass_spectrum
294        )
295
296        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
297
298        import csv
299
300        try:
301            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
302                writer = csv.DictWriter(csvfile, fieldnames=columns)
303                writer.writeheader()
304                for data in dict_data_list:
305                    writer.writerow(data)
306            if write_metadata:
307                self.write_settings(self.output_file, self.mass_spectrum)
308
309        except IOError as ioerror:
310            print(ioerror)
311
312    def to_json(self):
313        """Exports the mass spectrum data to a JSON string."""
314
315        columns = self.columns_label + self.get_all_used_atoms_in_order(
316            self.mass_spectrum
317        )
318
319        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
320
321        df = DataFrame(dict_data_list, columns=columns)
322
323        # for key, values in dict_data.items():
324        #    if not values: dict_data[key] = NaN
325
326        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
327        return df.to_json(orient="records")
328
329    def add_mass_spectrum_to_hdf5(
330        self,
331        hdf_handle,
332        mass_spectrum,
333        group_key,
334        mass_spectra_group=None,
335        export_raw=True,
336    ):
337        """Adds the mass spectrum data to an HDF5 file.
338
339        Parameters
340        ----------
341        hdf_handle : h5py.File
342            The HDF5 file handle.
343        mass_spectrum : MassSpectrum
344            The mass spectrum to add to the HDF5 file.
345        group_key : str
346            The group key (where to add the mass spectrum data within the HDF5 file).
347        mass_spectra_group : h5py.Group, optional
348            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
349        export_raw : bool, optional
350            Whether to export the raw data. Defaults to True.
351            If False, only the processed data (peaks) is exported (essentially centroided data).
352        """
353        if mass_spectra_group is None:
354            # Check if the file has the necessary attributes and add them if not
355            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
356            if not hdf_handle.attrs.get("date_utc"):
357                timenow = str(
358                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
359                )
360                hdf_handle.attrs["date_utc"] = timenow
361                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
362                hdf_handle.attrs["data_structure"] = "mass_spectrum"
363                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
364                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
365                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
366
367        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
368
369        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
370
371        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
372
373        columns_labels = json.dumps(
374            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
375            sort_keys=False,
376            indent=4,
377            separators=(",", ": "),
378        )
379
380        group_key = group_key
381
382        if mass_spectra_group is not None:
383            hdf_handle = mass_spectra_group
384
385        if group_key not in hdf_handle.keys():
386            scan_group = hdf_handle.create_group(group_key)
387
388            # If there is raw data (from profile data) save it
389            if not mass_spectrum.is_centroid and export_raw:
390                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))
391
392                mz_abun_array[0] = mass_spectrum.abundance_profile
393                mz_abun_array[1] = mass_spectrum.mz_exp_profile
394
395                raw_ms_dataset = scan_group.create_dataset(
396                    "raw_ms", data=mz_abun_array, dtype="f8"
397                )
398
399            else:
400                #  create empy dataset for missing raw data
401                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
402
403            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
404
405            if isinstance(mass_spectrum, MassSpecfromFreq):
406                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
407                    setting_dicts.get("TransientSetting"),
408                    sort_keys=False,
409                    indent=4,
410                    separators=(",", ": "),
411                )
412
413        else:
414            scan_group = hdf_handle.get(group_key)
415
416        # if there is not processed data len = 0, otherwise len() will return next index
417        index_processed_data = str(len(scan_group.keys()))
418
419        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
420
421        processed_dset = scan_group.create_dataset(
422            index_processed_data, data=list_results
423        )
424
425        processed_dset.attrs["date_utc"] = timenow
426
427        processed_dset.attrs["ColumnsLabels"] = columns_labels
428
429        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
430            setting_dicts.get("MoleculaSearch"),
431            sort_keys=False,
432            indent=4,
433            separators=(",", ": "),
434        )
435
436        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
437            setting_dicts.get("MassSpecPeak"),
438            sort_keys=False,
439            indent=4,
440            separators=(",", ": "),
441        )
442
443        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
444            setting_dicts.get("MassSpectrum"),
445            sort_keys=False,
446            indent=4,
447            separators=(",", ": "),
448        )
449
450    def to_hdf(self):
451        """Exports the mass spectrum data to an HDF5 file."""
452
453        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
454            self.add_mass_spectrum_to_hdf5(
455                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
456            )
457
458    def parameters_to_toml(self):
459        """Converts the mass spectrum parameters to a TOML string.
460
461        Returns
462        -------
463        str
464            The TOML string of the mass spectrum parameters.
465        """
466
467        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
468
469        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
470        dict_setting["analyzer"] = self.mass_spectrum.analyzer
471        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
472        dict_setting["sample_name"] = self.mass_spectrum.sample_name
473
474        output = toml.dumps(dict_setting)
475
476        return output
477
478    def parameters_to_json(self):
479        """Converts the mass spectrum parameters to a JSON string.
480
481        Returns
482        -------
483        str
484            The JSON string of the mass spectrum parameters.
485        """
486
487        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
488
489        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
490        dict_setting["analyzer"] = self.mass_spectrum.analyzer
491        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
492        dict_setting["sample_name"] = self.mass_spectrum.sample_name
493
494        output = json.dumps(dict_setting)
495
496        return output
497
498    def get_mass_spec_attrs(self, mass_spectrum):
499        """Returns the mass spectrum attributes as a dictionary.
500
501        Parameters
502        ----------
503        mass_spectrum : MassSpectrum
504            The mass spectrum to export.
505
506        Returns
507        -------
508        dict
509            The mass spectrum attributes.
510        """
511
512        dict_ms_attrs = {}
513        dict_ms_attrs["polarity"] = mass_spectrum.polarity
514        dict_ms_attrs["rt"] = mass_spectrum.retention_time
515        dict_ms_attrs["tic"] = mass_spectrum.tic
516        dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan
517        dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt
518        dict_ms_attrs["Aterm"] = mass_spectrum.Aterm
519        dict_ms_attrs["Bterm"] = mass_spectrum.Bterm
520        dict_ms_attrs["Cterm"] = mass_spectrum.Cterm
521        dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise
522        dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std
523
524        return dict_ms_attrs
525
526    def get_all_used_atoms_in_order(self, mass_spectrum):
527        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
528
529        Parameters
530        ----------
531        mass_spectrum : MassSpectrum
532            The mass spectrum to export.
533
534        Returns
535        -------
536        list
537            The list of assigned atoms in the order specified by Atoms.atoms_order list.
538        """
539
540        atoms_in_order = Atoms.atoms_order
541        all_used_atoms = set()
542        if mass_spectrum:
543            for ms_peak in mass_spectrum:
544                if ms_peak:
545                    for m_formula in ms_peak:
546                        for atom in m_formula.atoms:
547                            all_used_atoms.add(atom)
548
549        def sort_method(atom):
550            return [atoms_in_order.index(atom)]
551
552        return sorted(all_used_atoms, key=sort_method)
553
554    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
555        """Returns the mass spectrum data as a list of dictionaries.
556
557        Parameters
558        ----------
559        mass_spectrum : MassSpectrum
560            The mass spectrum to export.
561        is_hdf5 : bool, optional
562            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
563
564        Returns
565        -------
566        list
567            The mass spectrum data as a list of dictionaries.
568        """
569
570        column_labels = self.columns_label + self.get_all_used_atoms_in_order(
571            mass_spectrum
572        )
573
574        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
575
576        all_lines = []
577        for dict_res in dict_list:
578            result_line = [NaN] * len(column_labels)
579
580            for label, value in dict_res.items():
581                label_index = column_labels.index(label)
582                result_line[label_index] = value
583
584            all_lines.append(result_line)
585
586        return all_lines
587
588    def get_list_dict_data(
589        self,
590        mass_spectrum,
591        include_no_match=True,
592        include_isotopologues=True,
593        isotopologue_inline=True,
594        no_match_inline=False,
595        is_hdf5=False,
596        additional_columns=None,
597    ):
598        """Returns the mass spectrum data as a list of dictionaries.
599
600        Parameters
601        ----------
602        mass_spectrum : MassSpectrum
603            The mass spectrum to export.
604        include_no_match : bool, optional
605            Whether to include unassigned (no match) data. Defaults to True.
606        include_isotopologues : bool, optional
607            Whether to include isotopologues. Defaults to True.
608        isotopologue_inline : bool, optional
609            Whether to include isotopologues inline. Defaults to True.
610        no_match_inline : bool, optional
611            Whether to include unassigned (no match) data inline. Defaults to False.
612        is_hdf5 : bool, optional
613            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
614
615        Returns
616        -------
617        list
618            The mass spectrum data as a list of dictionaries.
619        """
620
621        dict_data_list = []
622
623        if is_hdf5:
624            encode = ".encode('utf-8')"
625        else:
626            encode = ""
627
628        def add_no_match_dict_data(index, ms_peak):
629            """
630            Export dictionary of mspeak info for unassigned (no match) data
631            """
632            dict_result = {
633                "Index": index,
634                "m/z": ms_peak._mz_exp,
635                "Calibrated m/z": ms_peak.mz_exp,
636                "Peak Height": ms_peak.abundance,
637                "Peak Area": ms_peak.area,
638                "Resolving Power": ms_peak.resolving_power,
639                "S/N": ms_peak.signal_to_noise,
640                "Ion Charge": ms_peak.ion_charge,
641                "Heteroatom Class": eval("Labels.unassigned{}".format(encode)),
642            }
643
644            dict_data_list.append(dict_result)
645
646        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
647            """
648            Export dictionary of mspeak info for assigned (match) data
649            """
650            formula_dict = mformula.to_dict()
651
652            dict_result = {
653                "Index": index,
654                "m/z": ms_peak._mz_exp,
655                "Calibrated m/z": ms_peak.mz_exp,
656                "Calculated m/z": mformula.mz_calc,
657                "Peak Height": ms_peak.abundance,
658                "Peak Area": ms_peak.area,
659                "Resolving Power": ms_peak.resolving_power,
660                "S/N": ms_peak.signal_to_noise,
661                "Ion Charge": ms_peak.ion_charge,
662                "m/z Error (ppm)": mformula.mz_error,
663                "Confidence Score": mformula.confidence_score,
664                "Isotopologue Similarity": mformula.isotopologue_similarity,
665                "m/z Error Score": mformula.average_mz_error_score,
666                "DBE": mformula.dbe,
667                "Heteroatom Class": eval("mformula.class_label{}".format(encode)),
668                "H/C": mformula.H_C,
669                "O/C": mformula.O_C,
670                "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)),
671                "Is Isotopologue": int(mformula.is_isotopologue),
672                "Molecular Formula": eval("mformula.string{}".format(encode)),
673            }
674            if additional_columns is not None:
675                possible_dict = {
676                    "Aromaticity Index": mformula.A_I,
677                    "NOSC": mformula.nosc,
678                    "Aromaticity Index (modified)": mformula.A_I_mod,
679                }
680                for column in additional_columns:
681                    dict_result[column] = possible_dict.get(column)
682
683            if mformula.adduct_atom:
684                dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode))
685
686            if mformula.is_isotopologue:
687                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic
688
689            if self.atoms_order_list is None:
690                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
691            else:
692                atoms_order_list = self.atoms_order_list
693
694            for atom in atoms_order_list:
695                if atom in formula_dict.keys():
696                    dict_result[atom] = formula_dict.get(atom)
697
698            dict_data_list.append(dict_result)
699
700        score_methods = mass_spectrum.molecular_search_settings.score_methods
701        selected_score_method = (
702            mass_spectrum.molecular_search_settings.output_score_method
703        )
704
705        if selected_score_method in score_methods:
706            # temp set score method as the one chosen in the output
707            current_method = mass_spectrum.molecular_search_settings.score_method
708            mass_spectrum.molecular_search_settings.score_method = selected_score_method
709
710            for index, ms_peak in enumerate(mass_spectrum):
711                # print(ms_peak.mz_exp)
712
713                if ms_peak:
714                    m_formula = ms_peak.best_molecular_formula_candidate
715
716                    if m_formula:
717                        if not m_formula.is_isotopologue:
718                            add_match_dict_data(
719                                index,
720                                ms_peak,
721                                m_formula,
722                                additional_columns=additional_columns,
723                            )
724
725                            for (
726                                iso_mspeak_index,
727                                iso_mf_formula,
728                            ) in m_formula.mspeak_mf_isotopologues_indexes:
729                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
730                                add_match_dict_data(
731                                    iso_mspeak_index,
732                                    iso_ms_peak,
733                                    iso_mf_formula,
734                                    additional_columns=additional_columns,
735                                )
736                else:
737                    if include_no_match and no_match_inline:
738                        add_no_match_dict_data(index, ms_peak)
739
740            if include_no_match and not no_match_inline:
741                for index, ms_peak in enumerate(mass_spectrum):
742                    if not ms_peak:
743                        add_no_match_dict_data(index, ms_peak)
744            # reset score method as the one chosen in the output
745            mass_spectrum.molecular_search_settings.score_method = current_method
746
747        else:
748            for index, ms_peak in enumerate(mass_spectrum):
749                # check if there is a molecular formula candidate for the msPeak
750
751                if ms_peak:
752                    # m_formula = ms_peak.molecular_formula_lowest_error
753                    for m_formula in ms_peak:
754                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
755                            if (
756                                m_formula.confidence_score
757                                >= mass_spectrum.molecular_search_settings.output_min_score
758                            ):
759                                if m_formula.is_isotopologue:  # isotopologues inline
760                                    if include_isotopologues and isotopologue_inline:
761                                        add_match_dict_data(
762                                            index,
763                                            ms_peak,
764                                            m_formula,
765                                            additional_columns=additional_columns,
766                                        )
767                                else:
768                                    add_match_dict_data(
769                                        index,
770                                        ms_peak,
771                                        m_formula,
772                                        additional_columns=additional_columns,
773                                    )  # add monoisotopic peak
774
775                            # cutoff because of low score
776                            else:
777                                add_no_match_dict_data(index, ms_peak)
778
779                        else:
780                            if m_formula.is_isotopologue:  # isotopologues inline
781                                if include_isotopologues and isotopologue_inline:
782                                    add_match_dict_data(
783                                        index,
784                                        ms_peak,
785                                        m_formula,
786                                        additional_columns=additional_columns,
787                                    )
788                            else:
789                                add_match_dict_data(
790                                    index,
791                                    ms_peak,
792                                    m_formula,
793                                    additional_columns=additional_columns,
794                                )  # add monoisotopic peak
795                else:
796                    # include not_match
797                    if include_no_match and no_match_inline:
798                        add_no_match_dict_data(index, ms_peak)
799
800            if include_isotopologues and not isotopologue_inline:
801                for index, ms_peak in enumerate(mass_spectrum):
802                    for m_formula in ms_peak:
803                        if m_formula.is_isotopologue:
804                            if (
805                                m_formula.confidence_score
806                                >= mass_spectrum.molecular_search_settings.output_min_score
807                            ):
808                                add_match_dict_data(
809                                    index,
810                                    ms_peak,
811                                    m_formula,
812                                    additional_columns=additional_columns,
813                                )
814
815            if include_no_match and not no_match_inline:
816                for index, ms_peak in enumerate(mass_spectrum):
817                    if not ms_peak:
818                        add_no_match_dict_data(index, ms_peak)
819
820        # remove duplicated add_match data possibly introduced on the output_score_filter step
821        res = []
822        [res.append(x) for x in dict_data_list if x not in res]
823
824        return res

A class for exporting high-resolution mass spectra.

Parameters
  • out_file_path (str): The output file path.
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
  • output_type (str, optional): The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
Attributes
  • output_file (Path): The output file path.
  • output_type (str): The type of output file.
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
  • atoms_order_list (list): The list of assigned atoms in the order specified by Atoms.atoms_order list.
  • columns_label (list): The column labels in order.
Methods
  • save(). Save the mass spectrum data to the output file.
  • run(). Run the export process.
  • get_pandas_df(). Returns the mass spectrum data as a pandas DataFrame.
  • write_settings(output_path, mass_spectrum). Writes the settings of the mass spectrum to a JSON file.
  • to_pandas(write_metadata=True). Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
  • to_excel(write_metadata=True). Exports the mass spectrum data to an Excel file.
  • to_csv(write_metadata=True). Exports the mass spectrum data to a CSV file.
  • to_json(). Exports the mass spectrum data to a JSON string.
  • to_hdf(). Exports the mass spectrum data to an HDF5 file.
  • parameters_to_toml(). Converts the mass spectrum parameters to a TOML string.
  • parameters_to_json(). Converts the mass spectrum parameters to a JSON string.
  • get_mass_spec_attrs(mass_spectrum). Returns the mass spectrum attributes as a dictionary.
  • get_all_used_atoms_in_order(mass_spectrum). Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
  • list_dict_to_list(mass_spectrum, is_hdf5=False). Returns the mass spectrum data as a list of rows (one list of values per result, ordered like the column labels).
  • get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False, additional_columns=None). Returns the mass spectrum data as a list of dictionaries.
HighResMassSpecExport(out_file_path, mass_spectrum, output_type='excel')
80    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
81        Thread.__init__(self)
82
83        self.output_file = Path(out_file_path)
84
85        # 'excel', 'csv' or 'pandas'
86        self.output_type = output_type
87
88        self.mass_spectrum = mass_spectrum
89
90        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
91        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
92
93        self._init_columns()

This constructor should always be called with keyword arguments. Arguments are:

group should be None; reserved for future extension when a ThreadGroup class is implemented.

target is the callable object to be invoked by the run() method. Defaults to None, meaning nothing is called.

name is the thread name. By default, a unique name is constructed of the form "Thread-N" where N is a small decimal number.

args is the argument tuple for the target invocation. Defaults to ().

kwargs is a dictionary of keyword arguments for the target invocation. Defaults to {}.

If a subclass overrides the constructor, it must make sure to invoke the base class constructor (Thread.__init__()) before doing anything else to the thread.

output_file
output_type

Returns the output type of the mass spectrum.

mass_spectrum
atoms_order_list
def save(self):
def save(self):
    """Save the mass spectrum data to the output file.

    Dispatches on ``self.output_type`` to ``to_excel``, ``to_csv``,
    ``to_pandas`` or ``to_hdf``.

    Raises
    ------
    ValueError
        If the output type is not one of 'excel', 'csv', 'pandas' or 'hdf5'.
    """

    if self.output_type == "excel":
        self.to_excel()
    elif self.output_type == "csv":
        self.to_csv()
    elif self.output_type == "pandas":
        self.to_pandas()
    elif self.output_type == "hdf5":
        self.to_hdf()
    else:
        # f-string rather than %-formatting so that an unexpected tuple value
        # is reported instead of breaking the format operation.
        raise ValueError(
            f"Unknown output type: {self.output_type}; "
            "it can be 'excel', 'csv', 'pandas' or 'hdf5'"
        )

Save the mass spectrum data to the output file.

Raises
  • ValueError: If the output type is not supported.
def run(self):
def run(self):
    """Thread entry point for the export.

    Invoked when the thread is started; it simply delegates to ``save()``.
    """
    self.save()

Run the export process.

This method is called when the thread starts. It calls the save method to perform the export.

def get_pandas_df(self, additional_columns=None):
def get_pandas_df(self, additional_columns=None):
    """Returns the mass spectrum data as a pandas DataFrame.

    Parameters
    ----------
    additional_columns : iterable of str, optional
        Additional columns to include in the DataFrame. Defaults to None.
        Suitable additional columns are: 'Aromaticity Index', 'NOSC',
        'Aromaticity Index (modified)'. Any iterable is accepted (list,
        tuple, ...); it is converted to a list before use.

    Returns
    -------
    DataFrame
        The mass spectrum data as a pandas DataFrame. Its ``name``
        attribute is set to ``self.output_file``.

    Raises
    ------
    ValueError
        If an entry of ``additional_columns`` is not a supported column.
    """
    possible_additional_columns = (
        "Aromaticity Index",
        "NOSC",
        "Aromaticity Index (modified)",
    )

    extra_columns = []
    if additional_columns is not None:
        # Normalise to a list so that tuples and other iterables can be
        # concatenated with the list-typed column labels below.
        additional_columns = list(additional_columns)
        for column in additional_columns:
            if column not in possible_additional_columns:
                raise ValueError("Invalid additional column: %s" % column)
        extra_columns = additional_columns

    columns = (
        self.columns_label
        + extra_columns
        + self.get_all_used_atoms_in_order(self.mass_spectrum)
    )

    dict_data_list = self.get_list_dict_data(
        self.mass_spectrum, additional_columns=additional_columns
    )
    df = DataFrame(dict_data_list, columns=columns)
    df.name = self.output_file
    return df

Returns the mass spectrum data as a pandas DataFrame.

Parameters
  • additional_columns (list, optional): Additional columns to include in the DataFrame. Defaults to None. Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
Returns
  • DataFrame: The mass spectrum data as a pandas DataFrame.
def write_settings(self, output_path, mass_spectrum):
def write_settings(self, output_path, mass_spectrum):
    """Writes the settings of the mass spectrum to a JSON file.

    The file is written next to ``output_path`` with the suffix replaced
    by ``.json``.

    Parameters
    ----------
    output_path : str or Path
        The output file path.
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    """

    dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)

    dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
    dict_setting["analyzer"] = mass_spectrum.analyzer
    dict_setting["instrument_label"] = mass_spectrum.instrument_label
    dict_setting["sample_name"] = mass_spectrum.sample_name

    # The documented type is str, but str has no with_suffix(); wrapping in
    # Path accepts both str and Path arguments.
    json_path = Path(output_path).with_suffix(".json")

    with open(json_path, "w", encoding="utf8") as outfile:
        output = json.dumps(
            dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
        )
        outfile.write(output)

Writes the settings of the mass spectrum to a JSON file.

Parameters
  • output_path (str): The output file path.
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
def to_pandas(self, write_metadata=True):
def to_pandas(self, write_metadata=True):
    """Pickle the exported table as a pandas DataFrame.

    The DataFrame is written to ``self.output_file`` with the suffix
    replaced by ``.pkl``.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    header = self.columns_label + self.get_all_used_atoms_in_order(
        self.mass_spectrum
    )
    records = self.get_list_dict_data(self.mass_spectrum)

    pickle_path = self.output_file.with_suffix(".pkl")
    DataFrame(records, columns=header).to_pickle(pickle_path)

    if not write_metadata:
        return
    self.write_settings(self.output_file, self.mass_spectrum)

Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.

Parameters
  • write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_excel(self, write_metadata=True):
def to_excel(self, write_metadata=True):
    """Write the exported table to an Excel workbook.

    The workbook is written to ``self.output_file`` with the suffix
    replaced by ``.xlsx``.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    header = self.columns_label + self.get_all_used_atoms_in_order(
        self.mass_spectrum
    )
    records = self.get_list_dict_data(self.mass_spectrum)

    workbook_path = self.output_file.with_suffix(".xlsx")
    DataFrame(records, columns=header).to_excel(workbook_path)

    if not write_metadata:
        return
    self.write_settings(self.output_file, self.mass_spectrum)

Exports the mass spectrum data to an Excel file.

Parameters
  • write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_csv(self, write_metadata=True):
def to_csv(self, write_metadata=True):
    """Write the exported table to a CSV file.

    The file is written to ``self.output_file`` with the suffix replaced by
    ``.csv``. An ``IOError`` raised while writing the CSV or the metadata
    is printed rather than propagated.

    Parameters
    ----------
    write_metadata : bool, optional
        Whether to also write the settings via ``write_settings``.
        Defaults to True.
    """
    import csv

    header = self.columns_label + self.get_all_used_atoms_in_order(
        self.mass_spectrum
    )
    records = self.get_list_dict_data(self.mass_spectrum)

    try:
        csv_path = self.output_file.with_suffix(".csv")
        with open(csv_path, "w", newline="") as handle:
            table = csv.DictWriter(handle, fieldnames=header)
            table.writeheader()
            table.writerows(records)
        if write_metadata:
            self.write_settings(self.output_file, self.mass_spectrum)
    except IOError as problem:
        # best-effort export: report the failure and carry on
        print(problem)

Exports the mass spectrum data to a CSV file.

Parameters
  • write_metadata (bool, optional): Whether to write the metadata to a JSON file. Defaults to True.
def to_json(self):
def to_json(self):
    """Serialise the exported table to a JSON string.

    Returns
    -------
    str
        The table in pandas ``orient="records"`` layout.
    """
    header = self.columns_label + self.get_all_used_atoms_in_order(
        self.mass_spectrum
    )
    records = self.get_list_dict_data(self.mass_spectrum)

    table = DataFrame(records, columns=header)
    return table.to_json(orient="records")

Exports the mass spectrum data to a JSON string.

def add_mass_spectrum_to_hdf5( self, hdf_handle, mass_spectrum, group_key, mass_spectra_group=None, export_raw=True):
def add_mass_spectrum_to_hdf5(
    self,
    hdf_handle,
    mass_spectrum,
    group_key,
    mass_spectra_group=None,
    export_raw=True,
):
    """Adds the mass spectrum data to an HDF5 file.

    A group named ``group_key`` is created if it does not exist yet; on
    creation a ``raw_ms`` dataset is stored in it. A new dataset holding the
    processed peak rows is then added to the group on every call, named
    after the number of keys already present in the group.

    Parameters
    ----------
    hdf_handle : h5py.File
        The HDF5 file handle.
    mass_spectrum : MassSpectrum
        The mass spectrum to add to the HDF5 file.
    group_key : str
        The group key (where to add the mass spectrum data within the HDF5 file).
    mass_spectra_group : h5py.Group, optional
        The mass spectra group. Defaults to None (no group, mass spectrum is
        added to the root).
    export_raw : bool, optional
        Whether to export the raw data. Defaults to True.
        If False, only the processed data (peaks) is exported (essentially
        centroided data).
    """
    stamp_format = "%d/%m/%Y %H:%M:%S %Z"

    def pretty_json(payload):
        # shared layout for every JSON attribute except MassSpecAttrs
        return json.dumps(
            payload, sort_keys=False, indent=4, separators=(",", ": ")
        )

    # File-level attributes are only stamped when writing straight to the
    # file root; with a mass_spectra_group they are assumed to exist already.
    if mass_spectra_group is None and not hdf_handle.attrs.get("date_utc"):
        hdf_handle.attrs["date_utc"] = str(
            datetime.now(timezone.utc).strftime(stamp_format)
        )
        hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
        hdf_handle.attrs["data_structure"] = "mass_spectrum"
        hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
        hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
        hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name

    # Gather everything that will be written before touching the group layout.
    peak_rows = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    settings = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    columns_labels = pretty_json(
        self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum)
    )

    parent = hdf_handle if mass_spectra_group is None else mass_spectra_group

    if group_key in parent.keys():
        scan_group = parent.get(group_key)
    else:
        scan_group = parent.create_group(group_key)

        if not mass_spectrum.is_centroid and export_raw:
            # profile data available: row 0 holds the abundances, row 1 the m/z values
            profile = empty(shape=(2, len(mass_spectrum.abundance_profile)))
            profile[0] = mass_spectrum.abundance_profile
            profile[1] = mass_spectrum.mz_exp_profile
            raw_ms_dataset = scan_group.create_dataset(
                "raw_ms", data=profile, dtype="f8"
            )
        else:
            # no raw data to store: create an empty placeholder dataset
            raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")

        raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(ms_attrs)

        if isinstance(mass_spectrum, MassSpecfromFreq):
            raw_ms_dataset.attrs["TransientSetting"] = pretty_json(
                settings.get("TransientSetting")
            )

    # the number of keys already in the group is used as the next dataset name
    next_index = str(len(scan_group.keys()))
    written_at = str(datetime.now(timezone.utc).strftime(stamp_format))

    processed_dset = scan_group.create_dataset(next_index, data=peak_rows)
    processed_dset.attrs["date_utc"] = written_at
    processed_dset.attrs["ColumnsLabels"] = columns_labels

    for attr_name, settings_key in (
        ("MoleculaSearchSetting", "MoleculaSearch"),
        ("MassSpecPeakSetting", "MassSpecPeak"),
        ("MassSpectrumSetting", "MassSpectrum"),
    ):
        processed_dset.attrs[attr_name] = pretty_json(settings.get(settings_key))

Adds the mass spectrum data to an HDF5 file.

Parameters
  • hdf_handle (h5py.File): The HDF5 file handle.
  • mass_spectrum (MassSpectrum): The mass spectrum to add to the HDF5 file.
  • group_key (str): The group key (where to add the mass spectrum data within the HDF5 file).
  • mass_spectra_group (h5py.Group, optional): The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
  • export_raw (bool, optional): Whether to export the raw data. Defaults to True. If False, only the processed data (peaks) is exported (essentially centroided data).
def to_hdf(self):
def to_hdf(self):
    """Append the mass spectrum to ``<output_file>.hdf5``.

    The file is opened in append mode and the spectrum is stored under a
    group keyed by its scan number.
    """
    hdf_path = self.output_file.with_suffix(".hdf5")

    with h5py.File(hdf_path, "a") as hdf_handle:
        scan_key = str(self.mass_spectrum.scan_number)
        self.add_mass_spectrum_to_hdf5(hdf_handle, self.mass_spectrum, scan_key)

Exports the mass spectrum data to an HDF5 file.

def parameters_to_toml(self):
def parameters_to_toml(self):
    """Serialise the mass spectrum parameters to TOML.

    Returns
    -------
    str
        The TOML string of the mass spectrum parameters.
    """
    spectrum = self.mass_spectrum

    settings = parameter_to_dict.get_dict_data_ms(spectrum)
    settings.update(
        MassSpecAttrs=self.get_mass_spec_attrs(spectrum),
        analyzer=spectrum.analyzer,
        instrument_label=spectrum.instrument_label,
        sample_name=spectrum.sample_name,
    )

    return toml.dumps(settings)

Converts the mass spectrum parameters to a TOML string.

Returns
  • str: The TOML string of the mass spectrum parameters.
def parameters_to_json(self):
def parameters_to_json(self):
    """Serialise the mass spectrum parameters to JSON.

    Returns
    -------
    str
        The JSON string of the mass spectrum parameters.
    """
    spectrum = self.mass_spectrum

    settings = parameter_to_dict.get_dict_data_ms(spectrum)
    settings.update(
        MassSpecAttrs=self.get_mass_spec_attrs(spectrum),
        analyzer=spectrum.analyzer,
        instrument_label=spectrum.instrument_label,
        sample_name=spectrum.sample_name,
    )

    return json.dumps(settings)

Converts the mass spectrum parameters to a JSON string.

Returns
  • str: The JSON string of the mass spectrum parameters.
def get_mass_spec_attrs(self, mass_spectrum):
def get_mass_spec_attrs(self, mass_spectrum):
    """Collect the spectrum-level attributes into a dictionary.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.

    Returns
    -------
    dict
        The mass spectrum attributes, keyed by their export names.
    """
    return {
        "polarity": mass_spectrum.polarity,
        "rt": mass_spectrum.retention_time,
        "tic": mass_spectrum.tic,
        "mobility_scan": mass_spectrum.mobility_scan,
        "mobility_rt": mass_spectrum.mobility_rt,
        "Aterm": mass_spectrum.Aterm,
        "Bterm": mass_spectrum.Bterm,
        "Cterm": mass_spectrum.Cterm,
        "baseline_noise": mass_spectrum.baseline_noise,
        "baseline_noise_std": mass_spectrum.baseline_noise_std,
    }

Returns the mass spectrum attributes as a dictionary.

Parameters
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
Returns
  • dict: The mass spectrum attributes.
def get_all_used_atoms_in_order(self, mass_spectrum):
def get_all_used_atoms_in_order(self, mass_spectrum):
    """Collect every atom used by an assigned formula, in canonical order.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.

    Returns
    -------
    list
        The atoms found in the molecular formulas of the spectrum's peaks,
        sorted by their position in ``Atoms.atoms_order``.
    """
    reference_order = Atoms.atoms_order

    found_atoms = set()
    if mass_spectrum:
        for peak in mass_spectrum:
            if not peak:
                # falsy peaks are skipped
                continue
            for formula in peak:
                found_atoms.update(formula.atoms)

    return sorted(
        found_atoms, key=lambda symbol: [reference_order.index(symbol)]
    )

Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.

Parameters
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
Returns
  • list: The list of assigned atoms in the order specified by Atoms.atoms_order list.
def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    """Returns the mass spectrum data as a list of rows.

    Each dictionary produced by ``get_list_dict_data`` becomes one list whose
    values sit at the position of their column label; columns that a record
    does not provide are filled with NaN.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    is_hdf5 : bool, optional
        Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.

    Returns
    -------
    list
        The mass spectrum data as a list of lists (one per record).

    Raises
    ------
    ValueError
        If a record contains a key that is not one of the column labels.
    """

    column_labels = self.columns_label + self.get_all_used_atoms_in_order(
        mass_spectrum
    )

    # Map each label to its position once instead of scanning the label list
    # for every cell. setdefault keeps the first occurrence, like list.index.
    column_position = {}
    for position, label in enumerate(column_labels):
        column_position.setdefault(label, position)

    dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)

    # Plain float NaN: the same value numpy's NaN alias provided, without
    # depending on that alias (it was removed in NumPy 2.0).
    missing = float("nan")

    all_lines = []
    for dict_res in dict_list:
        result_line = [missing] * len(column_labels)

        for label, value in dict_res.items():
            try:
                result_line[column_position[label]] = value
            except KeyError:
                # keep the exception type list.index raised for unknown labels
                raise ValueError("%r is not in list" % (label,)) from None

        all_lines.append(result_line)

    return all_lines

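Returns the mass spectrum data as a list of rows: each record becomes a list whose values are placed in column-label order, with NaN for columns the record does not provide.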

Parameters
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
  • is_hdf5 (bool, optional): Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
Returns
  • list: The mass spectrum data as a list of lists (one per record).
def get_list_dict_data( self, mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False, additional_columns=None):
def get_list_dict_data(
    self,
    mass_spectrum,
    include_no_match=True,
    include_isotopologues=True,
    isotopologue_inline=True,
    no_match_inline=False,
    is_hdf5=False,
    additional_columns=None,
):
    """Returns the mass spectrum data as a list of dictionaries.

    When the configured ``output_score_method`` is one of the available
    ``score_methods``, only the best molecular formula candidate of each peak
    (plus its isotopologues) is exported, and ``include_isotopologues`` /
    ``isotopologue_inline`` are not consulted. Otherwise every molecular
    formula of every peak is exported, subject to ``output_min_score``.
    Duplicate rows are removed from the result.

    Parameters
    ----------
    mass_spectrum : MassSpectrum
        The mass spectrum to export.
    include_no_match : bool, optional
        Whether to include unassigned (no match) data. Defaults to True.
    include_isotopologues : bool, optional
        Whether to include isotopologues. Defaults to True.
    isotopologue_inline : bool, optional
        Whether to include isotopologues inline. Defaults to True.
    no_match_inline : bool, optional
        Whether to include unassigned (no match) data inline. Defaults to False.
    is_hdf5 : bool, optional
        Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
        When True, text values are UTF-8 encoded to bytes.
    additional_columns : list, optional
        Extra per-formula columns to add to each assigned row. Recognised
        names are 'Aromaticity Index', 'NOSC' and
        'Aromaticity Index (modified)'; any other name maps to None.
        Defaults to None.

    Returns
    -------
    list
        The mass spectrum data as a list of dictionaries.
    """

    dict_data_list = []

    def as_text(value):
        """Encode a text value to UTF-8 bytes for HDF5, else return it as is."""
        return value.encode("utf-8") if is_hdf5 else value

    def add_no_match_dict_data(index, ms_peak):
        """
        Export dictionary of mspeak info for unassigned (no match) data
        """
        dict_result = {
            "Index": index,
            "m/z": ms_peak._mz_exp,
            "Calibrated m/z": ms_peak.mz_exp,
            "Peak Height": ms_peak.abundance,
            "Peak Area": ms_peak.area,
            "Resolving Power": ms_peak.resolving_power,
            "S/N": ms_peak.signal_to_noise,
            "Ion Charge": ms_peak.ion_charge,
            "Heteroatom Class": as_text(Labels.unassigned),
        }

        dict_data_list.append(dict_result)

    def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
        """
        Export dictionary of mspeak info for assigned (match) data
        """
        formula_dict = mformula.to_dict()

        dict_result = {
            "Index": index,
            "m/z": ms_peak._mz_exp,
            "Calibrated m/z": ms_peak.mz_exp,
            "Calculated m/z": mformula.mz_calc,
            "Peak Height": ms_peak.abundance,
            "Peak Area": ms_peak.area,
            "Resolving Power": ms_peak.resolving_power,
            "S/N": ms_peak.signal_to_noise,
            "Ion Charge": ms_peak.ion_charge,
            "m/z Error (ppm)": mformula.mz_error,
            "Confidence Score": mformula.confidence_score,
            "Isotopologue Similarity": mformula.isotopologue_similarity,
            "m/z Error Score": mformula.average_mz_error_score,
            "DBE": mformula.dbe,
            "Heteroatom Class": as_text(mformula.class_label),
            "H/C": mformula.H_C,
            "O/C": mformula.O_C,
            "Ion Type": as_text(mformula.ion_type.lower()),
            "Is Isotopologue": int(mformula.is_isotopologue),
            "Molecular Formula": as_text(mformula.string),
        }
        if additional_columns is not None:
            possible_dict = {
                "Aromaticity Index": mformula.A_I,
                "NOSC": mformula.nosc,
                "Aromaticity Index (modified)": mformula.A_I_mod,
            }
            for column in additional_columns:
                dict_result[column] = possible_dict.get(column)

        if mformula.adduct_atom:
            dict_result["Adduct"] = as_text(mformula.adduct_atom)

        if mformula.is_isotopologue:
            dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic

        if self.atoms_order_list is None:
            atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
        else:
            atoms_order_list = self.atoms_order_list

        for atom in atoms_order_list:
            if atom in formula_dict:
                dict_result[atom] = formula_dict.get(atom)

        dict_data_list.append(dict_result)

    def add_unassigned_peaks():
        """Append a no-match row for every peak without a molecular formula."""
        for index, ms_peak in enumerate(mass_spectrum):
            if not ms_peak:
                add_no_match_dict_data(index, ms_peak)

    settings = mass_spectrum.molecular_search_settings
    score_methods = settings.score_methods
    selected_score_method = settings.output_score_method

    if selected_score_method in score_methods:
        # temporarily switch the score method to the one chosen for the output
        current_method = settings.score_method
        settings.score_method = selected_score_method

        try:
            for index, ms_peak in enumerate(mass_spectrum):
                if ms_peak:
                    m_formula = ms_peak.best_molecular_formula_candidate

                    if m_formula and not m_formula.is_isotopologue:
                        add_match_dict_data(
                            index,
                            ms_peak,
                            m_formula,
                            additional_columns=additional_columns,
                        )

                        # isotopologue rows follow their monoisotopic row
                        for (
                            iso_mspeak_index,
                            iso_mf_formula,
                        ) in m_formula.mspeak_mf_isotopologues_indexes:
                            iso_ms_peak = mass_spectrum[iso_mspeak_index]
                            add_match_dict_data(
                                iso_mspeak_index,
                                iso_ms_peak,
                                iso_mf_formula,
                                additional_columns=additional_columns,
                            )
                elif include_no_match and no_match_inline:
                    add_no_match_dict_data(index, ms_peak)

            if include_no_match and not no_match_inline:
                add_unassigned_peaks()
        finally:
            # always restore the caller's score method, even if the export fails
            settings.score_method = current_method

    else:
        for index, ms_peak in enumerate(mass_spectrum):
            # check if there is a molecular formula candidate for the msPeak
            if ms_peak:
                for m_formula in ms_peak:
                    min_score = settings.output_min_score

                    if min_score > 0 and not (
                        m_formula.confidence_score >= min_score
                    ):
                        # cutoff because of low score
                        add_no_match_dict_data(index, ms_peak)
                    elif m_formula.is_isotopologue:
                        # isotopologues inline
                        if include_isotopologues and isotopologue_inline:
                            add_match_dict_data(
                                index,
                                ms_peak,
                                m_formula,
                                additional_columns=additional_columns,
                            )
                    else:
                        # add monoisotopic peak
                        add_match_dict_data(
                            index,
                            ms_peak,
                            m_formula,
                            additional_columns=additional_columns,
                        )
            elif include_no_match and no_match_inline:
                # include not_match
                add_no_match_dict_data(index, ms_peak)

        if include_isotopologues and not isotopologue_inline:
            # isotopologues are appended after all monoisotopic rows
            for index, ms_peak in enumerate(mass_spectrum):
                for m_formula in ms_peak:
                    if (
                        m_formula.is_isotopologue
                        and m_formula.confidence_score >= settings.output_min_score
                    ):
                        add_match_dict_data(
                            index,
                            ms_peak,
                            m_formula,
                            additional_columns=additional_columns,
                        )

        if include_no_match and not no_match_inline:
            add_unassigned_peaks()

    # remove duplicated add_match data possibly introduced on the output_score_filter step
    # (rows are dicts, hence unhashable, so membership is tested against a list)
    res = []
    for row in dict_data_list:
        if row not in res:
            res.append(row)

    return res

Returns the mass spectrum data as a list of dictionaries.

Parameters
  • mass_spectrum (MassSpectrum): The mass spectrum to export.
  • include_no_match (bool, optional): Whether to include unassigned (no match) data. Defaults to True.
  • include_isotopologues (bool, optional): Whether to include isotopologues. Defaults to True.
  • isotopologue_inline (bool, optional): Whether to include isotopologues inline. Defaults to True.
  • no_match_inline (bool, optional): Whether to include unassigned (no match) data inline. Defaults to False.
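  • is_hdf5 (bool, optional): Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
  • additional_columns (list, optional): Extra per-formula columns to add to each assigned row; the recognised names are 'Aromaticity Index', 'NOSC' and 'Aromaticity Index (modified)'. Defaults to None.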
Returns
  • list: The mass spectrum data as a list of dictionaries.
Inherited Members
threading.Thread
start
join
name
ident
is_alive
daemon
isDaemon
setDaemon
getName
setName
native_id