corems.mass_spectrum.input.massList
__author__ = "Yuri E. Corilo"
__date__ = "Jun 12, 2019"

import warnings

from corems.encapsulation.constant import Atoms, Labels
from corems.mass_spectrum.factory.MassSpectrumClasses import (
    MassSpecCentroid,
    MassSpecProfile,
)
from corems.mass_spectrum.input.baseClass import MassListBaseClass
from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula


class ReadCoremsMasslist(MassListBaseClass):
    """
    Reader for processed (CoreMS-exported) mass lists.

    Builds a centroid mass spectrum object and re-attaches the molecular
    formula assignments stored in the file to its peaks.

    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
    Please see MassListBaseClass for more details

    """

    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
        """
        Build the centroid mass spectrum described by a processed mass list.

        Parameters
        ----------
        loadSettings : bool, optional
            Settings are loaded only when this is exactly ``True``. Default is True.

        Returns
        -------
        MassSpecCentroid
            The mass spectrum object, with the molecular formulas from the file attached.

        Raises
        ------
        ValueError
            If any of the columns "H/C", "O/C", "Heteroatom Class", "Ion Type" or
            "Is Isotopologue" is missing from the input file.
        """
        table = self.get_dataframe()

        required_columns = {
            "H/C",
            "O/C",
            "Heteroatom Class",
            "Ion Type",
            "Is Isotopologue",
        }
        if not required_columns.issubset(table.columns):
            raise ValueError(
                "%s it is not a valid CoreMS file" % str(self.file_location)
            )

        self.check_columns(table.columns)
        table.rename(columns=self.parameters.header_translate, inplace=True)

        # The charge of the first row is used as the polarity of the whole spectrum.
        ion_charge = table["Ion Charge"].values[0]
        settings = self.get_output_parameters(ion_charge)

        spectrum = MassSpecCentroid(table.to_dict(orient="list"), settings)

        if loadSettings is True:
            self.load_settings(spectrum, settings)

        self.add_molecular_formula(spectrum, table)
        return spectrum

    def add_molecular_formula(self, mass_spec_obj, dataframe):
        """
        Attach the molecular formulas listed in a processed mass list to the peaks of a spectrum.

        Every row is matched to the peak of `mass_spec_obj` whose experimental
        m/z equals the row's m/z. Rows whose atom counts sum to zero carry no
        assignment and are skipped. Rows flagged as isotopologues are rebuilt
        from their mono-isotopic parent formula; when none of the generated
        isotopologues matches the row, a warning is issued and the row is skipped.

        Parameters
        ----------
        mass_spec_obj : MassSpecCentroid
            The mass spectrum object to add the molecular formula to.
        dataframe : pandas.DataFrame
            The processed mass list data.

        Raises
        ------
        ValueError
            If `dataframe` has no "Is Isotopologue" column, or if a row's m/z
            is not found among the m/z values of `mass_spec_obj`.
        """
        # Fail with a clear message instead of an UnboundLocalError further down.
        if "Is Isotopologue" not in dataframe:
            raise ValueError(
                "dataframe has no 'Is Isotopologue' column; "
                "it is not a processed CoreMS mass list"
            )

        # Reindex dataframe to row index to avoid issues with duplicated indexes
        # (e.g. when multiple formula map to single mz_exp)
        dataframe = dataframe.reset_index(drop=True)

        mz_exp_df = dataframe[Labels.mz].astype(float)
        formula_df = dataframe[
            dataframe.columns.intersection(Atoms.atoms_order)
        ].copy()
        formula_df.fillna(0, inplace=True)
        formula_df.replace(b"nan", 0, inplace=True)

        ion_type_df = dataframe["Ion Type"]
        ion_charge_df = dataframe["Ion Charge"]
        is_isotopologue_df = dataframe["Is Isotopologue"]
        adduct_df = dataframe["Adduct"] if "Adduct" in dataframe else None

        # Loop invariants, computed once instead of once per row.
        atoms = list(formula_df.columns.astype(str))
        peak_mz_list = list(mass_spec_obj.mz_exp)
        has_mz_error_score = "m/z Error Score" in dataframe
        has_confidence_score = "Confidence Score" in dataframe
        has_isotopologue_similarity = "Isotopologue Similarity" in dataframe

        for df_index, mz_exp in enumerate(mz_exp_df):
            # list.index raises ValueError when the m/z is not a peak of the spectrum.
            ms_peak_index = peak_mz_list.index(float(mz_exp))

            counts = list(formula_df.iloc[df_index].astype(int))
            if sum(counts) <= 0:
                # No atoms recorded: this row has no formula assignment.
                continue

            # Drop any atoms with 0 counts
            formula_dict = {
                atom: count
                for atom, count in dict(zip(atoms, counts)).items()
                if count > 0
            }

            row = dataframe.iloc[df_index]
            ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
            ion_charge = int(ion_charge_df[df_index])

            adduct_atom = None
            if adduct_df is not None:
                adduct_atom = str(adduct_df[df_index])
                if adduct_atom == "None":
                    adduct_atom = None

            if not bool(int(is_isotopologue_df[df_index])):
                # Not an isotopologue: build the MolecularFormula directly.
                mfobj = MolecularFormula(
                    formula_dict,
                    ion_charge,
                    mspeak_parent=mass_spec_obj[ms_peak_index],
                    ion_type=ion_type,
                    adduct_atom=adduct_atom,
                )
            else:
                # First make a MolecularFormula object for the parent so we can
                # get probabilities etc. Isotope labels are folded onto their
                # mono-isotopic element; counts are accumulated so the result
                # does not depend on the column order.
                formula_list_parent = {}
                for atom, count in formula_dict.items():
                    if atom in Atoms.isotopes:
                        atom_mono = atom
                    else:
                        # remove any numbers from the atom name to cast as a mono-isotopic atom
                        atom_mono = atom.strip("0123456789")
                        if atom_mono not in Atoms.isotopes:
                            warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
                            continue
                    formula_list_parent[atom_mono] = (
                        formula_list_parent.get(atom_mono, 0) + count
                    )

                mono_index = int(row["Mono Isotopic Index"])
                mono_peak = mass_spec_obj[mono_index]
                mono_mfobj = MolecularFormula(
                    formula_list_parent,
                    ion_charge,
                    mspeak_parent=mono_peak,
                    ion_type=ion_type,
                    adduct_atom=adduct_atom,
                )

                # Next, generate isotopologues from the parent
                isos = list(
                    mono_mfobj.isotopologues(
                        min_abundance=mass_spec_obj.abundance.min() * 0.01,
                        current_mono_abundance=mono_peak.abundance,
                        dynamic_range=mass_spec_obj.dynamic_range,
                    )
                )

                # Finally, take the first isotopologue whose atoms and counts
                # match the formula_dict
                target_atoms = set(formula_dict)
                mfobj = next(
                    (
                        iso
                        for iso in isos
                        if set(iso.atoms) == target_atoms
                        and all(iso[atom] == formula_dict[atom] for atom in formula_dict)
                    ),
                    None,
                )
                if mfobj is None:
                    # FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
                    warnings.warn(
                        f"No isotopologue matched the formula_dict: {formula_dict}"
                    )
                    continue

                mfobj.mspeak_index_mono_isotopic = mono_index

            # Restore the scores that were exported with the assignment.
            if has_mz_error_score:
                mfobj._mass_error_average_score = float(row["m/z Error Score"])
            if has_confidence_score:
                mfobj._confidence_score = float(row["Confidence Score"])
            if has_isotopologue_similarity:
                mfobj._isotopologue_similarity = float(row["Isotopologue Similarity"])

            mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)


class ReadMassList(MassListBaseClass):
    """
    Reader for unprocessed mass lists.

    Returns a centroid or a profile mass spectrum object depending on
    ``self.isCentroid``. See MassListBaseClass for the shared reader options.

    Methods
    -------
    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.

    """

    def get_mass_spectrum(
        self,
        polarity: int,
        scan: int = 0,
        auto_process: bool = True,
        loadSettings: bool = True,
    ):
        """
        Read the mass list and return the corresponding mass spectrum object.

        Parameters
        ----------
        polarity : int
            The polarity of the mass spectrum (+1 or -1).
        scan : int, optional
            The scan number of the mass spectrum (default is 0). Currently not used by this method.
        auto_process : bool, optional
            Forwarded to the mass spectrum constructor (default is True).
        loadSettings : bool, optional
            Flag indicating whether to load settings for the mass spectrum (default is True).

        Returns
        -------
        mass_spec : MassSpecCentroid or MassSpecProfile
            ``MassSpecCentroid`` when ``self.isCentroid`` is truthy, ``MassSpecProfile`` otherwise.

        """
        centroid_mode = bool(self.isCentroid)

        table = self.get_dataframe()
        self.check_columns(table.columns)

        # The two modes call the same helpers; each keeps its original call order.
        if centroid_mode:
            self.clean_data_frame(table)
            table.rename(columns=self.parameters.header_translate, inplace=True)
            settings = self.get_output_parameters(polarity)
            spectrum_class = MassSpecCentroid
        else:
            settings = self.get_output_parameters(polarity)
            self.clean_data_frame(table)
            table.rename(columns=self.parameters.header_translate, inplace=True)
            spectrum_class = MassSpecProfile

        spectrum = spectrum_class(
            table.to_dict(orient="list"),
            settings,
            auto_process=auto_process,
        )

        if loadSettings:
            self.load_settings(spectrum, settings)

        return spectrum


class ReadBrukerXMLList(MassListBaseClass):
    """
    Reader for Bruker XML peak lists.

    Returns a centroid mass spectrum object.
    See MassListBaseClass for details

    Methods
    -------
    * get_mass_spectrum(polarity=None, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.

    """

    def get_mass_spectrum(
        self,
        polarity: int = None,
        scan: int = 0,
        auto_process: bool = True,
        loadSettings: bool = True,
    ):
        """
        Read the Bruker XML peak list and return the mass spectrum object.

        Parameters
        ----------
        polarity : int, optional
            The polarity of the mass spectrum. Can be +1 or -1. If None (the default),
            the value returned by ``self.get_xml_polarity()`` is used.
        scan : int, optional
            The scan number of the mass spectrum. Default is 0. Currently not used by this method.
        auto_process : bool, optional
            Forwarded to the mass spectrum constructor. Default is True.
        loadSettings : bool, optional
            Whether to load the settings for the mass spectrum. Default is True.

        Returns
        -------
        mass_spec : MassSpecCentroid
            The mass spectrum object representing the centroided mass spectrum.
        """
        # Identity test: `== None` would defer to the operand's __eq__.
        if polarity is None:
            polarity = self.get_xml_polarity()

        dataframe = self.get_dataframe()

        self.check_columns(dataframe.columns)
        self.clean_data_frame(dataframe)
        dataframe.rename(columns=self.parameters.header_translate, inplace=True)

        output_parameters = self.get_output_parameters(polarity)

        mass_spec = MassSpecCentroid(
            dataframe.to_dict(orient="list"),
            output_parameters,
            auto_process=auto_process,
        )

        if loadSettings:
            self.load_settings(mass_spec, output_parameters)

        return mass_spec
class ReadCoremsMasslist(MassListBaseClass):
    """
    Reader for processed (CoreMS-exported) mass lists.

    Builds a centroid mass spectrum object and re-attaches the molecular
    formula assignments stored in the file to its peaks.

    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
    Please see MassListBaseClass for more details

    """

    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
        """
        Build the centroid mass spectrum described by a processed mass list.

        Parameters
        ----------
        loadSettings : bool, optional
            Settings are loaded only when this is exactly ``True``. Default is True.

        Returns
        -------
        MassSpecCentroid
            The mass spectrum object, with the molecular formulas from the file attached.

        Raises
        ------
        ValueError
            If any of the columns "H/C", "O/C", "Heteroatom Class", "Ion Type" or
            "Is Isotopologue" is missing from the input file.
        """
        table = self.get_dataframe()

        required_columns = {
            "H/C",
            "O/C",
            "Heteroatom Class",
            "Ion Type",
            "Is Isotopologue",
        }
        if not required_columns.issubset(table.columns):
            raise ValueError(
                "%s it is not a valid CoreMS file" % str(self.file_location)
            )

        self.check_columns(table.columns)
        table.rename(columns=self.parameters.header_translate, inplace=True)

        # The charge of the first row is used as the polarity of the whole spectrum.
        ion_charge = table["Ion Charge"].values[0]
        settings = self.get_output_parameters(ion_charge)

        spectrum = MassSpecCentroid(table.to_dict(orient="list"), settings)

        if loadSettings is True:
            self.load_settings(spectrum, settings)

        self.add_molecular_formula(spectrum, table)
        return spectrum

    def add_molecular_formula(self, mass_spec_obj, dataframe):
        """
        Attach the molecular formulas listed in a processed mass list to the peaks of a spectrum.

        Every row is matched to the peak of `mass_spec_obj` whose experimental
        m/z equals the row's m/z. Rows whose atom counts sum to zero carry no
        assignment and are skipped. Rows flagged as isotopologues are rebuilt
        from their mono-isotopic parent formula; when none of the generated
        isotopologues matches the row, a warning is issued and the row is skipped.

        Parameters
        ----------
        mass_spec_obj : MassSpecCentroid
            The mass spectrum object to add the molecular formula to.
        dataframe : pandas.DataFrame
            The processed mass list data.

        Raises
        ------
        ValueError
            If `dataframe` has no "Is Isotopologue" column, or if a row's m/z
            is not found among the m/z values of `mass_spec_obj`.
        """
        # Fail with a clear message instead of an UnboundLocalError further down.
        if "Is Isotopologue" not in dataframe:
            raise ValueError(
                "dataframe has no 'Is Isotopologue' column; "
                "it is not a processed CoreMS mass list"
            )

        # Reindex dataframe to row index to avoid issues with duplicated indexes
        # (e.g. when multiple formula map to single mz_exp)
        dataframe = dataframe.reset_index(drop=True)

        mz_exp_df = dataframe[Labels.mz].astype(float)
        formula_df = dataframe[
            dataframe.columns.intersection(Atoms.atoms_order)
        ].copy()
        formula_df.fillna(0, inplace=True)
        formula_df.replace(b"nan", 0, inplace=True)

        ion_type_df = dataframe["Ion Type"]
        ion_charge_df = dataframe["Ion Charge"]
        is_isotopologue_df = dataframe["Is Isotopologue"]
        adduct_df = dataframe["Adduct"] if "Adduct" in dataframe else None

        # Loop invariants, computed once instead of once per row.
        atoms = list(formula_df.columns.astype(str))
        peak_mz_list = list(mass_spec_obj.mz_exp)
        has_mz_error_score = "m/z Error Score" in dataframe
        has_confidence_score = "Confidence Score" in dataframe
        has_isotopologue_similarity = "Isotopologue Similarity" in dataframe

        for df_index, mz_exp in enumerate(mz_exp_df):
            # list.index raises ValueError when the m/z is not a peak of the spectrum.
            ms_peak_index = peak_mz_list.index(float(mz_exp))

            counts = list(formula_df.iloc[df_index].astype(int))
            if sum(counts) <= 0:
                # No atoms recorded: this row has no formula assignment.
                continue

            # Drop any atoms with 0 counts
            formula_dict = {
                atom: count
                for atom, count in dict(zip(atoms, counts)).items()
                if count > 0
            }

            row = dataframe.iloc[df_index]
            ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
            ion_charge = int(ion_charge_df[df_index])

            adduct_atom = None
            if adduct_df is not None:
                adduct_atom = str(adduct_df[df_index])
                if adduct_atom == "None":
                    adduct_atom = None

            if not bool(int(is_isotopologue_df[df_index])):
                # Not an isotopologue: build the MolecularFormula directly.
                mfobj = MolecularFormula(
                    formula_dict,
                    ion_charge,
                    mspeak_parent=mass_spec_obj[ms_peak_index],
                    ion_type=ion_type,
                    adduct_atom=adduct_atom,
                )
            else:
                # First make a MolecularFormula object for the parent so we can
                # get probabilities etc. Isotope labels are folded onto their
                # mono-isotopic element; counts are accumulated so the result
                # does not depend on the column order.
                formula_list_parent = {}
                for atom, count in formula_dict.items():
                    if atom in Atoms.isotopes:
                        atom_mono = atom
                    else:
                        # remove any numbers from the atom name to cast as a mono-isotopic atom
                        atom_mono = atom.strip("0123456789")
                        if atom_mono not in Atoms.isotopes:
                            warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
                            continue
                    formula_list_parent[atom_mono] = (
                        formula_list_parent.get(atom_mono, 0) + count
                    )

                mono_index = int(row["Mono Isotopic Index"])
                mono_peak = mass_spec_obj[mono_index]
                mono_mfobj = MolecularFormula(
                    formula_list_parent,
                    ion_charge,
                    mspeak_parent=mono_peak,
                    ion_type=ion_type,
                    adduct_atom=adduct_atom,
                )

                # Next, generate isotopologues from the parent
                isos = list(
                    mono_mfobj.isotopologues(
                        min_abundance=mass_spec_obj.abundance.min() * 0.01,
                        current_mono_abundance=mono_peak.abundance,
                        dynamic_range=mass_spec_obj.dynamic_range,
                    )
                )

                # Finally, take the first isotopologue whose atoms and counts
                # match the formula_dict
                target_atoms = set(formula_dict)
                mfobj = next(
                    (
                        iso
                        for iso in isos
                        if set(iso.atoms) == target_atoms
                        and all(iso[atom] == formula_dict[atom] for atom in formula_dict)
                    ),
                    None,
                )
                if mfobj is None:
                    # FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
                    warnings.warn(
                        f"No isotopologue matched the formula_dict: {formula_dict}"
                    )
                    continue

                mfobj.mspeak_index_mono_isotopic = mono_index

            # Restore the scores that were exported with the assignment.
            if has_mz_error_score:
                mfobj._mass_error_average_score = float(row["m/z Error Score"])
            if has_confidence_score:
                mfobj._confidence_score = float(row["Confidence Score"])
            if has_isotopologue_similarity:
                mfobj._isotopologue_similarity = float(row["Isotopologue Similarity"])

            mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
The ReadCoremsMasslist object reads processed mass list data types and returns the mass spectrum object with the molecular formula objects attached.
Only available for the centroid mass spectrum type: it will ignore the parameter isCentroid. Please see MassListBaseClass for more details.
def
get_mass_spectrum( self, loadSettings: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid:
def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
    """
    Build the centroid mass spectrum described by a processed mass list.

    Parameters
    ----------
    loadSettings : bool, optional
        Settings are loaded only when this is exactly ``True``. Default is True.

    Returns
    -------
    MassSpecCentroid
        The mass spectrum object, with the molecular formulas from the file attached.

    Raises
    ------
    ValueError
        If any of the columns "H/C", "O/C", "Heteroatom Class", "Ion Type" or
        "Is Isotopologue" is missing from the input file.
    """
    table = self.get_dataframe()

    required_columns = {
        "H/C",
        "O/C",
        "Heteroatom Class",
        "Ion Type",
        "Is Isotopologue",
    }
    if not required_columns.issubset(table.columns):
        raise ValueError(
            "%s it is not a valid CoreMS file" % str(self.file_location)
        )

    self.check_columns(table.columns)
    table.rename(columns=self.parameters.header_translate, inplace=True)

    # The charge of the first row is used as the polarity of the whole spectrum.
    ion_charge = table["Ion Charge"].values[0]
    settings = self.get_output_parameters(ion_charge)

    spectrum = MassSpecCentroid(table.to_dict(orient="list"), settings)

    if loadSettings is True:
        self.load_settings(spectrum, settings)

    self.add_molecular_formula(spectrum, table)
    return spectrum
Get the mass spectrum object from the processed mass list data.
Parameters
- loadSettings (bool, optional): Whether to load the settings for the mass spectrum. Default is True.
Returns
- MassSpecCentroid: The mass spectrum object.
Raises
- ValueError: If the input file is not a valid CoreMS file.
def
add_molecular_formula(self, mass_spec_obj, dataframe):
def add_molecular_formula(self, mass_spec_obj, dataframe):
    """
    Attach the molecular formulas listed in a processed mass list to the peaks of a spectrum.

    Every row is matched to the peak of `mass_spec_obj` whose experimental
    m/z equals the row's m/z. Rows whose atom counts sum to zero carry no
    assignment and are skipped. Rows flagged as isotopologues are rebuilt
    from their mono-isotopic parent formula; when none of the generated
    isotopologues matches the row, a warning is issued and the row is skipped.

    Parameters
    ----------
    mass_spec_obj : MassSpecCentroid
        The mass spectrum object to add the molecular formula to.
    dataframe : pandas.DataFrame
        The processed mass list data.

    Raises
    ------
    ValueError
        If `dataframe` has no "Is Isotopologue" column, or if a row's m/z
        is not found among the m/z values of `mass_spec_obj`.
    """
    # Fail with a clear message instead of an UnboundLocalError further down.
    if "Is Isotopologue" not in dataframe:
        raise ValueError(
            "dataframe has no 'Is Isotopologue' column; "
            "it is not a processed CoreMS mass list"
        )

    # Reindex dataframe to row index to avoid issues with duplicated indexes
    # (e.g. when multiple formula map to single mz_exp)
    dataframe = dataframe.reset_index(drop=True)

    mz_exp_df = dataframe[Labels.mz].astype(float)
    formula_df = dataframe[
        dataframe.columns.intersection(Atoms.atoms_order)
    ].copy()
    formula_df.fillna(0, inplace=True)
    formula_df.replace(b"nan", 0, inplace=True)

    ion_type_df = dataframe["Ion Type"]
    ion_charge_df = dataframe["Ion Charge"]
    is_isotopologue_df = dataframe["Is Isotopologue"]
    adduct_df = dataframe["Adduct"] if "Adduct" in dataframe else None

    # Loop invariants, computed once instead of once per row.
    atoms = list(formula_df.columns.astype(str))
    peak_mz_list = list(mass_spec_obj.mz_exp)
    has_mz_error_score = "m/z Error Score" in dataframe
    has_confidence_score = "Confidence Score" in dataframe
    has_isotopologue_similarity = "Isotopologue Similarity" in dataframe

    for df_index, mz_exp in enumerate(mz_exp_df):
        # list.index raises ValueError when the m/z is not a peak of the spectrum.
        ms_peak_index = peak_mz_list.index(float(mz_exp))

        counts = list(formula_df.iloc[df_index].astype(int))
        if sum(counts) <= 0:
            # No atoms recorded: this row has no formula assignment.
            continue

        # Drop any atoms with 0 counts
        formula_dict = {
            atom: count
            for atom, count in dict(zip(atoms, counts)).items()
            if count > 0
        }

        row = dataframe.iloc[df_index]
        ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
        ion_charge = int(ion_charge_df[df_index])

        adduct_atom = None
        if adduct_df is not None:
            adduct_atom = str(adduct_df[df_index])
            if adduct_atom == "None":
                adduct_atom = None

        if not bool(int(is_isotopologue_df[df_index])):
            # Not an isotopologue: build the MolecularFormula directly.
            mfobj = MolecularFormula(
                formula_dict,
                ion_charge,
                mspeak_parent=mass_spec_obj[ms_peak_index],
                ion_type=ion_type,
                adduct_atom=adduct_atom,
            )
        else:
            # First make a MolecularFormula object for the parent so we can
            # get probabilities etc. Isotope labels are folded onto their
            # mono-isotopic element; counts are accumulated so the result
            # does not depend on the column order.
            formula_list_parent = {}
            for atom, count in formula_dict.items():
                if atom in Atoms.isotopes:
                    atom_mono = atom
                else:
                    # remove any numbers from the atom name to cast as a mono-isotopic atom
                    atom_mono = atom.strip("0123456789")
                    if atom_mono not in Atoms.isotopes:
                        warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
                        continue
                formula_list_parent[atom_mono] = (
                    formula_list_parent.get(atom_mono, 0) + count
                )

            mono_index = int(row["Mono Isotopic Index"])
            mono_peak = mass_spec_obj[mono_index]
            mono_mfobj = MolecularFormula(
                formula_list_parent,
                ion_charge,
                mspeak_parent=mono_peak,
                ion_type=ion_type,
                adduct_atom=adduct_atom,
            )

            # Next, generate isotopologues from the parent
            isos = list(
                mono_mfobj.isotopologues(
                    min_abundance=mass_spec_obj.abundance.min() * 0.01,
                    current_mono_abundance=mono_peak.abundance,
                    dynamic_range=mass_spec_obj.dynamic_range,
                )
            )

            # Finally, take the first isotopologue whose atoms and counts
            # match the formula_dict
            target_atoms = set(formula_dict)
            mfobj = next(
                (
                    iso
                    for iso in isos
                    if set(iso.atoms) == target_atoms
                    and all(iso[atom] == formula_dict[atom] for atom in formula_dict)
                ),
                None,
            )
            if mfobj is None:
                # FIXME: This should not occur see https://code.emsl.pnl.gov/mass-spectrometry/corems/-/issues/190
                warnings.warn(
                    f"No isotopologue matched the formula_dict: {formula_dict}"
                )
                continue

            mfobj.mspeak_index_mono_isotopic = mono_index

        # Restore the scores that were exported with the assignment.
        if has_mz_error_score:
            mfobj._mass_error_average_score = float(row["m/z Error Score"])
        if has_confidence_score:
            mfobj._confidence_score = float(row["Confidence Score"])
        if has_isotopologue_similarity:
            mfobj._isotopologue_similarity = float(row["Isotopologue Similarity"])

        mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
Add molecular formula information to the mass spectrum object.
Parameters
- mass_spec_obj (MassSpecCentroid): The mass spectrum object to add the molecular formula to.
- dataframe (pandas.DataFrame): The processed mass list data.
Inherited Members
- corems.mass_spectrum.input.baseClass.MassListBaseClass
- MassListBaseClass
- file_location
- header_lines
- isCentroid
- isThermoProfile
- headerless
- analyzer
- instrument_label
- sample_name
- parameters
- set_parameter_from_toml
- set_parameter_from_json
- data_type
- delimiter
- encoding_detector
- set_data_type
- get_dataframe
- load_settings
- get_output_parameters
- clean_data_frame
- check_columns
- read_xml_peaks
- get_xml_polarity
class ReadMassList(MassListBaseClass):
    """
    Reader for unprocessed mass lists.

    Returns a centroid or a profile mass spectrum object depending on
    ``self.isCentroid``. See MassListBaseClass for the shared reader options.

    Methods
    -------
    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.

    """

    def get_mass_spectrum(
        self,
        polarity: int,
        scan: int = 0,
        auto_process: bool = True,
        loadSettings: bool = True,
    ):
        """
        Read the mass list and return the corresponding mass spectrum object.

        Parameters
        ----------
        polarity : int
            The polarity of the mass spectrum (+1 or -1).
        scan : int, optional
            The scan number of the mass spectrum (default is 0). Currently not used by this method.
        auto_process : bool, optional
            Forwarded to the mass spectrum constructor (default is True).
        loadSettings : bool, optional
            Flag indicating whether to load settings for the mass spectrum (default is True).

        Returns
        -------
        mass_spec : MassSpecCentroid or MassSpecProfile
            ``MassSpecCentroid`` when ``self.isCentroid`` is truthy, ``MassSpecProfile`` otherwise.

        """
        centroid_mode = bool(self.isCentroid)

        table = self.get_dataframe()
        self.check_columns(table.columns)

        # The two modes call the same helpers; each keeps its original call order.
        if centroid_mode:
            self.clean_data_frame(table)
            table.rename(columns=self.parameters.header_translate, inplace=True)
            settings = self.get_output_parameters(polarity)
            spectrum_class = MassSpecCentroid
        else:
            settings = self.get_output_parameters(polarity)
            self.clean_data_frame(table)
            table.rename(columns=self.parameters.header_translate, inplace=True)
            spectrum_class = MassSpecProfile

        spectrum = spectrum_class(
            table.to_dict(orient="list"),
            settings,
            auto_process=auto_process,
        )

        if loadSettings:
            self.load_settings(spectrum, settings)

        return spectrum
The ReadMassList object reads unprocessed mass list data types and returns the mass spectrum object.
Parameters
- MassListBaseClass (class): The base class for reading mass list data types.
Methods
- get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
def
get_mass_spectrum( self, polarity: int, scan: int = 0, auto_process: bool = True, loadSettings: bool = True):
def get_mass_spectrum(
    self,
    polarity: int,
    scan: int = 0,
    auto_process: bool = True,
    loadSettings: bool = True,
):
    """
    Read the mass list and return the corresponding mass spectrum object.

    Parameters
    ----------
    polarity : int
        The polarity of the mass spectrum (+1 or -1).
    scan : int, optional
        The scan number of the mass spectrum (default is 0). Currently not used by this method.
    auto_process : bool, optional
        Forwarded to the mass spectrum constructor (default is True).
    loadSettings : bool, optional
        Flag indicating whether to load settings for the mass spectrum (default is True).

    Returns
    -------
    mass_spec : MassSpecCentroid or MassSpecProfile
        ``MassSpecCentroid`` when ``self.isCentroid`` is truthy, ``MassSpecProfile`` otherwise.

    """
    centroid_mode = bool(self.isCentroid)

    table = self.get_dataframe()
    self.check_columns(table.columns)

    # The two modes call the same helpers; each keeps its original call order.
    if centroid_mode:
        self.clean_data_frame(table)
        table.rename(columns=self.parameters.header_translate, inplace=True)
        settings = self.get_output_parameters(polarity)
        spectrum_class = MassSpecCentroid
    else:
        settings = self.get_output_parameters(polarity)
        self.clean_data_frame(table)
        table.rename(columns=self.parameters.header_translate, inplace=True)
        spectrum_class = MassSpecProfile

    spectrum = spectrum_class(
        table.to_dict(orient="list"),
        settings,
        auto_process=auto_process,
    )

    if loadSettings:
        self.load_settings(spectrum, settings)

    return spectrum
Reads mass list data types and returns the mass spectrum object.
Parameters
- polarity (int): The polarity of the mass spectrum (+1 or -1).
- scan (int, optional): The scan number of the mass spectrum (default is 0).
- auto_process (bool, optional): Flag indicating whether to automatically process the mass spectrum (default is True).
- loadSettings (bool, optional): Flag indicating whether to load settings for the mass spectrum (default is True).
Returns
- mass_spec (MassSpecCentroid or MassSpecProfile): The mass spectrum object.
Inherited Members
- corems.mass_spectrum.input.baseClass.MassListBaseClass
- MassListBaseClass
- file_location
- header_lines
- isCentroid
- isThermoProfile
- headerless
- analyzer
- instrument_label
- sample_name
- parameters
- set_parameter_from_toml
- set_parameter_from_json
- data_type
- delimiter
- encoding_detector
- set_data_type
- get_dataframe
- load_settings
- get_output_parameters
- clean_data_frame
- check_columns
- read_xml_peaks
- get_xml_polarity
class ReadBrukerXMLList(MassListBaseClass):
    """
    Reader for Bruker XML peak lists.

    Returns a centroid mass spectrum object.
    See MassListBaseClass for details

    Methods
    -------
    * get_mass_spectrum(polarity=None, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.

    """

    def get_mass_spectrum(
        self,
        polarity: int = None,
        scan: int = 0,
        auto_process: bool = True,
        loadSettings: bool = True,
    ):
        """
        Read the Bruker XML peak list and return the mass spectrum object.

        Parameters
        ----------
        polarity : int, optional
            The polarity of the mass spectrum. Can be +1 or -1. If None (the default),
            the value returned by ``self.get_xml_polarity()`` is used.
        scan : int, optional
            The scan number of the mass spectrum. Default is 0. Currently not used by this method.
        auto_process : bool, optional
            Forwarded to the mass spectrum constructor. Default is True.
        loadSettings : bool, optional
            Whether to load the settings for the mass spectrum. Default is True.

        Returns
        -------
        mass_spec : MassSpecCentroid
            The mass spectrum object representing the centroided mass spectrum.
        """
        # Identity test: `== None` would defer to the operand's __eq__.
        if polarity is None:
            polarity = self.get_xml_polarity()

        dataframe = self.get_dataframe()

        self.check_columns(dataframe.columns)
        self.clean_data_frame(dataframe)
        dataframe.rename(columns=self.parameters.header_translate, inplace=True)

        output_parameters = self.get_output_parameters(polarity)

        mass_spec = MassSpecCentroid(
            dataframe.to_dict(orient="list"),
            output_parameters,
            auto_process=auto_process,
        )

        if loadSettings:
            self.load_settings(mass_spec, output_parameters)

        return mass_spec
The ReadBrukerXMLList object reads Bruker XML objects and returns the mass spectrum object. See MassListBaseClass for details.
Parameters
- MassListBaseClass (class): The base class for reading mass list data types and returning the mass spectrum object.
Methods
- get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
def
get_mass_spectrum( self, polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True):
def get_mass_spectrum(
    self,
    polarity: int = None,
    scan: int = 0,
    auto_process: bool = True,
    loadSettings: bool = True,
):
    """
    Reads mass list data types and returns the mass spectrum object.

    Parameters
    ----------
    polarity : int, optional
        The polarity of the mass spectrum. Can be +1 or -1. If None (the default),
        it is obtained from `self.get_xml_polarity()`.
    scan : int, optional
        The scan number of the mass spectrum. Default is 0.
        NOTE(review): this argument is not referenced in the method body;
        it appears to be kept only for signature compatibility - confirm.
    auto_process : bool, optional
        Whether to automatically process the mass spectrum. Default is True.
        Forwarded unchanged to MassSpecCentroid.
    loadSettings : bool, optional
        Whether to load the settings for the mass spectrum. Default is True.

    Returns
    -------
    mass_spec : MassSpecCentroid
        The mass spectrum object representing the centroided mass spectrum.
    """
    # Identity test against the None singleton: `== None` would go through
    # the argument's __eq__, which may be overloaded (e.g. array-like
    # values) and is discouraged by PEP 8.
    if polarity is None:
        polarity = self.get_xml_polarity()

    dataframe = self.get_dataframe()

    # Validate the raw column names before they are renamed below.
    self.check_columns(dataframe.columns)

    # The return value is not used here, so the helper presumably cleans
    # the frame in place - TODO confirm against MassListBaseClass.
    self.clean_data_frame(dataframe)

    # Map the file's column headers onto the configured label names.
    dataframe.rename(columns=self.parameters.header_translate, inplace=True)

    output_parameters = self.get_output_parameters(polarity)

    mass_spec = MassSpecCentroid(
        dataframe.to_dict(orient="list"),
        output_parameters,
        auto_process=auto_process,
    )

    if loadSettings:
        self.load_settings(mass_spec, output_parameters)

    return mass_spec
Reads mass list data types and returns the mass spectrum object.
Parameters
- polarity (int, optional): The polarity of the mass spectrum. Can be +1 or -1. If not provided (None), it will be determined from the XML file.
- scan (int, optional): The scan number of the mass spectrum. Default is 0.
- auto_process (bool, optional): Whether to automatically process the mass spectrum. Default is True.
- loadSettings (bool, optional): Whether to load the settings for the mass spectrum. Default is True.
Returns
- mass_spec (MassSpecCentroid): The mass spectrum object representing the centroided mass spectrum.
Inherited Members
- corems.mass_spectrum.input.baseClass.MassListBaseClass
- MassListBaseClass
- file_location
- header_lines
- isCentroid
- isThermoProfile
- headerless
- analyzer
- instrument_label
- sample_name
- parameters
- set_parameter_from_toml
- set_parameter_from_json
- data_type
- delimiter
- encoding_detector
- set_data_type
- get_dataframe
- load_settings
- get_output_parameters
- clean_data_frame
- check_columns
- read_xml_peaks
- get_xml_polarity