corems.mass_spectrum.calc.KendrickGroup

View Source

  1class KendrickGrouping:
  2    """Class for Kendrick grouping of mass spectra.
  3
  4    Methods
  5    -------
  6    * mz_odd_even_index_lists().
  7        Get odd and even indexes lists.
  8    * calc_error(current, test).
  9        Calculate the error between two values.
 10    * populate_kendrick_index_dict_error(list_indexes, sort=True).
 11        Populate the Kendrick index dictionary based on error.
 12    * populate_kendrick_index_dict_rounding(list_indexes, sort=True).
 13        Populate the Kendrick index dictionary based on rounding.
 14    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index).
 15        Sort the Kendrick index dictionary based on abundance.
 16    * kendrick_groups_indexes(sort=True).
 17        Get the Kendrick groups indexes dictionary.
 18
 19    """
 20
 21    def mz_odd_even_index_lists(self):
 22        """Get odd and even indexes lists.
 23
 24        Returns
 25        -------
 26        tuple
 27            A tuple containing the lists of even and odd indexes.
 28
 29        """
 30        even_idx = []
 31        odd_idx = []
 32
 33        for i, mspeak in enumerate(self.mspeaks):
 34            if mspeak.nominal_mz_exp % 2 == 0:
 35                even_idx.append(i)
 36            else:
 37                odd_idx.append(i)
 38
 39        return even_idx, odd_idx
 40
 41    def calc_error(self, current: float, test: float):
 42        """Calculate the error between two values.
 43
 44        Parameters
 45        ----------
 46        current : float
 47            The current value.
 48        test : float
 49            The test value.
 50
 51        Returns
 52        -------
 53        float
 54            The calculated error.
 55
 56        """
 57        return ((test - current) / current) * 1e6
 58
 59    def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True):
 60        """Populate the Kendrick index dictionary based on error.
 61
 62        Parameters
 63        ----------
 64        list_indexes : list
 65            The list of indexes.
 66        sort : bool, optional
 67            Whether to sort the dictionary by abundance (default is True).
 68
 69        Returns
 70        -------
 71        dict
 72            The Kendrick index dictionary.
 73
 74        """
 75
 76        def error():
 77            return abs(current_kmd_reference - next_mspeak.kmd)
 78
 79        already_found = []
 80
 81        all_results = []
 82
 83        for i in list_indexes:
 84            result_indexes = []
 85
 86            mspeak = self.mspeaks[i]
 87
 88            current_kmd_reference = mspeak.kmd
 89
 90            for j in list_indexes:
 91                if j not in already_found and j != i:
 92                    next_mspeak = self.mspeaks[j]
 93
 94                    if error() <= 0.001:
 95                        result_indexes.append(j)
 96                        already_found.append(j)
 97
 98                        current_kmd_reference = next_mspeak.kmd
 99
100            if result_indexes and len(result_indexes) > 3:
101                already_found.append(i)
102
103                result_indexes.insert(0, i)
104
105                all_results.append(result_indexes)
106            else:
107                for w in result_indexes:
108                    already_found.remove(w)
109
110        kendrick_group_index = {
111            i: indexes_list for i, indexes_list in enumerate(all_results)
112        }
113
114        # return dictionary with the keys sorted by sum of the abundances
115        if sort:
116            return dict(
117                sorted(
118                    kendrick_group_index.items(),
119                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
120                    reverse=False,
121                )
122            )
123
124        else:
125            return kendrick_group_index
126
127    def populate_kendrick_index_dict_rounding(
128        self, list_indexes: list, sort: bool = True
129    ):
130        """Populate the Kendrick index dictionary based on rounding.
131
132        Parameters
133        ----------
134        list_indexes : list
135            The list of indexes.
136        sort : bool, optional
137            Whether to sort the dictionary by abundance (default is True).
138
139        Returns
140        -------
141        dict
142            The Kendrick index dictionary.
143
144        """
145        kendrick_group_index = {}
146
147        for i in list_indexes:
148            mspeak = self.mspeaks[i]
149
150            group = round(mspeak.kmd * 100)
151
152            if group not in kendrick_group_index:
153                kendrick_group_index[group] = [i]
154
155            else:
156                last_index = kendrick_group_index[group][-1]
157
158                if self.parameters.mass_spectrum.verbose_processing:
159                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd))
160
161                if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001:
162                    kendrick_group_index[group].append(i)
163
164            # return dictionary with the keys sorted by sum of the abundances
165        if sort:
166            return dict(
167                sorted(
168                    kendrick_group_index.items(),
169                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
170                    reverse=True,
171                )
172            )
173
174        else:
175            return kendrick_group_index
176
177    def sort_abundance_kendrick_dict(
178        self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict
179    ):
180        """Sort the Kendrick index dictionary based on abundance.
181
182        Parameters
183        ----------
184        even_kendrick_group_index : dict
185            The Kendrick index dictionary for even indexes.
186        odd_kendrick_group_index : dict
187            The Kendrick index dictionary for odd indexes.
188
189        Returns
190        -------
191        dict
192            The sorted Kendrick index dictionary.
193
194        """
195        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
196
197        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
198
199        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
200
201        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
202
203        if sum_even >= sum_odd:
204            even_kendrick_group_index.update(odd_kendrick_group_index)
205
206            return even_kendrick_group_index
207
208        else:
209            odd_kendrick_group_index.update(even_kendrick_group_index)
210
211            return odd_kendrick_group_index
212
213    def kendrick_groups_indexes(self, sort: bool = True):
214        """Get the Kendrick groups indexes dictionary.
215
216        Parameters
217        ----------
218        sort : bool, optional
219            Whether to sort the dictionary by abundance (default is True).
220
221        Returns
222        -------
223        dict
224            The Kendrick groups indexes dictionary.
225
226        """
227        even_idx, odd_idx = self.mz_odd_even_index_lists()
228
229        even_kendrick_group_index = self.populate_kendrick_index_dict_error(
230            even_idx, sort=sort
231        )
232
233        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(
234            odd_idx, sort=sort
235        )
236
237        return self.sort_abundance_kendrick_dict(
238            even_kendrick_group_index, odd_kendrick_group_index
239        )

class KendrickGrouping: View Source

  2class KendrickGrouping:
  3    """Class for Kendrick grouping of mass spectra.
  4
  5    Methods
  6    -------
  7    * mz_odd_even_index_lists().
  8        Get odd and even indexes lists.
  9    * calc_error(current, test).
 10        Calculate the error between two values.
 11    * populate_kendrick_index_dict_error(list_indexes, sort=True).
 12        Populate the Kendrick index dictionary based on error.
 13    * populate_kendrick_index_dict_rounding(list_indexes, sort=True).
 14        Populate the Kendrick index dictionary based on rounding.
 15    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index).
 16        Sort the Kendrick index dictionary based on abundance.
 17    * kendrick_groups_indexes(sort=True).
 18        Get the Kendrick groups indexes dictionary.
 19
 20    """
 21
 22    def mz_odd_even_index_lists(self):
 23        """Get odd and even indexes lists.
 24
 25        Returns
 26        -------
 27        tuple
 28            A tuple containing the lists of even and odd indexes.
 29
 30        """
 31        even_idx = []
 32        odd_idx = []
 33
 34        for i, mspeak in enumerate(self.mspeaks):
 35            if mspeak.nominal_mz_exp % 2 == 0:
 36                even_idx.append(i)
 37            else:
 38                odd_idx.append(i)
 39
 40        return even_idx, odd_idx
 41
 42    def calc_error(self, current: float, test: float):
 43        """Calculate the error between two values.
 44
 45        Parameters
 46        ----------
 47        current : float
 48            The current value.
 49        test : float
 50            The test value.
 51
 52        Returns
 53        -------
 54        float
 55            The calculated error.
 56
 57        """
 58        return ((test - current) / current) * 1e6
 59
 60    def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True):
 61        """Populate the Kendrick index dictionary based on error.
 62
 63        Parameters
 64        ----------
 65        list_indexes : list
 66            The list of indexes.
 67        sort : bool, optional
 68            Whether to sort the dictionary by abundance (default is True).
 69
 70        Returns
 71        -------
 72        dict
 73            The Kendrick index dictionary.
 74
 75        """
 76
 77        def error():
 78            return abs(current_kmd_reference - next_mspeak.kmd)
 79
 80        already_found = []
 81
 82        all_results = []
 83
 84        for i in list_indexes:
 85            result_indexes = []
 86
 87            mspeak = self.mspeaks[i]
 88
 89            current_kmd_reference = mspeak.kmd
 90
 91            for j in list_indexes:
 92                if j not in already_found and j != i:
 93                    next_mspeak = self.mspeaks[j]
 94
 95                    if error() <= 0.001:
 96                        result_indexes.append(j)
 97                        already_found.append(j)
 98
 99                        current_kmd_reference = next_mspeak.kmd
100
101            if result_indexes and len(result_indexes) > 3:
102                already_found.append(i)
103
104                result_indexes.insert(0, i)
105
106                all_results.append(result_indexes)
107            else:
108                for w in result_indexes:
109                    already_found.remove(w)
110
111        kendrick_group_index = {
112            i: indexes_list for i, indexes_list in enumerate(all_results)
113        }
114
115        # return dictionary with the keys sorted by sum of the abundances
116        if sort:
117            return dict(
118                sorted(
119                    kendrick_group_index.items(),
120                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
121                    reverse=False,
122                )
123            )
124
125        else:
126            return kendrick_group_index
127
128    def populate_kendrick_index_dict_rounding(
129        self, list_indexes: list, sort: bool = True
130    ):
131        """Populate the Kendrick index dictionary based on rounding.
132
133        Parameters
134        ----------
135        list_indexes : list
136            The list of indexes.
137        sort : bool, optional
138            Whether to sort the dictionary by abundance (default is True).
139
140        Returns
141        -------
142        dict
143            The Kendrick index dictionary.
144
145        """
146        kendrick_group_index = {}
147
148        for i in list_indexes:
149            mspeak = self.mspeaks[i]
150
151            group = round(mspeak.kmd * 100)
152
153            if group not in kendrick_group_index:
154                kendrick_group_index[group] = [i]
155
156            else:
157                last_index = kendrick_group_index[group][-1]
158
159                if self.parameters.mass_spectrum.verbose_processing:
160                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd))
161
162                if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001:
163                    kendrick_group_index[group].append(i)
164
165            # return dictionary with the keys sorted by sum of the abundances
166        if sort:
167            return dict(
168                sorted(
169                    kendrick_group_index.items(),
170                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
171                    reverse=True,
172                )
173            )
174
175        else:
176            return kendrick_group_index
177
178    def sort_abundance_kendrick_dict(
179        self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict
180    ):
181        """Sort the Kendrick index dictionary based on abundance.
182
183        Parameters
184        ----------
185        even_kendrick_group_index : dict
186            The Kendrick index dictionary for even indexes.
187        odd_kendrick_group_index : dict
188            The Kendrick index dictionary for odd indexes.
189
190        Returns
191        -------
192        dict
193            The sorted Kendrick index dictionary.
194
195        """
196        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
197
198        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
199
200        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
201
202        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
203
204        if sum_even >= sum_odd:
205            even_kendrick_group_index.update(odd_kendrick_group_index)
206
207            return even_kendrick_group_index
208
209        else:
210            odd_kendrick_group_index.update(even_kendrick_group_index)
211
212            return odd_kendrick_group_index
213
214    def kendrick_groups_indexes(self, sort: bool = True):
215        """Get the Kendrick groups indexes dictionary.
216
217        Parameters
218        ----------
219        sort : bool, optional
220            Whether to sort the dictionary by abundance (default is True).
221
222        Returns
223        -------
224        dict
225            The Kendrick groups indexes dictionary.
226
227        """
228        even_idx, odd_idx = self.mz_odd_even_index_lists()
229
230        even_kendrick_group_index = self.populate_kendrick_index_dict_error(
231            even_idx, sort=sort
232        )
233
234        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(
235            odd_idx, sort=sort
236        )
237
238        return self.sort_abundance_kendrick_dict(
239            even_kendrick_group_index, odd_kendrick_group_index
240        )

Class for Kendrick grouping of mass spectra.

Methods

mz_odd_even_index_lists(). Get odd and even indexes lists.
calc_error(current, test). Calculate the error between two values.
populate_kendrick_index_dict_error(list_indexes, sort=True). Populate the Kendrick index dictionary based on error.
populate_kendrick_index_dict_rounding(list_indexes, sort=True). Populate the Kendrick index dictionary based on rounding.
sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). Sort the Kendrick index dictionary based on abundance.
kendrick_groups_indexes(sort=True). Get the Kendrick groups indexes dictionary.

def mz_odd_even_index_lists(self): View Source

22    def mz_odd_even_index_lists(self):
23        """Get odd and even indexes lists.
24
25        Returns
26        -------
27        tuple
28            A tuple containing the lists of even and odd indexes.
29
30        """
31        even_idx = []
32        odd_idx = []
33
34        for i, mspeak in enumerate(self.mspeaks):
35            if mspeak.nominal_mz_exp % 2 == 0:
36                even_idx.append(i)
37            else:
38                odd_idx.append(i)
39
40        return even_idx, odd_idx

Get odd and even indexes lists.

Returns

tuple: A tuple containing the lists of even and odd indexes.

def calc_error(self, current: float, test: float): View Source

42    def calc_error(self, current: float, test: float):
43        """Calculate the error between two values.
44
45        Parameters
46        ----------
47        current : float
48            The current value.
49        test : float
50            The test value.
51
52        Returns
53        -------
54        float
55            The calculated error.
56
57        """
58        return ((test - current) / current) * 1e6

Calculate the error between two values.

Parameters

current (float): The current value.
test (float): The test value.

Returns

float: The calculated error.

def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True): View Source

 60    def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True):
 61        """Populate the Kendrick index dictionary based on error.
 62
 63        Parameters
 64        ----------
 65        list_indexes : list
 66            The list of indexes.
 67        sort : bool, optional
 68            Whether to sort the dictionary by abundance (default is True).
 69
 70        Returns
 71        -------
 72        dict
 73            The Kendrick index dictionary.
 74
 75        """
 76
 77        def error():
 78            return abs(current_kmd_reference - next_mspeak.kmd)
 79
 80        already_found = []
 81
 82        all_results = []
 83
 84        for i in list_indexes:
 85            result_indexes = []
 86
 87            mspeak = self.mspeaks[i]
 88
 89            current_kmd_reference = mspeak.kmd
 90
 91            for j in list_indexes:
 92                if j not in already_found and j != i:
 93                    next_mspeak = self.mspeaks[j]
 94
 95                    if error() <= 0.001:
 96                        result_indexes.append(j)
 97                        already_found.append(j)
 98
 99                        current_kmd_reference = next_mspeak.kmd
100
101            if result_indexes and len(result_indexes) > 3:
102                already_found.append(i)
103
104                result_indexes.insert(0, i)
105
106                all_results.append(result_indexes)
107            else:
108                for w in result_indexes:
109                    already_found.remove(w)
110
111        kendrick_group_index = {
112            i: indexes_list for i, indexes_list in enumerate(all_results)
113        }
114
115        # return dictionary with the keys sorted by sum of the abundances
116        if sort:
117            return dict(
118                sorted(
119                    kendrick_group_index.items(),
120                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
121                    reverse=False,
122                )
123            )
124
125        else:
126            return kendrick_group_index

Populate the Kendrick index dictionary based on error.

Parameters

list_indexes (list): The list of indexes.
sort (bool, optional): Whether to sort the dictionary by abundance (default is True).

Returns

dict: The Kendrick index dictionary.

def populate_kendrick_index_dict_rounding(self, list_indexes: list, sort: bool = True): View Source

128    def populate_kendrick_index_dict_rounding(
129        self, list_indexes: list, sort: bool = True
130    ):
131        """Populate the Kendrick index dictionary based on rounding.
132
133        Parameters
134        ----------
135        list_indexes : list
136            The list of indexes.
137        sort : bool, optional
138            Whether to sort the dictionary by abundance (default is True).
139
140        Returns
141        -------
142        dict
143            The Kendrick index dictionary.
144
145        """
146        kendrick_group_index = {}
147
148        for i in list_indexes:
149            mspeak = self.mspeaks[i]
150
151            group = round(mspeak.kmd * 100)
152
153            if group not in kendrick_group_index:
154                kendrick_group_index[group] = [i]
155
156            else:
157                last_index = kendrick_group_index[group][-1]
158
159                if self.parameters.mass_spectrum.verbose_processing:
160                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd))
161
162                if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001:
163                    kendrick_group_index[group].append(i)
164
165            # return dictionary with the keys sorted by sum of the abundances
166        if sort:
167            return dict(
168                sorted(
169                    kendrick_group_index.items(),
170                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
171                    reverse=True,
172                )
173            )
174
175        else:
176            return kendrick_group_index

Populate the Kendrick index dictionary based on rounding.

Parameters

list_indexes (list): The list of indexes.
sort (bool, optional): Whether to sort the dictionary by abundance (default is True).

Returns

dict: The Kendrick index dictionary.

def sort_abundance_kendrick_dict( self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict): View Source

178    def sort_abundance_kendrick_dict(
179        self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict
180    ):
181        """Sort the Kendrick index dictionary based on abundance.
182
183        Parameters
184        ----------
185        even_kendrick_group_index : dict
186            The Kendrick index dictionary for even indexes.
187        odd_kendrick_group_index : dict
188            The Kendrick index dictionary for odd indexes.
189
190        Returns
191        -------
192        dict
193            The sorted Kendrick index dictionary.
194
195        """
196        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
197
198        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
199
200        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
201
202        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
203
204        if sum_even >= sum_odd:
205            even_kendrick_group_index.update(odd_kendrick_group_index)
206
207            return even_kendrick_group_index
208
209        else:
210            odd_kendrick_group_index.update(even_kendrick_group_index)
211
212            return odd_kendrick_group_index

Merge the even and odd Kendrick index dictionaries, using the one with the larger summed abundance as the base.

Parameters

even_kendrick_group_index (dict): The Kendrick index dictionary for even indexes.
odd_kendrick_group_index (dict): The Kendrick index dictionary for odd indexes.

Returns

dict: The merged Kendrick index dictionary.

def kendrick_groups_indexes(self, sort: bool = True): View Source

214    def kendrick_groups_indexes(self, sort: bool = True):
215        """Get the Kendrick groups indexes dictionary.
216
217        Parameters
218        ----------
219        sort : bool, optional
220            Whether to sort the dictionary by abundance (default is True).
221
222        Returns
223        -------
224        dict
225            The Kendrick groups indexes dictionary.
226
227        """
228        even_idx, odd_idx = self.mz_odd_even_index_lists()
229
230        even_kendrick_group_index = self.populate_kendrick_index_dict_error(
231            even_idx, sort=sort
232        )
233
234        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(
235            odd_idx, sort=sort
236        )
237
238        return self.sort_abundance_kendrick_dict(
239            even_kendrick_group_index, odd_kendrick_group_index
240        )

Get the Kendrick groups indexes dictionary.

Parameters

sort (bool, optional): Whether to sort the dictionary by abundance (default is True).

Returns

dict: The Kendrick groups indexes dictionary.