corems.molecular_id.calc.SpectralSimilarity

View Source

  1__author__ = "Yuri E. Corilo"
  2__date__ = "Jun 09, 2021"
  3
  4from numpy.fft import rfft
  5from scipy.stats import pearsonr, spearmanr, kendalltau
  6from numpy import (
  7    power,
  8    dot,
  9    absolute,
 10    sqrt,
 11)
 12from numpy import sum as np_sum
 13from numpy.linalg import norm
 14from pandas import DataFrame
 15import numpy as np
 16
# Lookup table used by SpectralSimilarity.extra_distances(): each key is the
# name of an attribute looked up on the `math_distance` module, and each value
# is a human-readable label for that metric.  Keys are runtime identifiers
# (including their spelling and capitalisation), so they must not be "fixed"
# here without renaming the matching function.
# Entries that are commented out are skipped by extra_distances() because it
# iterates over this dict only.
methods_name = {
    # "entropy_distance": "Entropy Distance",
    # "weighted_entropy_distance": "Dynamic weighted entropy Distance",
    "chebyshev_distance": "Chebyshev Distance",
    "squared_euclidean_distance": "Squared Euclidean Distance",
    "fidelity_similarity": "Fidelity Similarity",
    "matusita_distance": "Matusita Distance",
    "squared_chord_distance": "Squared-chord Distance",
    # "bhattacharya_1_distance": "Bhattacharya 1 Distance",
    # "bhattacharya_2_distance": "Bhattacharya 2 Distance",
    "harmonic_mean_similarity": "Harmonic mean Distance",
    "Pearson_chi_squared_distance": "Pearson Chi Squared Distance",
    "Neyman_chi_squared_distance": "Neyman Chi Squared Distance",
    "probabilistic_symmetric_chi_squared_distance": "Probabilistic symmetric X2 Distance",
    "topsoe_distance": "Topsoe Distance",
    "chernoff_distance": "Chernoff Distance",
    "ruzicka_distance": "Ruzicka Distance",
    "roberts_distance": "Roberts Distance",
    # "intersection_distance": "Intersection Distance",
    "motyka_distance": "Motyka Distance",
    "canberra_distance": "Canberra Distance",
    "canberra_metric": "Canberra Metric",
    "kulczynski_1_distance": "Kulczynski 1 Distance",
    # "baroni_urbani_buser_distance": "Baroni-Urbani-Buser Distance",
    # "penrose_size_distance": "Penrose size Distance",
    # "mean_character_distance": "Mean character Distance",
    "lorentzian_distance": "Lorentzian Distance",
    # "penrose_shape_distance": "Penrose shape Distance",
    "clark_distance": "Clark Distance",
    "hellinger_distance": "Hellinger Distance",
    "whittaker_index_of_association_distance": "Whittaker index of association Distance",
    # "similarity_index_distance": "Similarity Index Distance",
    # "improved_similarity_distance": "Improved Similarity",
    # "absolute_value_distance": "Absolute Value Distance",
    "spectral_contrast_angle_distance": "Spectral Contrast Angle",
    "wave_hedges_distance": "Wave Hedges Distance",
    "dice_similarity": "Dice Similarity",
    "inner_product_distance": "Inner Product Distance",
    "divergence_distance": "Divergence Distance",
    "jensen_difference_distance": "Jensen Differences Distance",
    "kumar_johnson_distance": "Kumar Johnson Distance",
    "avg_l_distance": "Avg (L1, L8) Distance",
    "vicis_wave_hadges_distance": "Vicis Wave Hadges Distance",
    "vicis_symmetric_chi_squared_1_distance": "Vicis-Symmetric X2 1 Distance",
    "vicis_symmetric_chi_squared_2_distance": "Vicis-Symmetric X2 2 Distance",
    "vicis_symmetric_chi_squared_3_distance": "Vicis-Symmetric X2 3 Distance",
    "max_symmetric_chi_squared_distance": "Max Symmetric Chi Squared Distance",
    "min_symmetric_chi_squared_distance": "Min Symmetric Chi Squared Distance",
    # "ms_for_id_v1": "MSforID Distance version 1",
    # "ms_for_id": "MSforID Distance",
    "additive_sym_chi_sq": "Additive Symmetric Chi Squared",
    "bhattacharya_distance": "Battacharya Distance",
    "generalized_ochiai_index": "Generalized Ochiai Index",
    "gower_distance": "Gower Distance",
    "impr_sqrt_cosine_sim": "Improved Square Root Cosine Similarity",
    "intersection_sim": "Intersection Similarity",
    "j_divergence": "J Divergence",
    "jensen_shannon_index": "Jensen Shannon Index",
    "k_divergence": "K Divergence",
    "VW6": "VW6",
    "VW5": "VW5",
    "VW4": "VW4",
    "VW3": "VW3",
    "VW2": "VW2",
    "VW1": "VW1",
    "taneja_divergence": "Taneja Divergence",
    "symmetric_chi_squared_distance": "Symmetric Chi Squared Distance",
    "squared_chi_squared_distance": "Squared Chi Squared Distance",
    "square_root_cosine_correlation": "Square Root Cosine Correlation",
    "sorensen_distance": "Sorensen Distance",
    "Minokowski_3": "Minokowski 3 Distance",
    "Minokowski_4": "Minokowski 4 Distance",
    "kumarjohnson_divergence": "Kumar Johnson Divergence",
    "kumarhassebrook_similarity": "Kumar Hassebrook Similarity",
    "kullbackleibler_divergence": "Kullback Leibler Divergence",
    "soergel_distance": "Soergel Distance",
}
 94
# Two-element [first, second] numeric pair per metric name, evaluated once at
# import time (np.log, np.sqrt, np.arccos and np.inf are resolved here).
# NOTE(review): each pair looks like the [min, max] value range of the named
# metric -- confirm against the code that consumes this table; nothing in the
# visible part of this module reads it.
# NOTE(review): the keys here carry no "_distance" suffix, so they do not match
# the keys of `methods_name` one-to-one; verify how callers join the two.
methods_scale = {
    "entropy": [0, np.log(4)],
    "weighted_entropy": [0, np.log(4)],
    "absolute_value": [0, 2],
    "avg_l": [0, 1.5],
    "bhattacharya_1": [0, np.arccos(0) ** 2],
    "bhattacharya_2": [0, np.inf],
    "canberra": [0, np.inf],
    "clark": [0, np.inf],
    "divergence": [0, np.inf],
    "euclidean": [0, np.sqrt(2)],
    "hellinger": [0, np.inf],
    "improved_similarity": [0, np.inf],
    "lorentzian": [0, np.inf],
    "manhattan": [0, 2],
    "matusita": [0, np.sqrt(2)],
    "mean_character": [0, 2],
    "motyka": [-0.5, 0],
    "ms_for_id": [-np.inf, 0],
    "ms_for_id_v1": [0, np.inf],
    "pearson_correlation": [-1, 1],
    "penrose_shape": [0, np.sqrt(2)],
    "penrose_size": [0, np.inf],
    "probabilistic_symmetric_chi_squared": [0, 1],
    "similarity_index": [0, np.inf],
    "squared_chord": [0, 2],
    "squared_euclidean": [0, 2],
    "symmetric_chi_squared": [0, 0.5 * np.sqrt(2)],
    "topsoe": [0, np.sqrt(2)],
    "vicis_symmetric_chi_squared_3": [0, 2],
    "wave_hedges": [0, np.inf],
    "whittaker_index_of_association": [0, np.inf],
}
128
129
class SpectralSimilarity:
    """Class containing methods for calculating spectral similarity between two mass spectra.

    Parameters
    ----------
    ms_mz_abun_dict : dict
        Dictionary of mass to abundance values for the experimental mass spectrum.
    ref_obj : dict
        Reference mass spectrum; must provide the keys ``"mz"`` and
        ``"abundance"`` holding equally long sequences.
    norm_func : function
        Function used to normalize the abundance values (each vector is divided
        by ``norm_func(vector)``). ``None`` disables normalization.

    Attributes
    ----------
    normalize_func : function
        Function to normalize the abundance values.
    ms_mz_abun_dict : dict
        Dictionary of mass to abundance values for the experimental mass spectrum.
    ref_obj : dict
        Reference mass spectrum as passed in.
    exp_abun : list
        List of abundance values for the experimental mass spectrum.
    exp_mz : list
        List of mass values for the experimental mass spectrum.
    ref_mz : list
        List of mass values for the reference mass spectrum.
    ref_abun : list
        List of abundance values for the reference mass spectrum.
    ref_mz_abun_dict : dict
        Dictionary of mass to abundance values for the reference mass spectrum.
    df : DataFrame
        Two-row frame (row 0: experimental, row 1: reference) with one column
        per m/z value. m/z values missing from one spectrum are NaN; use
        ``nan_fill`` to obtain filled vectors.
    zero_filled_u_l : tuple
        Experimental and reference abundance vectors after filling missing
        m/z values with 1e-10 and normalizing.
    common_mz_values : list
        Sorted mass values that have a non-zero abundance in both spectra.
    n_x_y : int
        Number of common mass values between the experimental and reference mass spectra.

    Methods
    -------
    * nan_fill(df, fill_with=0).
        Fill missing mass values with a given value.
    * normalize(x, y, norm_func=sum).
        Normalize the abundance values.
    * weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10).
        Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    * cosine_correlation().
        Calculate the cosine correlation between the experimental and reference mass spectra.
    * stein_scott().
        Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    * pearson_correlation().
        Calculate the Pearson correlation between the experimental and reference mass spectra.
    * spearman_correlation().
        Calculate the Spearman correlation between the experimental and reference mass spectra.
    * kendall_tau().
        Calculate the Kendall's tau correlation between the experimental and reference mass spectra.
    * dft_correlation().
        Calculate the DFT correlation between the experimental and reference mass spectra.
    * dwt_correlation().
        Calculate the DWT correlation between the experimental and reference mass spectra.
    * euclidean_distance().
        Calculate the Euclidean distance between the experimental and reference mass spectra.
    * manhattan_distance().
        Calculate the Manhattan distance between the experimental and reference mass spectra.
    * jaccard_distance().
        Calculate the Jaccard distance between the experimental and reference mass spectra.
    * extra_distances().
        Calculate every metric listed in ``methods_name`` that ``math_distance`` implements.
    """

    def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum):
        self.normalize_func = norm_func
        self.ms_mz_abun_dict = ms_mz_abun_dict
        self.ref_obj = ref_obj

        self.exp_abun = list(self.ms_mz_abun_dict.values())
        self.exp_mz = list(self.ms_mz_abun_dict.keys())

        self.ref_mz = self.ref_obj.get("mz")
        self.ref_abun = self.ref_obj.get("abundance")

        self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun))

        # One row per spectrum, one column per m/z; m/z values present in only
        # one spectrum show up as NaN in the other row.
        self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict])

        # A tiny positive fill (instead of 0) is used for the default vectors.
        # nan_fill returns filled copies, so self.df keeps its NaNs and can be
        # re-filled with a different value later (e.g. exactly 0).
        x, y = self.nan_fill(self.df, fill_with=1e-10)
        self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func)

        # m/z values carrying a non-zero abundance in each spectrum
        exp_mz_filtered = {k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0}
        ref_mz_filtered = {k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0}

        # sorted intersection of both, and its size
        self.common_mz_values = sorted(exp_mz_filtered & ref_mz_filtered)
        self.n_x_y = len(self.common_mz_values)

    @staticmethod
    def _cosine(x, y):
        """Return the cosine of the angle between vectors ``x`` and ``y``."""
        return dot(x, y) / (norm(x) * norm(y))

    def _count_nonzero_exp(self):
        """Return the number of experimental peaks with a non-zero abundance."""
        return sum(a != 0 for a in self.exp_abun)

    def _composite_score(self, n_x, s_wc_x_y, s_other):
        """Combine a weighted-cosine score with a second score.

        The weighted-cosine score is weighted by ``n_x`` and the second score
        by the number of common peaks (``self.n_x_y``).
        """
        return (n_x * s_wc_x_y + self.n_x_y * s_other) / (n_x + self.n_x_y)

    def _zero_filled_normalized(self):
        """Return both abundance vectors with missing m/z set to exactly 0, normalized."""
        x, y = self.nan_fill(self.df, fill_with=0)
        return self.normalize(x, y, norm_func=self.normalize_func)

    def nan_fill(self, df, fill_with=0):
        """Fill missing mass values with a given value.

        The input frame is not modified; the values are taken from a filled
        copy. (Filling in place would make the first fill value permanent and
        turn every later call with a different ``fill_with`` into a no-op.)

        Parameters
        ----------
        df : DataFrame
            DataFrame containing the experimental (row 0) and reference (row 1)
            mass spectrum data.
        fill_with : float
            Value to fill missing mass values with.

        Returns
        -------
        x : numpy.ndarray
            Abundance values for the experimental mass spectrum.
        y : numpy.ndarray
            Abundance values for the reference mass spectrum.
        """
        transposed = df.fillna(fill_with).T

        return transposed[0].values, transposed[1].values

    def normalize(self, x, y, norm_func=sum):
        """Normalize the abundance values.

        Parameters
        ----------
        x : array-like
            Abundance values for the experimental mass spectrum.
        y : array-like
            Abundance values for the reference mass spectrum.
        norm_func : function
            Function to normalize the abundance values.
            Default is sum. If None, the inputs are returned unchanged.

        Returns
        -------
        u_l : tuple
            Tuple containing the experimental and reference mass spectrum data
            after normalization.
        """
        if norm_func is None:
            return (x, y)

        return (x / norm_func(x), y / norm_func(y))

    def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10):
        """Calculate the weighted cosine correlation between the experimental and reference mass spectra.

        Each peak is weighted as ``abundance**a * mz**b`` before the cosine of
        the two weighted vectors is taken.

        Parameters
        ----------
        a : float
            Weighting factor (exponent) for the abundance values.
            Default is 0.5
        b : float
            Weighting factor (exponent) for the mass values.
            Default is 1.3
        nanfill : float
            Value to fill missing mass values with.
            Default is 1e-10

        Returns
        -------
        correlation : float
            Weighted cosine correlation between the experimental and reference mass spectra.
        """
        ref_mz = self.ref_obj.get("mz")

        # weight both spectra and map the weights back to their m/z
        weighted_exp = power(self.exp_abun, a) * power(self.exp_mz, b)
        weighted_ref = power(self.ref_obj.get("abundance"), a) * power(ref_mz, b)

        weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), weighted_exp))
        weighted_ref_dict = dict(zip(ref_mz, weighted_ref))

        # align on the union of m/z values; weights missing on one side become nanfill
        aligned = DataFrame([weighted_exp_dict, weighted_ref_dict])
        x, y = self.nan_fill(aligned, fill_with=nanfill)

        return self._cosine(x, y)

    def cosine_correlation(self):
        """Calculate the cosine correlation between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Cosine correlation between the experimental and reference mass spectra.
        """
        x, y = self.zero_filled_u_l[0], self.zero_filled_u_l[1]

        return self._cosine(x, y)

    def stein_scott(self):
        """Calculate the Stein-Scott similarity between the experimental and reference mass spectra.

        A ratio term built from consecutive common peaks is combined with a
        weighted cosine correlation; two weightings of the cosine are reported.

        Returns
        -------
        s_ss_x_y : float
            Composite score using the weighted cosine with a=0.5, b=3.
        s_ss_x_y_nist : float
            Composite score using the weighted cosine with a=0.5, b=1.3.

        Notes
        -----
        Returns ``(0, 0)`` when the spectra share no non-zero peak.
        """
        # TODO check this code
        if self.n_x_y == 0:
            return 0, 0

        n_x = self._count_nonzero_exp()

        # exponents applied to abundance and m/z inside the ratio term
        a, b = 1, 0

        s_r_x_y = 0
        common = self.common_mz_values

        # walk over consecutive pairs of common m/z values
        for previous_value, current_value in zip(common, common[1:]):
            lc_current = power(self.ref_mz_abun_dict[current_value], a) * power(
                current_value, b
            )
            lc_previous = power(self.ref_mz_abun_dict[previous_value], a) * power(
                previous_value, b
            )

            uc_current = power(self.ms_mz_abun_dict[current_value], a) * power(
                current_value, b
            )
            uc_previous = power(self.ms_mz_abun_dict[previous_value], a) * power(
                previous_value, b
            )

            ratio = (lc_current / lc_previous) * (uc_previous / uc_current)

            # invert ratios above 1 so every term contributes at most 1
            exponent = 1 if ratio <= 1 else -1
            s_r_x_y = s_r_x_y + power(ratio, exponent)

        # n_x_y - 1 terms were summed, but the sum is divided by n_x_y
        s_r_x_y = s_r_x_y / self.n_x_y

        s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0)
        s_ss_x_y = self._composite_score(n_x, s_wc_x_y, s_r_x_y)

        s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0)
        s_ss_x_y_nist = self._composite_score(n_x, s_wc_x_y_nist, s_r_x_y)

        return s_ss_x_y, s_ss_x_y_nist

    def pearson_correlation(
        self,
    ):
        """Calculate the Pearson correlation between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Pearson correlation between the experimental and reference mass spectra.
        """
        correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1])

        # first element is the statistic, second the p-value
        return correlation[0]

    def spearman_correlation(self):
        """Calculate the Spearman correlation between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Spearman correlation between the experimental and reference mass spectra.
        """
        # ## TODO - Check axis
        correlation = spearmanr(
            self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0
        )

        return correlation[0]

    def kendall_tau(self):
        """Calculate the Kendall's tau correlation between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Kendall's tau correlation between the experimental and reference mass spectra.
        """
        correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1])

        return correlation[0]

    def dft_correlation(self):
        """Calculate the DFT correlation between the experimental and reference mass spectra.

        The cosine of the real parts of the two spectra's real FFTs is combined
        with the default weighted cosine correlation.

        Returns
        -------
        correlation : float
            DFT correlation between the experimental and reference mass spectra;
            0 when the spectra share no non-zero peak.
        """
        if self.n_x_y == 0:
            return 0

        n_x = self._count_nonzero_exp()

        # missing m/z are exactly 0 here (not the 1e-10 used for zero_filled_u_l)
        x, y = self._zero_filled_normalized()

        # only the real part of the transform is compared
        x_dft = rfft(x).real
        y_dft = rfft(y).real

        s_dft_xy = self._cosine(x_dft, y_dft)
        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)

        return self._composite_score(n_x, s_wc_x_y, s_dft_xy)

    def dwt_correlation(self):
        """Calculate the DWT correlation between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            DWT correlation between the experimental and reference mass spectra;
            0 when the spectra share no non-zero peak.

        Notes
        -----
        This function requires the PyWavelets library to be installed.
            This is not a default requirement as this function is not widely used.
            The library is only imported when there is something to compute.
        """
        if self.n_x_y == 0:
            return 0

        # optional dependency, imported lazily after the trivial case is handled
        from pywt import dwt

        n_x = self._count_nonzero_exp()

        # missing m/z are exactly 0 here (not the 1e-10 used for zero_filled_u_l)
        x, y = self._zero_filled_normalized()

        # "db2" wavelet; only the detail coefficients (index 1) are compared
        x_dwt_detail = dwt(list(x), "db2")[1]
        y_dwt_detail = dwt(list(y), "db2")[1]

        s_dwt_xy = self._cosine(x_dwt_detail, y_dwt_detail)
        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)

        return self._composite_score(n_x, s_wc_x_y, s_dwt_xy)

    def euclidean_distance(self):
        """Calculate the Euclidean distance between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Euclidean distance between the experimental and reference mass spectra.
        """
        qlist, rlist = self.zero_filled_u_l[0], self.zero_filled_u_l[1]

        return sqrt(np_sum(power(qlist - rlist, 2)))

    def manhattan_distance(self):
        """Calculate the Manhattan distance between the experimental and reference mass spectra.

        Returns
        -------
        correlation : float
            Manhattan distance between the experimental and reference mass spectra.
        """
        qlist, rlist = self.zero_filled_u_l[0], self.zero_filled_u_l[1]

        return np_sum(absolute(qlist - rlist))

    def jaccard_distance(self):
        """Calculate the Jaccard distance between the experimental and reference mass spectra.

        Computed on the abundance vectors as
        ``sum((q - r)**2) / (sum(q**2) + sum(r**2) - sum(q * r))``.

        Returns
        -------
        correlation : float
            Jaccard distance between the experimental and reference mass spectra.
        """
        qlist, rlist = self.zero_filled_u_l[0], self.zero_filled_u_l[1]

        numerator = np_sum(power(qlist - rlist, 2))
        denominator = (
            np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist)
        )

        return numerator / denominator

    def extra_distances(self):
        """Function to calculate distances using additional metrics defined in math_distance.py

        Currently, calculates all distances: every key of ``methods_name`` that
        names an attribute of ``math_distance`` is called with the two
        abundance vectors.

        Returns
        -------
        dict_res : dict
            Dictionary mapping each computed method name to its result.
        """
        from corems.molecular_id.calc import math_distance

        dict_res = {}

        for method in methods_name:
            if not hasattr(math_distance, method):
                continue

            f = getattr(math_distance, method)

            if method == "canberra_metric":
                # this metric is fed vectors whose missing m/z are exactly 0
                qlist, rlist = self._zero_filled_normalized()
            else:
                qlist = self.zero_filled_u_l[0]
                rlist = self.zero_filled_u_l[1]

            dict_res[method] = f(qlist, rlist)

        return dict_res

methods_name = {'chebyshev_distance': 'Chebyshev Distance', 'squared_euclidean_distance': 'Squared Euclidean Distance', 'fidelity_similarity': 'Fidelity Similarity', 'matusita_distance': 'Matusita Distance', 'squared_chord_distance': 'Squared-chord Distance', 'harmonic_mean_similarity': 'Harmonic mean Distance', 'Pearson_chi_squared_distance': 'Pearson Chi Squared Distance', 'Neyman_chi_squared_distance': 'Neyman Chi Squared Distance', 'probabilistic_symmetric_chi_squared_distance': 'Probabilistic symmetric X2 Distance', 'topsoe_distance': 'Topsoe Distance', 'chernoff_distance': 'Chernoff Distance', 'ruzicka_distance': 'Ruzicka Distance', 'roberts_distance': 'Roberts Distance', 'motyka_distance': 'Motyka Distance', 'canberra_distance': 'Canberra Distance', 'canberra_metric': 'Canberra Metric', 'kulczynski_1_distance': 'Kulczynski 1 Distance', 'lorentzian_distance': 'Lorentzian Distance', 'clark_distance': 'Clark Distance', 'hellinger_distance': 'Hellinger Distance', 'whittaker_index_of_association_distance': 'Whittaker index of association Distance', 'spectral_contrast_angle_distance': 'Spectral Contrast Angle', 'wave_hedges_distance': 'Wave Hedges Distance', 'dice_similarity': 'Dice Similarity', 'inner_product_distance': 'Inner Product Distance', 'divergence_distance': 'Divergence Distance', 'jensen_difference_distance': 'Jensen Differences Distance', 'kumar_johnson_distance': 'Kumar Johnson Distance', 'avg_l_distance': 'Avg (L1, L8) Distance', 'vicis_wave_hadges_distance': 'Vicis Wave Hadges Distance', 'vicis_symmetric_chi_squared_1_distance': 'Vicis-Symmetric X2 1 Distance', 'vicis_symmetric_chi_squared_2_distance': 'Vicis-Symmetric X2 2 Distance', 'vicis_symmetric_chi_squared_3_distance': 'Vicis-Symmetric X2 3 Distance', 'max_symmetric_chi_squared_distance': 'Max Symmetric Chi Squared Distance', 'min_symmetric_chi_squared_distance': 'Min Symmetric Chi Squared Distance', 'additive_sym_chi_sq': 'Additive Symmetric Chi Squared', 'bhattacharya_distance': 
'Battacharya Distance', 'generalized_ochiai_index': 'Generalized Ochiai Index', 'gower_distance': 'Gower Distance', 'impr_sqrt_cosine_sim': 'Improved Square Root Cosine Similarity', 'intersection_sim': 'Intersection Similarity', 'j_divergence': 'J Divergence', 'jensen_shannon_index': 'Jensen Shannon Index', 'k_divergence': 'K Divergence', 'VW6': 'VW6', 'VW5': 'VW5', 'VW4': 'VW4', 'VW3': 'VW3', 'VW2': 'VW2', 'VW1': 'VW1', 'taneja_divergence': 'Taneja Divergence', 'symmetric_chi_squared_distance': 'Symmetric Chi Squared Distance', 'squared_chi_squared_distance': 'Squared Chi Squared Distance', 'square_root_cosine_correlation': 'Square Root Cosine Correlation', 'sorensen_distance': 'Sorensen Distance', 'Minokowski_3': 'Minokowski 3 Distance', 'Minokowski_4': 'Minokowski 4 Distance', 'kumarjohnson_divergence': 'Kumar Johnson Divergence', 'kumarhassebrook_similarity': 'Kumar Hassebrook Similarity', 'kullbackleibler_divergence': 'Kullback Leibler Divergence', 'soergel_distance': 'Soergel Distance'}

methods_scale = {'entropy': [0, 1.3862943611198906], 'weighted_entropy': [0, 1.3862943611198906], 'absolute_value': [0, 2], 'avg_l': [0, 1.5], 'bhattacharya_1': [0, 2.4674011002723395], 'bhattacharya_2': [0, inf], 'canberra': [0, inf], 'clark': [0, inf], 'divergence': [0, inf], 'euclidean': [0, 1.4142135623730951], 'hellinger': [0, inf], 'improved_similarity': [0, inf], 'lorentzian': [0, inf], 'manhattan': [0, 2], 'matusita': [0, 1.4142135623730951], 'mean_character': [0, 2], 'motyka': [-0.5, 0], 'ms_for_id': [-inf, 0], 'ms_for_id_v1': [0, inf], 'pearson_correlation': [-1, 1], 'penrose_shape': [0, 1.4142135623730951], 'penrose_size': [0, inf], 'probabilistic_symmetric_chi_squared': [0, 1], 'similarity_index': [0, inf], 'squared_chord': [0, 2], 'squared_euclidean': [0, 2], 'symmetric_chi_squared': [0, 0.7071067811865476], 'topsoe': [0, 1.4142135623730951], 'vicis_symmetric_chi_squared_3': [0, 2], 'wave_hedges': [0, inf], 'whittaker_index_of_association': [0, inf]}