griptomo.core.ig_extract_features

  1import igraph as ig
  2import numpy as np
  3import time
  4
  5
  6def igraph_calc_graph_features(G):
  7    """
  8    Calculate various graph features for an igraph.Graph object.
  9
 10    Parameters
 11    ----------
 12    G : igraph.Graph
 13        The input graph.
 14
 15    Returns
 16    -------
 17    dict
 18        A dictionary containing the calculated graph features:
 19        - "total n nodes": Total number of nodes in the graph.
 20        - "total n edges": Total number of edges in the graph.
 21        - "n nodes": Number of nodes in the largest connected component.
 22        - "n edges": Number of edges in the largest connected component.
 23        - "density": Density of the graph.
 24        - "diameter": Diameter of the graph.
 25        - "avg clustering": Average clustering coefficient of the graph.
 26        - "max closeness centrality": Maximum closeness centrality in the graph.
 27        - "max eigenvector centrality": Maximum eigenvector centrality in the graph.
 28        - "degree assortativity": Degree assortativity of the graph.
 29        - "max clique number": Size of the largest clique in the graph.
 30        - "n communities": Number of communities detected using the fast greedy algorithm.
 31        - "avg path length": Average path length in the graph.
 32        - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs.
 33
 34    Raises
 35    ------
 36    ValueError
 37        If the input is not an igraph.Graph object.
 38    """
 39    if not isinstance(G, ig.Graph):
 40        raise ValueError("Input must be an igraph.Graph object")
 41
 42    G_feat = {}
 43    G_feat["total n nodes"] = G.vcount()
 44    G_feat["total n edges"] = G.ecount()
 45
 46    if not G.is_connected():
 47        # graph isn't connected --> use largest connected component
 48        # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components
 49        G = G.clusters().giant()
 50
 51    G_feat["n nodes"] = float(G.vcount())
 52    G_feat["n edges"] = float(G.ecount())
 53    G_feat["density"] = G.density()
 54    G_feat["diameter"] = G.diameter()
 55    clustering = G.transitivity_local_undirected(mode="zero")
 56    if sum(clustering) == 0 or len(clustering) == 0:
 57        G_feat["avg clustering"] = sum(clustering)
 58    else:
 59        G_feat["avg clustering"] = sum(clustering) / len(clustering)
 60    G_feat["max closeness centrality"] = np.max(list(G.closeness()))
 61    G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False))
 62    G_feat["degree assortativity"] = G.assortativity_degree()
 63    G_feat["max clique number"] = len(max(G.largest_cliques(), key=len))
 64    G_feat["n communities"] = G.community_fastgreedy().optimal_count
 65    G_feat["avg path length"] = float(G.average_path_length())
 66    G_feat["max betweenness centrality"] = round(
 67        (
 68            np.max(G.betweenness())
 69            / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2)
 70        ),
 71        15,
 72    )
 73
 74    return G_feat
 75
 76
 77def igraph_calc_graph_features_timed(G, skip_clique_num=False):
 78    """
 79    Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object.
 80
 81    Parameters
 82    ----------
 83    G : igraph.Graph
 84        The input graph for which features are to be calculated.
 85    skip_clique_num : bool, optional
 86        If True, the calculation of the largest clique number will be skipped. Default is False.
 87
 88    Returns
 89    -------
 90    tuple
 91        A tuple containing two dictionaries:
 92        - G_feat (dict): A dictionary with keys as feature names and values as the computed feature values.
 93            Features include:
 94            - "total n nodes": Total number of nodes in the graph.
 95            - "total n edges": Total number of edges in the graph.
 96            - "n nodes": Number of nodes in the largest connected component.
 97            - "n edges": Number of edges in the largest connected component.
 98            - "density": Density of the graph.
 99            - "diameter": Diameter of the graph.
100            - "avg clustering": Average clustering coefficient of the graph.
101            - "max closeness centrality": Maximum closeness centrality in the graph.
102            - "max eigenvector centrality": Maximum eigenvector centrality in the graph.
103            - "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1.
104            - "max clique number": Size of the largest clique in the graph.
105            - "n communities": Number of communities detected using the fast greedy algorithm.
106            - "avg path length": Average path length in the graph.
107            - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized.
108
109        - G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature.
110    """
111    if not isinstance(G, ig.Graph):
112        raise ValueError("Input must be an igraph.Graph object")
113
114    G_feat = {}
115    G_time = {}
116    start_time = time.perf_counter()
117    G_feat["total n nodes"] = G.vcount()
118    G_time["total n nodes"] = time.perf_counter() - start_time
119
120    start_time = time.perf_counter()
121    G_feat["total n edges"] = G.ecount()
122    G_time["total n edges"] = time.perf_counter() - start_time
123
124    if not G.is_connected():
125        # graph isn't connected --> use largest connected component
126        # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components
127        start_time = time.perf_counter()
128        G = G.clusters().giant()
129        G_time["connected component"] = time.perf_counter() - start_time
130    else:
131        G_time["connected component"] = 0
132
133    start_time = time.perf_counter()
134    G_feat["n nodes"] = float(G.vcount())
135    G_time["n nodes"] = time.perf_counter() - start_time
136
137    start_time = time.perf_counter()
138    G_feat["n edges"] = float(G.ecount())
139    G_time["n edges"] = time.perf_counter() - start_time
140
141    start_time = time.perf_counter()
142    G_feat["density"] = G.density()
143    G_time["density"] = time.perf_counter() - start_time
144
145    start_time = time.perf_counter()
146    G_feat["diameter"] = G.diameter()
147    G_time["diameter"] = time.perf_counter() - start_time
148
149    start_time = time.perf_counter()
150    clustering = G.transitivity_local_undirected(mode="zero")
151    G_time["clustering"] = time.perf_counter() - start_time
152
153    start_time = time.perf_counter()
154    if sum(clustering) == 0 or len(clustering) == 0:
155        G_feat["avg clustering"] = sum(clustering)
156    else:
157        G_feat["avg clustering"] = sum(clustering) / len(clustering)
158    G_time["avg clustering"] = time.perf_counter() - start_time
159
160    start_time = time.perf_counter()
161    G_feat["max closeness centrality"] = np.max(list(G.closeness()))
162    G_time["max closeness centrality"] = time.perf_counter() - start_time
163
164    start_time = time.perf_counter()
165    G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False))
166    G_time["max eigenvector centrality"] = time.perf_counter() - start_time
167
168    start_time = time.perf_counter()
169    G_feat["degree assortativity"] = (
170        G.assortativity_degree() + 1
171    ) / 2  # normalized between 0 and 1.
172    G_time["degree assortativity"] = time.perf_counter() - start_time
173
174    if not skip_clique_num:
175        start_time = time.perf_counter()
176        G_feat["max clique number"] = len(max(G.largest_cliques(), key=len))
177        G_time["max clique number"] = time.perf_counter() - start_time
178
179    start_time = time.perf_counter()
180    G_feat["n communities"] = G.community_fastgreedy().optimal_count
181    G_time["n communities"] = time.perf_counter() - start_time
182
183    start_time = time.perf_counter()
184    G_feat["avg path length"] = float(G.average_path_length())
185    G_time["avg path length"] = time.perf_counter() - start_time
186
187    start_time = time.perf_counter()
188    G_feat["max betweenness centrality"] = round(
189        (
190            np.max(G.betweenness())
191            / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2)
192        ),
193        15,
194    )
195    G_time["max betweenness centrality"] = time.perf_counter() - start_time
196
197    return G_feat, G_time
def igraph_calc_graph_features(G):
 7def igraph_calc_graph_features(G):
 8    """
 9    Calculate various graph features for an igraph.Graph object.
10
11    Parameters
12    ----------
13    G : igraph.Graph
14        The input graph.
15
16    Returns
17    -------
18    dict
19        A dictionary containing the calculated graph features:
20        - "total n nodes": Total number of nodes in the graph.
21        - "total n edges": Total number of edges in the graph.
22        - "n nodes": Number of nodes in the largest connected component.
23        - "n edges": Number of edges in the largest connected component.
24        - "density": Density of the graph.
25        - "diameter": Diameter of the graph.
26        - "avg clustering": Average clustering coefficient of the graph.
27        - "max closeness centrality": Maximum closeness centrality in the graph.
28        - "max eigenvector centrality": Maximum eigenvector centrality in the graph.
29        - "degree assortativity": Degree assortativity of the graph.
30        - "max clique number": Size of the largest clique in the graph.
31        - "n communities": Number of communities detected using the fast greedy algorithm.
32        - "avg path length": Average path length in the graph.
33        - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs.
34
35    Raises
36    ------
37    ValueError
38        If the input is not an igraph.Graph object.
39    """
40    if not isinstance(G, ig.Graph):
41        raise ValueError("Input must be an igraph.Graph object")
42
43    G_feat = {}
44    G_feat["total n nodes"] = G.vcount()
45    G_feat["total n edges"] = G.ecount()
46
47    if not G.is_connected():
48        # graph isn't connected --> use largest connected component
49        # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components
50        G = G.clusters().giant()
51
52    G_feat["n nodes"] = float(G.vcount())
53    G_feat["n edges"] = float(G.ecount())
54    G_feat["density"] = G.density()
55    G_feat["diameter"] = G.diameter()
56    clustering = G.transitivity_local_undirected(mode="zero")
57    if sum(clustering) == 0 or len(clustering) == 0:
58        G_feat["avg clustering"] = sum(clustering)
59    else:
60        G_feat["avg clustering"] = sum(clustering) / len(clustering)
61    G_feat["max closeness centrality"] = np.max(list(G.closeness()))
62    G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False))
63    G_feat["degree assortativity"] = G.assortativity_degree()
64    G_feat["max clique number"] = len(max(G.largest_cliques(), key=len))
65    G_feat["n communities"] = G.community_fastgreedy().optimal_count
66    G_feat["avg path length"] = float(G.average_path_length())
67    G_feat["max betweenness centrality"] = round(
68        (
69            np.max(G.betweenness())
70            / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2)
71        ),
72        15,
73    )
74
75    return G_feat

Calculate various graph features for an igraph.Graph object.

Parameters
  • G (igraph.Graph): The input graph.
Returns
  • dict: A dictionary containing the calculated graph features:
    • "total n nodes": Total number of nodes in the graph.
    • "total n edges": Total number of edges in the graph.
    • "n nodes": Number of nodes in the largest connected component.
    • "n edges": Number of edges in the largest connected component.
    • "density": Density of the graph.
    • "diameter": Diameter of the graph.
    • "avg clustering": Average clustering coefficient of the graph.
    • "max closeness centrality": Maximum closeness centrality in the graph.
    • "max eigenvector centrality": Maximum eigenvector centrality in the graph.
    • "degree assortativity": Degree assortativity of the graph.
    • "max clique number": Size of the largest clique in the graph.
    • "n communities": Number of communities detected using the fast greedy algorithm.
    • "avg path length": Average path length in the graph.
    • "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs.
Raises
  • ValueError: If the input is not an igraph.Graph object.
def igraph_calc_graph_features_timed(G, skip_clique_num=False):
 78def igraph_calc_graph_features_timed(G, skip_clique_num=False):
 79    """
 80    Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object.
 81
 82    Parameters
 83    ----------
 84    G : igraph.Graph
 85        The input graph for which features are to be calculated.
 86    skip_clique_num : bool, optional
 87        If True, the calculation of the largest clique number will be skipped. Default is False.
 88
 89    Returns
 90    -------
 91    tuple
 92        A tuple containing two dictionaries:
 93        - G_feat (dict): A dictionary with keys as feature names and values as the computed feature values.
 94            Features include:
 95            - "total n nodes": Total number of nodes in the graph.
 96            - "total n edges": Total number of edges in the graph.
 97            - "n nodes": Number of nodes in the largest connected component.
 98            - "n edges": Number of edges in the largest connected component.
 99            - "density": Density of the graph.
100            - "diameter": Diameter of the graph.
101            - "avg clustering": Average clustering coefficient of the graph.
102            - "max closeness centrality": Maximum closeness centrality in the graph.
103            - "max eigenvector centrality": Maximum eigenvector centrality in the graph.
104            - "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1.
105            - "max clique number": Size of the largest clique in the graph.
106            - "n communities": Number of communities detected using the fast greedy algorithm.
107            - "avg path length": Average path length in the graph.
108            - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized.
109
110        - G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature.
111    """
112    if not isinstance(G, ig.Graph):
113        raise ValueError("Input must be an igraph.Graph object")
114
115    G_feat = {}
116    G_time = {}
117    start_time = time.perf_counter()
118    G_feat["total n nodes"] = G.vcount()
119    G_time["total n nodes"] = time.perf_counter() - start_time
120
121    start_time = time.perf_counter()
122    G_feat["total n edges"] = G.ecount()
123    G_time["total n edges"] = time.perf_counter() - start_time
124
125    if not G.is_connected():
126        # graph isn't connected --> use largest connected component
127        # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components
128        start_time = time.perf_counter()
129        G = G.clusters().giant()
130        G_time["connected component"] = time.perf_counter() - start_time
131    else:
132        G_time["connected component"] = 0
133
134    start_time = time.perf_counter()
135    G_feat["n nodes"] = float(G.vcount())
136    G_time["n nodes"] = time.perf_counter() - start_time
137
138    start_time = time.perf_counter()
139    G_feat["n edges"] = float(G.ecount())
140    G_time["n edges"] = time.perf_counter() - start_time
141
142    start_time = time.perf_counter()
143    G_feat["density"] = G.density()
144    G_time["density"] = time.perf_counter() - start_time
145
146    start_time = time.perf_counter()
147    G_feat["diameter"] = G.diameter()
148    G_time["diameter"] = time.perf_counter() - start_time
149
150    start_time = time.perf_counter()
151    clustering = G.transitivity_local_undirected(mode="zero")
152    G_time["clustering"] = time.perf_counter() - start_time
153
154    start_time = time.perf_counter()
155    if sum(clustering) == 0 or len(clustering) == 0:
156        G_feat["avg clustering"] = sum(clustering)
157    else:
158        G_feat["avg clustering"] = sum(clustering) / len(clustering)
159    G_time["avg clustering"] = time.perf_counter() - start_time
160
161    start_time = time.perf_counter()
162    G_feat["max closeness centrality"] = np.max(list(G.closeness()))
163    G_time["max closeness centrality"] = time.perf_counter() - start_time
164
165    start_time = time.perf_counter()
166    G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False))
167    G_time["max eigenvector centrality"] = time.perf_counter() - start_time
168
169    start_time = time.perf_counter()
170    G_feat["degree assortativity"] = (
171        G.assortativity_degree() + 1
172    ) / 2  # normalized between 0 and 1.
173    G_time["degree assortativity"] = time.perf_counter() - start_time
174
175    if not skip_clique_num:
176        start_time = time.perf_counter()
177        G_feat["max clique number"] = len(max(G.largest_cliques(), key=len))
178        G_time["max clique number"] = time.perf_counter() - start_time
179
180    start_time = time.perf_counter()
181    G_feat["n communities"] = G.community_fastgreedy().optimal_count
182    G_time["n communities"] = time.perf_counter() - start_time
183
184    start_time = time.perf_counter()
185    G_feat["avg path length"] = float(G.average_path_length())
186    G_time["avg path length"] = time.perf_counter() - start_time
187
188    start_time = time.perf_counter()
189    G_feat["max betweenness centrality"] = round(
190        (
191            np.max(G.betweenness())
192            / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2)
193        ),
194        15,
195    )
196    G_time["max betweenness centrality"] = time.perf_counter() - start_time
197
198    return G_feat, G_time

Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object.

Parameters
  • G (igraph.Graph): The input graph for which features are to be calculated.
  • skip_clique_num (bool, optional): If True, the calculation of the largest clique number will be skipped. Default is False.
Returns
  • tuple: A tuple containing two dictionaries:

    • G_feat (dict): A dictionary with keys as feature names and values as the computed feature values. Features include:
    • "total n nodes": Total number of nodes in the graph.
    • "total n edges": Total number of edges in the graph.
    • "n nodes": Number of nodes in the largest connected component.
    • "n edges": Number of edges in the largest connected component.
    • "density": Density of the graph.
    • "diameter": Diameter of the graph.
    • "avg clustering": Average clustering coefficient of the graph.
    • "max closeness centrality": Maximum closeness centrality in the graph.
    • "max eigenvector centrality": Maximum eigenvector centrality in the graph.
    • "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1.
    • "max clique number": Size of the largest clique in the graph.
    • "n communities": Number of communities detected using the fast greedy algorithm.
    • "avg path length": Average path length in the graph.
    • "max betweenness centrality": Maximum betweenness centrality in the graph, normalized.

    • G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature.