griptomo.core.ig_extract_features
1import igraph as ig 2import numpy as np 3import time 4 5 6def igraph_calc_graph_features(G): 7 """ 8 Calculate various graph features for an igraph.Graph object. 9 10 Parameters 11 ---------- 12 G : igraph.Graph 13 The input graph. 14 15 Returns 16 ------- 17 dict 18 A dictionary containing the calculated graph features: 19 - "total n nodes": Total number of nodes in the graph. 20 - "total n edges": Total number of edges in the graph. 21 - "n nodes": Number of nodes in the largest connected component. 22 - "n edges": Number of edges in the largest connected component. 23 - "density": Density of the graph. 24 - "diameter": Diameter of the graph. 25 - "avg clustering": Average clustering coefficient of the graph. 26 - "max closeness centrality": Maximum closeness centrality in the graph. 27 - "max eigenvector centrality": Maximum eigenvector centrality in the graph. 28 - "degree assortativity": Degree assortativity of the graph. 29 - "max clique number": Size of the largest clique in the graph. 30 - "n communities": Number of communities detected using the fast greedy algorithm. 31 - "avg path length": Average path length in the graph. 32 - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs. 33 34 Raises 35 ------ 36 ValueError 37 If the input is not an igraph.Graph object. 38 """ 39 if not isinstance(G, ig.Graph): 40 raise ValueError("Input must be an igraph.Graph object") 41 42 G_feat = {} 43 G_feat["total n nodes"] = G.vcount() 44 G_feat["total n edges"] = G.ecount() 45 46 if not G.is_connected(): 47 # graph isn't connected --> use largest connected component 48 # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components 49 G = G.clusters().giant() 50 51 G_feat["n nodes"] = float(G.vcount()) 52 G_feat["n edges"] = float(G.ecount()) 53 G_feat["density"] = G.density() 54 G_feat["diameter"] = G.diameter() 55 clustering = G.transitivity_local_undirected(mode="zero") 56 if sum(clustering) == 0 or len(clustering) == 0: 57 G_feat["avg clustering"] = sum(clustering) 58 else: 59 G_feat["avg clustering"] = sum(clustering) / len(clustering) 60 G_feat["max closeness centrality"] = np.max(list(G.closeness())) 61 G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False)) 62 G_feat["degree assortativity"] = G.assortativity_degree() 63 G_feat["max clique number"] = len(max(G.largest_cliques(), key=len)) 64 G_feat["n communities"] = G.community_fastgreedy().optimal_count 65 G_feat["avg path length"] = float(G.average_path_length()) 66 G_feat["max betweenness centrality"] = round( 67 ( 68 np.max(G.betweenness()) 69 / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2) 70 ), 71 15, 72 ) 73 74 return G_feat 75 76 77def igraph_calc_graph_features_timed(G, skip_clique_num=False): 78 """ 79 Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object. 80 81 Parameters 82 ---------- 83 G : igraph.Graph 84 The input graph for which features are to be calculated. 85 skip_clique_num : bool, optional 86 If True, the calculation of the largest clique number will be skipped. Default is False. 87 88 Returns 89 ------- 90 tuple 91 A tuple containing two dictionaries: 92 - G_feat (dict): A dictionary with keys as feature names and values as the computed feature values. 93 Features include: 94 - "total n nodes": Total number of nodes in the graph. 95 - "total n edges": Total number of edges in the graph. 96 - "n nodes": Number of nodes in the largest connected component. 97 - "n edges": Number of edges in the largest connected component. 98 - "density": Density of the graph. 99 - "diameter": Diameter of the graph. 100 - "avg clustering": Average clustering coefficient of the graph. 101 - "max closeness centrality": Maximum closeness centrality in the graph. 102 - "max eigenvector centrality": Maximum eigenvector centrality in the graph. 103 - "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1. 104 - "max clique number": Size of the largest clique in the graph. 105 - "n communities": Number of communities detected using the fast greedy algorithm. 106 - "avg path length": Average path length in the graph. 107 - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized. 108 109 - G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature. 110 """ 111 if not isinstance(G, ig.Graph): 112 raise ValueError("Input must be an igraph.Graph object") 113 114 G_feat = {} 115 G_time = {} 116 start_time = time.perf_counter() 117 G_feat["total n nodes"] = G.vcount() 118 G_time["total n nodes"] = time.perf_counter() - start_time 119 120 start_time = time.perf_counter() 121 G_feat["total n edges"] = G.ecount() 122 G_time["total n edges"] = time.perf_counter() - start_time 123 124 if not G.is_connected(): 125 # graph isn't connected --> use largest connected component 126 # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components 127 start_time = time.perf_counter() 128 G = G.clusters().giant() 129 G_time["connected component"] = time.perf_counter() - start_time 130 else: 131 G_time["connected component"] = 0 132 133 start_time = time.perf_counter() 134 G_feat["n nodes"] = float(G.vcount()) 135 G_time["n nodes"] = time.perf_counter() - start_time 136 137 start_time = time.perf_counter() 138 G_feat["n edges"] = float(G.ecount()) 139 G_time["n edges"] = time.perf_counter() - start_time 140 141 start_time = time.perf_counter() 142 G_feat["density"] = G.density() 143 G_time["density"] = time.perf_counter() - start_time 144 145 start_time = time.perf_counter() 146 G_feat["diameter"] = G.diameter() 147 G_time["diameter"] = time.perf_counter() - start_time 148 149 start_time = time.perf_counter() 150 clustering = G.transitivity_local_undirected(mode="zero") 151 G_time["clustering"] = time.perf_counter() - start_time 152 153 start_time = time.perf_counter() 154 if sum(clustering) == 0 or len(clustering) == 0: 155 G_feat["avg clustering"] = sum(clustering) 156 else: 157 G_feat["avg clustering"] = sum(clustering) / len(clustering) 158 G_time["avg clustering"] = time.perf_counter() - start_time 159 160 start_time = time.perf_counter() 161 G_feat["max closeness centrality"] = np.max(list(G.closeness())) 162 G_time["max closeness centrality"] = time.perf_counter() - start_time 163 164 start_time = time.perf_counter() 165 G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False)) 166 G_time["max eigenvector centrality"] = time.perf_counter() - start_time 167 168 start_time = time.perf_counter() 169 G_feat["degree assortativity"] = ( 170 G.assortativity_degree() + 1 171 ) / 2 # normalized between 0 and 1. 172 G_time["degree assortativity"] = time.perf_counter() - start_time 173 174 if not skip_clique_num: 175 start_time = time.perf_counter() 176 G_feat["max clique number"] = len(max(G.largest_cliques(), key=len)) 177 G_time["max clique number"] = time.perf_counter() - start_time 178 179 start_time = time.perf_counter() 180 G_feat["n communities"] = G.community_fastgreedy().optimal_count 181 G_time["n communities"] = time.perf_counter() - start_time 182 183 start_time = time.perf_counter() 184 G_feat["avg path length"] = float(G.average_path_length()) 185 G_time["avg path length"] = time.perf_counter() - start_time 186 187 start_time = time.perf_counter() 188 G_feat["max betweenness centrality"] = round( 189 ( 190 np.max(G.betweenness()) 191 / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2) 192 ), 193 15, 194 ) 195 G_time["max betweenness centrality"] = time.perf_counter() - start_time 196 197 return G_feat, G_time
def
igraph_calc_graph_features(G):
7def igraph_calc_graph_features(G): 8 """ 9 Calculate various graph features for an igraph.Graph object. 10 11 Parameters 12 ---------- 13 G : igraph.Graph 14 The input graph. 15 16 Returns 17 ------- 18 dict 19 A dictionary containing the calculated graph features: 20 - "total n nodes": Total number of nodes in the graph. 21 - "total n edges": Total number of edges in the graph. 22 - "n nodes": Number of nodes in the largest connected component. 23 - "n edges": Number of edges in the largest connected component. 24 - "density": Density of the graph. 25 - "diameter": Diameter of the graph. 26 - "avg clustering": Average clustering coefficient of the graph. 27 - "max closeness centrality": Maximum closeness centrality in the graph. 28 - "max eigenvector centrality": Maximum eigenvector centrality in the graph. 29 - "degree assortativity": Degree assortativity of the graph. 30 - "max clique number": Size of the largest clique in the graph. 31 - "n communities": Number of communities detected using the fast greedy algorithm. 32 - "avg path length": Average path length in the graph. 33 - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs. 34 35 Raises 36 ------ 37 ValueError 38 If the input is not an igraph.Graph object. 39 """ 40 if not isinstance(G, ig.Graph): 41 raise ValueError("Input must be an igraph.Graph object") 42 43 G_feat = {} 44 G_feat["total n nodes"] = G.vcount() 45 G_feat["total n edges"] = G.ecount() 46 47 if not G.is_connected(): 48 # graph isn't connected --> use largest connected component 49 # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components 50 G = G.clusters().giant() 51 52 G_feat["n nodes"] = float(G.vcount()) 53 G_feat["n edges"] = float(G.ecount()) 54 G_feat["density"] = G.density() 55 G_feat["diameter"] = G.diameter() 56 clustering = G.transitivity_local_undirected(mode="zero") 57 if sum(clustering) == 0 or len(clustering) == 0: 58 G_feat["avg clustering"] = sum(clustering) 59 else: 60 G_feat["avg clustering"] = sum(clustering) / len(clustering) 61 G_feat["max closeness centrality"] = np.max(list(G.closeness())) 62 G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False)) 63 G_feat["degree assortativity"] = G.assortativity_degree() 64 G_feat["max clique number"] = len(max(G.largest_cliques(), key=len)) 65 G_feat["n communities"] = G.community_fastgreedy().optimal_count 66 G_feat["avg path length"] = float(G.average_path_length()) 67 G_feat["max betweenness centrality"] = round( 68 ( 69 np.max(G.betweenness()) 70 / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2) 71 ), 72 15, 73 ) 74 75 return G_feat
Calculate various graph features for an igraph.Graph object.
Parameters
- G (igraph.Graph): The input graph.
Returns
- dict: A dictionary containing the calculated graph features:
- "total n nodes": Total number of nodes in the graph.
- "total n edges": Total number of edges in the graph.
- "n nodes": Number of nodes in the largest connected component.
- "n edges": Number of edges in the largest connected component.
- "density": Density of the graph.
- "diameter": Diameter of the graph.
- "avg clustering": Average clustering coefficient of the graph.
- "max closeness centrality": Maximum closeness centrality in the graph.
- "max eigenvector centrality": Maximum eigenvector centrality in the graph.
- "degree assortativity": Degree assortativity of the graph.
- "max clique number": Size of the largest clique in the graph.
- "n communities": Number of communities detected using the fast greedy algorithm.
- "avg path length": Average path length in the graph.
- "max betweenness centrality": Maximum betweenness centrality in the graph, normalized by the number of possible node pairs.
Raises
- ValueError: If the input is not an igraph.Graph object.
def
igraph_calc_graph_features_timed(G, skip_clique_num=False):
78def igraph_calc_graph_features_timed(G, skip_clique_num=False): 79 """ 80 Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object. 81 82 Parameters 83 ---------- 84 G : igraph.Graph 85 The input graph for which features are to be calculated. 86 skip_clique_num : bool, optional 87 If True, the calculation of the largest clique number will be skipped. Default is False. 88 89 Returns 90 ------- 91 tuple 92 A tuple containing two dictionaries: 93 - G_feat (dict): A dictionary with keys as feature names and values as the computed feature values. 94 Features include: 95 - "total n nodes": Total number of nodes in the graph. 96 - "total n edges": Total number of edges in the graph. 97 - "n nodes": Number of nodes in the largest connected component. 98 - "n edges": Number of edges in the largest connected component. 99 - "density": Density of the graph. 100 - "diameter": Diameter of the graph. 101 - "avg clustering": Average clustering coefficient of the graph. 102 - "max closeness centrality": Maximum closeness centrality in the graph. 103 - "max eigenvector centrality": Maximum eigenvector centrality in the graph. 104 - "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1. 105 - "max clique number": Size of the largest clique in the graph. 106 - "n communities": Number of communities detected using the fast greedy algorithm. 107 - "avg path length": Average path length in the graph. 108 - "max betweenness centrality": Maximum betweenness centrality in the graph, normalized. 109 110 - G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature. 111 """ 112 if not isinstance(G, ig.Graph): 113 raise ValueError("Input must be an igraph.Graph object") 114 115 G_feat = {} 116 G_time = {} 117 start_time = time.perf_counter() 118 G_feat["total n nodes"] = G.vcount() 119 G_time["total n nodes"] = time.perf_counter() - start_time 120 121 start_time = time.perf_counter() 122 G_feat["total n edges"] = G.ecount() 123 G_time["total n edges"] = time.perf_counter() - start_time 124 125 if not G.is_connected(): 126 # graph isn't connected --> use largest connected component 127 # see ref: https://igraph.org/python/doc/igraph.GraphBase-class.html#components 128 start_time = time.perf_counter() 129 G = G.clusters().giant() 130 G_time["connected component"] = time.perf_counter() - start_time 131 else: 132 G_time["connected component"] = 0 133 134 start_time = time.perf_counter() 135 G_feat["n nodes"] = float(G.vcount()) 136 G_time["n nodes"] = time.perf_counter() - start_time 137 138 start_time = time.perf_counter() 139 G_feat["n edges"] = float(G.ecount()) 140 G_time["n edges"] = time.perf_counter() - start_time 141 142 start_time = time.perf_counter() 143 G_feat["density"] = G.density() 144 G_time["density"] = time.perf_counter() - start_time 145 146 start_time = time.perf_counter() 147 G_feat["diameter"] = G.diameter() 148 G_time["diameter"] = time.perf_counter() - start_time 149 150 start_time = time.perf_counter() 151 clustering = G.transitivity_local_undirected(mode="zero") 152 G_time["clustering"] = time.perf_counter() - start_time 153 154 start_time = time.perf_counter() 155 if sum(clustering) == 0 or len(clustering) == 0: 156 G_feat["avg clustering"] = sum(clustering) 157 else: 158 G_feat["avg clustering"] = sum(clustering) / len(clustering) 159 G_time["avg clustering"] = time.perf_counter() - start_time 160 161 start_time = time.perf_counter() 162 G_feat["max closeness centrality"] = np.max(list(G.closeness())) 163 G_time["max closeness centrality"] = time.perf_counter() - start_time 164 165 start_time = time.perf_counter() 166 G_feat["max eigenvector centrality"] = np.max(G.eigenvector_centrality(scale=False)) 167 G_time["max eigenvector centrality"] = time.perf_counter() - start_time 168 169 start_time = time.perf_counter() 170 G_feat["degree assortativity"] = ( 171 G.assortativity_degree() + 1 172 ) / 2 # normalized between 0 and 1. 173 G_time["degree assortativity"] = time.perf_counter() - start_time 174 175 if not skip_clique_num: 176 start_time = time.perf_counter() 177 G_feat["max clique number"] = len(max(G.largest_cliques(), key=len)) 178 G_time["max clique number"] = time.perf_counter() - start_time 179 180 start_time = time.perf_counter() 181 G_feat["n communities"] = G.community_fastgreedy().optimal_count 182 G_time["n communities"] = time.perf_counter() - start_time 183 184 start_time = time.perf_counter() 185 G_feat["avg path length"] = float(G.average_path_length()) 186 G_time["avg path length"] = time.perf_counter() - start_time 187 188 start_time = time.perf_counter() 189 G_feat["max betweenness centrality"] = round( 190 ( 191 np.max(G.betweenness()) 192 / (((G_feat["n nodes"] - 1) * (G_feat["n nodes"] - 2)) / 2) 193 ), 194 15, 195 ) 196 G_time["max betweenness centrality"] = time.perf_counter() - start_time 197 198 return G_feat, G_time
Calculate various graph features and the time taken to compute each feature for a given igraph.Graph object.
Parameters
- G (igraph.Graph): The input graph for which features are to be calculated.
- skip_clique_num (bool, optional): If True, the calculation of the largest clique number will be skipped. Default is False.
Returns
tuple: A tuple containing two dictionaries:
- G_feat (dict): A dictionary with keys as feature names and values as the computed feature values. Features include:
- "total n nodes": Total number of nodes in the graph.
- "total n edges": Total number of edges in the graph.
- "n nodes": Number of nodes in the largest connected component.
- "n edges": Number of edges in the largest connected component.
- "density": Density of the graph.
- "diameter": Diameter of the graph.
- "avg clustering": Average clustering coefficient of the graph.
- "max closeness centrality": Maximum closeness centrality in the graph.
- "max eigenvector centrality": Maximum eigenvector centrality in the graph.
- "degree assortativity": Degree assortativity of the graph, normalized between 0 and 1.
- "max clique number": Size of the largest clique in the graph.
- "n communities": Number of communities detected using the fast greedy algorithm.
- "avg path length": Average path length in the graph.
"max betweenness centrality": Maximum betweenness centrality in the graph, normalized.
G_time (dict): A dictionary with keys as feature names and values as the time, in seconds, taken to compute each feature.