|
| 1 | +from collections import defaultdict |
| 2 | +from graph import Graph |
| 3 | +import numpy as np |
| 4 | +from utils import cmap2C |
| 5 | + |
| 6 | + |
def normalized_adj_wgt(graph):
    """Return every edge weight normalized by the degrees of its endpoints.

    For the edge stored in slot ``j`` of the adjacency arrays, joining node
    ``i`` to ``neigh = graph.adj_list[j]``, the result holds
    ``adj_wgt[j] / sqrt(degree[i] * degree[neigh])``.

    Args:
        graph: object exposing ``node_num``, ``adj_idx``, ``adj_list``,
            ``adj_wgt`` and ``degree``.

    Returns:
        A float32 array with the same shape as ``graph.adj_wgt``. Slots that
        are not covered by any node's ``adj_idx`` range are left at 0.
    """
    adj_list = graph.adj_list
    adj_wgt = graph.adj_wgt
    adj_idx = graph.adj_idx
    degree = graph.degree
    norm_wgt = np.zeros(adj_wgt.shape, dtype=np.float32)
    for i in range(graph.node_num):
        for j in range(adj_idx[i], adj_idx[i + 1]):
            neigh = adj_list[j]
            # adj_wgt is indexed by edge slot (j), not by node id; using the
            # neighbor's id here would pick up an unrelated edge's weight.
            norm_wgt[j] = adj_wgt[j] / np.sqrt(degree[i] * degree[neigh])
    return norm_wgt
| 17 | + |
| 18 | +############################################### |
| 19 | +# This section of code adapted from jiongqianliang/MILE # |
| 20 | +# http://jiongqianliang.com/MILE/ # |
| 21 | +############################################### |
| 22 | + |
def generate_hybrid_matching(max_node_wgt, graph):
    """Build the node groups for one coarsening step with the hybrid method.

    Nodes sharing an identical neighbor set are grouped first (structural
    equivalence matching, via ``jaccard_idx_preprocess``). Every node still
    unmatched is then visited in ascending order of neighbor count and paired
    with the unmatched neighbor that has the largest normalized edge weight,
    provided the combined node weight does not exceed ``max_node_wgt``. A node
    with no admissible neighbor forms a group of its own.

    Side effect: ``graph.cmap`` is overwritten in place so that each node maps
    to the index of the group containing it.

    Args:
        max_node_wgt: upper bound on the summed node weight of a matched pair.
        graph: graph object providing ``node_num``, ``adj_list``, ``adj_idx``,
            ``node_wgt`` and ``cmap``.

    Returns:
        ``(groups, coarse_graph_size)`` where ``groups`` is a list of node-id
        lists and ``coarse_graph_size`` is the number of groups.
    """
    n_nodes = graph.node_num
    neighbors = graph.adj_list  # flat array of neighbor ids
    offsets = graph.adj_idx  # start of each node's slice in `neighbors`
    vertex_wgt = graph.node_wgt
    coarse_map = graph.cmap
    edge_scores = normalized_adj_wgt(graph)
    groups = []  # one entry per coarse node
    matched = [False] * n_nodes

    # SEM: structural equivalence matching.
    jaccard_idx_preprocess(graph, matched, groups)

    degree = [offsets[v + 1] - offsets[v] for v in range(n_nodes)]
    for node in np.argsort(degree):
        if matched[node]:
            continue
        partner = node
        best_score = -1
        for pos in range(offsets[node], offsets[node + 1]):
            candidate = neighbors[pos]
            # Self-loops are skipped, otherwise a node would mostly match itself.
            if candidate == node:
                continue
            if matched[candidate]:
                continue
            score = edge_scores[pos]
            if best_score < score and vertex_wgt[node] + vertex_wgt[candidate] <= max_node_wgt:
                partner = candidate
                best_score = score
        # partner == node means no admissible neighbor was found.
        matched[node] = matched[partner] = True
        groups.append([node] if node == partner else [node, partner])

    for coarse_id, members in enumerate(groups):
        for member in members:
            coarse_map[member] = coarse_id
    return (groups, len(groups))
| 67 | + |
| 68 | + |
def jaccard_idx_preprocess(graph, matched, groups):
    """Group nodes whose neighbor sets are exactly the same.

    Each node is bucketed under the string form of its sorted neighbor list.
    Every bucket holding more than one node is appended to ``groups`` and its
    members are flagged in ``matched``. Both arguments are mutated in place;
    nothing is returned.

    Args:
        graph: object providing ``node_num`` and ``get_neighs(i)``.
        matched: list of booleans indexed by node id, updated in place.
        groups: list of node-id lists, extended in place.
    """
    buckets = defaultdict(list)
    for node in range(graph.node_num):
        signature = str(sorted(graph.get_neighs(node)))
        buckets[signature].append(node)

    for members in buckets.values():
        if len(members) <= 1:
            continue
        for node in members:
            matched[node] = True
        groups.append(members)
| 82 | + |
| 83 | + |
def create_coarse_graph(graph, groups, coarse_graph_size):
    """Build and return the coarser graph induced by ``groups``.

    Each group becomes one coarse node whose weight is the sum of its members'
    node weights. Edges of the members are remapped through ``graph.cmap``;
    edges landing on the same coarse neighbor are merged by summing their
    weights. Edges that map back onto the coarse node itself are kept, and
    every edge weight is also added to the coarse node's degree.

    Side effects: links the two graphs (``coarse_graph.finer`` and
    ``graph.coarser``), stores the matching matrix in ``graph.C`` and sets
    ``coarse_graph.A`` to ``C^T * graph.A * C``.

    Args:
        graph: the finer graph; its ``cmap`` must already map each node to a
            group index.
        groups: list of node-id lists, one per coarse node.
        coarse_graph_size: number of coarse nodes.

    Returns:
        The newly created coarse graph.
    """
    coarse_graph = Graph(coarse_graph_size, graph.edge_num)
    coarse_graph.finer = graph
    graph.coarser = coarse_graph

    fine_to_coarse = graph.cmap
    fine_adj_list = graph.adj_list
    fine_adj_idx = graph.adj_idx
    fine_adj_wgt = graph.adj_wgt
    fine_node_wgt = graph.node_wgt

    c_adj_list = coarse_graph.adj_list
    c_adj_idx = coarse_graph.adj_idx
    c_adj_wgt = coarse_graph.adj_wgt
    c_node_wgt = coarse_graph.node_wgt
    c_degree = coarse_graph.degree

    c_adj_idx[0] = 0
    edge_count = 0  # number of edges written to the coarse graph so far
    for coarse_id, members in enumerate(groups):
        slot_of = {}  # coarse neighbor id -> its position in c_adj_list
        for rank, fine_node in enumerate(members):
            if rank == 0:
                c_node_wgt[coarse_id] = fine_node_wgt[fine_node]
            else:
                c_node_wgt[coarse_id] += fine_node_wgt[fine_node]

            for pos in range(fine_adj_idx[fine_node], fine_adj_idx[fine_node + 1]):
                # Coarse id of the neighbor stored in slot `pos`.
                target = fine_to_coarse[fine_adj_list[pos]]
                weight = fine_adj_wgt[pos]
                slot = slot_of.get(target)
                if slot is None:
                    # First edge towards this coarse neighbor.
                    c_adj_list[edge_count] = target
                    c_adj_wgt[edge_count] = weight
                    slot_of[target] = edge_count
                    edge_count += 1
                else:
                    # Merge into the existing coarse edge.
                    c_adj_wgt[slot] += weight
                # Self-loops are retained, so every edge counts to the degree.
                c_degree[coarse_id] += weight

        c_adj_idx[coarse_id + 1] = edge_count

    coarse_graph.edge_num = edge_count
    coarse_graph.resize_adj(edge_count)

    matching = cmap2C(fine_to_coarse)  # construct the matching matrix.
    graph.C = matching
    coarse_graph.A = matching.transpose().dot(graph.A).dot(matching)
    return coarse_graph
0 commit comments