chatgpt实现louvain算法

Louvain算法是一种用于社区发现的算法,其主要思想是将网络中的节点划分为不同的社区,使得同一社区内的节点之间具有更高的相似度,而不同社区之间的相似度则较低。下面是在Python中实现Louvain算法的步骤:

导入所需的库

import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

构建网络

# Build a small undirected example graph: 6 nodes, 8 unweighted edges.
G = nx.Graph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (4, 5), (4, 6), (5, 6)])

运行Louvain算法

communities = greedy_modularity_communities(G)

输出结果

print(list(communities))

以上代码将节点划分为不同的社区,并输出结果。需要注意的是,这里的网络是一个无向图,如果是有向图,需要使用DiGraph()函数构建网络。另外要指出:greedy_modularity_communities 实现的其实是 Clauset-Newman-Moore 贪心模块度算法,并非严格意义上的 Louvain 算法;NetworkX 2.8 及以上版本提供了 nx.community.louvain_communities 作为真正的 Louvain 实现。此外,还可以通过调整分辨率(resolution)等参数来优化社区划分的效果。

louvain算法:

import networkx as nxdef louvain(G):# 初始化每个节点所属的社区node2cp = dict((n, i) for i, n in enumerate(G.nodes()))# 计算每条边的初始模块度增益edgeGain = calcInitialGain(G, node2cp)# 当模块度增益不再上升时退出循环while True:modChanged = Falsefor node in G.nodes():curCp = node2cp[node]curMod = calcModularity(G, node2cp)bestCp, bestGain = curCp, 0for neighbor in G.neighbors(node):if node == neighbor:continue# 计算当前节点移动到邻居所属的社区时的模块度增益newCp = node2cp[neighbor]newMod = calcModularity(G, node2cp, node, newCp, edgeGain)gain = newMod - curModif gain > bestGain:bestGain = gainbestCp = newCpif bestCp != curCp:modChanged = Truenode2cp[node] = bestCp# 更新所有当前节点所在社区的邻接点的模块度增益for neighbor in G.neighbors(node):if node == neighbor:continueif node2cp[neighbor] == curCp:edgeGain[(min(node, neighbor), max(node, neighbor))] += 2 * bestGain# 计算当前社区的数量curCpNum = len(set(node2cp.values()))# 合并相邻的社区,直到模块度增益不再上升为止if not modChanged or curCpNum == 1:break# 构建邻接表adjDict = {cp:set() for cp in range(curCpNum)}for n1, n2, d in G.edges(data=True):if node2cp[n1] != node2cp[n2]:adjDict[node2cp[n1]].add(node2cp[n2])adjDict[node2cp[n2]].add(node2cp[n1]])# 找到当前社区可以合并的社区编号mergeDict = {}for cp, adj in adjDict.items():maxGain, bestCp = 0, cpfor adjCp in adj:gain = calcMergeGain(G, node2cp, cp, adjCp, edgeGain)if gain > maxGain:maxGain, bestCp = gain, adjCpmergeDict[bestCp] = cp# 更新节点所属社区for node, cp in node2cp.items():if cp in mergeDict:node2cp[node] = mergeDict[cp]# 更新边的模块度增益for edge, gain in edgeGain.items():cp1, cp2 = node2cp[edge[0]], node2cp[edge[1]]if cp1 == cp2:edgeGain[edge] = calcEdgeGain(G, node2cp, cp1)return [set([n for n, cp in node2cp.items() if cp == c]) for c in set(node2cp.values())]def calcInitialGain(G, node2cp):edgeGain = {}for n1, n2, d in G.edges(data=True):cp1, cp2 = node2cp[n1], node2cp[n2]if cp1 == cp2:edgeGain[(min(n1, n2), max(n1, n2))] = d['weight'] - G.degree(n1) * G.degree(n2) / (2 * G.size())else:edgeGain[(min(n1, n2), max(n1, n2))] = 0return edgeGaindef calcModularity(G, node2cp, testNode=None, newCp=None, 
edgeGain=None):if edgeGain is None:edgeGain = calcInitialGain(G, node2cp)

试了一下。不太行。
重新看看

阶段一:

	通过局部调整节点所在社区使得网络的模块度达到最优,进而实现对社区的划分。具体步骤:(1) 初始时,将每个节点看作一个单独的社区,计算模块度 Q;(2) 对于每个节点 i,逐个计算将其移动到邻点所在的社区后的模块度增益 ∆Q;(3) 将 i 移动到使得 ∆Q > 0 且最大的社区内;(4) 重复进行 (2)、(3),直至移动任何节点都不会再增加模块度。

阶段一结束。
阶段二:

	将同一社区的节点聚合成一个“超级节点”,构建新的网络。化点:一个社区作为一个“超级节点”;建边:原社区间只要有边,就给对应的“超级节点”连一条边,社区内有边则给对应的“超级节点”建一个自环;赋权:原社区间所有边上的权重之和赋给对应“超级节点”间的边,社区内所有边上的权重之和(每条边的两端各计一次,即乘 2)赋给对应“超级节点”的自环。

回到阶段一

代码如下:

# coding=utf-8
import collections
import random
# NOTE(review): the original line also carried "import read_graph as rg", which
# nothing in this file references — dropped as an unused (and project-local) import.


def load_graph(path):
    """Load an undirected weighted graph from a whitespace-separated edge list.

    Each line of the file is "u v w": two integer vertex ids and a float weight.
    Returns the adjacency as a dict of dicts: G[u][v] = w.  Only the (u, v)
    direction present in the file is stored — this assumes the input file lists
    both directions of every undirected edge (TODO: confirm against the data file).
    """
    G = collections.defaultdict(dict)
    with open(path) as text:
        for line in text:
            # strip() drops surrounding whitespace; split() yields 3 string fields
            vertices = line.strip().split()
            v_i = int(vertices[0])
            v_j = int(vertices[1])
            w = float(vertices[2])
            G[v_i][v_j] = w
    return G


class Vertex():
    """A (super-)node of the working graph.

    Tracks its id, its current community id, the set of original-graph nodes it
    contains, and k_in — the total weight of edges internal to it.
    """

    def __init__(self, vid, cid, nodes, k_in=0):
        self._vid = vid
        self._cid = cid
        self._nodes = nodes  # set of original node ids folded into this vertex
        self._kin = k_in     # weight of edges internal to this vertex (self-loop weight)


class Louvain():
    """Louvain community detection over a dict-of-dicts adjacency graph."""

    def __init__(self, G):
        self._G = G
        # Sum of all vertex degrees by weight, i.e. 2m (each undirected edge is
        # seen from both endpoints).  BUG FIX: the original added "1 per
        # neighbor", ignoring edge weights, while every modularity term below
        # uses weights; summing the weights is the consistent weighted
        # generalization and is identical for weight-1 input.
        self._m = 0
        self._cid_vertices = {}  # community id -> set of vertex ids currently in it
        self._vid_vertex = {}    # vertex id -> Vertex instance
        for vid in self._G.keys():
            self._cid_vertices[vid] = set([vid])
            self._vid_vertex[vid] = Vertex(vid, vid, set([vid]))
            self._m += sum(self._G[vid].values())

    def first_stage(self):
        """Phase 1: greedily move vertices to neighboring communities while the
        modularity gain is positive.  Returns True iff at least one move happened."""
        mod_inc = False
        # BUG FIX: the original called random.shuffle(list(visit_sequence)),
        # shuffling a throwaway copy — the visit order was never randomized.
        visit_sequence = list(self._G.keys())
        random.shuffle(visit_sequence)
        while True:
            can_stop = True  # set False whenever a vertex changes community
            for v_vid in visit_sequence:
                v_cid = self._vid_vertex[v_vid]._cid
                # k_v: total weight attached to v, including its internal (self-loop) weight
                k_v = sum(self._G[v_vid].values()) + self._vid_vertex[v_vid]._kin
                cid_Q = {}  # candidate community id -> modularity gain of moving v there
                for w_vid in self._G[v_vid].keys():
                    w_cid = self._vid_vertex[w_vid]._cid
                    if w_cid in cid_Q:
                        continue
                    # tot: total weight incident to community w_cid
                    tot = sum([sum(self._G[k].values()) + self._vid_vertex[k]._kin
                               for k in self._cid_vertices[w_cid]])
                    if w_cid == v_cid:
                        tot -= k_v  # exclude v itself when evaluating its own community
                    # k_v_in: weight from v into community w_cid
                    k_v_in = sum([v for k, v in self._G[v_vid].items()
                                  if k in self._cid_vertices[w_cid]])
                    # Only the sign/ordering of delta_Q matters, so the constant
                    # positive factor 1/(2*self._m) is dropped.
                    delta_Q = k_v_in - k_v * tot / self._m
                    cid_Q[w_cid] = delta_Q
                if not cid_Q:
                    # BUG FIX: isolated vertex — the original indexed into an
                    # empty sorted list and raised IndexError.
                    continue
                cid, max_delta_Q = sorted(cid_Q.items(), key=lambda item: item[1], reverse=True)[0]
                if max_delta_Q > 0.0 and cid != v_cid:
                    self._vid_vertex[v_vid]._cid = cid
                    self._cid_vertices[cid].add(v_vid)
                    self._cid_vertices[v_cid].remove(v_vid)
                    can_stop = False
                    mod_inc = True
            if can_stop:
                break
        return mod_inc

    def second_stage(self):
        """Phase 2: contract each community into one super-node.

        Intra-community edge weight is folded into the super-node's k_in
        (each internal edge is seen from both endpoints, hence the /2);
        inter-community weights become edges of the new graph.
        """
        cid_vertices = {}
        vid_vertex = {}
        for cid, vertices in self._cid_vertices.items():
            if len(vertices) == 0:
                continue
            new_vertex = Vertex(cid, cid, set())
            for vid in vertices:
                new_vertex._nodes.update(self._vid_vertex[vid]._nodes)
                new_vertex._kin += self._vid_vertex[vid]._kin
                for k, v in self._G[vid].items():
                    if k in vertices:
                        new_vertex._kin += v / 2.0  # internal edge counted from both ends
            cid_vertices[cid] = set([cid])
            vid_vertex[cid] = new_vertex
        # Rebuild adjacency between the surviving communities.
        G = collections.defaultdict(dict)
        for cid1, vertices1 in self._cid_vertices.items():
            if len(vertices1) == 0:
                continue
            for cid2, vertices2 in self._cid_vertices.items():
                if cid2 <= cid1 or len(vertices2) == 0:
                    continue  # each unordered pair handled once
                edge_weight = 0.0
                for vid in vertices1:
                    for k, v in self._G[vid].items():
                        if k in vertices2:
                            edge_weight += v
                if edge_weight != 0:
                    G[cid1][cid2] = edge_weight
                    G[cid2][cid1] = edge_weight
        self._cid_vertices = cid_vertices
        self._vid_vertex = vid_vertex
        self._G = G

    def get_communities(self):
        """Return the current partition as a list of sets of original node ids."""
        communities = []
        for vertices in self._cid_vertices.values():
            if len(vertices) != 0:
                c = set()
                for vid in vertices:
                    c.update(self._vid_vertex[vid]._nodes)
                communities.append(c)
        return communities

    def execute(self):
        """Alternate phase 1 and phase 2 until modularity stops improving."""
        while True:
            mod_inc = self.first_stage()
            if mod_inc:
                self.second_stage()
            else:
                break
        return self.get_communities()


if __name__ == '__main__':
    G = load_graph('data_graph/karate_34_graph.txt')
    algorithm = Louvain(G)
    communities = algorithm.execute()
    # 按照社区大小从大到小排序输出
    communities = sorted(communities, key=lambda b: -len(b))
    count = 0
    for communitie in communities:
        count += 1
        print("社区", count, " ", communitie)

结束

如有问题可联系我。


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部