Coverage for lingpy/thirdparty/linkcomm/link_clustering.py : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/bin/env python # encoding: utf-8
# link_clustering.py # Jim Bagrow, Yong-Yeol Ahn # Last Modified: 2010-08-27
# Copyright 2008,2009,2010 James Bagrow, Yong-Yeol Ahn # # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Changes 2010-08-27: * All three output files now contain the same community id numbers. * comm2nodes and comm2edges both present the cid as the first entry of each line; previously only comm2nodes did this. * Implemented the weighted version and added the '-w' switch. * Expanded the help string to explain inputs and outputs. """
# this code was taken and modified from # https://github.com/bagrow/linkcomm/blob/master/python/link_clustering.py # accessed on 07.10.2013 (JML)
"""partition density"""
# print "clustering..." excluded by JML
else:
# (1.0, (None, None)) takes care of the special case where the last # merging gives the maximum partition density (e.g. a single clique).
#self.list_D.append( (0.0,self.list_D[-1][1]) ) # add final val
"""Get all the edge similarities. Input dict maps nodes to sets of neighbors. Output is a list of decorated edge-pairs, (1-sim,eij,eik), ordered by similarity. """ # print "computing similarities..." excluded by JML
"""Same as similarities_unweighted but using tanimoto coefficient. `adj' is a dict mapping nodes to sets of neighbors, ij2wij is a dict mapping an edge (ni,nj) tuple to the weight wij of that edge. """ # print "computing similarities..." excluded by JML
#print ind, 100.0*ind/len(adj)
#if __name__ == '__main__': # # build option parser: # class MyParser(OptionParser): # def format_epilog(self, formatter): # return self.epilog # # usage = "usage: python %prog [options] filename" # description = """The link communities method of Ahn, Bagrow, and Lehmann, Nature, 2010: # www.nature.com/nature/journal/v466/n7307/full/nature09182.html (doi:10.1038/nature09182) # """ # epilog = """ # #Input: # An edgelist file where each line represents an edge: # node_i <delimiter> node_j <newline> # if unweighted, or # node_i <delimiter> node_j <delimiter> weight_ij <newline> # if weighted. # #Output: # Three text files with extensions .edge2comm.txt, .comm2edges.txt, # and .comm2nodes.txt store the communities. # # edge2comm, an edge on each line followed by the community # id (cid) of the edge's link comm: # node_i <delimiter> node_j <delimiter> cid <newline> # # comm2edges, a list of edges representing one community per line: # cid <delimiter> ni,nj <delimiter> nx,ny [...] <newline> # # comm2nodes, a list of nodes representing one community per line: # cid <delimiter> ni <delimiter> nj [...] <newline> # # The output filename contains the threshold at which the dendrogram # was cut, if applicable, or the threshold where the maximum # partition density was found, and the value of the partition # density. # # If no threshold was given to cut the dendrogram, a file ending with # `_thr_D.txt' is generated, containing the partition density as a # function of clustering threshold. 
#""" # parser = MyParser(usage, description=description,epilog=epilog) # parser.add_option("-d", "--delimiter", dest="delimiter", default="\t", # help="delimiter of input & output files [default: tab]") # parser.add_option("-t", "--threshold", dest="threshold", type="float", default=None, # help="threshold to cut the dendrogram (optional)") # parser.add_option("-w", "--weighted", dest="is_weighted", action="store_true", default=False, # help="is the network weighted?") # # # parse options: # (options, args) = parser.parse_args() # if len(args) != 1: # parser.error("incorrect number of arguments") # delimiter = options.delimiter # if delimiter == '\\t': # delimiter = '\t' # threshold = options.threshold # is_weighted = options.is_weighted # # # print "# loading network from edgelist..." # basename = os.path.splitext(args[0])[0] # if is_weighted: # adj,edges,ij2wij = read_edgelist_weighted(args[0], delimiter=delimiter) # else: # adj,edges = read_edgelist_unweighted(args[0], delimiter=delimiter) # # # # run the method: # if threshold is not None: # if is_weighted: # edge2cid,D_thr = HLC( adj,edges ).single_linkage( threshold, w=ij2wij ) # else: # edge2cid,D_thr = HLC( adj,edges ).single_linkage( threshold ) # print "# D_thr = %f" % D_thr # write_edge2cid( edge2cid,"%s_thrS%f_thrD%f" % (basename,threshold,D_thr), delimiter=delimiter ) # else: # if is_weighted: # edge2cid,S_max,D_max,list_D = HLC( adj,edges ).single_linkage( w=ij2wij ) # else: # edge2cid,S_max,D_max,list_D = HLC( adj,edges ).single_linkage() # f = open("%s_thr_D.txt" % basename,'w') # for s,D in list_D: # print >>f, s, D # f.close() # print "# D_max = %f\n# S_max = %f" % (D_max,S_max) # write_edge2cid( edge2cid,"%s_maxS%f_maxD%f" % (basename,S_max,D_max), delimiter=delimiter )
|