python-igraph manual

For using igraph from Python

   Home       Trees       Indices       Help   
Package igraph :: Module clustering
[hide private]

Source Code for Module igraph.clustering

   1  # vim:ts=4:sw=4:sts=4:et 
   2  # -*- coding: utf-8 -*- 
   3  """Classes related to graph clustering. 
   4   
   5  @undocumented: _handle_mark_groups_arg_for_clustering, _prepare_community_comparison""" 
   6   
   7  __license__ = u""" 
   8  Copyright (C) 2006-2012  Tamás Nepusz <ntamas@gmail.com> 
   9  Pázmány Péter sétány 1/a, 1117 Budapest, Hungary 
  10   
  11  This program is free software; you can redistribute it and/or modify 
  12  it under the terms of the GNU General Public License as published by 
  13  the Free Software Foundation; either version 2 of the License, or 
  14  (at your option) any later version. 
  15   
  16  This program is distributed in the hope that it will be useful, 
  17  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  19  GNU General Public License for more details. 
  20   
  21  You should have received a copy of the GNU General Public License 
  22  along with this program; if not, write to the Free Software 
  23  Foundation, Inc.,  51 Franklin Street, Fifth Floor, Boston, MA 
  24  02110-1301 USA 
  25  """ 
  26   
  27  from copy import deepcopy 
  28  from itertools import izip 
  29  from math import pi 
  30  from cStringIO import StringIO 
  31   
  32  from igraph import community_to_membership 
  33  from igraph.compat import property 
  34  from igraph.configuration import Configuration 
  35  from igraph.datatypes import UniqueIdGenerator 
  36  from igraph.drawing.colors import ClusterColoringPalette 
  37  from igraph.statistics import Histogram 
  38  from igraph.summary import _get_wrapper_for_width 
  39  from igraph.utils import str_to_orientation 
40 41 -class Clustering(object):
42 """Class representing a clustering of an arbitrary ordered set. 43 44 This is now used as a base for L{VertexClustering}, but it might be 45 useful for other purposes as well. 46 47 Members of an individual cluster can be accessed by the C{[]} operator: 48 49 >>> cl = Clustering([0,0,0,0,1,1,1,2,2,2,2]) 50 >>> cl[0] 51 [0, 1, 2, 3] 52 53 The membership vector can be accessed by the C{membership} property: 54 55 >>> cl.membership 56 [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2] 57 58 The number of clusters can be retrieved by the C{len} function: 59 60 >>> len(cl) 61 3 62 63 You can iterate over the clustering object as if it were a regular list 64 of clusters: 65 66 >>> for cluster in cl: 67 ... print " ".join(str(idx) for idx in cluster) 68 ... 69 0 1 2 3 70 4 5 6 71 7 8 9 10 72 73 If you need all the clusters at once as lists, you can simply convert 74 the clustering object to a list: 75 76 >>> cluster_list = list(cl) 77 >>> print cluster_list 78 [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9, 10]] 79 80 @undocumented: _formatted_cluster_iterator 81 """ 82
83 - def __init__(self, membership, params = None):
84 """Constructor. 85 86 @param membership: the membership list -- that is, the cluster 87 index in which each element of the set belongs to. 88 @param params: additional parameters to be stored in this 89 object's dictionary.""" 90 self._membership = list(membership) 91 if len(self._membership)>0: 92 self._len = max(m for m in self._membership if m is not None)+1 93 else: 94 self._len = 0 95 96 if params: 97 self.__dict__.update(params) 98
99 - def __getitem__(self, idx):
100 """Returns the members of the specified cluster. 101 102 @param idx: the index of the cluster 103 @return: the members of the specified cluster as a list 104 @raise IndexError: if the index is out of bounds""" 105 if idx < 0 or idx >= self._len: 106 raise IndexError("cluster index out of range") 107 return [i for i, e in enumerate(self._membership) if e == idx] 108
109 - def __iter__(self):
110 """Iterates over the clusters in this clustering. 111 112 This method will return a generator that generates the clusters 113 one by one.""" 114 clusters = [[] for _ in xrange(self._len)] 115 for idx, cluster in enumerate(self._membership): 116 clusters[cluster].append(idx) 117 return iter(clusters) 118
119 - def __len__(self):
120 """Returns the number of clusters. 121 122 @return: the number of clusters 123 """ 124 return self._len 125
126 - def __str__(self):
127 return self.summary(verbosity=1, width=78) 128
129 - def as_cover(self):
130 """Returns a L{Cover} that contains the same clusters as this clustering.""" 131 return Cover(self._graph, self) 132
133 - def compare_to(self, other, *args, **kwds):
134 """Compares this clustering to another one using some similarity or 135 distance metric. 136 137 This is a convenience method that simply calls L{compare_communities} 138 with the two clusterings as arguments. Any extra positional or keyword 139 argument is also forwarded to L{compare_communities}.""" 140 return compare_communities(self, other, *args, **kwds) 141 142 @property
143 - def membership(self):
144 """Returns the membership vector.""" 145 return self._membership[:] 146 147 @property
148 - def n(self):
149 """Returns the number of elements covered by this clustering.""" 150 return len(self._membership) 151
152 - def size(self, idx):
153 """Returns the size of a given cluster. 154 155 @param idx: the cluster in which we are interested. 156 """ 157 return len(self[idx]) 158
159 - def sizes(self, *args):
160 """Returns the size of given clusters. 161 162 The indices are given as positional arguments. If there are no 163 positional arguments, the function will return the sizes of all clusters. 164 """ 165 counts = [0] * len(self) 166 for x in self._membership: 167 counts[x] += 1 168 169 if args: 170 return [counts[idx] for idx in args] 171 172 return counts 173
174 - def size_histogram(self, bin_width = 1):
175 """Returns the histogram of cluster sizes. 176 177 @param bin_width: the bin width of the histogram 178 @return: a L{Histogram} object 179 """ 180 return Histogram(bin_width, self.sizes()) 181
182 - def summary(self, verbosity=0, width=None):
183 """Returns the summary of the clustering. 184 185 The summary includes the number of items and clusters, and also the 186 list of members for each of the clusters if the verbosity is nonzero. 187 188 @param verbosity: determines whether the cluster members should be 189 printed. Zero verbosity prints the number of items and clusters only. 190 @return: the summary of the clustering as a string. 191 """ 192 out = StringIO() 193 print >>out, "Clustering with %d elements and %d clusters" % \ 194 (len(self._membership), len(self)) 195 196 if verbosity < 1: 197 return out.getvalue().strip() 198 199 ndigits = len(str(len(self))) 200 wrapper = _get_wrapper_for_width(width, 201 subsequent_indent = " " * (ndigits+3)) 202 203 for idx, cluster in enumerate(self._formatted_cluster_iterator()): 204 wrapper.initial_indent = "[%*d] " % (ndigits, idx) 205 print >>out, "\n".join(wrapper.wrap(cluster)) 206 207 return out.getvalue().strip() 208
210 """Iterates over the clusters and formats them into a string to be 211 presented in the summary.""" 212 for cluster in self: 213 yield ", ".join(str(member) for member in cluster)
214
215 216 -class VertexClustering(Clustering):
217 """The clustering of the vertex set of a graph. 218 219 This class extends L{Clustering} by linking it to a specific L{Graph} object 220 and by optionally storing the modularity score of the clustering. 221 It also provides some handy methods like getting the subgraph corresponding 222 to a cluster and such. 223 224 @note: since this class is linked to a L{Graph}, destroying the graph by the 225 C{del} operator does not free the memory occupied by the graph if there 226 exists a L{VertexClustering} that references the L{Graph}. 227 228 @undocumented: _formatted_cluster_iterator 229 """ 230 231 # Allow None to be passed to __plot__ as the "palette" keyword argument 232 _default_palette = None 233
234 - def __init__(self, graph, membership = None, modularity = None, \ 235 params = None, modularity_params = None):
236 """Creates a clustering object for a given graph. 237 238 @param graph: the graph that will be associated to the clustering 239 @param membership: the membership list. The length of the list must 240 be equal to the number of vertices in the graph. If C{None}, every 241 vertex is assumed to belong to the same cluster. 242 @param modularity: the modularity score of the clustering. If C{None}, 243 it will be calculated when needed. 244 @param params: additional parameters to be stored in this object. 245 @param modularity_params: arguments that should be passed to 246 L{Graph.modularity} when the modularity is (re)calculated. If the 247 original graph was weighted, you should pass a dictionary 248 containing a C{weight} key with the appropriate value here. 249 """ 250 if membership is None: 251 Clustering.__init__(self, [0]*graph.vcount(), params) 252 else: 253 if len(membership) != graph.vcount(): 254 raise ValueError("membership list has invalid length") 255 Clustering.__init__(self, membership, params) 256 257 self._graph = graph 258 self._modularity = modularity 259 self._modularity_dirty = modularity is None 260 if modularity_params is None: 261 self._modularity_params = {} 262 else: 263 self._modularity_params = dict(modularity_params) 264 265 # pylint: disable-msg=C0103 266 @classmethod
267 - def FromAttribute(cls, graph, attribute, intervals=None, params=None):
268 """Creates a vertex clustering based on the value of a vertex attribute. 269 270 Vertices having the same attribute will correspond to the same cluster. 271 272 @param graph: the graph on which we are working 273 @param attribute: name of the attribute on which the clustering 274 is based. 275 @param intervals: for numeric attributes, you can either pass a single 276 number or a list of numbers here. A single number means that the 277 vertices will be put in bins of that width and vertices ending up 278 in the same bin will be in the same cluster. A list of numbers 279 specify the bin positions explicitly; e.g., C{[10, 20, 30]} means 280 that there will be four categories: vertices with the attribute 281 value less than 10, between 10 and 20, between 20 and 30 and over 30. 282 Intervals are closed from the left and open from the right. 283 @param params: additional parameters to be stored in this object. 284 285 @return: a new VertexClustering object 286 """ 287 from bisect import bisect 288 289 def safeintdiv(x, y): 290 """Safe integer division that handles None gracefully""" 291 if x is None: 292 return None 293 return int(x / y) 294 295 def safebisect(intervals, x): 296 """Safe list bisection that handles None gracefully""" 297 if x is None: 298 return None 299 return bisect(intervals, x) 300 301 try: 302 _ = iter(intervals) 303 iterable = True 304 except TypeError: 305 iterable = False 306 if intervals is None: 307 vec = graph.vs[attribute] 308 elif iterable: 309 intervals = list(intervals) 310 vec = [safebisect(intervals, x) for x in graph.vs[attribute]] 311 else: 312 intervals = float(intervals) 313 vec = [safeintdiv(x, intervals) for x in graph.vs[attribute]] 314 315 idgen = UniqueIdGenerator() 316 idgen[None] = None 317 vec = [idgen[i] for i in vec] 318 return cls(graph, vec, None, params) 319
320 - def as_cover(self):
321 """Returns a L{VertexCover} that contains the same clusters as this 322 clustering.""" 323 return VertexCover(self._graph, self) 324
325 - def cluster_graph(self, combine_vertices=None, combine_edges=None):
326 """Returns a graph where each cluster is contracted into a single 327 vertex. 328 329 In the resulting graph, vertex M{i} represents cluster M{i} in this 330 clustering. Vertex M{i} and M{j} will be connected if there was 331 at least one connected vertex pair M{(a, b)} in the original graph such 332 that vertex M{a} was in cluster M{i} and vertex M{b} was in cluster 333 M{j}. 334 335 @param combine_vertices: specifies how to derive the attributes of 336 the vertices in the new graph from the attributes of the old ones. 337 See L{Graph.contract_vertices()} for more details. 338 @param combine_edges: specifies how to derive the attributes of the 339 edges in the new graph from the attributes of the old ones. See 340 L{Graph.simplify()} for more details. If you specify C{False} 341 here, edges will not be combined, and the number of edges between 342 the vertices representing the original clusters will be equal to 343 the number of edges between the members of those clusters in the 344 original graph. 345 346 @return: the new graph. 347 """ 348 result = self.graph.copy() 349 result.contract_vertices(self.membership, combine_vertices) 350 if combine_edges != False: 351 result.simplify(combine_edges=combine_edges) 352 return result 353
354 - def crossing(self):
355 """Returns a boolean vector where element M{i} is C{True} iff edge 356 M{i} lies between clusters, C{False} otherwise.""" 357 membership = self.membership 358 return [membership[v1] != membership[v2] \ 359 for v1, v2 in self.graph.get_edgelist()] 360 361 @property
362 - def modularity(self):
363 """Returns the modularity score""" 364 if self._modularity_dirty: 365 return self._recalculate_modularity_safe() 366 return self._modularity 367 q = modularity 368 369 @property
370 - def graph(self):
371 """Returns the graph belonging to this object""" 372 return self._graph 373
374 - def recalculate_modularity(self):
375 """Recalculates the stored modularity value. 376 377 This method must be called before querying the modularity score of the 378 clustering through the class member C{modularity} or C{q} if the 379 graph has been modified (edges have been added or removed) since the 380 creation of the L{VertexClustering} object. 381 382 @return: the new modularity score 383 """ 384 self._modularity = self._graph.modularity(self._membership, 385 **self._modularity_params) 386 self._modularity_dirty = False 387 return self._modularity 388
390 """Recalculates the stored modularity value and swallows all exceptions 391 raised by the modularity function (if any). 392 393 @return: the new modularity score or C{None} if the modularity function 394 could not be calculated. 395 """ 396 try: 397 return self.recalculate_modularity() 398 except: 399 return None 400 finally: 401 self._modularity_dirty = False 402
403 - def subgraph(self, idx):
404 """Get the subgraph belonging to a given cluster. 405 406 @param idx: the cluster index 407 @return: a copy of the subgraph 408 @precondition: the vertex set of the graph hasn't been modified since 409 the moment the clustering was constructed. 410 """ 411 return self._graph.subgraph(self[idx]) 412 413
414 - def subgraphs(self):
415 """Gets all the subgraphs belonging to each of the clusters. 416 417 @return: a list containing copies of the subgraphs 418 @precondition: the vertex set of the graph hasn't been modified since 419 the moment the clustering was constructed. 420 """ 421 return [self._graph.subgraph(cl) for cl in self] 422 423
424 - def giant(self):
425 """Returns the giant community of the clustered graph. 426 427 The giant component a community for which no larger community exists. 428 @note: there can be multiple giant communities, this method will return 429 the copy of an arbitrary one if there are multiple giant communities. 430 431 @return: a copy of the giant community. 432 @precondition: the vertex set of the graph hasn't been modified since 433 the moment the clustering was constructed. 434 """ 435 ss = self.sizes() 436 max_size = max(ss) 437 return self.subgraph(ss.index(max_size)) 438
439 - def __plot__(self, context, bbox, palette, *args, **kwds):
440 """Plots the clustering to the given Cairo context in the given 441 bounding box. 442 443 This is done by calling L{Graph.__plot__()} with the same arguments, but 444 coloring the graph vertices according to the current clustering (unless 445 overridden by the C{vertex_color} argument explicitly). 446 447 This method understands all the positional and keyword arguments that 448 are understood by L{Graph.__plot__()}, only the differences will be 449 highlighted here: 450 451 - C{mark_groups}: whether to highlight some of the vertex groups by 452 colored polygons. Besides the values accepted by L{Graph.__plot__} 453 (i.e., a dict mapping colors to vertex indices, a list containing 454 lists of vertex indices, or C{False}), the following are also 455 accepted: 456 457 - C{True}: all the groups will be highlighted, the colors matching 458 the corresponding color indices from the current palette 459 (see the C{palette} keyword argument of L{Graph.__plot__}. 460 461 - A dict mapping cluster indices or tuples of vertex indices to 462 color names. The given clusters or vertex groups will be 463 highlighted by the given colors. 464 465 - A list of cluster indices. This is equivalent to passing a 466 dict mapping numeric color indices from the current palette 467 to cluster indices; therefore, the cluster referred to by element 468 I{i} of the list will be highlighted by color I{i} from the 469 palette. 470 471 The value of the C{plotting.mark_groups} configuration key is also 472 taken into account here; if that configuration key is C{True} and 473 C{mark_groups} is not given explicitly, it will automatically be set 474 to C{True}. 475 476 In place of lists of vertex indices, you may also use L{VertexSeq} 477 instances. 478 479 In place of color names, you may also use color indices into the 480 current palette. C{None} as a color name will mean that the 481 corresponding group is ignored. 482 483 - C{palette}: the palette used to resolve numeric color indices to RGBA 484 values. By default, this is an instance of L{ClusterColoringPalette}. 485 486 @see: L{Graph.__plot__()} for more supported keyword arguments. 487 """ 488 if "edge_color" not in kwds and "color" not in self.graph.edge_attributes(): 489 # Set up a default edge coloring based on internal vs external edges 490 colors = ["grey20", "grey80"] 491 kwds["edge_color"] = [colors[is_crossing] 492 for is_crossing in self.crossing()] 493 494 if palette is None: 495 palette = ClusterColoringPalette(len(self)) 496 497 if "mark_groups" not in kwds: 498 if Configuration.instance()["plotting.mark_groups"]: 499 kwds["mark_groups"] = ( 500 (group, color) for color, group in enumerate(self) 501 ) 502 else: 503 kwds["mark_groups"] = _handle_mark_groups_arg_for_clustering( 504 kwds["mark_groups"], self) 505 506 if "vertex_color" not in kwds: 507 kwds["vertex_color"] = self.membership 508 509 return self._graph.__plot__(context, bbox, palette, *args, **kwds) 510
512 """Iterates over the clusters and formats them into a string to be 513 presented in the summary.""" 514 if self._graph.is_named(): 515 names = self._graph.vs["name"] 516 for cluster in self: 517 yield ", ".join(str(names[member]) for member in cluster) 518 else: 519 for cluster in self: 520 yield ", ".join(str(member) for member in cluster)
521
522 523 ############################################################################### 524 525 -class Dendrogram(object):
526 """The hierarchical clustering (dendrogram) of some dataset. 527 528 A hierarchical clustering means that we know not only the way the 529 elements are separated into groups, but also the exact history of 530 how individual elements were joined into larger subgroups. 531 532 This class internally represents the hierarchy by a matrix with n rows 533 and 2 columns -- or more precisely, a list of lists of size 2. This is 534 exactly the same as the original format used by C{igraph}'s C core. 535 The M{i}th row of the matrix contains the indices of the two clusters 536 being joined in time step M{i}. The joint group will be represented by 537 the ID M{n+i}, with M{i} starting from one. The ID of the joint group 538 will be referenced in the upcoming steps instead of any of its individual 539 members. So, IDs less than or equal to M{n} (where M{n} is the number of 540 rows in the matrix) mean the original members of the dataset (with ID 541 from 0 to M{n}), while IDs up from M{n+1} mean joint groups. As an 542 example, take a look at the dendrogram and the internal representation of 543 a given clustering of five nodes:: 544 545 0 -+ 546 | 547 1 -+-+ 548 | 549 2 ---+-+ <====> [[0, 1], [3, 4], [2, 5], [6, 7]] 550 | 551 3 -+ | 552 | | 553 4 -+---+--- 554 555 @undocumented: _item_box_size, _plot_item, _traverse_inorder 556 """ 557
558 - def __init__(self, merges):
559 """Creates a hierarchical clustering. 560 561 @param merges: the merge history either in matrix or tuple format""" 562 self._merges = [tuple(pair) for pair in merges] 563 self._nmerges = len(self._merges) 564 if self._nmerges: 565 self._nitems = max(self._merges[-1])-self._nmerges+2 566 else: 567 self._nitems = 0 568 self._names = None 569 570 @staticmethod
571 - def _convert_matrix_to_tuple_repr(merges, n=None):
572 """Converts the matrix representation of a clustering to a tuple 573 representation. 574 575 @param merges: the matrix representation of the clustering 576 @return: the tuple representation of the clustering 577 """ 578 if n is None: 579 n = len(merges)+1 580 tuple_repr = range(n) 581 idxs = range(n) 582 for rowidx, row in enumerate(merges): 583 i, j = row 584 try: 585 idxi, idxj = idxs[i], idxs[j] 586 tuple_repr[idxi] = (tuple_repr[idxi], tuple_repr[idxj]) 587 tuple_repr[idxj] = None 588 except IndexError: 589 raise ValueError("malformed matrix, subgroup referenced "+ 590 "before being created in step %d" % rowidx) 591 idxs.append(j) 592 return [x for x in tuple_repr if x is not None] 593
594 - def _traverse_inorder(self):
595 """Conducts an inorder traversal of the merge tree. 596 597 The inorder traversal returns the nodes on the last level in the order 598 they should be drawn so that no edges cross each other. 599 600 @return: the result of the inorder traversal in a list.""" 601 result = [] 602 seen_nodes = set() 603 604 for node_index in reversed(xrange(self._nitems+self._nmerges)): 605 if node_index in seen_nodes: 606 continue 607 608 stack = [node_index] 609 while stack: 610 last = stack.pop() 611 seen_nodes.add(last) 612 if last < self._nitems: 613 # 'last' is a regular node so the traversal ends here, we 614 # can append it to the results 615 result.append(last) 616 else: 617 # 'last' is a merge node, so let us proceed with the entry 618 # where this merge node was created 619 stack.extend(self._merges[last-self._nitems]) 620 621 return result 622
623 - def __str__(self):
624 return self.summary(verbosity=1) 625
626 - def format(self, format="newick"):
627 """Formats the dendrogram in a foreign format. 628 629 Currently only the Newick format is supported. 630 631 Example: 632 633 >>> d = Dendrogram([(2, 3), (0, 1), (4, 5)]) 634 >>> d.format() 635 '((2,3)4,(0,1)5)6;' 636 >>> d.names = list("ABCDEFG") 637 >>> d.format() 638 '((C,D)E,(A,B)F)G;' 639 """ 640 if format == "newick": 641 n = self._nitems + self._nmerges 642 if self._names is None: 643 nodes = range(n) 644 else: 645 nodes = list(self._names) 646 if len(nodes) < n: 647 nodes.extend("" for _ in xrange(n - len(nodes))) 648 for k, (i, j) in enumerate(self._merges, self._nitems): 649 nodes[k] = "(%s,%s)%s" % (nodes[i], nodes[j], nodes[k]) 650 nodes[i] = nodes[j] = None 651 return nodes[-1] + ";" 652 raise ValueError("unsupported format: %r" % format) 653
654 - def summary(self, verbosity=0, max_leaf_count=40):
655 """Returns the summary of the dendrogram. 656 657 The summary includes the number of leafs and branches, and also an 658 ASCII art representation of the dendrogram unless it is too large. 659 660 @param verbosity: determines whether the ASCII representation of the 661 dendrogram should be printed. Zero verbosity prints only the number 662 of leafs and branches. 663 @param max_leaf_count: the maximal number of leafs to print in the 664 ASCII representation. If the dendrogram has more leafs than this 665 limit, the ASCII representation will not be printed even if the 666 verbosity is larger than or equal to 1. 667 @return: the summary of the dendrogram as a string. 668 """ 669 out = StringIO() 670 print >>out, "Dendrogram, %d elements, %d merges" % \ 671 (self._nitems, self._nmerges) 672 673 if self._nitems == 0 or verbosity < 1 or self._nitems > max_leaf_count: 674 return out.getvalue().strip() 675 676 print >>out 677 678 positions = [None] * self._nitems 679 inorder = self._traverse_inorder() 680 distance = 2 681 level_distance = 2 682 nextp = 0 683 for idx, element in enumerate(inorder): 684 positions[element] = nextp 685 inorder[idx] = str(element) 686 nextp += max(distance, len(inorder[idx])+1) 687 688 width = max(positions)+1 689 690 # Print the nodes on the lowest level 691 print >>out, (" " * (distance-1)).join(inorder) 692 midx = 0 693 max_community_idx = self._nitems 694 while midx < self._nmerges: 695 char_array = [" "] * width 696 for position in positions: 697 if position >= 0: 698 char_array[position] = "|" 699 char_str = "".join(char_array) 700 for _ in xrange(level_distance-1): 701 print >>out, char_str # Print the lines 702 703 cidx_incr = 0 704 while midx < self._nmerges: 705 id1, id2 = self._merges[midx] 706 if id1 >= max_community_idx or id2 >= max_community_idx: 707 break 708 midx += 1 709 710 pos1, pos2 = positions[id1], positions[id2] 711 positions[id1], positions[id2] = -1, -1 712 713 if pos1 > pos2: 714 pos1, pos2 = pos2, pos1 715 positions.append((pos1+pos2) // 2) 716 717 dashes = "-" * (pos2 - pos1 - 1) 718 char_array[pos1:(pos2+1)] = "`%s'" % dashes 719 720 cidx_incr += 1 721 722 max_community_idx += cidx_incr 723 724 print >>out, "".join(char_array) 725 726 return out.getvalue().strip() 727
728 - def _item_box_size(self, context, horiz, idx):
729 """Calculates the amount of space needed for drawing an 730 individual vertex at the bottom of the dendrogram.""" 731 if self._names is None or self._names[idx] is None: 732 x_bearing, _, _, height, x_advance, _ = context.text_extents("") 733 else: 734 x_bearing, _, _, height, x_advance, _ = context.text_extents(str(self._names[idx])) 735 736 if horiz: 737 return x_advance - x_bearing, height 738 return height, x_advance - x_bearing 739 740 # pylint: disable-msg=R0913
741 - def _plot_item(self, context, horiz, idx, x, y):
742 """Plots a dendrogram item to the given Cairo context 743 744 @param context: the Cairo context we are plotting on 745 @param horiz: whether the dendrogram is horizontally oriented 746 @param idx: the index of the item 747 @param x: the X position of the item 748 @param y: the Y position of the item 749 """ 750 if self._names is None or self._names[idx] is None: 751 return 752 753 height = self._item_box_size(context, True, idx)[1] 754 if horiz: 755 context.move_to(x, y+height) 756 context.show_text(str(self._names[idx])) 757 else: 758 context.save() 759 context.translate(x, y) 760 context.rotate(-pi/2.) 761 context.move_to(0, height) 762 context.show_text(str(self._names[idx])) 763 context.restore() 764 765 # pylint: disable-msg=C0103,W0613 766 # W0613 = unused argument 'palette'
767 - def __plot__(self, context, bbox, palette, *args, **kwds):
768 """Draws the dendrogram on the given Cairo context 769 770 Supported keyword arguments are: 771 772 - C{orientation}: the orientation of the dendrogram. Must be one of 773 the following values: C{left-right}, C{bottom-top}, C{right-left} 774 or C{top-bottom}. Individual elements are always placed at the 775 former edge and merges are performed towards the latter edge. 776 Possible aliases: C{horizontal} = C{left-right}, 777 C{vertical} = C{bottom-top}, C{lr} = C{left-right}, 778 C{rl} = C{right-left}, C{tb} = C{top-bottom}, C{bt} = C{bottom-top}. 779 The default is C{left-right}. 780 781 """ 782 from igraph.layout import Layout 783 784 if self._names is None: 785 self._names = [str(x) for x in xrange(self._nitems)] 786 787 orientation = str_to_orientation(kwds.get("orientation", "lr"), 788 reversed_vertical=True) 789 horiz = orientation in ("lr", "rl") 790 791 # Get the font height 792 font_height = context.font_extents()[2] 793 794 # Calculate space needed for individual items at the 795 # bottom of the dendrogram 796 item_boxes = [self._item_box_size(context, horiz, idx) \ 797 for idx in xrange(self._nitems)] 798 799 # Small correction for cases when the right edge of the labels is 800 # aligned with the tips of the dendrogram branches 801 ygap = 2 if orientation == "bt" else 0 802 xgap = 2 if orientation == "lr" else 0 803 item_boxes = [(x+xgap, y+ygap) for x, y in item_boxes] 804 805 # Calculate coordinates 806 layout = Layout([(0, 0)] * self._nitems, dim=2) 807 inorder = self._traverse_inorder() 808 if not horiz: 809 x, y = 0, 0 810 for idx, element in enumerate(inorder): 811 layout[element] = (x, 0) 812 x += max(font_height, item_boxes[element][0]) 813 814 for id1, id2 in self._merges: 815 y += 1 816 layout.append(((layout[id1][0]+layout[id2][0])/2., y)) 817 818 # Mirror or rotate the layout if necessary 819 if orientation == "bt": 820 layout.mirror(1) 821 else: 822 x, y = 0, 0 823 for idx, element in enumerate(inorder): 824 layout[element] = (0, y) 825 y += max(font_height, item_boxes[element][1]) 826 827 for id1, id2 in self._merges: 828 x += 1 829 layout.append((x, (layout[id1][1]+layout[id2][1])/2.)) 830 831 # Mirror or rotate the layout if necessary 832 if orientation == "rl": 833 layout.mirror(0) 834 835 # Rescale layout to the bounding box 836 maxw = max(e[0] for e in item_boxes) 837 maxh = max(e[1] for e in item_boxes) 838 839 # w, h: width and height of the area containing the dendrogram 840 # tree without the items. 841 # delta_x, delta_y: displacement of the dendrogram tree 842 width, height = float(bbox.width), float(bbox.height) 843 delta_x, delta_y = 0, 0 844 if horiz: 845 width -= maxw 846 if orientation == "lr": 847 delta_x = maxw 848 else: 849 height -= maxh 850 if orientation == "tb": 851 delta_y = maxh 852 853 if horiz: 854 delta_y += font_height / 2. 855 else: 856 delta_x += font_height / 2. 857 layout.fit_into((delta_x, delta_y, width - delta_x, height - delta_y), 858 keep_aspect_ratio=False) 859 860 context.save() 861 862 context.translate(bbox.left, bbox.top) 863 context.set_source_rgb(0., 0., 0.) 864 context.set_line_width(1) 865 866 # Draw items 867 if horiz: 868 sgn = 0 if orientation == "rl" else -1 869 for idx in xrange(self._nitems): 870 x = layout[idx][0] + sgn * item_boxes[idx][0] 871 y = layout[idx][1] - item_boxes[idx][1]/2. 872 self._plot_item(context, horiz, idx, x, y) 873 else: 874 sgn = 1 if orientation == "bt" else 0 875 for idx in xrange(self._nitems): 876 x = layout[idx][0] - item_boxes[idx][0]/2. 877 y = layout[idx][1] + sgn * item_boxes[idx][1] 878 self._plot_item(context, horiz, idx, x, y) 879 880 # Draw dendrogram lines 881 if not horiz: 882 for idx, (id1, id2) in enumerate(self._merges): 883 x0, y0 = layout[id1] 884 x1, y1 = layout[id2] 885 x2, y2 = layout[idx + self._nitems] 886 context.move_to(x0, y0) 887 context.line_to(x0, y2) 888 context.line_to(x1, y2) 889 context.line_to(x1, y1) 890 context.stroke() 891 else: 892 for idx, (id1, id2) in enumerate(self._merges): 893 x0, y0 = layout[id1] 894 x1, y1 = layout[id2] 895 x2, y2 = layout[idx + self._nitems] 896 context.move_to(x0, y0) 897 context.line_to(x2, y0) 898 context.line_to(x2, y1) 899 context.line_to(x1, y1) 900 context.stroke() 901 902 context.restore() 903 904 @property
905 - def merges(self):
906 """Returns the performed merges in matrix format""" 907 return deepcopy(self._merges) 908 909 @property
910 - def names(self):
911 """Returns the names of the nodes in the dendrogram""" 912 return self._names 913 914 @names.setter
915 - def names(self, items):
916 """Sets the names of the nodes in the dendrogram""" 917 if items is None: 918 self._names = None 919 return 920 921 items = list(items) 922 if len(items) < self._nitems: 923 raise ValueError("must specify at least %d names" % self._nitems) 924 925 n = self._nitems + self._nmerges 926 self._names = items[:n] 927 if len(self._names) < n: 928 self._names.extend("" for _ in xrange(n-len(self._names)))
929
930 931 -class VertexDendrogram(Dendrogram):
932 """The dendrogram resulting from the hierarchical clustering of the 933 vertex set of a graph.""" 934
935 - def __init__(self, graph, merges, optimal_count = None, params = None, 936 modularity_params = None):
937 """Creates a dendrogram object for a given graph. 938 939 @param graph: the graph that will be associated to the clustering 940 @param merges: the merges performed given in matrix form. 941 @param optimal_count: the optimal number of clusters where the 942 dendrogram should be cut. This is a hint usually provided by the 943 clustering algorithm that produces the dendrogram. C{None} means 944 that such a hint is not available; the optimal count will then be 945 selected based on the modularity in such a case. 946 @param params: additional parameters to be stored in this object. 947 @param modularity_params: arguments that should be passed to 948 L{Graph.modularity} when the modularity is (re)calculated. If the 949 original graph was weighted, you should pass a dictionary 950 containing a C{weight} key with the appropriate value here. 951 """ 952 Dendrogram.__init__(self, merges) 953 self._graph = graph 954 self._optimal_count = optimal_count 955 if modularity_params is None: 956 self._modularity_params = {} 957 else: 958 self._modularity_params = dict(modularity_params) 959
960 - def as_clustering(self, n=None):
961 """Cuts the dendrogram at the given level and returns a corresponding 962 L{VertexClustering} object. 963 964 @param n: the desired number of clusters. Merges are replayed from the 965 beginning until the membership vector has exactly M{n} distinct elements 966 or until there are no more recorded merges, whichever happens first. 967 If C{None}, the optimal count hint given by the clustering algorithm 968 will be used If the optimal count was not given either, it will be 969 calculated by selecting the level where the modularity is maximal. 970 @return: a new L{VertexClustering} object. 971 """ 972 if n is None: 973 n = self.optimal_count 974 num_elts = self._graph.vcount() 975 idgen = UniqueIdGenerator() 976 membership = community_to_membership(self._merges, num_elts, \ 977 num_elts - n) 978 membership = [idgen[m] for m in membership] 979 return VertexClustering(self._graph, membership, 980 modularity_params=self._modularity_params) 981 982 @property
983 - def optimal_count(self):
984 """Returns the optimal number of clusters for this dendrogram. 985 986 If an optimal count hint was given at construction time, this 987 property simply returns the hint. If such a count was not given, 988 this method calculates the optimal number of clusters by maximizing 989 the modularity along all the possible cuts in the dendrogram. 990 """ 991 if self._optimal_count is not None: 992 return self._optimal_count 993 994 n = self._graph.vcount() 995 max_q, optimal_count = 0, 1 996 for step in xrange(min(n-1, len(self._merges))): 997 membs = community_to_membership(self._merges, n, step) 998 q = self._graph.modularity(membs, **self._modularity_params) 999 if q > max_q: 1000 optimal_count = n-step 1001 max_q = q 1002 self._optimal_count = optimal_count 1003 return optimal_count 1004 1005 @optimal_count.setter
1006 - def optimal_count(self, value):
1007 self._optimal_count = max(int(value), 1) 1008
1009 - def __plot__(self, context, bbox, palette, *args, **kwds):
1010 """Draws the vertex dendrogram on the given Cairo context 1011 1012 See L{Dendrogram.__plot__} for the list of supported keyword 1013 arguments.""" 1014 from igraph.drawing.metamagic import AttributeCollectorBase 1015 1016 class VisualVertexBuilder(AttributeCollectorBase): 1017 _kwds_prefix = "vertex_" 1018 label = None 1019 1020 builder = VisualVertexBuilder(self._graph.vs, kwds) 1021 self._names = [vertex.label for vertex in builder] 1022 self._names = [name if name is not None else str(idx) 1023 for idx, name in enumerate(self._names)] 1024 result = Dendrogram.__plot__(self, context, bbox, palette, \ 1025 *args, **kwds) 1026 del self._names 1027 1028 return result
1029
1030 ############################################################################### 1031 1032 -class Cover(object):
1033 """Class representing a cover of an arbitrary ordered set. 1034 1035 Covers are similar to clusterings, but each element of the set may 1036 belong to more than one cluster in a cover, and elements not belonging 1037 to any cluster are also allowed. 1038 1039 L{Cover} instances provide a similar API as L{Clustering} instances; 1040 for instance, iterating over a L{Cover} will iterate over the clusters 1041 just like with a regular L{Clustering} instance. However, they are not 1042 derived from each other or from a common superclass, and there might 1043 be functions that exist only in one of them or the other. 1044 1045 Clusters of an individual cover can be accessed by the C{[]} operator: 1046 1047 >>> cl = Cover([[0,1,2,3], [2,3,4], [0,1,6]]) 1048 >>> cl[0] 1049 [0, 1, 2, 3] 1050 1051 The membership vector can be accessed by the C{membership} property. 1052 Note that contrary to L{Clustering} instances, the membership vector 1053 will contain lists that contain the cluster indices each item belongs 1054 to: 1055 1056 >>> cl.membership 1057 [[0, 2], [0, 2], [0, 1], [0, 1], [1], [], [2]] 1058 1059 The number of clusters can be retrieved by the C{len} function: 1060 1061 >>> len(cl) 1062 3 1063 1064 You can iterate over the cover as if it were a regular list of 1065 clusters: 1066 1067 >>> for cluster in cl: 1068 ... print " ".join(str(idx) for idx in cluster) 1069 ... 1070 0 1 2 3 1071 2 3 4 1072 0 1 6 1073 1074 If you need all the clusters at once as lists, you can simply convert 1075 the cover to a list: 1076 1077 >>> cluster_list = list(cl) 1078 >>> print cluster_list 1079 [[0, 1, 2, 3], [2, 3, 4], [0, 1, 6]] 1080 1081 L{Clustering} objects can readily be converted to L{Cover} objects 1082 using the constructor: 1083 1084 >>> clustering = Clustering([0, 0, 0, 0, 1, 1, 1, 2, 2, 2]) 1085 >>> cover = Cover(clustering) 1086 >>> list(clustering) == list(cover) 1087 True 1088 1089 @undocumented: _formatted_cluster_iterator 1090 """ 1091
1092 - def __init__(self, clusters, n=0):
1093 """Constructs a cover with the given clusters. 1094 1095 @param clusters: the clusters in this cover, as a list or iterable. 1096 Each cluster is specified by a list or tuple that contains the 1097 IDs of the items in this cluster. IDs start from zero. 1098 1099 @param n: the total number of elements in the set that is covered 1100 by this cover. If it is less than the number of unique elements 1101 found in all the clusters, we will simply use the number of unique 1102 elements, so it is safe to leave this at zero. You only have to 1103 specify this parameter if there are some elements that are covered 1104 by none of the clusters. 1105 """ 1106 1107 self._clusters = [list(cluster) for cluster in clusters] 1108 try: 1109 self._n = max(max(cluster)+1 for cluster in self._clusters if cluster) 1110 except ValueError: 1111 self._n = 0 1112 self._n = max(n, self._n) 1113
1114 - def __getitem__(self, index):
1115 """Returns the cluster with the given index.""" 1116 return self._clusters[index] 1117
1118 - def __iter__(self):
1119 """Iterates over the clusters in this cover.""" 1120 return iter(self._clusters) 1121
1122 - def __len__(self):
1123 """Returns the number of clusters in this cover.""" 1124 return len(self._clusters) 1125
1126 - def __str__(self):
1127 """Returns a string representation of the cover.""" 1128 return self.summary(verbosity=1, width=78) 1129 1130 @property
1131 - def membership(self):
1132 """Returns the membership vector of this cover. 1133 1134 The membership vector of a cover covering I{n} elements is a list of 1135 length I{n}, where element I{i} contains the cluster indices of the 1136 I{i}th item. 1137 """ 1138 result = [[] for _ in xrange(self._n)] 1139 for idx, cluster in enumerate(self): 1140 for item in cluster: 1141 result[item].append(idx) 1142 return result 1143 1144 @property
1145 - def n(self):
1146 """Returns the number of elements in the set covered by this cover.""" 1147 return self._n 1148
1149 - def size(self, idx):
1150 """Returns the size of a given cluster. 1151 1152 @param idx: the cluster in which we are interested. 1153 """ 1154 return len(self[idx]) 1155
1156 - def sizes(self, *args):
1157 """Returns the size of given clusters. 1158 1159 The indices are given as positional arguments. If there are no 1160 positional arguments, the function will return the sizes of all clusters. 1161 """ 1162 if args: 1163 return [len(self._clusters[idx]) for idx in args] 1164 return [len(cluster) for cluster in self] 1165
1166 - def size_histogram(self, bin_width = 1):
1167 """Returns the histogram of cluster sizes. 1168 1169 @param bin_width: the bin width of the histogram 1170 @return: a L{Histogram} object 1171 """ 1172 return Histogram(bin_width, self.sizes()) 1173
1174 - def summary(self, verbosity=0, width=None):
1175 """Returns the summary of the cover. 1176 1177 The summary includes the number of items and clusters, and also the 1178 list of members for each of the clusters if the verbosity is nonzero. 1179 1180 @param verbosity: determines whether the cluster members should be 1181 printed. Zero verbosity prints the number of items and clusters only. 1182 @return: the summary of the cover as a string. 1183 """ 1184 out = StringIO() 1185 print >>out, "Cover with %d clusters" % len(self) 1186 1187 if verbosity < 1: 1188 return out.getvalue().strip() 1189 1190 ndigits = len(str(len(self))) 1191 wrapper = _get_wrapper_for_width(width, 1192 subsequent_indent = " " * (ndigits+3)) 1193 1194 for idx, cluster in enumerate(self._formatted_cluster_iterator()): 1195 wrapper.initial_indent = "[%*d] " % (ndigits, idx) 1196 print >>out, "\n".join(wrapper.wrap(cluster)) 1197 1198 return out.getvalue().strip() 1199
1201 """Iterates over the clusters and formats them into a string to be 1202 presented in the summary.""" 1203 for cluster in self: 1204 yield ", ".join(str(member) for member in cluster)
1205
1206 1207 -class VertexCover(Cover):
1208 """The cover of the vertex set of a graph. 1209 1210 This class extends L{Cover} by linking it to a specific L{Graph} object. 1211 It also provides some handy methods like getting the subgraph corresponding 1212 to a cluster and such. 1213 1214 @note: since this class is linked to a L{Graph}, destroying the graph by the 1215 C{del} operator does not free the memory occupied by the graph if there 1216 exists a L{VertexCover} that references the L{Graph}. 1217 1218 @undocumented: _formatted_cluster_iterator 1219 """ 1220
1221 - def __init__(self, graph, clusters = None):
1222 """Creates a cover object for a given graph. 1223 1224 @param graph: the graph that will be associated to the cover 1225 @param clusters: the list of clusters. If C{None}, it is assumed 1226 that there is only a single cluster that covers the whole graph. 1227 """ 1228 if clusters is None: 1229 clusters = [range(graph.vcount())] 1230 1231 Cover.__init__(self, clusters, n = graph.vcount()) 1232 if self._n > graph.vcount(): 1233 raise ValueError("cluster list contains vertex ID larger than the " 1234 "number of vertices in the graph") 1235 1236 self._graph = graph 1237
1238 - def crossing(self):
1239 """Returns a boolean vector where element M{i} is C{True} iff edge 1240 M{i} lies between clusters, C{False} otherwise.""" 1241 membership = [frozenset(cluster) for cluster in self.membership] 1242 return [membership[v1].isdisjoint(membership[v2]) \ 1243 for v1, v2 in self.graph.get_edgelist()] 1244 1245 @property
1246 - def graph(self):
1247 """Returns the graph belonging to this object""" 1248 return self._graph 1249
1250 - def subgraph(self, idx):
1251 """Get the subgraph belonging to a given cluster. 1252 1253 @param idx: the cluster index 1254 @return: a copy of the subgraph 1255 @precondition: the vertex set of the graph hasn't been modified since 1256 the moment the cover was constructed. 1257 """ 1258 return self._graph.subgraph(self[idx]) 1259
1260 - def subgraphs(self):
1261 """Gets all the subgraphs belonging to each of the clusters. 1262 1263 @return: a list containing copies of the subgraphs 1264 @precondition: the vertex set of the graph hasn't been modified since 1265 the moment the cover was constructed. 1266 """ 1267 return [self._graph.subgraph(cl) for cl in self] 1268
1269 - def __plot__(self, context, bbox, palette, *args, **kwds):
1270 """Plots the cover to the given Cairo context in the given 1271 bounding box. 1272 1273 This is done by calling L{Graph.__plot__()} with the same arguments, but 1274 drawing nice colored blobs around the vertex groups. 1275 1276 This method understands all the positional and keyword arguments that 1277 are understood by L{Graph.__plot__()}, only the differences will be 1278 highlighted here: 1279 1280 - C{mark_groups}: whether to highlight the vertex clusters by 1281 colored polygons. Besides the values accepted by L{Graph.__plot__} 1282 (i.e., a dict mapping colors to vertex indices, a list containing 1283 lists of vertex indices, or C{False}), the following are also 1284 accepted: 1285 1286 - C{True}: all the clusters will be highlighted, the colors matching 1287 the corresponding color indices from the current palette 1288 (see the C{palette} keyword argument of L{Graph.__plot__}. 1289 1290 - A dict mapping cluster indices or tuples of vertex indices to 1291 color names. The given clusters or vertex groups will be 1292 highlighted by the given colors. 1293 1294 - A list of cluster indices. This is equivalent to passing a 1295 dict mapping numeric color indices from the current palette 1296 to cluster indices; therefore, the cluster referred to by element 1297 I{i} of the list will be highlighted by color I{i} from the 1298 palette. 1299 1300 The value of the C{plotting.mark_groups} configuration key is also 1301 taken into account here; if that configuration key is C{True} and 1302 C{mark_groups} is not given explicitly, it will automatically be set 1303 to C{True}. 1304 1305 In place of lists of vertex indices, you may also use L{VertexSeq} 1306 instances. 1307 1308 In place of color names, you may also use color indices into the 1309 current palette. C{None} as a color name will mean that the 1310 corresponding group is ignored. 1311 1312 - C{palette}: the palette used to resolve numeric color indices to RGBA 1313 values. By default, this is an instance of L{ClusterColoringPalette}. 1314 1315 @see: L{Graph.__plot__()} for more supported keyword arguments. 1316 """ 1317 if "edge_color" not in kwds and "color" not in self.graph.edge_attributes(): 1318 # Set up a default edge coloring based on internal vs external edges 1319 colors = ["grey20", "grey80"] 1320 kwds["edge_color"] = [colors[is_crossing] 1321 for is_crossing in self.crossing()] 1322 1323 if "palette" in kwds: 1324 palette = kwds["palette"] 1325 else: 1326 palette = ClusterColoringPalette(len(self)) 1327 1328 if "mark_groups" not in kwds: 1329 if Configuration.instance()["plotting.mark_groups"]: 1330 kwds["mark_groups"] = enumerate(self) 1331 else: 1332 kwds["mark_groups"] = _handle_mark_groups_arg_for_clustering( 1333 kwds["mark_groups"], self) 1334 1335 return self._graph.__plot__(context, bbox, palette, *args, **kwds) 1336
1338 """Iterates over the clusters and formats them into a string to be 1339 presented in the summary.""" 1340 if self._graph.is_named(): 1341 names = self._graph.vs["name"] 1342 for cluster in self: 1343 yield ", ".join(str(names[member]) for member in cluster) 1344 else: 1345 for cluster in self: 1346 yield ", ".join(str(member) for member in cluster)
1347
1348 1349 -class CohesiveBlocks(VertexCover):
1350 """The cohesive block structure of a graph. 1351 1352 Instances of this type are created by L{Graph.cohesive_blocks()}. See 1353 the documentation of L{Graph.cohesive_blocks()} for an explanation of 1354 what cohesive blocks are. 1355 1356 This class provides a few more methods that make handling of cohesive 1357 block structures easier. 1358 """ 1359
1360 - def __init__(self, graph, blocks = None, cohesion = None, parent = None):
1361 """Constructs a new cohesive block structure for the given graph. 1362 1363 If any of I{blocks}, I{cohesion} or I{parent} is C{None}, all the 1364 arguments will be ignored and L{Graph.cohesive_blocks()} will be 1365 called to calculate the cohesive blocks. Otherwise, these three 1366 variables should describe the *result* of a cohesive block structure 1367 calculation. Chances are that you never have to construct L{CohesiveBlocks} 1368 instances directly, just use L{Graph.cohesive_blocks()}. 1369 1370 @param graph: the graph itself 1371 @param blocks: a list containing the blocks; each block is described 1372 as a list containing vertex IDs. 1373 @param cohesion: the cohesion of each block. The length of this list 1374 must be equal to the length of I{blocks}. 1375 @param parent: the parent block of each block. Negative values or 1376 C{None} mean that there is no parent block for that block. There 1377 should be only one parent block, which covers the entire graph. 1378 @see: Graph.cohesive_blocks() 1379 """ 1380 if blocks is None or cohesion is None or parent is None: 1381 blocks, cohesion, parent = graph.cohesive_blocks() 1382 1383 VertexCover.__init__(self, graph, blocks) 1384 1385 self._cohesion = cohesion 1386 self._parent = parent 1387 for idx, p in enumerate(self._parent): 1388 if p < 0: 1389 self._parent[idx] = None 1390
1391 - def cohesion(self, idx):
1392 """Returns the cohesion of the group with the given index.""" 1393 return self._cohesion[idx] 1394
1395 - def cohesions(self):
1396 """Returns the list of cohesion values for each group.""" 1397 return self._cohesion[:] 1398
1399 - def hierarchy(self):
1400 """Returns a new graph that describes the hierarchical relationships 1401 between the groups. 1402 1403 The new graph will be a directed tree; an edge will point from 1404 vertex M{i} to vertex M{j} if group M{i} is a superset of group M{j}. 1405 In other words, the edges point downwards. 1406 """ 1407 from igraph import Graph 1408 edges = [pair for pair in izip(self._parent, xrange(len(self))) 1409 if pair[0] is not None] 1410 return Graph(edges, directed=True) 1411
1412 - def max_cohesion(self, idx):
1413 """Finds the maximum cohesion score among all the groups that contain 1414 the given vertex.""" 1415 result = 0 1416 for cohesion, cluster in izip(self._cohesion, self._clusters): 1417 if idx in cluster: 1418 result = max(result, cohesion) 1419 return result 1420
1421 - def max_cohesions(self):
1422 """For each vertex in the graph, returns the maximum cohesion score 1423 among all the groups that contain the vertex.""" 1424 result = [0] * self._graph.vcount() 1425 for cohesion, cluster in izip(self._cohesion, self._clusters): 1426 for idx in cluster: 1427 result[idx] = max(result[idx], cohesion) 1428 return result 1429
1430 - def parent(self, idx):
1431 """Returns the parent group index of the group with the given index 1432 or C{None} if the given group is the root.""" 1433 return self._parent[idx] 1434
1435 - def parents(self):
1436 """Returns the list of parent group indices for each group or C{None} 1437 if the given group is the root.""" 1438 return self._parent[:] 1439
1440 - def __plot__(self, context, bbox, palette, *args, **kwds):
1441 """Plots the cohesive block structure to the given Cairo context in 1442 the given bounding box. 1443 1444 Since a L{CohesiveBlocks} instance is also a L{VertexCover}, keyword 1445 arguments accepted by L{VertexCover.__plot__()} are also accepted here. 1446 The only difference is that the vertices are colored according to their 1447 maximal cohesions by default, and groups are marked by colored blobs 1448 except the last group which encapsulates the whole graph. 1449 1450 See the documentation of L{VertexCover.__plot__()} for more details. 1451 """ 1452 prepare_groups = False 1453 if "mark_groups" not in kwds: 1454 if Configuration.instance()["plotting.mark_groups"]: 1455 prepare_groups = True 1456 elif kwds["mark_groups"] == True: 1457 prepare_groups = True 1458 1459 if prepare_groups: 1460 colors = [pair for pair in enumerate(self.cohesions()) 1461 if pair[1] > 1] 1462 kwds["mark_groups"] = colors 1463 1464 if "vertex_color" not in kwds: 1465 kwds["vertex_color"] = self.max_cohesions() 1466 1467 return VertexCover.__plot__(self, context, bbox, palette, *args, **kwds)
1468
1469 -def _handle_mark_groups_arg_for_clustering(mark_groups, clustering):
1470 """Handles the mark_groups=... keyword argument in plotting methods of 1471 clusterings. 1472 1473 This is an internal method, you shouldn't need to mess around with it. 1474 Its purpose is to handle the extended semantics of the mark_groups=... 1475 keyword argument in the C{__plot__} method of L{VertexClustering} and 1476 L{VertexCover} instances, namely the feature that numeric IDs are resolved 1477 to clusters automatically. 1478 """ 1479 # Handle the case of mark_groups = True, mark_groups containing a list or 1480 # tuple of cluster IDs, and and mark_groups yielding (cluster ID, color) 1481 # pairs 1482 if mark_groups is True: 1483 group_iter = ((group, color) for color, group in enumerate(clustering)) 1484 elif isinstance(mark_groups, dict): 1485 group_iter = mark_groups.iteritems() 1486 elif hasattr(mark_groups, "__getitem__") and hasattr(mark_groups, "__len__"): 1487 # Lists, tuples 1488 try: 1489 first = mark_groups[0] 1490 except: 1491 # Hmm. Maybe not a list or tuple? 1492 first = None 1493 if first is not None: 1494 # Okay. Is the first element of the list a single number? 1495 if isinstance(first, (int, long)): 1496 # Yes. Seems like we have a list of cluster indices. 1497 # Assign color indices automatically. 1498 group_iter = ((group, color) 1499 for color, group in enumerate(mark_groups)) 1500 else: 1501 # No. Seems like we have good ol' group-color pairs. 1502 group_iter = mark_groups 1503 else: 1504 group_iter = mark_groups 1505 elif hasattr(mark_groups, "__iter__"): 1506 # Iterators etc 1507 group_iter = mark_groups 1508 else: 1509 group_iter = {}.iteritems() 1510 1511 def cluster_index_resolver(): 1512 for group, color in group_iter: 1513 if isinstance(group, (int, long)): 1514 group = clustering[group] 1515 yield group, color 1516 1517 return cluster_index_resolver() 1518
1519 ############################################################## 1520 1521 -def _prepare_community_comparison(comm1, comm2, remove_none=False):
1522 """Auxiliary method that takes two community structures either as 1523 membership lists or instances of L{Clustering}, and returns a 1524 tuple whose two elements are membership lists. 1525 1526 This is used by L{compare_communities} and L{split_join_distance}. 1527 1528 @param comm1: the first community structure as a membership list or 1529 as a L{Clustering} object. 1530 @param comm2: the second community structure as a membership list or 1531 as a L{Clustering} object. 1532 @param remove_none: whether to remove C{None} entries from the membership 1533 lists. If C{remove_none} is C{False}, a C{None} entry in either C{comm1} 1534 or C{comm2} will result in an exception. If C{remove_none} is C{True}, 1535 C{None} values are filtered away and only the remaining lists are 1536 compared. 1537 """ 1538 def _ensure_list(obj): 1539 if isinstance(obj, Clustering): 1540 return obj.membership 1541 return list(obj) 1542 1543 vec1, vec2 = _ensure_list(comm1), _ensure_list(comm2) 1544 if len(vec1) != len(vec2): 1545 raise ValueError("the two membership vectors must be equal in length") 1546 1547 if remove_none and (None in vec1 or None in vec2): 1548 idxs_to_remove = [i for i in xrange(len(vec1)) \ 1549 if vec1[i] is None or vec2[i] is None] 1550 idxs_to_remove.reverse() 1551 n = len(vec1) 1552 for i in idxs_to_remove: 1553 n -= 1 1554 vec1[i], vec1[n] = vec1[n], vec1[i] 1555 vec2[i], vec2[n] = vec2[n], vec2[i] 1556 del vec1[n:] 1557 del vec2[n:] 1558 1559 return vec1, vec2 1560
1561 1562 -def compare_communities(comm1, comm2, method="vi", remove_none=False):
1563 """Compares two community structures using various distance measures. 1564 1565 @param comm1: the first community structure as a membership list or 1566 as a L{Clustering} object. 1567 @param comm2: the second community structure as a membership list or 1568 as a L{Clustering} object. 1569 @param method: the measure to use. C{"vi"} or C{"meila"} means the 1570 variation of information metric of Meila (2003), C{"nmi"} or C{"danon"} 1571 means the normalized mutual information as defined by Danon et al (2005), 1572 C{"split-join"} means the split-join distance of van Dongen (2000), 1573 C{"rand"} means the Rand index of Rand (1971), C{"adjusted_rand"} 1574 means the adjusted Rand index of Hubert and Arabie (1985). 1575 @param remove_none: whether to remove C{None} entries from the membership 1576 lists. This is handy if your L{Clustering} object was constructed using 1577 L{VertexClustering.FromAttribute} using an attribute which was not defined 1578 for all the vertices. If C{remove_none} is C{False}, a C{None} entry in 1579 either C{comm1} or C{comm2} will result in an exception. If C{remove_none} 1580 is C{True}, C{None} values are filtered away and only the remaining lists 1581 are compared. 1582 1583 @return: the calculated measure. 1584 @newfield ref: Reference 1585 @ref: Meila M: Comparing clusterings by the variation of information. 1586 In: Scholkopf B, Warmuth MK (eds). Learning Theory and Kernel 1587 Machines: 16th Annual Conference on Computational Learning Theory 1588 and 7th Kernel Workship, COLT/Kernel 2003, Washington, DC, USA. 1589 Lecture Notes in Computer Science, vol. 2777, Springer, 2003. 1590 ISBN: 978-3-540-40720-1. 1591 @ref: Danon L, Diaz-Guilera A, Duch J, Arenas A: Comparing community 1592 structure identification. J Stat Mech P09008, 2005. 1593 @ref: van Dongen D: Performance criteria for graph clustering and Markov 1594 cluster experiments. Technical Report INS-R0012, National Research 1595 Institute for Mathematics and Computer Science in the Netherlands, 1596 Amsterdam, May 2000. 1597 @ref: Rand WM: Objective criteria for the evaluation of clustering 1598 methods. J Am Stat Assoc 66(336):846-850, 1971. 1599 @ref: Hubert L and Arabie P: Comparing partitions. Journal of 1600 Classification 2:193-218, 1985. 1601 """ 1602 import igraph._igraph 1603 vec1, vec2 = _prepare_community_comparison(comm1, comm2, remove_none) 1604 return igraph._igraph._compare_communities(vec1, vec2, method) 1605
1606 1607 -def split_join_distance(comm1, comm2, remove_none=False):
1608 """Calculates the split-join distance between two community structures. 1609 1610 The split-join distance is a distance measure defined on the space of 1611 partitions of a given set. It is the sum of the projection distance of 1612 one partition from the other and vice versa, where the projection 1613 number of A from B is if calculated as follows: 1614 1615 1. For each set in A, find the set in B with which it has the 1616 maximal overlap, and take note of the size of the overlap. 1617 1618 2. Take the sum of the maximal overlap sizes for each set in A. 1619 1620 3. Subtract the sum from M{n}, the number of elements in the 1621 partition. 1622 1623 Note that the projection distance is asymmetric, that's why it has to be 1624 calculated in both directions and then added together. This function 1625 returns the projection distance of C{comm1} from C{comm2} and the 1626 projection distance of C{comm2} from C{comm1}, and returns them in a pair. 1627 The actual split-join distance is the sum of the two distances. The reason 1628 why it is presented this way is that one of the elements being zero then 1629 implies that one of the partitions is a subpartition of the other (and if 1630 it is close to zero, then one of the partitions is close to being a 1631 subpartition of the other). 1632 1633 @param comm1: the first community structure as a membership list or 1634 as a L{Clustering} object. 1635 @param comm2: the second community structure as a membership list or 1636 as a L{Clustering} object. 1637 @param remove_none: whether to remove C{None} entries from the membership 1638 lists. This is handy if your L{Clustering} object was constructed using 1639 L{VertexClustering.FromAttribute} using an attribute which was not defined 1640 for all the vertices. If C{remove_none} is C{False}, a C{None} entry in 1641 either C{comm1} or C{comm2} will result in an exception. If C{remove_none} 1642 is C{True}, C{None} values are filtered away and only the remaining lists 1643 are compared. 1644 1645 @return: the projection distance of C{comm1} from C{comm2} and vice versa 1646 in a tuple. The split-join distance is the sum of the two. 1647 @newfield ref: Reference 1648 @ref: van Dongen D: Performance criteria for graph clustering and Markov 1649 cluster experiments. Technical Report INS-R0012, National Research 1650 Institute for Mathematics and Computer Science in the Netherlands, 1651 Amsterdam, May 2000. 1652 1653 @see: L{compare_communities()} with C{method = "split-join"} if you are 1654 not interested in the individual projection distances but only the 1655 sum of them. 1656 """ 1657 import igraph._igraph 1658 vec1, vec2 = _prepare_community_comparison(comm1, comm2, remove_none) 1659 return igraph._igraph._split_join_distance(vec1, vec2) 1660

   Home       Trees       Indices       Help