python-igraph manual

For using igraph from Python

   Home       Trees       Indices       Help   
Package igraph :: Package remote :: Module nexus
[hide private]

Source Code for Module igraph.remote.nexus

  1  # vim:ts=4:sw=4:sts=4:et 
  2  # -*- coding: utf-8 -*- 
  3  """Interface to the Nexus online graph repository. 
  4   
  5  The classes in this file facilitate access to the Nexus online graph 
  6  repository at U{http://nexus.igraph.org}. 
  7   
  8  The main entry point of this package is the C{Nexus} variable, which is 
  9  an instance of L{NexusConnection}. Use L{NexusConnection.get} to get a particular 
 10  network from Nexus, L{NexusConnection.list} to list networks having a given set of 
 11  tags, L{NexusConnection.search} to search in the dataset descriptions, or 
 12  L{NexusConnection.info} to show the info sheet of a dataset.""" 
 13   
 14  from gzip import GzipFile 
 15  from itertools import izip 
 16  from textwrap import TextWrapper 
 17  from urllib import urlencode 
 18  from urlparse import urlparse, urlunparse 
 19  from textwrap import TextWrapper 
 20   
 21  from igraph.compat import property, BytesIO 
 22  from igraph.configuration import Configuration 
 23  from igraph.utils import multidict 
 24   
 25  import re 
 26  import urllib2 
 27   
 28  __all__ = ["Nexus", "NexusConnection"] 
 29   
 30  __license__ = u"""\ 
 31  Copyright (C) 2006-2012  Tamás Nepusz <ntamas@gmail.com> 
 32  Pázmány Péter sétány 1/a, 1117 Budapest, Hungary 
 33   
 34  This program is free software; you can redistribute it and/or modify 
 35  it under the terms of the GNU General Public License as published by 
 36  the Free Software Foundation; either version 2 of the License, or 
 37  (at your option) any later version. 
 38   
 39  This program is distributed in the hope that it will be useful, 
 40  but WITHOUT ANY WARRANTY; without even the implied warranty of 
 41  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 42  GNU General Public License for more details. 
 43   
 44  You should have received a copy of the GNU General Public License 
 45  along with this program; if not, write to the Free Software 
 46  Foundation, Inc.,  51 Franklin Street, Fifth Floor, Boston, MA 
 47  02110-1301 USA 
 48  """ 
class NexusConnection(object):
    """Connection to a remote Nexus server.

    In most cases, you will not have to instantiate this object, just use
    the global L{Nexus} variable which is an instance of L{NexusConnection}
    and connects to the Nexus repository at U{http://nexus.igraph.org}.

    Example:

      >>> print Nexus.info("karate")            #doctest:+ELLIPSIS
      Nexus dataset 'karate' (#1)
      vertices/edges: 34/78
      name: Zachary's karate club
      tags: social network; undirected; weighted
      ...
      >>> karate = Nexus.get("karate")
      >>> from igraph import summary
      >>> summary(karate)
      IGRAPH UNW- 34 78 -- Zachary's karate club network
      + attr: Author (g), Citation (g), name (g), Faction (v), id (v), name (v), weight (e)

    @undocumented: _get_response, _parse_dataset_id, _parse_text_response,
      _ensure_uncompressed"""

    def __init__(self, nexus_url=None):
        """Constructs a connection to a remote Nexus server.

        @param nexus_url: the root URL of the remote server. Leave it at its
          default value (C{None}) unless you have set up your own Nexus server
          and you want to connect to that. C{None} fetches the URL from
          igraph's configuration file or uses the default URL if no URL
          is specified in the configuration file.
        """
        self.debug = False
        self.url = nexus_url
        self._opener = urllib2.build_opener()

    def get(self, id):
        """Retrieves the dataset with the given ID from Nexus.

        Dataset IDs are formatted as follows: the name of a dataset on its own
        means that a single network should be returned if the dataset contains
        a single network, or multiple networks should be returned if the dataset
        contains multiple networks. When the name is followed by a dot and a
        network ID, only a single network will be returned: the one that has the
        given network ID. When the name is followed by a dot and a star, a
        dictionary mapping network IDs to networks will be returned even if the
        original dataset contains a single network only.

        E.g., getting C{"karate"} would return a single network since the
        Zachary karate club dataset contains one network only. Getting
        C{"karate.*"} on the other hand would return a dictionary with one
        entry that contains the Zachary karate club network.

        @param id: the ID of the dataset to retrieve.
        @return: an instance of L{Graph} (if a single graph has to be returned)
          or a dictionary mapping network IDs to instances of L{Graph}.
        """
        # Imported lazily, at call time rather than when this module is loaded.
        from igraph import load

        dataset_id, network_id = self._parse_dataset_id(id)

        params = dict(format="Python-igraph", id=dataset_id)
        response = self._get_response("/api/dataset", params, compressed=True)
        response = self._ensure_uncompressed(response)
        # NOTE(security): the payload is unpickled. Unpickling can execute
        # arbitrary code, so only point the connection at a Nexus server
        # that you trust.
        result = load(response, format="pickle")

        if network_id is None:
            # If result contains a single network only, return that network.
            # Otherwise return the whole dictionary
            if not isinstance(result, dict):
                return result
            if len(result) == 1:
                return list(result.values())[0]
            return result

        if network_id == "*":
            # Return a dict no matter what. A bare graph carries no network
            # ID of its own here, so it is keyed by the dataset ID that was
            # requested (and not by the literal string "dataset_id").
            if not isinstance(result, dict):
                result = {dataset_id: result}
            return result

        return result[network_id]

    def info(self, id):
        """Retrieves information about the dataset with the given numeric
        or string ID from Nexus.

        @param id: the numeric or string ID of the dataset to retrieve.
        @return: an instance of L{NexusDatasetInfo}.
        """
        params = dict(format="text", id=id)
        response = self._get_response("/api/dataset_info", params)
        return NexusDatasetInfo.FromMultiDict(self._parse_text_response(response))

    def list(self, tags=None, operator="or", order="date"):
        """Retrieves a list of datasets matching a set of tags from Nexus.

        @param tags: the tags the returned datasets should have. C{None}
          retrieves all the datasets, a single string retrieves datasets
          having that given tag. Multiple tags may also be specified as
          a list, tuple or any other iterable.
        @param operator: when multiple tags are given, this argument
          specifies whether the retrieved datasets should match all
          the tags (C{"and"}) or any of them (C{"or"}).
        @param order: the order of entries; it must be one of C{"date"},
          C{"name"} or C{"popularity"}.
        @return: a L{NexusDatasetInfoList} object, which basically acts like a
          list and yields L{NexusDatasetInfo} objects. The list is populated
          lazily; i.e. the requests will be fired only when needed.
        """
        params = dict(format="text", order=order)
        if tags is not None:
            if not hasattr(tags, "__iter__") or isinstance(tags, basestring):
                # A single tag
                params["tag"] = str(tags)
            else:
                # Several tags are sent as one "|"-separated value; the
                # operator is only meaningful when tags are combined.
                params["tag"] = "|".join(str(tag) for tag in tags)
                params["operator"] = operator

        return NexusDatasetInfoList(self, "/api/dataset_info", params)

    def search(self, query, order="date"):
        """Retrieves a list of datasets matching a query string from Nexus.

        @param query: the query string. Searches are case insensitive and
          Nexus searches for complete words only. The special word OR
          can be used to find datasets that contain any of the given words
          (instead of all of them). Exact phrases must be enclosed in
          quotes in the search string. See the Nexus webpage for more
          information at U{http://nexus.igraph.org/web/docs#searching}.
        @param order: the order of entries; it must be one of C{"date"},
          C{"name"} or C{"popularity"}.
        @return: a L{NexusDatasetInfoList} object, which basically acts like a
          list and yields L{NexusDatasetInfo} objects. The list is populated
          lazily; i.e. the requests will be fired only when needed.
        """
        params = dict(q=query, order=order, format="text")
        return NexusDatasetInfoList(self, "/api/search", params)

    @staticmethod
    def _ensure_uncompressed(response):
        """Expects an HTTP response object, checks its Content-Encoding header,
        decompresses the data and returns an in-memory buffer holding the
        uncompressed data.

        The response is returned unchanged when it is not recognised as
        gzip-compressed."""
        compressed = response.headers.get("Content-Encoding") == "gzip"
        if not compressed:
            # Fall back to the file name of the attachment: a name ending
            # in ".gz" is also treated as gzipped content.
            content_disp = response.headers.get("Content-Disposition", "")
            compressed = bool(re.match(r'attachment; *filename=.*\.gz\"?$',
                content_disp))
        if compressed:
            return GzipFile(fileobj=BytesIO(response.read()), mode="rb")
        return response

    def _get_response(self, path, params=None, compressed=False):
        """Sends a request to Nexus at the given path with the given parameters
        and returns a file-like object for the response. `compressed` denotes
        whether we accept compressed responses.

        @param path: the path of the API method, appended to the root URL.
        @param params: the query parameters of the request; C{None} means
          that there are no parameters.
        @param compressed: whether an C{Accept-Encoding: gzip} header should
          be sent with the request.
        """
        if params is None:
            params = {}
        if self.url is None:
            # No explicit URL was given; look it up in the configuration
            url = Configuration.instance()["remote.nexus.url"]
        else:
            url = self.url
        url = "%s%s?%s" % (url, path, urlencode(params))
        request = urllib2.Request(url)
        if compressed:
            request.add_header("Accept-Encoding", "gzip")
        if self.debug:
            print("[debug] Sending request: %s" % url)
        return self._opener.open(request)

    @staticmethod
    def _parse_dataset_id(id):
        """Parses a dataset ID used in the `get` request.

        Returns the dataset ID and the network ID (the latter being C{None}
        if the original ID did not contain a network ID ).
        """
        # Only the first dot separates the two parts
        dataset_id, _, network_id = str(id).partition(".")
        if not network_id:
            network_id = None
        return dataset_id, network_id

    @staticmethod
    def _parse_text_response(response):
        """Parses a plain text formatted response from Nexus.

        Plain text formatted responses consist of key-value pairs, separated
        by C{":"}. Values may span multiple lines; in this case, the key is
        omitted after the first line and the extra lines start with
        whitespace.

        Examples:

            >>> d = Nexus._parse_text_response("Id: 17\\nName: foo")
            >>> sorted(d.items())
            [('Id', '17'), ('Name', 'foo')]
            >>> d = Nexus._parse_text_response("Id: 42\\nName: foo\\n .\\n bar")
            >>> sorted(d.items())
            [('Id', '42'), ('Name', 'foo\\n\\nbar')]
        """
        if isinstance(response, basestring):
            response = response.split("\n")

        result = multidict()
        key, value = None, []
        for line in response:
            line = line.rstrip()
            if not line:
                continue
            if key is not None and line[0] in ' \t':
                # Line continuation; a lone dot stands for an empty line
                line = line.lstrip()
                if line == '.':
                    line = ''
                value.append(line)
            else:
                # Key-value pair; store the previous pair first
                if key is not None:
                    result.add(key, "\n".join(value))
                key, value = line.split(":", 1)
                value = [value.strip()]

        # Store the pair that was still being collected
        if key is not None:
            result.add(key, "\n".join(value))

        return result

    @property
    def url(self):
        """Returns the root URL of the Nexus repository the connection is
        communicating with."""
        return self._url

    @url.setter
    def url(self, value):
        """Sets the root URL of the Nexus repository the connection is
        communicating with."""
        if value is None:
            self._url = None
        else:
            value = str(value)
            # "http" is the scheme assumed when the URL does not name one
            parts = urlparse(value, "http", False)
            self._url = urlunparse(parts)
            # Request paths start with a slash, so drop a trailing one here
            if self._url and self._url[-1] == "/":
                self._url = self._url[:-1]

295 296 -class NexusDatasetInfo(object):
297 """Information about a dataset in the Nexus repository. 298 299 @undocumented: _update_from_multidict, vertices_edges""" 300
301 - def __init__(self, id=None, sid=None, name=None, networks=None, 302 vertices=None, edges=None, tags=None, attributes=None, rest=None):
303 self._conn = None 304 self.id = id 305 self.sid = sid 306 self.name = name 307 self.vertices = vertices 308 self.edges = edges 309 self.tags = tags 310 self.attributes = attributes 311 if networks is None: 312 self.networks = [] 313 elif not isinstance(networks, (str, unicode)): 314 self.networks = list(networks) 315 else: 316 self.networks = [networks] 317 if rest: 318 self.rest = multidict(rest) 319 else: 320 self.rest = None 321 322 @property
323 - def vertices_edges(self):
324 if self.vertices is None or self.edges is None: 325 return "" 326 elif isinstance(self.vertices, (list, tuple)) and isinstance(self.edges, (list, tuple)): 327 return " ".join("%s/%s" % (v,e) for v, e in izip(self.vertices, self.edges)) 328 else: 329 return "%s/%s" % (self.vertices, self.edges) 330 331 @vertices_edges.setter
332 - def vertices_edges(self, value):
333 if value is None: 334 self.vertices, self.edges = None, None 335 return 336 337 value = value.strip().split(" ") 338 if len(value) == 0: 339 self.vertices, self.edges = None, None 340 elif len(value) == 1: 341 self.vertices, self.edges = map(int, value[0].split("/")) 342 else: 343 self.vertices = [] 344 self.edges = [] 345 for ve in value: 346 v, e = ve.split("/", 1) 347 self.vertices.append(int(v)) 348 self.edges.append(int(e)) 349
350 - def __repr__(self):
351 params = "(id=%(id)r, sid=%(sid)r, name=%(name)r, networks=%(networks)r, "\ 352 "vertices=%(vertices)r, edges=%(edges)r, tags=%(tags)r, "\ 353 "attributes=%(attributes)r, rest=%(rest)r)" % self.__dict__ 354 return "%s%s" % (self.__class__.__name__, params) 355
356 - def __str__(self):
357 if self.networks and len(self.networks) > 1: 358 lines = ["Nexus dataset '%s' (#%s) with %d networks" % \ 359 (self.sid, self.id, len(self.networks))] 360 else: 361 lines = ["Nexus dataset '%(sid)s' (#%(id)s)" % self.__dict__] 362 363 lines.append("vertices/edges: %s" % self.vertices_edges) 364 365 if self.name: 366 lines.append("name: %s" % self.name) 367 if self.tags: 368 lines.append("tags: %s" % "; ".join(self.tags)) 369 370 if self.rest: 371 wrapper = TextWrapper(width=76, subsequent_indent=' ') 372 373 keys = sorted(self.rest.iterkeys()) 374 if "attribute" in self.rest: 375 keys.remove("attribute") 376 keys.append("attribute") 377 378 for key in keys: 379 for value in self.rest.getlist(key): 380 paragraphs = str(value).splitlines() 381 wrapper.initial_indent = "%s: " % key 382 for paragraph in paragraphs: 383 ls = wrapper.wrap(paragraph) 384 if ls: 385 lines.extend(wrapper.wrap(paragraph)) 386 else: 387 lines.append(" .") 388 wrapper.initial_indent = " " 389 390 return "\n".join(lines) 391
392 - def _update_from_multidict(self, params):
393 """Updates the dataset object from a multidict representation of 394 key-value pairs, similar to the ones provided by the Nexus API in 395 plain text response.""" 396 self.id = params.get("id") 397 self.sid = params.get("sid") 398 self.name = params.get("name") 399 self.vertices = params.get("vertices") 400 self.edges = params.get("edges") 401 self.tags = params.get("tags") 402 403 networks = params.get("networks") 404 if networks: 405 self.networks = networks.split() 406 407 keys_to_ignore = set("id sid name vertices edges tags networks".split()) 408 409 if self.vertices is None and self.edges is None: 410 # Try "vertices/edges" 411 self.vertices_edges = params.get("vertices/edges") 412 keys_to_ignore.add("vertices/edges") 413 414 if self.rest is None: 415 self.rest = multidict() 416 for k in set(params.iterkeys()) - keys_to_ignore: 417 for v in params.getlist(k): 418 self.rest.add(k, v) 419 420 if self.id: 421 self.id = int(self.id) 422 if self.vertices and not isinstance(self.vertices, (list, tuple)): 423 self.vertices = int(self.vertices) 424 if self.edges and not isinstance(self.edges, (list, tuple)): 425 self.edges = int(self.edges) 426 if self.tags is not None: 427 self.tags = self.tags.split(";") 428 429 @classmethod
430 - def FromMultiDict(cls, dict):
431 """Constructs a Nexus dataset object from a multidict representation 432 of key-value pairs, similar to the ones provided by the Nexus API in 433 plain text response.""" 434 result = cls() 435 result._update_from_multidict(dict) 436 return result 437
438 - def download(self, network_id=None):
439 """Retrieves the actual dataset from Nexus. 440 441 @param network_id: if the dataset contains multiple networks, the ID 442 of the network to be retrieved. C{None} returns a single network if 443 the dataset contains a single network, or a dictionary of networks 444 if the dataset contains more than one network. C{"*"} retrieves 445 a dictionary even if the dataset contains a single network only. 446 447 @return: a L{Graph} instance or a dictionary mapping network names to 448 L{Graph} instances. 449 """ 450 if self.id is None: 451 raise ValueError("dataset ID is empty") 452 conn = self._conn or Nexus 453 if network_id is None: 454 return conn.get(self.id) 455 return conn.get("%s.%s" % (self.id, network_id)) 456 457 get = download 458
class NexusDatasetInfoList(object):
    """A read-only list-like object that can be used to retrieve the items
    from a Nexus search result.
    """

    def __init__(self, connection, method, params):
        """Constructs a Nexus dataset list that will use the given connection
        and the given parameters to retrieve the search results.

        @param connection: a Nexus connection object
        @param method: the URL of the Nexus API method to call
        @param params: the parameters to pass in the GET requests, in the
          form of a Python dictionary.
        """
        self._conn = connection
        self._method = str(method)
        # Work on a copy: the paging keys added while fetching must not
        # show up in the dictionary owned by the caller.
        self._params = dict(params)
        self._length = None
        self._datasets = []
        self._blocksize = 10

    def _fetch_results(self, index):
        """Fetches the results from Nexus such that the result item with the
        given index will be available (unless the result list is shorter than
        the given index of course)."""
        # Calculate the start offset
        page = index // self._blocksize
        offset = page * self._blocksize
        self._params["offset"] = offset
        self._params["limit"] = self._blocksize

        # Ensure that self._datasets has the necessary length
        diff = (page+1) * self._blocksize - len(self._datasets)
        if diff > 0:
            self._datasets.extend([None] * diff)

        response = self._conn._get_response(self._method, self._params)
        current_dataset = None
        for line in response:
            key, separator, value = line.strip().partition(": ")
            if not separator:
                # Blank line or a line without a "key: value" pair
                continue
            key = key.lower()

            if key == "totalsize":
                # Total number of items in the search result
                self._length = int(value)
            elif key == "id":
                # Starting a new dataset; store the one collected so far
                if current_dataset:
                    self._datasets[offset] = current_dataset
                    offset += 1
                current_dataset = NexusDatasetInfo(id=int(value))
                current_dataset._conn = self._conn
            elif key == "sid":
                current_dataset.sid = value
            elif key == "name":
                current_dataset.name = value
            elif key == "vertices":
                current_dataset.vertices = int(value)
            elif key == "edges":
                current_dataset.edges = int(value)
            elif key == "vertices/edges":
                current_dataset.vertices_edges = value
            elif key == "tags":
                current_dataset.tags = value.split(";")

        # Store the last dataset of the page
        if current_dataset:
            self._datasets[offset] = current_dataset

    def __getitem__(self, index):
        """Returns the result item with the given index, fetching it from
        Nexus if necessary.

        @raise IndexError: if the index is out of range.
        """
        if index < 0:
            # Negative indices count from the end, like for lists
            index += len(self)
            if index < 0:
                raise IndexError("list index out of range")

        if len(self._datasets) <= index or self._datasets[index] is None:
            if self._length is not None and index >= self._length:
                # Known to be past the end; do not send a request
                raise IndexError("list index out of range")
            self._fetch_results(index)

        item = self._datasets[index]
        if item is None:
            # Still only padding after the fetch: there is no such item
            raise IndexError("list index out of range")
        return item

    def __iter__(self):
        """Iterates over the result items, fetching them page by page."""
        index = 0
        while index < len(self):
            yield self[index]
            index += 1

    def __len__(self):
        """Returns the number of result items."""
        if self._length is None:
            self._fetch_results(0)
        return self._length

    def __str__(self):
        """Converts the Nexus result list into a nice human-readable format."""
        max_index_length = len(str(len(self))) + 2
        # Continuation lines of an item are aligned under its first line
        indent = "\n" + " " * (max_index_length+1)

        result = []
        for index, item in enumerate(self):
            formatted_item = ("[%d]" % index).rjust(max_index_length) + " " + \
                    str(item).replace("\n", indent)
            result.append(formatted_item)
        return "\n".join(result)

# Default connection of the module. No URL is given here, so the address of
# the server is looked up in igraph's configuration when a request is sent.
Nexus = NexusConnection()

   Home       Trees       Indices       Help