Package doapfiend :: Module utils
[hide private]
[frames] | no frames]

Source Code for Module doapfiend.utils

  1   
  2  """ 
  3   
  4  utils.py 
  5  ======== 
  6   
  7  Misc utilities for doapfiend 
  8  ---------------------------- 
  9   
 10  General purpose helper functions and classes for doapfiend 
 11  You'll probably want to use doaplib for most cases. 
 12   
 13  License: BSD-2 
 14   
 15  """ 
 16   
 17  #pylint: disable-msg=C0103 
 18   
 19  import urllib 
 20  import logging 
 21  import urlparse 
 22  from httplib import HTTPConnection 
 23  from urllib2 import build_opener, HTTPError, ProxyHandler, URLError 
 24   
 25   
 26  __docformat__ = 'epytext' 
 27   
 28  LOG = logging.getLogger('doapfiend') 
 29   
 30  COLOR = {'normal': "\033[0m", 
 31            'bold': "\033[1m", 
 32            'underline': "\033[4m", 
 33            'blink': "\033[5m", 
 34            'reverse': "\033[7m", 
 35            'black': "\033[30m", 
 36            'red': "\033[31m", 
 37            'green': "\033[32m", 
 38            'yellow': "\033[33m", 
 39            'blue': "\033[34m", 
 40            'magenta': "\033[35m", 
 41            'cyan': "\033[36m", 
 42            'white': "\033[37m"} 
 43   
 44   
class NotFoundError(Exception):

    '''Raised when a DOAP cannot be found'''

    #pylint: disable-msg=W0231
    def __init__(self, err_msg):
        '''Store the message describing what was not found'''
        self.err_msg = err_msg

    def __str__(self):
        '''Return the repr() of the stored message'''
        message = self.err_msg
        return repr(message)
def http_filesize(url):
    """
    Get the size of file without downloading it.

    Sends an HTTP C{HEAD} request for the URL and reports the
    C{Content-Length} header of the response. The response status is
    not inspected.

    @param url: URL of file
    @type url: string

    @rtype: string
    @return: Size of file as reported by the server, C{None} if the
        response carries no Content-Length header, or C{False} if the
        URL contains a non-numeric port.

    Usage:

    >>> http_filesize('http://trac.doapspace.org/test_file.txt')
    '160'
    """

    parts = urlparse.urlsplit(url)
    host, path, query = parts[1], parts[2], parts[3]
    if ':' in host:
        # port specified, try to use it
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            LOG.error('invalid port number %r', port)
            return False
    else:
        # no port specified, use default port
        port = None

    # An empty path would produce a malformed request line, and the query
    # string is part of the resource being asked about.
    target = path or '/'
    if query:
        target = target + '?' + query

    connection = HTTPConnection(host, port=port)
    try:
        connection.request("HEAD", target)
        resp = connection.getresponse()
        return resp.getheader('content-length')
    finally:
        # Always release the socket, even if the request fails.
        connection.close()
def http_exists(url):
    """
    A quick way to check if a file exists on the web.

    Sends an HTTP C{HEAD} request. A 200 response means the document
    exists; a 302 response is followed (up to a fixed number of hops);
    any other status is logged and treated as not found.

    @param url: URL of the document
    @type url: string
    @rtype: boolean
    @return: True or False

    Usage:

    >>> http_exists('http://www.python.org/')
    True
    >>> http_exists('http://www.python.org/PenguinOnTheTelly')
    False
    """

    # Upper bound on 302 hops so that a redirect loop (or a 302 without a
    # Location header, which resolves back to the same URL) terminates.
    max_redirects = 10

    for _hop in range(max_redirects + 1):
        parts = urlparse.urlsplit(url)
        host, path, query = parts[1], parts[2], parts[3]
        if ':' in host:
            #port specified, try to use it
            host, port = host.split(':', 1)
            try:
                port = int(port)
            except ValueError:
                LOG.error('invalid port number %r', port)
                return False
        else:
            #no port specified, use default port
            port = None

        # An empty path would produce a malformed request line, and the
        # query string is part of the resource being asked about.
        target = path or '/'
        if query:
            target = target + '?' + query

        connection = HTTPConnection(host, port=port)
        try:
            connection.request("HEAD", target)
            resp = connection.getresponse()
            status = resp.status
            reason = resp.reason
            location = resp.getheader('location', '')
        finally:
            # Always release the socket, even if the request fails.
            connection.close()

        if status == 200:    # normal 'found' status
            return True
        if status != 302:    # everything else -> not found
            LOG.info("Status %d %s : %s", status, reason, url)
            return False
        # temporary redirect: try again at the new location
        url = urlparse.urljoin(url, location)

    LOG.info("Too many redirects : %s", url)
    return False
def is_content_type(url_or_file, content_type):
    """
    Tells whether the URL or pseudofile from urllib.urlopen is of
    the required content type.

    A string argument (byte or unicode) is opened with urllib.urlopen and
    closed again before returning. Any other argument is used as an
    already-open pseudofile and is left open for the caller.

    @param url_or_file: URL or file path, or an open pseudofile
    @type url_or_file: string
    @param content_type: Content type we're looking for
    @type content_type: string

    @rtype: boolean
    @returns: True if it can return the Content type we want; False if
        the type differs or an IOError occurs while checking

    Usage:

    >>> is_content_type('http://doapspace.org/doap/sf/nlyrics.rdf', \
            'application/rdf+xml')
    True
    >>> is_content_type('http://doapspace.org/', 'application/rdf+xml')
    False
    """
    # On Python 2 a URL may arrive as str or unicode; basestring covers
    # both. Fall back to str where basestring does not exist.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    wanted = content_type.lower()
    try:
        if isinstance(url_or_file, string_types):
            thefile = urllib.urlopen(url_or_file)
            try:
                return thefile.info().gettype() == wanted
            finally:
                # We opened it, so we close it, even if info() fails.
                thefile.close()
        return url_or_file.info().gettype() == wanted
    except IOError:
        return False
def fetch_file(url, proxy=None):
    '''
    Download file by URL

    URLs starting with http://, https:// or ftp:// are fetched over the
    network with an Accept header asking for application/rdf+xml.
    Anything else is treated as a local file path and read from disk.

    @param url: URL of a file
    @type url: string

    @param proxy: URL of HTTP Proxy (applied to http:// requests)
    @type proxy: string

    @return: File contents, or None if the download failed with an
        error other than HTTP 404 (the error is logged)
    @rtype: string

    @raise NotFoundError: if the server answers with HTTP 404

    '''
    if not url.startswith(('http://', 'https://', 'ftp://')):
        # No supported scheme: read it as a local file and close it again.
        with open(url, 'r') as local_file:
            return local_file.read()
    LOG.debug('Fetching ' + url)
    if proxy:
        opener = build_opener(ProxyHandler({'http': proxy}))
    else:
        opener = build_opener()
    opener.addheaders = [('Accept', 'application/rdf+xml'),
            ('User-agent',
             'Mozilla/5.0 (compatible; doapfiend ' +
             'http://trac.doapspace.org/doapfiend)')]
    try:
        result = opener.open(url)
    except HTTPError as err_msg:
        if err_msg.code == 404:
            raise NotFoundError('Not found: %s' % url)
        LOG.error(err_msg)
        # Nothing was fetched; return here instead of falling through to
        # result.read() with result unbound.
        return None
    except URLError as err_msg:
        LOG.error(err_msg)
        return None
    try:
        return result.read()
    finally:
        result.close()
if __name__ == '__main__':
    # Run the usage examples embedded in this module's docstrings.
    # NOTE: those examples request live URLs.
    from doctest import testmod
    testmod()