1
2 """
3
4 utils.py
5 ========
6
7 Misc utilities for doapfiend
8 ----------------------------
9
10 General purpose helper functions and classes for doapfiend
11 You'll probably want to use doaplib for most cases.
12
13 License: BSD-2
14
15 """
16
17
18
19 import urllib
20 import logging
21 import urlparse
22 from httplib import HTTPConnection
23 from urllib2 import build_opener, HTTPError, ProxyHandler, URLError
24
25
__docformat__ = 'epytext'

# Logger shared by the helpers in this module.
LOG = logging.getLogger('doapfiend')

# ANSI terminal escape sequences, keyed by attribute or colour name.
# 'normal' resets whatever attributes were switched on before it.
COLOR = dict(
    normal="\033[0m",
    bold="\033[1m",
    underline="\033[4m",
    blink="\033[5m",
    reverse="\033[7m",
    black="\033[30m",
    red="\033[31m",
    green="\033[32m",
    yellow="\033[33m",
    blue="\033[34m",
    magenta="\033[35m",
    cyan="\033[36m",
    white="\033[37m",
)
43
44
46
# NOTE(review): the ``class`` and ``def`` header lines were missing from the
# listing this block was recovered from. The class name is taken from the
# ``raise NotFoundError(...)`` call later in this module; the ``Exception``
# base class and the ``__str__`` method name are presumed -- confirm.
class NotFoundError(Exception):

    '''DOAP not found'''

    def __init__(self, err_msg):
        '''
        Initialize attributes

        @param err_msg: Description of what could not be found
        @type err_msg: string
        '''
        self.err_msg = err_msg

    def __str__(self):
        '''
        @rtype: string
        @return: repr() of the stored error message
        '''
        return repr(self.err_msg)
56
57
# NOTE(review): the ``def`` line was missing from the listing this block was
# recovered from; the name and parameter are taken from the doctest and the
# @param field below -- confirm against callers.
def http_filesize(url):
    """
    Get the size of file without downloading it.

    Sends an HTTP HEAD request for the URL and reports the value of the
    Content-Length header of the response.

    @param url: URL of file
    @type url: string

    @rtype: string
    @return: Size of file as sent in the Content-Length header, None if
             the response carries no such header, or False if the port
             given in the URL is not a number

    Usage:

    >>> http_filesize('http://trac.doapspace.org/test_file.txt')
    '160'
    """
    host, path, query = urlparse.urlsplit(url)[1:4]
    if ':' in host:
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            LOG.error('invalid port number %r', port)
            return False
    else:
        port = None

    # A URL without a path component ("http://host") would otherwise give
    # an empty request target; the query string belongs to the target too.
    target = path or '/'
    if query:
        target = '%s?%s' % (target, query)

    connection = HTTPConnection(host, port=port)
    try:
        connection.request("HEAD", target)
        return connection.getresponse().getheader('content-length')
    finally:
        # Release the socket whether or not the request succeeded.
        connection.close()
92
93
# NOTE(review): the ``def`` line was missing from the listing this block was
# recovered from; the name and parameter are taken from the doctest and the
# @param field below -- confirm against callers.
def http_exists(url):
    """
    A quick way to check if a file exists on the web.

    Sends an HTTP HEAD request and follows 302 redirects, up to a fixed
    number of hops.

    @param url: URL of the document
    @type url: string
    @rtype: boolean
    @return: True if a request ends with status 200; False for any other
             status, an invalid port number, a redirect without a
             Location header, or too many redirects

    Usage:

    >>> http_exists('http://www.python.org/')
    True
    >>> http_exists('http://www.python.org/PenguinOnTheTelly')
    False
    """
    # Bounded so that a redirect cycle cannot keep this function running
    # (or recursing) forever.
    max_redirects = 10

    for _ in range(max_redirects + 1):
        host, path, query = urlparse.urlsplit(url)[1:4]
        if ':' in host:
            host, port = host.split(':', 1)
            try:
                port = int(port)
            except ValueError:
                LOG.error('invalid port number %r', port)
                return False
        else:
            port = None

        # A URL without a path component ("http://host") would otherwise
        # give an empty request target; keep the query string as well.
        target = path or '/'
        if query:
            target = '%s?%s' % (target, query)

        connection = HTTPConnection(host, port=port)
        try:
            connection.request("HEAD", target)
            resp = connection.getresponse()
            status = resp.status
            reason = resp.reason
            location = resp.getheader('location', '')
        finally:
            # Release the socket whether or not the request succeeded.
            connection.close()

        if status == 200:
            return True
        if status == 302 and location:
            # The Location header may be relative to the current URL.
            url = urlparse.urljoin(url, location)
            continue
        LOG.info("Status %d %s : %s", status, reason, url)
        return False

    LOG.info("Too many redirects : %s", url)
    return False
135
136
def is_content_type(url_or_file, content_type):
    """
    Tells whether the URL or pseudofile from urllib.urlopen is of
    the required content type.

    A string argument is opened with urllib.urlopen and closed again
    before returning; any other object is used as given and left open
    for the caller.

    @param url_or_file: URL or file path, or an already opened pseudofile
    @type url_or_file: string or file-like object with an info() method
    @param content_type: Content type we're looking for
    @type content_type: string

    @rtype: boolean
    @return: True if it can return the Content type we want, False if it
             cannot or if an IOError occurs while checking

    Usage:

    >>> is_content_type('http://doapspace.org/doap/sf/nlyrics.rdf',
    ...                 'application/rdf+xml')
    True
    >>> is_content_type('http://doapspace.org/', 'application/rdf+xml')
    False
    """
    opened_here = isinstance(url_or_file, str)
    try:
        if opened_here:
            thefile = urllib.urlopen(url_or_file)
        else:
            thefile = url_or_file
        try:
            return thefile.info().gettype() == content_type.lower()
        finally:
            # Close only what was opened here, and do so even when
            # reading the headers fails, so the handle is not leaked.
            if opened_here:
                thefile.close()
    except IOError:
        return False
169
170
# NOTE(review): the ``def`` line was missing from the listing this block was
# recovered from. The parameters come from the @param fields below; the
# function name and the ``proxy=None`` default are reconstructed -- confirm
# against callers.
def fetch_file(url, proxy=None):
    '''
    Download file by URL

    Anything that does not start with http://, https:// or ftp:// is
    treated as a path on the local filesystem and read from there.

    @param url: URL of a file, or path of a local file
    @type url: string

    @param proxy: URL of HTTP Proxy
    @type proxy: string

    @return: File contents, or None if the download failed with an error
             other than HTTP 404
    @rtype: string

    @raise NotFoundError: if the server answers with HTTP status 404
    '''
    if not url.startswith(('http://', 'https://', 'ftp://')):
        # Local path: make sure the handle is closed again.
        with open(url, 'r') as local_file:
            return local_file.read()

    LOG.debug('Fetching ' + url)
    if proxy:
        opener = build_opener(ProxyHandler({'http': proxy}))
    else:
        opener = build_opener()
    opener.addheaders = [('Accept', 'application/rdf+xml'),
                         ('User-agent',
                          'Mozilla/5.0 (compatible; doapfiend ' +
                          'http://trac.doapspace.org/doapfiend)')]
    try:
        result = opener.open(url)
    except HTTPError as err_msg:
        if err_msg.code == 404:
            raise NotFoundError('Not found: %s' % url)
        # Any other HTTP failure: report it and give up instead of
        # falling through to read from a response that was never bound.
        LOG.error(err_msg)
        return None
    except URLError as err_msg:
        LOG.error(err_msg)
        return None

    try:
        return result.read()
    finally:
        result.close()
207
208
if __name__ == '__main__':
    # Run the usage examples embedded in this module's docstrings.
    from doctest import testmod
    testmod()
212