1
2 """
3 This module contains the HTTP fetcher interface and several implementations.
4 """
5
6 __all__ = ['fetch', 'getDefaultFetcher', 'setDefaultFetcher', 'HTTPResponse',
7 'HTTPFetcher', 'createHTTPFetcher', 'HTTPFetchingError', 'HTTPError']
8
9 import urllib2
10 import time
11 import cStringIO
12 import sys
13
14 import openid
15 import openid.urinorm
16
17
18 try:
19 import pycurl
20 except ImportError:
21 pycurl = None
22
23 USER_AGENT = "python-openid/%s (%s)" % (openid.__version__, sys.platform)
24
def fetch(url, body=None, headers=None):
    """Fetch the given URL using the module-wide default fetcher.

    This is the convenience entry point that most users want; the
    parameters have the same meaning as in L{HTTPFetcher.fetch}.

    @raises Exception: any exceptions that may be raised by the default fetcher
    """
    return getDefaultFetcher().fetch(url, body, headers)
33
    """Create a default HTTP fetcher instance

    prefers Curl to urllib2."""
    if pycurl is None:
        # pycurl failed to import at module load time; fall back to urllib2.
        fetcher = Urllib2Fetcher()
    else:
        fetcher = CurlHTTPFetcher()

    return fetcher
44
45
46
47
48 _default_fetcher = None
49
63
    """Set the default fetcher

    @param fetcher: The fetcher to use as the default HTTP fetcher
    @type fetcher: HTTPFetcher

    @param wrap_exceptions: Whether to wrap exceptions thrown by the
        fetcher with HTTPFetchingError so that they may be caught
        easier. By default, exceptions will be wrapped. In general,
        unwrapped fetchers are useful for debugging of fetching errors
        or if your fetcher raises well-known exceptions that you would
        like to catch.
    @type wrap_exceptions: bool
    """
    global _default_fetcher
    if fetcher is None or not wrap_exceptions:
        # Either clearing the default or the caller explicitly opted out
        # of exception wrapping: store the fetcher (or None) as-is.
        _default_fetcher = fetcher
    else:
        _default_fetcher = ExceptionWrappingFetcher(fetcher)
83
85 """Whether the currently set HTTP fetcher is a Curl HTTP fetcher."""
86 return isinstance(getDefaultFetcher(), CurlHTTPFetcher)
87
    """A fetched HTTP response.

    The attributes below are filled in by the fetcher that produced the
    instance.
    """
    # {str: str} of response headers. NOTE(review): CurlHTTPFetcher
    # lower-cases header names; Urllib2Fetcher stores them as received --
    # confirm callers handle both.
    headers = None
    # int HTTP status code (e.g. 200, 404)
    status = None
    # str raw response body
    body = None
    # str URL the response was served from, after following redirects
    final_url = None
95 - def __init__(self, final_url=None, status=None, headers=None, body=None):
100
102 return "<%s status %s for %s>" % (self.__class__.__name__,
103 self.status,
104 self.final_url)
105
107 """
108 This class is the interface for openid HTTP fetchers. This
109 interface is only important if you need to write a new fetcher for
110 some reason.
111 """
112
113 - def fetch(self, url, body=None, headers=None):
114 """
115 This performs an HTTP POST or GET, following redirects along
116 the way. If a body is specified, then the request will be a
117 POST. Otherwise, it will be a GET.
118
119
120 @param headers: HTTP headers to include with the request
121 @type headers: {str:str}
122
123 @return: An object representing the server's HTTP response. If
124 there are network or protocol errors, an exception will be
125 raised. HTTP error responses, like 404 or 500, do not
126 cause exceptions.
127
128 @rtype: L{HTTPResponse}
129
130 @raise Exception: Different implementations will raise
131 different errors based on the underlying HTTP library.
132 """
133 raise NotImplementedError
134
136 return url.startswith('http://') or url.startswith('https://')
137
139 """Exception that is wrapped around all exceptions that are raised
140 by the underlying fetcher when using the ExceptionWrappingFetcher
141
142 @ivar why: The exception that caused this exception
143 """
145 Exception.__init__(self, why)
146 self.why = why
147
    """Fetcher that wraps another fetcher, causing all exceptions it
    raises (other than those in C{uncaught_exceptions}) to be wrapped
    in L{HTTPFetchingError}.

    @cvar uncaught_exceptions: Exceptions that should be exposed to the
        user if they are raised by the fetch call
    """

    # Fatal / control-flow exceptions that must never be masked.
    uncaught_exceptions = (SystemExit, KeyboardInterrupt, MemoryError)
156
158 self.fetcher = fetcher
159
    def fetch(self, *args, **kwargs):
        """Call the wrapped fetcher's fetch, re-raising exceptions
        listed in C{uncaught_exceptions} unchanged and wrapping every
        other exception in L{HTTPFetchingError}.
        """
        try:
            return self.fetcher.fetch(*args, **kwargs)
        except self.uncaught_exceptions:
            raise
        except:
            exc_cls, exc_inst = sys.exc_info()[:2]
            if exc_inst is None:
                # Presumably old-style Python 2 string exceptions, where
                # exc_info() carries no instance -- TODO confirm.
                exc_inst = exc_cls

            raise HTTPFetchingError(why=exc_inst)
172
174 """An C{L{HTTPFetcher}} that uses urllib2.
175 """
176 - def fetch(self, url, body=None, headers=None):
177 if not _allowedURL(url):
178 raise ValueError('Bad URL scheme: %r' % (url,))
179
180 if headers is None:
181 headers = {}
182
183 headers.setdefault(
184 'User-Agent',
185 "%s Python-urllib/%s" % (USER_AGENT, urllib2.__version__,))
186
187 req = urllib2.Request(url, data=body, headers=headers)
188 try:
189 f = urllib2.urlopen(req)
190 try:
191 return self._makeResponse(f)
192 finally:
193 f.close()
194 except urllib2.HTTPError, why:
195 try:
196 return self._makeResponse(why)
197 finally:
198 why.close()
199
        resp = HTTPResponse()
        resp.body = urllib2_response.read()
        resp.final_url = urllib2_response.geturl()
        resp.headers = dict(urllib2_response.info().items())

        # Responses without a 'code' attribute default to 200 --
        # presumably non-HTTP URL opens that succeeded; TODO confirm.
        if hasattr(urllib2_response, 'code'):
            resp.status = urllib2_response.code
        else:
            resp.status = 200

        return resp
212
214 """
215 This exception is raised by the C{L{CurlHTTPFetcher}} when it
216 encounters an exceptional situation fetching a URL.
217 """
218 pass
219
220
222 """
223 An C{L{HTTPFetcher}} that uses pycurl for fetching.
224 See U{http://pycurl.sourceforge.net/}.
225 """
    # Total seconds allowed for one fetch, including all redirect hops.
    ALLOWED_TIME = 20
227
229 HTTPFetcher.__init__(self)
230 if pycurl is None:
231 raise RuntimeError('Cannot find pycurl library')
232
234 header_file.seek(0)
235
236
237 unused_http_status_line = header_file.readline()
238 lines = [line.strip() for line in header_file]
239
240
241 empty_line = lines.pop()
242 if empty_line:
243 raise HTTPError("No blank line at end of headers: %r" % (line,))
244
245 headers = {}
246 for line in lines:
247 try:
248 name, value = line.split(':', 1)
249 except ValueError:
250 raise HTTPError(
251 "Malformed HTTP header line in response: %r" % (line,))
252
253 value = value.strip()
254
255
256 name = name.lower()
257 headers[name] = value
258
259 return headers
260
262
263
264 return _allowedURL(url)
265
266 - def fetch(self, url, body=None, headers=None):
267 stop = int(time.time()) + self.ALLOWED_TIME
268 off = self.ALLOWED_TIME
269
270 if headers is None:
271 headers = {}
272
273 headers.setdefault('User-Agent',
274 "%s %s" % (USER_AGENT, pycurl.version,))
275
276 header_list = []
277 if headers is not None:
278 for header_name, header_value in headers.iteritems():
279 header_list.append('%s: %s' % (header_name, header_value))
280
281 c = pycurl.Curl()
282 try:
283 c.setopt(pycurl.NOSIGNAL, 1)
284
285 if header_list:
286 c.setopt(pycurl.HTTPHEADER, header_list)
287
288
289 if body is not None:
290 c.setopt(pycurl.POST, 1)
291 c.setopt(pycurl.POSTFIELDS, body)
292
293 while off > 0:
294 if not self._checkURL(url):
295 raise HTTPError("Fetching URL not allowed: %r" % (url,))
296
297 data = cStringIO.StringIO()
298 response_header_data = cStringIO.StringIO()
299 c.setopt(pycurl.WRITEFUNCTION, data.write)
300 c.setopt(pycurl.HEADERFUNCTION, response_header_data.write)
301 c.setopt(pycurl.TIMEOUT, off)
302 c.setopt(pycurl.URL, openid.urinorm.urinorm(url))
303
304 c.perform()
305
306 response_headers = self._parseHeaders(response_header_data)
307 code = c.getinfo(pycurl.RESPONSE_CODE)
308 if code in [301, 302, 303, 307]:
309 url = response_headers.get('location')
310 if url is None:
311 raise HTTPError(
312 'Redirect (%s) returned without a location' % code)
313
314
315 c.setopt(pycurl.POST, 0)
316
317
318
319 else:
320 resp = HTTPResponse()
321 resp.headers = response_headers
322 resp.status = code
323 resp.final_url = url
324 resp.body = data.getvalue()
325 return resp
326
327 off = stop - int(time.time())
328
329 raise HTTPError("Timed out fetching: %r" % (url,))
330 finally:
331 c.close()
332