forked from gevent/gevent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebproxy.py
executable file
·132 lines (114 loc) · 4.35 KB
/
webproxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python
"""A web application that retrieves other websites for you.
To start serving the application on port 8088, type
python webproxy.py
To start the server on some other interface/port, use
python -m gevent.wsgi -p 8000 -i 0.0.0.0 webproxy.py
"""
from __future__ import print_function

# Monkey-patching has to run before the modules it patches are imported.
from gevent import monkey; monkey.patch_all()

import re
import sys
import traceback

try:
    from html import escape
except ImportError:  # Python 2 has no ``html`` module
    from cgi import escape

try:
    import urllib2
    from urlparse import urlparse
    from urllib import unquote
except ImportError:
    from urllib import request as urllib2
    from urllib.parse import urlparse
    from urllib.parse import unquote
# Address handed to WSGIServer when this module is run as a script; the
# module docstring describes it as serving on port 8088.
LISTEN = ":8088"
def application(env, start_response):
    """WSGI entry point of the web proxy.

    Routing:

    * ``GET /`` serves the URL entry form.
    * ``GET /<url>`` fetches ``<url>`` (query string included) via ``proxy``.
    * ``POST /`` reads the ``url`` form field and redirects to ``/<url>``;
      a body without a ``url`` field is answered with ``400 Bad Request``.
    * ``POST`` to any other path yields ``404 Not Found``.
    * Every other method yields ``501 Not Implemented``.

    Args:
        env: The WSGI environ dictionary.
        start_response: The WSGI ``start_response`` callable.

    Returns:
        An iterable with the response body (empty for redirects and errors).
    """
    proxy_url = 'http://%s/' % env['HTTP_HOST']
    method = env['REQUEST_METHOD']
    path = env['PATH_INFO']
    if env['QUERY_STRING']:
        path += '?' + env['QUERY_STRING']
    path = path.lstrip('/')

    if method == 'GET':
        if not path:
            start_response('200 OK', [('Content-Type', 'text/html')])
            return [_as_bytes(FORM)]
        return proxy(path, start_response, proxy_url)

    if method != 'POST':
        start_response('501 Not Implemented', [])
    elif path:
        start_response('404 Not Found', [])
    else:
        target = _posted_url(env['wsgi.input'].read())
        if target is None:
            # Malformed form data is a client error, not a server crash.
            start_response('400 Bad Request', [])
        else:
            start_response('302 Found', [('Location', join(proxy_url, target))])
    return []


def _as_bytes(content):
    """Return *content* as a byte string, UTF-8 encoding it if it is text."""
    if isinstance(content, bytes):
        return content
    return content.encode('utf-8')


def _posted_url(body):
    """Return the unquoted ``url`` field of a form body, or None if absent."""
    if not isinstance(body, str):
        # Python 3 hands over bytes; latin-1 decoding cannot fail.
        body = body.decode('latin-1')
    # Split on the first '=' only, so a URL containing '=' stays intact.
    key, separator, value = body.strip().partition('=')
    if not separator or key != 'url':
        return None
    return unquote(value)
def proxy(path, start_response, proxy_url):
    """Fetch *path* from its origin server and relay it as a WSGI response.

    Args:
        path: Target URL taken from the request; ``http://`` is prepended
            when it carries no scheme.
        start_response: The WSGI ``start_response`` callable.
        proxy_url: Base URL of this proxy, handed to ``fix_links`` so that
            links in the fetched document point back at the proxy.

    Returns:
        A one-element list holding the response body: the fetched document
        with its links rewritten, or an HTML error page (sent with status
        ``502 Bad Gateway``) when the document could not be fetched.
    """
    if '://' not in path:
        path = 'http://' + path

    try:
        try:
            response = urllib2.urlopen(path)
        except urllib2.HTTPError as http_error:
            # An HTTP error still carries a status, headers and a body to relay.
            response = http_error
        print('%s: %s %s' % (path, response.code, response.msg))
        headers = []
        for name, value in response.headers.items():
            # Header names are case-insensitive, so compare them lower-cased.
            lowered = name.lower()
            # The body is rewritten below, which makes the upstream
            # Content-Length stale; leave the framing to the WSGI server.
            if lowered in drop_headers or lowered == 'content-length':
                continue
            headers.append((name, value))
        # Separate names keep ``path`` intact for the error report below.
        scheme, netloc = urlparse(path)[:2]
        host = (scheme or 'http') + '://' + netloc
    except Exception as err:
        sys.stderr.write('error while reading %s:\n' % path)
        traceback.print_exc()
        tb = traceback.format_exc()
        start_response('502 Bad Gateway', [('Content-Type', 'text/html')])
        error_str = escape(str(err) or err.__class__.__name__ or 'Error')
        page = '<h1>%s</h1><h2>%s</h2><pre>%s</pre>' % (error_str, escape(path), escape(tb))
        if not isinstance(page, bytes):
            # WSGI response bodies must be byte strings.
            page = page.encode('utf-8')
        return [page]
    else:
        try:
            data = response.read()
        finally:
            response.close()
        data = fix_links(data, proxy_url, host)
        start_response('%s %s' % (response.code, response.msg), headers)
        return [data]
def join(url1, *rest):
    """Concatenate URL pieces with exactly one slash at every seam.

    Each further piece is appended to the result so far: a doubled slash at
    the seam collapses into one, and a slash is inserted when neither side
    supplies it. Called with a single argument, that argument is returned
    unchanged.
    """
    joined = url1
    for piece in rest:
        ends_with_slash = joined.endswith('/')
        starts_with_slash = piece.startswith('/')
        if ends_with_slash and starts_with_slash:
            joined += piece[1:]
        elif ends_with_slash or starts_with_slash:
            joined += piece
        else:
            joined += '/' + piece
    return joined
def fix_links(data, proxy_url, host_url):
    """Rewrite ``href``/``src``/``action`` URLs so they go through the proxy.

    Absolute URLs (those containing ``://``) are prefixed with *proxy_url*;
    all others are prefixed with *proxy_url* and *host_url*. Rewritten
    values are always emitted in double quotes.

    Args:
        data: The document, as text or as bytes.
        proxy_url: Base URL of the proxy.
        host_url: Scheme and host of the site the document came from.

    Returns:
        The rewritten document, of the same type as *data*.

    >>> fix_links("><img src=images/hp0.gif width=158", 'http://127.0.0.1:8088', 'www.google.com')
    '><img src="http://127.0.0.1:8088/www.google.com/images/hp0.gif" width=158'
    """
    def fix_link_cb(m):
        url = m.group('url')
        if '://' in url:
            target = join(proxy_url, url)
        else:
            target = join(proxy_url, host_url, url)
        return m.group('before') + '"' + target + '"'

    # A body read on Python 3 is bytes, which the text patterns below reject.
    # latin-1 maps every byte to one code point and back, so the round trip
    # is lossless whatever the document's real encoding is.
    binary = isinstance(data, bytes) and not isinstance(data, str)
    if binary:
        data = data.decode('latin-1')
    data = _link_re_1.sub(fix_link_cb, data)
    data = _link_re_2.sub(fix_link_cb, data)
    if binary:
        data = data.encode('latin-1')
    return data


# Quoted attribute values; values starting with '#' are left alone.
_link_re_1 = re.compile(r'''(?P<before>(href|src|action)\s*=\s*)(?P<quote>['"])(?P<url>[^#].*?)(?P=quote)''')
# Unquoted attribute values, running up to the next space or '>'.
_link_re_2 = re.compile(r'''(?P<before>(href|src|action)\s*=\s*)(?P<url>[^'"#>][^ >]*)''')
# Names of upstream response headers that proxy() filters out of the header
# list it passes on to the client.
drop_headers = ['transfer-encoding', 'set-cookie']
# Landing page: a single text field whose value is POSTed back to "/".
# Stored as bytes because WSGI response bodies must be byte strings.
FORM = b"""<html><head>
<title>Web Proxy - gevent example</title></head><body>
<table width=60% height=100% align=center>
<tr height=30%><td align=center valign=bottom>Type in URL you want to visit and press Enter</td></tr>
<tr><td align=center valign=top>
<form action=/ method=post>
<input size=80 name=url value="http://www.gevent.org"/>
</form>
</td></tr>
</table></body></html>
"""
if __name__ == '__main__':
    # Script entry point: announce the address, then serve until interrupted.
    # The server class is imported here, so it is only loaded when the module
    # is executed directly.
    from gevent.pywsgi import WSGIServer

    print('Serving on %s...' % LISTEN)
    server = WSGIServer(LISTEN, application)
    server.serve_forever()