Skip to content

Commit

Permalink
title added
Browse files Browse the repository at this point in the history
  • Loading branch information
fagci committed Mar 30, 2021
1 parent cb7d0db commit 51ba7ad
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 8 deletions.
29 changes: 23 additions & 6 deletions fortune_http_unseen.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,34 @@
# File that check_host appends one line to for every matching host.
LOG_FILE = LOCAL_DIR / 'http_unseen.txt'
# Matches a robots.txt "User-agent: *" line directly followed by a
# "Disallow: /" line. Case-insensitive; with MULTILINE, ^ and $ anchor at
# line boundaries, and at least one whitespace character is required after
# each colon.
DISALLOW_RE = re.compile(
r'^User-agent:\s+\*$[\n\r]+^Disallow:\s+/$', re.IGNORECASE | re.MULTILINE)
# Capture the text that follows an opening <title ...> tag, up to the next '<'.
TITLE_RE = re.compile(r'<title[^>]*>([^<]+)', re.IGNORECASE)
# Same capture for an opening <h1 ...> tag; check_host uses it as a fallback.
H1_RE = re.compile(r'<h1[^>]*>([^<]+)', re.IGNORECASE)


def check_host(ip, lock):
    """Report a host whose robots.txt forbids all crawling.

    Fetches ``/robots.txt`` from ``ip`` on port 80. When the reply is OK and
    matches ``DISALLOW_RE``, fetches ``/`` on a fresh connection, extracts a
    human-readable title (``<title>`` first, ``<h1>`` as fallback, ``'-'``
    when neither yields text) and, while holding ``lock``, prints
    ``ip title`` and appends the same line to ``LOG_FILE``.

    Args:
        ip: Address of the host to probe.
        lock: Context manager serializing console and log-file output
            between concurrent callers.

    Returns:
        None. The function returns early, without output, when the host
        does not qualify.
    """
    with HTTPConnection(ip, 80, timeout=1.5) as c:
        response = c.get('/robots.txt')

    # get() builds a Response only for replies that start with 'HTTP/';
    # presumably it yields None otherwise -- TODO confirm in lib/net.py.
    if response is None or not response.ok:
        return

    if not DISALLOW_RE.search(response.body):
        return

    # A second connection is used because servers often close the first
    # one after a single request.
    # NOTE(review): unlike the robots.txt request, no timeout is passed
    # here -- confirm HTTPConnection's default cannot block a worker.
    with HTTPConnection(ip, 80) as c:
        page_response = c.get('/')

    # Same caveat as above: tolerate a missing response instead of raising.
    page = page_response.body if page_response is not None else ''

    # Prefer <title>; fall back to the first <h1>.
    match = TITLE_RE.search(page) or H1_RE.search(page)
    title = match.group(1) if match else '-'
    # Keep the log entry on one line; a whitespace-only title becomes '-'.
    title = title.strip().replace('\n', ' ').replace('\r', '') or '-'

    with lock:
        print(ip, title)
        with LOG_FILE.open('a') as f:
            f.write('%s %s\n' % (ip, title))


def main(c=10_000_000, w=16):
Expand Down
10 changes: 8 additions & 2 deletions lib/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,14 @@ def get(self, url):

try:
connection.sendall(str(request).encode())
# TODO: get overall response
data = connection.recv(1024).decode(errors='ignore')
maxlen = 1024 * 1024 * 128
data_bytes = b''
while True:
d = connection.recv(1024)
if not d or len(data_bytes) > maxlen:
break
data_bytes += d
data = data_bytes.decode(errors='ignore')
if data.startswith('HTTP/'):
return Response(data)
except OSError:
Expand Down

0 comments on commit 51ba7ad

Please sign in to comment.