firebot/webretriever.py

83 lines
2.3 KiB
Python
Executable File

#! /usr/bin/env python
import asynchat
import adns
import urlparse
import socket
# Shared adns resolver state; WebRetriever submits its A-record lookups to it.
resolver = adns.init()
# Optional HTTP proxy as a (host, port) pair.  When set, WebRetriever connects
# to the proxy and requests the full URL; a false value (None) means the
# connection goes straight to the host named in the URL.
proxy = None
class WebRetriever(asynchat.async_chat):
    """Fetch a single ``http://`` URL without blocking and deliver its body.

    The host name is resolved through the module-level adns ``resolver``; the
    TCP connection is only started once an answer is available.  The response
    is consumed line by line: header lines are discarded, body lines are
    collected.  When the channel is closed, ``body_cb`` is called with the
    list of collected body chunks.

    If the module-level ``proxy`` is set to a ``(host, port)`` pair, the
    connection is made to the proxy and the full URL is sent as the request
    target.
    """

    def __init__(self, url, body_cb):
        """Prepare the request for *url* and submit the DNS lookup.

        url     -- absolute ``http://`` URL to retrieve.
        body_cb -- callable invoked with the list of body chunks when the
                   channel is closed (also after a failed lookup/connect,
                   in which case the list is empty).
        """
        asynchat.async_chat.__init__(self)
        self.body_cb = body_cb
        if proxy:
            # Connect to the proxy and ask it for the absolute URL.
            self.host, self.port = proxy
            self.query = ''
            self.fragment = ''
            self.path = url
            # The Host header must name the origin server, not the proxy.
            self.host_header = urlparse.urlsplit(url)[1] or self.host
        else:
            (self.scheme,
             self.netloc,
             self.path,
             self.query,
             self.fragment) = urlparse.urlsplit(url)
            # NOTE(review): this check disappears under ``python -O``; kept as
            # an assert so callers see the same exception type as before.
            assert self.scheme == 'http'
            try:
                self.host, port = self.netloc.split(':')
                self.port = int(port)
            except ValueError:
                # No (usable) explicit port in the netloc: default HTTP port.
                self.host = self.netloc
                self.port = 80
            # Keeps an explicit non-default port, as the Host header requires.
            self.host_header = self.netloc
        self.set_terminator('\n')
        self.in_headers = True
        self.inbuf = ''
        self.body = []
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.dnsq = resolver.submit(self.host, adns.rr.A)
        self.resolved = False

    def _fail(self):
        """Give up before a connection exists and notify the callback."""
        # Clearing both attributes makes readable()/writable() report False,
        # so the already-closed descriptor is never handed to select().
        self.dnsq = None
        self.resolved = False
        self.handle_close()

    def readable(self):
        """Poll the DNS query; start connecting as soon as it has answered.

        Returns False while the lookup is pending or after it has failed,
        otherwise defers to ``async_chat.readable``.
        """
        if not self.resolved:
            if self.dnsq is None:
                # The lookup or connect already failed; channel is closed.
                return False
            try:
                answer = self.dnsq.check()
            except adns.NotReady:
                return False
            # Presumably answer[3] is the tuple of resolved addresses (the
            # original code indexed it the same way) -- confirm against the
            # adns-python documentation.
            addresses = answer[3]
            if not addresses:
                # Lookup finished without any address.  Previously this
                # raised IndexError out of the asyncore polling loop.
                self._fail()
                return False
            self.resolved = answer
            try:
                self.connect((addresses[0], self.port))
            except socket.error:
                # Immediate connect failure; don't let it escape the loop.
                self._fail()
                return False
        return asynchat.async_chat.readable(self)

    def writable(self):
        """Report writability only once the host name has been resolved."""
        return self.resolved and asynchat.async_chat.writable(self)

    def collect_incoming_data(self, data):
        """Accumulate received data belonging to the current line."""
        self.inbuf += data

    def handle_connect(self):
        """Send the HTTP/1.0 GET request once the connection is established."""
        # An empty path (e.g. 'http://host') must be requested as '/',
        # otherwise the request line is malformed.
        target = urlparse.urlunsplit(
            (None, None, self.path or '/', self.query, self.fragment))
        self.push('GET %s HTTP/1.0\r\n' % target)
        self.push('Host: %s\r\n' % self.host_header)
        self.push('\r\n')

    def found_terminator(self):
        """Process one complete line: skip headers, store body lines."""
        data, self.inbuf = self.inbuf, ''
        if self.in_headers:
            # The first blank line marks the end of the header section.
            if not data.strip():
                self.in_headers = False
        else:
            # Put back the terminator that asynchat stripped from the line.
            self.body.append(data + self.get_terminator())

    def handle_close(self):
        """Close the channel and pass the collected body to ``body_cb``."""
        asynchat.async_chat.close(self)
        if self.inbuf and not self.in_headers:
            # A final body line without a trailing newline never reaches
            # found_terminator(); keep it instead of silently dropping it.
            self.body.append(self.inbuf)
            self.inbuf = ''
        self.body_cb(self.body)
if __name__ == '__main__':
    # Manual smoke test: retrieve one quote CSV and dump it to stdout.
    import asyncore

    def show_body(chunks):
        """Print the retrieved body chunks joined into a single string."""
        text = ''.join(chunks)
        print(text)

    retriever = WebRetriever(
        'http://quote.yahoo.com/d/quotes.csv?s=wgrd&f=sl1d1t1c1ohgvj1pp2owern&e=.csv',
        show_body)
    # Runs until every channel (here: the single retriever) has closed.
    asyncore.loop()