Skip to content

Commit

Permalink
support pyhttpx
Browse files Browse the repository at this point in the history
  • Loading branch information
conlin-huang committed Mar 28, 2023
1 parent ce56a4f commit e7f7c1a
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 2 deletions.
2 changes: 1 addition & 1 deletion aioscrapy/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.7
1.2.8
58 changes: 58 additions & 0 deletions aioscrapy/core/downloader/handlers/pyhttpx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import asyncio
import logging
from urllib.parse import urlparse

import pyhttpx

from aioscrapy import Request
from aioscrapy.core.downloader.handlers import BaseDownloadHandler
from aioscrapy.http import HtmlResponse
from aioscrapy.settings import Settings

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class PyhttpxHandler(BaseDownloadHandler):
    """Download handler that performs requests through ``pyhttpx``.

    A fresh ``pyhttpx.HttpSession`` is built for every request from the
    ``PYHTTPX_CLIENT_ARGS`` setting, and the blocking ``session.request``
    call is executed with ``asyncio.to_thread``.
    """

    def __init__(self, settings):
        """Read the handler configuration from ``settings``.

        Must be called while an event loop is running:
        ``asyncio.get_running_loop()`` raises ``RuntimeError`` otherwise.
        """
        self.settings: Settings = settings
        # Keyword arguments forwarded to ``pyhttpx.HttpSession(...)``.
        self.pyhttpx_client_args: dict = self.settings.get('PYHTTPX_CLIENT_ARGS', {})
        self.verify_ssl: bool = self.settings.get("VERIFY_SSL")
        self.loop = asyncio.get_running_loop()

    @classmethod
    def from_settings(cls, settings: Settings):
        """Alternate constructor: build the handler from ``settings``."""
        return cls(settings)

    async def download_request(self, request: Request, _) -> HtmlResponse:
        """Send ``request`` with pyhttpx and wrap the result in an ``HtmlResponse``.

        Args:
            request: The request to download; its method, url, cookies, body,
                headers and ``meta['proxy']`` are read.
            _: Unused second positional argument.

        Returns:
            An ``HtmlResponse`` carrying the URL of ``request`` together with
            the status, headers, body, cookies and encoding of the pyhttpx
            response.
        """
        kwargs = {
            'timeout': self.settings.get('DOWNLOAD_TIMEOUT'),
            'cookies': dict(request.cookies),
            # An empty body is sent as "no data" rather than as an empty payload.
            'data': request.body or None,
            'verify': self.verify_ssl,
            # Fall back to the project-wide default headers when the request has none.
            'headers': request.headers or self.settings.get('DEFAULT_REQUEST_HEADERS'),
        }

        proxy = request.meta.get("proxy")
        if proxy:
            parsed_url = urlparse(proxy)
            # Drop any "user:pass@" prefix so only "host:port" is handed over as
            # the proxy address; credentials are passed separately below.
            kwargs["proxies"] = {'https': parsed_url.netloc.split('@')[-1]}
            if parsed_url.password or parsed_url.username:
                kwargs['proxy_auth'] = (parsed_url.username, parsed_url.password)
            logger.debug(f"use proxy {proxy}: {request.url}")

        # Copy so the shared settings dict is never handed to the session directly.
        session_args = self.pyhttpx_client_args.copy()
        # NOTE(review): the session is never explicitly closed. If
        # pyhttpx.HttpSession offers close()/context-manager support, use it
        # here to release connections - confirm against the pyhttpx docs.
        session = pyhttpx.HttpSession(**session_args)
        # session.request is blocking; run it in a worker thread so the event
        # loop stays responsive.
        response = await asyncio.to_thread(session.request, request.method, request.url, **kwargs)
        return HtmlResponse(
            # Fix: the response previously carried an empty URL (''), which
            # discarded the address it was fetched from.
            request.url,
            status=response.status_code,
            headers=response.headers,
            body=response.content,
            cookies=dict(response.cookies),
            encoding=response.encoding,
        )

    async def close(self):
        """Nothing to release: no long-lived resources are held by the handler."""
        pass
55 changes: 55 additions & 0 deletions example/singlespider/demo_pyhttpx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import logging

from aioscrapy import Request
from aioscrapy.spiders import Spider
from aioscrapy.http import Response

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class DemoPyhttpxSpider(Spider):
    """Example spider that routes http/https downloads through PyhttpxHandler."""

    name = 'DemoPyhttpxSpider'

    custom_settings = {
        'USER_AGENT': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        # 'DOWNLOAD_DELAY': 3,
        # 'RANDOMIZE_DOWNLOAD_DELAY': True,
        'CONCURRENT_REQUESTS': 1,
        'LOG_LEVEL': 'INFO',
        'CLOSE_SPIDER_ON_IDLE': True,
        # Use the pyhttpx handler for both URL schemes.
        'DOWNLOAD_HANDLERS': {
            'http': 'aioscrapy.core.downloader.handlers.pyhttpx.PyhttpxHandler',
            'https': 'aioscrapy.core.downloader.handlers.pyhttpx.PyhttpxHandler',
        },
        # Read by the handler and forwarded to pyhttpx.HttpSession.
        'PYHTTPX_CLIENT_ARGS': {
            'browser_type': 'chrome',
            'http2': True,
        },
    }

    start_urls = ['https://tls.peet.ws/api/all']

    @staticmethod
    async def process_request(request, spider):
        """Request-middleware hook; does nothing and returns None."""

    @staticmethod
    async def process_response(request, response, spider):
        """Response-middleware hook; hands the response back unchanged."""
        return response

    @staticmethod
    async def process_exception(request, exception, spider):
        """Exception-middleware hook; does nothing and returns None."""

    async def parse(self, response: Response):
        """Print the text of the downloaded response."""
        print(response.text)

    async def process_item(self, item):
        """Print the item it is given."""
        print(item)


# Start the demo spider only when this file is executed as a script.
if __name__ == "__main__":
    DemoPyhttpxSpider.start()
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@
# Optional dependency groups keyed by extra name; "all" lists the packages of every group.
extras_require = {
"all": [
"aiomysql>=0.1.1", "httpx[http2]>=0.23.0", "aio-pika>=8.1.1",
"cryptography", "motor>=3.1.1", "playwright>=1.31.1"
"cryptography", "motor>=3.1.1", "playwright>=1.31.1", "pyhttpx>=2.10.1"
],
"aiomysql": ["aiomysql>=0.1.1", "cryptography"],
"httpx": ["httpx[http2]>=0.23.0"],
"aio-pika": ["aio-pika>=8.1.1"],
"mongo": ["motor>=3.1.1"],
"playwright": ["playwright>=1.31.1"],
"pyhttpx": ["pyhttpx>=2.10.1"]
}

setup(
Expand Down

0 comments on commit e7f7c1a

Please sign in to comment.