修改_make_response(),未完成

This commit is contained in:
g1879 2022-04-10 19:13:57 +08:00
parent a89c6941c1
commit b39470f922
4 changed files with 86 additions and 121 deletions

View File

@ -350,11 +350,3 @@ class BasePage(BaseParser):
retry: int = None,
interval: float = None):
pass
@abstractmethod
def _try_to_connect(
        self,
        to_url: str,
        times: int = 0,
        interval: float = 1,
        show_errmsg: bool = False,
):
    """Abstract hook: try to connect to ``to_url``, retrying on failure.

    This stub has no behavior of its own and returns None; the page classes
    provide the real implementation.

    :param to_url: URL to visit
    :param times: number of retries
    :param interval: retry interval
    :param show_errmsg: whether to raise on failure
    """

View File

@ -9,6 +9,7 @@ from typing import Union
from requests import Session
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
from selenium import webdriver
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
from selenium.webdriver.chrome.options import Options
@ -316,12 +317,13 @@ class Drission(object):
if self._session is None:
self._session = Session()
attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify',
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
if 'headers' in data:
self._session.headers = CaseInsensitiveDict(data['headers'])
if 'cookies' in data:
self.set_cookies(data['cookies'], set_session=True)
attrs = ['auth', 'proxies', 'hooks', 'params', 'verify',
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
for i in attrs:
if i in data:
self._session.__setattr__(i, data[i])

View File

@ -204,26 +204,26 @@ class MixPage(SessionPage, DriverPage, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).get_cookies(as_dict)
def _try_to_connect(self,
                    to_url: str,
                    times: int = 0,
                    interval: float = 1,
                    mode: str = 'get',
                    data: dict = None,
                    show_errmsg: bool = False,
                    **kwargs):
    """Connect to ``to_url`` with retries, delegating to the parent class of the active mode.

    In 's' mode every argument is forwarded to the next ``_try_to_connect``
    in the MRO. In 'd' mode the lookup starts after ``SessionPage`` and only
    ``to_url``, ``times``, ``interval`` and ``show_errmsg`` are passed on;
    ``mode``, ``data`` and ``kwargs`` are not used.

    :param to_url: URL to visit
    :param times: number of retries
    :param interval: retry interval
    :param mode: request method, forwarded in 's' mode only
    :param data: request data, forwarded in 's' mode only
    :param show_errmsg: whether to raise on failure
    :param kwargs: connection arguments, forwarded in 's' mode only
    :return: whatever the delegated implementation returns; None for any other mode
    """
    active_mode = self._mode

    if active_mode == 's':
        return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)

    if active_mode == 'd':
        # Start the MRO lookup after SessionPage so the driver-side implementation is used.
        return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)

    return None
# def _try_to_connect(self,
# to_url: str,
# times: int = 0,
# interval: float = 1,
# mode: str = 'get',
# data: dict = None,
# show_errmsg: bool = False,
# **kwargs):
# """尝试连接,重试若干次 \n
# :param to_url: 要访问的url
# :param times: 重试次数
# :param interval: 重试间隔(秒)
# :param show_errmsg: 是否抛出异常
# :param kwargs: 连接参数
# :return: s模式为Response对象d模式为bool或None
# """
# if self._mode == 'd':
# return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)
# elif self._mode == 's':
# return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)
# ----------------MixPage独有属性和方法-----------------------
@property
@ -336,7 +336,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
# 使用requests访问url并判断可用性
if by_requests:
self.cookies_to_session()
r = self._make_response(self.url, **{'timeout': 3})[0]
r = self._make_response(self.url)[0]
return r.ok if r else False
def close_driver(self) -> None:

View File

@ -163,46 +163,46 @@ class SessionPage(BasePage):
else:
return [_cookie_to_dict(cookie) for cookie in cookies]
def _try_to_connect(self,
                    to_url: str,
                    times: int = 0,
                    interval: float = 1,
                    mode: str = 'get',
                    data: Union[dict, str] = None,
                    show_errmsg: bool = False,
                    **kwargs) -> Union[Response, None]:
    """Request ``to_url``, retrying up to ``times`` more times after the first attempt.

    An attempt ends the loop when it yields a truthy response with a
    non-empty body, or a response whose status code is 403 or 404 (retrying
    those is pointless). Any other outcome is retried after sleeping
    ``interval`` seconds.

    :param to_url: URL to visit
    :param times: number of retries after the first attempt
    :param interval: seconds to sleep between attempts
    :param mode: request method, 'get' or 'post'
    :param data: data submitted with a post request
    :param show_errmsg: when True, print a line per retry and raise if the final response is falsy
    :param kwargs: connection arguments forwarded to ``_make_response``
    :return: the response from the last attempt, or None if that attempt raised
    :raises Exception: with ``show_errmsg`` set and a falsy final response, the most
        recently caught exception, or ConnectionError if none was caught
    """
    err = None
    r = None

    for attempt in range(times + 1):
        try:
            # Always ask _make_response to raise, so the failure can be kept for later.
            r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
        except Exception as e:
            err = e
            r = None

        # A requests Response is falsy for 4xx/5xx statuses, so the 403/404 check
        # must be guarded by `is not None` rather than truthiness; with `if r and ...`
        # it could never match and those replies were retried needlessly.
        if r is not None and (r.status_code in (403, 404) or (r and r.content != b'')):
            break

        if attempt < times:
            sleep(interval)
            if show_errmsg:
                print(f'重试 {to_url}')

    if not r and show_errmsg:
        raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')

    return r
# def _try_to_connect(self,
# to_url: str,
# times: int = 0,
# interval: float = 1,
# mode: str = 'get',
# data: Union[dict, str] = None,
# show_errmsg: bool = False,
# **kwargs) -> Union[Response, None]:
# """尝试连接,重试若干次 \n
# :param to_url: 要访问的url
# :param times: 重试次数
# :param interval: 重试间隔(秒)
# :param mode: 连接方式,'get' 或 'post'
# :param data: post方式提交的数据
# :param show_errmsg: 是否抛出异常
# :param kwargs: 连接参数
# :return: HTMLResponse对象
# """
# err = None
# r = None
#
# for _ in range(times + 1):
# try:
# r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
# except Exception as e:
# err = e
# r = None
#
# if r and (r.content != b'' or r.status_code in (403, 404)):
# break
#
# if _ < times:
# sleep(interval)
# if show_errmsg:
# print(f'重试 {to_url}')
#
# if not r and show_errmsg:
# raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')
#
# return r
# ----------------session独有属性和方法-----------------------
@property
@ -286,8 +286,9 @@ class SessionPage(BasePage):
kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])
# 设置referer和host值
hostname = urlparse(url).hostname
scheme = urlparse(url).scheme
parsed_url = urlparse(url)
hostname = parsed_url.hostname
scheme = parsed_url.scheme
if not _check_headers(kwargs, self.session.headers, 'Referer'):
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
if 'Host' not in kwargs['headers']:
@ -300,71 +301,41 @@ class SessionPage(BasePage):
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
for i in range(retry + 1):
e = None
try:
if mode == 'get':
r = self.session.get(url, **kwargs)
elif mode == 'post':
r = self.session.post(url, data=data, **kwargs)
raise ConnectionError
print(r.url)
if r:
print(r.request.headers)
e = 'Success'
r = _set_charset(r)
return r, e
return _set_charset(r), 'Success'
except Exception as e:
if show_errmsg:
raise e
pass
# if r and (r.content != b'' or r.status_code in (403, 404)):
# break
if i < retry:
sleep(interval)
if show_errmsg:
print(f'重试 {url}')
if r is None:
return None, '连接失败'
if show_errmsg:
if e:
raise e
else:
raise ConnectionError('连接失败')
return None, '连接失败' if e is None else e
if not r.ok:
if show_errmsg:
raise ConnectionError(f'状态码:{r.status_code}')
return r, f'状态码:{r.status_code}'
# try:
# r = None
# if mode == 'get':
# r = self.session.get(url, **kwargs)
# elif mode == 'post':
# r = self.session.post(url, data=data, **kwargs)
#
# if r is None:
# return None, '连接失败'
#
# except Exception as e:
# if show_errmsg:
# raise e
#
# return None, e
#
# else:
# # ----------------获取并设置编码开始-----------------
# # 在headers中获取编码
# content_type = r.headers.get('content-type', '').lower()
# charset = search(r'charset[=: ]*(.*)?[;]', content_type)
#
# if charset:
# r.encoding = charset.group(1)
#
# # 在headers中获取不到编码且如果是网页
# elif content_type.replace(' ', '').startswith('text/html'):
# re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
#
# if re_result:
# charset = re_result.group(1).decode()
# else:
# charset = r.apparent_encoding
#
# r.encoding = charset
# # ----------------获取并设置编码结束-----------------
#
# return r, 'Success'
def _check_headers(kwargs, headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool:
"""检查kwargs或headers中是否有arg所示属性"""