mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
修改_make_response(),未完成
This commit is contained in:
parent
a89c6941c1
commit
b39470f922
@ -350,11 +350,3 @@ class BasePage(BaseParser):
|
||||
retry: int = None,
|
||||
interval: float = None):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _try_to_connect(self,
|
||||
to_url: str,
|
||||
times: int = 0,
|
||||
interval: float = 1,
|
||||
show_errmsg: bool = False, ):
|
||||
pass
|
||||
|
@ -9,6 +9,7 @@ from typing import Union
|
||||
|
||||
from requests import Session
|
||||
from requests.cookies import RequestsCookieJar
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
@ -316,12 +317,13 @@ class Drission(object):
|
||||
if self._session is None:
|
||||
self._session = Session()
|
||||
|
||||
attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify',
|
||||
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
|
||||
|
||||
if 'headers' in data:
|
||||
self._session.headers = CaseInsensitiveDict(data['headers'])
|
||||
if 'cookies' in data:
|
||||
self.set_cookies(data['cookies'], set_session=True)
|
||||
|
||||
attrs = ['auth', 'proxies', 'hooks', 'params', 'verify',
|
||||
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
|
||||
for i in attrs:
|
||||
if i in data:
|
||||
self._session.__setattr__(i, data[i])
|
||||
|
@ -204,26 +204,26 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).get_cookies(as_dict)
|
||||
|
||||
def _try_to_connect(self,
|
||||
to_url: str,
|
||||
times: int = 0,
|
||||
interval: float = 1,
|
||||
mode: str = 'get',
|
||||
data: dict = None,
|
||||
show_errmsg: bool = False,
|
||||
**kwargs):
|
||||
"""尝试连接,重试若干次 \n
|
||||
:param to_url: 要访问的url
|
||||
:param times: 重试次数
|
||||
:param interval: 重试间隔(秒)
|
||||
:param show_errmsg: 是否抛出异常
|
||||
:param kwargs: 连接参数
|
||||
:return: s模式为Response对象,d模式为bool或None
|
||||
"""
|
||||
if self._mode == 'd':
|
||||
return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)
|
||||
elif self._mode == 's':
|
||||
return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)
|
||||
# def _try_to_connect(self,
|
||||
# to_url: str,
|
||||
# times: int = 0,
|
||||
# interval: float = 1,
|
||||
# mode: str = 'get',
|
||||
# data: dict = None,
|
||||
# show_errmsg: bool = False,
|
||||
# **kwargs):
|
||||
# """尝试连接,重试若干次 \n
|
||||
# :param to_url: 要访问的url
|
||||
# :param times: 重试次数
|
||||
# :param interval: 重试间隔(秒)
|
||||
# :param show_errmsg: 是否抛出异常
|
||||
# :param kwargs: 连接参数
|
||||
# :return: s模式为Response对象,d模式为bool或None
|
||||
# """
|
||||
# if self._mode == 'd':
|
||||
# return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)
|
||||
# elif self._mode == 's':
|
||||
# return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)
|
||||
|
||||
# ----------------MixPage独有属性和方法-----------------------
|
||||
@property
|
||||
@ -336,7 +336,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
# 使用requests访问url并判断可用性
|
||||
if by_requests:
|
||||
self.cookies_to_session()
|
||||
r = self._make_response(self.url, **{'timeout': 3})[0]
|
||||
r = self._make_response(self.url)[0]
|
||||
return r.ok if r else False
|
||||
|
||||
def close_driver(self) -> None:
|
||||
|
@ -163,46 +163,46 @@ class SessionPage(BasePage):
|
||||
else:
|
||||
return [_cookie_to_dict(cookie) for cookie in cookies]
|
||||
|
||||
def _try_to_connect(self,
|
||||
to_url: str,
|
||||
times: int = 0,
|
||||
interval: float = 1,
|
||||
mode: str = 'get',
|
||||
data: Union[dict, str] = None,
|
||||
show_errmsg: bool = False,
|
||||
**kwargs) -> Union[Response, None]:
|
||||
"""尝试连接,重试若干次 \n
|
||||
:param to_url: 要访问的url
|
||||
:param times: 重试次数
|
||||
:param interval: 重试间隔(秒)
|
||||
:param mode: 连接方式,'get' 或 'post'
|
||||
:param data: post方式提交的数据
|
||||
:param show_errmsg: 是否抛出异常
|
||||
:param kwargs: 连接参数
|
||||
:return: HTMLResponse对象
|
||||
"""
|
||||
err = None
|
||||
r = None
|
||||
|
||||
for _ in range(times + 1):
|
||||
try:
|
||||
r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
|
||||
except Exception as e:
|
||||
err = e
|
||||
r = None
|
||||
|
||||
if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||
break
|
||||
|
||||
if _ < times:
|
||||
sleep(interval)
|
||||
if show_errmsg:
|
||||
print(f'重试 {to_url}')
|
||||
|
||||
if not r and show_errmsg:
|
||||
raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')
|
||||
|
||||
return r
|
||||
# def _try_to_connect(self,
|
||||
# to_url: str,
|
||||
# times: int = 0,
|
||||
# interval: float = 1,
|
||||
# mode: str = 'get',
|
||||
# data: Union[dict, str] = None,
|
||||
# show_errmsg: bool = False,
|
||||
# **kwargs) -> Union[Response, None]:
|
||||
# """尝试连接,重试若干次 \n
|
||||
# :param to_url: 要访问的url
|
||||
# :param times: 重试次数
|
||||
# :param interval: 重试间隔(秒)
|
||||
# :param mode: 连接方式,'get' 或 'post'
|
||||
# :param data: post方式提交的数据
|
||||
# :param show_errmsg: 是否抛出异常
|
||||
# :param kwargs: 连接参数
|
||||
# :return: HTMLResponse对象
|
||||
# """
|
||||
# err = None
|
||||
# r = None
|
||||
#
|
||||
# for _ in range(times + 1):
|
||||
# try:
|
||||
# r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
|
||||
# except Exception as e:
|
||||
# err = e
|
||||
# r = None
|
||||
#
|
||||
# if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||
# break
|
||||
#
|
||||
# if _ < times:
|
||||
# sleep(interval)
|
||||
# if show_errmsg:
|
||||
# print(f'重试 {to_url}')
|
||||
#
|
||||
# if not r and show_errmsg:
|
||||
# raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')
|
||||
#
|
||||
# return r
|
||||
|
||||
# ----------------session独有属性和方法-----------------------
|
||||
@property
|
||||
@ -286,8 +286,9 @@ class SessionPage(BasePage):
|
||||
kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])
|
||||
|
||||
# 设置referer和host值
|
||||
hostname = urlparse(url).hostname
|
||||
scheme = urlparse(url).scheme
|
||||
parsed_url = urlparse(url)
|
||||
hostname = parsed_url.hostname
|
||||
scheme = parsed_url.scheme
|
||||
if not _check_headers(kwargs, self.session.headers, 'Referer'):
|
||||
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
|
||||
if 'Host' not in kwargs['headers']:
|
||||
@ -300,71 +301,41 @@ class SessionPage(BasePage):
|
||||
retry = retry if retry is not None else self.retry_times
|
||||
interval = interval if interval is not None else self.retry_interval
|
||||
for i in range(retry + 1):
|
||||
e = None
|
||||
try:
|
||||
if mode == 'get':
|
||||
r = self.session.get(url, **kwargs)
|
||||
elif mode == 'post':
|
||||
r = self.session.post(url, data=data, **kwargs)
|
||||
raise ConnectionError
|
||||
|
||||
print(r.url)
|
||||
if r:
|
||||
print(r.request.headers)
|
||||
e = 'Success'
|
||||
r = _set_charset(r)
|
||||
return r, e
|
||||
return _set_charset(r), 'Success'
|
||||
|
||||
except Exception as e:
|
||||
if show_errmsg:
|
||||
raise e
|
||||
pass
|
||||
|
||||
# if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||
# break
|
||||
|
||||
if i < retry:
|
||||
sleep(interval)
|
||||
if show_errmsg:
|
||||
print(f'重试 {url}')
|
||||
|
||||
if r is None:
|
||||
return None, '连接失败'
|
||||
if show_errmsg:
|
||||
if e:
|
||||
raise e
|
||||
else:
|
||||
raise ConnectionError('连接失败')
|
||||
return None, '连接失败' if e is None else e
|
||||
|
||||
if not r.ok:
|
||||
if show_errmsg:
|
||||
raise ConnectionError(f'状态码:{r.status_code}')
|
||||
return r, f'状态码:{r.status_code}'
|
||||
|
||||
# try:
|
||||
# r = None
|
||||
# if mode == 'get':
|
||||
# r = self.session.get(url, **kwargs)
|
||||
# elif mode == 'post':
|
||||
# r = self.session.post(url, data=data, **kwargs)
|
||||
#
|
||||
# if r is None:
|
||||
# return None, '连接失败'
|
||||
#
|
||||
# except Exception as e:
|
||||
# if show_errmsg:
|
||||
# raise e
|
||||
#
|
||||
# return None, e
|
||||
#
|
||||
# else:
|
||||
# # ----------------获取并设置编码开始-----------------
|
||||
# # 在headers中获取编码
|
||||
# content_type = r.headers.get('content-type', '').lower()
|
||||
# charset = search(r'charset[=: ]*(.*)?[;]', content_type)
|
||||
#
|
||||
# if charset:
|
||||
# r.encoding = charset.group(1)
|
||||
#
|
||||
# # 在headers中获取不到编码,且如果是网页
|
||||
# elif content_type.replace(' ', '').startswith('text/html'):
|
||||
# re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
|
||||
#
|
||||
# if re_result:
|
||||
# charset = re_result.group(1).decode()
|
||||
# else:
|
||||
# charset = r.apparent_encoding
|
||||
#
|
||||
# r.encoding = charset
|
||||
# # ----------------获取并设置编码结束-----------------
|
||||
#
|
||||
# return r, 'Success'
|
||||
|
||||
|
||||
def _check_headers(kwargs, headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool:
|
||||
"""检查kwargs或headers中是否有arg所示属性"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user