mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
修改_make_response(),未完成
This commit is contained in:
parent
a89c6941c1
commit
b39470f922
@ -350,11 +350,3 @@ class BasePage(BaseParser):
|
|||||||
retry: int = None,
|
retry: int = None,
|
||||||
interval: float = None):
|
interval: float = None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def _try_to_connect(self,
|
|
||||||
to_url: str,
|
|
||||||
times: int = 0,
|
|
||||||
interval: float = 1,
|
|
||||||
show_errmsg: bool = False, ):
|
|
||||||
pass
|
|
||||||
|
@ -9,6 +9,7 @@ from typing import Union
|
|||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
|
from requests.structures import CaseInsensitiveDict
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
|
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
@ -316,12 +317,13 @@ class Drission(object):
|
|||||||
if self._session is None:
|
if self._session is None:
|
||||||
self._session = Session()
|
self._session = Session()
|
||||||
|
|
||||||
attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify',
|
if 'headers' in data:
|
||||||
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
|
self._session.headers = CaseInsensitiveDict(data['headers'])
|
||||||
|
|
||||||
if 'cookies' in data:
|
if 'cookies' in data:
|
||||||
self.set_cookies(data['cookies'], set_session=True)
|
self.set_cookies(data['cookies'], set_session=True)
|
||||||
|
|
||||||
|
attrs = ['auth', 'proxies', 'hooks', 'params', 'verify',
|
||||||
|
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
|
||||||
for i in attrs:
|
for i in attrs:
|
||||||
if i in data:
|
if i in data:
|
||||||
self._session.__setattr__(i, data[i])
|
self._session.__setattr__(i, data[i])
|
||||||
|
@ -204,26 +204,26 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
|||||||
elif self._mode == 'd':
|
elif self._mode == 'd':
|
||||||
return super(SessionPage, self).get_cookies(as_dict)
|
return super(SessionPage, self).get_cookies(as_dict)
|
||||||
|
|
||||||
def _try_to_connect(self,
|
# def _try_to_connect(self,
|
||||||
to_url: str,
|
# to_url: str,
|
||||||
times: int = 0,
|
# times: int = 0,
|
||||||
interval: float = 1,
|
# interval: float = 1,
|
||||||
mode: str = 'get',
|
# mode: str = 'get',
|
||||||
data: dict = None,
|
# data: dict = None,
|
||||||
show_errmsg: bool = False,
|
# show_errmsg: bool = False,
|
||||||
**kwargs):
|
# **kwargs):
|
||||||
"""尝试连接,重试若干次 \n
|
# """尝试连接,重试若干次 \n
|
||||||
:param to_url: 要访问的url
|
# :param to_url: 要访问的url
|
||||||
:param times: 重试次数
|
# :param times: 重试次数
|
||||||
:param interval: 重试间隔(秒)
|
# :param interval: 重试间隔(秒)
|
||||||
:param show_errmsg: 是否抛出异常
|
# :param show_errmsg: 是否抛出异常
|
||||||
:param kwargs: 连接参数
|
# :param kwargs: 连接参数
|
||||||
:return: s模式为Response对象,d模式为bool或None
|
# :return: s模式为Response对象,d模式为bool或None
|
||||||
"""
|
# """
|
||||||
if self._mode == 'd':
|
# if self._mode == 'd':
|
||||||
return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)
|
# return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg)
|
||||||
elif self._mode == 's':
|
# elif self._mode == 's':
|
||||||
return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)
|
# return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs)
|
||||||
|
|
||||||
# ----------------MixPage独有属性和方法-----------------------
|
# ----------------MixPage独有属性和方法-----------------------
|
||||||
@property
|
@property
|
||||||
@ -336,7 +336,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
|||||||
# 使用requests访问url并判断可用性
|
# 使用requests访问url并判断可用性
|
||||||
if by_requests:
|
if by_requests:
|
||||||
self.cookies_to_session()
|
self.cookies_to_session()
|
||||||
r = self._make_response(self.url, **{'timeout': 3})[0]
|
r = self._make_response(self.url)[0]
|
||||||
return r.ok if r else False
|
return r.ok if r else False
|
||||||
|
|
||||||
def close_driver(self) -> None:
|
def close_driver(self) -> None:
|
||||||
|
@ -163,46 +163,46 @@ class SessionPage(BasePage):
|
|||||||
else:
|
else:
|
||||||
return [_cookie_to_dict(cookie) for cookie in cookies]
|
return [_cookie_to_dict(cookie) for cookie in cookies]
|
||||||
|
|
||||||
def _try_to_connect(self,
|
# def _try_to_connect(self,
|
||||||
to_url: str,
|
# to_url: str,
|
||||||
times: int = 0,
|
# times: int = 0,
|
||||||
interval: float = 1,
|
# interval: float = 1,
|
||||||
mode: str = 'get',
|
# mode: str = 'get',
|
||||||
data: Union[dict, str] = None,
|
# data: Union[dict, str] = None,
|
||||||
show_errmsg: bool = False,
|
# show_errmsg: bool = False,
|
||||||
**kwargs) -> Union[Response, None]:
|
# **kwargs) -> Union[Response, None]:
|
||||||
"""尝试连接,重试若干次 \n
|
# """尝试连接,重试若干次 \n
|
||||||
:param to_url: 要访问的url
|
# :param to_url: 要访问的url
|
||||||
:param times: 重试次数
|
# :param times: 重试次数
|
||||||
:param interval: 重试间隔(秒)
|
# :param interval: 重试间隔(秒)
|
||||||
:param mode: 连接方式,'get' 或 'post'
|
# :param mode: 连接方式,'get' 或 'post'
|
||||||
:param data: post方式提交的数据
|
# :param data: post方式提交的数据
|
||||||
:param show_errmsg: 是否抛出异常
|
# :param show_errmsg: 是否抛出异常
|
||||||
:param kwargs: 连接参数
|
# :param kwargs: 连接参数
|
||||||
:return: HTMLResponse对象
|
# :return: HTMLResponse对象
|
||||||
"""
|
# """
|
||||||
err = None
|
# err = None
|
||||||
r = None
|
# r = None
|
||||||
|
#
|
||||||
for _ in range(times + 1):
|
# for _ in range(times + 1):
|
||||||
try:
|
# try:
|
||||||
r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
|
# r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0]
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
err = e
|
# err = e
|
||||||
r = None
|
# r = None
|
||||||
|
#
|
||||||
if r and (r.content != b'' or r.status_code in (403, 404)):
|
# if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||||
break
|
# break
|
||||||
|
#
|
||||||
if _ < times:
|
# if _ < times:
|
||||||
sleep(interval)
|
# sleep(interval)
|
||||||
if show_errmsg:
|
# if show_errmsg:
|
||||||
print(f'重试 {to_url}')
|
# print(f'重试 {to_url}')
|
||||||
|
#
|
||||||
if not r and show_errmsg:
|
# if not r and show_errmsg:
|
||||||
raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')
|
# raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}')
|
||||||
|
#
|
||||||
return r
|
# return r
|
||||||
|
|
||||||
# ----------------session独有属性和方法-----------------------
|
# ----------------session独有属性和方法-----------------------
|
||||||
@property
|
@property
|
||||||
@ -286,8 +286,9 @@ class SessionPage(BasePage):
|
|||||||
kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])
|
kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])
|
||||||
|
|
||||||
# 设置referer和host值
|
# 设置referer和host值
|
||||||
hostname = urlparse(url).hostname
|
parsed_url = urlparse(url)
|
||||||
scheme = urlparse(url).scheme
|
hostname = parsed_url.hostname
|
||||||
|
scheme = parsed_url.scheme
|
||||||
if not _check_headers(kwargs, self.session.headers, 'Referer'):
|
if not _check_headers(kwargs, self.session.headers, 'Referer'):
|
||||||
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
|
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
|
||||||
if 'Host' not in kwargs['headers']:
|
if 'Host' not in kwargs['headers']:
|
||||||
@ -300,71 +301,41 @@ class SessionPage(BasePage):
|
|||||||
retry = retry if retry is not None else self.retry_times
|
retry = retry if retry is not None else self.retry_times
|
||||||
interval = interval if interval is not None else self.retry_interval
|
interval = interval if interval is not None else self.retry_interval
|
||||||
for i in range(retry + 1):
|
for i in range(retry + 1):
|
||||||
|
e = None
|
||||||
try:
|
try:
|
||||||
if mode == 'get':
|
if mode == 'get':
|
||||||
r = self.session.get(url, **kwargs)
|
r = self.session.get(url, **kwargs)
|
||||||
elif mode == 'post':
|
elif mode == 'post':
|
||||||
r = self.session.post(url, data=data, **kwargs)
|
r = self.session.post(url, data=data, **kwargs)
|
||||||
|
raise ConnectionError
|
||||||
|
|
||||||
print(r.url)
|
|
||||||
if r:
|
if r:
|
||||||
print(r.request.headers)
|
return _set_charset(r), 'Success'
|
||||||
e = 'Success'
|
|
||||||
r = _set_charset(r)
|
|
||||||
return r, e
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if show_errmsg:
|
pass
|
||||||
raise e
|
|
||||||
|
# if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||||
|
# break
|
||||||
|
|
||||||
if i < retry:
|
if i < retry:
|
||||||
sleep(interval)
|
sleep(interval)
|
||||||
|
if show_errmsg:
|
||||||
|
print(f'重试 {url}')
|
||||||
|
|
||||||
if r is None:
|
if r is None:
|
||||||
return None, '连接失败'
|
if show_errmsg:
|
||||||
|
if e:
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
raise ConnectionError('连接失败')
|
||||||
|
return None, '连接失败' if e is None else e
|
||||||
|
|
||||||
if not r.ok:
|
if not r.ok:
|
||||||
|
if show_errmsg:
|
||||||
|
raise ConnectionError(f'状态码:{r.status_code}')
|
||||||
return r, f'状态码:{r.status_code}'
|
return r, f'状态码:{r.status_code}'
|
||||||
|
|
||||||
# try:
|
|
||||||
# r = None
|
|
||||||
# if mode == 'get':
|
|
||||||
# r = self.session.get(url, **kwargs)
|
|
||||||
# elif mode == 'post':
|
|
||||||
# r = self.session.post(url, data=data, **kwargs)
|
|
||||||
#
|
|
||||||
# if r is None:
|
|
||||||
# return None, '连接失败'
|
|
||||||
#
|
|
||||||
# except Exception as e:
|
|
||||||
# if show_errmsg:
|
|
||||||
# raise e
|
|
||||||
#
|
|
||||||
# return None, e
|
|
||||||
#
|
|
||||||
# else:
|
|
||||||
# # ----------------获取并设置编码开始-----------------
|
|
||||||
# # 在headers中获取编码
|
|
||||||
# content_type = r.headers.get('content-type', '').lower()
|
|
||||||
# charset = search(r'charset[=: ]*(.*)?[;]', content_type)
|
|
||||||
#
|
|
||||||
# if charset:
|
|
||||||
# r.encoding = charset.group(1)
|
|
||||||
#
|
|
||||||
# # 在headers中获取不到编码,且如果是网页
|
|
||||||
# elif content_type.replace(' ', '').startswith('text/html'):
|
|
||||||
# re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
|
|
||||||
#
|
|
||||||
# if re_result:
|
|
||||||
# charset = re_result.group(1).decode()
|
|
||||||
# else:
|
|
||||||
# charset = r.apparent_encoding
|
|
||||||
#
|
|
||||||
# r.encoding = charset
|
|
||||||
# # ----------------获取并设置编码结束-----------------
|
|
||||||
#
|
|
||||||
# return r, 'Success'
|
|
||||||
|
|
||||||
|
|
||||||
def _check_headers(kwargs, headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool:
|
def _check_headers(kwargs, headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool:
|
||||||
"""检查kwargs或headers中是否有arg所示属性"""
|
"""检查kwargs或headers中是否有arg所示属性"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user