From b39470f9222e58308277a7db4d00a1ca169f7fff Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 10 Apr 2022 19:13:57 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=5Fmake=5Fresponse()=EF=BC=8C?= =?UTF-8?q?=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/base.py | 8 -- DrissionPage/drission.py | 8 +- DrissionPage/mix_page.py | 42 +++++----- DrissionPage/session_page.py | 149 ++++++++++++++--------------------- 4 files changed, 86 insertions(+), 121 deletions(-) diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 62dd09f..35026e9 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -350,11 +350,3 @@ class BasePage(BaseParser): retry: int = None, interval: float = None): pass - - @abstractmethod - def _try_to_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - show_errmsg: bool = False, ): - pass diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 14c4ec0..7aaae69 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -9,6 +9,7 @@ from typing import Union from requests import Session from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict from selenium import webdriver from selenium.common.exceptions import SessionNotCreatedException, WebDriverException from selenium.webdriver.chrome.options import Options @@ -316,12 +317,13 @@ class Drission(object): if self._session is None: self._session = Session() - attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' - + if 'headers' in data: + self._session.headers = CaseInsensitiveDict(data['headers']) if 'cookies' in data: self.set_cookies(data['cookies'], set_session=True) + attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' for i in attrs: if i in data: self._session.__setattr__(i, data[i]) diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index f25a845..cb9fb02 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -204,26 +204,26 @@ class MixPage(SessionPage, DriverPage, BasePage): elif self._mode == 'd': return super(SessionPage, self).get_cookies(as_dict) - def _try_to_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - mode: str = 'get', - data: dict = None, - show_errmsg: bool = False, - **kwargs): - """尝试连接,重试若干次 \n - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param show_errmsg: 是否抛出异常 - :param kwargs: 连接参数 - :return: s模式为Response对象,d模式为bool或None - """ - if self._mode == 'd': - return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg) - elif self._mode == 's': - return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs) + # def _try_to_connect(self, + # to_url: str, + # times: int = 0, + # interval: float = 1, + # mode: str = 'get', + # data: dict = None, + # show_errmsg: bool = False, + # **kwargs): + # """尝试连接,重试若干次 \n + # :param to_url: 要访问的url + # :param times: 重试次数 + # :param interval: 重试间隔(秒) + # :param show_errmsg: 是否抛出异常 + # :param kwargs: 连接参数 + # :return: s模式为Response对象,d模式为bool或None + # """ + # if self._mode == 'd': + # return super(SessionPage, self)._try_to_connect(to_url, times, interval, show_errmsg) + # elif self._mode == 's': + # return super()._try_to_connect(to_url, times, interval, mode, data, show_errmsg, **kwargs) # ----------------MixPage独有属性和方法----------------------- @property @@ -336,7 +336,7 @@ class MixPage(SessionPage, DriverPage, BasePage): # 使用requests访问url并判断可用性 if by_requests: self.cookies_to_session() - r = self._make_response(self.url, **{'timeout': 3})[0] + r = self._make_response(self.url)[0] return r.ok if r else False def close_driver(self) -> None: diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index dc5d939..675fb4f 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -163,46 +163,46 @@ class SessionPage(BasePage): else: return [_cookie_to_dict(cookie) for cookie in cookies] - def _try_to_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - mode: str = 'get', - data: Union[dict, str] = None, - show_errmsg: bool = False, - **kwargs) -> Union[Response, None]: - """尝试连接,重试若干次 \n - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param mode: 连接方式,'get' 或 'post' - :param data: post方式提交的数据 - :param show_errmsg: 是否抛出异常 - :param kwargs: 连接参数 - :return: HTMLResponse对象 - """ - err = None - r = None - - for _ in range(times + 1): - try: - r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0] - except Exception as e: - err = e - r = None - - if r and (r.content != b'' or r.status_code in (403, 404)): - break - - if _ < times: - sleep(interval) - if show_errmsg: - print(f'重试 {to_url}') - - if not r and show_errmsg: - raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}') - - return r + # def _try_to_connect(self, + # to_url: str, + # times: int = 0, + # interval: float = 1, + # mode: str = 'get', + # data: Union[dict, str] = None, + # show_errmsg: bool = False, + # **kwargs) -> Union[Response, None]: + # """尝试连接,重试若干次 \n + # :param to_url: 要访问的url + # :param times: 重试次数 + # :param interval: 重试间隔(秒) + # :param mode: 连接方式,'get' 或 'post' + # :param data: post方式提交的数据 + # :param show_errmsg: 是否抛出异常 + # :param kwargs: 连接参数 + # :return: HTMLResponse对象 + # """ + # err = None + # r = None + # + # for _ in range(times + 1): + # try: + # r = self._make_response(to_url, mode=mode, data=data, show_errmsg=True, **kwargs)[0] + # except Exception as e: + # err = e + # r = None + # + # if r and (r.content != b'' or r.status_code in (403, 404)): + # break + # + # if _ < times: + # sleep(interval) + # if show_errmsg: + # print(f'重试 {to_url}') + # + # if not r and show_errmsg: + # raise err if err is not None else ConnectionError(f'连接异常。{r.status_code if r is not None else ""}') + # + # return r # ----------------session独有属性和方法----------------------- @property @@ -286,8 +286,9 @@ class SessionPage(BasePage): kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) # 设置referer和host值 - hostname = urlparse(url).hostname - scheme = urlparse(url).scheme + parsed_url = urlparse(url) + hostname = parsed_url.hostname + scheme = parsed_url.scheme if not _check_headers(kwargs, self.session.headers, 'Referer'): kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' if 'Host' not in kwargs['headers']: @@ -300,71 +301,41 @@ class SessionPage(BasePage): retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval for i in range(retry + 1): + e = None try: if mode == 'get': r = self.session.get(url, **kwargs) elif mode == 'post': r = self.session.post(url, data=data, **kwargs) + raise ConnectionError - print(r.url) if r: - print(r.request.headers) - e = 'Success' - r = _set_charset(r) - return r, e + return _set_charset(r), 'Success' except Exception as e: - if show_errmsg: - raise e + pass + + # if r and (r.content != b'' or r.status_code in (403, 404)): + # break if i < retry: sleep(interval) + if show_errmsg: + print(f'重试 {url}') if r is None: - return None, '连接失败' + if show_errmsg: + if e: + raise e + else: + raise ConnectionError('连接失败') + return None, '连接失败' if e is None else e if not r.ok: + if show_errmsg: + raise ConnectionError(f'状态码:{r.status_code}') return r, f'状态码:{r.status_code}' - # try: - # r = None - # if mode == 'get': - # r = self.session.get(url, **kwargs) - # elif mode == 'post': - # r = self.session.post(url, data=data, **kwargs) - # - # if r is None: - # return None, '连接失败' - # - # except Exception as e: - # if show_errmsg: - # raise e - # - # return None, e - # - # else: - # # ----------------获取并设置编码开始----------------- - # # 在headers中获取编码 - # content_type = r.headers.get('content-type', '').lower() - # charset = search(r'charset[=: ]*(.*)?[;]', content_type) - # - # if charset: - # r.encoding = charset.group(1) - # - # # 在headers中获取不到编码,且如果是网页 - # elif content_type.replace(' ', '').startswith('text/html'): - # re_result = search(b']+).*?>', r.content) - # - # if re_result: - # charset = re_result.group(1).decode() - # else: - # charset = r.apparent_encoding - # - # r.encoding = charset - # # ----------------获取并设置编码结束----------------- - # - # return r, 'Success' - def _check_headers(kwargs, headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool: """检查kwargs或headers中是否有arg所示属性"""