From 0dcfcf5cd8232472e6b50b8536ebc73a664fe56a Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 1 Sep 2020 22:44:34 +0800 Subject: [PATCH] =?UTF-8?q?get()=E5=A2=9E=E5=8A=A0=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 40 ++++++++++++++++++++++++++++++------ DrissionPage/mix_page.py | 14 ++++++++++--- DrissionPage/session_page.py | 30 +++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index ee8b7f0..4be52e0 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -6,7 +6,7 @@ """ from glob import glob from pathlib import Path -from time import time +from time import time, sleep from typing import Union, List, Any from urllib.parse import quote @@ -60,21 +60,49 @@ class DriverPage(object): """返回网页title""" return self.driver.title - def get(self, url: str, go_anyway: bool = False, show_errmsg: bool = False) -> Union[None, bool]: + def _try_to_get(self, + to_url: str, + times: int = 0, + interval: float = 1, + show_errmsg: bool = False, ): + """ + :param to_url: 要访问的url + :param times: 重试次数 + :param interval: 重试间隔(秒) + :param show_errmsg: 是否抛出异常 + :return: + """ + self.driver.get(to_url) + is_ok = self.check_page() + while times and is_ok is False: + sleep(interval) + self.driver.get(to_url) + is_ok = self.check_page() + times -= 1 + if is_ok is False and show_errmsg: + raise ConnectionError('Connect error.') + return is_ok + + def get(self, + url: str, + go_anyway: bool = False, + show_errmsg: bool = False, + retry: int = 0, + interval: float = 1, + ) -> Union[None, bool]: """访问url \n :param url: 目标url :param go_anyway: 若目标url与当前url一致,是否强制跳转 :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) :return: 目标url是否可用 """ to_url = quote(url, safe='/:&?=%;#@') if not url or (not go_anyway and self.url == to_url): return self._url = to_url - self.driver.get(to_url) - self._url_available = self.check_page() - if self._url_available is False and show_errmsg: - raise ConnectionError('Connect error.') + self._url_available = self._try_to_get(to_url, times=retry, interval=interval, show_errmsg=show_errmsg) return self._url_available def ele(self, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index b6cab5e..3153b56 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -236,19 +236,27 @@ class MixPage(Null, SessionPage, DriverPage): # ----------------以下为共用函数----------------------- - def get(self, url: str, go_anyway=False, show_errmsg: bool = False, **kwargs) -> Union[bool, None]: + def get(self, + url: str, + go_anyway=False, + show_errmsg: bool = False, + retry: int = 0, + interval: float = 1, + **kwargs) -> Union[bool, None]: """跳转到一个url \n 跳转前先同步cookies,跳转后判断目标url是否可用 :param url: 目标url :param go_anyway: 若目标url与当前url一致,是否强制跳转 :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) :param kwargs: 连接参数,s模式专用 :return: url是否可用 """ if self._mode == 'd': - return super(SessionPage, self).get(url, go_anyway, show_errmsg) + return super(SessionPage, self).get(url, go_anyway, show_errmsg, retry, interval) elif self._mode == 's': - return super().get(url, go_anyway, show_errmsg, **kwargs) + return super().get(url, go_anyway, show_errmsg, retry, interval, **kwargs) def ele(self, loc_or_ele: Union[tuple, str, DriverElement, SessionElement, Element, WebElement], diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index aa20cc9..1002f7c 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -9,7 +9,7 @@ from pathlib import Path from random import randint from re import search as re_SEARCH from re import sub as re_SUB -from time import time +from time import time, sleep from typing import Union, List from urllib.parse import urlparse, quote @@ -142,15 +142,41 @@ class SessionPage(object): raise TypeError('Type of loc_or_str can only be tuple or str.') return self.ele(loc_or_str, mode='all', show_errmsg=True) + def _try_to_get(self, + to_url: str, + times: int = 0, + interval: float = 1, + show_errmsg: bool = False, + **kwargs) -> HTMLResponse: + """尝试连接,重试若干次 + :param to_url: 要访问的url + :param times: 重试次数 + :param interval: 重试间隔(秒) + :param show_errmsg: 是否抛出异常 + :param kwargs: 连接参数 + :return: HTMLResponse对象 + """ + r = self._make_response(to_url, show_errmsg=show_errmsg, **kwargs)[0] + while times and (not r or r.content == b''): + print('重试', to_url) + sleep(interval) + r = self._make_response(to_url, show_errmsg=show_errmsg, **kwargs)[0] + times -= 1 + return r + def get(self, url: str, go_anyway: bool = False, show_errmsg: bool = False, + retry: int = 0, + interval: float = 1, **kwargs) -> Union[bool, None]: """用get方式跳转到url \n :param url: 目标url :param go_anyway: 若目标url与当前url一致,是否强制跳转 :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) :param kwargs: 连接参数 :return: url是否可用 """ @@ -158,7 +184,7 @@ class SessionPage(object): if not url or (not go_anyway and self.url == to_url): return self._url = to_url - self._response = self._make_response(to_url, show_errmsg=show_errmsg, **kwargs)[0] + self._response = self._try_to_get(to_url, times=retry, interval=interval, show_errmsg=show_errmsg, **kwargs) if self._response is None: self._url_available = False else: