diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index 23ef08e..7568f90 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -5,7 +5,6 @@ """ from abc import abstractmethod from re import sub -from urllib.parse import quote from DownloadKit import DownloadKit @@ -421,18 +420,6 @@ class BasePage(BaseParser): self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path) return self._DownloadKit - def _before_connect(self, url, retry, interval): - """连接前的准备 - :param url: 要访问的url - :param retry: 重试次数 - :param interval: 重试间隔 - :return: 重试次数和间隔组成的tuple - """ - self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - return retry, interval - # ----------------以下属性或方法由后代实现---------------- @property def url(self): diff --git a/DrissionPage/_base/base.pyi b/DrissionPage/_base/base.pyi index 157909d..13175e1 100644 --- a/DrissionPage/_base/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -166,8 +166,6 @@ class BasePage(BaseParser): @property def download(self) -> DownloadKit: ... - def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... - # ----------------以下属性或方法由后代实现---------------- @property def url(self) -> str: ... diff --git a/DrissionPage/_commons/tools.py b/DrissionPage/_commons/tools.py index afcb270..0105b52 100644 --- a/DrissionPage/_commons/tools.py +++ b/DrissionPage/_commons/tools.py @@ -12,7 +12,8 @@ from time import perf_counter, sleep from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess from .._configs.options_manage import OptionsManager -from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError +from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError, + WrongURLError) def get_usable_path(path, is_file=True, parents=True): @@ -273,6 +274,8 @@ def raise_error(r): raise AlertExistsError elif error in ('Node does not have a layout object', 'Could not compute box model.'): raise NoRectError + elif error == 'Cannot navigate to invalid URL': + raise WrongURLError(f'无效的url:{r["args"]["url"]}。也许要加上"http://"?') elif r['type'] == 'call_method_error': raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}\n出现这个错误可能意味着程序有bug,' '请把错误信息和重现方法告知作者,谢谢。\n报告网站:https://gitee.com/g1879/DrissionPage/issues') diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index 5c0109e..bd19b87 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -195,10 +195,10 @@ class SessionElement(DrissionElement): return link else: # 其它情况直接返回绝对url - return make_absolute_link(link, self.page) + return make_absolute_link(link, self.page.url) elif attr == 'src': - return make_absolute_link(self.inner_ele.get('src'), self.page) + return make_absolute_link(self.inner_ele.get('src'), self.page.url) elif attr == 'text': return self.text diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index 832c9de..1bf2f13 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -5,9 +5,10 @@ """ from json import loads, JSONDecodeError from os.path import sep -from re import findall +from re import findall, match from threading import Thread from time import perf_counter, sleep +from urllib.parse import quote from .._base.base import BasePage from .._commons.locator import get_loc, is_loc @@ -895,6 +896,24 @@ class ChromiumBase(BasePage): pass return False + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') + if not url: + self._url = 'chrome://newtab/' + elif not match(r'.*?://', url): + self._url = f'http://{url}' + else: + self._url = url + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None): """尝试连接,重试若干次 :param to_url: 要访问的url diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index 3ba30f9..ebc0a84 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -231,6 +231,8 @@ class ChromiumBase(BasePage): def _on_alert_open(self, **kwargs): ... + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False, timeout: float = None) -> Union[bool, None]: ... diff --git a/DrissionPage/_pages/session_page.py b/DrissionPage/_pages/session_page.py index 7b5fe44..112db23 100644 --- a/DrissionPage/_pages/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -6,9 +6,9 @@ from pathlib import Path from re import search from time import sleep -from urllib.parse import urlparse +from urllib.parse import urlparse, quote -from requests import Session +from requests import Session, Response from requests.structures import CaseInsensitiveDict from tldextract import extract @@ -130,8 +130,8 @@ class SessionPage(BasePage): return self._set def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): - """用get方式跳转到url - :param url: 目标url + """用get方式跳转到url,可输入文件路径 + :param url: 目标url,可指定本地文件路径 :param show_errmsg: 是否显示和抛出异常 :param retry: 重试次数 :param interval: 重试间隔(秒) @@ -139,6 +139,17 @@ class SessionPage(BasePage): :param kwargs: 连接参数 :return: url是否可用 """ + if not url.lower().startswith('http'): + if url.startswith('file:///'): + url = url[8:] + if Path(url).exists(): + with open(url, 'rb') as f: + r = Response() + r._content = f.read() + r.status_code = 200 + self._response = r + return + retry, interval = self._before_connect(url, retry, interval) return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs) def ele(self, loc_or_ele, timeout=None): @@ -220,6 +231,7 @@ class SessionPage(BasePage): :param kwargs: 连接参数 :return: url是否可用 """ + retry, interval = self._before_connect(url, retry, interval) return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs) def close(self): @@ -228,6 +240,18 @@ class SessionPage(BasePage): if self._response is not None: self._response.close() + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): """执行get或post连接 :param url: 目标url diff --git a/DrissionPage/_pages/session_page.pyi b/DrissionPage/_pages/session_page.pyi index 7a214f1..a0e8775 100644 --- a/DrissionPage/_pages/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -136,6 +136,10 @@ class SessionPage(BasePage): verify: Any | None = ..., cert: Any | None = ...) -> bool: ... + def close(self) -> None: ... + + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + def _s_connect(self, url: str, mode: str, diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 7423445..30894b0 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -75,3 +75,7 @@ class GetDocumentError(BaseError): class WaitTimeoutError(BaseError): _info = '等待失败。' + + +class WrongURLError(BaseError): + _info = '无效的url。'