增加WrongURLError;get()会检查url规范;SessionPage的get()可指向本地文件

This commit is contained in:
g1879 2023-12-08 19:53:11 +08:00
parent 294e5219c7
commit 30df1c8eb8
9 changed files with 64 additions and 23 deletions

View File

@ -5,7 +5,6 @@
"""
from abc import abstractmethod
from re import sub
from urllib.parse import quote
from DownloadKit import DownloadKit
@ -421,18 +420,6 @@ class BasePage(BaseParser):
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
return self._DownloadKit
def _before_connect(self, url, retry, interval):
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数和间隔组成的tuple
"""
self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
return retry, interval
# ----------------以下属性或方法由后代实现----------------
@property
def url(self):

View File

@ -166,8 +166,6 @@ class BasePage(BaseParser):
@property
# Returns the page's DownloadKit instance (created with the page as driver
# in the implementation file).
def download(self) -> DownloadKit: ...
# Store the (quoted) target url on the page and resolve the effective
# retry count and interval before connecting.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
# ---------------- the following attributes/methods are implemented by subclasses ----------------
@property
def url(self) -> str: ...

View File

@ -12,7 +12,8 @@ from time import perf_counter, sleep
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
from .._configs.options_manage import OptionsManager
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError,
WrongURLError)
def get_usable_path(path, is_file=True, parents=True):
@ -273,6 +274,8 @@ def raise_error(r):
raise AlertExistsError
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
raise NoRectError
elif error == 'Cannot navigate to invalid URL':
raise WrongURLError(f'无效的url{r["args"]["url"]}。也许要加上"http://"')
elif r['type'] == 'call_method_error':
raise CDPError(f'\n错误:{r["error"]}\nmethod{r["method"]}\nargs{r["args"]}\n出现这个错误可能意味着程序有bug'
'请把错误信息和重现方法告知作者,谢谢。\n报告网站https://gitee.com/g1879/DrissionPage/issues')

View File

@ -195,10 +195,10 @@ class SessionElement(DrissionElement):
return link
else: # 其它情况直接返回绝对url
return make_absolute_link(link, self.page)
return make_absolute_link(link, self.page.url)
elif attr == 'src':
return make_absolute_link(self.inner_ele.get('src'), self.page)
return make_absolute_link(self.inner_ele.get('src'), self.page.url)
elif attr == 'text':
return self.text

View File

@ -5,9 +5,10 @@
"""
from json import loads, JSONDecodeError
from os.path import sep
from re import findall
from re import findall, match
from threading import Thread
from time import perf_counter, sleep
from urllib.parse import quote
from .._base.base import BasePage
from .._commons.locator import get_loc, is_loc
@ -895,6 +896,24 @@ class ChromiumBase(BasePage):
pass
return False
def _before_connect(self, url, retry, interval):
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数和间隔组成的tuple
"""
url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
if not url:
self._url = 'chrome://newtab/'
elif not match(r'.*?://', url):
self._url = f'http://{url}'
else:
self._url = url
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
return retry, interval
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None):
"""尝试连接,重试若干次
:param to_url: 要访问的url

View File

@ -231,6 +231,8 @@ class ChromiumBase(BasePage):
def _on_alert_open(self, **kwargs): ...
# Normalise the target url (new-tab fallback for empty, 'http://' prefix when
# no scheme) and resolve the effective retry count/interval.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
# Attempt the connection with up to `times` retries.
# NOTE(review): the Union[bool, None] return suggests None has a distinct
# meaning from False — confirm against the implementation file.
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
               timeout: float = None) -> Union[bool, None]: ...

View File

@ -6,9 +6,9 @@
from pathlib import Path
from re import search
from time import sleep
from urllib.parse import urlparse
from urllib.parse import urlparse, quote
from requests import Session
from requests import Session, Response
from requests.structures import CaseInsensitiveDict
from tldextract import extract
@ -130,8 +130,8 @@ class SessionPage(BasePage):
return self._set
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
    """Visit a url with a GET request; a local file path is also accepted.

    :param url: target url, or a path to a local file (optionally with a
        'file:///' prefix)
    :param show_errmsg: whether to display and raise connection exceptions
    :param show_errmsg: whether to display and raise exceptions
    :param retry: number of retries
    :param interval: interval between retries
    :param timeout: connection timeout
    :param kwargs: connection parameters passed to the underlying request
    :return: whether the target is usable (True when a local file was loaded)
    """
    if not url.lower().startswith('http'):
        # Non-http target: it may be a local file.
        if url.startswith('file:///'):
            # NOTE(review): stripping 8 chars drops the leading '/' of a
            # POSIX absolute path ('file:///home/x' -> 'home/x'); this keeps
            # the original behaviour, which looks Windows-centric ('file:///C:/x'
            # -> 'C:/x') — confirm on other platforms.
            url = url[8:]
        if Path(url).exists():
            # Build a fake Response holding the file's raw bytes so the page
            # can be parsed exactly like a normal server reply.
            with open(url, 'rb') as f:
                r = Response()
                r._content = f.read()
                r.status_code = 200
            self._response = r
            # Fix: report success explicitly; the original fell off with a bare
            # `return` (None) although the docstring promises a usability flag.
            return True
    retry, interval = self._before_connect(url, retry, interval)
    return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)
def ele(self, loc_or_ele, timeout=None):
@ -220,6 +231,7 @@ class SessionPage(BasePage):
:param kwargs: 连接参数
:return: url是否可用
"""
retry, interval = self._before_connect(url, retry, interval)
return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)
def close(self):
@ -228,6 +240,18 @@ class SessionPage(BasePage):
if self._response is not None:
self._response.close()
def _before_connect(self, url, retry, interval):
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数和间隔组成的tuple
"""
self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
return retry, interval
def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
"""执行get或post连接
:param url: 目标url

View File

@ -136,6 +136,10 @@ class SessionPage(BasePage):
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
# Release resources held by the page (closes the cached Response — see the
# implementation file).
def close(self) -> None: ...
# Store the quoted target url on the page and resolve the effective retry
# count/interval before connecting.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
def _s_connect(self,
url: str,
mode: str,

View File

@ -75,3 +75,7 @@ class GetDocumentError(BaseError):
class WaitTimeoutError(BaseError):
    # Raised when a wait operation does not complete in time.
    _info = '等待失败。'
class WrongURLError(BaseError):
    # Raised when a navigation target is not a valid url.
    _info = '无效的url。'