mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
增加WrongURLError;get()会检查url规范;SessionPage的get()可指向本地文件
This commit is contained in:
parent
294e5219c7
commit
30df1c8eb8
@ -5,7 +5,6 @@
|
|||||||
"""
|
"""
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from re import sub
|
from re import sub
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
from DownloadKit import DownloadKit
|
from DownloadKit import DownloadKit
|
||||||
|
|
||||||
@ -421,18 +420,6 @@ class BasePage(BaseParser):
|
|||||||
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
|
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
|
||||||
return self._DownloadKit
|
return self._DownloadKit
|
||||||
|
|
||||||
def _before_connect(self, url, retry, interval):
|
|
||||||
"""连接前的准备
|
|
||||||
:param url: 要访问的url
|
|
||||||
:param retry: 重试次数
|
|
||||||
:param interval: 重试间隔
|
|
||||||
:return: 重试次数和间隔组成的tuple
|
|
||||||
"""
|
|
||||||
self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
|
|
||||||
retry = retry if retry is not None else self.retry_times
|
|
||||||
interval = interval if interval is not None else self.retry_interval
|
|
||||||
return retry, interval
|
|
||||||
|
|
||||||
# ----------------以下属性或方法由后代实现----------------
|
# ----------------以下属性或方法由后代实现----------------
|
||||||
@property
|
@property
|
||||||
def url(self):
|
def url(self):
|
||||||
|
@ -166,8 +166,6 @@ class BasePage(BaseParser):
|
|||||||
@property
|
@property
|
||||||
def download(self) -> DownloadKit: ...
|
def download(self) -> DownloadKit: ...
|
||||||
|
|
||||||
# Pre-connection hook: receives the target url and the retry count/interval, returns the latter two as a tuple.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
|
|
||||||
|
|
||||||
# ----------------以下属性或方法由后代实现----------------
|
# ----------------以下属性或方法由后代实现----------------
|
||||||
@property
|
@property
|
||||||
def url(self) -> str: ...
|
def url(self) -> str: ...
|
||||||
|
@ -12,7 +12,8 @@ from time import perf_counter, sleep
|
|||||||
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
|
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
|
||||||
|
|
||||||
from .._configs.options_manage import OptionsManager
|
from .._configs.options_manage import OptionsManager
|
||||||
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError
|
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError,
|
||||||
|
WrongURLError)
|
||||||
|
|
||||||
|
|
||||||
def get_usable_path(path, is_file=True, parents=True):
|
def get_usable_path(path, is_file=True, parents=True):
|
||||||
@ -273,6 +274,8 @@ def raise_error(r):
|
|||||||
raise AlertExistsError
|
raise AlertExistsError
|
||||||
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
|
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
|
||||||
raise NoRectError
|
raise NoRectError
|
||||||
|
elif error == 'Cannot navigate to invalid URL':
|
||||||
|
raise WrongURLError(f'无效的url:{r["args"]["url"]}。也许要加上"http://"?')
|
||||||
elif r['type'] == 'call_method_error':
|
elif r['type'] == 'call_method_error':
|
||||||
raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}\n出现这个错误可能意味着程序有bug,'
|
raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}\n出现这个错误可能意味着程序有bug,'
|
||||||
'请把错误信息和重现方法告知作者,谢谢。\n报告网站:https://gitee.com/g1879/DrissionPage/issues')
|
'请把错误信息和重现方法告知作者,谢谢。\n报告网站:https://gitee.com/g1879/DrissionPage/issues')
|
||||||
|
@ -195,10 +195,10 @@ class SessionElement(DrissionElement):
|
|||||||
return link
|
return link
|
||||||
|
|
||||||
else: # 其它情况直接返回绝对url
|
else: # 其它情况直接返回绝对url
|
||||||
return make_absolute_link(link, self.page)
|
return make_absolute_link(link, self.page.url)
|
||||||
|
|
||||||
elif attr == 'src':
|
elif attr == 'src':
|
||||||
return make_absolute_link(self.inner_ele.get('src'), self.page)
|
return make_absolute_link(self.inner_ele.get('src'), self.page.url)
|
||||||
|
|
||||||
elif attr == 'text':
|
elif attr == 'text':
|
||||||
return self.text
|
return self.text
|
||||||
|
@ -5,9 +5,10 @@
|
|||||||
"""
|
"""
|
||||||
from json import loads, JSONDecodeError
|
from json import loads, JSONDecodeError
|
||||||
from os.path import sep
|
from os.path import sep
|
||||||
from re import findall
|
from re import findall, match
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from time import perf_counter, sleep
|
from time import perf_counter, sleep
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
from .._base.base import BasePage
|
from .._base.base import BasePage
|
||||||
from .._commons.locator import get_loc, is_loc
|
from .._commons.locator import get_loc, is_loc
|
||||||
@ -895,6 +896,24 @@ class ChromiumBase(BasePage):
|
|||||||
pass
|
pass
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _before_connect(self, url, retry, interval):
|
||||||
|
"""连接前的准备
|
||||||
|
:param url: 要访问的url
|
||||||
|
:param retry: 重试次数
|
||||||
|
:param interval: 重试间隔
|
||||||
|
:return: 重试次数和间隔组成的tuple
|
||||||
|
"""
|
||||||
|
url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
|
||||||
|
if not url:
|
||||||
|
self._url = 'chrome://newtab/'
|
||||||
|
elif not match(r'.*?://', url):
|
||||||
|
self._url = f'http://{url}'
|
||||||
|
else:
|
||||||
|
self._url = url
|
||||||
|
retry = retry if retry is not None else self.retry_times
|
||||||
|
interval = interval if interval is not None else self.retry_interval
|
||||||
|
return retry, interval
|
||||||
|
|
||||||
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None):
|
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None):
|
||||||
"""尝试连接,重试若干次
|
"""尝试连接,重试若干次
|
||||||
:param to_url: 要访问的url
|
:param to_url: 要访问的url
|
||||||
|
@ -231,6 +231,8 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
def _on_alert_open(self, **kwargs): ...
|
def _on_alert_open(self, **kwargs): ...
|
||||||
|
|
||||||
|
# Normalises the target url into self._url and resolves retry defaults; None for retry/interval selects the instance default.
def _before_connect(self, url: str, retry: Union[int, None], interval: Union[float, None]) -> tuple: ...
|
||||||
|
|
||||||
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
|
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
|
||||||
timeout: float = None) -> Union[bool, None]: ...
|
timeout: float = None) -> Union[bool, None]: ...
|
||||||
|
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from re import search
|
from re import search
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse, quote
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session, Response
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
from tldextract import extract
|
from tldextract import extract
|
||||||
|
|
||||||
@ -130,8 +130,8 @@ class SessionPage(BasePage):
|
|||||||
return self._set
|
return self._set
|
||||||
|
|
||||||
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
||||||
"""用get方式跳转到url
|
"""用get方式跳转到url,可输入文件路径
|
||||||
:param url: 目标url
|
:param url: 目标url,可指定本地文件路径
|
||||||
:param show_errmsg: 是否显示和抛出异常
|
:param show_errmsg: 是否显示和抛出异常
|
||||||
:param retry: 重试次数
|
:param retry: 重试次数
|
||||||
:param interval: 重试间隔(秒)
|
:param interval: 重试间隔(秒)
|
||||||
@ -139,6 +139,17 @@ class SessionPage(BasePage):
|
|||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: url是否可用
|
:return: url是否可用
|
||||||
"""
|
"""
|
||||||
|
if not url.lower().startswith('http'):
|
||||||
|
if url.startswith('file:///'):
|
||||||
|
url = url[8:]
|
||||||
|
if Path(url).exists():
|
||||||
|
with open(url, 'rb') as f:
|
||||||
|
r = Response()
|
||||||
|
r._content = f.read()
|
||||||
|
r.status_code = 200
|
||||||
|
self._response = r
|
||||||
|
return
|
||||||
|
retry, interval = self._before_connect(url, retry, interval)
|
||||||
return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)
|
return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)
|
||||||
|
|
||||||
def ele(self, loc_or_ele, timeout=None):
|
def ele(self, loc_or_ele, timeout=None):
|
||||||
@ -220,6 +231,7 @@ class SessionPage(BasePage):
|
|||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: url是否可用
|
:return: url是否可用
|
||||||
"""
|
"""
|
||||||
|
retry, interval = self._before_connect(url, retry, interval)
|
||||||
return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)
|
return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
@ -228,6 +240,18 @@ class SessionPage(BasePage):
|
|||||||
if self._response is not None:
|
if self._response is not None:
|
||||||
self._response.close()
|
self._response.close()
|
||||||
|
|
||||||
|
def _before_connect(self, url, retry, interval):
|
||||||
|
"""连接前的准备
|
||||||
|
:param url: 要访问的url
|
||||||
|
:param retry: 重试次数
|
||||||
|
:param interval: 重试间隔
|
||||||
|
:return: 重试次数和间隔组成的tuple
|
||||||
|
"""
|
||||||
|
self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
|
||||||
|
retry = retry if retry is not None else self.retry_times
|
||||||
|
interval = interval if interval is not None else self.retry_interval
|
||||||
|
return retry, interval
|
||||||
|
|
||||||
def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
|
def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
|
||||||
"""执行get或post连接
|
"""执行get或post连接
|
||||||
:param url: 目标url
|
:param url: 目标url
|
||||||
|
@ -136,6 +136,10 @@ class SessionPage(BasePage):
|
|||||||
verify: Any | None = ...,
|
verify: Any | None = ...,
|
||||||
cert: Any | None = ...) -> bool: ...
|
cert: Any | None = ...) -> bool: ...
|
||||||
|
|
||||||
|
def close(self) -> None: ...
|
||||||
|
|
||||||
|
# Stores the percent-encoded url on self._url and resolves retry defaults; None for retry/interval selects the instance default.
def _before_connect(self, url: str, retry: int | None, interval: float | None) -> tuple: ...
|
||||||
|
|
||||||
def _s_connect(self,
|
def _s_connect(self,
|
||||||
url: str,
|
url: str,
|
||||||
mode: str,
|
mode: str,
|
||||||
|
@ -75,3 +75,7 @@ class GetDocumentError(BaseError):
|
|||||||
|
|
||||||
class WaitTimeoutError(BaseError):
|
class WaitTimeoutError(BaseError):
|
||||||
_info = '等待失败。'
|
_info = '等待失败。'
|
||||||
|
|
||||||
|
|
||||||
|
class WrongURLError(BaseError):
    """Error raised for a URL that is not valid."""
    # Message text ("Invalid url."); presumably BaseError uses ``_info`` as the default message — confirm against BaseError.
    _info = '无效的url。'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user