mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
增加WrongURLError;get()会检查url规范;SessionPage的get()可指向本地文件
This commit is contained in:
parent
294e5219c7
commit
30df1c8eb8
@ -5,7 +5,6 @@
|
||||
"""
|
||||
from abc import abstractmethod
|
||||
from re import sub
|
||||
from urllib.parse import quote
|
||||
|
||||
from DownloadKit import DownloadKit
|
||||
|
||||
@ -421,18 +420,6 @@ class BasePage(BaseParser):
|
||||
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
|
||||
return self._DownloadKit
|
||||
|
||||
def _before_connect(self, url, retry, interval):
    """Prepare for a connection: store the encoded URL and resolve retry settings.

    :param url: URL to visit; it is percent-encoded and stored in ``self._url``
    :param retry: number of retries, ``None`` means use ``self.retry_times``
    :param interval: retry interval, ``None`` means use ``self.retry_interval``
    :return: tuple made of the retry count and the retry interval
    """
    # Characters listed in ``safe`` are left untouched so an already valid URL is not re-encoded.
    self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')

    # Fall back to the page-level defaults only when the caller passed None.
    if retry is None:
        retry = self.retry_times
    if interval is None:
        interval = self.retry_interval

    return retry, interval
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@property
|
||||
def url(self):
|
||||
|
@ -166,8 +166,6 @@ class BasePage(BaseParser):
|
||||
@property
|
||||
def download(self) -> DownloadKit: ...
|
||||
|
||||
# Type stub for the pre-connection hook: takes the target url, the retry count and the retry interval, and returns a tuple.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@property
|
||||
def url(self) -> str: ...
|
||||
|
@ -12,7 +12,8 @@ from time import perf_counter, sleep
|
||||
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
|
||||
|
||||
from .._configs.options_manage import OptionsManager
|
||||
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError
|
||||
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError,
|
||||
WrongURLError)
|
||||
|
||||
|
||||
def get_usable_path(path, is_file=True, parents=True):
|
||||
@ -273,6 +274,8 @@ def raise_error(r):
|
||||
raise AlertExistsError
|
||||
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
|
||||
raise NoRectError
|
||||
elif error == 'Cannot navigate to invalid URL':
|
||||
raise WrongURLError(f'无效的url:{r["args"]["url"]}。也许要加上"http://"?')
|
||||
elif r['type'] == 'call_method_error':
|
||||
raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}\n出现这个错误可能意味着程序有bug,'
|
||||
'请把错误信息和重现方法告知作者,谢谢。\n报告网站:https://gitee.com/g1879/DrissionPage/issues')
|
||||
|
@ -195,10 +195,10 @@ class SessionElement(DrissionElement):
|
||||
return link
|
||||
|
||||
else: # 其它情况直接返回绝对url
|
||||
return make_absolute_link(link, self.page)
|
||||
return make_absolute_link(link, self.page.url)
|
||||
|
||||
elif attr == 'src':
|
||||
return make_absolute_link(self.inner_ele.get('src'), self.page)
|
||||
return make_absolute_link(self.inner_ele.get('src'), self.page.url)
|
||||
|
||||
elif attr == 'text':
|
||||
return self.text
|
||||
|
@ -5,9 +5,10 @@
|
||||
"""
|
||||
from json import loads, JSONDecodeError
|
||||
from os.path import sep
|
||||
from re import findall
|
||||
from re import findall, match
|
||||
from threading import Thread
|
||||
from time import perf_counter, sleep
|
||||
from urllib.parse import quote
|
||||
|
||||
from .._base.base import BasePage
|
||||
from .._commons.locator import get_loc, is_loc
|
||||
@ -895,6 +896,24 @@ class ChromiumBase(BasePage):
|
||||
pass
|
||||
return False
|
||||
|
||||
def _before_connect(self, url, retry, interval):
    """Prepare for a connection: normalise the URL and resolve retry settings.

    The URL is percent-encoded and the result is stored in ``self._url``:
    an empty URL becomes the browser's new-tab page, a URL that does not
    start with a scheme gets ``http://`` prepended, anything else is kept.

    :param url: URL to visit
    :param retry: number of retries, ``None`` means use ``self.retry_times``
    :param interval: retry interval, ``None`` means use ``self.retry_interval``
    :return: tuple made of the retry count and the retry interval
    """
    url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')

    if not url:
        target = 'chrome://newtab/'
    # Only a scheme-shaped prefix at the very start counts as "has a scheme".
    # Colons are allowed inside the prefix so nested schemes such as
    # "view-source:https://" are kept, while a "://" that merely appears later
    # (e.g. inside a query string) no longer suppresses the "http://" prefix.
    elif match(r'[A-Za-z][A-Za-z0-9+.:\-]*://', url):
        target = url
    else:
        target = f'http://{url}'
    self._url = target

    # Fall back to the page-level defaults only when the caller passed None.
    if retry is None:
        retry = self.retry_times
    if interval is None:
        interval = self.retry_interval
    return retry, interval
|
||||
|
||||
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None):
|
||||
"""尝试连接,重试若干次
|
||||
:param to_url: 要访问的url
|
||||
|
@ -231,6 +231,8 @@ class ChromiumBase(BasePage):
|
||||
|
||||
# Type stub: declares _on_alert_open as accepting arbitrary keyword arguments; no return type is annotated.
def _on_alert_open(self, **kwargs): ...
|
||||
|
||||
# Type stub for the pre-connection hook: takes the target url, the retry count and the retry interval, and returns a tuple.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
|
||||
|
||||
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
|
||||
timeout: float = None) -> Union[bool, None]: ...
|
||||
|
||||
|
@ -6,9 +6,9 @@
|
||||
from pathlib import Path
|
||||
from re import search
|
||||
from time import sleep
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, quote
|
||||
|
||||
from requests import Session
|
||||
from requests import Session, Response
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from tldextract import extract
|
||||
|
||||
@ -130,8 +130,8 @@ class SessionPage(BasePage):
|
||||
return self._set
|
||||
|
||||
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
||||
"""用get方式跳转到url
|
||||
:param url: 目标url
|
||||
"""用get方式跳转到url,可输入文件路径
|
||||
:param url: 目标url,可指定本地文件路径
|
||||
:param show_errmsg: 是否显示和抛出异常
|
||||
:param retry: 重试次数
|
||||
:param interval: 重试间隔(秒)
|
||||
@ -139,6 +139,17 @@ class SessionPage(BasePage):
|
||||
:param kwargs: 连接参数
|
||||
:return: url是否可用
|
||||
"""
|
||||
if not url.lower().startswith('http'):
|
||||
if url.startswith('file:///'):
|
||||
url = url[8:]
|
||||
if Path(url).exists():
|
||||
with open(url, 'rb') as f:
|
||||
r = Response()
|
||||
r._content = f.read()
|
||||
r.status_code = 200
|
||||
self._response = r
|
||||
return
|
||||
retry, interval = self._before_connect(url, retry, interval)
|
||||
return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)
|
||||
|
||||
def ele(self, loc_or_ele, timeout=None):
|
||||
@ -220,6 +231,7 @@ class SessionPage(BasePage):
|
||||
:param kwargs: 连接参数
|
||||
:return: url是否可用
|
||||
"""
|
||||
retry, interval = self._before_connect(url, retry, interval)
|
||||
return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)
|
||||
|
||||
def close(self):
|
||||
@ -228,6 +240,18 @@ class SessionPage(BasePage):
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
|
||||
def _before_connect(self, url, retry, interval):
    """Prepare for a connection: store the encoded URL and resolve retry settings.

    :param url: URL to visit; it is percent-encoded and stored in ``self._url``
    :param retry: number of retries, ``None`` means use ``self.retry_times``
    :param interval: retry interval, ``None`` means use ``self.retry_interval``
    :return: tuple made of the retry count and the retry interval
    """
    # Characters listed in ``safe`` are left untouched so an already valid URL is not re-encoded.
    self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')

    # Fall back to the page-level defaults only when the caller passed None.
    if retry is None:
        retry = self.retry_times
    if interval is None:
        interval = self.retry_interval

    return retry, interval
|
||||
|
||||
def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
|
||||
"""执行get或post连接
|
||||
:param url: 目标url
|
||||
|
@ -136,6 +136,10 @@ class SessionPage(BasePage):
|
||||
verify: Any | None = ...,
|
||||
cert: Any | None = ...) -> bool: ...
|
||||
|
||||
# Type stub: close takes no arguments besides self and returns None.
def close(self) -> None: ...
|
||||
|
||||
# Type stub for the pre-connection hook: takes the target url, the retry count and the retry interval, and returns a tuple.
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
|
||||
|
||||
def _s_connect(self,
|
||||
url: str,
|
||||
mode: str,
|
||||
|
@ -75,3 +75,7 @@ class GetDocumentError(BaseError):
|
||||
|
||||
class WaitTimeoutError(BaseError):
    """Error type whose message text says that waiting failed."""

    # Message text ("Wait failed."); how BaseError consumes _info is not visible here.
    _info = '等待失败。'
|
||||
|
||||
|
||||
class WrongURLError(BaseError):
    """Error type for an invalid URL."""

    # Message text ("Invalid url."); how BaseError consumes _info is not visible here.
    _info = '无效的url。'
|
||||
|
Loading…
x
Reference in New Issue
Block a user