diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index 45a79f4..2f017f2 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -156,19 +156,21 @@ class DrissionElement(BaseElement): nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) if not nodes: if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) try: return nodes[index - 1] except IndexError: if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -185,10 +187,10 @@ class DrissionElement(BaseElement): if nodes: return nodes[-1] if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'prev()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def next(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -205,10 +207,10 @@ class DrissionElement(BaseElement): if nodes: return nodes[0] if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'next()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -225,10 +227,10 @@ class DrissionElement(BaseElement): if nodes: return nodes[-1] if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'before()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -245,10 +247,10 @@ class DrissionElement(BaseElement): if nodes: return nodes[0] if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'after()', - {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement('after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def children(self, filter_loc='', timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 @@ -378,6 +380,8 @@ class BasePage(BaseParser): self.retry_interval = 2 self._DownloadKit = None self._download_path = None + self._none_ele_return_value = False + self._none_ele_value = None @property def title(self): diff --git a/DrissionPage/_base/base.pyi b/DrissionPage/_base/base.pyi index 20b793e..fd18289 100644 --- a/DrissionPage/_base/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -4,7 +4,7 @@ @Contact : g1879@qq.com """ from abc import abstractmethod -from typing import Union, Tuple, List +from typing import Union, Tuple, List, Any from DownloadKit import DownloadKit @@ -165,6 +165,8 @@ class BasePage(BaseParser): self._timeout: float = ... self._download_path: str = ... self._DownloadKit: DownloadKit = ... + self._none_ele_return_value: bool = ... + self._none_ele_value: Any = ... @property def title(self) -> Union[str, None]: ... diff --git a/DrissionPage/_commons/settings.py b/DrissionPage/_commons/settings.py index ee5cda9..0107271 100644 --- a/DrissionPage/_commons/settings.py +++ b/DrissionPage/_commons/settings.py @@ -9,4 +9,3 @@ class Settings(object): raise_when_ele_not_found = False raise_when_click_failed = False raise_when_wait_failed = False - NoneElement_value = None diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index d89c887..3c7dab8 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -5,6 +5,7 @@ """ from os.path import basename, sep from pathlib import Path +from re import search from time import perf_counter, sleep from .none_element import NoneElement @@ -462,6 +463,14 @@ class ChromiumElement(DrissionElement): sleep(.1) src = self.attr('src') + if src.lower().startswith('data:image'): + if base64_to_bytes: + from base64 import b64decode + return b64decode(src.split(',', 1)[-1]) + + else: + return src.split(',', 1)[-1] + is_blob = src.startswith('blob') result = None end_time = perf_counter() + timeout @@ -494,8 +503,7 @@ class ChromiumElement(DrissionElement): continue node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] - frame = node.get('frameId', None) - frame = frame or self.page._target_id + frame = node.get('frameId', None) or self.page._frame_id try: result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) @@ -532,6 +540,11 @@ class ChromiumElement(DrissionElement): raise NoResourceError path = path or '.' + if not name and self.tag == 'img': + src = self.attr('src') + if src.lower().startswith('data:image'): + r = search(r'data:image/(.*?);base64,', src) + name = f'img.{r.group(1)}' if r else None name = name or basename(self.prop('currentSrc')) path = get_usable_path(f'{path}{sep}{name}').absolute() write_type = 'wb' if isinstance(data, bytes) else 'w' @@ -871,7 +884,7 @@ class ChromiumShadowRoot(BaseElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) else: - return NoneElement('child()', {'filter_loc': filter_loc, 'index': index}) + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) try: return nodes[index - 1] @@ -879,7 +892,7 @@ class ChromiumShadowRoot(BaseElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) else: - return NoneElement('child()', {'filter_loc': filter_loc, 'index': index}) + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) def next(self, filter_loc='', index=1): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -893,7 +906,7 @@ class ChromiumShadowRoot(BaseElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, 'index': index}) else: - return NoneElement('next()', {'filter_loc': filter_loc, 'index': index}) + return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index}) def before(self, filter_loc='', index=1): """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -908,7 +921,7 @@ class ChromiumShadowRoot(BaseElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, 'index': index}) else: - return NoneElement('before()', {'filter_loc': filter_loc, 'index': index}) + return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index}) def after(self, filter_loc='', index=1): """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -923,7 +936,7 @@ class ChromiumShadowRoot(BaseElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, 'index': index}) else: - return NoneElement('after()', {'filter_loc': filter_loc, 'index': index}) + return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index}) def children(self, filter_loc=''): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -1033,7 +1046,7 @@ class ChromiumShadowRoot(BaseElement): if loc[0] == 'css selector': if single: nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] - result = make_chromium_ele(self.page, node_id=nod_id) if nod_id else NoneElement() + result = make_chromium_ele(self.page, node_id=nod_id) if nod_id else NoneElement(self.page) else: nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId'] @@ -1042,13 +1055,13 @@ class ChromiumShadowRoot(BaseElement): else: eles = make_session_ele(self.html).eles(loc) if not eles: - result = NoneElement() if single else eles + result = NoneElement(self.page) if single else eles continue css = [i.css_path[61:] for i in eles] if single: node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId'] - result = make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement() + result = make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement(self.page) else: result = [] @@ -1143,7 +1156,7 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True): returnByValue=False, awaitPromise=True, userGesture=True) if single: - return NoneElement() if r['result']['subtype'] == 'null' \ + return NoneElement(ele.page) if r['result']['subtype'] == 'null' \ else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) if r['result']['description'] == 'NodeList(0)': @@ -1181,7 +1194,7 @@ def find_by_css(ele, selector, single, timeout): raise SyntaxError(f'查询语句错误:\n{r}') if single: - return NoneElement() if r['result']['subtype'] == 'null' \ + return NoneElement(ele.page) if r['result']['subtype'] == 'null' \ else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) if r['result']['description'] == 'NodeList(0)': diff --git a/DrissionPage/_elements/none_element.py b/DrissionPage/_elements/none_element.py index 0ef09d3..25732e5 100644 --- a/DrissionPage/_elements/none_element.py +++ b/DrissionPage/_elements/none_element.py @@ -3,23 +3,28 @@ @Author : g1879 @Contact : g1879@qq.com """ -from .._commons.settings import Settings from ..errors import ElementNotFoundError class NoneElement(object): - def __init__(self, method=None, args=None): + def __init__(self, page=None, method=None, args=None): + if page: + self._none_ele_value = page._none_ele_value + self._none_ele_return_value = page._none_ele_return_value + else: + self._none_ele_value = None + self._none_ele_return_value = False self.method = method self.args = args def __call__(self, *args, **kwargs): - if Settings.NoneElement_value is None: + if not self._none_ele_return_value: raise ElementNotFoundError(None, self.method, self.args) else: return self def __getattr__(self, item): - if Settings.NoneElement_value is None: + if not self._none_ele_return_value: raise ElementNotFoundError(None, self.method, self.args) elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before', 'after', 'get_frame', 'shadow_root', 'sr'): @@ -27,7 +32,7 @@ class NoneElement(object): else: if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html', 'attrs', 'text', 'raw_text'): - return Settings.NoneElement_value + return self._none_ele_value else: raise ElementNotFoundError(None, self.method, self.args) diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index f646b1c..5c0109e 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -375,7 +375,7 @@ def make_session_ele(html_or_ele, loc=None, single=True): elif isinstance(ele, str): return ele else: - return NoneElement() + return NoneElement(page) else: # 返回全部 return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index 74320fc..f338d5b 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -621,7 +621,7 @@ class ChromiumBase(BasePage): pass if perf_counter() >= end_time: - return NoneElement() if single else [] + return NoneElement(self) if single else [] sleep(.1) diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py index da1d97b..d51d2eb 100644 --- a/DrissionPage/_units/setter.py +++ b/DrissionPage/_units/setter.py @@ -11,10 +11,24 @@ from .._commons.tools import show_or_hide_browser from .._commons.web import set_browser_cookies, set_session_cookies -class ChromiumBaseSetter(object): +class BasePageSetter(object): def __init__(self, page): self._page = page + def NoneElement_value(self, value=None, on_off=True): + """设置空元素是否返回设定值 + :param value: 返回的设定值 + :param on_off: 是否启用 + :return: None + """ + self._page._none_ele_return_value = on_off + self._page._none_ele_value = value + + +class ChromiumBaseSetter(BasePageSetter): + def __init__(self, page): + super().__init__(page) + @property def load_mode(self): """返回用于设置页面加载策略的对象""" @@ -190,12 +204,12 @@ class ChromiumPageSetter(TabSetter): return PageWindowSetter(self._page) -class SessionPageSetter(object): +class SessionPageSetter(BasePageSetter): def __init__(self, page): """ :param page: SessionPage对象 """ - self._page = page + super().__init__(page) def retry_times(self, times): """设置连接失败时重连次数""" diff --git a/DrissionPage/_units/setter.pyi b/DrissionPage/_units/setter.pyi index 15db2b7..b936565 100644 --- a/DrissionPage/_units/setter.pyi +++ b/DrissionPage/_units/setter.pyi @@ -5,13 +5,14 @@ """ from http.cookiejar import Cookie from pathlib import Path -from typing import Union, Tuple, Literal +from typing import Union, Tuple, Literal, Any from requests.adapters import HTTPAdapter from requests.auth import HTTPBasicAuth from requests.cookies import RequestsCookieJar from .scroller import PageScroller +from .._base.base import BasePage from .._elements.chromium_element import ChromiumElement from .._pages.chromium_base import ChromiumBase from .._pages.chromium_frame import ChromiumFrame @@ -23,7 +24,14 @@ from .._pages.web_page import WebPage FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o'] -class ChromiumBaseSetter(object): +class BasePageSetter(object): + def __init__(self, page: BasePage): + self._page: BasePage = ... + + def NoneElement_value(self, value: Any = None, on_off: bool = True) -> None: ... + + +class ChromiumBaseSetter(BasePageSetter): def __init__(self, page): self._page: ChromiumBase = ... @@ -80,7 +88,7 @@ class ChromiumPageSetter(TabSetter): def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... -class SessionPageSetter(object): +class SessionPageSetter(BasePageSetter): def __init__(self, page: SessionPage): self._page: SessionPage = ...