4.0.0b15(+)

get_src()可获取src属性内的base64数据
NoneElement_value改用页面对象设置
This commit is contained in:
g1879 2023-12-01 17:22:35 +08:00
parent 5090fd5c0b
commit 018c944405
9 changed files with 90 additions and 45 deletions

View File

@ -156,19 +156,21 @@ class DrissionElement(BaseElement):
nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only)
if not nodes: if not nodes:
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
try: try:
return nodes[index - 1] return nodes[index - 1]
except IndexError: except IndexError:
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): def prev(self, filter_loc='', index=1, timeout=0, ele_only=True):
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -185,10 +187,10 @@ class DrissionElement(BaseElement):
if nodes: if nodes:
return nodes[-1] return nodes[-1]
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'prev()', raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def next(self, filter_loc='', index=1, timeout=0, ele_only=True): def next(self, filter_loc='', index=1, timeout=0, ele_only=True):
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -205,10 +207,10 @@ class DrissionElement(BaseElement):
if nodes: if nodes:
return nodes[0] return nodes[0]
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'next()', raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def before(self, filter_loc='', index=1, timeout=None, ele_only=True): def before(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -225,10 +227,10 @@ class DrissionElement(BaseElement):
if nodes: if nodes:
return nodes[-1] return nodes[-1]
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'before()', raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def after(self, filter_loc='', index=1, timeout=None, ele_only=True): def after(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -245,10 +247,10 @@ class DrissionElement(BaseElement):
if nodes: if nodes:
return nodes[0] return nodes[0]
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'after()', raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc,
{'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) 'index': index, 'ele_only': ele_only})
else: else:
return NoneElement('after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def children(self, filter_loc='', timeout=None, ele_only=True): def children(self, filter_loc='', timeout=None, ele_only=True):
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选 """返回直接子元素元素或节点组成的列表,可用查询语法筛选
@ -378,6 +380,8 @@ class BasePage(BaseParser):
self.retry_interval = 2 self.retry_interval = 2
self._DownloadKit = None self._DownloadKit = None
self._download_path = None self._download_path = None
self._none_ele_return_value = False
self._none_ele_value = None
@property @property
def title(self): def title(self):

View File

@ -4,7 +4,7 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from abc import abstractmethod from abc import abstractmethod
from typing import Union, Tuple, List from typing import Union, Tuple, List, Any
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
@ -165,6 +165,8 @@ class BasePage(BaseParser):
self._timeout: float = ... self._timeout: float = ...
self._download_path: str = ... self._download_path: str = ...
self._DownloadKit: DownloadKit = ... self._DownloadKit: DownloadKit = ...
self._none_ele_return_value: bool = ...
self._none_ele_value: Any = ...
@property @property
def title(self) -> Union[str, None]: ... def title(self) -> Union[str, None]: ...

View File

@ -9,4 +9,3 @@ class Settings(object):
raise_when_ele_not_found = False raise_when_ele_not_found = False
raise_when_click_failed = False raise_when_click_failed = False
raise_when_wait_failed = False raise_when_wait_failed = False
NoneElement_value = None

View File

@ -5,6 +5,7 @@
""" """
from os.path import basename, sep from os.path import basename, sep
from pathlib import Path from pathlib import Path
from re import search
from time import perf_counter, sleep from time import perf_counter, sleep
from .none_element import NoneElement from .none_element import NoneElement
@ -462,6 +463,14 @@ class ChromiumElement(DrissionElement):
sleep(.1) sleep(.1)
src = self.attr('src') src = self.attr('src')
if src.lower().startswith('data:image'):
if base64_to_bytes:
from base64 import b64decode
return b64decode(src.split(',', 1)[-1])
else:
return src.split(',', 1)[-1]
is_blob = src.startswith('blob') is_blob = src.startswith('blob')
result = None result = None
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
@ -494,8 +503,7 @@ class ChromiumElement(DrissionElement):
continue continue
node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
frame = node.get('frameId', None) frame = node.get('frameId', None) or self.page._frame_id
frame = frame or self.page._target_id
try: try:
result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src)
@ -532,6 +540,11 @@ class ChromiumElement(DrissionElement):
raise NoResourceError raise NoResourceError
path = path or '.' path = path or '.'
if not name and self.tag == 'img':
src = self.attr('src')
if src.lower().startswith('data:image'):
r = search(r'data:image/(.*?);base64,', src)
name = f'img.{r.group(1)}' if r else None
name = name or basename(self.prop('currentSrc')) name = name or basename(self.prop('currentSrc'))
path = get_usable_path(f'{path}{sep}{name}').absolute() path = get_usable_path(f'{path}{sep}{name}').absolute()
write_type = 'wb' if isinstance(data, bytes) else 'w' write_type = 'wb' if isinstance(data, bytes) else 'w'
@ -871,7 +884,7 @@ class ChromiumShadowRoot(BaseElement):
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index})
else: else:
return NoneElement('child()', {'filter_loc': filter_loc, 'index': index}) return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index})
try: try:
return nodes[index - 1] return nodes[index - 1]
@ -879,7 +892,7 @@ class ChromiumShadowRoot(BaseElement):
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index})
else: else:
return NoneElement('child()', {'filter_loc': filter_loc, 'index': index}) return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index})
def next(self, filter_loc='', index=1): def next(self, filter_loc='', index=1):
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -893,7 +906,7 @@ class ChromiumShadowRoot(BaseElement):
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, 'index': index}) raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, 'index': index})
else: else:
return NoneElement('next()', {'filter_loc': filter_loc, 'index': index}) return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index})
def before(self, filter_loc='', index=1): def before(self, filter_loc='', index=1):
"""返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -908,7 +921,7 @@ class ChromiumShadowRoot(BaseElement):
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, 'index': index}) raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, 'index': index})
else: else:
return NoneElement('before()', {'filter_loc': filter_loc, 'index': index}) return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index})
def after(self, filter_loc='', index=1): def after(self, filter_loc='', index=1):
"""返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
@ -923,7 +936,7 @@ class ChromiumShadowRoot(BaseElement):
if Settings.raise_when_ele_not_found: if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, 'index': index}) raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, 'index': index})
else: else:
return NoneElement('after()', {'filter_loc': filter_loc, 'index': index}) return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index})
def children(self, filter_loc=''): def children(self, filter_loc=''):
"""返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选
@ -1033,7 +1046,7 @@ class ChromiumShadowRoot(BaseElement):
if loc[0] == 'css selector': if loc[0] == 'css selector':
if single: if single:
nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId']
result = make_chromium_ele(self.page, node_id=nod_id) if nod_id else NoneElement() result = make_chromium_ele(self.page, node_id=nod_id) if nod_id else NoneElement(self.page)
else: else:
nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId'] nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId']
@ -1042,13 +1055,13 @@ class ChromiumShadowRoot(BaseElement):
else: else:
eles = make_session_ele(self.html).eles(loc) eles = make_session_ele(self.html).eles(loc)
if not eles: if not eles:
result = NoneElement() if single else eles result = NoneElement(self.page) if single else eles
continue continue
css = [i.css_path[61:] for i in eles] css = [i.css_path[61:] for i in eles]
if single: if single:
node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId'] node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId']
result = make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement() result = make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement(self.page)
else: else:
result = [] result = []
@ -1143,7 +1156,7 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True):
returnByValue=False, awaitPromise=True, userGesture=True) returnByValue=False, awaitPromise=True, userGesture=True)
if single: if single:
return NoneElement() if r['result']['subtype'] == 'null' \ return NoneElement(ele.page) if r['result']['subtype'] == 'null' \
else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) else make_chromium_ele(ele.page, obj_id=r['result']['objectId'])
if r['result']['description'] == 'NodeList(0)': if r['result']['description'] == 'NodeList(0)':
@ -1181,7 +1194,7 @@ def find_by_css(ele, selector, single, timeout):
raise SyntaxError(f'查询语句错误:\n{r}') raise SyntaxError(f'查询语句错误:\n{r}')
if single: if single:
return NoneElement() if r['result']['subtype'] == 'null' \ return NoneElement(ele.page) if r['result']['subtype'] == 'null' \
else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) else make_chromium_ele(ele.page, obj_id=r['result']['objectId'])
if r['result']['description'] == 'NodeList(0)': if r['result']['description'] == 'NodeList(0)':

View File

@ -3,23 +3,28 @@
@Author : g1879 @Author : g1879
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from .._commons.settings import Settings
from ..errors import ElementNotFoundError from ..errors import ElementNotFoundError
class NoneElement(object): class NoneElement(object):
def __init__(self, method=None, args=None): def __init__(self, page=None, method=None, args=None):
if page:
self._none_ele_value = page._none_ele_value
self._none_ele_return_value = page._none_ele_return_value
else:
self._none_ele_value = None
self._none_ele_return_value = False
self.method = method self.method = method
self.args = args self.args = args
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
if Settings.NoneElement_value is None: if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args) raise ElementNotFoundError(None, self.method, self.args)
else: else:
return self return self
def __getattr__(self, item): def __getattr__(self, item):
if Settings.NoneElement_value is None: if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args) raise ElementNotFoundError(None, self.method, self.args)
elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before', elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before',
'after', 'get_frame', 'shadow_root', 'sr'): 'after', 'get_frame', 'shadow_root', 'sr'):
@ -27,7 +32,7 @@ class NoneElement(object):
else: else:
if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html', if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html',
'attrs', 'text', 'raw_text'): 'attrs', 'text', 'raw_text'):
return Settings.NoneElement_value return self._none_ele_value
else: else:
raise ElementNotFoundError(None, self.method, self.args) raise ElementNotFoundError(None, self.method, self.args)

View File

@ -375,7 +375,7 @@ def make_session_ele(html_or_ele, loc=None, single=True):
elif isinstance(ele, str): elif isinstance(ele, str):
return ele return ele
else: else:
return NoneElement() return NoneElement(page)
else: # 返回全部 else: # 返回全部
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n']

View File

@ -621,7 +621,7 @@ class ChromiumBase(BasePage):
pass pass
if perf_counter() >= end_time: if perf_counter() >= end_time:
return NoneElement() if single else [] return NoneElement(self) if single else []
sleep(.1) sleep(.1)

View File

@ -11,10 +11,24 @@ from .._commons.tools import show_or_hide_browser
from .._commons.web import set_browser_cookies, set_session_cookies from .._commons.web import set_browser_cookies, set_session_cookies
class ChromiumBaseSetter(object): class BasePageSetter(object):
def __init__(self, page): def __init__(self, page):
self._page = page self._page = page
def NoneElement_value(self, value=None, on_off=True):
"""设置空元素是否返回设定值
:param value: 返回的设定值
:param on_off: 是否启用
:return: None
"""
self._page._none_ele_return_value = on_off
self._page._none_ele_value = value
class ChromiumBaseSetter(BasePageSetter):
def __init__(self, page):
super().__init__(page)
@property @property
def load_mode(self): def load_mode(self):
"""返回用于设置页面加载策略的对象""" """返回用于设置页面加载策略的对象"""
@ -190,12 +204,12 @@ class ChromiumPageSetter(TabSetter):
return PageWindowSetter(self._page) return PageWindowSetter(self._page)
class SessionPageSetter(object): class SessionPageSetter(BasePageSetter):
def __init__(self, page): def __init__(self, page):
""" """
:param page: SessionPage对象 :param page: SessionPage对象
""" """
self._page = page super().__init__(page)
def retry_times(self, times): def retry_times(self, times):
"""设置连接失败时重连次数""" """设置连接失败时重连次数"""

View File

@ -5,13 +5,14 @@
""" """
from http.cookiejar import Cookie from http.cookiejar import Cookie
from pathlib import Path from pathlib import Path
from typing import Union, Tuple, Literal from typing import Union, Tuple, Literal, Any
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar from requests.cookies import RequestsCookieJar
from .scroller import PageScroller from .scroller import PageScroller
from .._base.base import BasePage
from .._elements.chromium_element import ChromiumElement from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame from .._pages.chromium_frame import ChromiumFrame
@ -23,7 +24,14 @@ from .._pages.web_page import WebPage
FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o'] FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']
class ChromiumBaseSetter(object): class BasePageSetter(object):
def __init__(self, page: BasePage):
self._page: BasePage = ...
def NoneElement_value(self, value: Any = None, on_off: bool = True) -> None: ...
class ChromiumBaseSetter(BasePageSetter):
def __init__(self, page): def __init__(self, page):
self._page: ChromiumBase = ... self._page: ChromiumBase = ...
@ -80,7 +88,7 @@ class ChromiumPageSetter(TabSetter):
def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ...
class SessionPageSetter(object): class SessionPageSetter(BasePageSetter):
def __init__(self, page: SessionPage): def __init__(self, page: SessionPage):
self._page: SessionPage = ... self._page: SessionPage = ...