diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index a071967..9bb8baf 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -14,4 +14,4 @@ from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions __all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__'] -__version__ = '4.0.4.21' +__version__ = '4.0.5.3' diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index 205baaa..1fcbd40 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -169,14 +169,8 @@ class DrissionElement(BaseElement): loc = loc[1].lstrip('./') node = self._ele(f'xpath:./{loc}', timeout=timeout, index=index, relative=True, raise_err=False) - if node: - return node - - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'locator': locator, 'index': index, - 'ele_only': ele_only}) - else: - return NoneElement(self.owner, 'child()', {'locator': locator, 'index': index, 'ele_only': ele_only}) + return node if node else NoneElement(self.owner, 'child()', + {'locator': locator, 'index': index, 'ele_only': ele_only}) def prev(self, locator='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -289,12 +283,8 @@ class DrissionElement(BaseElement): index = locator locator = '' node = self._get_relatives(index, locator, direction, brother, timeout, ele_only) - if node: - return node - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, func, {'locator': locator, 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.owner, func, {'locator': locator, 'index': index, 'ele_only': ele_only}) + return node if node else NoneElement(self.owner, func, + {'locator': locator, 'index': index, 'ele_only': ele_only}) def _get_relatives(self, index=None, locator='', direction='following', brother=True, timeout=.5, ele_only=True): """按要求返回兄弟元素或节点组成的列表 @@ -411,7 +401,8 @@ class BasePage(BaseParser): if p.exists(): url = str(p.absolute()) is_file = True - self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') + + self._url = url if is_file else quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval, is_file diff --git a/DrissionPage/_base/base.pyi b/DrissionPage/_base/base.pyi index ea4d22d..8de4f35 100644 --- a/DrissionPage/_base/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -12,6 +12,7 @@ from DownloadKit import DownloadKit from .._elements.none_element import NoneElement from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList from .._pages.chromium_page import ChromiumPage from .._pages.session_page import SessionPage from .._pages.web_page import WebPage @@ -37,7 +38,7 @@ class BaseParser(object): locator: Union[Tuple[str, str], str, BaseElement, None] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _ele(self, locator: Union[Tuple[str, str], str], diff --git a/DrissionPage/_base/browser.py b/DrissionPage/_base/browser.py index c877013..00649d5 100644 --- a/DrissionPage/_base/browser.py +++ b/DrissionPage/_base/browser.py @@ -5,7 +5,6 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ -from os import waitpid from pathlib import Path from shutil import rmtree from time import perf_counter, sleep @@ -135,7 +134,8 @@ class Browser(object): def tab_ids(self): """返回所有标签页id组成的列表""" j = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp,因为顺序不对 - return [i['id'] for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')] + return [i['id'] for i in j if i['type'] in ('page', 'webview') + and not i['url'].startswith('devtools://')] @property def process_id(self): @@ -143,7 +143,7 @@ class Browser(object): return self._process_id def find_tabs(self, title=None, url=None, tab_type=None): - """查找符合条件的tab,返回它们组成的列表 + """查找符合条件的tab,返回它们组成的列表,title和url是与关系 :param title: 要匹配title的文本 :param url: 要匹配url的文本 :param tab_type: tab类型,可用列表输入多个 @@ -274,10 +274,6 @@ class Browser(object): if ok: break - sleep(.05) - - if self.process_id: - waitpid(self.process_id, 0) def _on_disconnect(self): self.page._on_disconnect() @@ -293,4 +289,4 @@ class Browser(object): break except (PermissionError, FileNotFoundError, OSError): pass - sleep(.05) + sleep(.03) diff --git a/DrissionPage/_base/driver.py b/DrissionPage/_base/driver.py index d98bc31..3044a87 100644 --- a/DrissionPage/_base/driver.py +++ b/DrissionPage/_base/driver.py @@ -10,12 +10,12 @@ from queue import Queue, Empty from threading import Thread, Event from time import perf_counter, sleep -from requests import get +from requests import Session from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection, WebSocketException, WebSocketBadStatusException) from .._functions.settings import Settings -from ..errors import PageDisconnectedError, TargetNotFoundError +from ..errors import PageDisconnectedError class Driver(object): @@ -201,13 +201,10 @@ class Driver(object): try: self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True) except WebSocketBadStatusException as e: - txt = str(e) - if 'No such target id' in txt: - raise TargetNotFoundError(f'找不到页面:{self.id}。') - elif 'Handshake status 403 Forbidden' in txt: + if 'Handshake status 403 Forbidden' in str(e): raise RuntimeError('请升级websocket-client库。') else: - raise e + return self._recv_th.start() self._handle_event_th.start() return True @@ -274,11 +271,13 @@ class BrowserDriver(Driver): self._created = True BrowserDriver.BROWSERS[tab_id] = self super().__init__(tab_id, tab_type, address, owner) + self._control_session = Session() + self._control_session.trust_env = False def __repr__(self): return f'<BrowserDriver {self.id}>' def get(self, url): - r = get(url, headers={'Connection': 'close'}) + r = self._control_session.get(url, headers={'Connection': 'close'}) r.close() return r diff --git a/DrissionPage/_base/driver.pyi b/DrissionPage/_base/driver.pyi index 329280b..b3f44f9 100644 --- a/DrissionPage/_base/driver.pyi +++ b/DrissionPage/_base/driver.pyi @@ -9,7 +9,7 @@ from queue import Queue from threading import Thread, Event from typing import Union, Callable, Dict, Optional -from requests import Response +from requests import Response, Session from websocket import WebSocket from .browser import Browser @@ -68,14 +68,10 @@ class Driver(object): class BrowserDriver(Driver): BROWSERS: Dict[str, Driver] = ... owner: Browser = ... + _control_session: Session = ... def __new__(cls, tab_id: str, tab_type: str, address: str, owner: Browser): ... - def __init__(self, tab_id: str, tab_type: str, address: str, owner: Browser): - """ - - :rtype: object - """ - ... + def __init__(self, tab_id: str, tab_type: str, address: str, owner: Browser): ... def get(self, url) -> Response: ... diff --git a/DrissionPage/_configs/chromium_options.py b/DrissionPage/_configs/chromium_options.py index 0a5a9e7..86e64d2 100644 --- a/DrissionPage/_configs/chromium_options.py +++ b/DrissionPage/_configs/chromium_options.py @@ -389,7 +389,7 @@ class ChromiumOptions(object): return self def set_paths(self, browser_path=None, local_port=None, address=None, download_path=None, - user_data_path=None, cache_path=None, debugger_address=None): + user_data_path=None, cache_path=None): """快捷的路径设置函数 :param browser_path: 浏览器可执行文件路径 :param local_port: 本地端口号 @@ -399,7 +399,6 @@ class ChromiumOptions(object): :param cache_path: 缓存路径 :return: 当前对象 """ - address = address or debugger_address if browser_path is not None: self.set_browser_path(browser_path) @@ -568,50 +567,3 @@ class ChromiumOptions(object): def __repr__(self): return f'<ChromiumOptions at {id(self)}>' - - # ---------------即将废弃-------------- - - @property - def debugger_address(self): - """返回浏览器地址,ip:port""" - return self._address - - @debugger_address.setter - def debugger_address(self, address): - """设置浏览器地址,格式ip:port""" - self.set_address(address) - - def set_page_load_strategy(self, value): - return self.set_load_mode(value) - - def set_headless(self, on_off=True): - """设置是否隐藏浏览器界面 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = 'new' if on_off else 'false' - return self.set_argument('--headless', on_off) - - def set_no_imgs(self, on_off=True): - """设置是否加载图片 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = None if on_off else False - return self.set_argument('--blink-settings=imagesEnabled=false', on_off) - - def set_no_js(self, on_off=True): - """设置是否禁用js - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = None if on_off else False - return self.set_argument('--disable-javascript', on_off) - - def set_mute(self, on_off=True): - """设置是否静音 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = None if on_off else False - return self.set_argument('--mute-audio', on_off) diff --git a/DrissionPage/_configs/session_options.py b/DrissionPage/_configs/session_options.py index ee9b0f8..a533e4d 100644 --- a/DrissionPage/_configs/session_options.py +++ b/DrissionPage/_configs/session_options.py @@ -457,17 +457,6 @@ class SessionOptions(object): self._adapters = [(k, i) for k, i in session.adapters.items()] return self - # --------------即将废弃--------------- - - def set_paths(self, download_path=None): - """设置默认下载路径 - :param download_path: 下载路径 - :return: 返回当前对象 - """ - if download_path is not None: - self._download_path = str(download_path) - return self - def __repr__(self): return f'<SessionOptions at {id(self)}>' diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index b56263b..3f42b5f 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -17,8 +17,8 @@ from .none_element import NoneElement from .session_element import make_session_ele from .._base.base import DrissionElement, BaseElement from .._functions.keys import input_text_or_keys -from .._functions.locator import get_loc -from .._functions.settings import Settings +from .._functions.locator import get_loc, locator_to_tuple +from .._functions.elements import ChromiumElementsList from .._functions.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll, get_blob from .._units.clicker import Clicker from .._units.rect import ElementRect @@ -27,8 +27,8 @@ from .._units.selector import SelectElement from .._units.setter import ChromiumElementSetter from .._units.states import ElementStates, ShadowRootStates from .._units.waiter import ElementWaiter -from ..errors import (ContextLostError, ElementLostError, JavaScriptError, ElementNotFoundError, - CDPError, NoResourceError, AlertExistsError) +from ..errors import ContextLostError, ElementLostError, JavaScriptError, CDPError, NoResourceError, AlertExistsError, \ + NoRectError __FRAME_ELEMENT__ = ('iframe', 'frame') @@ -55,6 +55,7 @@ class ChromiumElement(DrissionElement): self._tag = None self._wait = None self._type = 'ChromiumElement' + self._doc_id = None if node_id and obj_id and backend_id: self._node_id = node_id @@ -75,9 +76,6 @@ class ChromiumElement(DrissionElement): else: raise ElementLostError - doc = self.run_js('return this.ownerDocument;') - self._doc_id = doc['objectId'] if doc else None - def __repr__(self): attrs = [f"{k}='{v}'" for k, v in self.attrs.items()] return f'<ChromiumElement {self.tag} {" ".join(attrs)}>' @@ -93,14 +91,6 @@ class ChromiumElement(DrissionElement): def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) - def __getattr__(self, item): - """获取元素属性 - :param item: 属性名 - :return: 属性值 - """ - a = self.attr(item) - return a if a is not None else self.property(item) - @property def tag(self): """返回元素tag""" @@ -221,25 +211,6 @@ class ChromiumElement(DrissionElement): def value(self): return self.property('value') - # -----即将废弃开始-------- - @property - def location(self): - """返回元素左上角的绝对坐标""" - return self.rect.location - - @property - def size(self): - """返回元素宽和高组成的元组""" - return self.rect.size - - def prop(self, prop): - return self.property(prop) - - def get_src(self, timeout=None, base64_to_bytes=True): - return self.src(timeout=timeout, base64_to_bytes=base64_to_bytes) - - # -----即将废弃结束-------- - def check(self, uncheck=False, by_js=False): """选中或取消选中当前元素 :param uncheck: 是否取消选中 @@ -328,7 +299,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ - return super().children(locator, timeout, ele_only=ele_only) + return ChromiumElementsList(self.owner, super().children(locator, timeout, ele_only=ele_only)) def prevs(self, locator='', timeout=None, ele_only=True): """返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -337,7 +308,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return super().prevs(locator, timeout, ele_only=ele_only) + return ChromiumElementsList(self.owner, super().prevs(locator, timeout, ele_only=ele_only)) def nexts(self, locator='', timeout=None, ele_only=True): """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -346,7 +317,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return super().nexts(locator, timeout, ele_only=ele_only) + return ChromiumElementsList(self.owner, super().nexts(locator, timeout, ele_only=ele_only)) def befores(self, locator='', timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -356,7 +327,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - return super().befores(locator, timeout, ele_only=ele_only) + return ChromiumElementsList(self.owner, super().befores(locator, timeout, ele_only=ele_only)) def afters(self, locator='', timeout=None, ele_only=True): """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -366,7 +337,137 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ - return super().afters(locator, timeout, ele_only=ele_only) + return ChromiumElementsList(self.owner, super().afters(locator, timeout, ele_only=ele_only)) + + def over(self, timeout=None): + """获取覆盖在本元素上最上层的元素 + :param timeout: 等待元素出现的超时时间(秒) + :return: 元素对象 + """ + timeout = timeout if timeout is None else self.owner.timeout + bid = self.wait.covered(timeout=timeout) + if bid: + return ChromiumElement(owner=self.owner, backend_id=bid) + else: + return NoneElement(page=self.owner, method='on()', args={'timeout': timeout}) + + def offset(self, offset_x, offset_y): + """获取相对本元素左上角左边指定偏移量位置的元素 + :param offset_x: 横坐标偏移量,向右为正 + :param offset_y: 纵坐标偏移量,向下为正 + :return: 元素对象 + """ + x, y = self.rect.location + try: + return ChromiumElement(owner=self.owner, + backend_id=self.owner.run_cdp('DOM.getNodeForLocation', x=x + offset_x, + y=y + offset_y, includeUserAgentShadowDOM=True, + ignorePointerEventsNone=False)['backendNodeId']) + except CDPError: + return NoneElement(page=self.owner, method='offset()', args={'offset_x': offset_x, 'offset_y': offset_y}) + + def east(self, loc_or_pixel=None, index=1): + """获取元素右边某个指定元素 + :param loc_or_pixel: 定位符,只支持str或int,且不支持xpath和css方式,传入int按像素距离获取 + :param index: 第几个,从1开始 + :return: 获取到的元素对象 + """ + return self._get_relative_eles(mode='east', locator=loc_or_pixel, index=index) + + def south(self, loc_or_pixel=None, index=1): + """获取元素下方某个指定元素 + :param loc_or_pixel: 定位符,只支持str或int,且不支持xpath和css方式,传入int按像素距离获取 + :param index: 第几个,从1开始 + :return: 获取到的元素对象 + """ + return self._get_relative_eles(mode='south', locator=loc_or_pixel, index=index) + + def west(self, loc_or_pixel=None, index=1): + """获取元素左边某个指定元素 + :param loc_or_pixel: 定位符,只支持str或int,且不支持xpath和css方式,传入int按像素距离获取 + :param index: 第几个,从1开始 + :return: 获取到的元素对象 + """ + return self._get_relative_eles(mode='west', locator=loc_or_pixel, index=index) + + def north(self, loc_or_pixel=None, index=1): + """获取元素上方某个指定元素 + :param loc_or_pixel: 定位符,只支持str或int,且不支持xpath和css方式,传入int按像素距离获取 + :param index: 第几个,从1开始 + :return: 获取到的元素对象 + """ + return self._get_relative_eles(mode='north', locator=loc_or_pixel, index=index) + + def _get_relative_eles(self, mode='north', locator=None, index=1): + """获取元素下方某个指定元素 + :param locator: 定位符,只支持str或int,且不支持xpath和css方式 + :param index: 第几个,从1开始 + :return: 获取到的元素对象 + """ + if locator and not (isinstance(locator, str) and not locator.startswith( + ('x:', 'xpath:', 'x=', 'xpath=', 'c:', 'css:', 'c=', 'css=')) or isinstance(locator, int)): + raise ValueError('locator参数只能是str格式且不支持xpath和css形式。') + rect = self.states.has_rect + if not rect: + raise NoRectError + + if mode == 'east': + cdp_data = {'x': int(rect[1][0]), 'y': int(self.rect.midpoint[1]), + 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} + variable = 'x' + minus = False + elif mode == 'south': + cdp_data = {'x': int(self.rect.midpoint[0]), 'y': int(rect[2][1]), + 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} + variable = 'y' + minus = False + elif mode == 'west': + cdp_data = {'x': int(rect[0][0]), 'y': int(self.rect.midpoint[1]), + 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} + variable = 'x' + minus = True + else: # north + cdp_data = {'x': int(self.rect.midpoint[0]), 'y': int(rect[0][1]), + 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} + variable = 'y' + minus = True + + if isinstance(locator, int): + if minus: + cdp_data[variable] -= locator + else: + cdp_data[variable] += locator + try: + return ChromiumElement(owner=self.owner, + backend_id=self.owner.run_cdp('DOM.getNodeForLocation', + **cdp_data)['backendNodeId']) + except CDPError: + return NoneElement(page=self.owner, method=f'{mode}()', args={'locator': locator}) + + num = 0 + value = -8 if minus else 8 + size = self.owner.rect.size + max_len = size[0] if mode == 'east' else size[1] + loc_data = locator_to_tuple(locator) if locator else None + curr_ele = None + while 0 < cdp_data[variable] < max_len: + cdp_data[variable] += value + try: + bid = self.owner.run_cdp('DOM.getNodeForLocation', **cdp_data)['backendNodeId'] + if bid == curr_ele: + continue + else: + curr_ele = bid + ele = ChromiumElement(self.owner, backend_id=bid) + + if loc_data is None or _check_ele(ele, loc_data): + num += 1 + if num == index: + return ele + except: + pass + + return NoneElement(page=self.owner, method=f'{mode}()', args={'locator': locator}) def attr(self, attr): """返回一个attribute属性值 @@ -375,14 +476,14 @@ class ChromiumElement(DrissionElement): """ attrs = self.attrs if attr == 'href': # 获取href属性时返回绝对url - link = attrs.get('href', None) + link = attrs.get('href') if not link or link.lower().startswith(('javascript:', 'mailto:')): return link else: return make_absolute_link(link, self.property('baseURI')) elif attr == 'src': - return make_absolute_link(attrs.get('src', None), self.property('baseURI')) + return make_absolute_link(attrs.get('src'), self.property('baseURI')) elif attr == 'text': return self.text @@ -459,14 +560,7 @@ class ChromiumElement(DrissionElement): :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, locator, index=index) - if isinstance(r, NoneElement): - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 's_ele()', {'locator': locator}) - else: - r.method = 's_ele()' - r.args = {'locator': locator} - return r + return make_session_ele(self, locator, index=index, method='s_ele()') def s_eles(self, locator=None): """查找所有符合条件的元素,以SessionElement列表形式返回 @@ -638,6 +732,7 @@ class ChromiumElement(DrissionElement): self.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') return + self.wait.clickable(wait_moved=False, timeout=.5) if clear and vals not in ('\n', '\ue007'): self.clear(by_js=False) else: @@ -686,7 +781,7 @@ class ChromiumElement(DrissionElement): """拖拽当前元素到相对位置 :param offset_x: x变化值 :param offset_y: y变化值 - :param duration: 拖动用时,传入0即瞬间到j达 + :param duration: 拖动用时,传入0即瞬间到达 :return: None """ curr_x, curr_y = self.rect.midpoint @@ -704,7 +799,6 @@ class ChromiumElement(DrissionElement): ele_or_loc = ele_or_loc.rect.midpoint elif not isinstance(ele_or_loc, (list, tuple)): raise TypeError('需要ChromiumElement对象或坐标。') - self.owner.actions.hold(self).move_to(ele_or_loc, duration=duration).release() def _get_obj_id(self, node_id=None, backend_id=None): @@ -917,13 +1011,8 @@ class ShadowRoot(BaseElement): loc = f'xpath:./{loc}' ele = self._ele(loc, index=index, relative=True) - if ele: - return ele - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'locator': locator, 'index': index}) - else: - return NoneElement(self.owner, 'child()', {'locator': locator, 'index': index}) + return ele if ele else NoneElement(self.owner, 'child()', {'locator': locator, 'index': index}) def next(self, locator='', index=1): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -938,13 +1027,8 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./{loc}' ele = self.parent_ele._ele(xpath, index=index, relative=True) - if ele: - return ele - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'next()', {'locator': locator, 'index': index}) - else: - return NoneElement(self.owner, 'next()', {'locator': locator, 'index': index}) + return ele if ele else NoneElement(self.owner, 'next()', {'locator': locator, 'index': index}) def before(self, locator='', index=1): """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -960,13 +1044,8 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./preceding::{loc}' ele = self.parent_ele._ele(xpath, index=index, relative=True) - if ele: - return ele - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'before()', {'locator': locator, 'index': index}) - else: - return NoneElement(self.owner, 'before()', {'locator': locator, 'index': index}) + return ele if ele else NoneElement(self.owner, 'before()', {'locator': locator, 'index': index}) def after(self, locator='', index=1): """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -976,12 +1055,7 @@ class ShadowRoot(BaseElement): :return: 本元素后面的某个元素或节点 """ nodes = self.afters(locator=locator) - if nodes: - return nodes[index - 1] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'after()', {'locator': locator, 'index': index}) - else: - return NoneElement(self.owner, 'after()', {'locator': locator, 'index': index}) + return nodes[index - 1] if nodes else NoneElement(self.owner, 'after()', {'locator': locator, 'index': index}) def children(self, locator=''): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -1115,8 +1189,8 @@ class ShadowRoot(BaseElement): r = make_chromium_eles(self.owner, _ids=node_id, is_obj_id=False) return None if r is False else r else: - node_ids = [self.owner.run_cdp('DOM.querySelector', - nodeId=self._node_id, selector=i)['nodeId'] for i in css] + node_ids = [self.owner.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] + for i in css] if 0 in node_ids: return None r = make_chromium_eles(self.owner, _ids=node_ids, index=index, is_obj_id=False) @@ -1219,8 +1293,12 @@ def find_by_xpath(ele, xpath, index, timeout, relative=True): res = ele.owner.run_cdp('Runtime.getProperties', objectId=res['result']['objectId'], ownProperties=True)['result'][:-1] if index is None: - r = [make_chromium_eles(ele.owner, _ids=i['value']['objectId'], is_obj_id=True) - if i['value']['type'] == 'object' else i['value']['value'] for i in res] + r = ChromiumElementsList(page=ele.owner) + for i in res: + if i['value']['type'] == 'object': + r.append(make_chromium_eles(ele.owner, _ids=i['value']['objectId'], is_obj_id=True)) + else: + r.append(i['value']['value']) return None if False in r else r else: @@ -1244,7 +1322,7 @@ def find_by_xpath(ele, xpath, index, timeout, relative=True): if result: return result - return NoneElement(ele.owner) if index is not None else [] + return NoneElement(ele.owner) if index is not None else ChromiumElementsList(page=ele.owner) def find_by_css(ele, selector, index, timeout): @@ -1290,7 +1368,7 @@ def find_by_css(ele, selector, index, timeout): if result: return result - return NoneElement(ele.owner) if index is not None else [] + return NoneElement(ele.owner) if index is not None else ChromiumElementsList(page=ele.owner) def make_chromium_eles(page, _ids, index=1, is_obj_id=True, ele_only=False): @@ -1322,7 +1400,7 @@ def make_chromium_eles(page, _ids, index=1, is_obj_id=True, ele_only=False): return get_node_func(page, obj_id, ele_only) else: # 获取全部 - nodes = [] + nodes = ChromiumElementsList(page=page) for obj_id in _ids: tmp = get_node_func(page, obj_id, ele_only) if tmp is False: @@ -1569,3 +1647,59 @@ class Pseudo(object): def after(self): """返回当前元素的::after伪元素内容""" return self._ele.style('content', 'after') + + +def _check_ele(ele, loc_data): + """检查元素是否符合loc_data指定的要求 + :param ele: 元素对象 + :param loc_data: 格式: {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]} + :return: bool + """ + attrs = ele.attrs + if loc_data['and']: + ok = True + for i in loc_data['args']: + name, symbol, value, deny = i + if name == 'tag()': + arg = ele.tag + symbol = '=' + elif name == 'text()': + arg = ele.raw_text + elif name is None: + arg = None + else: + arg = attrs.get(name, '') + + if ((symbol == '=' and ((deny and arg == value) or (not deny and arg != value))) + or (symbol == ':' and ((deny and value in arg) or (not deny and value not in arg))) + or (symbol == '^' and ((deny and arg.startswith(value)) + or (not deny and not arg.startswith(value)))) + or (symbol == '$' and ((deny and arg.endswith(value)) or (not deny and not arg.endswith(value)))) + or (arg is None and attrs)): + ok = False + break + + else: + ok = False + for i in loc_data['args']: + name, value, symbol, deny = i + if name == 'tag()': + arg = ele.tag + symbol = '=' + elif name == 'text()': + arg = ele.text + elif name is None: + arg = None + else: + arg = attrs.get(name, '') + + if ((symbol == '=' and ((not deny and arg == value) or (deny and arg != value))) + or (symbol == ':' and ((not deny and value in arg) or (deny and value not in arg))) + or (symbol == '^' and ((not deny and arg.startswith(value)) + or (deny and not arg.startswith(value)))) + or (symbol == '$' and ((not deny and arg.endswith(value)) or (deny and not arg.endswith(value)))) + or (arg is None and not attrs)): + ok = True + break + + return ok diff --git a/DrissionPage/_elements/chromium_element.pyi b/DrissionPage/_elements/chromium_element.pyi index e6a83a8..1be5553 100644 --- a/DrissionPage/_elements/chromium_element.pyi +++ b/DrissionPage/_elements/chromium_element.pyi @@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Any, Literal, Optional from .._base.base import DrissionElement, BaseElement from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList, ChromiumElementsList from .._pages.chromium_base import ChromiumBase from .._pages.chromium_frame import ChromiumFrame from .._pages.chromium_page import ChromiumPage @@ -56,8 +57,6 @@ class ChromiumElement(DrissionElement): def __eq__(self, other: ChromiumElement) -> bool: ... - def __getattr__(self, item: str) -> str: ... - @property def tag(self) -> str: ... @@ -138,27 +137,44 @@ class ChromiumElement(DrissionElement): def children(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + ele_only: bool = True) -> Union[ChromiumElementsList, List[Union[ChromiumElement, str]]]: ... def prevs(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + ele_only: bool = True) -> Union[ChromiumElementsList, List[Union[ChromiumElement, str]]]: ... def nexts(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + ele_only: bool = True) -> Union[ChromiumElementsList, List[Union[ChromiumElement, str]]]: ... def befores(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + ele_only: bool = True) -> Union[ChromiumElementsList, List[Union[ChromiumElement, str]]]: ... def afters(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + ele_only: bool = True) -> Union[ChromiumElementsList, List[Union[ChromiumElement, str]]]: ... + + def over(self, timeout: float = None) -> ChromiumElement: ... + + def south(self, loc_or_pixel: Union[str, int, None] = None, index: int = 1) -> ChromiumElement: ... + + def north(self, loc_or_pixel: Union[str, int, None] = None, index: int = 1) -> ChromiumElement: ... + + def west(self, loc_or_pixel: Union[str, int, None] = None, index: int = 1) -> ChromiumElement: ... + + def east(self, loc_or_pixel: Union[str, int, None] = None, index: int = 1) -> ChromiumElement: ... + + def offset(self, offset_x: int, offset_y: int) -> ChromiumElement: ... + + def _get_relative_eles(self, + mode: str = 'north', + locator: Union[int, str] = None, + index: int = 1) -> ChromiumElement: ... @property def wait(self) -> ElementWaiter: ... @@ -188,21 +204,20 @@ class ChromiumElement(DrissionElement): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... + timeout: float = None) -> ChromiumElementsList: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str] = None) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str] = None) -> SessionElementsList: ... def _find_elements(self, locator: Union[Tuple[str, str], str], timeout: float = None, index: Optional[int] = 1, relative: bool = False, - raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, - List[Union[ChromiumElement, ChromiumFrame]]]: ... + raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, ChromiumElementsList]: ... def style(self, style: str, pseudo_ele: str = '') -> str: ... @@ -318,21 +333,20 @@ class ShadowRoot(BaseElement): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... + timeout: float = None) -> ChromiumElementsList: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _find_elements(self, locator: Union[Tuple[str, str], str], timeout: float = None, index: Optional[int] = 1, relative: bool = False, - raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, str, - List[Union[ChromiumElement, ChromiumFrame, str]]]: ... + raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, str, ChromiumElementsList]: ... def _get_node_id(self, obj_id: str) -> int: ... @@ -366,7 +380,7 @@ def make_chromium_eles(page: Union[ChromiumBase, ChromiumPage, WebPage, Chromium index: Optional[int] = 1, is_obj_id: bool = True, ele_only: bool = False - ) -> Union[ChromiumElement, ChromiumFrame, List[Union[ChromiumElement, ChromiumFrame]]]: ... + ) -> Union[ChromiumElement, ChromiumFrame, ChromiumElementsList]: ... def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... diff --git a/DrissionPage/_elements/none_element.py b/DrissionPage/_elements/none_element.py index 15502f3..31e04e6 100644 --- a/DrissionPage/_elements/none_element.py +++ b/DrissionPage/_elements/none_element.py @@ -5,11 +5,20 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ +from .._functions.settings import Settings from ..errors import ElementNotFoundError class NoneElement(object): def __init__(self, page=None, method=None, args=None): + """ + :param page: 元素所在页面 + :param method: 查找元素的方法 + :param args: 查找元素的参数 + """ + if method and Settings.raise_when_ele_not_found: # 无传入method时不自动抛出,由调用者处理 + raise ElementNotFoundError(None, method=method, arguments=args) + if page: self._none_ele_value = page._none_ele_value self._none_ele_return_value = page._none_ele_return_value diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index 4fe9c77..888269a 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -6,13 +6,14 @@ @License : BSD 3-Clause. """ from html import unescape -from re import match, sub, DOTALL +from re import match, sub, DOTALL, search from lxml.etree import tostring from lxml.html import HtmlElement, fromstring from .none_element import NoneElement from .._base.base import DrissionElement, BasePage, BaseElement +from .._functions.elements import SessionElementsList from .._functions.locator import get_loc from .._functions.web import get_ele_txt, make_absolute_link @@ -50,13 +51,6 @@ class SessionElement(DrissionElement): def __eq__(self, other): return self.xpath == getattr(other, 'xpath', None) - def __getattr__(self, item): - """获取元素属性 - :param item: 属性名 - :return: 属性值 - """ - return self.attr(item) - @property def tag(self): """返回元素类型""" @@ -156,7 +150,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ - return super().children(locator, timeout, ele_only=ele_only) + return SessionElementsList(self.owner, super().children(locator, timeout, ele_only=ele_only)) def prevs(self, locator='', timeout=None, ele_only=True): """返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -165,7 +159,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点文本组成的列表 """ - return super().prevs(locator, timeout, ele_only=ele_only) + return SessionElementsList(self.owner, super().prevs(locator, timeout, ele_only=ele_only)) def nexts(self, locator='', timeout=None, ele_only=True): """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -174,7 +168,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点文本组成的列表 """ - return super().nexts(locator, timeout, ele_only=ele_only) + return SessionElementsList(self.owner, super().nexts(locator, timeout, ele_only=ele_only)) def befores(self, locator='', timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -184,7 +178,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - return super().befores(locator, timeout, ele_only=ele_only) + return SessionElementsList(self.owner, super().befores(locator, timeout, ele_only=ele_only)) def afters(self, locator='', timeout=None, ele_only=True): """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -194,7 +188,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ - return super().afters(locator, timeout, ele_only=ele_only) + return SessionElementsList(self.owner, super().afters(locator, timeout, ele_only=ele_only)) def attr(self, name): """返回attribute属性值 @@ -293,12 +287,13 @@ class SessionElement(DrissionElement): return f'{path_str[1:]}' if mode == 'css' else path_str -def make_session_ele(html_or_ele, loc=None, index=1): +def make_session_ele(html_or_ele, loc=None, index=1, method=None): """从接收到的对象或html文本中查找元素,返回SessionElement对象 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 :param html_or_ele: html文本、BaseParser对象 :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个,None获取所有 + :param method: 调用此方法的方法 :return: 返回SessionElement元素或列表,或属性文本 """ # ---------------处理定位符--------------- @@ -353,8 +348,14 @@ def make_session_ele(html_or_ele, loc=None, index=1): page = html_or_ele.owner xpath = html_or_ele.xpath # ChromiumElement,兼容传入的元素在iframe内的情况 - html = html_or_ele.owner.run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML'] \ - if html_or_ele._doc_id else html_or_ele.owner.html + if html_or_ele._doc_id is None: + doc = html_or_ele.run_js('return this.ownerDocument;') + html_or_ele._doc_id = doc['objectId'] if doc else False + + if html_or_ele._doc_id: + html = html_or_ele.owner.run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML'] + else: + html = html_or_ele.owner.html html_or_ele = fromstring(html) html_or_ele = html_or_ele.xpath(xpath)[0] @@ -373,7 +374,11 @@ def make_session_ele(html_or_ele, loc=None, index=1): # ShadowRoot elif isinstance(html_or_ele, BaseElement): page = html_or_ele.owner - html_or_ele = fromstring(html_or_ele.html) + html = html_or_ele.html + r = search(r'^<shadow_root>[ \n]*?<html>[ \n]*?(.*?)[ \n]*?</html>[ \n]*?</shadow_root>$', html) + if r: + html = r.group(1) + html_or_ele = fromstring(html) else: raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') @@ -390,12 +395,16 @@ def make_session_ele(html_or_ele, loc=None, index=1): # 把lxml元素对象包装成SessionElement对象并按需要返回一个或全部 if index is None: - return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in eles if e != '\n'] + r = SessionElementsList(page=page) + for e in eles: + if e != '\n': + r.append(SessionElement(e, page) if isinstance(e, HtmlElement) else e) + return r else: eles_count = len(eles) if eles_count == 0 or abs(index) > eles_count: - return NoneElement(page) + return NoneElement(page, method=method, args={'locator': loc, 'index': index}) if index < 0: index = eles_count + index + 1 @@ -405,7 +414,7 @@ def make_session_ele(html_or_ele, loc=None, index=1): elif isinstance(ele, str): return ele else: - return NoneElement(page) + return NoneElement(page, method=method, args={'locator': loc, 'index': index}) except Exception as e: if 'Invalid expression' in str(e): diff --git a/DrissionPage/_elements/session_element.pyi b/DrissionPage/_elements/session_element.pyi index 74034a9..18eef9d 100644 --- a/DrissionPage/_elements/session_element.pyi +++ b/DrissionPage/_elements/session_element.pyi @@ -11,6 +11,7 @@ from lxml.html import HtmlElement from .._base.base import DrissionElement, BaseElement from .._elements.chromium_element import ChromiumElement +from .._functions.elements import SessionElementsList from .._pages.chromium_base import ChromiumBase from .._pages.chromium_frame import ChromiumFrame from .._pages.session_page import SessionPage @@ -35,8 +36,6 @@ class SessionElement(DrissionElement): def __eq__(self, other: SessionElement) -> bool: ... - def __getattr__(self, item: str) -> str: ... - @property def tag(self) -> str: ... @@ -92,27 +91,27 @@ class SessionElement(DrissionElement): def children(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ... def prevs(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ... def nexts(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ... def befores(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ... def afters(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, - ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ... def attr(self, name: str) -> Optional[str]: ... @@ -123,20 +122,20 @@ class SessionElement(DrissionElement): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[SessionElement]: ... + timeout: float = None) -> SessionElementsList: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _find_elements(self, locator: Union[Tuple[str, str], str], timeout: float = None, index: Optional[int] = 1, relative: bool = False, - raise_err: bool = None) -> Union[SessionElement, List[SessionElement]]: ... + raise_err: bool = None) -> Union[SessionElement, SessionElementsList]: ... def _get_ele_path(self, mode: str) -> str: ... @@ -144,4 +143,5 @@ class SessionElement(DrissionElement): def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame, ChromiumBase], loc: Union[str, Tuple[str, str]] = None, - index: Optional[int] = 1) -> Union[SessionElement, List[SessionElement]]: ... + index: Optional[int] = 1, + method: Optional[str] = None) -> Union[SessionElement, SessionElementsList]: ... diff --git a/DrissionPage/_functions/browser.py b/DrissionPage/_functions/browser.py index d0829b8..a8f79d4 100644 --- a/DrissionPage/_functions/browser.py +++ b/DrissionPage/_functions/browser.py @@ -8,12 +8,11 @@ from json import load, dump, JSONDecodeError from os import environ from pathlib import Path -from platform import system from subprocess import Popen, DEVNULL from tempfile import gettempdir from time import perf_counter, sleep -from requests import get as requests_get +from requests import Session from .tools import port_is_using from .._configs.options_manage import OptionsManager @@ -200,16 +199,21 @@ def test_connect(ip, port, timeout=30): :return: None """ end_time = perf_counter() + timeout + s = Session() + s.trust_env = False while perf_counter() < end_time: try: - tabs = requests_get(f'http://{ip}:{port}/json', timeout=10, headers={'Connection': 'close'}, - proxies={'http': None, 'https': None}).json() - for tab in tabs: + r = s.get(f'http://{ip}:{port}/json', timeout=10, headers={'Connection': 'close'}) + for tab in r.json(): if tab['type'] in ('page', 'webview'): + r.close() + s.close() return + r.close() except Exception: sleep(.2) + s.close() raise BrowserConnectError(f'\n{ip}:{port}浏览器无法链接。\n请确认:\n1、该端口为浏览器\n' f'2、已添加\'--remote-debugging-port={port}\'启动项\n' f'3、用户文件夹没有和已打开的浏览器冲突\n' diff --git a/DrissionPage/_functions/elements.py b/DrissionPage/_functions/elements.py new file mode 100644 index 0000000..d77d359 --- /dev/null +++ b/DrissionPage/_functions/elements.py @@ -0,0 +1,507 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from time import perf_counter + +from .._elements.none_element import NoneElement + + +class SessionElementsList(list): + def __init__(self, page=None, *args): + super().__init__(*args) + self._page = page + + @property + def get(self): + return Getter(self) + + @property + def filter(self): + return SessionFilter(self) + + @property + def filter_one(self): + return SessionFilterOne(self) + + +class ChromiumElementsList(SessionElementsList): + + @property + def filter(self): + return ChromiumFilter(self) + + @property + def filter_one(self): + return ChromiumFilterOne(self) + + def search(self, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + return _search(self, displayed=displayed, checked=checked, selected=selected, enabled=enabled, + clickable=clickable, have_rect=have_rect, have_text=have_text) + + def search_one(self, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素,获取一个结果 + :param index: 元素序号,从1开始 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + return _search_one(self, index=index, displayed=displayed, checked=checked, selected=selected, + enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text) + + +class SessionFilterOne(object): + def __init__(self, _list): + self._list = _list + self._index = 1 + + def __call__(self, index=1): + """返回结果中第几个元素 + :param index: 元素序号,从1开始 + :return: 对象自身 + """ + self._index = index + return self + + def attr(self, name, value, equal=True): + """以是否拥有某个attribute值为条件筛选元素 + :param name: 属性名称 + :param value: 属性值 + :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 + :return: 筛选结果 + """ + return self._get_attr(name, value, 'attr', equal=equal) + + def text(self, text, fuzzy=True, contain=True): + """以是否含有指定文本为条件筛选元素 + :param text: 用于匹配的文本 + :param fuzzy: 是否模糊匹配 + :param contain: 是否包含该字符串,False表示不包含 + :return: 筛选结果 + """ + num = 0 + if contain: + for i in self._list: + t = i if isinstance(i, str) else i.raw_text + if (fuzzy and text in t) or (not fuzzy and text == t): + num += 1 + if self._index == num: + return i + else: + for i in self._list: + t = i if isinstance(i, str) else i.raw_text + if (fuzzy and text not in t) or (not fuzzy and text != t): + num += 1 + if self._index == num: + return i + return NoneElement(self._list._page, 'text()', + args={'text': text, 'fuzzy': fuzzy, 'contain': contain, 'index': self._index}) + + def _get_attr(self, name, value, method, equal=True): + """返回通过某个方法可获得某个值的元素 + :param name: 属性名称 + :param value: 属性值 + :param method: 方法名称 + :return: 筛选结果 + """ + num = 0 + if equal: + for i in self._list: + if not isinstance(i, str) and getattr(i, method)(name) == value: + num += 1 + if self._index == num: + return i + else: + for i in self._list: + if not isinstance(i, str) and getattr(i, method)(name) != value: + num += 1 + if self._index == num: + return i + return NoneElement(self._list._page, f'{method}()', + args={'name': name, 'value': value, 'equal': equal, 'index': self._index}) + + +class SessionFilter(SessionFilterOne): + + def __iter__(self): + return iter(self._list) + + def __next__(self): + return next(self._list) + + def __len__(self): + return len(self._list) + + def __getitem__(self, item): + return self._list[item] + + @property + def get(self): + """返回用于获取元素属性的对象""" + return self._list.get + + def text(self, text, fuzzy=True, contain=True): + """以是否含有指定文本为条件筛选元素 + :param text: 用于匹配的文本 + :param fuzzy: 是否模糊匹配 + :param contain: 是否包含该字符串,False表示不包含 + :return: 筛选结果 + """ + self._list = _text_all(self._list, SessionElementsList(page=self._list._page), + text=text, fuzzy=fuzzy, contain=contain) + + def _get_attr(self, name, value, method, equal=True): + """返回通过某个方法可获得某个值的元素 + :param name: 属性名称 + :param value: 属性值 + :param method: 方法名称 + :return: 筛选结果 + """ + self._list = _get_attr_all(self._list, SessionElementsList(page=self._list._page), + name=name, value=value, method=method, equal=equal) + return self + + +class ChromiumFilterOne(SessionFilterOne): + + def displayed(self, equal=True): + """以是否显示为条件筛选元素 + :param equal: 是否匹配显示的元素,False匹配不显示的 + :return: 筛选结果 + """ + return self._any_state('is_displayed', equal=equal) + + def checked(self, equal=True): + """以是否被选中为条件筛选元素 + :param equal: 是否匹配被选中的元素,False匹配不被选中的 + :return: 筛选结果 + """ + return self._any_state('is_checked', equal=equal) + + def selected(self, equal=True): + """以是否被选择为条件筛选元素,用于<select>元素项目 + :param equal: 是否匹配被选择的元素,False匹配不被选择的 + :return: 筛选结果 + """ + return self._any_state('is_selected', equal=equal) + + def enabled(self, equal=True): + """以是否可用为条件筛选元素 + :param equal: 是否匹配可用的元素,False表示匹配disabled状态的 + :return: 筛选结果 + """ + return self._any_state('is_enabled', equal=equal) + + def clickable(self, equal=True): + """以是否可点击为条件筛选元素 + :param equal: 是否匹配可点击的元素,False表示匹配不是可点击的 + :return: 筛选结果 + """ + return self._any_state('is_clickable', equal=equal) + + def have_rect(self, equal=True): + """以是否有大小为条件筛选元素 + :param equal: 是否匹配有大小的元素,False表示匹配没有大小的 + :return: 筛选结果 + """ + return self._any_state('has_rect', equal=equal) + + def style(self, name, value, equal=True): + """以是否拥有某个style值为条件筛选元素 + :param name: 属性名称 + :param value: 属性值 + :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 + :return: 筛选结果 + """ + return self._get_attr(name, value, 'style', equal=equal) + + def property(self, name, value, equal=True): + """以是否拥有某个property值为条件筛选元素 + :param name: 属性名称 + :param value: 属性值 + :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 + :return: 筛选结果 + """ + return self._get_attr(name, value, 'property', equal=equal) + + def _any_state(self, name, equal=True): + """ + :param name: 状态名称 + :param equal: 是否是指定状态,False表示否定状态 + :return: 选中的元素 + """ + num = 0 + if equal: + for i in self._list: + if not isinstance(i, str) and getattr(i.states, name): + num += 1 + if self._index == num: + return i + else: + for i in self._list: + if not isinstance(i, str) and not getattr(i.states, name): + num += 1 + if self._index == num: + return i + return NoneElement(self._list._page, f'{name}()', args={'equal': equal, 'index': self._index}) + + +class ChromiumFilter(ChromiumFilterOne): + + def __iter__(self): + return iter(self._list) + + def __next__(self): + return next(self._list) + + def __len__(self): + return len(self._list) + + def __getitem__(self, item): + return self._list[item] + + @property + def get(self): + """返回用于获取元素属性的对象""" + return self._list.get + + def search_one(self, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素,获取一个结果 + :param index: 元素序号,从1开始 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + return _search_one(self._list, index=index, displayed=displayed, checked=checked, selected=selected, + enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text) + + def search(self, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + return _search(self._list, displayed=displayed, checked=checked, selected=selected, enabled=enabled, + clickable=clickable, have_rect=have_rect, have_text=have_text) + + def text(self, text, fuzzy=True, contain=True): + """以是否含有指定文本为条件筛选元素 + :param text: 用于匹配的文本 + :param fuzzy: 是否模糊匹配 + :param contain: 是否包含该字符串,False表示不包含 + :return: 筛选结果 + """ + self._list = _text_all(self._list, ChromiumElementsList(page=self._list._page), + text=text, fuzzy=fuzzy, contain=contain) + return self + + def _get_attr(self, name, value, method, equal=True): + """返回通过某个方法可获得某个值的元素 + :param name: 属性名称 + :param value: 属性值 + :param method: 方法名称 + :return: 筛选结果 + """ + self._list = _get_attr_all(self._list, ChromiumElementsList(page=self._list._page), + name=name, value=value, method=method, equal=equal) + return self + + def _any_state(self, name, equal=True): + """ + :param name: 状态名称 + :param equal: 是否是指定状态,False表示否定状态 + :return: 选中的列表 + """ + r = ChromiumElementsList(page=self._list._page) + if equal: + for i in self._list: + if not isinstance(i, str) and getattr(i.states, name): + r.append(i) + else: + for i in self._list: + if not isinstance(i, str) and not getattr(i.states, name): + r.append(i) + self._list = r + return self + + +class Getter(object): + def __init__(self, _list): + self._list = _list + + def links(self): + """返回所有元素的link属性组成的列表""" + return [e.link for e in self._list if not isinstance(e, str)] + + def texts(self): + """返回所有元素的text属性组成的列表""" + return [e if isinstance(e, str) else e.text for e in self._list] + + def attrs(self, name): + """返回所有元素指定的attr属性组成的列表 + :param name: 属性名称 + :return: 属性文本组成的列表 + """ + return [e.attr(name) for e in self._list if not isinstance(e, str)] + + +def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10): + """传入多个定位符,获取多个ele + :param locators: 定位符组成的列表 + :param owner: 页面或元素对象 + :param any_one: 是否找到任何一个即返回 + :param first_ele: 每个定位符是否只获取第一个元素 + :param timeout: 超时时间(秒) + :return: 多个定位符组成的dict + """ + res = {loc: False for loc in locators} + end_time = perf_counter() + timeout + while perf_counter() <= end_time: + for loc in locators: + if res[loc] is not False: + continue + ele = owner.ele(loc, timeout=0) if first_ele else owner.eles(loc, timeout=0) + if ele: + res[loc] = ele + if any_one: + return res + if False not in res.values(): + break + return res + + +def _get_attr_all(src_list, aim_list, name, value, method, equal=True): + if equal: + for i in src_list: + if not isinstance(i, str) and getattr(i, method)(name) == value: + aim_list.append(i) + else: + for i in src_list: + if not isinstance(i, str) and getattr(i, method)(name) != value: + aim_list.append(i) + return aim_list + + +def _text_all(src_list, aim_list, text, fuzzy=True, contain=True): + """以是否含有指定文本为条件筛选元素 + :param text: 用于匹配的文本 + :param fuzzy: 是否模糊匹配 + :param contain: 是否包含该字符串,False表示不包含 + :return: 筛选结果 + """ + if contain: + for i in src_list: + t = i if isinstance(i, str) else i.raw_text + if (fuzzy and text in t) or (not fuzzy and text == t): + aim_list.append(i) + else: + for i in src_list: + t = i if isinstance(i, str) else i.raw_text + if (fuzzy and text not in t) or (not fuzzy and text != t): + aim_list.append(i) + return aim_list + + +def _search(_list, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + r = ChromiumElementsList(page=_list._page) + for i in _list: + if not isinstance(i, str) and ( + (displayed is not None and (displayed is True and i.states.is_displayed) + or (displayed is False and not i.states.is_displayed)) + or (checked is not None and (checked is True and i.states.is_checked) + or (checked is False and not i.states.is_checked)) + or (selected is not None and (selected is True and i.states.is_selected) + or (selected is False and not i.states.is_selected)) + or (enabled is not None and (enabled is True and i.states.is_enabled) + or (enabled is False and not i.states.is_enabled)) + or (clickable is not None and (clickable is True and i.states.is_clickable) + or (clickable is False and not i.states.is_clickable)) + or (have_rect is not None and (have_rect is True and i.states.has_rect) + or (have_rect is False and not i.states.has_rect)) + or (have_text is not None and (have_text is True and i.raw_text) + or (have_text is False and not i.raw_text))): + r.append(i) + return ChromiumFilter(r) + + +def _search_one(_list, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None, + have_rect=None, have_text=None): + """或关系筛选元素,获取一个结果 + :param index: 元素序号,从1开始 + :param displayed: 是否显示,bool,None为忽略该项 + :param checked: 是否被选中,bool,None为忽略该项 + :param selected: 是否被选择,bool,None为忽略该项 + :param enabled: 是否可用,bool,None为忽略该项 + :param clickable: 是否可点击,bool,None为忽略该项 + :param have_rect: 是否拥有大小和位置,bool,None为忽略该项 + :param have_text: 是否含有文本,bool,None为忽略该项 + :return: 筛选结果 + """ + num = 0 + for i in _list: + if not isinstance(i, str) and ( + (displayed is not None and (displayed is True and i.states.is_displayed) + or (displayed is False and not i.states.is_displayed)) + or (checked is not None and (checked is True and i.states.is_checked) + or (checked is False and not i.states.is_checked)) + or (selected is not None and (selected is True and i.states.is_selected) + or (selected is False and not i.states.is_selected)) + or (enabled is not None and (enabled is True and i.states.is_enabled) + or (enabled is False and not i.states.is_enabled)) + or (clickable is not None and (clickable is True and i.states.is_clickable) + or (clickable is False and not i.states.is_clickable)) + or (have_rect is not None and (have_rect is True and i.states.has_rect) + or (have_rect is False and not i.states.has_rect)) + or (have_text is not None and (have_text is True and i.raw_text) + or (have_text is False and not i.raw_text))): + num += 1 + if num == index: + return i + + return NoneElement(_list._page, method='filter()', args={'displayed': displayed, + 'checked': checked, 'selected': selected, + 'enabled': enabled, 'clickable': clickable, + 'have_rect': have_rect, 'have_text': have_text}) diff --git a/DrissionPage/_functions/elements.pyi b/DrissionPage/_functions/elements.pyi new file mode 100644 index 0000000..41562e5 --- /dev/null +++ b/DrissionPage/_functions/elements.pyi @@ -0,0 +1,220 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union, List, Optional, Iterable + +from .._base.base import BaseParser +from .._elements.chromium_element import ChromiumElement +from .._elements.session_element import SessionElement + + +def get_eles(locators: Union[List[str], tuple], + owner: BaseParser, + any_one: bool = False, + first_ele: bool = True, + timeout: float = 10) -> dict: ... + + +class SessionElementsList(list): + _page = ... + + def __init__(self, page=None, *args): ... + + @property + def get(self) -> Getter: ... + + @property + def filter(self) -> SessionFilter: ... + + @property + def filter_one(self) -> SessionFilterOne: ... + + def __next__(self) -> SessionElement: ... + + +class ChromiumElementsList(SessionElementsList): + + @property + def filter(self) -> ChromiumFilter: ... + + @property + def filter_one(self) -> ChromiumFilterOne: ... + + def search(self, + displayed: Optional[bool] = None, + checked: Optional[bool] = None, + selected: Optional[bool] = None, + enabled: Optional[bool] = None, + clickable: Optional[bool] = None, + have_rect: Optional[bool] = None, + have_text: Optional[bool] = None) -> ChromiumFilter: ... + + def search_one(self, + index: int = 1, + displayed: Optional[bool] = None, + checked: Optional[bool] = None, + selected: Optional[bool] = None, + enabled: Optional[bool] = None, + clickable: Optional[bool] = None, + have_rect: Optional[bool] = None, + have_text: Optional[bool] = None) -> ChromiumElement: ... + + def __next__(self) -> ChromiumElement: ... + + +class SessionFilterOne(object): + _list: SessionElementsList = ... + _index: int = ... + + def __init__(self, _list: SessionElementsList, index: int = 1): ... + + def __call__(self, index: int = 1) -> SessionFilterOne: ... + + def attr(self, name: str, value: str, equal: bool = True) -> SessionElement: ... + + def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionElement: ... + + def _get_attr(self, + name: str, + value: str, + method: str, + equal: bool = True) -> SessionElement: ... + + +class SessionFilter(SessionFilterOne): + + def __iter__(self) -> Iterable[SessionElement]: ... + + def __next__(self) -> SessionElement: ... + + def __len__(self) -> int: ... + + def __getitem__(self, item: int) -> SessionElement: ... + + @property + def get(self) -> Getter: ... + + def attr(self, name: str, value: str, equal: bool = True) -> SessionFilter: ... + + def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionFilter: ... + + def _get_attr(self, + name: str, + value: str, + method: str, + equal: bool = True) -> SessionFilter: ... + + +class ChromiumFilterOne(SessionFilterOne): + _list: ChromiumElementsList = ... + + def __init__(self, _list: ChromiumElementsList): ... + + def __call__(self, index: int = 1) -> ChromiumFilterOne: ... + + def displayed(self, equal: bool = True) -> ChromiumElement: ... + + def checked(self, equal: bool = True) -> ChromiumElement: ... + + def selected(self, equal: bool = True) -> ChromiumElement: ... + + def enabled(self, equal: bool = True) -> ChromiumElement: ... + + def clickable(self, equal: bool = True) -> ChromiumElement: ... + + def have_rect(self, equal: bool = True) -> ChromiumElement: ... + + def style(self, name: str, value: str, equal: bool = True) -> ChromiumElement: ... + + def property(self, + name: str, + value: str, equal: bool = True) -> ChromiumElement: ... + + def attr(self, name: str, value: str, equal: bool = True) -> ChromiumElement: ... + + def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumElement: ... + + def _get_attr(self, + name: str, + value: str, + method: str, equal: bool = True) -> ChromiumElement: ... + + def _any_state(self, name: str, equal: bool = True) -> ChromiumElement: ... + + +class ChromiumFilter(ChromiumFilterOne): + + def __iter__(self) -> Iterable[ChromiumElement]: ... + + def __next__(self) -> ChromiumElement: ... + + def __len__(self) -> int: ... + + def __getitem__(self, item: int) -> ChromiumElement: ... + + @property + def get(self) -> Getter: ... + + def displayed(self, equal: bool = True) -> ChromiumFilter: ... + + def checked(self, equal: bool = True) -> ChromiumFilter: ... + + def selected(self, equal: bool = True) -> ChromiumFilter: ... + + def enabled(self, equal: bool = True) -> ChromiumFilter: ... + + def clickable(self, equal: bool = True) -> ChromiumFilter: ... + + def have_rect(self, equal: bool = True) -> ChromiumFilter: ... + + def style(self, name: str, value: str, equal: bool = True) -> ChromiumFilter: ... + + def property(self, + name: str, + value: str, equal: bool = True) -> ChromiumFilter: ... + + def attr(self, name: str, value: str, equal: bool = True) -> ChromiumFilter: ... + + def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumFilter: ... + + def search(self, + displayed: Optional[bool] = None, + checked: Optional[bool] = None, + selected: Optional[bool] = None, + enabled: Optional[bool] = None, + clickable: Optional[bool] = None, + have_rect: Optional[bool] = None, + have_text: Optional[bool] = None) -> ChromiumFilter: ... + + def search_one(self, + index: int = 1, + displayed: Optional[bool] = None, + checked: Optional[bool] = None, + selected: Optional[bool] = None, + enabled: Optional[bool] = None, + clickable: Optional[bool] = None, + have_rect: Optional[bool] = None, + have_text: Optional[bool] = None) -> ChromiumElement: ... + + def _get_attr(self, + name: str, + value: str, + method: str, equal: bool = True) -> ChromiumFilter: ... + + def _any_state(self, name: str, equal: bool = True) -> ChromiumFilter: ... + + +class Getter(object): + _list: SessionElementsList = ... + + def __init__(self, _list: SessionElementsList): ... + + def links(self) -> List[str]: ... + + def texts(self) -> List[str]: ... + + def attrs(self, name: str) -> List[str]: ... diff --git a/DrissionPage/_functions/locator.py b/DrissionPage/_functions/locator.py index ac65759..e0aa381 100644 --- a/DrissionPage/_functions/locator.py +++ b/DrissionPage/_functions/locator.py @@ -9,6 +9,80 @@ from re import split from .by import By +def locator_to_tuple(loc): + """解析定位字符串生成dict格式数据 + :param loc: 待处理的字符串 + :return: 格式: {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]} + """ + loc = _preprocess(loc) + + # 多属性查找 + if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'): + args = _get_args(loc) + + # 单属性查找 + elif loc.startswith('@') and loc != '@': + arg = _get_arg(loc[1:]) + arg.append(False) + args = {'and': True, 'args': [arg]} + + # 根据tag name查找 + elif loc.startswith(('tag:', 'tag=', 'tag^', 'tag$')) and loc not in ('tag:', 'tag=', 'tag^', 'tag$'): + at_ind = loc.find('@') + if at_ind == -1: + args = {'and': True, 'args': [['tag()', '=', loc[4:].lower(), False]]} + else: + args_str = loc[at_ind:] + if args_str.startswith(('@@', '@|', '@!')): + args = _get_args(args_str) + args['args'].append([f'tag()', '=', loc[4:at_ind].lower(), False]) + else: # t:div@aa=bb的格式 + arg = _get_arg(loc[at_ind + 1:]) + arg.append(False) + args = {'and': True, 'args': [['tag()', '=', loc[4:at_ind].lower(), False], arg]} + + # 根据文本查找 + elif loc.startswith(('text=', 'text:', 'text^', 'text$')): + args = {'and': True, 'args': [['text()', loc[4], loc[5:], False]]} + + # 根据文本模糊查找 + else: + args = {'and': True, 'args': [['text()', '=', loc, False]]} + + return args + + +def _get_args(text: str = '') -> dict: + """解析定位参数字符串生成dict格式数据 + :param text: 待处理的字符串 + :return: 格式: {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]} + """ + arg_list = [] + args = split(r'(@!|@@|@\|)', text)[1:] + if '@@' in args and '@|' in args: + raise ValueError('@@和@|不能同时出现在一个定位语句中。') + _and = '@|' not in args + + for k in range(0, len(args) - 1, 2): + arg = _get_arg(args[k + 1]) + if arg: + arg.append(True if args[k] == '@!' else False) # 是否去除某个属性 + arg_list.append(arg) + + return {'and': _and, 'args': arg_list} + + +def _get_arg(text) -> list: + """解析arg=abc格式字符串,生成格式:['属性名称', '匹配方式', '属性值', 是否否定],不是式子的返回None""" + r = split(r'([:=$^])', text, maxsplit=1) + if not r[0]: + return [None, None, None, None] + # !=时只有属性名没有属性内容,查询是否存在该属性 + name = r[0] if r[0] != 'tx()' else 'text()' + name = name if name != 't()' else 'teg()' + return [name, None, None] if len(r) != 3 else [name, r[1], r[2]] + + def is_loc(text): """返回text是否定位符""" return text.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=', @@ -49,26 +123,8 @@ def str_to_xpath_loc(loc): :return: 匹配符元组 """ loc_by = 'xpath' + loc = _preprocess(loc) - if loc.startswith('.'): - if loc.startswith(('.=', '.:', '.^', '.$')): - loc = loc.replace('.', '@class', 1) - else: - loc = loc.replace('.', '@class=', 1) - - elif loc.startswith('#'): - if loc.startswith(('#=', '#:', '#^', '#$')): - loc = loc.replace('#', '@id', 1) - else: - loc = loc.replace('#', '@id=', 1) - - elif loc.startswith(('t:', 't=')): - loc = f'tag:{loc[2:]}' - - elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')): - loc = f'text{loc[2:]}' - - # ------------------------------------------------------------------ # 多属性查找 if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'): loc_str = _make_multi_xpath_str('*', loc)[1] @@ -78,7 +134,7 @@ def str_to_xpath_loc(loc): loc_str = _make_single_xpath_str('*', loc)[1] # 根据tag name查找 - elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='): + elif loc.startswith(('tag:', 'tag=', 'tag^', 'tag$')) and loc not in ('tag:', 'tag=', 'tag^', 'tag$'): at_ind = loc.find('@') if at_ind == -1: loc_str = f'//*[name()="{loc[4:]}"]' @@ -101,16 +157,11 @@ def str_to_xpath_loc(loc): # 用xpath查找 elif loc.startswith(('xpath:', 'xpath=')) and loc not in ('xpath:', 'xpath='): loc_str = loc[6:] - elif loc.startswith(('x:', 'x=')) and loc not in ('x:', 'x='): - loc_str = loc[2:] # 用css selector查找 elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='): loc_by = 'css selector' loc_str = loc[4:] - elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='): - loc_by = 'css selector' - loc_str = loc[2:] # 根据文本模糊查找 elif loc: @@ -127,26 +178,8 @@ def str_to_css_loc(loc): :return: 匹配符元组 """ loc_by = 'css selector' + loc = _preprocess(loc) - if loc.startswith('.'): - if loc.startswith(('.=', '.:', '.^', '.$')): - loc = loc.replace('.', '@class', 1) - else: - loc = loc.replace('.', '@class=', 1) - - elif loc.startswith('#'): - if loc.startswith(('#=', '#:', '#^', '#$')): - loc = loc.replace('#', '@id', 1) - else: - loc = loc.replace('#', '@id=', 1) - - elif loc.startswith(('t:', 't=')): - loc = f'tag:{loc[2:]}' - - elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')): - loc = f'text{loc[2:]}' - - # ------------------------------------------------------------------ # 多属性查找 if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'): loc_str = _make_multi_css_str('*', loc)[1] @@ -156,7 +189,7 @@ def str_to_css_loc(loc): loc_by, loc_str = _make_single_css_str('*', loc) # 根据tag name查找 - elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='): + elif loc.startswith(('tag:', 'tag=', 'tag^', 'tag$')) and loc not in ('tag:', 'tag=', 'tag^', 'tag$'): at_ind = loc.find('@') if at_ind == -1: loc_str = loc[4:] @@ -166,14 +199,12 @@ def str_to_css_loc(loc): loc_by, loc_str = _make_single_css_str(loc[4:at_ind], loc[at_ind:]) # 根据文本查找 - elif loc.startswith(('text=', 'text:', 'text^', 'text$', 'xpath=', 'xpath:', 'x:', 'x=')): + elif loc.startswith(('text=', 'text:', 'text^', 'text$', 'xpath=', 'xpath:')): loc_by, loc_str = str_to_xpath_loc(loc) # 用css selector查找 elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='): loc_str = loc[4:] - elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='): - loc_str = loc[2:] # 根据文本模糊查找 elif loc: @@ -202,39 +233,45 @@ def _make_single_xpath_str(tag: str, text: str) -> tuple: len_r = len(r) len_r0 = len(r[0]) if len_r == 3 and len_r0 > 1: - symbol = r[1] - if symbol == '=': # 精确查找 - arg = '.' if r[0] in ('@text()', '@tx()') else r[0] - arg_str = f'{arg}={_make_search_str(r[2])}' - - elif symbol == '^': # 匹配开头 - if r[0] in ('@text()', '@tx()'): - txt_str = f'/text()[starts-with(., {_make_search_str(r[2])})]/..' - arg_str = '' - else: - arg_str = f"starts-with({r[0]},{_make_search_str(r[2])})" - - elif symbol == '$': # 匹配结尾 - if r[0] in ('@text()', '@tx()'): - txt_str = f'/text()[substring(., string-length(.) - string-length({_make_search_str(r[2])}) +1) ' \ - f'= {_make_search_str(r[2])}]/..' - arg_str = '' - else: - arg_str = f'substring({r[0]}, string-length({r[0]}) - string-length({_make_search_str(r[2])}) +1)' \ - f' = {_make_search_str(r[2])}' - - elif symbol == ':': # 模糊查找 - if r[0] in ('@text()', '@tx()'): - txt_str = f'/text()[contains(., {_make_search_str(r[2])})]/..' - arg_str = '' - else: - arg_str = f"contains({r[0]},{_make_search_str(r[2])})" - + if r[0] in ('@tag()', '@t()'): + arg_str = f'name()="{r[2].lower()}"' else: - raise ValueError(f'符号不正确:{symbol}') + symbol = r[1] + if symbol == '=': # 精确查找 + arg = '.' if r[0] in ('@text()', '@tx()') else r[0] + arg_str = f'{arg}={_make_search_str(r[2])}' + + elif symbol == '^': # 匹配开头 + if r[0] in ('@text()', '@tx()'): + txt_str = f'/text()[starts-with(., {_make_search_str(r[2])})]/..' + arg_str = '' + else: + arg_str = f"starts-with({r[0]},{_make_search_str(r[2])})" + + elif symbol == '$': # 匹配结尾 + if r[0] in ('@text()', '@tx()'): + txt_str = (f'/text()[substring(., string-length(.) - string-length({_make_search_str(r[2])}) ' + f'+1) = {_make_search_str(r[2])}]/..') + arg_str = '' + else: + arg_str = (f'substring({r[0]}, string-length({r[0]}) - string-length({_make_search_str(r[2])}) ' + f'+1) = {_make_search_str(r[2])}') + + elif symbol == ':': # 模糊查找 + if r[0] in ('@text()', '@tx()'): + txt_str = f'/text()[contains(., {_make_search_str(r[2])})]/..' + arg_str = '' + else: + arg_str = f"contains({r[0]},{_make_search_str(r[2])})" + + else: + raise ValueError(f'符号不正确:{symbol}') elif len_r != 3 and len_r0 > 1: - arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}' + if r[0] in ('@tag()', '@t()'): + arg_str = '' + else: + arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}' if arg_str: arg_list.append(arg_str) @@ -252,10 +289,9 @@ def _make_multi_xpath_str(tag: str, text: str) -> tuple: args = split(r'(@!|@@|@\|)', text)[1:] if '@@' in args and '@|' in args: raise ValueError('@@和@|不能同时出现在一个定位语句中。') - elif '@@' in args: - _and = True - else: # @| - _and = False + _and = '@|' not in args + tags = [] if tag == '*' else [f'name()="{tag}"'] + tags_connect = ' or ' for k in range(0, len(args) - 1, 2): r = split(r'([:=$^])', args[k + 1], maxsplit=1) @@ -268,23 +304,39 @@ def _make_multi_xpath_str(tag: str, text: str) -> tuple: else: ignore = True if args[k] == '@!' else False # 是否去除某个属性 if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性 + if r[0] in ('tag()', 't()'): + continue arg_str = 'normalize-space(text())' if r[0] in ('text()', 'tx()') else f'@{r[0]}' elif len_r == 3: # 属性名和内容都有 - arg = '.' if r[0] in ('text()', 'tx()') else f'@{r[0]}' + if r[0] in ('tag()', 't()'): + if ignore: + tags.append(f'not(name()="{r[2]}")') + tags_connect = ' and ' + else: + tags.append(f'name()="{r[2]}"') + continue + symbol = r[1] + if r[0] in ('text()', 'tx()'): + arg = '.' + txt = r[2] + else: + arg = f'@{r[0]}' + txt = r[2] + if symbol == '=': - arg_str = f'{arg}={_make_search_str(r[2])}' + arg_str = f'{arg}={_make_search_str(txt)}' elif symbol == ':': - arg_str = f'contains({arg},{_make_search_str(r[2])})' + arg_str = f'contains({arg},{_make_search_str(txt)})' elif symbol == '^': - arg_str = f'starts-with({arg},{_make_search_str(r[2])})' + arg_str = f'starts-with({arg},{_make_search_str(txt)})' elif symbol == '$': - arg_str = f'substring({arg}, string-length({arg}) - string-length({_make_search_str(r[2])}) +1) ' \ - f'= {_make_search_str(r[2])}' + arg_str = f'substring({arg}, string-length({arg}) - string-length({_make_search_str(txt)}) +1) ' \ + f'= {_make_search_str(txt)}' else: raise ValueError(f'符号不正确:{symbol}') @@ -296,9 +348,9 @@ def _make_multi_xpath_str(tag: str, text: str) -> tuple: arg_list.append(arg_str) arg_str = ' and '.join(arg_list) if _and else ' or '.join(arg_list) - if tag != '*': + if tags: condition = f' and ({arg_str})' if arg_str else '' - arg_str = f'name()="{tag}"{condition}' + arg_str = f'({tags_connect.join(tags)}){condition}' return 'xpath', f'//*[{arg_str}]' if arg_str else f'//*' @@ -330,10 +382,7 @@ def _make_multi_css_str(tag: str, text: str) -> tuple: args = split(r'(@!|@@|@\|)', text)[1:] if '@@' in args and '@|' in args: raise ValueError('@@和@|不能同时出现在一个定位语句中。') - elif '@@' in args: - _and = True - else: # @| - _and = False + _and = '@|' not in args for k in range(0, len(args) - 1, 2): r = split(r'([:=$^])', args[k + 1], maxsplit=1) @@ -344,9 +393,18 @@ def _make_multi_css_str(tag: str, text: str) -> tuple: len_r = len(r) ignore = True if args[k] == '@!' else False # 是否去除某个属性 if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性 + if r[0] in ('tag()', 't()'): + continue arg_str = f'[{r[0]}]' elif len_r == 3: # 属性名和内容都有 + if r[0] in ('tag()', 't()'): + if tag == '*': + tag = f':not({r[2].lower()})' if ignore else f'{r[2]}' + else: + tag += f',:not({r[2].lower()})' if ignore else f',{r[2]}' + continue + d = {'=': '', '^': '^', '$': '$', ':': '*'} arg_str = f'[{r[0]}{d[r[1]]}={css_trans(r[2])}]' @@ -372,6 +430,9 @@ def _make_single_css_str(tag: str, text: str) -> tuple: return _make_single_xpath_str(tag, text) r = split(r'([:=$^])', text, maxsplit=1) + if r[0] in ('@tag()', '@t()'): + return 'css selector', r[2] + if len(r) == 3: d = {'=': '', '^': '^', '$': '$', ':': '*'} arg_str = f'[{r[0][1:]}{d[r[1]]}={css_trans(r[2])}]' @@ -472,3 +533,32 @@ def css_trans(txt): c = ('!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', ',', '{', '|', '}', '~', ' ') return ''.join([fr'\{i}' if i in c else i for i in txt]) + + +def _preprocess(loc): + """对缩写进行处理,替换回完整写法""" + if loc.startswith('.'): + if loc.startswith(('.=', '.:', '.^', '.$')): + loc = loc.replace('.', '@class', 1) + else: + loc = loc.replace('.', '@class=', 1) + + elif loc.startswith('#'): + if loc.startswith(('#=', '#:', '#^', '#$')): + loc = loc.replace('#', '@id', 1) + else: + loc = loc.replace('#', '@id=', 1) + + elif loc.startswith(('t:', 't=')): + loc = f'tag:{loc[2:]}' + + elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')): + loc = f'text{loc[2:]}' + + elif loc.startswith(('c:', 'c=')): + loc = f'css:{loc[2:]}' + + elif loc.startswith(('x:', 'x=')): + loc = f'xpath:{loc[2:]}' + + return loc diff --git a/DrissionPage/_functions/locator.pyi b/DrissionPage/_functions/locator.pyi index 2f79a69..991ff27 100644 --- a/DrissionPage/_functions/locator.pyi +++ b/DrissionPage/_functions/locator.pyi @@ -8,6 +8,9 @@ from typing import Union +def locator_to_tuple(loc: str) -> dict: ... + + def is_loc(text: str) -> bool: ... @@ -17,6 +20,9 @@ def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool def str_to_xpath_loc(loc: str) -> tuple: ... +def str_to_css_loc(loc: str) -> tuple: ... + + def translate_loc(loc: tuple) -> tuple: ... diff --git a/DrissionPage/_functions/tools.pyi b/DrissionPage/_functions/tools.pyi index 225ad81..a6fc535 100644 --- a/DrissionPage/_functions/tools.pyi +++ b/DrissionPage/_functions/tools.pyi @@ -10,7 +10,6 @@ from pathlib import Path from threading import Lock from typing import Union, Tuple -from ..errors import BaseError from .._pages.chromium_base import ChromiumBase @@ -46,4 +45,4 @@ def wait_until(function: callable, kwargs: dict = None, timeout: float = 10): .. def configs_to_here(file_name: Union[Path, str] = None) -> None: ... -def raise_error(result: dict, ignore: BaseError = None) -> None: ... +def raise_error(result: dict, ignore=None) -> None: ... diff --git a/DrissionPage/_functions/web.py b/DrissionPage/_functions/web.py index 95d2209..461f143 100644 --- a/DrissionPage/_functions/web.py +++ b/DrissionPage/_functions/web.py @@ -8,9 +8,12 @@ from datetime import datetime from html import unescape from http.cookiejar import Cookie, CookieJar -from re import sub +from os.path import sep +from pathlib import Path +from re import sub, match from urllib.parse import urlparse, urljoin, urlunparse +from DataRecorder.tools import make_valid_name from tldextract import extract @@ -138,8 +141,11 @@ def make_absolute_link(link, baseURI=None): if not link: return link - link = link.strip() + link = link.strip().replace('\\', '/') parsed = urlparse(link)._asdict() + if baseURI: + p = urlparse(baseURI)._asdict() + baseURI = f'{p["scheme"]}://{p["netloc"]}' # 是相对路径,与页面url拼接并返回 if not parsed['netloc']: @@ -207,7 +213,16 @@ def cookies_to_tuple(cookies): elif isinstance(cookies, str): c_dict = {} - for attr in cookies.strip().rstrip(';, ').split(',' if ',' in cookies else ';'): + cookies = cookies.rstrip('; ') + cookies = cookies.split(';') + # r = match(r'.*?=([^=]+)=', cookies) + # if not r: # 只有一个 + # cookies = [cookies.rstrip(',;')] + # else: + # s = match(r'.*([,;]).*', r.group(1)).group(1) + # cookies = cookies.rstrip(s).split(s) + + for attr in cookies: attr_val = attr.strip().split('=', 1) c_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else True cookies = _dict_cookies_to_tuple(c_dict) @@ -314,8 +329,7 @@ def set_browser_cookies(page, cookies): tmp.append(i) for i in range(len(tmp)): - d = ''.join(tmp[i:]) - cookie['domain'] = d + cookie['domain'] = ''.join(tmp[i:]) page.run_cdp_loaded('Network.setCookie', **cookie) if is_cookie_in_driver(page, cookie): break @@ -374,9 +388,92 @@ def get_blob(page, url, as_bytes=True): return result -def tree(ele_or_page): +def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None): + """把当前页面保存为文件,如果path和name参数都为None,只返回文本 + :param tab: Tab或Page对象 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :param as_pdf: 为Ture保存为pdf,否则为mhtml且忽略kwargs参数 + :param kwargs: pdf生成参数 + :return: as_pdf为True时返回bytes,否则返回文件文本 + """ + if name: + if name.endswith('.pdf'): + name = name[:-4] + as_pdf = True + elif name.endswith('.mhtml'): + name = name[:-6] + as_pdf = False + + if path: + path = Path(path) + if path.suffix.lower() == '.mhtml': + name = path.stem + path = path.parent + as_pdf = False + elif path.suffix.lower() == '.pdf': + name = path.stem + path = path.parent + as_pdf = True + + return get_pdf(tab, path, name, kwargs) if as_pdf else get_mhtml(tab, path, name) + + +def get_mhtml(page, path=None, name=None): + """把当前页面保存为mhtml文件,如果path和name参数都为None,只返回mhtml文本 + :param page: 要保存的页面对象 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :return: mhtml文本 + """ + r = page.run_cdp('Page.captureSnapshot')['data'] + if path is None and name is None: + return r + + path = path or '.' + Path(path).mkdir(parents=True, exist_ok=True) + name = make_valid_name(name or page.title) + with open(f'{path}{sep}{name}.mhtml', 'w', encoding='utf-8') as f: + f.write(r.replace('\r\n', '\n')) + return r + + +def get_pdf(page, path=None, name=None, kwargs=None): + """把当前页面保存为pdf文件,如果path和name参数都为None,只返回字节 + :param page: 要保存的页面对象 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :param kwargs: pdf生成参数 + :return: pdf文本 + """ + if not kwargs: + kwargs = {} + kwargs['transferMode'] = 'ReturnAsBase64' + if 'printBackground' not in kwargs: + kwargs['printBackground'] = True + try: + r = page.run_cdp('Page.printToPDF', **kwargs)['data'] + except: + raise RuntimeError('保存失败,可能浏览器版本不支持。') + from base64 import b64decode + r = b64decode(r) + if path is None and name is None: + return r + + path = path or '.' + Path(path).mkdir(parents=True, exist_ok=True) + name = make_valid_name(name or page.title) + with open(f'{path}{sep}{name}.pdf', 'wb') as f: + f.write(r) + return r + + +def tree(ele_or_page, text=False, show_js=False, show_css=False): """把页面或元素对象DOM结构打印出来 :param ele_or_page: 页面或元素对象 + :param text: 是否打印文本,输入数字可指定打印文本长度上线 + :param show_js: 打印文本时是否包含<script>内文本,text参数为False时无效 + :param show_css: 打印文本时是否包含<style>内文本,text参数为False时无效 :return: None """ @@ -396,13 +493,33 @@ def tree(ele_or_page): e = list_ele[i] attrs = ' '.join([f"{k}='{v}'" for k, v in e.attrs.items()]) - print(f'{new_body}{tail}<{e.tag} {attrs}>'.replace('\n', ' ')) + show_text = f'{new_body}{tail}<{e.tag} {attrs}>'.replace('\n', ' ') + if text: + t = e('x:/text()') + if t: + t = t.replace('\n', ' ') + if (e.tag not in ('script', 'style') or (e.tag == 'script' and show_js) + or (e.tag == 'style' and show_css)): + if text is not True: + t = t[:text] + show_text = f'{show_text} {t}' + print(show_text) _tree(e, new_last_one, new_body) ele = ele_or_page.s_ele() attrs = ' '.join([f"{k}='{v}'" for k, v in ele.attrs.items()]) - print(f'<{ele.tag} {attrs}>'.replace('\n', ' ')) + show_text = f'<{ele.tag} {attrs}>'.replace('\n', ' ') + if text: + t = ele('x:/text()') + if t: + t = t.replace('\n', ' ') + if (ele.tag not in ('script', 'style') or (ele.tag == 'script' and show_js) + or (ele.tag == 'style' and show_css)): + if text is not True: + t = t[:text] + show_text = f'{show_text} {t}' + print(show_text) _tree(ele) diff --git a/DrissionPage/_functions/web.pyi b/DrissionPage/_functions/web.pyi index 8ce0c3a..3fdca32 100644 --- a/DrissionPage/_functions/web.pyi +++ b/DrissionPage/_functions/web.pyi @@ -6,7 +6,8 @@ @License : BSD 3-Clause. """ from http.cookiejar import Cookie -from typing import Union +from pathlib import Path +from typing import Union, Optional from requests import Session from requests.cookies import RequestsCookieJar @@ -14,6 +15,8 @@ from requests.cookies import RequestsCookieJar from .._base.base import DrissionElement, BaseParser from .._elements.chromium_element import ChromiumElement from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_page import ChromiumPage +from .._pages.chromium_tab import ChromiumTab def get_ele_txt(e: DrissionElement) -> str: ... @@ -52,7 +55,28 @@ def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ... def get_blob(page: ChromiumBase, url: str, as_bytes: bool = True) -> bytes: ... -def tree(ele_or_page: BaseParser) -> None: ... +def save_page(tab: Union[ChromiumPage, ChromiumTab], + path: Union[Path, str, None] = None, + name: Optional[str] = None, + as_pdf: bool = False, + kwargs: dict = None) -> Union[bytes, str]: ... + + +def get_mhtml(page: Union[ChromiumPage, ChromiumTab], + path: Optional[Path] = None, + name: Optional[str] = None) -> Union[bytes, str]: ... + + +def get_pdf(page: Union[ChromiumPage, ChromiumTab], + path: Optional[Path] = None, + name: Optional[str] = None, + kwargs: dict = None) -> Union[bytes, str]: ... + + +def tree(ele_or_page: BaseParser, + text: Union[int, bool] = False, + show_js: bool = False, + show_css: bool = False) -> None: ... def format_headers(txt: str) -> dict: ... diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index 027241e..96a1173 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -30,7 +30,7 @@ from .._units.scroller import PageScroller from .._units.setter import ChromiumBaseSetter from .._units.states import PageStates from .._units.waiter import BaseWaiter -from ..errors import ContextLostError, CDPError, PageDisconnectedError, ElementNotFoundError, ElementLostError +from ..errors import ContextLostError, CDPError, PageDisconnectedError, ElementLostError __ERROR__ = 'error' @@ -478,7 +478,7 @@ class ChromiumBase(BasePage): def cookies(self, as_dict=False, all_domains=False, all_info=False): """返回cookies信息 - :param as_dict: 为True时以dict格式返回,为False时返回list且all_info无效 + :param as_dict: 为True时以dict格式返回且all_info无效,为False时返回list :param all_domains: 是否返回所有域的cookies :param all_info: 是否返回所有信息,为False时只返回name、value、domain :return: cookies信息 @@ -517,14 +517,7 @@ class ChromiumBase(BasePage): :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, locator, index=index) - if isinstance(r, NoneElement): - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 's_ele()', {'locator': locator}) - else: - r.method = 's_ele()' - r.args = {'locator': locator} - return r + return make_session_ele(self, locator, index=index, method='s_ele()') def s_eles(self, locator): """查找所有符合条件的元素以SessionElement列表形式返回 @@ -585,6 +578,9 @@ class ChromiumBase(BasePage): if r is not False: break + elif nIds[__ERROR__] == 'connection disconnected': + raise PageDisconnectedError + if perf_counter() >= end_time: return NoneElement(self) if index is not None else [] @@ -592,9 +588,11 @@ class ChromiumBase(BasePage): timeout = end_time - perf_counter() timeout = .5 if timeout <= 0 else timeout result = self.driver.run('DOM.performSearch', query=loc, _timeout=timeout, includeUserAgentShadowDOM=True) - if not result or __ERROR__ not in result: + if result and __ERROR__ not in result: num = result['resultCount'] search_ids.append(result['searchId']) + elif result and result[__ERROR__] == 'connection disconnected': + raise PageDisconnectedError for _id in search_ids: self._driver.run('DOM.discardSearchResults', searchId=_id) @@ -1065,7 +1063,7 @@ class ChromiumBase(BasePage): name = f'{self.title}.jpg' elif not name.endswith(('.jpg', '.jpeg', '.png', '.webp')): name = f'{name}.jpg' - path = f'{path}{sep}{name}' + path = f'{path}{sep}{make_valid_name(name)}' path = Path(path) pic_type = path.suffix.lower() @@ -1117,50 +1115,6 @@ class ChromiumBase(BasePage): f.write(png) return str(path.absolute()) - # --------------------即将废弃--------------------- - - @property - def page_load_strategy(self): - return self._load_mode - - @property - def is_alive(self): - return self.states.is_alive - - @property - def is_loading(self): - """返回页面是否正在加载状态""" - return self._is_loading - - @property - def ready_state(self): - return self._ready_state - - @property - def size(self): - """返回页面总宽高,格式:(宽, 高)""" - return self.rect.size - - def get_session_storage(self, item=None): - return self.session_storage(item) - - def get_local_storage(self, item=None): - return self.local_storage(item) - - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - return self.cookies(as_dict=as_dict, all_domains=all_domains, all_info=all_info) - - def upload(self, loc_or_ele, file_paths, by_js=False): - """触发上传文件选择框并自动填入指定路径 - :param loc_or_ele: 被点击后会触发文件选择框的元素或它的定位符 - :param file_paths: 文件路径,如果上传框支持多文件,可传入列表或字符串,字符串时多个文件用回车分隔 - :param by_js: 是否用js方式点击 - :return: None - """ - self.set.upload_files(file_paths) - self.ele(loc_or_ele).click(by_js=by_js) - self.wait.upload_paths_inputted() - class Timeout(object): """用于保存d模式timeout信息的类""" @@ -1228,50 +1182,3 @@ def close_privacy_dialog(page, tid): except: pass - - -def get_mhtml(page, path=None, name=None): - """把当前页面保存为mhtml文件,如果path和name参数都为None,只返回mhtml文本 - :param page: 要保存的页面对象 - :param path: 保存路径,为None且name不为None时保存在当前路径 - :param name: 文件名,为None且path不为None时用title属性值 - :return: mhtml文本 - """ - r = page.run_cdp('Page.captureSnapshot')['data'] - if path is None and name is None: - return r - path = path or '.' - Path(path).mkdir(parents=True, exist_ok=True) - name = make_valid_name(name or page.title) - with open(f'{path}{sep}{name}.mhtml', 'w', encoding='utf-8') as f: - f.write(r.replace('\r\n', '\n')) - return r - - -def get_pdf(page, path=None, name=None, kwargs=None): - """把当前页面保存为pdf文件,如果path和name参数都为None,只返回字节 - :param page: 要保存的页面对象 - :param path: 保存路径,为None且name不为None时保存在当前路径 - :param name: 文件名,为None且path不为None时用title属性值 - :param kwargs: pdf生成参数 - :return: pdf文本 - """ - if not kwargs: - kwargs = {} - kwargs['transferMode'] = 'ReturnAsBase64' - if 'printBackground' not in kwargs: - kwargs['printBackground'] = True - try: - r = page.run_cdp('Page.printToPDF', **kwargs)['data'] - except: - raise RuntimeError('保存失败,可能浏览器版本不支持。') - from base64 import b64decode - r = b64decode(r) - if path is None and name is None: - return r - path = path or '.' - Path(path).mkdir(parents=True, exist_ok=True) - name = make_valid_name(name or page.title) - with open(f'{path}{sep}{name}.pdf', 'wb') as f: - f.write(r) - return r diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index ddb89b7..17b14bb 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -14,6 +14,7 @@ from .._base.browser import Browser from .._base.driver import Driver from .._elements.chromium_element import ChromiumElement from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList, ChromiumElementsList from .._pages.chromium_frame import ChromiumFrame from .._pages.chromium_page import ChromiumPage from .._units.actions import Actions @@ -187,21 +188,20 @@ class ChromiumBase(BasePage): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... + timeout: float = None) -> ChromiumElementsList: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _find_elements(self, locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, index: Optional[int] = 1, relative: bool = False, - raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, List[Union[ChromiumElement, ChromiumFrame]]]: ... + raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, ChromiumElementsList]: ... def refresh(self, ignore_cache: bool = False) -> None: ... diff --git a/DrissionPage/_pages/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py index 500859f..f3e1ab5 100644 --- a/DrissionPage/_pages/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -58,11 +58,11 @@ class ChromiumFrame(ChromiumBase): self._rect = None self._type = 'ChromiumFrame' - end_time = perf_counter() + 2 - while perf_counter() < end_time: # todo: 优化 - if self.url not in (None, 'about:blank'): - break - sleep(.1) + # end_time = perf_counter() + 2 + # while perf_counter() < end_time: + # if self.url not in (None, 'about:blank'): + # break + # sleep(.1) def __call__(self, locator, index=1, timeout=None): """在内部查找元素 @@ -343,33 +343,6 @@ class ChromiumFrame(ChromiumBase): except: return None - # ----------------即将废弃----------------- - @property - def is_alive(self): - """返回是否仍可用""" - return self.states.is_alive - - @property - def page_size(self): - """返回frame内页面尺寸,格式:(宽,, 高)""" - return self.rect.size - - @property - def size(self): - """返回frame元素大小""" - return self.frame_ele.rect.size - - @property - def location(self): - """返回frame元素左上角的绝对坐标""" - return self.frame_ele.rect.location - - @property - def locations(self): - """返回用于获取元素位置的对象""" - return self.frame_ele.rect - # ----------------即将废弃结束----------------- - def refresh(self): """刷新frame页面""" self.doc_ele.run_js('this.location.reload();') @@ -480,7 +453,7 @@ class ChromiumFrame(ChromiumBase): """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 查找范围不限同级元素,而是整个DOM文档 :param locator: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ @@ -490,7 +463,7 @@ class ChromiumFrame(ChromiumBase): """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 查找范围不限同级元素,而是整个DOM文档 :param locator: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ @@ -586,7 +559,7 @@ class ChromiumFrame(ChromiumBase): def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None): """在frame内查找单个元素 :param locator: 定位符或元素对象 - :param timeout: 查找超时时间 + :param timeout: 查找超时时间(秒) :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 diff --git a/DrissionPage/_pages/chromium_frame.pyi b/DrissionPage/_pages/chromium_frame.pyi index 7a4c772..b0c1a83 100644 --- a/DrissionPage/_pages/chromium_frame.pyi +++ b/DrissionPage/_pages/chromium_frame.pyi @@ -10,9 +10,10 @@ from typing import Union, Tuple, List, Any, Optional from .chromium_base import ChromiumBase from .chromium_page import ChromiumPage -from .chromium_tab import ChromiumTab, WebPageTab +from .chromium_tab import ChromiumTab from .web_page import WebPage from .._elements.chromium_element import ChromiumElement +from .._functions.elements import ChromiumElementsList from .._units.listener import FrameListener from .._units.rect import FrameRect from .._units.scroller import FrameScroller @@ -214,7 +215,6 @@ class ChromiumFrame(ChromiumBase): timeout: float = None, index: Optional[int] = 1, relative: bool = False, - raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ... + raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, None, ChromiumElementsList]: ... def _is_inner_frame(self) -> bool: ... diff --git a/DrissionPage/_pages/chromium_page.py b/DrissionPage/_pages/chromium_page.py index f2b0fe1..7bd7277 100644 --- a/DrissionPage/_pages/chromium_page.py +++ b/DrissionPage/_pages/chromium_page.py @@ -9,14 +9,15 @@ from pathlib import Path from threading import Lock from time import sleep, perf_counter -from requests import get +from requests import Session from .._base.browser import Browser from .._configs.chromium_options import ChromiumOptions from .._functions.browser import connect_browser from .._functions.settings import Settings from .._functions.tools import PortFinder -from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf, Timeout +from .._functions.web import save_page +from .._pages.chromium_base import ChromiumBase, Timeout from .._pages.chromium_tab import ChromiumTab from .._units.setter import ChromiumPageSetter from .._units.waiter import PageWaiter @@ -27,13 +28,12 @@ class ChromiumPage(ChromiumBase): """用于管理浏览器的类""" _PAGES = {} - def __new__(cls, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None): + def __new__(cls, addr_or_opts=None, tab_id=None, timeout=None): """ :param addr_or_opts: 浏览器地址:端口、ChromiumOptions对象或端口数字(int) :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间(秒) """ - addr_or_opts = addr_or_opts or addr_driver_opts opt = handle_options(addr_or_opts) is_exist, browser_id = run_browser(opt) if browser_id in cls._PAGES: @@ -49,7 +49,7 @@ class ChromiumPage(ChromiumBase): cls._PAGES[browser_id] = r return r - def __init__(self, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None): + def __init__(self, addr_or_opts=None, tab_id=None, timeout=None): """ :param addr_or_opts: 浏览器地址:端口、ChromiumOptions对象或端口数字(int) :param tab_id: 要控制的标签页id,不指定默认为激活的 @@ -76,9 +76,13 @@ class ChromiumPage(ChromiumBase): if self._is_exist and self._chromium_options._headless is False and 'headless' in r['userAgent'].lower(): self._browser.quit(3) connect_browser(self._chromium_options) - ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'}) - ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1] - self._browser = Browser(self._chromium_options.address, ws, self) + s = Session() + s.trust_env = False + ws = s.get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'}) + bid = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + self._browser = Browser(self._chromium_options.address, bid, self) + ws.close() + s.close() def _d_set_runtime_settings(self): """设置运行时用到的属性""" @@ -154,7 +158,7 @@ class ChromiumPage(ChromiumBase): :param kwargs: pdf生成参数 :return: as_pdf为True时返回bytes,否则返回文件文本 """ - return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name) + return save_page(self, path, name, as_pdf, kwargs) def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', as_id=False): """获取一个标签页对象,id_or_num不为None时,后面几个参数无效 @@ -272,30 +276,6 @@ class ChromiumPage(ChromiumBase): def __repr__(self): return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>' - # ----------即将废弃----------- - def close_other_tabs(self, tabs_or_ids=None): - """关闭传入的标签页以外标签页,默认保留当前页。可传入多个 - :param tabs_or_ids: 要保留的标签页对象或id,可传入列表或元组,为None时保存当前页 - :return: None - """ - self.close_tabs(tabs_or_ids, True) - - @property - def tabs(self): - """返回所有标签页id组成的列表""" - return self.browser.tab_ids - - def find_tabs(self, title=None, url=None, tab_type=None, single=True): - """查找符合条件的tab,返回它们组成的列表 - :param title: 要匹配title的文本 - :param url: 要匹配url的文本 - :param tab_type: tab类型,可用列表输入多个 - :param single: 是否返回首个结果的id,为False返回所有信息 - :return: tab id或tab列表 - """ - r = self._browser.find_tabs(title, url, tab_type) - return r[0]['id'] if r and single else r - def handle_options(addr_or_opts): """设置浏览器启动属性 @@ -336,12 +316,16 @@ def run_browser(chromium_options): """连接浏览器""" is_exist = connect_browser(chromium_options) try: - ws = get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'}) + s = Session() + s.trust_env = False + ws = s.get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'}) if not ws: raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') browser_id = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + ws.close() + s.close() except KeyError: - raise BrowserConnectError('浏览器版本太旧,请升级。') + raise BrowserConnectError('浏览器版本太旧或此浏览器不支持接管。') except: raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') return is_exist, browser_id diff --git a/DrissionPage/_pages/chromium_tab.py b/DrissionPage/_pages/chromium_tab.py index e496728..d965a5d 100644 --- a/DrissionPage/_pages/chromium_tab.py +++ b/DrissionPage/_pages/chromium_tab.py @@ -11,8 +11,8 @@ from time import sleep from .._base.base import BasePage from .._configs.session_options import SessionOptions from .._functions.settings import Settings -from .._functions.web import set_session_cookies, set_browser_cookies -from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf +from .._functions.web import set_session_cookies, set_browser_cookies, save_page +from .._pages.chromium_base import ChromiumBase from .._pages.session_page import SessionPage from .._units.setter import TabSetter, WebPageTabSetter from .._units.waiter import TabWaiter @@ -91,7 +91,7 @@ class ChromiumTab(ChromiumBase): :param kwargs: pdf生成参数 :return: as_pdf为True时返回bytes,否则返回文件文本 """ - return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name) + return save_page(self, path, name, as_pdf, kwargs) def __repr__(self): return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>' @@ -399,7 +399,3 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): def __repr__(self): return f'<WebPageTab browser_id={self.browser.id} tab_id={self.tab_id}>' - - # --------即将废弃------- - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - return self.cookies(as_dict=as_dict, all_domains=all_domains, all_info=all_info) diff --git a/DrissionPage/_pages/chromium_tab.pyi b/DrissionPage/_pages/chromium_tab.pyi index 2a5f6a5..2161070 100644 --- a/DrissionPage/_pages/chromium_tab.pyi +++ b/DrissionPage/_pages/chromium_tab.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Union, Tuple, Any, List, Optional +from typing import Union, Tuple, Any, Optional from requests import Session, Response @@ -18,6 +18,7 @@ from .web_page import WebPage from .._base.browser import Browser from .._elements.chromium_element import ChromiumElement from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList, ChromiumElementsList from .._units.rect import TabRect from .._units.setter import TabSetter, WebPageTabSetter from .._units.waiter import TabWaiter @@ -150,13 +151,13 @@ class WebPageTab(SessionPage, ChromiumTab): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... + timeout: float = None) -> Union[SessionElementsList, ChromiumElementsList]: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... @@ -199,5 +200,4 @@ class WebPageTab(SessionPage, ChromiumTab): index: Optional[int] = 1, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, SessionElement, ChromiumFrame, List[SessionElement], List[ - Union[ChromiumElement, ChromiumFrame]]]: ... + -> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]: ... diff --git a/DrissionPage/_pages/session_page.py b/DrissionPage/_pages/session_page.py index a2d8694..4f699c8 100644 --- a/DrissionPage/_pages/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -354,10 +354,6 @@ class SessionPage(BasePage): def __repr__(self): return f'<SessionPage url={self.url}>' - # ---------即将废弃--------- - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - return self.cookies(as_dict=as_dict, all_domains=all_domains, all_info=all_info) - def check_headers(kwargs, headers, arg): """检查kwargs或headers中是否有arg所示属性""" diff --git a/DrissionPage/_pages/session_page.pyi b/DrissionPage/_pages/session_page.pyi index e229a36..16f95be 100644 --- a/DrissionPage/_pages/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Any, Union, Tuple, List, Optional +from typing import Any, Union, Tuple, Optional from requests import Session, Response from requests.structures import CaseInsensitiveDict @@ -14,6 +14,7 @@ from requests.structures import CaseInsensitiveDict from .._base.base import BasePage from .._configs.session_options import SessionOptions from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList from .._units.setter import SessionPageSetter @@ -97,21 +98,20 @@ class SessionPage(BasePage): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[SessionElement]: ... + timeout: float = None) -> SessionElementsList: ... def s_ele(self, locator: Union[Tuple[str, str], str, SessionElement] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, loc: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, loc: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _find_elements(self, locator: Union[Tuple[str, str], str, SessionElement], timeout: float = None, index: Optional[int] = 1, relative: bool = True, - raise_err: bool = None) \ - -> Union[SessionElement, List[SessionElement]]: ... + raise_err: bool = None) -> Union[SessionElement, SessionElementsList]: ... def cookies(self, as_dict: bool = False, diff --git a/DrissionPage/_pages/web_page.py b/DrissionPage/_pages/web_page.py index 3a35e18..ea9e83d 100644 --- a/DrissionPage/_pages/web_page.py +++ b/DrissionPage/_pages/web_page.py @@ -17,15 +17,14 @@ from .._units.setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): """整合浏览器和request的页面类""" - def __new__(cls, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None): + def __new__(cls, mode='d', timeout=None, chromium_options=None, session_or_options=None): """初始化函数 :param mode: 'd' 或 's',即driver模式和session模式 :param timeout: 超时时间(秒),d模式时为寻找元素时间,s模式时为连接时间,默认10秒 :param chromium_options: Driver对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ - opts = chromium_options or driver_or_options - return super().__new__(cls, opts) + return super().__new__(cls, chromium_options) def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None): """初始化函数 @@ -402,7 +401,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param locator: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,d模式专用 + :param timeout: 查找元素超时时间(秒),d模式专用 :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 @@ -415,7 +414,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def quit(self, timeout=5, force=True): """关闭浏览器和Session - :param timeout: 等待浏览器关闭超时时间 + :param timeout: 等待浏览器关闭超时时间(秒) :param force: 关闭超时是否强制终止进程 :return: None """ @@ -431,7 +430,3 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __repr__(self): return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>' - - # -------即将废弃-------- - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - return self.cookies(as_dict=as_dict, all_domains=all_domains, all_info=all_info) diff --git a/DrissionPage/_pages/web_page.pyi b/DrissionPage/_pages/web_page.pyi index bc0be0b..e1046d8 100644 --- a/DrissionPage/_pages/web_page.pyi +++ b/DrissionPage/_pages/web_page.pyi @@ -19,6 +19,7 @@ from .._configs.chromium_options import ChromiumOptions from .._configs.session_options import SessionOptions from .._elements.chromium_element import ChromiumElement from .._elements.session_element import SessionElement +from .._functions.elements import SessionElementsList, ChromiumElementsList from .._units.setter import WebPageSetter @@ -108,13 +109,13 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def eles(self, locator: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... + timeout: float = None) -> Union[SessionElementsList, ChromiumElementsList]: ... def s_ele(self, locator: Union[Tuple[str, str], str] = None, index: int = 1) -> SessionElement: ... - def s_eles(self, locator: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... @@ -185,8 +186,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): index: Optional[int] = 1, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, SessionElement, ChromiumFrame, List[SessionElement], - List[Union[ChromiumElement, ChromiumFrame]]]: ... + -> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]: ... def _set_start_options(self, dr_opt: Union[Driver, bool, None], diff --git a/DrissionPage/_units/actions.py b/DrissionPage/_units/actions.py index d119629..e1c951c 100644 --- a/DrissionPage/_units/actions.py +++ b/DrissionPage/_units/actions.py @@ -24,6 +24,7 @@ class Actions: self.modifier = 0 # 修饰符,Alt=1, Ctrl=2, Meta/Command=4, Shift=8 self.curr_x = 0 # 视口坐标 self.curr_y = 0 + self._holding = 'left' def move_to(self, ele_or_loc, offset_x=0, offset_y=0, duration=.5): """鼠标移动到元素中点,或页面上的某个绝对坐标。可设置偏移量 @@ -86,7 +87,7 @@ class Actions: t = perf_counter() self.curr_x = x self.curr_y = y - self._dr.run('Input.dispatchMouseEvent', type='mouseMoved', + self._dr.run('Input.dispatchMouseEvent', type='mouseMoved', button=self._holding, x=self.curr_x, y=self.curr_y, modifiers=self.modifier) ss = .02 - perf_counter() + t if ss > 0: @@ -140,7 +141,7 @@ class Actions: :return: self """ if on_ele: - self.move_to(on_ele, duration=0) + self.move_to(on_ele, duration=.2) self._release('left') return self @@ -158,7 +159,7 @@ class Actions: :return: self """ if on_ele: - self.move_to(on_ele, duration=0) + self.move_to(on_ele, duration=.2) self._release('right') return self @@ -176,7 +177,7 @@ class Actions: :return: self """ if on_ele: - self.move_to(on_ele, duration=0) + self.move_to(on_ele, duration=.2) self._release('middle') return self @@ -188,9 +189,10 @@ class Actions: :return: self """ if on_ele: - self.move_to(on_ele, duration=0) + self.move_to(on_ele, duration=.2) self._dr.run('Input.dispatchMouseEvent', type='mousePressed', button=button, clickCount=count, x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + self._holding = button return self def _release(self, button): @@ -200,17 +202,18 @@ class Actions: """ self._dr.run('Input.dispatchMouseEvent', type='mouseReleased', button=button, clickCount=1, x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + self._holding = 'left' return self - def scroll(self, delta_x=0, delta_y=0, on_ele=None): + def scroll(self, delta_y=0, delta_x=0, on_ele=None): """滚动鼠标滚轮,可先移动到元素上 - :param delta_x: 滚轮变化值x :param delta_y: 滚轮变化值y + :param delta_x: 滚轮变化值x :param on_ele: ChromiumElement元素 :return: self """ if on_ele: - self.move_to(on_ele, duration=0) + self.move_to(on_ele, duration=.2) self._dr.run('Input.dispatchMouseEvent', type='mouseWheel', x=self.curr_x, y=self.curr_y, deltaX=delta_x, deltaY=delta_y, modifiers=self.modifier) return self diff --git a/DrissionPage/_units/actions.pyi b/DrissionPage/_units/actions.pyi index 4904a35..bc610eb 100644 --- a/DrissionPage/_units/actions.pyi +++ b/DrissionPage/_units/actions.pyi @@ -50,11 +50,12 @@ class Actions: self.modifier: int = ... self.curr_x: int = ... self.curr_y: int = ... + self._holding: str = ... - def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[int, int], str], - offset_x: int = 0, offset_y: int = 0, duration: float = .5) -> Actions: ... + def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[float, float], str], + offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions: ... - def move(self, offset_x: int = 0, offset_y: int = 0, duration: float = .5) -> Actions: ... + def move(self, offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions: ... def click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... @@ -81,7 +82,7 @@ class Actions: def _release(self, button: str) -> Actions: ... - def scroll(self, delta_x: int = 0, delta_y: int = 0, + def scroll(self, delta_y: int = 0, delta_x: int = 0, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... def up(self, pixel: int) -> Actions: ... diff --git a/DrissionPage/_units/clicker.py b/DrissionPage/_units/clicker.py index 589465e..6006b4c 100644 --- a/DrissionPage/_units/clicker.py +++ b/DrissionPage/_units/clicker.py @@ -87,8 +87,8 @@ class Clicker(object): x = rect[1][0] - (rect[1][0] - rect[0][0]) / 2 y = rect[0][0] + 3 try: - r = self._ele.owner.run_cdp('DOM.getNodeForLocation', x=x, y=y, includeUserAgentShadowDOM=True, - ignorePointerEventsNone=True) + r = self._ele.owner.run_cdp('DOM.getNodeForLocation', x=int(x), y=int(y), + includeUserAgentShadowDOM=True, ignorePointerEventsNone=True) if r['backendNodeId'] != self._ele._backend_id: vx, vy = self._ele.rect.viewport_midpoint else: @@ -113,11 +113,19 @@ class Clicker(object): x, y = self._ele.rect.viewport_click_point self._click(x, y, 'right') - def middle(self): - """中键单击""" + def middle(self, get_tab=True): + """中键单击,默认返回新出现的tab对象 + :param get_tab: 是否返回新tab对象,为False则返回None + :return: Tab对象或None + """ self._ele.owner.scroll.to_see(self._ele) x, y = self._ele.rect.viewport_click_point self._click(x, y, 'middle') + if get_tab: + tid = self._ele.page.wait.new_tab() + if not tid: + raise RuntimeError('没有出现新标签页。') + return self._ele.page.get_tab(tid) def at(self, offset_x=None, offset_y=None, button='left', count=1): """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中间点 @@ -196,19 +204,5 @@ class Clicker(object): """ self._ele.owner.run_cdp('Input.dispatchMouseEvent', type='mousePressed', x=client_x, y=client_y, button=button, clickCount=count, _ignore=AlertExistsError) - # sleep(.05) self._ele.owner.run_cdp('Input.dispatchMouseEvent', type='mouseReleased', x=client_x, y=client_y, button=button, _ignore=AlertExistsError) - - # -------------即将废弃-------------- - - def twice(self): - """双击元素""" - self.at(count=2) - - def multiple(self, times=2): - """多次点击 - :param times: 默认双击 - :return: None - """ - self.at(count=times) diff --git a/DrissionPage/_units/clicker.pyi b/DrissionPage/_units/clicker.pyi index e6ca30f..15fc212 100644 --- a/DrissionPage/_units/clicker.pyi +++ b/DrissionPage/_units/clicker.pyi @@ -23,7 +23,7 @@ class Clicker(object): def right(self) -> None: ... - def middle(self) -> None: ... + def middle(self, get_tab: bool = True) -> Union[ChromiumTab, WebPageTab, None]: ... def at(self, offset_x: float = None, @@ -39,10 +39,10 @@ class Clicker(object): suffix: str = None, new_tab: bool = False, by_js: bool = False, - timeout:float=None) -> DownloadMission: ... + timeout: float = None) -> DownloadMission: ... def to_upload(self, file_paths: Union[str, Path, list, tuple], by_js: bool = False) -> None: ... - def for_new_tab(self, by_js:bool=False)->Union[ChromiumTab, WebPageTab]:... + def for_new_tab(self, by_js: bool = False) -> Union[ChromiumTab, WebPageTab]: ... def _click(self, client_x: float, client_y: float, button: str = 'left', count: int = 1) -> None: ... diff --git a/DrissionPage/_units/downloader.py b/DrissionPage/_units/downloader.py index 73dc956..ec41cc4 100644 --- a/DrissionPage/_units/downloader.py +++ b/DrissionPage/_units/downloader.py @@ -292,7 +292,7 @@ class DownloadMission(object): def wait(self, show=True, timeout=None, cancel_if_timeout=True): """等待任务结束 :param show: 是否显示下载信息 - :param timeout: 超时时间,为None则无限等待 + :param timeout: 超时时间(秒),为None则无限等待 :param cancel_if_timeout: 超时时是否取消任务 :return: 等待成功返回完整路径,否则返回False """ diff --git a/DrissionPage/_units/listener.py b/DrissionPage/_units/listener.py index c552fbf..ef49b35 100644 --- a/DrissionPage/_units/listener.py +++ b/DrissionPage/_units/listener.py @@ -119,7 +119,7 @@ class Listener(object): def wait(self, count=1, timeout=None, fit_count=True, raise_err=None): """等待符合要求的数据包到达指定数量 :param count: 需要捕捉的数据包数量 - :param timeout: 超时时间,为None无限等待 + :param timeout: 超时时间(秒),为None无限等待 :param fit_count: 是否必须满足总数要求,发生超时,为True返回False,为False返回已捕捉到的数据包 :param raise_err: 超时时是否抛出错误,为None时根据Settings设置 :return: count为1时返回数据包对象,大于1时返回列表,超时且fit_count为True时返回False @@ -128,7 +128,7 @@ class Listener(object): raise RuntimeError('监听未启动或已暂停。') if not timeout: while self._caught.qsize() < count: - sleep(.05) + sleep(.03) fail = False else: @@ -140,6 +140,7 @@ class Listener(object): if self._caught.qsize() >= count: fail = False break + sleep(.03) if fail: if fit_count or not self._caught.qsize(): @@ -158,7 +159,7 @@ class Listener(object): def steps(self, count=None, timeout=None, gap=1): """用于单步操作,可实现每收到若干个数据包执行一步操作(如翻页) :param count: 需捕获的数据包总数,为None表示无限 - :param timeout: 每个数据包等待时间,为None表示无限 + :param timeout: 每个数据包等待时间(秒),为None表示无限 :param gap: 每接收到多少个数据包返回一次数据 :return: 用于在接收到监听目标时触发动作的可迭代对象 """ @@ -177,7 +178,7 @@ class Listener(object): caught += gap if caught >= count: return - sleep(.05) + sleep(.03) def stop(self): """停止监听,清空已监听到的列表""" @@ -218,7 +219,7 @@ class Listener(object): def wait_silent(self, timeout=None, targets_only=False, limit=0): """等待所有请求结束 - :param timeout: 超时,为None时无限等待 + :param timeout: 超时时间(秒),为None时无限等待 :param targets_only: 是否只等待targets指定的请求结束 :param limit: 剩下多少个连接时视为结束 :return: 返回是否等待成功 @@ -250,13 +251,13 @@ class Listener(object): self._target_id = target_id self._address = address self._owner = owner - debug = False + # debug = False if self._driver: - debug = self._driver._debug + # debug = self._driver._debug self._driver.stop() if self.listening: self._driver = Driver(self._target_id, 'page', self._address) - self._driver._debug = debug + # self._driver._debug = debug self._driver.run('Network.enable') self._set_callback() @@ -479,7 +480,7 @@ class DataPacket(object): def wait_extra_info(self, timeout=None): """等待额外的信息加载完成 - :param timeout: 超时时间,None为无限等待 + :param timeout: 超时时间(秒),None为无限等待 :return: 是否等待成功 """ if timeout is None: diff --git a/DrissionPage/_units/rect.py b/DrissionPage/_units/rect.py index 4428a16..b751d4b 100644 --- a/DrissionPage/_units/rect.py +++ b/DrissionPage/_units/rect.py @@ -16,7 +16,7 @@ class ElementRect(object): @property def corners(self): - """返回元素四个角坐标,顺序:坐上、右上、右下、左下,没有大小的元素抛出NoRectError""" + """返回元素四个角坐标,顺序:左上、右上、右下、左下,没有大小的元素抛出NoRectError""" vr = self._get_viewport_rect('border') r = self._ele.owner.run_cdp_loaded('Page.getLayoutMetrics')['visualViewport'] sx = r['pageX'] @@ -25,7 +25,7 @@ class ElementRect(object): @property def viewport_corners(self): - """返回元素四个角视口坐标,顺序:坐上、右上、右下、左下,没有大小的元素抛出NoRectError""" + """返回元素四个角视口坐标,顺序:左上、右上、右下、左下,没有大小的元素抛出NoRectError""" r = self._get_viewport_rect('border') return (r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7]) @@ -225,10 +225,10 @@ class FrameRect(object): @property def corners(self): - """返回元素四个角坐标,顺序:坐上、右上、右下、左下""" + """返回元素四个角坐标,顺序:左上、右上、右下、左下""" return self._frame.frame_ele.rect.corners @property def viewport_corners(self): - """返回元素四个角视口坐标,顺序:坐上、右上、右下、左下""" + """返回元素四个角视口坐标,顺序:左上、右上、右下、左下""" return self._frame.frame_ele.rect.viewport_corners diff --git a/DrissionPage/_units/selector.py b/DrissionPage/_units/selector.py index fe5a23a..c97aded 100644 --- a/DrissionPage/_units/selector.py +++ b/DrissionPage/_units/selector.py @@ -23,7 +23,7 @@ class SelectElement(object): def __call__(self, text_or_index, timeout=None): """选定下拉列表中子元素 :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 超时时间,不输入默认实用页面超时时间 + :param timeout: 超时时间(秒),不输入默认实用页面超时时间 :return: None """ para_type = 'index' if isinstance(text_or_index, int) else 'text' @@ -82,7 +82,7 @@ class SelectElement(object): def by_text(self, text, timeout=None): """此方法用于根据text值选择项。当元素是多选列表时,可以接收list或tuple :param text: text属性值,传入list或tuple可选择多项 - :param timeout: 超时时间,为None默认使用页面超时时间 + :param timeout: 超时时间(秒),为None默认使用页面超时时间 :return: 是否选择成功 """ return self._select(text, 'text', False, timeout) diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py index 8e576b2..9f0e443 100644 --- a/DrissionPage/_units/setter.py +++ b/DrissionPage/_units/setter.py @@ -14,7 +14,7 @@ from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesS from .._functions.settings import Settings from .._functions.tools import show_or_hide_browser from .._functions.web import format_headers -from ..errors import ElementLostError +from ..errors import ElementLostError, JavaScriptError class BasePageSetter(object): @@ -173,13 +173,6 @@ class ChromiumBaseSetter(BasePageSetter): self._owner.run_cdp('Network.enable') self._owner.run_cdp('Network.setBlockedURLs', urls=urls) - # --------------即将废弃--------------- - - @property - def load_strategy(self): - """返回用于设置页面加载策略的对象""" - return LoadMode(self._owner) - class TabSetter(ChromiumBaseSetter): def __init__(self, owner): @@ -494,6 +487,17 @@ class ChromiumElementSetter(object): value = value.replace('"', r'\"') self._ele.run_js(f'this.{name}="{value}";') + def style(self, name, value): + """设置元素style样式 + :param name: 样式名称 + :param value: 样式值 + :return: None + """ + try: + self._ele.run_js(f'this.style.{name}="{value}";') + except JavaScriptError: + raise ValueError(f'设置失败,请检查属性名{name}') + def innerHTML(self, html): """设置元素innerHTML :param html: html文本 diff --git a/DrissionPage/_units/setter.pyi b/DrissionPage/_units/setter.pyi index b0e6810..f371bf6 100644 --- a/DrissionPage/_units/setter.pyi +++ b/DrissionPage/_units/setter.pyi @@ -172,6 +172,8 @@ class ChromiumElementSetter(object): def property(self, name: str, value: str) -> None: ... + def style(self, name: str, value: str) -> None: ... + def innerHTML(self, html: str) -> None: ... def value(self, value: str) -> None: ... diff --git a/DrissionPage/_units/states.py b/DrissionPage/_units/states.py index a038624..763e27f 100644 --- a/DrissionPage/_units/states.py +++ b/DrissionPage/_units/states.py @@ -18,7 +18,7 @@ class ElementStates(object): @property def is_selected(self): - """返回元素是否被选择""" + """返回列表元素是否被选择""" return self._ele.run_js('return this.selected;') @property @@ -42,9 +42,9 @@ class ElementStates(object): def is_alive(self): """返回元素是否仍在DOM中""" try: - self._ele.attrs - return True - except Exception: + return self._ele.owner.run_cdp('DOM.describeNode', + backendNodeId=self._ele._backend_id)['node']['nodeId'] != 0 + except ElementLostError: return False @property @@ -71,6 +71,11 @@ class ElementStates(object): except CDPError: return False + @property + def is_clickable(self): + """返回元素是否可被模拟点击,从是否有大小、是否可用、是否显示、是否响应点击判断,不判断是否被遮挡""" + return self.has_rect and self.is_enabled and self.is_displayed and self._ele.style('pointer-events') != 'none' + @property def has_rect(self): """返回元素是否拥有位置和大小,没有返回False,有返回四个角在页面中坐标组成的列表""" @@ -96,9 +101,9 @@ class ShadowRootStates(object): def is_alive(self): """返回元素是否仍在DOM中""" try: - self._ele.owner.run_cdp('DOM.describeNode', backendNodeId=self._ele._backend_id) - return True - except Exception: + return self._ele.owner.run_cdp('DOM.describeNode', + backendNodeId=self._ele._backend_id)['node']['nodeId'] != 0 + except ElementLostError: return False diff --git a/DrissionPage/_units/states.pyi b/DrissionPage/_units/states.pyi index 71a673b..d9a0cb3 100644 --- a/DrissionPage/_units/states.pyi +++ b/DrissionPage/_units/states.pyi @@ -41,7 +41,10 @@ class ElementStates(object): def is_covered(self) -> Union[Literal[False], int]: ... @property - def has_rect(self) -> Union[bool, List[Tuple[float, float]]]: ... + def is_clickable(self) -> bool: ... + + @property + def has_rect(self) -> Union[Literal[False], List[Tuple[float, float]]]: ... class ShadowRootStates(object): diff --git a/DrissionPage/_units/waiter.py b/DrissionPage/_units/waiter.py index 4e6e3c4..35bf3b4 100644 --- a/DrissionPage/_units/waiter.py +++ b/DrissionPage/_units/waiter.py @@ -36,7 +36,7 @@ class BaseWaiter(OriginWaiter): def ele_deleted(self, loc_or_ele, timeout=None, raise_err=None): """等待元素从DOM中删除 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 + :param timeout: 超时时间(秒),默认读取页面超时时间 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -46,7 +46,7 @@ class BaseWaiter(OriginWaiter): def ele_displayed(self, loc_or_ele, timeout=None, raise_err=None): """等待元素变成显示状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 + :param timeout: 超时时间(秒),默认读取页面超时时间 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -65,7 +65,7 @@ class BaseWaiter(OriginWaiter): def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): """等待元素变成隐藏状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 + :param timeout: 超时时间(秒),默认读取页面超时时间 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -84,7 +84,7 @@ class BaseWaiter(OriginWaiter): def eles_loaded(self, locators, timeout=None, any_one=False, raise_err=None): """等待元素加载到DOM,可等待全部或任意一个 :param locators: 要等待的元素,输入定位符,用list输入多个 - :param timeout: 超时时间,默认读取页面超时时间 + :param timeout: 超时时间(秒),默认读取页面超时时间 :param any_one: 是否等待到一个就返回 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 成功返回True,失败返回False @@ -119,9 +119,10 @@ class BaseWaiter(OriginWaiter): locators = ((get_loc(locators)[1],) if (isinstance(locators, str) or isinstance(locators, tuple) and locators[0] in by and len(locators) == 2) else [get_loc(l)[1] for l in locators]) + method = any if any_one else all + timeout = self._driver.timeout if timeout is None else timeout end_time = perf_counter() + timeout - method = any if any_one else all while perf_counter() < end_time: if method([_find(l, self._driver.driver) for l in locators]): return True @@ -133,7 +134,7 @@ class BaseWaiter(OriginWaiter): def load_start(self, timeout=None, raise_err=None): """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 + :param timeout: 超时时间(秒),为None时使用页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -141,7 +142,7 @@ class BaseWaiter(OriginWaiter): def doc_loaded(self, timeout=None, raise_err=None): """等待页面加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 + :param timeout: 超时时间(秒),为None时使用页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -158,7 +159,7 @@ class BaseWaiter(OriginWaiter): def download_begin(self, timeout=None, cancel_it=False): """等待浏览器下载开始,可将其拦截 - :param timeout: 超时时间,None使用页面对象超时时间 + :param timeout: 超时时间(秒),None使用页面对象超时时间 :param cancel_it: 是否取消该任务 :return: 成功返回任务对象,失败返回False """ @@ -184,7 +185,7 @@ class BaseWaiter(OriginWaiter): """等待url变成包含或不包含指定文本 :param text: 用于识别的文本 :param exclude: 是否排除,为True时当url不包含text指定文本时返回True - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -194,7 +195,7 @@ class BaseWaiter(OriginWaiter): """等待title变成包含或不包含指定文本 :param text: 用于识别的文本 :param exclude: 是否排除,为True时当title不包含text指定文本时返回True - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -205,7 +206,7 @@ class BaseWaiter(OriginWaiter): :param arg: 要被匹配的属性 :param text: 用于识别的文本 :param exclude: 是否排除,为True时当属性不包含text指定文本时返回True - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -231,7 +232,7 @@ class BaseWaiter(OriginWaiter): def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): """等待页面开始加载或加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 + :param timeout: 超时时间(秒),为None时使用页面timeout属性 :param start: 等待开始还是结束 :param gap: 间隔秒数 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 @@ -251,45 +252,12 @@ class BaseWaiter(OriginWaiter): else: return False - # -----------即将废弃----------- - - def data_packets(self, count=1, timeout=None, fix_count: bool = True): - """等待符合要求的数据包到达指定数量 - :param count: 需要捕捉的数据包数量 - :param timeout: 超时时间,为None无限等待 - :param fix_count: 是否必须满足总数要求,发生超时,为True返回False,为False返回已捕捉到的数据包 - :return: count为1时返回数据包对象,大于1时返回列表,超时且fix_count为True时返回False""" - return self._driver.listen.wait(count, timeout, fix_count) - - def load_complete(self, timeout=None, raise_err=None): - """等待页面加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, start=False, raise_err=raise_err) - - def ele_loaded(self, locator, timeout=None, raise_err=None): - """等待元素加载到DOM - :param locator: 要等待的元素,输入定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 成功返回元素对象,失败返回False - """ - ele = self._driver._ele(locator, raise_err=False, timeout=timeout) - if ele: - return ele - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError(f'等待元素加载失败(等待{timeout}秒)。') - else: - return False - class TabWaiter(BaseWaiter): def downloads_done(self, timeout=None, cancel_if_timeout=True): """等待所有浏览器下载任务结束 - :param timeout: 超时时间,为None时无限等待 + :param timeout: 超时时间(秒),为None时无限等待 :param cancel_if_timeout: 超时时是否取消剩余任务 :return: 是否等待成功 """ @@ -329,7 +297,7 @@ class PageWaiter(TabWaiter): def new_tab(self, timeout=None, raise_err=None): """等待新标签页出现 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :param timeout: 超时时间(秒),为None则使用页面对象timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 等到新标签页返回其id,否则返回False """ @@ -348,7 +316,7 @@ class PageWaiter(TabWaiter): def all_downloads_done(self, timeout=None, cancel_if_timeout=True): """等待所有浏览器下载任务结束 - :param timeout: 超时时间,为None时无限等待 + :param timeout: 超时时间(秒),为None时无限等待 :param cancel_if_timeout: 超时时是否取消剩余任务 :return: 是否等待成功 """ @@ -388,7 +356,7 @@ class ElementWaiter(OriginWaiter): def deleted(self, timeout=None, raise_err=None): """等待元素从dom删除 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -396,7 +364,7 @@ class ElementWaiter(OriginWaiter): def displayed(self, timeout=None, raise_err=None): """等待元素从dom显示 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -404,7 +372,7 @@ class ElementWaiter(OriginWaiter): def hidden(self, timeout=None, raise_err=None): """等待元素从dom隐藏 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -412,15 +380,15 @@ class ElementWaiter(OriginWaiter): def covered(self, timeout=None, raise_err=None): """等待当前元素被遮盖 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 + :return: 成功返回覆盖元素id,返回False """ return self._wait_state('is_covered', True, timeout, raise_err, err_text='等待元素被覆盖失败。') def not_covered(self, timeout=None, raise_err=None): """等待当前元素不被遮盖 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -428,7 +396,7 @@ class ElementWaiter(OriginWaiter): def enabled(self, timeout=None, raise_err=None): """等待当前元素变成可用 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -436,7 +404,7 @@ class ElementWaiter(OriginWaiter): def disabled(self, timeout=None, raise_err=None): """等待当前元素变成不可用 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -444,7 +412,7 @@ class ElementWaiter(OriginWaiter): def disabled_or_deleted(self, timeout=None, raise_err=None): """等待当前元素变成不可用或从DOM移除 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -461,10 +429,10 @@ class ElementWaiter(OriginWaiter): else: return False - def stop_moving(self, gap=.1, timeout=None, raise_err=None): + def stop_moving(self, timeout=None, gap=.1, raise_err=None): """等待当前元素停止运动 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param gap: 检测间隔时间 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ @@ -494,33 +462,51 @@ class ElementWaiter(OriginWaiter): else: return False - def has_rect(self, timeout=None, raise_err=None): - """等待当前元素有大小及位置属性 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + def clickable(self, wait_moved=True, timeout=None, raise_err=None): + """等待当前元素可被点击 + :param wait_moved: 是否等待元素运动结束 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ + t1 = perf_counter() + r = self._wait_state('is_clickable', True, timeout, raise_err, err_text='等待元素可点击失败(等{}秒)。') + r = self.stop_moving(timeout=perf_counter() - t1) if wait_moved and r else r + if raise_err and not r: + raise WaitTimeoutError(f'等待元素可点击失败(等{timeout}秒)。') + return r + + def has_rect(self, timeout=None, raise_err=None): + """等待当前元素有大小及位置属性 + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 成功返回元素四角坐标(左上 右上 右下 左下),失败返回False + """ return self._wait_state('has_rect', True, timeout, raise_err, err_text='等待元素拥有大小及位置失败(等{}秒)。') def _wait_state(self, attr, mode=False, timeout=None, raise_err=None, err_text=None): """等待元素某个元素状态到达指定状态 :param attr: 状态名称 - :param mode: True或False - :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param mode: 等待True还是False + :param timeout: 超时时间(秒),为None使用元素所在页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :param err_text: 抛出错误时显示的信息 :return: 是否等待成功 """ - err_text = err_text or '等待元素状态改变失败(等待{}秒)。' + a = self._ele.states.__getattribute__(attr) + if (a and mode) or (not a and not mode): + return True if isinstance(a, bool) else a + if timeout is None: timeout = self._owner.timeout end_time = perf_counter() + timeout while perf_counter() < end_time: a = self._ele.states.__getattribute__(attr) if (a and mode) or (not a and not mode): - return True + return True if isinstance(a, bool) else a sleep(.05) + err_text = err_text or '等待元素状态改变失败(等待{}秒)。' if raise_err is True or Settings.raise_when_wait_failed is True: raise WaitTimeoutError(err_text.format(timeout)) else: diff --git a/DrissionPage/_units/waiter.pyi b/DrissionPage/_units/waiter.pyi index 4b96890..f05ca94 100644 --- a/DrissionPage/_units/waiter.pyi +++ b/DrissionPage/_units/waiter.pyi @@ -5,7 +5,7 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ -from typing import Union, Tuple +from typing import Union, Tuple, Literal, List from .downloader import DownloadMission from .._elements.chromium_element import ChromiumElement @@ -91,7 +91,7 @@ class ElementWaiter(OriginWaiter): def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... + def covered(self, timeout: float = None, raise_err: bool = None) -> Union[Literal[False], int]: ... def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... @@ -99,11 +99,15 @@ class ElementWaiter(OriginWaiter): def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def has_rect(self, timeout: float = None, raise_err: bool = None) -> bool: ... + def clickable(self, wait_moved: bool = True, timeout: float = None, raise_err: bool = None) -> bool: ... + + def has_rect(self, + timeout: float = None, + raise_err: bool = None) -> Union[Literal[False], List[Tuple[float, float]]]: ... def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def stop_moving(self, gap: float = .1, timeout: float = None, raise_err: bool = None) -> bool: ... + def stop_moving(self, timeout: float = None, gap: float = .1, raise_err: bool = None) -> bool: ... def _wait_state(self, attr: str, diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 7c3b474..eb9adac 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -7,6 +7,7 @@ """ from ._elements.session_element import make_session_ele from ._functions.by import By +from ._functions.elements import get_eles from ._functions.keys import Keys from ._functions.settings import Settings from ._functions.tools import wait_until, configs_to_here @@ -15,7 +16,7 @@ from ._pages.chromium_page import ChromiumPage from ._units.actions import Actions __all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here', 'get_blob', - 'tree', 'from_selenium', 'from_playwright'] + 'tree', 'from_selenium', 'from_playwright', 'get_eles'] def from_selenium(driver):