From 86ff9098b2f224b9178fcf769630f5cd08e2f868 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 9 Jan 2024 23:35:36 +0800 Subject: [PATCH] =?UTF-8?q?ele()=E5=92=8Cs=5Fele()=E5=A2=9E=E5=8A=A0index?= =?UTF-8?q?=E5=8F=82=E6=95=B0=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90=EF=BC=9B?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=9F=A5=E6=89=BE=E5=85=83=E7=B4=A0=E9=80=9F?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/__init__.py | 2 +- DrissionPage/_base/base.py | 164 ++++----- DrissionPage/_base/base.pyi | 130 +++++-- DrissionPage/_elements/chromium_element.py | 356 ++++++++++---------- DrissionPage/_elements/chromium_element.pyi | 72 ++-- DrissionPage/_elements/session_element.py | 63 ++-- DrissionPage/_elements/session_element.pyi | 16 +- DrissionPage/_pages/chromium_base.py | 57 ++-- DrissionPage/_pages/chromium_base.pyi | 26 +- DrissionPage/_pages/chromium_frame.py | 19 +- DrissionPage/_pages/chromium_frame.pyi | 5 +- DrissionPage/_pages/chromium_tab.py | 9 +- DrissionPage/_pages/chromium_tab.pyi | 8 +- DrissionPage/_pages/session_page.py | 25 +- DrissionPage/_pages/session_page.pyi | 2 +- DrissionPage/_pages/web_page.py | 16 +- DrissionPage/_pages/web_page.pyi | 5 +- requirements.txt | 2 +- setup.py | 8 +- 19 files changed, 551 insertions(+), 434 deletions(-) diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 0f1467c..45f5381 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -14,4 +14,4 @@ from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions __all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__'] -__version__ = '4.0.1' +__version__ = '4.0.0b37' diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index 0f16588..30435b7 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -23,11 +23,11 @@ class BaseParser(object): def __call__(self, loc_or_str): return self.ele(loc_or_str) - def ele(self, loc_or_ele, timeout=None): - return self._ele(loc_or_ele, timeout, True, method='ele()') + def ele(self, loc_or_ele, index=0, timeout=None): + return self._ele(loc_or_ele, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): - return self._ele(loc_or_str, timeout, False) + return self._ele(loc_or_str, timeout, index=None) # ----------------以下属性或方法待后代实现---------------- @property @@ -40,11 +40,11 @@ class BaseParser(object): def s_eles(self, loc_or_str): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): + def _ele(self, loc_or_ele, timeout=None, index=0, raise_err=None, method=None): pass @abstractmethod - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, raise_err=None): pass @@ -68,8 +68,8 @@ class BaseElement(BaseParser): def nexts(self): pass - def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None, method=None): - r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err) + def _ele(self, loc_or_str, timeout=None, index=0, relative=False, raise_err=None, method=None): + r = self._find_elements(loc_or_str, timeout=timeout, index=index, relative=relative, raise_err=raise_err) if r or isinstance(r, list): return r if Settings.raise_when_ele_not_found or raise_err is True: @@ -80,7 +80,7 @@ class BaseElement(BaseParser): return r @abstractmethod - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=0, relative=False, raise_err=None): pass @@ -122,8 +122,8 @@ class DrissionElement(BaseElement): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ if isinstance(level_or_loc, int): @@ -153,24 +153,23 @@ class DrissionElement(BaseElement): if isinstance(filter_loc, int): index = filter_loc filter_loc = '' - nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) - if not nodes: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) + if not filter_loc: + loc = '*' if ele_only else 'node()' + else: + loc = get_loc(filter_loc, True) # 把定位符转换为xpath + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + loc = loc[1].lstrip('./') - try: - return nodes[index - 1] - except IndexError: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) + node = self._ele(f'xpath:./{loc}', timeout=timeout, index=index, relative=True, raise_err=False) + if node: + return node + + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index, + 'ele_only': ele_only}) + else: + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -180,17 +179,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[-1] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._do_relative_find('prev()', 'preceding', filter_loc, index, timeout, ele_only) def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -200,17 +189,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[0] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._do_relative_find('next()', 'following', filter_loc, index, timeout, ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -220,17 +199,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[-1] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._do_relative_find('before()', 'preceding', filter_loc, index, timeout, ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -240,17 +209,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) - if nodes: - return nodes[0] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._do_relative_find('after()', 'following', filter_loc, index, timeout, ele_only) def children(self, filter_loc='', timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 @@ -268,7 +227,7 @@ class DrissionElement(BaseElement): loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) + nodes = self._ele(loc, timeout=timeout, index=None, relative=True) return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] def prevs(self, filter_loc='', timeout=None, ele_only=True): @@ -278,7 +237,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only) def nexts(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -287,7 +246,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='following', timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='following', timeout=timeout, ele_only=ele_only) def befores(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -296,8 +255,8 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', - brother=False, timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='preceding', + brother=False, timeout=timeout, ele_only=ele_only) def afters(self, filter_loc='', timeout=None, ele_only=True): """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -306,11 +265,31 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='following', - brother=False, timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='following', + brother=False, timeout=timeout, ele_only=ele_only) - def _get_brothers(self, index=None, filter_loc='', direction='following', - brother=True, timeout=.5, ele_only=True): + def _do_relative_find(self, func, direction, filter_loc='', index=1, timeout=None, ele_only=True): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param func: 方法名称 + :param direction: 方向,'following' 或 'preceding' + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素前面的某个元素或节点 + """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' + node = self._get_relatives(index, filter_loc, direction, False, timeout, ele_only) + if node: + return node + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, func, {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + else: + return NoneElement(self.page, func, {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + + def _get_relatives(self, index=None, filter_loc='', direction='following', brother=True, timeout=.5, ele_only=True): """按要求返回兄弟元素或节点组成的列表 :param index: 获取第几个,该参数不为None时只获取该编号的元素 :param filter_loc: 用于筛选的查询语法 @@ -319,8 +298,8 @@ class DrissionElement(BaseElement): :param timeout: 查找等待时间(秒) :return: 元素对象或字符串 """ - if index is not None and index < 1: - raise ValueError('index必须大于等于1。') + if index is not None and index < 0: + raise ValueError('index必须大于等于0。') brother = '-sibling' if brother else '' @@ -335,17 +314,12 @@ class DrissionElement(BaseElement): loc = f'xpath:./{direction}{brother}::{loc}' - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) - nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] - - if nodes and index is not None: + if index is not None: index = index - 1 if direction == 'following' else -index - try: - return [nodes[index]] - except IndexError: - return [] - else: - return nodes + nodes = self._ele(loc, timeout=timeout, index=index, relative=True, raise_err=False) + if isinstance(nodes, list): + nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] + return nodes # ----------------以下属性或方法由后代实现---------------- @property @@ -442,11 +416,11 @@ class BasePage(BaseParser): def get(self, url, show_errmsg=False, retry=None, interval=None): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): + def _ele(self, loc_or_ele, timeout=None, index=0, raise_err=None, method=None): if not loc_or_ele: raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele}) - r = self._find_elements(loc_or_ele, timeout=timeout, single=single, raise_err=raise_err) + r = self._find_elements(loc_or_ele, timeout=timeout, index=index, raise_err=raise_err) if r or isinstance(r, list): return r @@ -458,5 +432,5 @@ class BasePage(BaseParser): return r @abstractmethod - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, raise_err=None): pass diff --git a/DrissionPage/_base/base.pyi b/DrissionPage/_base/base.pyi index b4964b0..900a4d2 100644 --- a/DrissionPage/_base/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from abc import abstractmethod -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from DownloadKit import DownloadKit @@ -15,9 +15,12 @@ from .._elements.none_element import NoneElement class BaseParser(object): - def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... + def __call__(self, loc_or_str: Union[Tuple[str, str], str], index: int = 0): ... - def ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], timeout: float = None): ... + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, BaseElement], + index: int = 0, + timeout: float = None): ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): ... @@ -25,15 +28,23 @@ class BaseParser(object): @property def html(self) -> str: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement]): ... + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], index: int = 0): ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, - raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 0, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _find_elements(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 0, + raise_err: bool = None): ... class BaseElement(BaseParser): @@ -45,11 +56,19 @@ class BaseElement(BaseParser): @property def tag(self) -> str: ... - def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, - relative: bool = False, raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + index: Optional[int] = 0, + relative: bool = False, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_str, timeout: float = None, single: bool = True, relative: bool = False, + def _find_elements(self, loc_or_str, + timeout: float = None, + index: Optional[int] = 0, + relative: bool = False, raise_err: bool = None): ... def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... @@ -83,41 +102,80 @@ class DrissionElement(BaseElement): def texts(self, text_node_only: bool = False) -> list: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ... + def parent(self, + level_or_loc: Union[tuple, str, int] = 1, + index: int = 1) -> Union[DrissionElement, None]: ... - def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def child(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def children(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def children(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def prevs(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def nexts(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def befores(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def afters(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def _get_brothers(self, index: int = None, filter_loc: Union[tuple, str] = '', - direction: str = 'following', brother: bool = True, - timeout: float = 0.5, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... + def _do_relative_find(self, + func: str, + direction: str, + filter_loc: Union[tuple, str] ='', + index: int =1, + timeout: float =None, + ele_only: bool =True) -> DrissionElement: ... + + def _get_relatives(self, + index: int = None, + filter_loc: Union[tuple, str] = '', + direction: str = 'following', + brother: bool = True, + timeout: float = 0.5, + ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... # ----------------以下属性或方法由后代实现---------------- @property @@ -184,8 +242,16 @@ class BasePage(BaseParser): @abstractmethod def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, - raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 0, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _find_elements(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 0, + raise_err: bool = None): ... diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index 9dff0a2..ce15ee0 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -80,13 +80,13 @@ class ChromiumElement(DrissionElement): attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'' - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=0, timeout=None): """在内部查找元素 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @@ -227,8 +227,8 @@ class ChromiumElement(DrissionElement): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ return super().parent(level_or_loc, index) @@ -264,7 +264,7 @@ class ChromiumElement(DrissionElement): return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -275,7 +275,7 @@ class ChromiumElement(DrissionElement): return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 @@ -400,13 +400,14 @@ class ChromiumElement(DrissionElement): """ run_js(self, script, as_expr, 0, args) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def ele(self, loc_or_str, index=0, timeout=None): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,0开始 :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象或属性、文本 """ - return self._ele(loc_or_str, timeout, method='ele()') + return self._ele(loc_or_str, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -414,17 +415,18 @@ class ChromiumElement(DrissionElement): :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素,以SessionElement形式返回 + def s_ele(self, loc_or_str=None, index=0): + """查找一个符合条件的元素,以SessionElement形式返回 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :return: SessionElement对象或属性、文本 """ if self.tag in __FRAME_ELEMENT__: - r = make_session_ele(self.inner_html, loc_or_str) + r = make_session_ele(self.inner_html, loc_or_str, index=index) else: - r = make_session_ele(self, loc_or_str) + r = make_session_ele(self, loc_or_str, index=index) if isinstance(r, NoneElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 's_ele()', {'loc_or_str': loc_or_str}) @@ -439,19 +441,19 @@ class ChromiumElement(DrissionElement): :return: SessionElement或属性、文本组成的列表 """ if self.tag in __FRAME_ELEMENT__: - return make_session_ele(self.inner_html, loc_or_str, single=False) - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self.inner_html, loc_or_str, index=None) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=0, relative=False, raise_err=None): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒) - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或文本、属性或其组成的列表 """ - return find_in_chromium_ele(self, loc_or_str, single, timeout, relative=relative) + return find_in_chromium_ele(self, loc_or_str, index, timeout, relative=relative) def style(self, style, pseudo_ele=''): """返回元素样式属性值,可获取伪元素属性值 @@ -806,14 +808,15 @@ class ShadowRoot(BaseElement): def __repr__(self): return f'' - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=0, timeout=None): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :param timeout: 超时时间(秒) :return: 元素对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @@ -920,7 +923,7 @@ class ShadowRoot(BaseElement): return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index}) def before(self, filter_loc='', index=1): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -935,7 +938,7 @@ class ShadowRoot(BaseElement): return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index}) def after(self, filter_loc='', index=1): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 @@ -963,7 +966,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' - return self._ele(loc, single=False, relative=True) + return self._ele(loc, index=None, relative=True) def nexts(self, filter_loc=''): """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -976,7 +979,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) + return self.parent_ele._ele(xpath, index=None, relative=True) def befores(self, filter_loc=''): """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -990,7 +993,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./preceding::{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) + return self.parent_ele._ele(xpath, index=None, relative=True) def afters(self, filter_loc=''): """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -1001,15 +1004,16 @@ class ShadowRoot(BaseElement): eles1 = self.nexts(filter_loc) loc = get_loc(filter_loc, True)[1].lstrip('./') xpath = f'xpath:./following::{loc}' - return eles1 + self.parent_ele._ele(xpath, single=False, relative=True) + return eles1 + self.parent_ele._ele(xpath, index=None, relative=True) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素 + def ele(self, loc_or_str, index=0, timeout=None): + """返回当前元素下级符合条件的一个元素 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,0开始 :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象 """ - return self._ele(loc_or_str, timeout, method='ele()') + return self._ele(loc_or_str, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素 @@ -1017,14 +1021,15 @@ class ShadowRoot(BaseElement): :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + def s_ele(self, loc_or_str=None, index=0): + """查找一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, loc_or_str) + r = make_session_ele(self, loc_or_str, index=index) if isinstance(r, NoneElement): r.method = 's_ele()' r.args = {'loc_or_str': loc_or_str} @@ -1035,13 +1040,13 @@ class ShadowRoot(BaseElement): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象 """ - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=0, relative=False, raise_err=None): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒) - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或其组成的列表 @@ -1052,15 +1057,15 @@ class ShadowRoot(BaseElement): def do_find(): if loc[0] == 'css selector': - if single: + if index == 0: nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] if nod_id: - r = make_chromium_ele(self.page, node_id=nod_id) + r = make_chromium_eles(self.page, _ids=nod_id, is_obj_id=False) return None if r is False else r else: nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId'] - r = make_chromium_eles(self.page, node_ids=nod_ids, single=False) + r = make_chromium_eles(self.page, _ids=nod_ids, index=index, is_obj_id=False) return None if r is False else r else: @@ -1069,16 +1074,20 @@ class ShadowRoot(BaseElement): return None css = [i.css_path[61:] for i in eles] - if single: - node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId'] - r = make_chromium_ele(self.page, node_id=node_id) + if index is not None: + try: + node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, + selector=css[index])['nodeId'] + except IndexError: + return None + r = make_chromium_eles(self.page, _ids=node_id, is_obj_id=False) return None if r is False else r else: node_ids = [self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] for i in css] if 0 in node_ids: return None - r = make_chromium_eles(self.page, node_ids=node_ids, single=False) + r = make_chromium_eles(self.page, _ids=node_ids, index=index, is_obj_id=False) return None if r is False else r timeout = timeout if timeout is not None else self.page.timeout @@ -1090,7 +1099,7 @@ class ShadowRoot(BaseElement): if result: return result - return NoneElement(self.page) if single else [] + return NoneElement(self.page) if index is not None else [] def _get_node_id(self, obj_id): """返回元素node id""" @@ -1107,11 +1116,11 @@ class ShadowRoot(BaseElement): return r['backendNodeId'] -def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): +def find_in_chromium_ele(ele, loc, index=0, timeout=None, relative=True): """在chromium元素中查找 :param ele: ChromiumElement对象 :param loc: 元素定位元组 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,为None返回所有 :param timeout: 查找元素超时时间(秒) :param relative: WebPage用于标记是否相对定位使用 :return: 返回ChromiumElement元素或它们组成的列表 @@ -1133,22 +1142,22 @@ def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): # ---------------执行查找----------------- if loc[0] == 'xpath': - return find_by_xpath(ele, loc[1], single, timeout, relative=relative) + return find_by_xpath(ele, loc[1], index, timeout, relative=relative) else: - return find_by_css(ele, loc[1], single, timeout) + return find_by_css(ele, loc[1], index, timeout) -def find_by_xpath(ele, xpath, single, timeout, relative=True): +def find_by_xpath(ele, xpath, index, timeout, relative=True): """执行用xpath在元素中查找元素 :param ele: 在此元素中查找 :param xpath: 查找语句 - :param single: 是否只返回第一个结果 + :param index: 第几个结果,为None返回所有 :param timeout: 超时时间(秒) :param relative: 是否相对定位 :return: ChromiumElement或其组成的列表 """ - type_txt = '9' if single else '7' + type_txt = '9' if index == 0 else '7' node_txt = 'this.contentDocument' if ele.tag in __FRAME_ELEMENT__ and not relative else 'this' js = make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt) ele.page.wait.load_complete() @@ -1170,21 +1179,28 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True): if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None - if single: - r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + if index == 0: + r = make_chromium_eles(ele.page, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: - # from pprint import pprint - # for i in ele.page.run_cdp('Runtime.getProperties', - # objectId=res['result']['objectId'], - # ownProperties=True)['result'][:-1]: - # pprint(i) - r = [make_chromium_ele(ele.page, obj_id=i['value']['objectId']) if i['value']['type'] == 'object' else - i['value']['value'] for i in ele.page.run_cdp('Runtime.getProperties', - objectId=res['result']['objectId'], - ownProperties=True)['result'][:-1]] - return None if not r or r is False in r else r + res = ele.page.run_cdp('Runtime.getProperties', objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1] + if index is None: + r = [make_chromium_eles(ele.page, _ids=i['value']['objectId'], is_obj_id=True) + if i['value']['type'] == 'object' else i['value']['value'] for i in res] + return None if False in r else r + + else: + try: + res = res[index] + except IndexError: + return None + if res['value']['type'] == 'object': + r = make_chromium_eles(ele.page, _ids=res['value']['objectId'], is_obj_id=True) + else: + r = res['value']['value'] + return None if r is False else r end_time = perf_counter() + timeout result = do_find() @@ -1194,19 +1210,19 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True): if result: return result - return NoneElement(ele.page) if single else [] + return NoneElement(ele.page) if index is not None else [] -def find_by_css(ele, selector, single, timeout): +def find_by_css(ele, selector, index, timeout): """执行用css selector在元素中查找元素 :param ele: 在此元素中查找 :param selector: 查找语句 - :param single: 是否只返回第一个结果 + :param index: 第几个结果,为None返回所有 :param timeout: 超时时间(秒) :return: ChromiumElement或其组成的列表 """ selector = selector.replace('"', r'\"') - find_all = '' if single else 'All' + find_all = '' if index == 0 else 'All' node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame', 'shadow-root') else 'this' js = f'function(){{return {node_txt}.querySelector{find_all}("{selector}");}}' @@ -1221,15 +1237,15 @@ def find_by_css(ele, selector, single, timeout): if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None - if single: - r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + if index == 0: + r = make_chromium_eles(ele.page, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: - node_ids = [i['value']['objectId'] for i in ele.page.run_cdp('Runtime.getProperties', - objectId=res['result']['objectId'], - ownProperties=True)['result'][:-1]] - r = make_chromium_eles(ele.page, obj_ids=node_ids, single=False, ele_only=False) + obj_ids = [i['value']['objectId'] for i in ele.page.run_cdp('Runtime.getProperties', + objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1]] + r = make_chromium_eles(ele.page, _ids=obj_ids, index=index, is_obj_id=True) return None if r is False else r end_time = perf_counter() + timeout @@ -1240,115 +1256,117 @@ def find_by_css(ele, selector, single, timeout): if result: return result - return NoneElement(ele.page) if single else [] + return NoneElement(ele.page) if index is not None else [] -def make_chromium_ele(page, node_id=None, obj_id=None): +def make_chromium_eles(page, _ids, index=0, is_obj_id=True): """根据node id或object id生成相应元素对象 :param page: ChromiumPage对象 - :param node_id: 元素的node id - :param obj_id: 元素的object id - :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False + :param _ids: 元素的id列表 + :param index: 获取第几个,为None返回全部 + :param is_obj_id: 传入的id是obj id还是node id + :return: 浏览器元素对象或它们组成的列表,生成失败返回False """ - if node_id: - node = page.driver.run('DOM.describeNode', nodeId=node_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - # todo: Node() - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - obj_id = page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] + if is_obj_id: + get_node_func = _get_node_by_obj_id + # id_txt = 'objectId' + else: + get_node_func = _get_node_by_node_id + # id_txt = 'nodeId' + if not isinstance(_ids, (list, tuple)): + _ids = (_ids,) + + # if not ele_only: + if index is not None: # 获取一个 + obj_id = _ids[index] + return get_node_func(page, obj_id) + + else: # 获取全部 + nodes = [] + for obj_id in _ids: + tmp = get_node_func(page, obj_id) + if tmp is False: + return False + nodes.append(tmp) + return nodes + + # if index is None: + # nodes = [] + # for obj_id in _ids: + # tmp = get_node_func(page, obj_id) + # if tmp is False: + # return False + # if not isinstance(tmp, str): + # nodes.append(tmp) + # return nodes + # + # ids_count = len(_ids) + # if index < 0: + # index = ids_count + index + # if index > ids_count - 1: + # return False + # + # tmp = get_node_func(page, _ids[index]) + # if not isinstance(tmp, str): + # return tmp + # + # num = -1 + # for obj_id in _ids: + # node = _get_node_info(page, id_txt, obj_id) + # if node is False: + # return False + # if node['node']['nodeName'] in ('#text', '#comment'): + # continue + # num += 1 + # if num == index: + # return _make_ele(page, obj_id, node) + + # return NoneElement(page) + + +def _get_node_info(page, id_type, _id): + if not _id: + return False + arg = {id_type: _id} + node = page.driver.run('DOM.describeNode', **arg) + if 'error' in node: + return False + return node + + +def _get_node_by_obj_id(page, obj_id): + node = _get_node_info(page, 'objectId', obj_id) + if node is False: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + return node['node']['nodeValue'] + else: + return _make_ele(page, obj_id, node) + + +def _get_node_by_node_id(page, node_id): + node = _get_node_info(page, 'nodeId', node_id) + if node is False: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + return node['node']['nodeValue'] + else: + obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) if 'error' in obj_id: return False + obj_id = obj_id['object']['objectId'] + return _make_ele(page, obj_id, node) - elif obj_id: - node = page.driver.run('DOM.describeNode', objectId=obj_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - # todo: Node() - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - node_id = node['node']['nodeId'] - else: - return False - - ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=backend_id) +def _make_ele(page, obj_id, node): + ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], + backend_id=node['node']['backendNodeId']) if ele.tag in __FRAME_ELEMENT__: from .._pages.chromium_frame import ChromiumFrame ele = ChromiumFrame(page, ele, node) - return ele -def make_chromium_eles(page, node_ids=None, obj_ids=None, single=True, ele_only=True): - """根据node id或object id生成相应元素对象 - :param page: ChromiumPage对象 - :param node_ids: 元素的node id - :param obj_ids: 元素的object id - :param single: 是否获取但个元素 - :param ele_only: 是否只要ele - :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False - """ - nodes = [] - if node_ids: - for node_id in node_ids: - if not node_id: - return False - node = page.driver.run('DOM.describeNode', nodeId=node_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - if ele_only: - continue - else: - if single: - return node['node']['nodeValue'] - else: - nodes.append(node['node']['nodeValue']) - - obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) - if 'error' in obj_id: - return False - obj_id = obj_id['object']['objectId'] - ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=node['node']['backendNodeId']) - if ele.tag in __FRAME_ELEMENT__: - from .._pages.chromium_frame import ChromiumFrame - ele = ChromiumFrame(page, ele, node) - if single: - return ele - nodes.append(ele) - - if obj_ids: - for obj_id in obj_ids: - if not obj_id: - return False - node = page.driver.run('DOM.describeNode', objectId=obj_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - if ele_only: - continue - else: - if single: - return node['node']['nodeValue'] - else: - nodes.append(node['node']['nodeValue']) - - ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], - backend_id=node['node']['backendNodeId']) - if ele.tag in __FRAME_ELEMENT__: - from .._pages.chromium_frame import ChromiumFrame - ele = ChromiumFrame(page, ele, node) - if single: - return ele - nodes.append(ele) - - return NoneElement(page) if single and not nodes else nodes - - def make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt): """生成用xpath在元素中查找元素的js文本 :param xpath: xpath文本 @@ -1470,7 +1488,7 @@ def parse_js_result(page, ele, result): elif class_name == 'HTMLDocument': return result else: - r = make_chromium_ele(page, obj_id=result['objectId']) + r = make_chromium_eles(page, _ids=result['objectId']) if r is False: raise ElementLostError return r diff --git a/DrissionPage/_elements/chromium_element.pyi b/DrissionPage/_elements/chromium_element.pyi index fa341ca..32e4c65 100644 --- a/DrissionPage/_elements/chromium_element.pyi +++ b/DrissionPage/_elements/chromium_element.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Union, Tuple, List, Any, Literal +from typing import Union, Tuple, List, Any, Literal, Optional from .none_element import NoneElement from .._base.base import DrissionElement, BaseElement @@ -47,7 +47,9 @@ class ChromiumElement(DrissionElement): def __repr__(self) -> str: ... - def __call__(self, loc_or_str: Union[Tuple[str, str], str], + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def __eq__(self, other: ChromiumElement) -> bool: ... @@ -175,20 +177,23 @@ class ChromiumElement(DrissionElement): def ele(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 0) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[SessionElement]: ... def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, + index: Optional[int] = True, relative: bool = False, raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... @@ -286,20 +291,28 @@ class ShadowRoot(BaseElement): def afters(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... - def ele(self, loc_or_str: Union[Tuple[str, str], str], + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... - def eles(self, loc_or_str: Union[Tuple[str, str], str], + def eles(self, + loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 0) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... - def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, List[Union[ChromiumElement, - ChromiumFrame, str]]]: ... + def _find_elements(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + index: Optional[int] = 0, + relative: bool = False, + raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, + List[Union[ChromiumElement, ChromiumFrame, str]]]: ... def _get_node_id(self, obj_id: str) -> int: ... @@ -308,37 +321,42 @@ class ShadowRoot(BaseElement): def _get_backend_id(self, node_id: int) -> int: ... -def find_in_chromium_ele(ele: ChromiumElement, loc: Union[str, Tuple[str, str]], - single: bool = True, timeout: float = None, relative: bool = True) \ - -> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ... +def find_in_chromium_ele(ele: ChromiumElement, + loc: Union[str, Tuple[str, str]], + index: Optional[int] = 0, + timeout: float = None, + relative: bool = True) -> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ... -def find_by_xpath(ele: ChromiumElement, xpath: str, single: bool, timeout: float, +def find_by_xpath(ele: ChromiumElement, + xpath: str, + index: Optional[int], + timeout: float, relative: bool = True) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... -def find_by_css(ele: ChromiumElement, selector: str, single: bool, +def find_by_css(ele: ChromiumElement, + selector: str, + index: Optional[int], timeout: float) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... -def make_chromium_ele(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], - node_id: int = ..., - obj_id: str = ...) -> Union[ChromiumElement, ChromiumFrame, str]: ... - - def make_chromium_eles(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], - node_ids: Union[tuple, list] = None, - obj_ids: Union[tuple, list] = None, - single: bool = True, - ele_only: bool = True) -> Union[ChromiumElement, ChromiumFrame, NoneElement, + _ids: Union[tuple, list, str, int], + index: Optional[int] = 0, + is_obj_id: bool = True + ) -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... -def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], script: str, - as_expr: bool = False, timeout: float = None, args: tuple = ...) -> Any: ... +def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], + script: str, + as_expr: bool = False, + timeout: float = None, + args: tuple = ...) -> Any: ... def parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ... diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index c829ed6..881a0d9 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -119,7 +119,7 @@ class SessionElement(DrissionElement): return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -130,7 +130,7 @@ class SessionElement(DrissionElement): return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 @@ -220,13 +220,14 @@ class SessionElement(DrissionElement): else: return self.inner_ele.get(attr) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def ele(self, loc_or_str, index=0, timeout=None): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 第几个元素,0开始 :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str, method='ele()') + return self._ele(loc_or_str, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -234,32 +235,33 @@ class SessionElement(DrissionElement): :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def s_ele(self, loc_or_str=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def s_ele(self, loc_or_str=None, index=0): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str, method='s_ele()') + return self._ele(loc_or_str, index=index, method='s_ele()') def s_eles(self, loc_or_str): """返回当前元素下级所有符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 + def _find_elements(self, loc_or_str, timeout=None, index=0, relative=False, raise_err=None): + """返回当前元素下级符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: SessionElement对象 """ - return make_session_ele(self, loc_or_str, single) + return make_session_ele(self, loc_or_str, index=index) def _get_ele_path(self, mode): """获取css路径或xpath路径 @@ -282,19 +284,18 @@ class SessionElement(DrissionElement): return f'{path_str[1:]}' if mode == 'css' else path_str -def make_session_ele(html_or_ele, loc=None, single=True): +def make_session_ele(html_or_ele, loc=None, index=0): """从接收到的对象或html文本中查找元素,返回SessionElement对象 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 :param html_or_ele: html文本、BaseParser对象 :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 - :param single: True则返回第一个,False则返回全部 + :param index: 获取第几个元素,None获取所有 :return: 返回SessionElement元素或列表,或属性文本 """ # ---------------处理定位符--------------- if not loc: if isinstance(html_or_ele, SessionElement): - return html_or_ele if single else [html_or_ele] - + return html_or_ele loc = ('xpath', '.') elif isinstance(loc, (str, tuple)): @@ -368,16 +369,25 @@ def make_session_ele(html_or_ele, loc=None, single=True): # ---------------执行查找----------------- try: if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 - ele = html_or_ele.xpath(loc[1]) + eles = html_or_ele.xpath(loc[1]) else: # 用css selector获取元素对象列表 - ele = html_or_ele.cssselect(loc[1]) + eles = html_or_ele.cssselect(loc[1]) - if not isinstance(ele, list): # 结果不是列表,如数字 - return ele + if not isinstance(eles, list): # 结果不是列表,如数字 + return eles - # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 - if single: - ele = ele[0] if ele else None + # 把lxml元素对象包装成SessionElement对象并按需要返回一个或全部 + if index is None: + return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in eles if e != '\n'] + + else: + eles_count = len(eles) + if index < 0: + index = eles_count + index + if index > eles_count - 1: + return NoneElement(page) + + ele = eles[index] if isinstance(ele, HtmlElement): return SessionElement(ele, page) elif isinstance(ele, str): @@ -385,9 +395,6 @@ def make_session_ele(html_or_ele, loc=None, single=True): else: return NoneElement(page) - else: # 返回全部 - return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] - except Exception as e: if 'Invalid expression' in str(e): raise SyntaxError(f'无效的xpath语句:{loc}') diff --git a/DrissionPage/_elements/session_element.pyi b/DrissionPage/_elements/session_element.pyi index 5c82e6f..d9e7438 100644 --- a/DrissionPage/_elements/session_element.pyi +++ b/DrissionPage/_elements/session_element.pyi @@ -30,6 +30,7 @@ class SessionElement(DrissionElement): def __call__(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[SessionElement, NoneElement]: ... def __eq__(self, other: SessionElement) -> bool: ... @@ -115,6 +116,7 @@ class SessionElement(DrissionElement): def ele(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[SessionElement, NoneElement]: ... def eles(self, @@ -122,18 +124,17 @@ class SessionElement(DrissionElement): timeout: float = None) -> List[SessionElement]: ... def s_ele(self, - loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 0) -> Union[SessionElement, NoneElement]: ... - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, + index: Optional[int] = 0, relative: bool = False, - raise_err: bool = None) \ - -> Union[SessionElement, NoneElement, List[SessionElement]]: ... + raise_err: bool = None) -> Union[SessionElement, NoneElement, List[SessionElement]]: ... def _get_ele_path(self, mode: str) -> str: ... @@ -141,5 +142,4 @@ class SessionElement(DrissionElement): def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame, ChromiumBase], loc: Union[str, Tuple[str, str]] = None, - single: bool = True) -> Union[ - SessionElement, NoneElement, List[SessionElement]]: ... + index: Optional[int] = 0) -> Union[SessionElement, NoneElement, List[SessionElement]]: ... diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index dbaf4f8..f4c9197 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -244,14 +244,15 @@ class ChromiumBase(BasePage): self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self._upload_list = None - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=0, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,0开始 :param timeout: 超时时间(秒) :return: ChromiumElement对象 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index, timeout) def _wait_to_stop(self): """eager策略超时时使页面停止加载""" @@ -490,13 +491,14 @@ class ChromiumBase(BasePage): return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} for cookie in cookies] - def ele(self, loc_or_ele, timeout=None): - """获取第一个符合条件的元素对象 + def ele(self, loc_or_ele, index=0, timeout=None): + """获取一个符合条件的元素对象 :param loc_or_ele: 定位符或元素对象 + :param index: 获取第几个元素,0开始 :param timeout: 查找超时时间(秒) :return: ChromiumElement对象 """ - return self._ele(loc_or_ele, timeout=timeout, method='ele()') + return self._ele(loc_or_ele, timeout=timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """获取所有符合条件的元素对象 @@ -504,14 +506,15 @@ class ChromiumBase(BasePage): :param timeout: 查找超时时间(秒) :return: ChromiumElement对象组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + def s_ele(self, loc_or_ele=None, index=0): + """查找一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, loc_or_ele) + r = make_session_ele(self, loc_or_ele, index=index) if isinstance(r, NoneElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 's_ele()', {'loc_or_ele': loc_or_ele}) @@ -525,13 +528,13 @@ class ChromiumBase(BasePage): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象组成的列表 """ - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, relative=False, raise_err=None): """执行元素查找 :param loc_or_ele: 定位符或元素对象 :param timeout: 查找超时时间(秒) - :param single: 是否只返回第一个 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或元素对象组成的列表 @@ -558,16 +561,28 @@ class ChromiumBase(BasePage): while True: if num > 0: - num = 1 if single else num - nIds = self._driver.run('DOM.getSearchResults', searchId=result['searchId'], fromIndex=0, toIndex=num) - if __ERROR__ not in nIds: - if nIds['nodeIds'][0] != 0: - r = make_chromium_eles(self, node_ids=nIds['nodeIds'], single=single) - if r is not False: - break + from_index = index_arg = 0 + if index is None: + end_index = num + index_arg = None + elif index < 0: + from_index = index + num + end_index = from_index + 1 + else: + from_index = index + end_index = from_index + 1 + + if from_index <= num - 1: + nIds = self._driver.run('DOM.getSearchResults', searchId=result['searchId'], + fromIndex=from_index, toIndex=end_index) + if __ERROR__ not in nIds: + if nIds['nodeIds'][0] != 0: + r = make_chromium_eles(self, _ids=nIds['nodeIds'], index=index_arg, is_obj_id=False) + if r is not False: + break if perf_counter() >= end_time: - return NoneElement(self) if single else [] + return NoneElement(self) if index is not None else [] sleep(.1) timeout = end_time - perf_counter() @@ -699,7 +714,7 @@ class ChromiumBase(BasePage): :return: ChromiumFrame对象组成的列表 """ loc = loc or 'xpath://*[name()="iframe" or name()="frame"]' - frames = self._ele(loc, timeout=timeout, single=False, raise_err=False) + frames = self._ele(loc, timeout=timeout, index=None, raise_err=False) return [i for i in frames if str(type(i)).endswith(".ChromiumFrame'>")] def get_session_storage(self, item=None): diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index d4993b1..83e3573 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -93,7 +93,9 @@ class ChromiumBase(BasePage): def _d_set_runtime_settings(self) -> None: ... - def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement], + def __call__(self, + loc_or_str: Union[Tuple[str, str], str, ChromiumElement], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... @property @@ -177,19 +179,27 @@ class ChromiumBase(BasePage): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[list, dict]: ... - def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... - def eles(self, loc_or_str: Union[Tuple[str, str], str], + def eles(self, + loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_ele: Union[Tuple[str, str], str] = None, + index:int = 0) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None, + index: Optional[int] = 0, + relative: bool = False, + raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... def refresh(self, ignore_cache: bool = False) -> None: ... @@ -279,4 +289,4 @@ def get_mhtml(page: Union[ChromiumPage, ChromiumTab], def get_pdf(page: Union[ChromiumPage, ChromiumTab], path: Union[str, Path] = None, - name: str = None, kwargs: dict=None) -> bytes: ... + name: str = None, kwargs: dict = None) -> bytes: ... diff --git a/DrissionPage/_pages/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py index a7aac92..2f7b3f0 100644 --- a/DrissionPage/_pages/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -64,14 +64,15 @@ class ChromiumFrame(ChromiumBase): break sleep(.1) - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=0, timeout=None): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._frame_id == getattr(other, '_frame_id', None) @@ -388,8 +389,8 @@ class ChromiumFrame(ChromiumBase): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ return self.frame_ele.parent(level_or_loc, index) @@ -415,7 +416,7 @@ class ChromiumFrame(ChromiumBase): return self.frame_ele.next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -426,7 +427,7 @@ class ChromiumFrame(ChromiumBase): return self.frame_ele.before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 @@ -561,11 +562,11 @@ class ChromiumFrame(ChromiumBase): self.tab.remove_ele(new_ele) return r - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, relative=False, raise_err=None): """在frame内查找单个元素 :param loc_or_ele: 定位符或元素对象 :param timeout: 查找超时时间 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象 @@ -574,7 +575,7 @@ class ChromiumFrame(ChromiumBase): return loc_or_ele self.wait.load_complete() return self.doc_ele._ele(loc_or_ele, timeout, - raise_err=raise_err) if single else self.doc_ele.eles(loc_or_ele, timeout) + raise_err=raise_err) if index is not None else self.doc_ele.eles(loc_or_ele, timeout) def _is_inner_frame(self): """返回当前frame是否同域""" diff --git a/DrissionPage/_pages/chromium_frame.pyi b/DrissionPage/_pages/chromium_frame.pyi index 74c3314..a7d606f 100644 --- a/DrissionPage/_pages/chromium_frame.pyi +++ b/DrissionPage/_pages/chromium_frame.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from .chromium_base import ChromiumBase from .chromium_page import ChromiumPage @@ -44,6 +44,7 @@ class ChromiumFrame(ChromiumBase): def __call__(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def __eq__(self, other: ChromiumFrame) -> bool: ... @@ -209,7 +210,7 @@ class ChromiumFrame(ChromiumBase): def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, - single: bool = True, + index: Optional[int] = 0, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ... diff --git a/DrissionPage/_pages/chromium_tab.py b/DrissionPage/_pages/chromium_tab.py index cbab53e..3799f73 100644 --- a/DrissionPage/_pages/chromium_tab.py +++ b/DrissionPage/_pages/chromium_tab.py @@ -355,20 +355,19 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): if self._response is not None: self._response.close() - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒),d模式专用 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super()._find_elements(loc_or_ele, single=single) + return super()._find_elements(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, - relative=relative) + return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, index=index, relative=relative) def __repr__(self): return f'' diff --git a/DrissionPage/_pages/chromium_tab.pyi b/DrissionPage/_pages/chromium_tab.pyi index de61132..3ca580c 100644 --- a/DrissionPage/_pages/chromium_tab.pyi +++ b/DrissionPage/_pages/chromium_tab.pyi @@ -191,7 +191,11 @@ class WebPageTab(SessionPage, ChromiumTab): @property def set(self) -> WebPageTabSetter: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], + timeout: float = None, + index: Optional[int] = 0, + relative: bool = False, + raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], List[ Union[ChromiumElement, ChromiumFrame]]]: ... diff --git a/DrissionPage/_pages/session_page.py b/DrissionPage/_pages/session_page.py index 4faa2c3..88ca76f 100644 --- a/DrissionPage/_pages/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -174,13 +174,14 @@ class SessionPage(BasePage): """ return self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs) - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素、属性或节点文本 + def ele(self, loc_or_ele, index=0, timeout=None): + """返回页面中符合条件的一个元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,0开始 :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_ele, method='ele()') + return self._ele(loc_or_ele, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 @@ -188,31 +189,33 @@ class SessionPage(BasePage): :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 :return: SessionElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def s_ele(self, loc_or_ele=None): - """返回页面中符合条件的第一个元素、属性或节点文本 + def s_ele(self, loc_or_ele=None, index=0): + """返回页面中符合条件的一个元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,0开始 :return: SessionElement对象或属性、文本 """ - return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, method='s_ele()') + return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, + index=index, method='s_ele()') def s_eles(self, loc_or_str): """返回页面中符合条件的所有元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: SessionElement对象 """ - return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single) + return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, index=index) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): """返回cookies diff --git a/DrissionPage/_pages/session_page.pyi b/DrissionPage/_pages/session_page.pyi index a6379ca..1e3b8a8 100644 --- a/DrissionPage/_pages/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -106,7 +106,7 @@ class SessionPage(BasePage): def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], timeout: float = None, - single: bool = True, + index: Optional[int] = 0, raise_err: bool = None) \ -> Union[SessionElement, NoneElement, List[SessionElement]]: ... diff --git a/DrissionPage/_pages/web_page.py b/DrissionPage/_pages/web_page.py index fd4e079..96e7191 100644 --- a/DrissionPage/_pages/web_page.py +++ b/DrissionPage/_pages/web_page.py @@ -38,17 +38,18 @@ class WebPage(SessionPage, ChromiumPage, BasePage): super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout) self.change_mode(self._mode, go=False, copy_cookies=False) - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=0, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,0开始 :param timeout: 超时时间(秒) :return: 子元素对象 """ if self._mode == 'd': - return super(SessionPage, self).__call__(loc_or_str, timeout) + return super(SessionPage, self).__call__(loc_or_str, index=index, timeout=timeout) elif self._mode == 's': - return super().__call__(loc_or_str) + return super().__call__(loc_or_str, index=index) @property def set(self): @@ -360,20 +361,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._response is not None: self._response.close() - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=0, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param timeout: 查找元素超时时间,d模式专用 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,0开始,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super()._find_elements(loc_or_ele, single=single) + return super()._find_elements(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, - relative=relative) + return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, index=index, relative=relative) def quit(self, timeout=5, force=True): """关闭浏览器和Session diff --git a/DrissionPage/_pages/web_page.pyi b/DrissionPage/_pages/web_page.pyi index 20bcdf0..474e12a 100644 --- a/DrissionPage/_pages/web_page.pyi +++ b/DrissionPage/_pages/web_page.pyi @@ -5,7 +5,7 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from requests import Session, Response @@ -38,6 +38,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], + index: int = 0, timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... # -----------------共有属性和方法------------------- @@ -167,7 +168,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], timeout: float = None, - single: bool = True, + index: Optional[int] = 0, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], diff --git a/requirements.txt b/requirements.txt index 8a430e8..63f0080 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ requests lxml cssselect DownloadKit>=2.0.0 -websocket-client>=1.7.0 +websocket-client click tldextract psutil \ No newline at end of file diff --git a/setup.py b/setup.py index 4e39d05..a8fca9c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="4.0.1", + version="4.0.0b37", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", @@ -23,18 +23,18 @@ setup( 'requests', 'cssselect', 'DownloadKit>=2.0.0', - 'websocket-client>=1.7.0', + 'websocket-client', 'click', 'tldextract', 'psutil' ], classifiers=[ - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.6", "Development Status :: 4 - Beta", "Topic :: Utilities", "License :: OSI Approved :: BSD License", ], - python_requires='>=3.8', + python_requires='>=3.6', entry_points={ 'console_scripts': [ 'dp = DrissionPage.commons.cli:main',