diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 0f1467c..06964d5 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -14,4 +14,4 @@ from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions __all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__'] -__version__ = '4.0.1' +__version__ = '4.0.2' diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index 0f16588..a5322e4 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -23,11 +23,11 @@ class BaseParser(object): def __call__(self, loc_or_str): return self.ele(loc_or_str) - def ele(self, loc_or_ele, timeout=None): - return self._ele(loc_or_ele, timeout, True, method='ele()') + def ele(self, loc_or_ele, index=1, timeout=None): + return self._ele(loc_or_ele, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): - return self._ele(loc_or_str, timeout, False) + return self._ele(loc_or_str, timeout, index=None) # ----------------以下属性或方法待后代实现---------------- @property @@ -40,11 +40,11 @@ class BaseParser(object): def s_eles(self, loc_or_str): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): + def _ele(self, loc_or_ele, timeout=None, index=1, raise_err=None, method=None): pass @abstractmethod - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, raise_err=None): pass @@ -68,19 +68,28 @@ class BaseElement(BaseParser): def nexts(self): pass - def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None, method=None): - r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err) + def _ele(self, loc_or_str, timeout=None, index=1, relative=False, raise_err=None, method=None): + """调用获取元素的方法 + :param loc_or_str: 定位符 + :param timeout: 超时时间(秒) + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 + :param relative: 是否相对定位 + :param raise_err: 找不到时是否抛出异常 + :param method: 调用的方法名 + :return: 元素对象或它们组成的列表 + """ + r = self._find_elements(loc_or_str, timeout=timeout, index=index, relative=relative, raise_err=raise_err) if r or isinstance(r, list): return r if Settings.raise_when_ele_not_found or raise_err is True: - raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_str}) + raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_str, 'index': index}) r.method = method - r.args = {'loc_or_str': loc_or_str} + r.args = {'loc_or_str': loc_or_str, 'index': index} return r @abstractmethod - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=1, relative=False, raise_err=None): pass @@ -122,8 +131,8 @@ class DrissionElement(BaseElement): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ if isinstance(level_or_loc, int): @@ -153,24 +162,23 @@ class DrissionElement(BaseElement): if isinstance(filter_loc, int): index = filter_loc filter_loc = '' - nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) - if not nodes: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) + if not filter_loc: + loc = '*' if ele_only else 'node()' + else: + loc = get_loc(filter_loc, True) # 把定位符转换为xpath + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + loc = loc[1].lstrip('./') - try: - return nodes[index - 1] - except IndexError: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) + node = self._ele(f'xpath:./{loc}', timeout=timeout, index=index, relative=True, raise_err=False) + if node: + return node + + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index, + 'ele_only': ele_only}) + else: + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -180,17 +188,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[-1] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._get_relative('prev()', 'preceding', True, filter_loc, index, timeout, ele_only) def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -200,17 +198,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[0] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._get_relative('next()', 'following', True, filter_loc, index, timeout, ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -220,17 +208,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only) - if nodes: - return nodes[-1] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._get_relative('before()', 'preceding', False, filter_loc, index, timeout, ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -240,17 +218,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' - nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) - if nodes: - return nodes[0] - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, - 'index': index, 'ele_only': ele_only}) - else: - return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + return self._get_relative('after()', 'following', False, filter_loc, index, timeout, ele_only) def children(self, filter_loc='', timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 @@ -268,7 +236,7 @@ class DrissionElement(BaseElement): loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) + nodes = self._ele(loc, timeout=timeout, index=None, relative=True) return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] def prevs(self, filter_loc='', timeout=None, ele_only=True): @@ -278,7 +246,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only) def nexts(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -287,7 +255,7 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='following', timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='following', timeout=timeout, ele_only=ele_only) def befores(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -296,8 +264,8 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', - brother=False, timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='preceding', + brother=False, timeout=timeout, ele_only=ele_only) def afters(self, filter_loc='', timeout=None, ele_only=True): """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 @@ -306,11 +274,31 @@ class DrissionElement(BaseElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ - return self._get_brothers(filter_loc=filter_loc, direction='following', - brother=False, timeout=timeout, ele_only=ele_only) + return self._get_relatives(filter_loc=filter_loc, direction='following', + brother=False, timeout=timeout, ele_only=ele_only) - def _get_brothers(self, index=None, filter_loc='', direction='following', - brother=True, timeout=.5, ele_only=True): + def _get_relative(self, func, direction, brother, filter_loc='', index=1, timeout=None, ele_only=True): + """获取一个亲戚元素或节点,可用查询语法筛选,可指定返回筛选结果的第几个 + :param func: 方法名称 + :param direction: 方向,'following' 或 'preceding' + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素前面的某个元素或节点 + """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' + node = self._get_relatives(index, filter_loc, direction, brother, timeout, ele_only) + if node: + return node + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, func, {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + else: + return NoneElement(self.page, func, {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) + + def _get_relatives(self, index=None, filter_loc='', direction='following', brother=True, timeout=.5, ele_only=True): """按要求返回兄弟元素或节点组成的列表 :param index: 获取第几个,该参数不为None时只获取该编号的元素 :param filter_loc: 用于筛选的查询语法 @@ -319,9 +307,6 @@ class DrissionElement(BaseElement): :param timeout: 查找等待时间(秒) :return: 元素对象或字符串 """ - if index is not None and index < 1: - raise ValueError('index必须大于等于1。') - brother = '-sibling' if brother else '' if not filter_loc: @@ -335,17 +320,12 @@ class DrissionElement(BaseElement): loc = f'xpath:./{direction}{brother}::{loc}' - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) - nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] - - if nodes and index is not None: - index = index - 1 if direction == 'following' else -index - try: - return [nodes[index]] - except IndexError: - return [] - else: - return nodes + if index is not None: + index = index if direction == 'following' else -index + nodes = self._ele(loc, timeout=timeout, index=index, relative=True, raise_err=False) + if isinstance(nodes, list): + nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] + return nodes # ----------------以下属性或方法由后代实现---------------- @property @@ -442,21 +422,29 @@ class BasePage(BaseParser): def get(self, url, show_errmsg=False, retry=None, interval=None): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): + def _ele(self, loc_or_ele, timeout=None, index=1, raise_err=None, method=None): + """调用获取元素的方法 + :param loc_or_ele: 定位符 + :param timeout: 超时时间(秒) + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 + :param raise_err: 找不到时是否抛出异常 + :param method: 调用的方法名 + :return: 元素对象或它们组成的列表 + """ if not loc_or_ele: raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele}) - r = self._find_elements(loc_or_ele, timeout=timeout, single=single, raise_err=raise_err) + r = self._find_elements(loc_or_ele, timeout=timeout, index=index, raise_err=raise_err) if r or isinstance(r, list): return r if Settings.raise_when_ele_not_found or raise_err is True: - raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele}) + raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele, 'index': index}) r.method = method - r.args = {'loc_or_str': loc_or_ele} + r.args = {'loc_or_str': loc_or_ele, 'index': index} return r @abstractmethod - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, raise_err=None): pass diff --git a/DrissionPage/_base/base.pyi b/DrissionPage/_base/base.pyi index b4964b0..568a35e 100644 --- a/DrissionPage/_base/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from abc import abstractmethod -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from DownloadKit import DownloadKit @@ -15,9 +15,12 @@ from .._elements.none_element import NoneElement class BaseParser(object): - def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... + def __call__(self, loc_or_str: Union[Tuple[str, str], str], index: int = 1): ... - def ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], timeout: float = None): ... + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, BaseElement], + index: int = 1, + timeout: float = None): ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): ... @@ -25,15 +28,23 @@ class BaseParser(object): @property def html(self) -> str: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement]): ... + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], index: int = 1): ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, - raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 1, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _find_elements(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 1, + raise_err: bool = None): ... class BaseElement(BaseParser): @@ -45,11 +56,19 @@ class BaseElement(BaseParser): @property def tag(self) -> str: ... - def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, - relative: bool = False, raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + index: Optional[int] = 1, + relative: bool = False, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_str, timeout: float = None, single: bool = True, relative: bool = False, + def _find_elements(self, loc_or_str, + timeout: float = None, + index: Optional[int] = 1, + relative: bool = False, raise_err: bool = None): ... def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... @@ -83,41 +102,81 @@ class DrissionElement(BaseElement): def texts(self, text_node_only: bool = False) -> list: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ... + def parent(self, + level_or_loc: Union[tuple, str, int] = 1, + index: int = 1) -> Union[DrissionElement, None]: ... - def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def child(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, - timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def children(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def children(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def prevs(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def nexts(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def befores(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def afters(self, filter_loc: Union[tuple, str] = '', timeout: float = None, + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def _get_brothers(self, index: int = None, filter_loc: Union[tuple, str] = '', - direction: str = 'following', brother: bool = True, - timeout: float = 0.5, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... + def _get_relative(self, + func: str, + direction: str, + brother: bool, + filter_loc: Union[tuple, str] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> DrissionElement: ... + + def _get_relatives(self, + index: int = None, + filter_loc: Union[tuple, str] = '', + direction: str = 'following', + brother: bool = True, + timeout: float = 0.5, + ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... # ----------------以下属性或方法由后代实现---------------- @property @@ -184,8 +243,16 @@ class BasePage(BaseParser): @abstractmethod def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, - raise_err: bool = None, method: str = None): ... + def _ele(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 1, + raise_err: bool = None, + method: str = None): ... @abstractmethod - def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _find_elements(self, + loc_or_ele, + timeout: float = None, + index: Optional[int] = 1, + raise_err: bool = None): ... diff --git a/DrissionPage/_base/browser.py b/DrissionPage/_base/browser.py index 0e71e1d..7c6918b 100644 --- a/DrissionPage/_base/browser.py +++ b/DrissionPage/_base/browser.py @@ -63,12 +63,13 @@ class Browser(object): self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed) self._driver.set_callback('Target.targetCreated', self._onTargetCreated) - def _get_driver(self, tab_id): + def _get_driver(self, tab_id, owner=None): """获取对应tab id的Driver :param tab_id: 标签页id + :param owner: 使用该驱动的对象 :return: Driver对象 """ - return self._drivers.pop(tab_id, Driver(tab_id, 'page', self.address)) + return self._drivers.pop(tab_id, Driver(tab_id, 'page', self.address, owner)) def _onTargetCreated(self, **kwargs): """标签页创建时执行""" @@ -201,7 +202,8 @@ class Browser(object): except TypeError: pass - def _on_quit(self): + def _on_disconnect(self): + self.page._on_disconnect() Browser.BROWSERS.pop(self.id, None) if self.page._chromium_options.is_auto_port and self.page._chromium_options.user_data_path: path = Path(self.page._chromium_options.user_data_path) diff --git a/DrissionPage/_base/browser.pyi b/DrissionPage/_base/browser.pyi index e29afcb..d22aaf2 100644 --- a/DrissionPage/_base/browser.pyi +++ b/DrissionPage/_base/browser.pyi @@ -28,7 +28,7 @@ class Browser(object): def __init__(self, address: str, browser_id: str, page: ChromiumPage): ... - def _get_driver(self, tab_id: str) -> Driver: ... + def _get_driver(self, tab_id: str, owner=None) -> Driver: ... def run_cdp(self, cmd, **cmd_args) -> dict: ... @@ -61,4 +61,4 @@ class Browser(object): def quit(self, timeout: float = 5, force: bool = False) -> None: ... - def _on_quit(self) -> None: ... + def _on_disconnect(self) -> None: ... diff --git a/DrissionPage/_base/driver.py b/DrissionPage/_base/driver.py index d5ce858..feb7e18 100644 --- a/DrissionPage/_base/driver.py +++ b/DrissionPage/_base/driver.py @@ -12,21 +12,23 @@ from time import perf_counter, sleep from requests import get from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection, - WebSocketException) + WebSocketException, WebSocketBadStatusException) -from ..errors import PageDisconnectedError +from ..errors import PageDisconnectedError, TargetNotFoundError class Driver(object): - def __init__(self, tab_id, tab_type, address): + def __init__(self, tab_id, tab_type, address, owner=None): """ :param tab_id: 标签页id :param tab_type: 标签页类型 :param address: 浏览器连接地址 + :param owner: 创建这个驱动的对象 """ self.id = tab_id self.address = address self.type = tab_type + self.owner = owner self._debug = False self.alert_flag = False # 标记alert出现,跳过一条请求后复原 @@ -195,7 +197,10 @@ class Driver(object): def start(self): """启动连接""" self._stopped.clear() - self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True) + try: + self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True) + except WebSocketBadStatusException as e: + raise TargetNotFoundError(f'找不到页面:{self.id}。') if 'No such target id' in str(e) else e self._recv_th.start() self._handle_event_th.start() return True @@ -230,6 +235,9 @@ class Driver(object): self.method_results.clear() self.event_queue.queue.clear() + if hasattr(self.owner, '_on_disconnect'): + self.owner._on_disconnect() + def set_callback(self, event, callback, immediate=False): """绑定cdp event和回调方法 :param event: cdp event @@ -247,18 +255,17 @@ class Driver(object): class BrowserDriver(Driver): BROWSERS = {} - def __new__(cls, tab_id, tab_type, address, browser): + def __new__(cls, tab_id, tab_type, address, owner): if tab_id in cls.BROWSERS: return cls.BROWSERS[tab_id] return object.__new__(cls) - def __init__(self, tab_id, tab_type, address, browser): + def __init__(self, tab_id, tab_type, address, owner): if hasattr(self, '_created'): return self._created = True BrowserDriver.BROWSERS[tab_id] = self - super().__init__(tab_id, tab_type, address) - self.browser = browser + super().__init__(tab_id, tab_type, address, owner) def __repr__(self): return f'' @@ -267,7 +274,3 @@ class BrowserDriver(Driver): r = get(url, headers={'Connection': 'close'}) r.close() return r - - def _stop(self): - super()._stop() - self.browser._on_quit() diff --git a/DrissionPage/_base/driver.pyi b/DrissionPage/_base/driver.pyi index ae86582..a4810a2 100644 --- a/DrissionPage/_base/driver.pyi +++ b/DrissionPage/_base/driver.pyi @@ -27,7 +27,7 @@ class Driver(object): id: str address: str type: str - # _debug: bool + owner = ... alert_flag: bool _websocket_url: str _cur_id: int @@ -42,7 +42,7 @@ class Driver(object): event_queue: Queue immediate_event_queue: Queue - def __init__(self, tab_id: str, tab_type: str, address: str): ... + def __init__(self, tab_id: str, tab_type: str, address: str, owner=None): ... def _send(self, message: dict, timeout: float = None) -> dict: ... @@ -67,10 +67,10 @@ class Driver(object): class BrowserDriver(Driver): BROWSERS: Dict[str, Driver] = ... - browser: Browser = ... + owner: Browser = ... - def __new__(cls, tab_id: str, tab_type: str, address: str, browser: Browser): ... + def __new__(cls, tab_id: str, tab_type: str, address: str, owner: Browser): ... - def __init__(self, tab_id: str, tab_type: str, address: str, browser: Browser): ... + def __init__(self, tab_id: str, tab_type: str, address: str, owner: Browser): ... def get(self, url) -> Response: ... diff --git a/DrissionPage/_configs/configs.ini b/DrissionPage/_configs/configs.ini index 2eb7adb..3872ad9 100644 --- a/DrissionPage/_configs/configs.ini +++ b/DrissionPage/_configs/configs.ini @@ -5,7 +5,7 @@ tmp_path = [chromium_options] address = 127.0.0.1:9222 browser_path = chrome -arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking'] +arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking', '--hide-crash-restore-bubble'] extensions = [] prefs = {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}} flags = {} diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index 9dff0a2..a7e49fe 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -5,6 +5,7 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ +from json import loads from os.path import basename, sep from pathlib import Path from re import search @@ -80,13 +81,13 @@ class ChromiumElement(DrissionElement): attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'' - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @@ -227,8 +228,8 @@ class ChromiumElement(DrissionElement): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ return super().parent(level_or_loc, index) @@ -264,7 +265,7 @@ class ChromiumElement(DrissionElement): return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -275,7 +276,7 @@ class ChromiumElement(DrissionElement): return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 @@ -400,13 +401,14 @@ class ChromiumElement(DrissionElement): """ run_js(self, script, as_expr, 0, args) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def ele(self, loc_or_str, index=1, timeout=None): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象或属性、文本 """ - return self._ele(loc_or_str, timeout, method='ele()') + return self._ele(loc_or_str, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -414,17 +416,18 @@ class ChromiumElement(DrissionElement): :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素,以SessionElement形式返回 + def s_ele(self, loc_or_str=None, index=1): + """查找一个符合条件的元素,以SessionElement形式返回 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ if self.tag in __FRAME_ELEMENT__: - r = make_session_ele(self.inner_html, loc_or_str) + r = make_session_ele(self.inner_html, loc_or_str, index=index) else: - r = make_session_ele(self, loc_or_str) + r = make_session_ele(self, loc_or_str, index=index) if isinstance(r, NoneElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 's_ele()', {'loc_or_str': loc_or_str}) @@ -439,19 +442,19 @@ class ChromiumElement(DrissionElement): :return: SessionElement或属性、文本组成的列表 """ if self.tag in __FRAME_ELEMENT__: - return make_session_ele(self.inner_html, loc_or_str, single=False) - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self.inner_html, loc_or_str, index=None) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=1, relative=False, raise_err=None): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒) - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或文本、属性或其组成的列表 """ - return find_in_chromium_ele(self, loc_or_str, single, timeout, relative=relative) + return find_in_chromium_ele(self, loc_or_str, index, timeout, relative=relative) def style(self, style, pseudo_ele=''): """返回元素样式属性值,可获取伪元素属性值 @@ -806,14 +809,15 @@ class ShadowRoot(BaseElement): def __repr__(self): return f'' - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 超时时间(秒) :return: 元素对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @@ -890,20 +894,23 @@ class ShadowRoot(BaseElement): :param index: 第几个查询结果,1开始 :return: 直接子元素或节点文本组成的列表 """ - nodes = self.children(filter_loc=filter_loc) - if not nodes: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) + if not filter_loc: + loc = '*' + else: + loc = get_loc(filter_loc, True) # 把定位符转换为xpath + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + loc = loc[1].lstrip('./') - try: - return nodes[index - 1] - except IndexError: - if Settings.raise_when_ele_not_found: - raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) - else: - return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) + loc = f'xpath:./{loc}' + ele = self._ele(loc, index=index, relative=True) + if ele: + return ele + + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) def next(self, filter_loc='', index=1): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -911,31 +918,45 @@ class ShadowRoot(BaseElement): :param index: 第几个查询结果,1开始 :return: ChromiumElement对象 """ - nodes = self.nexts(filter_loc=filter_loc) - if nodes: - return nodes[index - 1] + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = f'xpath:./{loc}' + ele = self.parent_ele._ele(xpath, index=index, relative=True) + if ele: + return ele + if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, 'index': index}) else: return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index}) def before(self, filter_loc='', index=1): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 :return: 本元素前面的某个元素或节点 """ - nodes = self.befores(filter_loc=filter_loc) - if nodes: - return nodes[index - 1] + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = f'xpath:./preceding::{loc}' + ele = self.parent_ele._ele(xpath, index=index, relative=True) + if ele: + return ele + if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, 'index': index}) else: return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index}) def after(self, filter_loc='', index=1): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 @@ -963,7 +984,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' - return self._ele(loc, single=False, relative=True) + return self._ele(loc, index=None, relative=True) def nexts(self, filter_loc=''): """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 @@ -976,7 +997,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) + return self.parent_ele._ele(xpath, index=None, relative=True) def befores(self, filter_loc=''): """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -990,7 +1011,7 @@ class ShadowRoot(BaseElement): loc = loc[1].lstrip('./') xpath = f'xpath:./preceding::{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) + return self.parent_ele._ele(xpath, index=None, relative=True) def afters(self, filter_loc=''): """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 @@ -1001,15 +1022,16 @@ class ShadowRoot(BaseElement): eles1 = self.nexts(filter_loc) loc = get_loc(filter_loc, True)[1].lstrip('./') xpath = f'xpath:./following::{loc}' - return eles1 + self.parent_ele._ele(xpath, single=False, relative=True) + return eles1 + self.parent_ele._ele(xpath, index=None, relative=True) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素 + def ele(self, loc_or_str, index=1, timeout=None): + """返回当前元素下级符合条件的一个元素 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象 """ - return self._ele(loc_or_str, timeout, method='ele()') + return self._ele(loc_or_str, timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素 @@ -1017,14 +1039,15 @@ class ShadowRoot(BaseElement): :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 :return: ChromiumElement对象组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + def s_ele(self, loc_or_str=None, index=1): + """查找一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, loc_or_str) + r = make_session_ele(self, loc_or_str, index=index) if isinstance(r, NoneElement): r.method = 's_ele()' r.args = {'loc_or_str': loc_or_str} @@ -1035,13 +1058,13 @@ class ShadowRoot(BaseElement): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象 """ - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_str, timeout=None, index=1, relative=False, raise_err=None): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒) - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或其组成的列表 @@ -1052,15 +1075,15 @@ class ShadowRoot(BaseElement): def do_find(): if loc[0] == 'css selector': - if single: + if index == 1: nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] if nod_id: - r = make_chromium_ele(self.page, node_id=nod_id) + r = make_chromium_eles(self.page, _ids=nod_id, is_obj_id=False) return None if r is False else r else: nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId'] - r = make_chromium_eles(self.page, node_ids=nod_ids, single=False) + r = make_chromium_eles(self.page, _ids=nod_ids, index=index, is_obj_id=False) return None if r is False else r else: @@ -1069,16 +1092,20 @@ class ShadowRoot(BaseElement): return None css = [i.css_path[61:] for i in eles] - if single: - node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId'] - r = make_chromium_ele(self.page, node_id=node_id) + if index is not None: + try: + node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, + selector=css[index - 1])['nodeId'] + except IndexError: + return None + r = make_chromium_eles(self.page, _ids=node_id, is_obj_id=False) return None if r is False else r else: node_ids = [self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] for i in css] if 0 in node_ids: return None - r = make_chromium_eles(self.page, node_ids=node_ids, single=False) + r = make_chromium_eles(self.page, _ids=node_ids, index=index, is_obj_id=False) return None if r is False else r timeout = timeout if timeout is not None else self.page.timeout @@ -1090,7 +1117,7 @@ class ShadowRoot(BaseElement): if result: return result - return NoneElement(self.page) if single else [] + return NoneElement(self.page) if index is not None else [] def _get_node_id(self, obj_id): """返回元素node id""" @@ -1107,11 +1134,11 @@ class ShadowRoot(BaseElement): return r['backendNodeId'] -def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): +def find_in_chromium_ele(ele, loc, index=1, timeout=None, relative=True): """在chromium元素中查找 :param ele: ChromiumElement对象 :param loc: 元素定位元组 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param timeout: 查找元素超时时间(秒) :param relative: WebPage用于标记是否相对定位使用 :return: 返回ChromiumElement元素或它们组成的列表 @@ -1133,25 +1160,25 @@ def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): # ---------------执行查找----------------- if loc[0] == 'xpath': - return find_by_xpath(ele, loc[1], single, timeout, relative=relative) + return find_by_xpath(ele, loc[1], index, timeout, relative=relative) else: - return find_by_css(ele, loc[1], single, timeout) + return find_by_css(ele, loc[1], index, timeout) -def find_by_xpath(ele, xpath, single, timeout, relative=True): +def find_by_xpath(ele, xpath, index, timeout, relative=True): """执行用xpath在元素中查找元素 :param ele: 在此元素中查找 :param xpath: 查找语句 - :param single: 是否只返回第一个结果 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param timeout: 超时时间(秒) :param relative: 是否相对定位 :return: ChromiumElement或其组成的列表 """ - type_txt = '9' if single else '7' + type_txt = '9' if index == 1 else '7' node_txt = 'this.contentDocument' if ele.tag in __FRAME_ELEMENT__ and not relative else 'this' js = make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt) - ele.page.wait.load_complete() + ele.page.wait.doc_loaded() def do_find(): res = ele.page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, @@ -1170,21 +1197,30 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True): if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None - if single: - r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + if index == 1: + r = make_chromium_eles(ele.page, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: - # from pprint import pprint - # for i in ele.page.run_cdp('Runtime.getProperties', - # objectId=res['result']['objectId'], - # ownProperties=True)['result'][:-1]: - # pprint(i) - r = [make_chromium_ele(ele.page, obj_id=i['value']['objectId']) if i['value']['type'] == 'object' else - i['value']['value'] for i in ele.page.run_cdp('Runtime.getProperties', - objectId=res['result']['objectId'], - ownProperties=True)['result'][:-1]] - return None if not r or r is False in r else r + res = ele.page.run_cdp('Runtime.getProperties', objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1] + if index is None: + r = [make_chromium_eles(ele.page, _ids=i['value']['objectId'], is_obj_id=True) + if i['value']['type'] == 'object' else i['value']['value'] for i in res] + return None if False in r else r + + else: + eles_count = len(res) + if eles_count == 0 or abs(index) > eles_count: + return None + + index1 = eles_count + index + 1 if index < 0 else index + res = res[index1 - 1] + if res['value']['type'] == 'object': + r = make_chromium_eles(ele.page, _ids=res['value']['objectId'], is_obj_id=True) + else: + r = res['value']['value'] + return None if r is False else r end_time = perf_counter() + timeout result = do_find() @@ -1194,23 +1230,23 @@ def find_by_xpath(ele, xpath, single, timeout, relative=True): if result: return result - return NoneElement(ele.page) if single else [] + return NoneElement(ele.page) if index is not None else [] -def find_by_css(ele, selector, single, timeout): +def find_by_css(ele, selector, index, timeout): """执行用css selector在元素中查找元素 :param ele: 在此元素中查找 :param selector: 查找语句 - :param single: 是否只返回第一个结果 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param timeout: 超时时间(秒) :return: ChromiumElement或其组成的列表 """ selector = selector.replace('"', r'\"') - find_all = '' if single else 'All' + find_all = '' if index == 1 else 'All' node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame', 'shadow-root') else 'this' js = f'function(){{return {node_txt}.querySelector{find_all}("{selector}");}}' - ele.page.wait.load_complete() + ele.page.wait.doc_loaded() def do_find(): res = ele.page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, @@ -1221,15 +1257,15 @@ def find_by_css(ele, selector, single, timeout): if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None - if single: - r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + if index == 1: + r = make_chromium_eles(ele.page, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: - node_ids = [i['value']['objectId'] for i in ele.page.run_cdp('Runtime.getProperties', - objectId=res['result']['objectId'], - ownProperties=True)['result'][:-1]] - r = make_chromium_eles(ele.page, obj_ids=node_ids, single=False, ele_only=False) + obj_ids = [i['value']['objectId'] for i in ele.page.run_cdp('Runtime.getProperties', + objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1]] + r = make_chromium_eles(ele.page, _ids=obj_ids, index=index, is_obj_id=True) return None if r is False else r end_time = perf_counter() + timeout @@ -1240,115 +1276,81 @@ def find_by_css(ele, selector, single, timeout): if result: return result - return NoneElement(ele.page) if single else [] + return NoneElement(ele.page) if index is not None else [] -def make_chromium_ele(page, node_id=None, obj_id=None): +def make_chromium_eles(page, _ids, index=1, is_obj_id=True): """根据node id或object id生成相应元素对象 :param page: ChromiumPage对象 - :param node_id: 元素的node id - :param obj_id: 元素的object id - :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False + :param _ids: 元素的id列表 + :param index: 获取第几个,为None返回全部 + :param is_obj_id: 传入的id是obj id还是node id + :return: 浏览器元素对象或它们组成的列表,生成失败返回False """ - if node_id: - node = page.driver.run('DOM.describeNode', nodeId=node_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - # todo: Node() - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - obj_id = page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] + if is_obj_id: + get_node_func = _get_node_by_obj_id + else: + get_node_func = _get_node_by_node_id + if not isinstance(_ids, (list, tuple)): + _ids = (_ids,) + + if index is not None: # 获取一个 + obj_id = _ids[index - 1] + return get_node_func(page, obj_id) + + else: # 获取全部 + nodes = [] + for obj_id in _ids: + tmp = get_node_func(page, obj_id) + if tmp is False: + return False + nodes.append(tmp) + return nodes + + +def _get_node_info(page, id_type, _id): + if not _id: + return False + arg = {id_type: _id} + node = page.driver.run('DOM.describeNode', **arg) + if 'error' in node: + return False + return node + + +def _get_node_by_obj_id(page, obj_id): + node = _get_node_info(page, 'objectId', obj_id) + if node is False: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + return node['node']['nodeValue'] + else: + return _make_ele(page, obj_id, node) + + +def _get_node_by_node_id(page, node_id): + node = _get_node_info(page, 'nodeId', node_id) + if node is False: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + return node['node']['nodeValue'] + else: + obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) if 'error' in obj_id: return False + obj_id = obj_id['object']['objectId'] + return _make_ele(page, obj_id, node) - elif obj_id: - node = page.driver.run('DOM.describeNode', objectId=obj_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - # todo: Node() - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - node_id = node['node']['nodeId'] - else: - return False - - ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=backend_id) +def _make_ele(page, obj_id, node): + ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], + backend_id=node['node']['backendNodeId']) if ele.tag in __FRAME_ELEMENT__: from .._pages.chromium_frame import ChromiumFrame ele = ChromiumFrame(page, ele, node) - return ele -def make_chromium_eles(page, node_ids=None, obj_ids=None, single=True, ele_only=True): - """根据node id或object id生成相应元素对象 - :param page: ChromiumPage对象 - :param node_ids: 元素的node id - :param obj_ids: 元素的object id - :param single: 是否获取但个元素 - :param ele_only: 是否只要ele - :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False - """ - nodes = [] - if node_ids: - for node_id in node_ids: - if not node_id: - return False - node = page.driver.run('DOM.describeNode', nodeId=node_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - if ele_only: - continue - else: - if single: - return node['node']['nodeValue'] - else: - nodes.append(node['node']['nodeValue']) - - obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) - if 'error' in obj_id: - return False - obj_id = obj_id['object']['objectId'] - ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=node['node']['backendNodeId']) - if ele.tag in __FRAME_ELEMENT__: - from .._pages.chromium_frame import ChromiumFrame - ele = ChromiumFrame(page, ele, node) - if single: - return ele - nodes.append(ele) - - if obj_ids: - for obj_id in obj_ids: - if not obj_id: - return False - node = page.driver.run('DOM.describeNode', objectId=obj_id) - if 'error' in node: - return False - if node['node']['nodeName'] in ('#text', '#comment'): - if ele_only: - continue - else: - if single: - return node['node']['nodeValue'] - else: - nodes.append(node['node']['nodeValue']) - - ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], - backend_id=node['node']['backendNodeId']) - if ele.tag in __FRAME_ELEMENT__: - from .._pages.chromium_frame import ChromiumFrame - ele = ChromiumFrame(page, ele, node) - if single: - return ele - nodes.append(ele) - - return NoneElement(page) if single and not nodes else nodes - - def make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt): """生成用xpath在元素中查找元素的js文本 :param xpath: xpath文本 @@ -1391,7 +1393,7 @@ else{a.push(e.snapshotItem(i));}}""" return js -def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): +def run_js(page_or_ele, script, as_expr, timeout, args=None): """运行javascript代码 :param page_or_ele: 页面对象或元素对象 :param script: js文本 @@ -1418,6 +1420,7 @@ def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): if page.states.has_alert: raise AlertExistsError + end_time = perf_counter() + timeout try: if as_expr: res = page.run_cdp('Runtime.evaluate', expression=script, returnByValue=False, @@ -1446,18 +1449,17 @@ def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): raise JavaScriptError(f'\njavascript运行错误:\n{script}\n错误信息: \n{exceptionDetails}') try: - return parse_js_result(page, page_or_ele, res.get('result')) + return parse_js_result(page, page_or_ele, res.get('result'), end_time) except Exception: return res -def parse_js_result(page, ele, result): +def parse_js_result(page, ele, result, end_time): """解析js返回的结果""" if 'unserializableValue' in result: return result['unserializableValue'] the_type = result['type'] - if the_type == 'object': sub_type = result.get('subtype', None) if sub_type == 'null': @@ -1470,21 +1472,31 @@ def parse_js_result(page, ele, result): elif class_name == 'HTMLDocument': return result else: - r = make_chromium_ele(page, obj_id=result['objectId']) + r = make_chromium_eles(page, _ids=result['objectId']) if r is False: raise ElementLostError return r elif sub_type == 'array': r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], ownProperties=True)['result'] - return [parse_js_result(page, ele, result=i['value']) for i in r[:-1]] + return [parse_js_result(page, ele, result=i['value'], end_time=end_time) for i in r[:-1]] elif 'objectId' in result and result['className'].lower() == 'object': # dict r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], ownProperties=True)['result'] - return {i['name']: parse_js_result(page, ele, result=i['value']) for i in r} + return {i['name']: parse_js_result(page, ele, result=i['value'], end_time=end_time) for i in r} + + elif 'objectId' in result: + timeout = end_time - perf_counter() + if timeout < 0: + return + js = 'function(){return JSON.stringify(this);}' + r = page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=result['objectId'], + returnByValue=False, awaitPromise=True, userGesture=True, _ignore=AlertExistsError, + _timeout=timeout) + return loads(parse_js_result(page, ele, r['result'], end_time)) else: - return result['value'] + return result.get('value', result) elif the_type == 'undefined': return None diff --git a/DrissionPage/_elements/chromium_element.pyi b/DrissionPage/_elements/chromium_element.pyi index fa341ca..cbddbdc 100644 --- a/DrissionPage/_elements/chromium_element.pyi +++ b/DrissionPage/_elements/chromium_element.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Union, Tuple, List, Any, Literal +from typing import Union, Tuple, List, Any, Literal, Optional from .none_element import NoneElement from .._base.base import DrissionElement, BaseElement @@ -47,7 +47,9 @@ class ChromiumElement(DrissionElement): def __repr__(self) -> str: ... - def __call__(self, loc_or_str: Union[Tuple[str, str], str], + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def __eq__(self, other: ChromiumElement) -> bool: ... @@ -175,20 +177,23 @@ class ChromiumElement(DrissionElement): def ele(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[SessionElement]: ... def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, + index: Optional[int] = 1, relative: bool = False, raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... @@ -286,20 +291,28 @@ class ShadowRoot(BaseElement): def afters(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... - def ele(self, loc_or_str: Union[Tuple[str, str], str], + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... - def eles(self, loc_or_str: Union[Tuple[str, str], str], + def eles(self, + loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... - def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, List[Union[ChromiumElement, - ChromiumFrame, str]]]: ... + def _find_elements(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + index: Optional[int] = 1, + relative: bool = False, + raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, + List[Union[ChromiumElement, ChromiumFrame, str]]]: ... def _get_node_id(self, obj_id: str) -> int: ... @@ -308,40 +321,48 @@ class ShadowRoot(BaseElement): def _get_backend_id(self, node_id: int) -> int: ... -def find_in_chromium_ele(ele: ChromiumElement, loc: Union[str, Tuple[str, str]], - single: bool = True, timeout: float = None, relative: bool = True) \ - -> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ... +def find_in_chromium_ele(ele: ChromiumElement, + loc: Union[str, Tuple[str, str]], + index: Optional[int] = 1, + timeout: float = None, + relative: bool = True) -> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ... -def find_by_xpath(ele: ChromiumElement, xpath: str, single: bool, timeout: float, +def find_by_xpath(ele: ChromiumElement, + xpath: str, + index: Optional[int], + timeout: float, relative: bool = True) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... -def find_by_css(ele: ChromiumElement, selector: str, single: bool, +def find_by_css(ele: ChromiumElement, + selector: str, + index: Optional[int], timeout: float) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... -def make_chromium_ele(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], - node_id: int = ..., - obj_id: str = ...) -> Union[ChromiumElement, ChromiumFrame, str]: ... - - def make_chromium_eles(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], - node_ids: Union[tuple, list] = None, - obj_ids: Union[tuple, list] = None, - single: bool = True, - ele_only: bool = True) -> Union[ChromiumElement, ChromiumFrame, NoneElement, + _ids: Union[tuple, list, str, int], + index: Optional[int] = 1, + is_obj_id: bool = True + ) -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... -def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], script: str, - as_expr: bool = False, timeout: float = None, args: tuple = ...) -> Any: ... +def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], + script: str, + as_expr: bool, + timeout: float, + args: tuple = ...) -> Any: ... -def parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ... +def parse_js_result(page: ChromiumBase, + ele: ChromiumElement, + result: dict, + end_time: float): ... def convert_argument(arg: Any) -> dict: ... diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index cc93772..5ab59f3 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -118,7 +118,7 @@ class SessionElement(DrissionElement): return super().next(index, filter_loc, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -129,7 +129,7 @@ class SessionElement(DrissionElement): return super().before(index, filter_loc, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 @@ -219,13 +219,14 @@ class SessionElement(DrissionElement): else: return self.inner_ele.get(attr) - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def ele(self, loc_or_str, index=1, timeout=None): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param index: 第几个元素,从1开始,可传入负数获取倒数第几个 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str, method='ele()') + return self._ele(loc_or_str, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -233,32 +234,33 @@ class SessionElement(DrissionElement): :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 :return: SessionElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def s_ele(self, loc_or_str=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 + def s_ele(self, loc_or_str=None, index=1): + """返回当前元素下级符合条件的一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str, method='s_ele()') + return self._ele(loc_or_str, index=index, method='s_ele()') def s_eles(self, loc_or_str): """返回当前元素下级所有符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本组成的列表 """ - return self._ele(loc_or_str, single=False) + return self._ele(loc_or_str, index=None) - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 + def _find_elements(self, loc_or_str, timeout=None, index=1, relative=False, raise_err=None): + """返回当前元素下级符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: SessionElement对象 """ - return make_session_ele(self, loc_or_str, single) + return make_session_ele(self, loc_or_str, index=index) def _get_ele_path(self, mode): """获取css路径或xpath路径 @@ -281,19 +283,18 @@ class SessionElement(DrissionElement): return f'{path_str[1:]}' if mode == 'css' else path_str -def make_session_ele(html_or_ele, loc=None, single=True): +def make_session_ele(html_or_ele, loc=None, index=1): """从接收到的对象或html文本中查找元素,返回SessionElement对象 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 :param html_or_ele: html文本、BaseParser对象 :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 - :param single: True则返回第一个,False则返回全部 + :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个,None获取所有 :return: 返回SessionElement元素或列表,或属性文本 """ # ---------------处理定位符--------------- if not loc: if isinstance(html_or_ele, SessionElement): - return html_or_ele if single else [html_or_ele] - + return html_or_ele loc = ('xpath', '.') elif isinstance(loc, (str, tuple)): @@ -368,16 +369,25 @@ def make_session_ele(html_or_ele, loc=None, single=True): # ---------------执行查找----------------- try: if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 - ele = html_or_ele.xpath(loc[1]) + eles = html_or_ele.xpath(loc[1]) else: # 用css selector获取元素对象列表 - ele = html_or_ele.cssselect(loc[1]) + eles = html_or_ele.cssselect(loc[1]) - if not isinstance(ele, list): # 结果不是列表,如数字 - return ele + if not isinstance(eles, list): # 结果不是列表,如数字 + return eles - # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 - if single: - ele = ele[0] if ele else None + # 把lxml元素对象包装成SessionElement对象并按需要返回一个或全部 + if index is None: + return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in eles if e != '\n'] + + else: + eles_count = len(eles) + if eles_count == 0 or abs(index) > eles_count: + return NoneElement(page) + if index < 0: + index = eles_count + index + 1 + + ele = eles[index - 1] if isinstance(ele, HtmlElement): return SessionElement(ele, page) elif isinstance(ele, str): @@ -385,9 +395,6 @@ def make_session_ele(html_or_ele, loc=None, single=True): else: return NoneElement(page) - else: # 返回全部 - return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] - except Exception as e: if 'Invalid expression' in str(e): raise SyntaxError(f'无效的xpath语句:{loc}') diff --git a/DrissionPage/_elements/session_element.pyi b/DrissionPage/_elements/session_element.pyi index 5c82e6f..4e6fb8f 100644 --- a/DrissionPage/_elements/session_element.pyi +++ b/DrissionPage/_elements/session_element.pyi @@ -30,6 +30,7 @@ class SessionElement(DrissionElement): def __call__(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[SessionElement, NoneElement]: ... def __eq__(self, other: SessionElement) -> bool: ... @@ -115,6 +116,7 @@ class SessionElement(DrissionElement): def ele(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[SessionElement, NoneElement]: ... def eles(self, @@ -122,18 +124,17 @@ class SessionElement(DrissionElement): timeout: float = None) -> List[SessionElement]: ... def s_ele(self, - loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + loc_or_str: Union[Tuple[str, str], str] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, + index: Optional[int] = 1, relative: bool = False, - raise_err: bool = None) \ - -> Union[SessionElement, NoneElement, List[SessionElement]]: ... + raise_err: bool = None) -> Union[SessionElement, NoneElement, List[SessionElement]]: ... def _get_ele_path(self, mode: str) -> str: ... @@ -141,5 +142,4 @@ class SessionElement(DrissionElement): def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame, ChromiumBase], loc: Union[str, Tuple[str, str]] = None, - single: bool = True) -> Union[ - SessionElement, NoneElement, List[SessionElement]]: ... + index: Optional[int] = 1) -> Union[SessionElement, NoneElement, List[SessionElement]]: ... diff --git a/DrissionPage/_functions/settings.py b/DrissionPage/_functions/settings.py index 225190c..07374ac 100644 --- a/DrissionPage/_functions/settings.py +++ b/DrissionPage/_functions/settings.py @@ -11,3 +11,4 @@ class Settings(object): raise_when_ele_not_found = False raise_when_click_failed = False raise_when_wait_failed = False + singleton_tab_obj = True diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index dbaf4f8..9967f0f 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -124,7 +124,7 @@ class ChromiumBase(BasePage): :return: None """ self._is_loading = True - self._driver = self.browser._get_driver(tab_id) + self._driver = self.browser._get_driver(tab_id, self) self._alert = Alert() self._driver.set_callback('Page.javascriptDialogOpening', self._on_alert_open, immediate=True) @@ -244,14 +244,15 @@ class ChromiumBase(BasePage): self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self._upload_list = None - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个 :param timeout: 超时时间(秒) :return: ChromiumElement对象 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index, timeout) def _wait_to_stop(self): """eager策略超时时使页面停止加载""" @@ -289,7 +290,7 @@ class ChromiumBase(BasePage): """返回用于执行动作链的对象""" if self._actions is None: self._actions = Actions(self) - self.wait.load_complete() + self.wait.doc_loaded() return self._actions @property @@ -309,7 +310,7 @@ class ChromiumBase(BasePage): @property def scroll(self): """返回用于滚动滚动条的对象""" - self.wait.load_complete() + self.wait.doc_loaded() if self._scroll is None: self._scroll = PageScroller(self) return self._scroll @@ -317,7 +318,7 @@ class ChromiumBase(BasePage): @property def rect(self): """返回获取窗口坐标和大小的对象""" - # self.wait.load_complete() + # self.wait.doc_loaded() if self._rect is None: self._rect = TabRect(self) return self._rect @@ -358,7 +359,7 @@ class ChromiumBase(BasePage): @property def html(self): """返回当前页面html文本""" - self.wait.load_complete() + self.wait.doc_loaded() return self.run_cdp('DOM.getOuterHTML', objectId=self._root_id)['outerHTML'] @property @@ -425,7 +426,7 @@ class ChromiumBase(BasePage): :param cmd_args: 参数 :return: 执行的结果 """ - self.wait.load_complete() + self.wait.doc_loaded() return self.run_cdp(cmd, **cmd_args) def run_js(self, script, *args, as_expr=False, timeout=None): @@ -446,7 +447,7 @@ class ChromiumBase(BasePage): :param timeout: js超时时间(秒),为None则使用页面timeouts.script属性值 :return: 运行的结果 """ - self.wait.load_complete() + self.wait.doc_loaded() return run_js(self, script, as_expr, self.timeouts.script if timeout is None else timeout, args) def run_async_js(self, script, *args, as_expr=False): @@ -490,13 +491,14 @@ class ChromiumBase(BasePage): return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} for cookie in cookies] - def ele(self, loc_or_ele, timeout=None): - """获取第一个符合条件的元素对象 + def ele(self, loc_or_ele, index=1, timeout=None): + """获取一个符合条件的元素对象 :param loc_or_ele: 定位符或元素对象 + :param index: 获取第几个元素,从1开始,可传入负数获取倒数第几个 :param timeout: 查找超时时间(秒) :return: ChromiumElement对象 """ - return self._ele(loc_or_ele, timeout=timeout, method='ele()') + return self._ele(loc_or_ele, timeout=timeout, index=index, method='ele()') def eles(self, loc_or_str, timeout=None): """获取所有符合条件的元素对象 @@ -504,14 +506,15 @@ class ChromiumBase(BasePage): :param timeout: 查找超时时间(秒) :return: ChromiumElement对象组成的列表 """ - return self._ele(loc_or_str, timeout=timeout, single=False) + return self._ele(loc_or_str, timeout=timeout, index=None) - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + def s_ele(self, loc_or_ele=None, index=1): + """查找一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ - r = make_session_ele(self, loc_or_ele) + r = make_session_ele(self, loc_or_ele, index=index) if isinstance(r, NoneElement): if Settings.raise_when_ele_not_found: raise ElementNotFoundError(None, 's_ele()', {'loc_or_ele': loc_or_ele}) @@ -525,13 +528,13 @@ class ChromiumBase(BasePage): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象组成的列表 """ - return make_session_ele(self, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, index=None) - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, relative=False, raise_err=None): """执行元素查找 :param loc_or_ele: 定位符或元素对象 :param timeout: 查找超时时间(秒) - :param single: 是否只返回第一个 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象或元素对象组成的列表 @@ -543,7 +546,7 @@ class ChromiumBase(BasePage): else: raise ValueError('loc_or_str参数只能是tuple、str、ChromiumElement类型。') - self.wait.load_complete() + self.wait.doc_loaded() timeout = timeout if timeout is not None else self.timeout end_time = perf_counter() + timeout @@ -558,16 +561,28 @@ class ChromiumBase(BasePage): while True: if num > 0: - num = 1 if single else num - nIds = self._driver.run('DOM.getSearchResults', searchId=result['searchId'], fromIndex=0, toIndex=num) - if __ERROR__ not in nIds: - if nIds['nodeIds'][0] != 0: - r = make_chromium_eles(self, node_ids=nIds['nodeIds'], single=single) - if r is not False: - break + from_index = index_arg = 0 + if index is None: + end_index = num + index_arg = None + elif index < 0: + from_index = index + num + end_index = from_index + 1 + else: + from_index = index - 1 + end_index = from_index + 1 + + if from_index <= num - 1: + nIds = self._driver.run('DOM.getSearchResults', searchId=result['searchId'], + fromIndex=from_index, toIndex=end_index) + if __ERROR__ not in nIds: + if nIds['nodeIds'][0] != 0: + r = make_chromium_eles(self, _ids=nIds['nodeIds'], index=index_arg, is_obj_id=False) + if r is not False: + break if perf_counter() >= end_time: - return NoneElement(self) if single else [] + return NoneElement(self) if index is not None else [] sleep(.1) timeout = end_time - perf_counter() @@ -653,8 +668,8 @@ class ChromiumBase(BasePage): self.run_cdp('DOM.removeNode', nodeId=ele._node_id) def get_frame(self, loc_ind_ele, timeout=None): - """获取页面中一个frame对象,可传入定位符、iframe序号、ChromiumFrame对象,序号从0开始 - :param loc_ind_ele: 定位符、iframe序号、ChromiumFrame对象 + """获取页面中一个frame对象 + :param loc_ind_ele: 定位符、iframe序号、ChromiumFrame对象,序号从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒) :return: ChromiumFrame对象 """ @@ -676,9 +691,11 @@ class ChromiumBase(BasePage): r = ele elif isinstance(loc_ind_ele, int): - if loc_ind_ele < 0: - raise ValueError('序号必须大于等于0。') - xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele + 1}]' + if loc_ind_ele == 0: + loc_ind_ele = 1 + elif loc_ind_ele < 0: + loc_ind_ele = f'last()+{loc_ind_ele}+1' + xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele}]' r = self._ele(xpath, timeout=timeout) elif str(type(loc_ind_ele)).endswith(".ChromiumFrame'>"): @@ -699,7 +716,7 @@ class ChromiumBase(BasePage): :return: ChromiumFrame对象组成的列表 """ loc = loc or 'xpath://*[name()="iframe" or name()="frame"]' - frames = self._ele(loc, timeout=timeout, single=False, raise_err=False) + frames = self._ele(loc, timeout=timeout, index=None, raise_err=False) return [i for i in frames if str(type(i)).endswith(".ChromiumFrame'>")] def get_session_storage(self, item=None): diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index d4993b1..ae21e4f 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -93,7 +93,9 @@ class ChromiumBase(BasePage): def _d_set_runtime_settings(self) -> None: ... - def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement], + def __call__(self, + loc_or_str: Union[Tuple[str, str], str, ChromiumElement], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... @property @@ -177,19 +179,27 @@ class ChromiumBase(BasePage): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[list, dict]: ... - def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... - def eles(self, loc_or_str: Union[Tuple[str, str], str], + def eles(self, + loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[ChromiumElement]: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_ele: Union[Tuple[str, str], str] = None, + index:int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None, + index: Optional[int] = 1, + relative: bool = False, + raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... def refresh(self, ignore_cache: bool = False) -> None: ... @@ -279,4 +289,4 @@ def get_mhtml(page: Union[ChromiumPage, ChromiumTab], def get_pdf(page: Union[ChromiumPage, ChromiumTab], path: Union[str, Path] = None, - name: str = None, kwargs: dict=None) -> bytes: ... + name: str = None, kwargs: dict = None) -> bytes: ... diff --git a/DrissionPage/_pages/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py index a7aac92..e224b23 100644 --- a/DrissionPage/_pages/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -58,20 +58,21 @@ class ChromiumFrame(ChromiumBase): self.doc_ele = ChromiumElement(self, obj_id=obj_id) self._rect = None - end_time = perf_counter() + 5 + end_time = perf_counter() + 2 while perf_counter() < end_time: if self.url not in (None, 'about:blank'): break sleep(.1) - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ - return self.ele(loc_or_str, timeout) + return self.ele(loc_or_str, index=index, timeout=timeout) def __eq__(self, other): return self._frame_id == getattr(other, '_frame_id', None) @@ -211,7 +212,7 @@ class ChromiumFrame(ChromiumBase): @property def scroll(self): """返回用于滚动的对象""" - self.wait.load_complete() + self.wait.doc_loaded() if self._scroll is None: self._scroll = FrameScroller(self) return self._scroll @@ -388,8 +389,8 @@ class ChromiumFrame(ChromiumBase): def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :param level_or_loc: 第几级父元素,1开始,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :return: 上级元素对象 """ return self.frame_ele.parent(level_or_loc, index) @@ -415,7 +416,7 @@ class ChromiumFrame(ChromiumBase): return self.frame_ele.next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 @@ -426,7 +427,7 @@ class ChromiumFrame(ChromiumBase): return self.frame_ele.before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 @@ -561,20 +562,20 @@ class ChromiumFrame(ChromiumBase): self.tab.remove_ele(new_ele) return r - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, relative=False, raise_err=None): """在frame内查找单个元素 :param loc_or_ele: 定位符或元素对象 :param timeout: 查找超时时间 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象 """ if isinstance(loc_or_ele, ChromiumElement): return loc_or_ele - self.wait.load_complete() - return self.doc_ele._ele(loc_or_ele, timeout, - raise_err=raise_err) if single else self.doc_ele.eles(loc_or_ele, timeout) + self.wait.doc_loaded() + return self.doc_ele._ele(loc_or_ele, index=index, timeout=timeout, + raise_err=raise_err) if index is not None else self.doc_ele.eles(loc_or_ele, timeout) def _is_inner_frame(self): """返回当前frame是否同域""" diff --git a/DrissionPage/_pages/chromium_frame.pyi b/DrissionPage/_pages/chromium_frame.pyi index 74c3314..f18f4ac 100644 --- a/DrissionPage/_pages/chromium_frame.pyi +++ b/DrissionPage/_pages/chromium_frame.pyi @@ -6,7 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from .chromium_base import ChromiumBase from .chromium_page import ChromiumPage @@ -44,6 +44,7 @@ class ChromiumFrame(ChromiumBase): def __call__(self, loc_or_str: Union[Tuple[str, str], str], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... def __eq__(self, other: ChromiumFrame) -> bool: ... @@ -209,7 +210,7 @@ class ChromiumFrame(ChromiumBase): def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, - single: bool = True, + index: Optional[int] = 1, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ... diff --git a/DrissionPage/_pages/chromium_page.py b/DrissionPage/_pages/chromium_page.py index 820d085..f7d4ac5 100644 --- a/DrissionPage/_pages/chromium_page.py +++ b/DrissionPage/_pages/chromium_page.py @@ -22,6 +22,26 @@ from ..errors import BrowserConnectError class ChromiumPage(ChromiumBase): """用于管理浏览器的类""" + PAGES = {} + + def __new__(cls, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None): + """ + :param addr_or_opts: 浏览器地址:端口、ChromiumOptions对象或端口数字(int) + :param tab_id: 要控制的标签页id,不指定默认为激活的 + :param timeout: 超时时间(秒) + """ + addr_or_opts = addr_or_opts or addr_driver_opts + opt = handle_options(addr_or_opts) + is_exist, browser_id = run_browser(opt) + if browser_id in cls.PAGES: + return cls.PAGES[browser_id] + r = object.__new__(cls) + r._chromium_options = opt + r._is_exist = is_exist + r._browser_id = browser_id + r.address = opt.address + cls.PAGES[browser_id] = r + return r def __init__(self, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None): """ @@ -29,58 +49,20 @@ class ChromiumPage(ChromiumBase): :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间(秒) """ - addr_or_opts = addr_or_opts or addr_driver_opts + if hasattr(self, '_created'): + return + self._created = True + self._page = self - address = self._handle_options(addr_or_opts) self._run_browser() - super().__init__(address, tab_id) + super().__init__(self.address, tab_id) self.set.timeouts(base=timeout) self._page_init() - def _handle_options(self, addr_or_opts): - """设置浏览器启动属性 - :param addr_or_opts: 'ip:port'、ChromiumOptions、Driver - :return: 返回浏览器地址 - """ - if not addr_or_opts: - self._chromium_options = ChromiumOptions(addr_or_opts) - - elif isinstance(addr_or_opts, ChromiumOptions): - if addr_or_opts.is_auto_port: - port, path = PortFinder(addr_or_opts.tmp_path).get_port() - addr_or_opts.set_address(f'127.0.0.1:{port}') - addr_or_opts.set_user_data_path(path) - addr_or_opts.auto_port() - self._chromium_options = addr_or_opts - - elif isinstance(addr_or_opts, str): - self._chromium_options = ChromiumOptions() - self._chromium_options.set_address(addr_or_opts) - - elif isinstance(addr_or_opts, int): - self._chromium_options = ChromiumOptions() - self._chromium_options.set_local_port(addr_or_opts) - - else: - raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。') - - return self._chromium_options.address - def _run_browser(self): """连接浏览器""" - is_exist = connect_browser(self._chromium_options) - try: - ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'}) - if not ws: - raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') - ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1] - except KeyError: - raise BrowserConnectError('浏览器版本太旧,请升级。') - except: - raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') - - self._browser = Browser(self._chromium_options.address, ws, self) - if (is_exist and self._chromium_options._headless is False and + self._browser = Browser(self._chromium_options.address, self._browser_id, self) + if (self._is_exist and self._chromium_options._headless is False and 'headless' in self._browser.run_cdp('Browser.getVersion')['userAgent'].lower()): self._browser.quit(3) connect_browser(self._chromium_options) @@ -156,17 +138,17 @@ class ChromiumPage(ChromiumBase): :param kwargs: pdf生成参数 :return: as_pdf为True时返回bytes,否则返回文件文本 """ - return get_pdf(self, path, name, kwargs)if as_pdf else get_mhtml(self, path, name) + return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name) def get_tab(self, id_or_num=None): """获取一个标签页对象 - :param id_or_num: 要获取的标签页id或序号,为None时获取当前tab,序号不是视觉排列顺序,而是激活顺序 + :param id_or_num: 要获取的标签页id或序号,为None时获取当前tab,序号从1开始,可传入负数获取倒数第几个,不是视觉排列顺序,而是激活顺序 :return: 标签页对象 """ if isinstance(id_or_num, str): return ChromiumTab(self, id_or_num) elif isinstance(id_or_num, int): - return ChromiumTab(self, self.tabs[id_or_num]) + return ChromiumTab(self, self.tabs[id_or_num - 1 if id_or_num > 0 else id_or_num]) elif id_or_num is None: return ChromiumTab(self, self.tab_id) elif isinstance(id_or_num, ChromiumTab): @@ -263,6 +245,10 @@ class ChromiumPage(ChromiumBase): """ self.browser.quit(timeout, force) + def _on_disconnect(self): + """浏览器退出时执行""" + ChromiumPage.PAGES.pop(self._browser_id, None) + def __repr__(self): return f'' @@ -275,6 +261,51 @@ class ChromiumPage(ChromiumBase): self.close_tabs(tabs_or_ids, True) +def handle_options(addr_or_opts): + """设置浏览器启动属性 + :param addr_or_opts: 'ip:port'、ChromiumOptions、Driver + :return: 返回ChromiumOptions对象 + """ + if not addr_or_opts: + _chromium_options = ChromiumOptions(addr_or_opts) + + elif isinstance(addr_or_opts, ChromiumOptions): + if addr_or_opts.is_auto_port: + port, path = PortFinder(addr_or_opts.tmp_path).get_port() + addr_or_opts.set_address(f'127.0.0.1:{port}') + addr_or_opts.set_user_data_path(path) + addr_or_opts.auto_port() + _chromium_options = addr_or_opts + + elif isinstance(addr_or_opts, str): + _chromium_options = ChromiumOptions() + _chromium_options.set_address(addr_or_opts) + + elif isinstance(addr_or_opts, int): + _chromium_options = ChromiumOptions() + _chromium_options.set_local_port(addr_or_opts) + + else: + raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。') + + return _chromium_options + + +def run_browser(chromium_options): + """连接浏览器""" + is_exist = connect_browser(chromium_options) + try: + ws = get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'}) + if not ws: + raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') + browser_id = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + except KeyError: + raise BrowserConnectError('浏览器版本太旧,请升级。') + except: + raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') + return is_exist, browser_id + + def get_rename(original, rename): if '.' in rename: return rename diff --git a/DrissionPage/_pages/chromium_page.pyi b/DrissionPage/_pages/chromium_page.pyi index 407cd1a..171f488 100644 --- a/DrissionPage/_pages/chromium_page.pyi +++ b/DrissionPage/_pages/chromium_page.pyi @@ -18,6 +18,12 @@ from .._units.waiter import PageWaiter class ChromiumPage(ChromiumBase): + PAGES: dict = ... + + def __new__(cls, + addr_or_opts: Union[str, int, ChromiumOptions] = None, + tab_id: str = None, + timeout: float = None): ... def __init__(self, addr_or_opts: Union[str, int, ChromiumOptions] = None, @@ -25,7 +31,9 @@ class ChromiumPage(ChromiumBase): timeout: float = None): self._chromium_options: ChromiumOptions = ... self._browser: Browser = ... + self._browser_id: str = ... self._rect: Optional[TabRect] = ... + self._is_exist: bool = ... def _handle_options(self, addr_or_opts: Union[str, ChromiumOptions]) -> str: ... @@ -95,5 +103,13 @@ class ChromiumPage(ChromiumBase): def quit(self, timeout: float = 5, force: bool = True) -> None: ... + def _on_disconnect(self) -> None: ... + + +def handle_options(addr_or_opts): ... + + +def run_browser(chromium_options): ... + def get_rename(original: str, rename: str) -> str: ... diff --git a/DrissionPage/_pages/chromium_tab.py b/DrissionPage/_pages/chromium_tab.py index cbab53e..9764f8d 100644 --- a/DrissionPage/_pages/chromium_tab.py +++ b/DrissionPage/_pages/chromium_tab.py @@ -9,6 +9,7 @@ from copy import copy from .._base.base import BasePage from .._configs.session_options import SessionOptions +from .._functions.settings import Settings from .._functions.web import set_session_cookies, set_browser_cookies from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf from .._pages.session_page import SessionPage @@ -18,12 +19,28 @@ from .._units.waiter import TabWaiter class ChromiumTab(ChromiumBase): """实现浏览器标签页的类""" + TABS = {} - def __init__(self, page, tab_id=None): + def __new__(cls, page, tab_id): """ :param page: ChromiumPage对象 - :param tab_id: 要控制的标签页id,不指定默认为激活的 + :param tab_id: 要控制的标签页id """ + if Settings.singleton_tab_obj and tab_id in cls.TABS: + return cls.TABS[tab_id] + r = object.__new__(cls) + cls.TABS[tab_id] = r + return r + + def __init__(self, page, tab_id): + """ + :param page: ChromiumPage对象 + :param tab_id: 要控制的标签页id + """ + if Settings.singleton_tab_obj and hasattr(self, '_created'): + return + self._created = True + self._page = page self._browser = page.browser super().__init__(page.address, tab_id, page.timeout) @@ -73,6 +90,9 @@ class ChromiumTab(ChromiumBase): def __repr__(self): return f'' + def _on_disconnect(self): + ChromiumTab.TABS.pop(self.tab_id, None) + class WebPageTab(SessionPage, ChromiumTab, BasePage): def __init__(self, page, tab_id): @@ -87,17 +107,18 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): page._headers)) super(SessionPage, self).__init__(page=page, tab_id=tab_id) - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 超时时间(秒) :return: 子元素对象 """ if self._mode == 'd': - return super(SessionPage, self).__call__(loc_or_str, timeout) + return super(SessionPage, self).__call__(loc_or_str, index=index, timeout=timeout) elif self._mode == 's': - return super().__call__(loc_or_str) + return super().__call__(loc_or_str, index=index) @property def set(self): @@ -231,16 +252,17 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): return self.response return super().post(url, show_errmsg, retry, interval, **kwargs) - def ele(self, loc_or_ele, timeout=None): + def ele(self, loc_or_ele, index=1, timeout=None): """返回第一个符合条件的元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super().ele(loc_or_ele) + return super().ele(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self).ele(loc_or_ele, timeout=timeout) + return super(SessionPage, self).ele(loc_or_ele, index=index, timeout=timeout) def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 @@ -253,15 +275,16 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): elif self._mode == 'd': return super(SessionPage, self).eles(loc_or_str, timeout=timeout) - def s_ele(self, loc_or_ele=None): + def s_ele(self, loc_or_ele=None, index=1): """查找第一个符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ if self._mode == 's': - return super().s_ele(loc_or_ele) + return super().s_ele(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self).s_ele(loc_or_ele) + return super(SessionPage, self).s_ele(loc_or_ele, index=index) def s_eles(self, loc_or_str): """查找所有符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 @@ -355,20 +378,19 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): if self._response is not None: self._response.close() - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param timeout: 查找元素超时时间(秒),d模式专用 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super()._find_elements(loc_or_ele, single=single) + return super()._find_elements(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, - relative=relative) + return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, index=index, relative=relative) def __repr__(self): return f'' diff --git a/DrissionPage/_pages/chromium_tab.pyi b/DrissionPage/_pages/chromium_tab.pyi index de61132..952867d 100644 --- a/DrissionPage/_pages/chromium_tab.pyi +++ b/DrissionPage/_pages/chromium_tab.pyi @@ -25,8 +25,11 @@ from .._units.waiter import TabWaiter class ChromiumTab(ChromiumBase): + TABS: dict = ... - def __init__(self, page: ChromiumPage, tab_id: str = None): + def __new__(cls, page: ChromiumPage, tab_id: str): ... + + def __init__(self, page: ChromiumPage, tab_id: str): self._page: ChromiumPage = ... self._browser: Browser = ... self._rect: Optional[TabRect] = ... @@ -76,6 +79,7 @@ class WebPageTab(SessionPage, ChromiumTab): def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... @property @@ -145,14 +149,16 @@ class WebPageTab(SessionPage, ChromiumTab): def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_ele: Union[Tuple[str, str], str] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... @@ -191,7 +197,11 @@ class WebPageTab(SessionPage, ChromiumTab): @property def set(self) -> WebPageTabSetter: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], + timeout: float = None, + index: Optional[int] = 1, + relative: bool = False, + raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], List[ Union[ChromiumElement, ChromiumFrame]]]: ... diff --git a/DrissionPage/_pages/session_page.py b/DrissionPage/_pages/session_page.py index aaba8b6..bbc8e16 100644 --- a/DrissionPage/_pages/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -1,391 +1,395 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. -@License : BSD 3-Clause. -""" -from pathlib import Path -from re import search, DOTALL -from time import sleep -from urllib.parse import urlparse, quote - -from requests import Session, Response -from requests.structures import CaseInsensitiveDict -from tldextract import extract - -from .._base.base import BasePage -from .._configs.session_options import SessionOptions -from .._elements.session_element import SessionElement, make_session_ele -from .._functions.web import cookie_to_dict -from .._units.setter import SessionPageSetter - - -class SessionPage(BasePage): - """SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页""" - - def __init__(self, session_or_options=None, timeout=None): - """ - :param session_or_options: Session对象或SessionOptions对象 - :param timeout: 连接超时时间(秒),为None时从ini文件读取或默认10 - """ - super(SessionPage, SessionPage).__init__(self) - self._headers = None - self._response = None - self._session = None - self._set = None - self._encoding = None - self._s_set_start_options(session_or_options) - self._s_set_runtime_settings() - self._create_session() - if timeout is not None: - self.timeout = timeout - - def _s_set_start_options(self, session_or_options): - """启动配置 - :param session_or_options: Session、SessionOptions对象 - :return: None - """ - if not session_or_options or isinstance(session_or_options, SessionOptions): - self._session_options = session_or_options or SessionOptions(session_or_options) - - elif isinstance(session_or_options, Session): - self._session_options = SessionOptions() - self._headers = session_or_options.headers - session_or_options.headers = None - self._session = session_or_options - - def _s_set_runtime_settings(self): - """设置运行时用到的属性""" - self._timeout = self._session_options.timeout - self._download_path = None if self._session_options.download_path is None \ - else str(Path(self._session_options.download_path).absolute()) - self.retry_times = self._session_options.retry_times - self.retry_interval = self._session_options.retry_interval - - def _create_session(self): - """创建内建Session对象""" - if not self._session: - self._session, self._headers = self._session_options.make_session() - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性文本 - """ - return self.ele(loc_or_str) - - # -----------------共有属性和方法------------------- - @property - def title(self): - """返回网页title""" - ele = self._ele('xpath://title', raise_err=False) - return ele.text if ele else None - - @property - def url(self): - """返回当前访问url""" - return self._url - - @property - def _session_url(self): - """返回当前访问url""" - return self._url - - @property - def raw_data(self): - """返回页面原始数据""" - return self.response.content if self.response else b'' - - @property - def html(self): - """返回页面的html文本""" - return self.response.text if self.response else '' - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" - try: - return self.response.json() - except Exception: - return None - - @property - def user_agent(self): - """返回user agent""" - return self._headers.get('user-agent', '') - - @property - def session(self): - """返回Session对象""" - return self._session - - @property - def response(self): - """返回访问url得到的Response对象""" - return self._response - - @property - def encoding(self): - """返回设置的编码""" - return self._encoding - - @property - def set(self): - """返回用于设置的对象""" - if self._set is None: - self._set = SessionPageSetter(self) - return self._set - - def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): - """用get方式跳转到url,可输入文件路径 - :param url: 目标url,可指定本地文件路径 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数,为None时使用页面对象retry_times属性值 - :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 - :param timeout: 连接超时时间(秒),为None时使用页面对象timeout属性值 - :param kwargs: 连接参数 - :return: url是否可用 - """ - if isinstance(url, Path): - url = str(url.absolute()) - if not url.lower().startswith('http'): - if url.startswith('file:///'): - url = url[8:] - if Path(url).exists(): - with open(url, 'rb') as f: - r = Response() - r._content = f.read() - r.status_code = 200 - self._response = r - return - return self._s_connect(url, 'get', show_errmsg, retry, interval, **kwargs) - - def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数,为None时使用页面对象retry_times属性值 - :param interval: 重试间隔(秒),为None时使用页面对象timeout属性值 - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs) - - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_ele, method='ele()') - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def s_ele(self, loc_or_ele=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, method='s_ele()') - - def s_eles(self, loc_or_str): - """返回页面中符合条件的所有元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str, single=False) - - def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None): - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 - :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 - :return: SessionElement对象 - """ - return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single) - - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - """返回cookies - :param as_dict: 是否以字典方式返回,False则以list返回 - :param all_domains: 是否返回所有域的cookies - :param all_info: 是否返回所有信息,False则只返回name、value、domain - :return: cookies信息 - """ - if all_domains: - cookies = self.session.cookies - else: - if self.url: - ex_url = extract(self._session_url) - domain = f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain - - cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') - else: - cookies = tuple(x for x in self.session.cookies) - - if as_dict: - return {x.name: x.value for x in cookies} - elif all_info: - return [cookie_to_dict(cookie) for cookie in cookies] - else: - r = [] - for c in cookies: - c = cookie_to_dict(c) - r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']}) - return r - - def close(self): - """关闭Session对象""" - self._session.close() - if self._response is not None: - self._response.close() - - def _before_connect(self, url, retry, interval): - """连接前的准备 - :param url: 要访问的url - :param retry: 重试次数 - :param interval: 重试间隔 - :return: 重试次数和间隔组成的tuple - """ - self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - return retry, interval - - def _s_connect(self, url, mode, show_errmsg=False, retry=None, interval=None, **kwargs): - """执行get或post连接 - :param url: 目标url - :param mode: 'get' 或 'post' - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - retry, interval = self._before_connect(url, retry, interval) - self._response, info = self._make_response(self._url, mode, retry, interval, show_errmsg, **kwargs) - - if self._response is None: - self._url_available = False - - else: - if self._response.ok: - self._url_available = True - - else: - if show_errmsg: - raise ConnectionError(f'状态码:{self._response.status_code}.') - self._url_available = False - - return self._url_available - - def _make_response(self, url, mode='get', retry=None, interval=None, show_errmsg=False, **kwargs): - """生成Response对象 - :param url: 目标url - :param mode: 'get' 或 'post' - :param show_errmsg: 是否显示和抛出异常 - :param kwargs: 其它参数 - :return: tuple,第一位为Response或None,第二位为出错信息或 'Success' - """ - kwargs = CaseInsensitiveDict(kwargs) - if 'headers' not in kwargs: - kwargs['headers'] = {} - else: - kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) - - # 设置referer和host值 - parsed_url = urlparse(url) - hostname = parsed_url.hostname - scheme = parsed_url.scheme - if not check_headers(kwargs, self._headers, 'Referer'): - kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' - if 'Host' not in kwargs['headers']: - kwargs['headers']['Host'] = hostname - - if not check_headers(kwargs, self._headers, 'timeout'): - kwargs['timeout'] = self.timeout - - kwargs['headers'] = {**self._headers, **kwargs['headers']} - - r = err = None - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - for i in range(retry + 1): - try: - if mode == 'get': - r = self.session.get(url, **kwargs) - elif mode == 'post': - r = self.session.post(url, **kwargs) - - if r and r.content: - if self._encoding: - r.encoding = self._encoding - return r, 'Success' - return set_charset(r), 'Success' - - except Exception as e: - err = e - - # if r and r.status_code in (403, 404): - # break - - if i < retry: - sleep(interval) - if show_errmsg: - print(f'重试 {url}') - - if show_errmsg: - if err: - raise err - elif r is not None: - raise ConnectionError(f'状态码:{r.status_code}') if r.content else ConnectionError('返回内容为空。') - else: - raise ConnectionError('连接失败') - - else: - if r is not None: - return (r, f'状态码:{r.status_code}') if r.content else (None, '返回内容为空') - else: - return None, '连接失败' if err is None else err - - def __repr__(self): - return f'' - - -def check_headers(kwargs, headers, arg): - """检查kwargs或headers中是否有arg所示属性""" - return arg in kwargs['headers'] or arg in headers - - -def set_charset(response): - """设置Response对象的编码""" - # 在headers中获取编码 - content_type = response.headers.get('content-type', '').lower() - if not content_type.endswith(';'): - content_type += ';' - charset = search(r'charset[=: ]*(.*)?;?', content_type) - - if charset: - response.encoding = charset.group(1) - - # 在headers中获取不到编码,且如果是网页 - elif content_type.replace(' ', '').startswith('text/html'): - re_result = search(b']+).*?>', response.content, DOTALL) - - if re_result: - charset = re_result.group(1).decode() - else: - charset = response.apparent_encoding - - response.encoding = charset - - return response +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from re import search, DOTALL +from time import sleep +from urllib.parse import urlparse, quote + +from requests import Session, Response +from requests.structures import CaseInsensitiveDict +from tldextract import extract + +from .._base.base import BasePage +from .._configs.session_options import SessionOptions +from .._elements.session_element import SessionElement, make_session_ele +from .._functions.web import cookie_to_dict +from .._units.setter import SessionPageSetter + + +class SessionPage(BasePage): + """SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页""" + + def __init__(self, session_or_options=None, timeout=None): + """ + :param session_or_options: Session对象或SessionOptions对象 + :param timeout: 连接超时时间(秒),为None时从ini文件读取或默认10 + """ + super(SessionPage, SessionPage).__init__(self) + self._headers = None + self._response = None + self._session = None + self._set = None + self._encoding = None + self._s_set_start_options(session_or_options) + self._s_set_runtime_settings() + self._create_session() + if timeout is not None: + self.timeout = timeout + + def _s_set_start_options(self, session_or_options): + """启动配置 + :param session_or_options: Session、SessionOptions对象 + :return: None + """ + if not session_or_options or isinstance(session_or_options, SessionOptions): + self._session_options = session_or_options or SessionOptions(session_or_options) + + elif isinstance(session_or_options, Session): + self._session_options = SessionOptions() + self._headers = session_or_options.headers + session_or_options.headers = None + self._session = session_or_options + + def _s_set_runtime_settings(self): + """设置运行时用到的属性""" + self._timeout = self._session_options.timeout + self._download_path = None if self._session_options.download_path is None \ + else str(Path(self._session_options.download_path).absolute()) + self.retry_times = self._session_options.retry_times + self.retry_interval = self._session_options.retry_interval + + def _create_session(self): + """创建内建Session对象""" + if not self._session: + self._session, self._headers = self._session_options.make_session() + + def __call__(self, loc_or_str, index=1, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性文本 + """ + return self.ele(loc_or_str, index=index) + + # -----------------共有属性和方法------------------- + @property + def title(self): + """返回网页title""" + ele = self._ele('xpath://title', raise_err=False) + return ele.text if ele else None + + @property + def url(self): + """返回当前访问url""" + return self._url + + @property + def _session_url(self): + """返回当前访问url""" + return self._url + + @property + def raw_data(self): + """返回页面原始数据""" + return self.response.content if self.response else b'' + + @property + def html(self): + """返回页面的html文本""" + return self.response.text if self.response else '' + + @property + def json(self): + """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" + try: + return self.response.json() + except Exception: + return None + + @property + def user_agent(self): + """返回user agent""" + return self._headers.get('user-agent', '') + + @property + def session(self): + """返回Session对象""" + return self._session + + @property + def response(self): + """返回访问url得到的Response对象""" + return self._response + + @property + def encoding(self): + """返回设置的编码""" + return self._encoding + + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = SessionPageSetter(self) + return self._set + + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): + """用get方式跳转到url,可输入文件路径 + :param url: 目标url,可指定本地文件路径 + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 + :param timeout: 连接超时时间(秒),为None时使用页面对象timeout属性值 + :param kwargs: 连接参数 + :return: url是否可用 + """ + if isinstance(url, Path): + url = str(url.absolute()) + if not url.lower().startswith('http'): + if url.startswith('file:///'): + url = url[8:] + if Path(url).exists(): + with open(url, 'rb') as f: + r = Response() + r._content = f.read() + r.status_code = 200 + self._response = r + return + return self._s_connect(url, 'get', show_errmsg, retry, interval, **kwargs) + + def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): + """用post方式跳转到url + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象timeout属性值 + :param kwargs: 连接参数 + :return: url是否可用 + """ + return self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs) + + def ele(self, loc_or_ele, index=1, timeout=None): + """返回页面中符合条件的一个元素、属性或节点文本 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_ele, index=index, method='ele()') + + def eles(self, loc_or_str, timeout=None): + """返回页面中所有符合条件的元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, index=None) + + def s_ele(self, loc_or_ele=None, index=1): + """返回页面中符合条件的一个元素、属性或节点文本 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 + :return: SessionElement对象或属性、文本 + """ + return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, + index=index, method='s_ele()') + + def s_eles(self, loc_or_str): + """返回页面中符合条件的所有元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_str, index=None) + + def _find_elements(self, loc_or_ele, timeout=None, index=1, raise_err=None): + """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和父类对应 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 + :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 + :return: SessionElement对象 + """ + return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, index=index) + + def get_cookies(self, as_dict=False, all_domains=False, all_info=False): + """返回cookies + :param as_dict: 是否以字典方式返回,False则以list返回 + :param all_domains: 是否返回所有域的cookies + :param all_info: 是否返回所有信息,False则只返回name、value、domain + :return: cookies信息 + """ + if all_domains: + cookies = self.session.cookies + else: + if self.url: + ex_url = extract(self._session_url) + domain = f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain + + cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') + else: + cookies = tuple(x for x in self.session.cookies) + + if as_dict: + return {x.name: x.value for x in cookies} + elif all_info: + return [cookie_to_dict(cookie) for cookie in cookies] + else: + r = [] + for c in cookies: + c = cookie_to_dict(c) + r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']}) + return r + + def close(self): + """关闭Session对象""" + self._session.close() + if self._response is not None: + self._response.close() + + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + + def _s_connect(self, url, mode, show_errmsg=False, retry=None, interval=None, **kwargs): + """执行get或post连接 + :param url: 目标url + :param mode: 'get' 或 'post' + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + retry, interval = self._before_connect(url, retry, interval) + self._response, info = self._make_response(self._url, mode, retry, interval, show_errmsg, **kwargs) + + if self._response is None: + self._url_available = False + + else: + if self._response.ok: + self._url_available = True + + else: + if show_errmsg: + raise ConnectionError(f'状态码:{self._response.status_code}.') + self._url_available = False + + return self._url_available + + def _make_response(self, url, mode='get', retry=None, interval=None, show_errmsg=False, **kwargs): + """生成Response对象 + :param url: 目标url + :param mode: 'get' 或 'post' + :param show_errmsg: 是否显示和抛出异常 + :param kwargs: 其它参数 + :return: tuple,第一位为Response或None,第二位为出错信息或 'Success' + """ + kwargs = CaseInsensitiveDict(kwargs) + if 'headers' not in kwargs: + kwargs['headers'] = {} + else: + kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) + + # 设置referer和host值 + parsed_url = urlparse(url) + hostname = parsed_url.hostname + scheme = parsed_url.scheme + if not check_headers(kwargs, self._headers, 'Referer'): + kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' + if 'Host' not in kwargs['headers']: + kwargs['headers']['Host'] = hostname + + if not check_headers(kwargs, self._headers, 'timeout'): + kwargs['timeout'] = self.timeout + + kwargs['headers'] = {**self._headers, **kwargs['headers']} + + r = err = None + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + for i in range(retry + 1): + try: + if mode == 'get': + r = self.session.get(url, **kwargs) + elif mode == 'post': + r = self.session.post(url, **kwargs) + + if r and r.content: + if self._encoding: + r.encoding = self._encoding + return r, 'Success' + return set_charset(r), 'Success' + + except Exception as e: + err = e + + # if r and r.status_code in (403, 404): + # break + + if i < retry: + sleep(interval) + if show_errmsg: + print(f'重试 {url}') + + if show_errmsg: + if err: + raise err + elif r is not None: + raise ConnectionError(f'状态码:{r.status_code}') if r.content else ConnectionError('返回内容为空。') + else: + raise ConnectionError('连接失败') + + else: + if r is not None: + return (r, f'状态码:{r.status_code}') if r.content else (None, '返回内容为空') + else: + return None, '连接失败' if err is None else err + + def __repr__(self): + return f'' + + +def check_headers(kwargs, headers, arg): + """检查kwargs或headers中是否有arg所示属性""" + return arg in kwargs['headers'] or arg in headers + + +def set_charset(response): + """设置Response对象的编码""" + # 在headers中获取编码 + content_type = response.headers.get('content-type', '').lower() + if not content_type.endswith(';'): + content_type += ';' + charset = search(r'charset[=: ]*(.*)?;?', content_type) + + if charset: + response.encoding = charset.group(1) + + # 在headers中获取不到编码,且如果是网页 + elif content_type.replace(' ', '').startswith('text/html'): + re_result = search(b']+).*?>', response.content, DOTALL) + + if re_result: + charset = re_result.group(1).decode() + else: + charset = response.apparent_encoding + + response.encoding = charset + + return response diff --git a/DrissionPage/_pages/session_page.pyi b/DrissionPage/_pages/session_page.pyi index a6379ca..c134a85 100644 --- a/DrissionPage/_pages/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -42,6 +42,7 @@ class SessionPage(BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, SessionElement], + index: int = 1, timeout: float = None) -> Union[SessionElement, NoneElement]: ... # -----------------共有属性和方法------------------- @@ -91,6 +92,7 @@ class SessionPage(BasePage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], + index: int = 1, timeout: float = None) -> Union[SessionElement, NoneElement]: ... def eles(self, @@ -98,15 +100,15 @@ class SessionPage(BasePage): timeout: float = None) -> List[SessionElement]: ... def s_ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \ - -> Union[SessionElement, NoneElement]: ... + loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], timeout: float = None, - single: bool = True, + index: Optional[int] = 1, raise_err: bool = None) \ -> Union[SessionElement, NoneElement, List[SessionElement]]: ... diff --git a/DrissionPage/_pages/web_page.py b/DrissionPage/_pages/web_page.py index fd4e079..8727e3f 100644 --- a/DrissionPage/_pages/web_page.py +++ b/DrissionPage/_pages/web_page.py @@ -17,6 +17,16 @@ from .._units.setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): """整合浏览器和request的页面类""" + def __new__(cls, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None): + """初始化函数 + :param mode: 'd' 或 's',即driver模式和session模式 + :param timeout: 超时时间(秒),d模式时为寻找元素时间,s模式时为连接时间,默认10秒 + :param chromium_options: Driver对象,只使用s模式时应传入False + :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False + """ + opts = chromium_options or driver_or_options + return super().__new__(cls, opts) + def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None): """初始化函数 :param mode: 'd' 或 's',即driver模式和session模式 @@ -24,7 +34,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): :param chromium_options: Driver对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ - chromium_options = chromium_options or driver_or_options + if hasattr(self, '_created'): + return + self._mode = mode.lower() if self._mode not in ('s', 'd'): raise ValueError('mode参数只能是s或d。') @@ -38,17 +50,18 @@ class WebPage(SessionPage, ChromiumPage, BasePage): super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout) self.change_mode(self._mode, go=False, copy_cookies=False) - def __call__(self, loc_or_str, timeout=None): + def __call__(self, loc_or_str, index=1, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 超时时间(秒) :return: 子元素对象 """ if self._mode == 'd': - return super(SessionPage, self).__call__(loc_or_str, timeout) + return super(SessionPage, self).__call__(loc_or_str, index=index, timeout=timeout) elif self._mode == 's': - return super().__call__(loc_or_str) + return super().__call__(loc_or_str, index=index) @property def set(self): @@ -182,16 +195,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage): return self.response return super().post(url, show_errmsg, retry, interval, **kwargs) - def ele(self, loc_or_ele, timeout=None): + def ele(self, loc_or_ele, index=1, timeout=None): """返回第一个符合条件的元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super().ele(loc_or_ele) + return super().ele(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self).ele(loc_or_ele, timeout=timeout) + return super(SessionPage, self).ele(loc_or_ele, index=index, timeout=timeout) def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 @@ -204,15 +218,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 'd': return super(SessionPage, self).eles(loc_or_str, timeout=timeout) - def s_ele(self, loc_or_ele=None): + def s_ele(self, loc_or_ele=None, index=1): """查找第一个符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :return: SessionElement对象或属性、文本 """ if self._mode == 's': - return super().s_ele(loc_or_ele) + return super().s_ele(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self).s_ele(loc_or_ele) + return super(SessionPage, self).s_ele(loc_or_ele, index=index) def s_eles(self, loc_or_str): """查找所有符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 @@ -360,20 +375,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._response is not None: self._response.close() - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + def _find_elements(self, loc_or_ele, timeout=None, index=1, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param timeout: 查找元素超时时间,d模式专用 - :param single: True则返回第一个,False则返回全部 + :param index: 第几个结果,从1开始,可传入负数获取倒数第几个,为None返回所有 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': - return super()._find_elements(loc_or_ele, single=single) + return super()._find_elements(loc_or_ele, index=index) elif self._mode == 'd': - return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, - relative=relative) + return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, index=index, relative=relative) def quit(self, timeout=5, force=True): """关闭浏览器和Session diff --git a/DrissionPage/_pages/web_page.pyi b/DrissionPage/_pages/web_page.pyi index 20bcdf0..607d130 100644 --- a/DrissionPage/_pages/web_page.pyi +++ b/DrissionPage/_pages/web_page.pyi @@ -5,7 +5,7 @@ @Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. @License : BSD 3-Clause. """ -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Optional from requests import Session, Response @@ -38,6 +38,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... # -----------------共有属性和方法------------------- @@ -105,13 +106,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement], + index: int = 1, timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + def s_ele(self, + loc_or_ele: Union[Tuple[str, str], str] = None, + index: int = 1) -> Union[SessionElement, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... @@ -167,7 +171,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], timeout: float = None, - single: bool = True, + index: Optional[int] = 1, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], diff --git a/DrissionPage/_units/downloader.py b/DrissionPage/_units/downloader.py index 2d0cdb4..8f7f2b5 100644 --- a/DrissionPage/_units/downloader.py +++ b/DrissionPage/_units/downloader.py @@ -22,6 +22,7 @@ class DownloadManager(object): self._browser = browser self._page = browser.page self._when_download_file_exists = 'rename' + self._save_path = None t = TabDownloadSettings(self._page.tab_id) t.path = self._page.download_path @@ -46,18 +47,19 @@ class DownloadManager(object): """返回所有未完成的下载任务""" return self._missions - def set_path(self, tab_id, path): + def set_path(self, tab, path): """设置某个tab的下载路径 - :param tab_id: tab id + :param tab: 页面对象 :param path: 下载路径(绝对路径str) :return: None """ - TabDownloadSettings(tab_id).path = path - if tab_id == self._page.tab_id or not self._running: + TabDownloadSettings(tab.tab_id).path = path + if tab is self._page or not self._running: self._browser.driver.set_callback('Browser.downloadProgress', self._onDownloadProgress) self._browser.driver.set_callback('Browser.downloadWillBegin', self._onDownloadWillBegin) r = self._browser.run_cdp('Browser.setDownloadBehavior', downloadPath=path, behavior='allowAndName', eventsEnabled=True) + self._save_path = path if 'error' in r: print('浏览器版本太低无法使用下载管理功能。') self._running = True @@ -124,7 +126,10 @@ class DownloadManager(object): :return: None """ mission.state = 'canceled' - self._browser.run_cdp('Browser.cancelDownload', guid=mission.id) + try: + self._browser.run_cdp('Browser.cancelDownload', guid=mission.id) + except: + pass if mission.final_path: Path(mission.final_path).unlink(True) @@ -134,7 +139,10 @@ class DownloadManager(object): :return: None """ mission.state = 'skipped' - self._browser.run_cdp('Browser.cancelDownload', guid=mission.id) + try: + self._browser.run_cdp('Browser.cancelDownload', guid=mission.id) + except: + pass def clear_tab_info(self, tab_id): """当tab关闭时清除有关信息 @@ -182,7 +190,7 @@ class DownloadManager(object): elif settings.when_file_exists == 'overwrite': goal_path.unlink() - m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._page.download_path) + m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._save_path) self._missions[guid] = m if self.get_flag(tab_id) is False: # 取消该任务 @@ -210,7 +218,7 @@ class DownloadManager(object): return mission.received_bytes = kwargs['receivedBytes'] mission.total_bytes = kwargs['totalBytes'] - form_path = f'{mission.path}{sep}{mission.id}' + form_path = f'{mission.save_path}{sep}{mission.id}' to_path = str(get_usable_path(f'{mission.path}{sep}{mission.name}')) move(form_path, to_path) self.set_done(mission, 'completed', final_path=to_path) diff --git a/DrissionPage/_units/downloader.pyi b/DrissionPage/_units/downloader.pyi index 37d8ae7..d4570a4 100644 --- a/DrissionPage/_units/downloader.pyi +++ b/DrissionPage/_units/downloader.pyi @@ -8,6 +8,7 @@ from typing import Dict, Optional, Union, Literal from .._base.browser import Browser +from .._pages.chromium_base import ChromiumBase from .._pages.chromium_page import ChromiumPage @@ -18,13 +19,14 @@ class DownloadManager(object): _tab_missions: dict = ... _flags: dict = ... _running: bool = ... + _save_path: Optional[str] = ... def __init__(self, browser: Browser): ... @property def missions(self) -> Dict[str, DownloadMission]: ... - def set_path(self, tab_id: str, path: str) -> None: ... + def set_path(self, tab: ChromiumBase, path: str) -> None: ... def set_rename(self, tab_id: str, rename: str = None, suffix: str = None) -> None: ... diff --git a/DrissionPage/_units/listener.py b/DrissionPage/_units/listener.py index 9063b53..80ee15a 100644 --- a/DrissionPage/_units/listener.py +++ b/DrissionPage/_units/listener.py @@ -30,6 +30,7 @@ class Listener(object): self._target_id = page._target_id self._driver = None self._running_requests = 0 + self._running_targets = 0 self._caught = None self._request_ids = None @@ -208,22 +209,24 @@ class Listener(object): self._extra_info_ids = {} self._caught = Queue(maxsize=0) self._running_requests = 0 + self._running_targets = 0 - def wait_silent(self, timeout=None): + def wait_silent(self, timeout=None, targets_only=False): """等待所有请求结束 :param timeout: 超时,为None时无限等待 + :param targets_only: 是否只等待targets指定的请求结束 :return: 返回是否等待成功 """ if not self.listening: raise RuntimeError('监听未启动,用listen.start()启动。') if timeout is None: - while self._running_requests > 0: + while (not targets_only and self._running_requests > 0) or (targets_only and self._running_targets > 0): sleep(.1) return True end_time = perf_counter() + timeout while perf_counter() < end_time: - if self._running_requests <= 0: + if (not targets_only and self._running_requests <= 0) or (targets_only and self._running_targets <= 0): return True sleep(.1) else: @@ -265,6 +268,7 @@ class Listener(object): if self._targets is True: if ((self._method is True or kwargs['request']['method'] in self._method) and (self._res_type is True or kwargs.get('type', '').upper() in self._res_type)): + self._running_targets += 1 rid = kwargs['requestId'] p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, True)) p._raw_request = kwargs @@ -279,6 +283,7 @@ class Listener(object): or (not self._is_regex and target in kwargs['request']['url'])) and (self._method is True or kwargs['request']['method'] in self._method) and (self._res_type is True or kwargs.get('type', '').upper() in self._res_type)): + self._running_targets += 1 p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, target)) p._raw_request = kwargs break @@ -346,16 +351,17 @@ class Listener(object): if packet: self._caught.put(packet) + self._running_targets -= 1 def _loading_failed(self, **kwargs): """请求失败时的回调方法""" self._running_requests -= 1 r_id = kwargs['requestId'] - dp = self._request_ids.get(r_id, None) - if dp: - dp._raw_fail_info = kwargs - dp._resource_type = kwargs['type'] - dp.is_failed = True + data_packet = self._request_ids.get(r_id, None) + if data_packet: + data_packet._raw_fail_info = kwargs + data_packet._resource_type = kwargs['type'] + data_packet.is_failed = True r = self._extra_info_ids.get(kwargs['requestId'], None) if r: @@ -371,8 +377,9 @@ class Listener(object): self._request_ids.pop(r_id, None) - if dp: - self._caught.put(dp) + if data_packet: + self._caught.put(data_packet) + self._running_targets -= 1 class FrameListener(Listener): diff --git a/DrissionPage/_units/listener.pyi b/DrissionPage/_units/listener.pyi index 9daef5d..a80d3c2 100644 --- a/DrissionPage/_units/listener.pyi +++ b/DrissionPage/_units/listener.pyi @@ -33,6 +33,7 @@ class Listener(object): self._extra_info_ids: dict = ... self.listening: bool = ... self._running_requests: int = ... + self._running_targets: int = ... @property def targets(self) -> Optional[set]: ... @@ -66,7 +67,7 @@ class Listener(object): def clear(self) -> None: ... - def wait_silent(self, timeout=None) -> bool: ... + def wait_silent(self, timeout: float = None, targets_only: bool = False) -> bool: ... def _to_target(self, target_id: str, address: str, page: ChromiumBase) -> None: ... diff --git a/DrissionPage/_units/selector.py b/DrissionPage/_units/selector.py index 1b94ed7..9e7f721 100644 --- a/DrissionPage/_units/selector.py +++ b/DrissionPage/_units/selector.py @@ -97,7 +97,7 @@ class SelectElement(object): def by_index(self, index, timeout=None): """此方法用于根据index值选择项。当元素是多选列表时,可以接收list或tuple - :param index: 序号,0开始,传入list或tuple可选择多项 + :param index: 序号,从1开始,可传入负数获取倒数第几个,传入list或tuple可选择多项 :param timeout: 超时时间,为None默认使用页面超时时间 :return: 是否选择成功 """ @@ -136,7 +136,7 @@ class SelectElement(object): def cancel_by_index(self, index, timeout=None): """此方法用于根据index值取消选择项。当元素是多选列表时,可以接收list或tuple - :param index: 序号,0开始,传入list或tuple可取消多项 + :param index: 序号,从1开始,可传入负数获取倒数第几个,传入list或tuple可取消多项 :param timeout: 超时时间,不输入默认实用页面超时时间 :return: 是否取消成功 """ @@ -231,7 +231,7 @@ class SelectElement(object): """ ok = False condition = [int(i) for i in condition] - text_len = max(condition) + text_len = abs(max(condition, key=abs)) end_time = perf_counter() + timeout while perf_counter() < end_time: if len(self.options) >= text_len: @@ -240,7 +240,7 @@ class SelectElement(object): if ok: eles = self.options - eles = [eles[i - 1] for i in condition] + eles = [eles[i - 1] if i > 0 else eles[i] for i in condition] self._select_options(eles, mode) return True diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py index 7928a93..17e4b44 100644 --- a/DrissionPage/_units/setter.py +++ b/DrissionPage/_units/setter.py @@ -185,7 +185,7 @@ class TabSetter(ChromiumBaseSetter): """ path = str(Path(path).absolute()) self._page._download_path = path - self._page.browser._dl_mgr.set_path(self._page.tab_id, path) + self._page.browser._dl_mgr.set_path(self._page, path) if self._page._DownloadKit: self._page._DownloadKit.set.goal_path(path) diff --git a/DrissionPage/_units/waiter.py b/DrissionPage/_units/waiter.py index 1569b0e..52ff58c 100644 --- a/DrissionPage/_units/waiter.py +++ b/DrissionPage/_units/waiter.py @@ -96,7 +96,7 @@ class BaseWaiter(object): """ return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) - def load_complete(self, timeout=None, raise_err=None): + def doc_loaded(self, timeout=None, raise_err=None): """等待页面加载完成 :param timeout: 超时时间,为None时使用页面timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 @@ -215,6 +215,14 @@ class BaseWaiter(object): :return: count为1时返回数据包对象,大于1时返回列表,超时且fix_count为True时返回False""" return self._driver.listen.wait(count, timeout, fix_count) + def load_complete(self, timeout=None, raise_err=None): + """等待页面加载完成 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, start=False, raise_err=raise_err) + class TabWaiter(BaseWaiter): diff --git a/DrissionPage/_units/waiter.pyi b/DrissionPage/_units/waiter.pyi index a0c8603..a04dd9a 100644 --- a/DrissionPage/_units/waiter.pyi +++ b/DrissionPage/_units/waiter.pyi @@ -42,7 +42,7 @@ class BaseWaiter(object): def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... + def doc_loaded(self, timeout: float = None, raise_err: bool = None) -> bool: ... def upload_paths_inputted(self) -> bool: ... diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 9ed94df..51996e3 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -89,3 +89,7 @@ class StorageError(BaseError): class CookieFormatError(BaseError): _info = 'cookie格式不正确。' + + +class TargetNotFoundError(BaseError): + _info = '找不到指定页面。' diff --git a/requirements.txt b/requirements.txt index 8a430e8..63f0080 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ requests lxml cssselect DownloadKit>=2.0.0 -websocket-client>=1.7.0 +websocket-client click tldextract psutil \ No newline at end of file diff --git a/setup.py b/setup.py index 4e39d05..f228274 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="4.0.1", + version="4.0.2", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", @@ -23,21 +23,21 @@ setup( 'requests', 'cssselect', 'DownloadKit>=2.0.0', - 'websocket-client>=1.7.0', + 'websocket-client', 'click', 'tldextract', 'psutil' ], classifiers=[ - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.6", "Development Status :: 4 - Beta", "Topic :: Utilities", "License :: OSI Approved :: BSD License", ], - python_requires='>=3.8', + python_requires='>=3.6', entry_points={ 'console_scripts': [ - 'dp = DrissionPage.commons.cli:main', + 'dp = DrissionPage.functions.cli:main', ], }, )