diff --git a/.gitee/ISSUE_TEMPLATE.zh-CN.md b/.gitee/ISSUE_TEMPLATE.zh-CN.md index 1c9e53c..fc92e66 100644 --- a/.gitee/ISSUE_TEMPLATE.zh-CN.md +++ b/.gitee/ISSUE_TEMPLATE.zh-CN.md @@ -1,3 +1,3 @@ -- 使用上的问题请先查看文档[使用文档](http://g1879.gitee.io/drissionpagedocs) -- 遇到bug请详细描述如何重现,并附上代码 -- 提问前先给本库打个星,谢谢 \ No newline at end of file +1. 使用上的问题请先查看文档[使用文档](http://g1879.gitee.io/drissionpagedocs) +2. 遇到bug请详细描述如何重现,并附上代码 +3. 提问前先给本库打个星,谢谢 \ No newline at end of file diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 2f87351..0f1467c 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -1,23 +1,17 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ -# 常用页面类 -from .chromium_page import ChromiumPage -from .session_page import SessionPage -from .web_page import WebPage +from ._pages.chromium_page import ChromiumPage +from ._pages.session_page import SessionPage +from ._pages.web_page import WebPage # 启动配置类 -from .configs.chromium_options import ChromiumOptions -from .configs.session_options import SessionOptions +from ._configs.chromium_options import ChromiumOptions +from ._configs.session_options import SessionOptions -# 旧版页面类和启动配置类 -try: - from .mixpage.mix_page import MixPage - from .mixpage.drission import Drission - from .configs.driver_options import DriverOptions -except ModuleNotFoundError: - pass - -__version__ = '3.2.35' +__all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__'] +__version__ = '4.0.1' diff --git a/DrissionPage/base.py b/DrissionPage/_base/base.py similarity index 67% rename from DrissionPage/base.py rename to DrissionPage/_base/base.py index 3bc3469..0f16588 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/_base/base.py @@ -1,16 +1,20 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author 
: g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from abc import abstractmethod from re import sub -from urllib.parse import quote -from .commons.constants import Settings, NoneElement -from .commons.locator import get_loc -from .commons.web import format_html -from .errors import ElementNotFoundError +from DownloadKit import DownloadKit + +from .._functions.settings import Settings +from .._functions.locator import get_loc +from .._functions.web import format_html +from .._elements.none_element import NoneElement +from ..errors import ElementNotFoundError class BaseParser(object): @@ -20,7 +24,7 @@ class BaseParser(object): return self.ele(loc_or_str) def ele(self, loc_or_ele, timeout=None): - return self._ele(loc_or_ele, timeout, True) + return self._ele(loc_or_ele, timeout, True, method='ele()') def eles(self, loc_or_str, timeout=None): return self._ele(loc_or_str, timeout, False) @@ -36,7 +40,7 @@ class BaseParser(object): def s_eles(self, loc_or_str): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): pass @abstractmethod @@ -58,24 +62,21 @@ class BaseElement(BaseParser): def parent(self, level_or_loc=1): pass - def prev(self, index=1): - return None # ShadowRootElement直接继承 - - def prevs(self) -> None: - return None # ShadowRootElement直接继承 - def next(self, index=1): pass def nexts(self): pass - def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None, method=None): r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err) - if not single or raise_err is False: + if r or isinstance(r, list): return r - if not r and (Settings.raise_ele_not_found or raise_err is True): - raise ElementNotFoundError + if 
Settings.raise_when_ele_not_found or raise_err is True: + raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_str}) + + r.method = method + r.args = {'loc_or_str': loc_or_str} return r @abstractmethod @@ -84,8 +85,8 @@ class BaseElement(BaseParser): class DrissionElement(BaseElement): - """DriverElement、ChromiumElement 和 SessionElement的基类 - 但不是ShadowRootElement的基类""" + """ChromiumElement 和 SessionElement的基类 + 但不是ShadowRoot的基类""" @property def link(self): @@ -119,9 +120,10 @@ class DrissionElement(BaseElement): return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ if isinstance(level_or_loc, int): @@ -133,104 +135,127 @@ class DrissionElement(BaseElement): if loc[0] == 'css selector': raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}' + loc = f'xpath:./ancestor::{loc[1].lstrip(". 
/ ")}[{index}]' else: raise TypeError('level_or_loc参数只能是tuple、int或str。') - return self._ele(loc, timeout=0, relative=True, raise_err=False) + return self._ele(loc, timeout=0, relative=True, raise_err=False, method='parent()') - def child(self, index=1, filter_loc='', timeout=None, ele_only=True): + def child(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 - :param index: 第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param index: 第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) if not nodes: - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) try: return nodes[index - 1] except IndexError: - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) - def prev(self, index=1, filter_loc='', timeout=0, ele_only=True): + def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ + if isinstance(filter_loc, int): + index = filter_loc + 
filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) - def next(self, index=1, filter_loc='', timeout=0, ele_only=True): + def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param index: 后面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) - def before(self, index=1, filter_loc='', timeout=None, ele_only=True): + def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, 
ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) - def after(self, index=1, filter_loc='', timeout=None, ele_only=True): + def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param index: 后面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_ele_not_found: - raise ElementNotFoundError + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, + 'index': index, 'ele_only': ele_only}) else: - return NoneElement() + return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only}) def children(self, filter_loc='', timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ @@ -246,19 +271,19 @@ class DrissionElement(BaseElement): nodes = self._ele(loc, timeout=timeout, single=False, relative=True) return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] - def prevs(self, filter_loc='', timeout=0, ele_only=True): + def prevs(self, filter_loc='', timeout=None, ele_only=True): """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 
用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only) - def nexts(self, filter_loc='', timeout=0, ele_only=True): + def nexts(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ @@ -267,7 +292,7 @@ class DrissionElement(BaseElement): def befores(self, filter_loc='', timeout=None, ele_only=True): """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ @@ -277,7 +302,7 @@ class DrissionElement(BaseElement): def afters(self, filter_loc='', timeout=None, ele_only=True): """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ @@ -291,8 +316,8 @@ class DrissionElement(BaseElement): :param filter_loc: 用于筛选的查询语法 :param direction: 'following' 或 'preceding',查找的方向 :param brother: 查找范围,在同级查找还是整个dom前后查找 - :param timeout: 查找等待时间 - :return: DriverElement对象或字符串 + :param timeout: 查找等待时间(秒) + :return: 元素对象或字符串 """ if index is not None and index < 1: raise ValueError('index必须大于等于1。') @@ -346,18 +371,22 @@ class DrissionElement(BaseElement): class BasePage(BaseParser): """页面类的基类""" - def __init__(self, timeout=None): + def __init__(self): """初始化函数""" self._url = None - self.timeout = timeout if timeout is not None else 10 + self._timeout = 10 + self._url_available = None self.retry_times = 3 self.retry_interval = 2 - self._url_available = None + self._DownloadKit = None + self._download_path = None + self._none_ele_return_value = False + 
self._none_ele_value = None @property def title(self): """返回网页title""" - ele = self._ele('xpath://title', raise_err=False) + ele = self._ele('xpath://title', raise_err=False, method='title') return ele.text if ele else None @property @@ -380,17 +409,17 @@ class BasePage(BaseParser): """返回当前访问的url有效性""" return self._url_available - def _before_connect(self, url, retry, interval): - """连接前的准备 - :param url: 要访问的url - :param retry: 重试次数 - :param interval: 重试间隔 - :return: 重试次数和间隔组成的tuple - """ - self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - return retry, interval + @property + def download_path(self): + """返回默认下载路径""" + return self._download_path + + @property + def download(self): + """返回下载器对象""" + if self._DownloadKit is None: + self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path) + return self._DownloadKit # ----------------以下属性或方法由后代实现---------------- @property @@ -401,6 +430,10 @@ class BasePage(BaseParser): def json(self): return + @property + def user_agent(self): + return + @abstractmethod def get_cookies(self, as_dict=False, all_info=False): return {} @@ -409,16 +442,19 @@ class BasePage(BaseParser): def get(self, url, show_errmsg=False, retry=None, interval=None): pass - def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None): + def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None): if not loc_or_ele: - raise ElementNotFoundError + raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele}) r = self._find_elements(loc_or_ele, timeout=timeout, single=single, raise_err=raise_err) - if not single or raise_err is False: + if r or isinstance(r, list): return r - if not r and (Settings().raise_ele_not_found is True or raise_err is True): - raise ElementNotFoundError + if Settings.raise_when_ele_not_found or raise_err is True: + raise 
ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele}) + + r.method = method + r.args = {'loc_or_str': loc_or_ele} return r @abstractmethod diff --git a/DrissionPage/base.pyi b/DrissionPage/_base/base.pyi similarity index 57% rename from DrissionPage/base.pyi rename to DrissionPage/_base/base.pyi index 690241d..b4964b0 100644 --- a/DrissionPage/base.pyi +++ b/DrissionPage/_base/base.pyi @@ -1,12 +1,16 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from abc import abstractmethod -from typing import Union, Tuple, List +from typing import Union, Tuple, List, Any -from .commons.constants import NoneElement +from DownloadKit import DownloadKit + +from .._elements.none_element import NoneElement class BaseParser(object): @@ -25,7 +29,8 @@ class BaseParser(object): def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, + raise_err: bool = None, method: str = None): ... @abstractmethod def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... @@ -41,7 +46,7 @@ class BaseElement(BaseParser): def tag(self) -> str: ... def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, - relative: bool = False, raise_err: bool = None): ... + relative: bool = False, raise_err: bool = None, method: str = None): ... @abstractmethod def _find_elements(self, loc_or_str, timeout: float = None, single: bool = True, relative: bool = False, @@ -78,59 +83,41 @@ class DrissionElement(BaseElement): def texts(self, text_node_only: bool = False) -> list: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... 
+ def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ... - def child(self, index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, + timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def prev(self, index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, + timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def next(self, index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, + timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def before(self, index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, + timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def after(self, index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... + def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, + timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... 
- def children(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, + def children(self, filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def prevs(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, + def prevs(self, filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def nexts(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, + def nexts(self, filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def befores(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, + def befores(self, filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def afters(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, + def afters(self, filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... - def _get_brothers(self, index: int = None, - filter_loc: Union[tuple, str] = '', - direction: str = 'following', - brother: bool = True, - timeout: float = 0.5, - ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... + def _get_brothers(self, index: int = None, filter_loc: Union[tuple, str] = '', + direction: str = 'following', brother: bool = True, + timeout: float = 0.5, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ... # ----------------以下属性或方法由后代实现---------------- @property @@ -150,11 +137,15 @@ class DrissionElement(BaseElement): class BasePage(BaseParser): - def __init__(self, timeout: float = None): + def __init__(self): self._url_available: bool = ... self.retry_times: int = ... self.retry_interval: float = ... - self._timeout = float = ... + self._timeout: float = ... + self._download_path: str = ... + self._DownloadKit: DownloadKit = ... 
+ self._none_ele_return_value: bool = ... + self._none_ele_value: Any = ... @property def title(self) -> Union[str, None]: ... @@ -171,7 +162,11 @@ class BasePage(BaseParser): @property def url_available(self) -> bool: ... - def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + @property + def download_path(self) -> str: ... + + @property + def download(self) -> DownloadKit: ... # ----------------以下属性或方法由后代实现---------------- @property @@ -180,17 +175,17 @@ class BasePage(BaseParser): @property def json(self) -> dict: ... + @property + def user_agent(self) -> str: ... + @abstractmethod def get_cookies(self, as_dict: bool = False, all_info: bool = False) -> Union[list, dict]: ... @abstractmethod - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None): ... + def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ... - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... + def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, + raise_err: bool = None, method: str = None): ... @abstractmethod def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ... diff --git a/DrissionPage/_base/browser.py b/DrissionPage/_base/browser.py new file mode 100644 index 0000000..0e71e1d --- /dev/null +++ b/DrissionPage/_base/browser.py @@ -0,0 +1,216 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from pathlib import Path +from shutil import rmtree +from time import sleep, perf_counter + +from websocket import WebSocketBadStatusException + +from .driver import BrowserDriver, Driver +from .._functions.tools import stop_process_on_port, raise_error +from .._units.downloader import DownloadManager +from ..errors import PageDisconnectedError + +__ERROR__ = 'error' + + +class Browser(object): + BROWSERS = {} + + def __new__(cls, address, browser_id, page): + """ + :param address: 浏览器地址 + :param browser_id: 浏览器id + :param page: ChromiumPage对象 + """ + if browser_id in cls.BROWSERS: + return cls.BROWSERS[browser_id] + return object.__new__(cls) + + def __init__(self, address, browser_id, page): + """ + :param address: 浏览器地址 + :param browser_id: 浏览器id + :param page: ChromiumPage对象 + """ + if hasattr(self, '_created'): + return + self._created = True + Browser.BROWSERS[browser_id] = self + + self.page = page + self.address = address + self._driver = BrowserDriver(browser_id, 'browser', address, self) + self.id = browser_id + self._frames = {} + self._drivers = {} + # self._drivers = {t: Driver(t, 'page', address) for t in self.tabs} + self._connected = False + + self._process_id = None + r = self.run_cdp('SystemInfo.getProcessInfo') + for i in r.get('processInfo', []): + if i['type'] == 'browser': + self._process_id = i['id'] + break + + self.run_cdp('Target.setDiscoverTargets', discover=True) + self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed) + self._driver.set_callback('Target.targetCreated', self._onTargetCreated) + + def _get_driver(self, tab_id): + """获取对应tab id的Driver + :param tab_id: 标签页id + :return: Driver对象 + """ + return self._drivers.pop(tab_id, Driver(tab_id, 'page', self.address)) + + def _onTargetCreated(self, **kwargs): + """标签页创建时执行""" + if (kwargs['targetInfo']['type'] in ('page', 'webview') + and not kwargs['targetInfo']['url'].startswith('devtools://')): + try: + self._drivers[kwargs['targetInfo']['targetId']] = 
Driver(kwargs['targetInfo']['targetId'], + 'page', self.address) + except WebSocketBadStatusException: + pass + + def _onTargetDestroyed(self, **kwargs): + """标签页关闭时执行""" + tab_id = kwargs['targetId'] + if hasattr(self, '_dl_mgr'): + self._dl_mgr.clear_tab_info(tab_id) + for key in [k for k, i in self._frames.items() if i == tab_id]: + self._frames.pop(key, None) + self._drivers.pop(tab_id, None) + + def connect_to_page(self): + """执行与page相关的逻辑""" + if not self._connected: + self._dl_mgr = DownloadManager(self) + self._connected = True + + def run_cdp(self, cmd, **cmd_args): + """执行Chrome DevTools Protocol语句 + :param cmd: 协议项目 + :param cmd_args: 参数 + :return: 执行的结果 + """ + ignore = cmd_args.pop('_ignore', None) + r = self._driver.run(cmd, **cmd_args) + return r if __ERROR__ not in r else raise_error(r, ignore) + + @property + def driver(self): + return self._driver + + @property + def tabs_count(self): + """返回标签页数量""" + j = self.run_cdp('Target.getTargets')['targetInfos'] # 不要改用get,避免卡死 + return len([i for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')]) + + @property + def tabs(self): + """返回所有标签页id组成的列表""" + j = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp,因为顺序不对 + return [i['id'] for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')] + + @property + def process_id(self): + """返回浏览器进程id""" + return self._process_id + + def find_tabs(self, title=None, url=None, tab_type=None, single=True): + """查找符合条件的tab,返回它们的id组成的列表 + :param title: 要匹配title的文本 + :param url: 要匹配url的文本 + :param tab_type: tab类型,可用列表输入多个 + :param single: 是否返回首个结果的id,为False返回所有信息 + :return: tab id或tab列表 + """ + tabs = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp + + if isinstance(tab_type, str): + tab_type = {tab_type} + elif isinstance(tab_type, (list, tuple, set)): + tab_type = set(tab_type) + elif tab_type is not None: + raise TypeError('tab_type只能是set、list、tuple、str、None。') + + r = 
[i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) + and (tab_type is None or i['type'] in tab_type))] + return r[0]['id'] if r and single else r + + def close_tab(self, tab_id): + """关闭标签页 + :param tab_id: 标签页id + :return: None + """ + self.run_cdp('Target.closeTarget', targetId=tab_id, _ignore=PageDisconnectedError) + + def activate_tab(self, tab_id): + """使标签页变为活动状态 + :param tab_id: 标签页id + :return: None + """ + self.run_cdp('Target.activateTarget', targetId=tab_id) + + def get_window_bounds(self, tab_id=None): + """返回浏览器窗口位置和大小信息 + :param tab_id: 标签页id + :return: 窗口大小字典 + """ + return self.run_cdp('Browser.getWindowForTarget', targetId=tab_id or self.id)['bounds'] + + def quit(self, timeout=5, force=False): + """关闭浏览器 + :param timeout: 等待浏览器关闭超时时间(秒) + :param force: 是否立刻强制终止进程 + :return: None + """ + try: + self.run_cdp('Browser.close') + except PageDisconnectedError: + return + + if force: + ip, port = self.address.split(':') + if ip not in ('127.0.0.1', 'localhost'): + return + stop_process_on_port(port) + return + + if self.process_id: + from os import popen + from platform import system + txt = f'tasklist | findstr {self.process_id}' if system().lower() == 'windows' \ + else f'ps -ef | grep {self.process_id}' + end_time = perf_counter() + timeout + while perf_counter() < end_time: + p = popen(txt) + sleep(.1) + try: + if f' {self.process_id} ' not in p.read(): + return + except TypeError: + pass + + def _on_quit(self): + Browser.BROWSERS.pop(self.id, None) + if self.page._chromium_options.is_auto_port and self.page._chromium_options.user_data_path: + path = Path(self.page._chromium_options.user_data_path) + end_time = perf_counter() + 7 + while perf_counter() < end_time: + if not path.exists(): + break + try: + rmtree(path) + break + except (PermissionError, FileNotFoundError, OSError): + pass diff --git a/DrissionPage/_base/browser.pyi b/DrissionPage/_base/browser.pyi new file mode 100644 index 0000000..e29afcb 
--- /dev/null +++ b/DrissionPage/_base/browser.pyi @@ -0,0 +1,64 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import List, Optional, Union + +from .driver import BrowserDriver, Driver +from .._pages.chromium_page import ChromiumPage +from .._units.downloader import DownloadManager + + +class Browser(object): + BROWSERS: dict = ... + page: ChromiumPage = ... + _driver: BrowserDriver = ... + id: str = ... + address: str = ... + _frames: dict = ... + _drivers: dict = ... + _process_id: Optional[int] = ... + _dl_mgr: DownloadManager = ... + _connected: bool = ... + + def __new__(cls, address: str, browser_id: str, page: ChromiumPage): ... + + def __init__(self, address: str, browser_id: str, page: ChromiumPage): ... + + def _get_driver(self, tab_id: str) -> Driver: ... + + def run_cdp(self, cmd, **cmd_args) -> dict: ... + + @property + def driver(self) -> BrowserDriver: ... + + @property + def tabs_count(self) -> int: ... + + @property + def tabs(self) -> List[str]: ... + + @property + def process_id(self) -> Optional[int]: ... + + def find_tabs(self, title: str = None, url: str = None, + tab_type: Union[str, list, tuple] = None, single: bool = True) -> Union[str, List[str]]: ... + + def close_tab(self, tab_id: str) -> None: ... + + def activate_tab(self, tab_id: str) -> None: ... + + def get_window_bounds(self, tab_id: str = None) -> dict: ... + + def connect_to_page(self) -> None: ... + + def _onTargetCreated(self, **kwargs) -> None: ... + + def _onTargetDestroyed(self, **kwargs) -> None: ... + + def quit(self, timeout: float = 5, force: bool = False) -> None: ... + + def _on_quit(self) -> None: ... 
diff --git a/DrissionPage/_base/driver.py b/DrissionPage/_base/driver.py new file mode 100644 index 0000000..d5ce858 --- /dev/null +++ b/DrissionPage/_base/driver.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from json import dumps, loads, JSONDecodeError +from queue import Queue, Empty +from threading import Thread, Event +from time import perf_counter, sleep + +from requests import get +from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection, + WebSocketException) + +from ..errors import PageDisconnectedError + + +class Driver(object): + def __init__(self, tab_id, tab_type, address): + """ + :param tab_id: 标签页id + :param tab_type: 标签页类型 + :param address: 浏览器连接地址 + """ + self.id = tab_id + self.address = address + self.type = tab_type + self._debug = False + self.alert_flag = False # 标记alert出现,跳过一条请求后复原 + + self._websocket_url = f'ws://{address}/devtools/{tab_type}/{tab_id}' + self._cur_id = 0 + self._ws = None + + self._recv_th = Thread(target=self._recv_loop) + self._handle_event_th = Thread(target=self._handle_event_loop) + self._recv_th.daemon = True + self._handle_event_th.daemon = True + self._handle_immediate_event_th = None + + self._stopped = Event() + + self.event_handlers = {} + self.immediate_event_handlers = {} + self.method_results = {} + self.event_queue = Queue() + self.immediate_event_queue = Queue() + + self.start() + + def _send(self, message, timeout=None): + """发送信息到浏览器,并返回浏览器返回的信息 + :param message: 发送给浏览器的数据 + :param timeout: 超时时间,为None表示无限 + :return: 浏览器返回的数据 + """ + self._cur_id += 1 + ws_id = self._cur_id + message['id'] = ws_id + message_json = dumps(message) + + # if self._debug: + # if self._debug is True or (isinstance(self._debug, str) and + # message.get('method', '').startswith(self._debug)): + # print(f'发> {message_json}') + # elif 
isinstance(self._debug, (list, tuple, set)): + # for m in self._debug: + # if message.get('method', '').startswith(m): + # print(f'发> {message_json}') + # break + + end_time = perf_counter() + timeout if timeout is not None else None + self.method_results[ws_id] = Queue() + try: + self._ws.send(message_json) + if timeout == 0: + self.method_results.pop(ws_id, None) + return {'id': ws_id, 'result': {}} + + except (OSError, WebSocketConnectionClosedException): + self.method_results.pop(ws_id, None) + return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'} + + while not self._stopped.is_set(): + try: + result = self.method_results[ws_id].get(timeout=.2) + self.method_results.pop(ws_id, None) + return result + + except Empty: + if self.alert_flag and message['method'].startswith(('Input.', 'Runtime.')): + return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'} + + if timeout is not None and perf_counter() > end_time: + self.method_results.pop(ws_id, None) + return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'} \ + if self.alert_flag else {'error': {'message': 'timeout'}, 'type': 'timeout'} + + continue + + return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'} + + def _recv_loop(self): + """接收浏览器信息的守护线程方法""" + while not self._stopped.is_set(): + try: + # self._ws.settimeout(1) + msg_json = self._ws.recv() + msg = loads(msg_json) + except WebSocketTimeoutException: + continue + except (WebSocketException, OSError, WebSocketConnectionClosedException, JSONDecodeError): + self._stop() + return + + # if self._debug: + # if self._debug is True or 'id' in msg or (isinstance(self._debug, str) + # and msg.get('method', '').startswith(self._debug)): + # print(f'<收 {msg_json}') + # elif isinstance(self._debug, (list, tuple, set)): + # for m in self._debug: + # if msg.get('method', '').startswith(m): + # print(f'<收 {msg_json}') + # break + + if 'method' in msg: + if 
msg['method'].startswith('Page.javascriptDialog'): + self.alert_flag = msg['method'].endswith('Opening') + function = self.immediate_event_handlers.get(msg['method']) + if function: + self._handle_immediate_event(function, msg['params']) + else: + self.event_queue.put(msg) + + elif msg.get('id') in self.method_results: + self.method_results[msg['id']].put(msg) + + # elif self._debug: + # print(f'未知信息:{msg}') + + def _handle_event_loop(self): + """当接收到浏览器信息,执行已绑定的方法""" + while not self._stopped.is_set(): + try: + event = self.event_queue.get(timeout=1) + except Empty: + continue + + function = self.event_handlers.get(event['method']) + if function: + function(**event['params']) + + self.event_queue.task_done() + + def _handle_immediate_event_loop(self): + while not self._stopped.is_set() and not self.immediate_event_queue.empty(): + function, kwargs = self.immediate_event_queue.get(timeout=1) + try: + function(**kwargs) + except PageDisconnectedError: + pass + + def _handle_immediate_event(self, function, kwargs): + """处理立即执行的动作 + :param function: 要运行下方法 + :param kwargs: 方法参数 + :return: None + """ + self.immediate_event_queue.put((function, kwargs)) + if self._handle_immediate_event_th is None or not self._handle_immediate_event_th.is_alive(): + self._handle_immediate_event_th = Thread(target=self._handle_immediate_event_loop) + self._handle_immediate_event_th.daemon = True + self._handle_immediate_event_th.start() + + def run(self, _method, **kwargs): + """执行cdp方法 + :param _method: cdp方法名 + :param args: cdp参数 + :param kwargs: cdp参数 + :return: 执行结果 + """ + if self._stopped.is_set(): + return {'error': 'connection disconnected', 'type': 'connection_error'} + + timeout = kwargs.pop('_timeout', 30) + result = self._send({'method': _method, 'params': kwargs}, timeout=timeout) + if 'result' not in result and 'error' in result: + return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'), + 'method': _method, 'args': kwargs, 'timeout': 
timeout} + else: + return result['result'] + + def start(self): + """启动连接""" + self._stopped.clear() + self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True) + self._recv_th.start() + self._handle_event_th.start() + return True + + def stop(self): + """中断连接""" + self._stop() + while self._handle_event_th.is_alive() or self._recv_th.is_alive(): + sleep(.1) + return True + + def _stop(self): + """中断连接""" + if self._stopped.is_set(): + return False + + self._stopped.set() + if self._ws: + self._ws.close() + self._ws = None + + try: + while not self.event_queue.empty(): + event = self.event_queue.get_nowait() + function = self.event_handlers.get(event['method']) + if function: + function(**event['params']) + except: + pass + + self.event_handlers.clear() + self.method_results.clear() + self.event_queue.queue.clear() + + def set_callback(self, event, callback, immediate=False): + """绑定cdp event和回调方法 + :param event: cdp event + :param callback: 绑定到cdp event的回调方法 + :param immediate: 是否要立即处理的动作 + :return: None + """ + handler = self.immediate_event_handlers if immediate else self.event_handlers + if callback: + handler[event] = callback + else: + handler.pop(event, None) + + +class BrowserDriver(Driver): + BROWSERS = {} + + def __new__(cls, tab_id, tab_type, address, browser): + if tab_id in cls.BROWSERS: + return cls.BROWSERS[tab_id] + return object.__new__(cls) + + def __init__(self, tab_id, tab_type, address, browser): + if hasattr(self, '_created'): + return + self._created = True + BrowserDriver.BROWSERS[tab_id] = self + super().__init__(tab_id, tab_type, address) + self.browser = browser + + def __repr__(self): + return f'<BrowserDriver {self.id}>' + + def get(self, url): + r = get(url, headers={'Connection': 'close'}) + r.close() + return r + + def _stop(self): + super()._stop() + self.browser._on_quit() diff --git a/DrissionPage/_base/driver.pyi b/DrissionPage/_base/driver.pyi new file mode 100644 index 0000000..ae86582 --- 
/dev/null +++ b/DrissionPage/_base/driver.pyi @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from queue import Queue +from threading import Thread, Event +from typing import Union, Callable, Dict, Optional + +from requests import Response +from websocket import WebSocket + +from .browser import Browser + + +class GenericAttr(object): + def __init__(self, name: str, tab: Driver): ... + + def __getattr__(self, item: str) -> Callable: ... + + def __setattr__(self, key: str, value: Callable) -> None: ... + + +class Driver(object): + id: str + address: str + type: str + # _debug: bool + alert_flag: bool + _websocket_url: str + _cur_id: int + _ws: Optional[WebSocket] + _recv_th: Thread + _handle_event_th: Thread + _handle_immediate_event_th: Optional[Thread] + _stopped: Event + event_handlers: dict + immediate_event_handlers: dict + method_results: dict + event_queue: Queue + immediate_event_queue: Queue + + def __init__(self, tab_id: str, tab_type: str, address: str): ... + + def _send(self, message: dict, timeout: float = None) -> dict: ... + + def _recv_loop(self) -> None: ... + + def _handle_event_loop(self) -> None: ... + + def _handle_immediate_event_loop(self): ... + + def _handle_immediate_event(self, function: Callable, kwargs: dict): ... + + def run(self, _method: str, **kwargs) -> dict: ... + + def start(self) -> bool: ... + + def stop(self) -> bool: ... + + def _stop(self) -> None: ... + + def set_callback(self, event: str, callback: Union[Callable, None], immediate: bool = False) -> None: ... + + +class BrowserDriver(Driver): + BROWSERS: Dict[str, Driver] = ... + browser: Browser = ... + + def __new__(cls, tab_id: str, tab_type: str, address: str, browser: Browser): ... + + def __init__(self, tab_id: str, tab_type: str, address: str, browser: Browser): ... + + def get(self, url) -> Response: ... 
diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/_configs/chromium_options.py similarity index 51% rename from DrissionPage/configs/chromium_options.py rename to DrissionPage/_configs/chromium_options.py index 0ade900..11b56f2 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/_configs/chromium_options.py @@ -1,13 +1,18 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from pathlib import Path +from re import search +from shutil import rmtree from tempfile import gettempdir, TemporaryDirectory +from threading import Lock from .options_manage import OptionsManager -from ..commons.tools import port_is_using, clean_folder +from .._functions.tools import port_is_using, clean_folder class ChromiumOptions(object): @@ -19,22 +24,28 @@ class ChromiumOptions(object): self._user_data_path = None self._user = 'Default' self._prefs_to_del = [] + self.clear_file_flags = False + self._headless = None if read_file is not False: ini_path = str(ini_path) if ini_path else None om = OptionsManager(ini_path) self.ini_path = om.ini_path - options = om.chrome_options - self._download_path = om.paths.get('download_path', None) + options = om.chromium_options + self._download_path = om.paths.get('download_path', None) or None + self._tmp_path = om.paths.get('tmp_path', None) or None self._arguments = options.get('arguments', []) - self._binary_location = options.get('binary_location', '') + self._browser_path = options.get('browser_path', '') self._extensions = options.get('extensions', []) - self._prefs = options.get('experimental_options', {}).get('prefs', {}) - self._debugger_address = options.get('debugger_address', None) - self._page_load_strategy = options.get('page_load_strategy', 'normal') - self._proxy = om.proxies.get('http', None) + self._prefs = options.get('prefs', {}) + self._flags = 
options.get('flags', {}) + self._address = options.get('address', None) + self._load_mode = options.get('load_mode', 'normal') self._system_user_path = options.get('system_user_path', False) + self._existing_only = options.get('existing_only', False) + + self._proxy = om.proxies.get('http', None) or om.proxies.get('https', None) user_path = user = False for arg in self._arguments: @@ -48,29 +59,39 @@ class ChromiumOptions(object): break timeouts = om.timeouts - self._timeouts = {'implicit': timeouts['implicit'], + self._timeouts = {'base': timeouts['base'], 'pageLoad': timeouts['page_load'], 'script': timeouts['script']} self._auto_port = options.get('auto_port', False) if self._auto_port: port, path = PortFinder().get_port() - self._debugger_address = f'127.0.0.1:{port}' + self._address = f'127.0.0.1:{port}' self.set_argument('--user-data-dir', path) + + others = om.others + self._retry_times = others.get('retry_times', 3) + self._retry_interval = others.get('retry_interval', 2) + return self.ini_path = None - self._binary_location = "chrome" + self._browser_path = "chrome" self._arguments = [] self._download_path = None + self._tmp_path = None self._extensions = [] self._prefs = {} - self._timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} - self._debugger_address = '127.0.0.1:9222' - self._page_load_strategy = 'normal' + self._flags = {} + self._timeouts = {'base': 10, 'pageLoad': 30, 'script': 30} + self._address = '127.0.0.1:9222' + self._load_mode = 'normal' self._proxy = None self._auto_port = False self._system_user_path = False + self._existing_only = False + self._retry_times = 3 + self._retry_interval = 2 @property def download_path(self): @@ -80,22 +101,27 @@ class ChromiumOptions(object): @property def browser_path(self): """浏览器启动文件路径""" - return self._binary_location + return self._browser_path @property def user_data_path(self): """返回用户数据文件夹路径""" return self._user_data_path + @property + def tmp_path(self): + """返回临时文件夹路径""" + return 
self._tmp_path + @property def user(self): """返回用户配置文件夹名称""" return self._user @property - def page_load_strategy(self): + def load_mode(self): """返回页面加载策略,'normal', 'eager', 'none'""" - return self._page_load_strategy + return self._load_mode @property def timeouts(self): @@ -108,15 +134,9 @@ class ChromiumOptions(object): return self._proxy @property - def debugger_address(self): + def address(self): """返回浏览器地址,ip:port""" - return self._debugger_address - - @debugger_address.setter - def debugger_address(self, address): - """设置浏览器地址,格式ip:port""" - address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') - self._debugger_address = address + return self._address @property def arguments(self): @@ -133,11 +153,48 @@ class ChromiumOptions(object): """返回用户首选项配置""" return self._prefs + @property + def flags(self): + """返回实验项配置""" + return self._flags + @property def system_user_path(self): """返回是否使用系统安装的浏览器所使用的用户数据文件夹""" return self._system_user_path + @property + def is_existing_only(self): + """返回是否只接管现有浏览器方式""" + return self._existing_only + + @property + def is_auto_port(self): + """返回是否使用自动端口和用户文件""" + return self._auto_port + + @property + def retry_times(self): + """返回连接失败时的重试次数""" + return self._retry_times + + @property + def retry_interval(self): + """返回连接失败时的重试间隔(秒)""" + return self._retry_interval + + def set_retry(self, times=None, interval=None): + """设置连接失败时的重试操作 + :param times: 重试次数 + :param interval: 重试间隔 + :return: 当前对象 + """ + if times is not None: + self._retry_times = times + if interval is not None: + self._retry_interval = interval + return self + def set_argument(self, arg, value=None): """设置浏览器配置的argument属性 :param arg: 属性名 @@ -201,7 +258,7 @@ class ChromiumOptions(object): :param arg: 设置项名称 :return: 当前对象 """ - self._prefs.pop(arg) + self._prefs.pop(arg, None) return self def remove_pref_from_file(self, arg): @@ -212,15 +269,33 @@ class ChromiumOptions(object): self._prefs_to_del.append(arg) return self - def 
set_timeouts(self, implicit=None, pageLoad=None, script=None): + def set_flag(self, flag, value=None): + """设置实验项 + :param flag: 设置项名称 + :param value: 设置项的值,为False则删除该项 + :return: 当前对象 + """ + if value is False: + self._flags.pop(flag, None) + else: + self._flags[flag] = value + return self + + def clear_flags_in_file(self): + """删除浏览器配置文件中已设置的实验项""" + self.clear_file_flags = True + return self + + def set_timeouts(self, base=None, pageLoad=None, script=None, implicit=None): """设置超时时间,单位为秒 - :param implicit: 默认超时时间 + :param base: 默认超时时间 :param pageLoad: 页面加载超时时间 :param script: 脚本运行超时时间 :return: 当前对象 """ - if implicit is not None: - self._timeouts['implicit'] = implicit + base = base if base is not None else implicit + if base is not None: + self._timeouts['base'] = base if pageLoad is not None: self._timeouts['pageLoad'] = pageLoad if script is not None: @@ -237,15 +312,15 @@ class ChromiumOptions(object): self._user = user return self - def set_headless(self, on_off=True): + def headless(self, on_off=True): """设置是否隐藏浏览器界面 :param on_off: 开或关 :return: 当前对象 """ - on_off = 'new' if on_off else False + on_off = 'new' if on_off else 'false' return self.set_argument('--headless', on_off) - def set_no_imgs(self, on_off=True): + def no_imgs(self, on_off=True): """设置是否加载图片 :param on_off: 开或关 :return: 当前对象 @@ -253,7 +328,7 @@ class ChromiumOptions(object): on_off = None if on_off else False return self.set_argument('--blink-settings=imagesEnabled=false', on_off) - def set_no_js(self, on_off=True): + def no_js(self, on_off=True): """设置是否禁用js :param on_off: 开或关 :return: 当前对象 @@ -261,7 +336,7 @@ class ChromiumOptions(object): on_off = None if on_off else False return self.set_argument('--disable-javascript', on_off) - def set_mute(self, on_off=True): + def mute(self, on_off=True): """设置是否静音 :param on_off: 开或关 :return: 当前对象 @@ -269,6 +344,22 @@ class ChromiumOptions(object): on_off = None if on_off else False return self.set_argument('--mute-audio', on_off) + def incognito(self, 
on_off=True): + """设置是否使用无痕模式启动 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = None if on_off else False + return self.set_argument('--incognito', on_off) + + def ignore_certificate_errors(self, on_off=True): + """设置是否忽略证书错误 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = None if on_off else False + return self.set_argument('--ignore-certificate-errors', on_off) + def set_user_agent(self, user_agent): """设置user agent :param user_agent: user agent文本 @@ -281,11 +372,15 @@ class ChromiumOptions(object): :param proxy: 代理url和端口 :return: 当前对象 """ + if search(r'.*?:.*?@.*?\..*', proxy): + print('你似乎在设置使用账号密码的代理,暂时不支持这种代理,可自行用插件实现需求。') + if proxy.lower().startswith('socks'): + print('你似乎在设置使用socks代理,暂时不支持这种代理,可自行用插件实现需求。') self._proxy = proxy return self.set_argument('--proxy-server', proxy) - def set_page_load_strategy(self, value): - """设置page_load_strategy,可接收 'normal', 'eager', 'none' + def set_load_mode(self, value): + """设置load_mode,可接收 'normal', 'eager', 'none' normal:默认情况下使用, 等待所有资源下载完成 eager:DOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中 none:完全不阻塞 @@ -293,46 +388,104 @@ class ChromiumOptions(object): :return: 当前对象 """ if value not in ('normal', 'eager', 'none'): - raise ValueError("只能选择'normal', 'eager', 'none'。") - self._page_load_strategy = value.lower() + raise ValueError("只能选择 'normal', 'eager', 'none'。") + self._load_mode = value.lower() return self - def set_paths(self, browser_path=None, local_port=None, debugger_address=None, download_path=None, - user_data_path=None, cache_path=None): + def set_paths(self, browser_path=None, local_port=None, address=None, download_path=None, + user_data_path=None, cache_path=None, debugger_address=None): """快捷的路径设置函数 :param browser_path: 浏览器可执行文件路径 :param local_port: 本地端口号 - :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 + :param address: 调试浏览器地址,例:127.0.0.1:9222 :param download_path: 下载文件路径 :param user_data_path: 用户数据路径 :param cache_path: 缓存路径 :return: 当前对象 """ + address = address or debugger_address if browser_path is 
not None: - self._binary_location = str(browser_path) - self._auto_port = False + self.set_browser_path(browser_path) if local_port is not None: - self._debugger_address = f'127.0.0.1:{local_port}' - self._auto_port = False + self.set_local_port(local_port) - if debugger_address is not None: - self.debugger_address = debugger_address + if address is not None: + self.set_address(address) if download_path is not None: - self._download_path = str(download_path) + self.set_download_path(download_path) if user_data_path is not None: - u = str(user_data_path) - self.set_argument('--user-data-dir', u) - self._user_data_path = u - self._auto_port = False + self.set_user_data_path(user_data_path) if cache_path is not None: - self.set_argument('--disk-cache-dir', str(cache_path)) + self.set_cache_path(cache_path) return self + def set_local_port(self, port): + """设置本地启动端口 + :param port: 端口号 + :return: 当前对象 + """ + self._address = f'127.0.0.1:{port}' + self._auto_port = False + return self + + def set_address(self, address): + """设置浏览器地址,格式'ip:port' + :param address: 浏览器地址 + :return: 当前对象 + """ + address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') + self._address = address + return self + + def set_browser_path(self, path): + """设置浏览器可执行文件路径 + :param path: 浏览器路径 + :return: 当前对象 + """ + self._browser_path = str(path) + self._auto_port = False + return self + + def set_download_path(self, path): + """设置下载文件保存路径 + :param path: 下载路径 + :return: 当前对象 + """ + self._download_path = str(path) + return self + + def set_tmp_path(self, path): + """设置临时文件文件保存路径 + :param path: 下载路径 + :return: 当前对象 + """ + self._tmp_path = str(path) + return self + + def set_user_data_path(self, path): + """设置用户文件夹路径 + :param path: 用户文件夹路径 + :return: 当前对象 + """ + u = str(path) + self.set_argument('--user-data-dir', u) + self._user_data_path = u + self._auto_port = False + return self + + def set_cache_path(self, path): + """设置缓存路径 + :param path: 缓存路径 + :return: 当前对象 + 
""" + self.set_argument('--disk-cache-dir', str(path)) + return self + def use_system_user_path(self, on_off=True): """设置是否使用系统安装的浏览器默认用户文件夹 :param on_off: 开或关 @@ -341,19 +494,28 @@ class ChromiumOptions(object): self._system_user_path = on_off return self - def auto_port(self, on_off=True): + def auto_port(self, on_off=True, tmp_path=None): """自动获取可用端口 :param on_off: 是否开启自动获取端口号 + :param tmp_path: 临时文件保存路径,为None时保存到系统临时文件夹,on_off为False时此参数无效 :return: 当前对象 """ if on_off: - port, path = PortFinder().get_port() - self.set_paths(local_port=port, user_data_path=path) self._auto_port = True + if tmp_path: + self._tmp_path = str(tmp_path) else: self._auto_port = False return self + def existing_only(self, on_off=True): + """设置只接管已有浏览器,不自动启动新的 + :param on_off: 是否开启自动获取端口号 + :return: 当前对象 + """ + self._existing_only = on_off + return self + def save(self, path=None): """保存设置到文件 :param path: ini文件的路径, None 保存到当前读取的配置文件,传入 'default' 保存到默认ini文件 @@ -378,24 +540,26 @@ class ChromiumOptions(object): else: om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) - # 设置chrome_options - attrs = ('debugger_address', 'binary_location', 'arguments', 'extensions', 'user', 'page_load_strategy', - 'auto_port', 'system_user_path') + # 设置chromium_options + attrs = ('address', 'browser_path', 'arguments', 'extensions', 'user', 'load_mode', + 'auto_port', 'system_user_path', 'existing_only', 'flags') for i in attrs: - om.set_item('chrome_options', i, self.__getattribute__(f'_{i}')) + om.set_item('chromium_options', i, self.__getattribute__(f'_{i}')) # 设置代理 om.set_item('proxies', 'http', self._proxy) om.set_item('proxies', 'https', self._proxy) # 设置路径 - om.set_item('paths', 'download_path', self._download_path) + om.set_item('paths', 'download_path', self._download_path or '') + om.set_item('paths', 'tmp_path', self._tmp_path or '') # 设置timeout - om.set_item('timeouts', 'implicit', self._timeouts['implicit']) + om.set_item('timeouts', 'base', self._timeouts['base']) 
om.set_item('timeouts', 'page_load', self._timeouts['pageLoad']) om.set_item('timeouts', 'script', self._timeouts['script']) + # 设置重试 + om.set_item('others', 'retry_times', self.retry_times) + om.set_item('others', 'retry_interval', self.retry_interval) # 设置prefs - eo = om.chrome_options.get('experimental_options', {}) - eo['prefs'] = self._prefs - om.set_item('chrome_options', 'experimental_options', eo) + om.set_item('chromium_options', 'prefs', self._prefs) path = str(path) om.save(path) @@ -406,12 +570,67 @@ class ChromiumOptions(object): """保存当前配置到默认ini文件""" return self.save('default') + def __repr__(self): + return f'<ChromiumOptions at {id(self)}>' + + # ---------------即将废弃-------------- + + @property + def debugger_address(self): + """返回浏览器地址,ip:port""" + return self._address + + @debugger_address.setter + def debugger_address(self, address): + """设置浏览器地址,格式ip:port""" + self.set_address(address) + + def set_page_load_strategy(self, value): + return self.set_load_mode(value) + + def set_headless(self, on_off=True): + """设置是否隐藏浏览器界面 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = 'new' if on_off else 'false' + return self.set_argument('--headless', on_off) + + def set_no_imgs(self, on_off=True): + """设置是否加载图片 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = None if on_off else False + return self.set_argument('--blink-settings=imagesEnabled=false', on_off) + + def set_no_js(self, on_off=True): + """设置是否禁用js + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = None if on_off else False + return self.set_argument('--disable-javascript', on_off) + + def set_mute(self, on_off=True): + """设置是否静音 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = None if on_off else False + return self.set_argument('--mute-audio', on_off) + class PortFinder(object): - used_port = [] + used_port = {} + lock = Lock() - def __init__(self): - self.tmp_dir = Path(gettempdir()) / 'DrissionPage' / 'TempFolder' + def __init__(self, path=None): + """ + :param path: 
临时文件保存路径,为None时使用系统临时文件夹 + """ + tmp = Path(path) if path else Path(gettempdir()) / 'DrissionPage' + self.tmp_dir = tmp / 'UserTempFolder' self.tmp_dir.mkdir(parents=True, exist_ok=True) if not PortFinder.used_port: clean_folder(self.tmp_dir) @@ -420,12 +639,21 @@ class PortFinder(object): """查找一个可用端口 :return: 可以使用的端口和用户文件夹路径组成的元组 """ - for i in range(9600, 19800): - if i in PortFinder.used_port or port_is_using('127.0.0.1', i): - continue + with PortFinder.lock: + for i in range(9600, 19600): + if i in PortFinder.used_port: + continue + elif port_is_using('127.0.0.1', i): + PortFinder.used_port[i] = None + continue + path = TemporaryDirectory(dir=self.tmp_dir).name + PortFinder.used_port[i] = path + return i, path - path = TemporaryDirectory(dir=self.tmp_dir) - PortFinder.used_port.append(i) - return i, path.name + for i in range(9600, 19600): + if port_is_using('127.0.0.1', i): + continue + rmtree(PortFinder.used_port[i], ignore_errors=True) + return i, TemporaryDirectory(dir=self.tmp_dir).name raise OSError('未找到可用端口。') diff --git a/DrissionPage/_configs/chromium_options.pyi b/DrissionPage/_configs/chromium_options.pyi new file mode 100644 index 0000000..ce2a748 --- /dev/null +++ b/DrissionPage/_configs/chromium_options.pyi @@ -0,0 +1,173 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from threading import Lock +from typing import Union, Tuple, Any, Literal, Optional + + +class ChromiumOptions(object): + def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None): + self.ini_path: str = ... + self._driver_path: str = ... + self._user_data_path: str = ... + self._download_path: str = ... + self._tmp_path: str = ... + self._arguments: list = ... + self._browser_path: str = ... + self._user: str = ... + self._load_mode: str = ... + self._timeouts: dict = ... + self._proxy: str = ... 
+ self._address: str = ... + self._extensions: list = ... + self._prefs: dict = ... + self._flags: dict = ... + self._prefs_to_del: list = ... + self.clear_file_flags: bool = ... + self._auto_port: bool = ... + self._system_user_path: bool = ... + self._existing_only: bool = ... + self._headless: bool = ... + self._retry_times: int = ... + self._retry_interval: float = ... + + @property + def download_path(self) -> str: ... + + @property + def browser_path(self) -> str: ... + + @property + def user_data_path(self) -> str: ... + + @property + def tmp_path(self) -> Optional[str]: ... + + @property + def user(self) -> str: ... + + @property + def load_mode(self) -> str: ... + + @property + def timeouts(self) -> dict: ... + + @property + def proxy(self) -> str: ... + + @property + def address(self) -> str: ... + + @property + def arguments(self) -> list: ... + + @property + def extensions(self) -> list: ... + + @property + def preferences(self) -> dict: ... + + @property + def flags(self) -> dict: ... + + @property + def system_user_path(self) -> bool: ... + + @property + def is_existing_only(self) -> bool: ... + + @property + def is_auto_port(self) -> bool: ... + + @property + def retry_times(self) -> int: ... + + @property + def retry_interval(self) -> float: ... + + def set_retry(self, times: int = None, interval: float = None) -> ChromiumOptions: ... + + def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: ... + + def remove_argument(self, value: str) -> ChromiumOptions: ... + + def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def remove_extensions(self) -> ChromiumOptions: ... + + def set_pref(self, arg: str, value: Any) -> ChromiumOptions: ... + + def remove_pref(self, arg: str) -> ChromiumOptions: ... + + def remove_pref_from_file(self, arg: str) -> ChromiumOptions: ... + + def set_flag(self, flag: str, value: Union[int, str, bool] = None) -> ChromiumOptions: ... 
+ + def clear_flags_in_file(self) -> ChromiumOptions: ... + + def set_timeouts(self, base: float = None, pageLoad: float = None, + script: float = None) -> ChromiumOptions: ... + + def set_user(self, user: str = 'Default') -> ChromiumOptions: ... + + def headless(self, on_off: bool = True) -> ChromiumOptions: ... + + def no_imgs(self, on_off: bool = True) -> ChromiumOptions: ... + + def no_js(self, on_off: bool = True) -> ChromiumOptions: ... + + def mute(self, on_off: bool = True) -> ChromiumOptions: ... + + def incognito(self, on_off: bool = True) -> ChromiumOptions: ... + + def set_user_agent(self, user_agent: str) -> ChromiumOptions: ... + + def set_proxy(self, proxy: str) -> ChromiumOptions: ... + + def ignore_certificate_errors(self, on_off=True) -> ChromiumOptions: ... + + def set_load_mode(self, value: Literal['normal', 'eager', 'none']) -> ChromiumOptions: ... + + def set_browser_path(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def set_local_port(self, port: Union[str, int]) -> ChromiumOptions: ... + + def set_address(self, address: str) -> ChromiumOptions: ... + + def set_download_path(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def set_tmp_path(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def set_user_data_path(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def set_cache_path(self, path: Union[str, Path]) -> ChromiumOptions: ... + + def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None, + address: str = None, download_path: Union[str, Path] = None, user_data_path: Union[str, Path] = None, + cache_path: Union[str, Path] = None) -> ChromiumOptions: ... + + def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: ... + + def auto_port(self, on_off: bool = True, tmp_path: Union[str, Path] = None) -> ChromiumOptions: ... + + def existing_only(self, on_off: bool = True) -> ChromiumOptions: ... + + def save(self, path: Union[str, Path] = None) -> str: ... 
+ + def save_to_default(self) -> str: ... + + +class PortFinder(object): + used_port: dict = ... + lock: Lock = ... + tmp_dir: Path = ... + + def __init__(self, path: Union[str, Path] = None): ... + + @staticmethod + def get_port() -> Tuple[int, str]: ... diff --git a/DrissionPage/configs/configs.ini b/DrissionPage/_configs/configs.ini similarity index 52% rename from DrissionPage/configs/configs.ini rename to DrissionPage/_configs/configs.ini index ab4c0a1..2eb7adb 100644 --- a/DrissionPage/configs/configs.ini +++ b/DrissionPage/_configs/configs.ini @@ -1,22 +1,25 @@ [paths] download_path = +tmp_path = -[chrome_options] -debugger_address = 127.0.0.1:9222 -binary_location = chrome -arguments = ['--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking'] +[chromium_options] +address = 127.0.0.1:9222 +browser_path = chrome +arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking'] extensions = [] -experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}} -page_load_strategy = normal +prefs = {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}} +flags = {} +load_mode = normal user = Default auto_port = False system_user_path = False +existing_only = False [session_options] headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': 'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7'} [timeouts] -implicit = 10 +base = 10 page_load = 30 script = 30 @@ -24,3 +27,6 @@ script = 30 http = https = +[others] +retry_times = 3 +retry_interval = 2 diff --git a/DrissionPage/configs/options_manage.py b/DrissionPage/_configs/options_manage.py similarity 
index 92% rename from DrissionPage/configs/options_manage.py rename to DrissionPage/_configs/options_manage.py index 5f2b3e5..703adbd 100644 --- a/DrissionPage/configs/options_manage.py +++ b/DrissionPage/_configs/options_manage.py @@ -1,7 +1,9 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from configparser import RawConfigParser, NoSectionError, NoOptionError from pathlib import Path @@ -26,7 +28,8 @@ class OptionsManager(object): self.ini_path = str(path) if not Path(self.ini_path).exists(): - raise FileNotFoundError('ini文件不存在。') + input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission' + 'pagedocs/advance/packaging/') self._conf = RawConfigParser() self._conf.read(self.ini_path, encoding='utf-8') diff --git a/DrissionPage/configs/options_manage.pyi b/DrissionPage/_configs/options_manage.pyi similarity index 86% rename from DrissionPage/configs/options_manage.pyi rename to DrissionPage/_configs/options_manage.pyi index bd431f5..805ae92 100644 --- a/DrissionPage/configs/options_manage.pyi +++ b/DrissionPage/_configs/options_manage.pyi @@ -1,7 +1,9 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" from configparser import RawConfigParser from typing import Any diff --git a/DrissionPage/configs/session_options.py b/DrissionPage/_configs/session_options.py similarity index 71% rename from DrissionPage/configs/session_options.py rename to DrissionPage/_configs/session_options.py index 01cc3d3..532098a 100644 --- a/DrissionPage/configs/session_options.py +++ b/DrissionPage/_configs/session_options.py @@ -1,7 +1,9 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from pathlib import Path @@ -9,7 +11,7 @@ from requests import Session from requests.structures import CaseInsensitiveDict from .options_manage import OptionsManager -from ..commons.web import cookies_to_tuple, set_session_cookies +from .._functions.web import cookies_to_tuple, set_session_cookies class SessionOptions(object): @@ -22,6 +24,9 @@ class SessionOptions(object): """ self.ini_path = None self._download_path = None + self._timeout = 10 + self._del_set = set() # 记录要从ini文件删除的参数 + self._headers = None self._cookies = None self._auth = None @@ -34,46 +39,51 @@ class SessionOptions(object): self._stream = None self._trust_env = None self._max_redirects = None - self._timeout = 10 + self._retry_times = 3 + self._retry_interval = 2 - self._del_set = set() # 记录要从ini文件删除的参数 + if read_file is False: + return - if read_file is not False: - ini_path = str(ini_path) if ini_path else None - om = OptionsManager(ini_path) - self.ini_path = om.ini_path - options_dict = om.session_options + ini_path = str(ini_path) if ini_path else None + om = OptionsManager(ini_path) + self.ini_path = om.ini_path - if options_dict.get('headers', None) is not None: - self.set_headers(options_dict['headers']) + options = om.session_options + if options.get('headers', None) is not None: + self.set_headers(options['headers']) - if options_dict.get('cookies', None) is not None: - 
self.set_cookies(options_dict['cookies']) + if options.get('cookies', None) is not None: + self.set_cookies(options['cookies']) - if options_dict.get('auth', None) is not None: - self._auth = options_dict['auth'] + if options.get('auth', None) is not None: + self._auth = options['auth'] - if options_dict.get('params', None) is not None: - self._params = options_dict['params'] + if options.get('params', None) is not None: + self._params = options['params'] - if options_dict.get('verify', None) is not None: - self._verify = options_dict['verify'] + if options.get('verify', None) is not None: + self._verify = options['verify'] - if options_dict.get('cert', None) is not None: - self._cert = options_dict['cert'] + if options.get('cert', None) is not None: + self._cert = options['cert'] - if options_dict.get('stream', None) is not None: - self._stream = options_dict['stream'] + if options.get('stream', None) is not None: + self._stream = options['stream'] - if options_dict.get('trust_env', None) is not None: - self._trust_env = options_dict['trust_env'] + if options.get('trust_env', None) is not None: + self._trust_env = options['trust_env'] - if options_dict.get('max_redirects', None) is not None: - self._max_redirects = options_dict['max_redirects'] + if options.get('max_redirects', None) is not None: + self._max_redirects = options['max_redirects'] - self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) - self._timeout = om.timeouts.get('implicit', 10) - self._download_path = om.paths.get('download_path', None) + self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) + self._timeout = om.timeouts.get('base', 10) + self._download_path = om.paths.get('download_path', None) or None + + others = om.others + self._retry_times = others.get('retry_times', 3) + self._retry_interval = others.get('retry_interval', 2) # ===========须独立处理的项开始============ @property @@ -81,13 +91,12 @@ class SessionOptions(object): """返回默认下载路径属性信息""" 
return self._download_path - def set_paths(self, download_path=None): + def set_download_path(self, path): """设置默认下载路径 - :param download_path: 下载路径 + :param path: 下载路径 :return: 返回当前对象 """ - if download_path is not None: - self._download_path = str(download_path) + self._download_path = str(path) return self @property @@ -110,14 +119,35 @@ class SessionOptions(object): self._proxies = {} return self._proxies - def set_proxies(self, http, https=None): + def set_proxies(self, http=None, https=None): """设置proxies参数 :param http: http代理地址 :param https: https代理地址 :return: 返回当前对象 """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._sets('proxies', proxies) + self._sets('proxies', {'http': http, 'https': https}) + return self + + @property + def retry_times(self): + """返回连接失败时的重试次数""" + return self._retry_times + + @property + def retry_interval(self): + """返回连接失败时的重试间隔(秒)""" + return self._retry_interval + + def set_retry(self, times=None, interval=None): + """设置连接失败时的重试操作 + :param times: 重试次数 + :param interval: 重试间隔 + :return: 当前对象 + """ + if times is not None: + self._retry_times = times + if interval is not None: + self._retry_interval = interval return self # ===========须独立处理的项结束============ @@ -162,8 +192,7 @@ class SessionOptions(object): return self attr = attr.lower() - if attr in self._headers: - self._headers.pop(attr) + self._headers.pop(attr, None) return self @@ -351,10 +380,12 @@ class SessionOptions(object): if i not in ('download_path', 'timeout', 'proxies'): om.set_item('session_options', i, options[i]) - om.set_item('paths', 'download_path', self.download_path) - om.set_item('timeouts', 'implicit', self.timeout) + om.set_item('paths', 'download_path', self.download_path or '') + om.set_item('timeouts', 'base', self.timeout) om.set_item('proxies', 'http', self.proxies.get('http', None)) om.set_item('proxies', 'https', self.proxies.get('https', None)) + om.set_item('others', 'retry_times', self.retry_times) + 
om.set_item('others', 'retry_interval', self.retry_interval) for i in self._del_set: if i == 'download_path': @@ -379,25 +410,57 @@ class SessionOptions(object): return session_options_to_dict(self) def make_session(self): - """根据内在的配置生成Session对象""" + """根据内在的配置生成Session对象,ua从对象中分离""" s = Session() + h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict() - if self.headers: - s.headers = CaseInsensitiveDict(self.headers) if self.cookies: set_session_cookies(s, self.cookies) if self.adapters: for url, adapter in self.adapters: s.mount(url, adapter) - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] - for i in attrs: + for i in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']: attr = self.__getattribute__(i) if attr: s.__setattr__(i, attr) - return s + return s, h + + def from_session(self, session, headers=None): + """从Session对象中读取配置 + :param session: Session对象 + :param headers: headers + :return: 当前对象 + """ + self._headers = CaseInsensitiveDict(**session.headers, **headers) if headers else session.headers + self._cookies = session.cookies + self._auth = session.auth + self._proxies = session.proxies + self._hooks = session.hooks + self._params = session.params + self._verify = session.verify + self._cert = session.cert + self._stream = session.stream + self._trust_env = session.trust_env + self._max_redirects = session.max_redirects + if session.adapters: + self._adapters = [(k, i) for k, i in session.adapters.items()] + return self + + # --------------即将废弃--------------- + + def set_paths(self, download_path=None): + """设置默认下载路径 + :param download_path: 下载路径 + :return: 返回当前对象 + """ + if download_path is not None: + self._download_path = str(download_path) + return self + + def __repr__(self): + return f'<SessionOptions at {id(self)}>' def session_options_to_dict(options): diff --git a/DrissionPage/configs/session_options.pyi 
b/DrissionPage/_configs/session_options.pyi similarity index 80% rename from DrissionPage/configs/session_options.pyi rename to DrissionPage/_configs/session_options.pyi index 3c0ae72..cc1b4e9 100644 --- a/DrissionPage/configs/session_options.pyi +++ b/DrissionPage/_configs/session_options.pyi @@ -1,15 +1,18 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from pathlib import Path -from typing import Any, Union, Tuple +from typing import Any, Union, Tuple, Optional from requests import Session from requests.adapters import HTTPAdapter from requests.auth import HTTPBasicAuth from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict class SessionOptions(object): @@ -30,11 +33,13 @@ class SessionOptions(object): self._max_redirects: int = ... self._timeout: float = ... self._del_set: set = ... + self._retry_times: int = ... + self._retry_interval: float = ... @property def download_path(self) -> str: ... - def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ... + def set_download_path(self, path: Union[str, Path]) -> SessionOptions: ... @property def timeout(self) -> float: ... @@ -65,6 +70,14 @@ class SessionOptions(object): def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions: ... + @property + def retry_times(self) -> int: ... + + @property + def retry_interval(self) -> float: ... + + def set_retry(self, times: int = None, interval: float = None) -> SessionOptions: ... + @property def hooks(self) -> dict: ... @@ -113,7 +126,9 @@ class SessionOptions(object): def as_dict(self) -> dict: ... - def make_session(self) -> Session: ... + def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: ... + + def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: ... 
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ... diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py new file mode 100644 index 0000000..9dff0a2 --- /dev/null +++ b/DrissionPage/_elements/chromium_element.py @@ -0,0 +1,1528 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from os.path import basename, sep +from pathlib import Path +from re import search +from time import perf_counter, sleep + +from DataRecorder.tools import get_usable_path + +from .none_element import NoneElement +from .session_element import make_session_ele +from .._base.base import DrissionElement, BaseElement +from .._functions.keys import input_text_or_keys +from .._functions.locator import get_loc +from .._functions.settings import Settings +from .._functions.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll +from .._units.clicker import Clicker +from .._units.rect import ElementRect +from .._units.scroller import ElementScroller +from .._units.selector import SelectElement +from .._units.setter import ChromiumElementSetter +from .._units.states import ElementStates, ShadowRootStates +from .._units.waiter import ElementWaiter +from ..errors import (ContextLostError, ElementLostError, JavaScriptError, ElementNotFoundError, + CDPError, NoResourceError, AlertExistsError) + +__FRAME_ELEMENT__ = ('iframe', 'frame') + + +class ChromiumElement(DrissionElement): + """控制浏览器元素的对象""" + + def __init__(self, page, node_id=None, obj_id=None, backend_id=None): + """node_id、obj_id和backend_id必须至少传入一个 + :param page: 元素所在页面对象 + :param node_id: cdp中的node id + :param obj_id: js中的object id + :param backend_id: backend id + """ + super().__init__(page) + self._select = None + self._scroll = None + self._rect = None + self._set = None + self._states = None + self._pseudo = 
None + self._clicker = None + self._tag = None + self._wait = None + + if node_id and obj_id and backend_id: + self._node_id = node_id + self._obj_id = obj_id + self._backend_id = backend_id + elif node_id: + self._node_id = node_id + self._obj_id = self._get_obj_id(node_id) + self._backend_id = self._get_backend_id(self._node_id) + elif obj_id: + self._node_id = self._get_node_id(obj_id) + self._obj_id = obj_id + self._backend_id = self._get_backend_id(self._node_id) + elif backend_id: + self._obj_id = self._get_obj_id(backend_id=backend_id) + self._node_id = self._get_node_id(obj_id=self._obj_id) + self._backend_id = backend_id + else: + raise ElementLostError + + doc = self.run_js('return this.ownerDocument;') + self._doc_id = doc['objectId'] if doc else None + + def __repr__(self): + attrs = self.attrs + attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] + return f'<ChromiumElement {self.tag} {" ".join(attrs)}>' + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间(秒) + :return: ChromiumElement对象或属性、文本 + """ + return self.ele(loc_or_str, timeout) + + def __eq__(self, other): + return self._backend_id == getattr(other, '_backend_id', None) + + @property + def tag(self): + """返回元素tag""" + if self._tag is None: + self._tag = self.page.run_cdp('DOM.describeNode', + backendNodeId=self._backend_id)['node']['localName'].lower() + return self._tag + + @property + def html(self): + """返回元素outerHTML文本""" + return self.page.run_cdp('DOM.getOuterHTML', backendNodeId=self._backend_id)['outerHTML'] + + @property + def inner_html(self): + """返回元素innerHTML文本""" + return self.run_js('return this.innerHTML;') + + @property + def attrs(self): + """返回元素所有attribute属性""" + try: + attrs = self.page.run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] + return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} + except CDPError: # 文档根元素不能调用此方法 + return {} + + @property + def text(self): 
+ """返回元素内所有文本,文本已格式化""" + return get_ele_txt(make_session_ele(self.html)) + + @property + def raw_text(self): + """返回未格式化处理的元素内文本""" + return self.prop('innerText') + + # -----------------d模式独有属性------------------- + + @property + def set(self): + """返回用于设置元素属性的对象""" + if self._set is None: + self._set = ChromiumElementSetter(self) + return self._set + + @property + def states(self): + """返回用于获取元素状态的对象""" + if self._states is None: + self._states = ElementStates(self) + return self._states + + @property + def pseudo(self): + """返回用于获取伪元素内容的对象""" + if self._pseudo is None: + self._pseudo = Pseudo(self) + return self._pseudo + + @property + def rect(self): + """返回用于获取元素位置的对象""" + if self._rect is None: + self._rect = ElementRect(self) + return self._rect + + @property + def shadow_root(self): + """返回当前元素的shadow_root元素对象""" + info = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] + if not info.get('shadowRoots', None): + return None + + return ShadowRoot(self, backend_id=info['shadowRoots'][0]['backendNodeId']) + + @property + def sr(self): + """返回当前元素的shadow_root元素对象""" + return self.shadow_root + + @property + def scroll(self): + """用于滚动滚动条的对象""" + if self._scroll is None: + self._scroll = ElementScroller(self) + return self._scroll + + @property + def click(self): + """返回用于点击的对象""" + if self._clicker is None: + self._clicker = Clicker(self) + return self._clicker + + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = ElementWaiter(self.page, self) + return self._wait + + @property + def select(self): + """返回专门处理下拉列表的Select类,非下拉列表元素返回False""" + if self._select is None: + if self.tag != 'select': + self._select = False + else: + self._select = SelectElement(self) + + return self._select + + def check(self, uncheck=False, by_js=False): + """选中或取消选中当前元素 + :param uncheck: 是否取消选中 + :param by_js: 是否用js执行 + :return: None + """ + is_checked = self.states.is_checked + if by_js: + js = None + if is_checked and 
uncheck: + js = 'this.checked=false' + elif not is_checked and not uncheck: + js = 'this.checked=true' + if js: + self.run_js(js) + self.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') + + else: + if (is_checked and uncheck) or (not is_checked and not uncheck): + self.click() + + def parent(self, level_or_loc=1, index=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :return: 上级元素对象 + """ + return super().parent(level_or_loc, index) + + def child(self, filter_loc='', index=1, timeout=None, ele_only=True): + """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 直接子元素或节点文本 + """ + return super().child(filter_loc, index, timeout, ele_only=ele_only) + + def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): + """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 兄弟元素或节点文本 + """ + return super().prev(filter_loc, index, timeout, ele_only=ele_only) + + def next(self, filter_loc='', index=1, timeout=None, ele_only=True): + """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 兄弟元素或节点文本 + """ + return super().next(filter_loc, index, timeout, ele_only=ele_only) + + def before(self, filter_loc='', index=1, timeout=None, ele_only=True): + """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素前面的某个元素或节点 + """ + return super().before(filter_loc, index, timeout, ele_only=ele_only) + + def after(self, 
filter_loc='', index=1, timeout=None, ele_only=True): + """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素后面的某个元素或节点 + """ + return super().after(filter_loc, index, timeout, ele_only=ele_only) + + def children(self, filter_loc='', timeout=None, ele_only=True): + """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 直接子元素或节点文本组成的列表 + """ + return super().children(filter_loc, timeout, ele_only=ele_only) + + def prevs(self, filter_loc='', timeout=None, ele_only=True): + """返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().prevs(filter_loc, timeout, ele_only=ele_only) + + def nexts(self, filter_loc='', timeout=None, ele_only=True): + """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().nexts(filter_loc, timeout, ele_only=ele_only) + + def befores(self, filter_loc='', timeout=None, ele_only=True): + """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素前面的元素或节点组成的列表 + """ + return super().befores(filter_loc, timeout, ele_only=ele_only) + + def afters(self, filter_loc='', timeout=None, ele_only=True): + """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间(秒) + :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 + :return: 本元素后面的元素或节点组成的列表 + """ + return super().afters(filter_loc, timeout, ele_only=ele_only) + + def attr(self, attr): + 
"""返回一个attribute属性值 + :param attr: 属性名 + :return: 属性值文本,没有该属性返回None + """ + attrs = self.attrs + if attr == 'href': # 获取href属性时返回绝对url + link = attrs.get('href', None) + if not link or link.lower().startswith(('javascript:', 'mailto:')): + return link + else: + return make_absolute_link(link, self.prop('baseURI')) + + elif attr == 'src': + return make_absolute_link(attrs.get('src', None), self.prop('baseURI')) + + elif attr == 'text': + return self.text + + elif attr == 'innerText': + return self.raw_text + + elif attr in ('html', 'outerHTML'): + return self.html + + elif attr == 'innerHTML': + return self.inner_html + + else: + return attrs.get(attr, None) + + def remove_attr(self, attr): + """删除元素一个attribute属性 + :param attr: 属性名 + :return: None + """ + self.run_js(f'this.removeAttribute("{attr}");') + + def prop(self, prop): + """获取一个property属性值 + :param prop: 属性名 + :return: 属性值文本 + """ + try: + value = self.run_js(f'return this.{prop};') + return format_html(value) if isinstance(value, str) else value + except: + return None + + def run_js(self, script, *args, as_expr=False, timeout=None): + """对本元素执行javascript代码 + :param script: js文本,文本中用this表示本元素 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... + :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script设置 + :return: 运行的结果 + """ + return run_js(self, script, as_expr, self.page.timeouts.script if timeout is None else timeout, args) + + def run_async_js(self, script, *args, as_expr=False): + """以异步方式对本元素执行javascript代码 + :param script: js文本,文本中用this表示本元素 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... 
+ :param as_expr: 是否作为表达式运行,为True时args无效 + :return: None + """ + run_js(self, script, as_expr, 0, args) + + def ele(self, loc_or_str, timeout=None): + """返回当前元素下级符合条件的第一个元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 + :return: ChromiumElement对象或属性、文本 + """ + return self._ele(loc_or_str, timeout, method='ele()') + + def eles(self, loc_or_str, timeout=None): + """返回当前元素下级所有符合条件的子元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 + :return: ChromiumElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, timeout=timeout, single=False) + + def s_ele(self, loc_or_str=None): + """查找第一个符合条件的元素,以SessionElement形式返回 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + if self.tag in __FRAME_ELEMENT__: + r = make_session_ele(self.inner_html, loc_or_str) + else: + r = make_session_ele(self, loc_or_str) + if isinstance(r, NoneElement): + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 's_ele()', {'loc_or_str': loc_or_str}) + else: + r.method = 's_ele()' + r.args = {'loc_or_str': loc_or_str} + return r + + def s_eles(self, loc_or_str=None): + """查找所有符合条件的元素,以SessionElement列表形式返回 + :param loc_or_str: 定位符 + :return: SessionElement或属性、文本组成的列表 + """ + if self.tag in __FRAME_ELEMENT__: + return make_session_ele(self.inner_html, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, single=False) + + def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒) + :param single: True则返回第一个,False则返回全部 + :param relative: WebPage用的表示是否相对定位的参数 + :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 + :return: ChromiumElement对象或文本、属性或其组成的列表 + """ + return find_in_chromium_ele(self, loc_or_str, single, timeout, relative=relative) + + def style(self, style, pseudo_ele=''): + 
"""返回元素样式属性值,可获取伪元素属性值 + :param style: 样式属性名称 + :param pseudo_ele: 伪元素名称(如有) + :return: 样式属性的值 + """ + if pseudo_ele: + pseudo_ele = f', "{pseudo_ele}"' if pseudo_ele.startswith(':') else f', "::{pseudo_ele}"' + return self.run_js(f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");') + + def get_src(self, timeout=None, base64_to_bytes=True): + """返回元素src资源,base64的可转为bytes返回,其它返回str + :param timeout: 等待资源加载的超时时间(秒) + :param base64_to_bytes: 为True时,如果是base64数据,转换为bytes格式 + :return: 资源内容 + """ + timeout = self.page.timeout if timeout is None else timeout + if self.tag == 'img': # 等待图片加载完成 + js = ('return this.complete && typeof this.naturalWidth != "undefined" ' + '&& this.naturalWidth > 0 && typeof this.naturalHeight != "undefined" ' + '&& this.naturalHeight > 0') + end_time = perf_counter() + timeout + while not self.run_js(js) and perf_counter() < end_time: + sleep(.1) + + src = self.attr('src') + if src.lower().startswith('data:image'): + if base64_to_bytes: + from base64 import b64decode + return b64decode(src.split(',', 1)[-1]) + + else: + return src.split(',', 1)[-1] + + is_blob = src.startswith('blob') + result = None + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if is_blob: + js = """ + function fetchData(url) { + return new Promise((resolve, reject) => { + var xhr = new XMLHttpRequest(); + xhr.responseType = 'blob'; + xhr.onload = function() { + var reader = new FileReader(); + reader.onloadend = function() {resolve(reader.result);} + reader.readAsDataURL(xhr.response); + }; + xhr.open('GET', url, true); + xhr.send(); + }); + } + """ + try: + result = self.page.run_js(js, src) + break + except: + continue + + else: + src = self.prop('currentSrc') + if not src: + continue + + node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] + frame = node.get('frameId', None) or self.page._frame_id + + try: + result = self.page.run_cdp('Page.getResourceContent', frameId=frame, 
url=src) + break + except CDPError: + sleep(.1) + + if not result: + return None + + if is_blob: + if base64_to_bytes: + from base64 import b64decode + return b64decode(result.split(',', 1)[-1]) + else: + return result + + else: + if result['base64Encoded'] and base64_to_bytes: + from base64 import b64decode + return b64decode(result['content']) + else: + return result['content'] + + def save(self, path=None, name=None, timeout=None): + """保存图片或其它有src属性的元素的资源 + :param path: 文件保存路径,为None时保存到当前文件夹 + :param name: 文件名称,为None时从资源url获取 + :param timeout: 等待资源加载的超时时间(秒) + :return: 返回保存路径 + """ + data = self.get_src(timeout=timeout) + if not data: + raise NoResourceError + + path = path or '.' + if not name and self.tag == 'img': + src = self.attr('src') + if src.lower().startswith('data:image'): + r = search(r'data:image/(.*?);base64,', src) + name = f'img.{r.group(1)}' if r else None + name = name or basename(self.prop('currentSrc')) + path = get_usable_path(f'{path}{sep}{name}').absolute() + write_type = 'wb' if isinstance(data, bytes) else 'w' + + with open(path, write_type) as f: + f.write(data) + + return str(path) + + def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None, scroll_to_center=True): + """对当前元素截图,可保存到文件,或以字节方式返回 + :param path: 文件保存路径 + :param name: 完整文件名,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 + :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 + :param scroll_to_center: 截图前是否滚动到视口中央 + :return: 图片完整路径或字节文本 + """ + if self.tag == 'img': # 等待图片加载完成 + js = ('return this.complete && typeof this.naturalWidth != "undefined" && this.naturalWidth > 0 ' + '&& typeof this.naturalHeight != "undefined" && this.naturalHeight > 0') + end_time = perf_counter() + self.page.timeout + while not self.run_js(js) and perf_counter() < end_time: + sleep(.1) + if scroll_to_center: + self.scroll.to_see(center=True) + + left, top = self.rect.location + 
width, height = self.rect.size + left_top = (left, top) + right_bottom = (left + width, top + height) + if not name: + name = f'{self.tag}.jpg' + + return self.page._get_screenshot(path, name, as_bytes=as_bytes, as_base64=as_base64, full_page=False, + left_top=left_top, right_bottom=right_bottom, ele=self) + + def input(self, vals, clear=True, by_js=False): + """输入文本或组合键,也可用于输入文件路径到input元素(路径间用\n间隔) + :param vals: 文本值或按键组合 + :param clear: 输入前是否清空文本框 + :param by_js: 是否用js方式输入,不能输入组合键 + :return: None + """ + if self.tag == 'input' and self.attr('type') == 'file': + return self._set_file_input(vals) + + if by_js: + if clear: + self.clear(True) + if isinstance(vals, (list, tuple)): + vals = ''.join([str(i) for i in vals]) + self.set.prop('value', str(vals)) + self.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') + return + + if clear and vals not in ('\n', '\ue007'): + self.clear(by_js=False) + else: + self._input_focus() + + input_text_or_keys(self.page, vals) + + def clear(self, by_js=False): + """清空元素文本 + :param by_js: 是否用js方式清空,为False则用全选+del模拟输入删除 + :return: None + """ + if by_js: + self.run_js("this.value='';") + self.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') + return + + self._input_focus() + self.input(('\ue009', 'a', '\ue017'), clear=False) + + def _input_focus(self): + """输入前使元素获取焦点""" + try: + self.page.run_cdp('DOM.focus', backendNodeId=self._backend_id) + except Exception: + self.click(by_js=None) + + def focus(self): + """使元素获取焦点""" + try: + self.page.run_cdp('DOM.focus', backendNodeId=self._backend_id) + except Exception: + self.run_js('this.focus();') + + def hover(self, offset_x=None, offset_y=None): + """鼠标悬停,可接受偏移量,偏移量相对于元素左上角坐标。不传入x或y值时悬停在元素中点 + :param offset_x: 相对元素左上角坐标的x轴偏移量 + :param offset_y: 相对元素左上角坐标的y轴偏移量 + :return: None + """ + self.page.scroll.to_see(self) + x, y = offset_scroll(self, offset_x, offset_y) + self.page.run_cdp('Input.dispatchMouseEvent', type='mouseMoved', x=x, y=y, 
_ignore=AlertExistsError) + + def drag(self, offset_x=0, offset_y=0, duration=.5): + """拖拽当前元素到相对位置 + :param offset_x: x变化值 + :param offset_y: y变化值 + :param duration: 拖动用时,传入0即瞬间到j达 + :return: None + """ + curr_x, curr_y = self.rect.midpoint + offset_x += curr_x + offset_y += curr_y + self.drag_to((offset_x, offset_y), duration) + + def drag_to(self, ele_or_loc, duration=.5): + """拖拽当前元素,目标为另一个元素或坐标元组(x, y) + :param ele_or_loc: 另一个元素或坐标元组,坐标为元素中点的坐标 + :param duration: 拖动用时,传入0即瞬间到达 + :return: None + """ + if isinstance(ele_or_loc, ChromiumElement): + ele_or_loc = ele_or_loc.rect.midpoint + elif not isinstance(ele_or_loc, (list, tuple)): + raise TypeError('需要ChromiumElement对象或坐标。') + + self.page.actions.hold(self).move_to(ele_or_loc, duration=duration).release() + + def _get_obj_id(self, node_id=None, backend_id=None): + """根据传入node id或backend id获取js中的object id + :param node_id: cdp中的node id + :param backend_id: backend id + :return: js中的object id + """ + if node_id: + return self.page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] + else: + return self.page.run_cdp('DOM.resolveNode', backendNodeId=backend_id)['object']['objectId'] + + def _get_node_id(self, obj_id=None, backend_id=None): + """根据传入object id或backend id获取cdp中的node id + :param obj_id: js中的object id + :param backend_id: backend id + :return: cdp中的node id + """ + if obj_id: + return self.page.run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] + else: + n = self.page.run_cdp('DOM.describeNode', backendNodeId=backend_id)['node'] + self._tag = n['localName'] + return n['nodeId'] + + def _get_backend_id(self, node_id): + """根据传入node id获取backend id + :param node_id: + :return: backend id + """ + n = self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node'] + self._tag = n['localName'] + return n['backendNodeId'] + + def _get_ele_path(self, mode): + """返获取绝对的css路径或xpath路径""" + if mode == 'xpath': + txt1 = 'var tag = el.nodeName.toLowerCase();' + txt3 = ''' && 
sib.nodeName.toLowerCase()==tag''' + txt4 = ''' + if(nth>1){path = '/' + tag + '[' + nth + ']' + path;} + else{path = '/' + tag + path;}''' + txt5 = '''return path;''' + + elif mode == 'css': + txt1 = '' + txt3 = '' + txt4 = '''path = '>' + el.tagName.toLowerCase() + ":nth-child(" + nth + ")" + path;''' + txt5 = '''return path.substr(1);''' + + else: + raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") + + js = '''function(){ + function e(el) { + if (!(el instanceof Element)) return; + var path = ''; + while (el.nodeType === Node.ELEMENT_NODE) { + ''' + txt1 + ''' + var sib = el, nth = 0; + while (sib) { + if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} + sib = sib.previousSibling; + } + ''' + txt4 + ''' + el = el.parentNode; + } + ''' + txt5 + ''' + } + return e(this);} + ''' + t = self.run_js(js) + return f'{t}' if mode == 'css' else t + + def _set_file_input(self, files): + """对上传控件写入路径 + :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 + :return: None + """ + if isinstance(files, str): + files = files.split('\n') + files = [str(Path(i).absolute()) for i in files] + self.page.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=self._backend_id) + + # -------------即将废弃------------- + + @property + def location(self): + """返回元素左上角的绝对坐标""" + return self.rect.location + + @property + def size(self): + """返回元素宽和高组成的元组""" + return self.rect.size + + +class ShadowRoot(BaseElement): + """ShadowRoot是用于处理ShadowRoot的类,使用方法和ChromiumElement基本一致""" + + def __init__(self, parent_ele, obj_id=None, backend_id=None): + """ + :param parent_ele: shadow root 所在父元素 + :param obj_id: js中的object id + :param backend_id: cdp中的backend id + """ + super().__init__(parent_ele.page) + self.parent_ele = parent_ele + if backend_id: + self._backend_id = backend_id + self._obj_id = self._get_obj_id(backend_id) + self._node_id = self._get_node_id(self._obj_id) + elif obj_id: + self._obj_id = obj_id + self._node_id = self._get_node_id(obj_id) + self._backend_id = 
self._get_backend_id(self._node_id) + self._states = None + + def __repr__(self): + return f'<ShadowRoot in {self.parent_ele}>' + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间(秒) + :return: 元素对象或属性、文本 + """ + return self.ele(loc_or_str, timeout) + + def __eq__(self, other): + return self._backend_id == getattr(other, '_backend_id', None) + + @property + def tag(self): + """返回元素标签名""" + return 'shadow-root' + + @property + def html(self): + """返回outerHTML文本""" + return f'<shadow_root>{self.inner_html}</shadow_root>' + + @property + def inner_html(self): + """返回内部的html文本""" + return self.run_js('return this.innerHTML;') + + @property + def states(self): + """返回用于获取元素状态的对象""" + if self._states is None: + self._states = ShadowRootStates(self) + return self._states + + def run_js(self, script, *args, as_expr=False, timeout=None): + """运行javascript代码 + :param script: js文本 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... + :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script设置 + :return: 运行的结果 + """ + return run_js(self, script, as_expr, self.page.timeouts.script if timeout is None else timeout, args) + + def run_async_js(self, script, *args, as_expr=False, timeout=None): + """以异步方式执行js代码 + :param script: js文本 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... 
+ :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script设置 + :return: None + """ + from threading import Thread + Thread(target=run_js, args=(self, script, as_expr, + self.page.timeouts.script if timeout is None else timeout, args)).start() + + def parent(self, level_or_loc=1, index=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 + :return: ChromiumElement对象 + """ + if isinstance(level_or_loc, int): + loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' + + elif isinstance(level_or_loc, (tuple, str)): + loc = get_loc(level_or_loc, True) + + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}[{index}]' + + else: + raise TypeError('level_or_loc参数只能是tuple、int或str。') + + return self.parent_ele._ele(loc, timeout=0, relative=True, raise_err=False, method='parent()') + + def child(self, filter_loc='', index=1): + """返回直接子元素元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 + :return: 直接子元素或节点文本组成的列表 + """ + nodes = self.children(filter_loc=filter_loc) + if not nodes: + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) + + try: + return nodes[index - 1] + except IndexError: + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'child()', {'filter_loc': filter_loc, 'index': index}) + + def next(self, filter_loc='', index=1): + """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 + :return: ChromiumElement对象 + """ + nodes = self.nexts(filter_loc=filter_loc) + if nodes: + return nodes[index - 1] + if 
Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index}) + + def before(self, filter_loc='', index=1): + """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 + :return: 本元素前面的某个元素或节点 + """ + nodes = self.befores(filter_loc=filter_loc) + if nodes: + return nodes[index - 1] + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index}) + + def after(self, filter_loc='', index=1): + """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :param index: 后面第几个查询结果,1开始 + :return: 本元素后面的某个元素或节点 + """ + nodes = self.afters(filter_loc=filter_loc) + if nodes: + return nodes[index - 1] + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc, 'index': index}) + else: + return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index}) + + def children(self, filter_loc=''): + """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :return: 直接子元素或节点文本组成的列表 + """ + if not filter_loc: + loc = '*' + else: + loc = get_loc(filter_loc, True) # 把定位符转换为xpath + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + loc = loc[1].lstrip('./') + + loc = f'xpath:./{loc}' + return self._ele(loc, single=False, relative=True) + + def nexts(self, filter_loc=''): + """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :return: ChromiumElement对象组成的列表 + """ + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = 
f'xpath:./{loc}' + return self.parent_ele._ele(xpath, single=False, relative=True) + + def befores(self, filter_loc=''): + """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素前面的元素或节点组成的列表 + """ + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = f'xpath:./preceding::{loc}' + return self.parent_ele._ele(xpath, single=False, relative=True) + + def afters(self, filter_loc=''): + """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 + 查找范围不限同级元素,而是整个DOM文档 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素后面的元素或节点组成的列表 + """ + eles1 = self.nexts(filter_loc) + loc = get_loc(filter_loc, True)[1].lstrip('./') + xpath = f'xpath:./following::{loc}' + return eles1 + self.parent_ele._ele(xpath, single=False, relative=True) + + def ele(self, loc_or_str, timeout=None): + """返回当前元素下级符合条件的第一个元素 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 + :return: ChromiumElement对象 + """ + return self._ele(loc_or_str, timeout, method='ele()') + + def eles(self, loc_or_str, timeout=None): + """返回当前元素下级所有符合条件的子元素 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒),默认与元素所在页面等待时间一致 + :return: ChromiumElement对象组成的列表 + """ + return self._ele(loc_or_str, timeout=timeout, single=False) + + def s_ele(self, loc_or_str=None): + """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + r = make_session_ele(self, loc_or_str) + if isinstance(r, NoneElement): + r.method = 's_ele()' + r.args = {'loc_or_str': loc_or_str} + return r + + def s_eles(self, loc_or_str): + """查找所有符合条件的元素以SessionElement列表形式返回,处理复杂页面时效率很高 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象 + """ + return make_session_ele(self, loc_or_str, single=False) + + def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, 
raise_err=None): + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间(秒) + :param single: True则返回第一个,False则返回全部 + :param relative: WebPage用的表示是否相对定位的参数 + :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 + :return: ChromiumElement对象或其组成的列表 + """ + loc = get_loc(loc_or_str, css_mode=False) + if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): + loc = loc[0], loc[1][5:] + + def do_find(): + if loc[0] == 'css selector': + if single: + nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] + if nod_id: + r = make_chromium_ele(self.page, node_id=nod_id) + return None if r is False else r + + else: + nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId'] + r = make_chromium_eles(self.page, node_ids=nod_ids, single=False) + return None if r is False else r + + else: + eles = make_session_ele(self.html).eles(loc) + if not eles: + return None + + css = [i.css_path[61:] for i in eles] + if single: + node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId'] + r = make_chromium_ele(self.page, node_id=node_id) + return None if r is False else r + else: + node_ids = [self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] + for i in css] + if 0 in node_ids: + return None + r = make_chromium_eles(self.page, node_ids=node_ids, single=False) + return None if r is False else r + + timeout = timeout if timeout is not None else self.page.timeout + end_time = perf_counter() + timeout + result = do_find() + while result is None and perf_counter() <= end_time: + sleep(.1) + result = do_find() + + if result: + return result + return NoneElement(self.page) if single else [] + + def _get_node_id(self, obj_id): + """返回元素node id""" + return self.page.run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] + + def _get_obj_id(self, back_id): + """返回元素object id""" + return 
self.page.run_cdp('DOM.resolveNode', backendNodeId=back_id)['object']['objectId'] + + def _get_backend_id(self, node_id): + """返回元素object id""" + r = self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node'] + self._tag = r['localName'].lower() + return r['backendNodeId'] + + +def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): + """在chromium元素中查找 + :param ele: ChromiumElement对象 + :param loc: 元素定位元组 + :param single: True则返回第一个,False则返回全部 + :param timeout: 查找元素超时时间(秒) + :param relative: WebPage用于标记是否相对定位使用 + :return: 返回ChromiumElement元素或它们组成的列表 + """ + # ---------------处理定位符--------------- + if isinstance(loc, (str, tuple)): + loc = get_loc(loc) + else: + raise ValueError(f"定位符必须为str或长度为2的tuple对象。现在是:{loc}") + + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc_str}' + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): + loc_str = f'{ele.css_path}{loc[1]}' + loc = loc[0], loc_str + + timeout = timeout if timeout is not None else ele.page.timeout + + # ---------------执行查找----------------- + if loc[0] == 'xpath': + return find_by_xpath(ele, loc[1], single, timeout, relative=relative) + + else: + return find_by_css(ele, loc[1], single, timeout) + + +def find_by_xpath(ele, xpath, single, timeout, relative=True): + """执行用xpath在元素中查找元素 + :param ele: 在此元素中查找 + :param xpath: 查找语句 + :param single: 是否只返回第一个结果 + :param timeout: 超时时间(秒) + :param relative: 是否相对定位 + :return: ChromiumElement或其组成的列表 + """ + type_txt = '9' if single else '7' + node_txt = 'this.contentDocument' if ele.tag in __FRAME_ELEMENT__ and not relative else 'this' + js = make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt) + ele.page.wait.load_complete() + + def do_find(): + res = ele.page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, + returnByValue=False, awaitPromise=True, userGesture=True) + if res['result']['type'] == 'string': + return res['result']['value'] + if 
'exceptionDetails' in res: + if 'The result is not a node set' in res['result']['description']: + js1 = make_js_for_find_ele_by_xpath(xpath, '1', node_txt) + res = ele.page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js1, objectId=ele._obj_id, + returnByValue=False, awaitPromise=True, userGesture=True) + return res['result']['value'] + else: + raise SyntaxError(f'查询语句错误:\n{res}') + + if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): + return None + + if single: + r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + return None if r is False else r + + else: + # from pprint import pprint + # for i in ele.page.run_cdp('Runtime.getProperties', + # objectId=res['result']['objectId'], + # ownProperties=True)['result'][:-1]: + # pprint(i) + r = [make_chromium_ele(ele.page, obj_id=i['value']['objectId']) if i['value']['type'] == 'object' else + i['value']['value'] for i in ele.page.run_cdp('Runtime.getProperties', + objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1]] + return None if not r or r is False in r else r + + end_time = perf_counter() + timeout + result = do_find() + while result is None and perf_counter() < end_time: + sleep(.1) + result = do_find() + + if result: + return result + return NoneElement(ele.page) if single else [] + + +def find_by_css(ele, selector, single, timeout): + """执行用css selector在元素中查找元素 + :param ele: 在此元素中查找 + :param selector: 查找语句 + :param single: 是否只返回第一个结果 + :param timeout: 超时时间(秒) + :return: ChromiumElement或其组成的列表 + """ + selector = selector.replace('"', r'\"') + find_all = '' if single else 'All' + node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame', 'shadow-root') else 'this' + js = f'function(){{return {node_txt}.querySelector{find_all}("{selector}");}}' + + ele.page.wait.load_complete() + + def do_find(): + res = ele.page.run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, + 
returnByValue=False, awaitPromise=True, userGesture=True) + + if 'exceptionDetails' in res: + raise SyntaxError(f'查询语句错误:\n{res}') + if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): + return None + + if single: + r = make_chromium_ele(ele.page, obj_id=res['result']['objectId']) + return None if r is False else r + + else: + node_ids = [i['value']['objectId'] for i in ele.page.run_cdp('Runtime.getProperties', + objectId=res['result']['objectId'], + ownProperties=True)['result'][:-1]] + r = make_chromium_eles(ele.page, obj_ids=node_ids, single=False, ele_only=False) + return None if r is False else r + + end_time = perf_counter() + timeout + result = do_find() + while result is None and perf_counter() < end_time: + sleep(.1) + result = do_find() + + if result: + return result + return NoneElement(ele.page) if single else [] + + +def make_chromium_ele(page, node_id=None, obj_id=None): + """根据node id或object id生成相应元素对象 + :param page: ChromiumPage对象 + :param node_id: 元素的node id + :param obj_id: 元素的object id + :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False + """ + if node_id: + node = page.driver.run('DOM.describeNode', nodeId=node_id) + if 'error' in node: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + # todo: Node() + return node['node']['nodeValue'] + backend_id = node['node']['backendNodeId'] + obj_id = page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] + if 'error' in obj_id: + return False + + elif obj_id: + node = page.driver.run('DOM.describeNode', objectId=obj_id) + if 'error' in node: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + # todo: Node() + return node['node']['nodeValue'] + backend_id = node['node']['backendNodeId'] + node_id = node['node']['nodeId'] + + else: + return False + + ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=backend_id) + if ele.tag in __FRAME_ELEMENT__: + from .._pages.chromium_frame 
import ChromiumFrame + ele = ChromiumFrame(page, ele, node) + + return ele + + +def make_chromium_eles(page, node_ids=None, obj_ids=None, single=True, ele_only=True): + """根据node id或object id生成相应元素对象 + :param page: ChromiumPage对象 + :param node_ids: 元素的node id + :param obj_ids: 元素的object id + :param single: 是否获取但个元素 + :param ele_only: 是否只要ele + :return: ChromiumElement对象或ChromiumFrame对象,生成失败返回False + """ + nodes = [] + if node_ids: + for node_id in node_ids: + if not node_id: + return False + node = page.driver.run('DOM.describeNode', nodeId=node_id) + if 'error' in node: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + if ele_only: + continue + else: + if single: + return node['node']['nodeValue'] + else: + nodes.append(node['node']['nodeValue']) + + obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) + if 'error' in obj_id: + return False + obj_id = obj_id['object']['objectId'] + ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=node['node']['backendNodeId']) + if ele.tag in __FRAME_ELEMENT__: + from .._pages.chromium_frame import ChromiumFrame + ele = ChromiumFrame(page, ele, node) + if single: + return ele + nodes.append(ele) + + if obj_ids: + for obj_id in obj_ids: + if not obj_id: + return False + node = page.driver.run('DOM.describeNode', objectId=obj_id) + if 'error' in node: + return False + if node['node']['nodeName'] in ('#text', '#comment'): + if ele_only: + continue + else: + if single: + return node['node']['nodeValue'] + else: + nodes.append(node['node']['nodeValue']) + + ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], + backend_id=node['node']['backendNodeId']) + if ele.tag in __FRAME_ELEMENT__: + from .._pages.chromium_frame import ChromiumFrame + ele = ChromiumFrame(page, ele, node) + if single: + return ele + nodes.append(ele) + + return NoneElement(page) if single and not nodes else nodes + + +def make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt): + 
"""生成用xpath在元素中查找元素的js文本 + :param xpath: xpath文本 + :param type_txt: 查找类型 + :param node_txt: 节点类型 + :return: js文本 + """ + for_txt = '' + + # 获取第一个元素、节点或属性 + if type_txt == '9': + return_txt = ''' +if(e.singleNodeValue==null){return null;} +else if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} +else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} +else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} +else{return e.singleNodeValue;}''' + + # 按顺序获取所有元素、节点或属性 + elif type_txt == '7': + for_txt = """ +var a=new Array(); +for(var i = 0; i <e.snapshotLength ; i++){ +if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);} +else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);} +else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);} +else{a.push(e.snapshotItem(i));}}""" + return_txt = 'return a;' + + elif type_txt == '2': + return_txt = 'return e.stringValue;' + elif type_txt == '1': + return_txt = 'return e.numberValue;' + else: + return_txt = 'return e.singleNodeValue;' + + xpath = xpath.replace(r"'", r"\'") + js = f'function(){{var e=document.evaluate(\'{xpath}\',{node_txt},null,{type_txt},null);\n{for_txt}\n{return_txt}}}' + + return js + + +def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): + """运行javascript代码 + :param page_or_ele: 页面对象或元素对象 + :param script: js文本 + :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: 超时时间(秒) + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... 
+ :return: js执行结果 + """ + if isinstance(page_or_ele, (ChromiumElement, ShadowRoot)): + is_page = False + page = page_or_ele.page + obj_id = page_or_ele._obj_id + else: + is_page = True + page = page_or_ele + end_time = perf_counter() + 5 + while perf_counter() < end_time: + obj_id = page_or_ele._root_id + if obj_id is not None: + break + else: + raise RuntimeError('js运行环境出错。') + + if page.states.has_alert: + raise AlertExistsError + + try: + if as_expr: + res = page.run_cdp('Runtime.evaluate', expression=script, returnByValue=False, + awaitPromise=True, userGesture=True, _timeout=timeout, _ignore=AlertExistsError) + + else: + args = args or () + if not is_js_func(script): + script = f'function(){{{script}}}' + res = page.run_cdp('Runtime.callFunctionOn', functionDeclaration=script, objectId=obj_id, + arguments=[convert_argument(arg) for arg in args], returnByValue=False, + awaitPromise=True, userGesture=True, _timeout=timeout, _ignore=AlertExistsError) + except TimeoutError: + raise TimeoutError(f'执行js超时(等待{timeout}秒)。') + except ContextLostError: + if is_page: + raise ContextLostError('页面已被刷新,请尝试等待页面加载完成再执行操作。') + else: + raise ElementLostError('原来获取到的元素对象已不在页面内。') + + if res is None and page.states.has_alert: # 存在alert的情况 + return None + + exceptionDetails = res.get('exceptionDetails') + if exceptionDetails: + raise JavaScriptError(f'\njavascript运行错误:\n{script}\n错误信息: \n{exceptionDetails}') + + try: + return parse_js_result(page, page_or_ele, res.get('result')) + except Exception: + return res + + +def parse_js_result(page, ele, result): + """解析js返回的结果""" + if 'unserializableValue' in result: + return result['unserializableValue'] + + the_type = result['type'] + + if the_type == 'object': + sub_type = result.get('subtype', None) + if sub_type == 'null': + return None + + elif sub_type == 'node': + class_name = result['className'] + if class_name == 'ShadowRoot': + return ShadowRoot(ele, obj_id=result['objectId']) + elif class_name == 'HTMLDocument': + return 
result + else: + r = make_chromium_ele(page, obj_id=result['objectId']) + if r is False: + raise ElementLostError + return r + + elif sub_type == 'array': + r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], ownProperties=True)['result'] + return [parse_js_result(page, ele, result=i['value']) for i in r[:-1]] + + elif 'objectId' in result and result['className'].lower() == 'object': # dict + r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], ownProperties=True)['result'] + return {i['name']: parse_js_result(page, ele, result=i['value']) for i in r} + + else: + return result['value'] + + elif the_type == 'undefined': + return None + + else: + return result['value'] + + +def convert_argument(arg): + """把参数转换成js能够接收的形式""" + if isinstance(arg, ChromiumElement): + return {'objectId': arg._obj_id} + + elif isinstance(arg, (int, float, str, bool)): + return {'value': arg} + + from math import inf + if arg == inf: + return {'unserializableValue': 'Infinity'} + elif arg == -inf: + return {'unserializableValue': '-Infinity'} + + raise TypeError(f'不支持参数{arg}的类型:{type(arg)}') + + +class Pseudo(object): + def __init__(self, ele): + """ + :param ele: ChromiumElement + """ + self._ele = ele + + @property + def before(self): + """返回当前元素的::before伪元素内容""" + return self._ele.style('content', 'before') + + @property + def after(self): + """返回当前元素的::after伪元素内容""" + return self._ele.style('content', 'after') diff --git a/DrissionPage/_elements/chromium_element.pyi b/DrissionPage/_elements/chromium_element.pyi new file mode 100644 index 0000000..fa341ca --- /dev/null +++ b/DrissionPage/_elements/chromium_element.pyi @@ -0,0 +1,358 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from pathlib import Path +from typing import Union, Tuple, List, Any, Literal + +from .none_element import NoneElement +from .._base.base import DrissionElement, BaseElement +from .._elements.session_element import SessionElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame +from .._pages.chromium_page import ChromiumPage +from .._pages.chromium_tab import ChromiumTab +from .._pages.web_page import WebPage +from .._units.clicker import Clicker +from .._units.rect import ElementRect +from .._units.scroller import ElementScroller +from .._units.selector import SelectElement +from .._units.setter import ChromiumElementSetter +from .._units.states import ShadowRootStates, ElementStates +from .._units.waiter import ElementWaiter + +PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True] + + +class ChromiumElement(DrissionElement): + + def __init__(self, page: ChromiumBase, node_id: int = None, obj_id: str = None, backend_id: int = None): + self._tag: str = ... + self.page: Union[ChromiumPage, WebPage] = ... + self._node_id: int = ... + self._obj_id: str = ... + self._backend_id: int = ... + self._doc_id: str = ... + self._scroll: ElementScroller = ... + self._clicker: Clicker = ... + self._select: SelectElement = ... + self._wait: ElementWaiter = ... + self._rect: ElementRect = ... + self._set: ChromiumElementSetter = ... + self._states: ElementStates = ... + self._pseudo: Pseudo = ... + + def __repr__(self) -> str: ... + + def __call__(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + def __eq__(self, other: ChromiumElement) -> bool: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + @property + def attrs(self) -> dict: ... + + @property + def text(self) -> str: ... + + @property + def raw_text(self) -> str: ... 
+ + # -----------------d模式独有属性------------------- + + @property + def set(self) -> ChromiumElementSetter: ... + + @property + def states(self) -> ElementStates: ... + + @property + def rect(self) -> ElementRect: ... + + @property + def pseudo(self) -> Pseudo: ... + + @property + def shadow_root(self) -> Union[None, ShadowRoot]: ... + + @property + def sr(self) -> Union[None, ShadowRoot]: ... + + @property + def scroll(self) -> ElementScroller: ... + + @property + def click(self) -> Clicker: ... + + def parent(self, + level_or_loc: Union[tuple, str, int] = 1, + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def child(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ... + + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ... + + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ... + + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ... + + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ... + + def children(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... 
+ + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + @property + def wait(self) -> ElementWaiter: ... + + @property + def select(self) -> SelectElement: ... + + def check(self, uncheck: bool = False, by_js: bool = False) -> None: ... + + def attr(self, attr: str) -> Union[str, None]: ... + + def remove_attr(self, attr: str) -> None: ... + + def prop(self, prop: str) -> Union[str, int, None]: ... + + def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ... + + def run_async_js(self, script: str, *args, as_expr: bool = False) -> None: ... + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[ChromiumElement]: ... + + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[SessionElement]: ... + + def _find_elements(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + single: bool = True, + relative: bool = False, + raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, NoneElement, + List[Union[ChromiumElement, ChromiumFrame]]]: ... + + def style(self, style: str, pseudo_ele: str = '') -> str: ... + + def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ... + + def save(self, path: [str, bool] = None, name: str = None, timeout: float = None) -> str: ... 
+ + def get_screenshot(self, + path: [str, Path] = None, + name: str = None, + as_bytes: PIC_TYPE = None, + as_base64: PIC_TYPE = None, + scroll_to_center: bool = True) -> Union[str, bytes]: ... + + def input(self, vals: Any, clear: bool = True, by_js: bool = False) -> None: ... + + def _set_file_input(self, files: Union[str, list, tuple]) -> None: ... + + def clear(self, by_js: bool = False) -> None: ... + + def _input_focus(self) -> None: ... + + def focus(self) -> None: ... + + def hover(self, offset_x: int = None, offset_y: int = None) -> None: ... + + def drag(self, offset_x: int = 0, offset_y: int = 0, duration: float = 0.5) -> None: ... + + def drag_to(self, ele_or_loc: Union[tuple, ChromiumElement], duration: float = 0.5) -> None: ... + + def _get_obj_id(self, node_id: int = None, backend_id: int = None) -> str: ... + + def _get_node_id(self, obj_id: str = None, backend_id: int = None) -> int: ... + + def _get_backend_id(self, node_id: int) -> int: ... + + def _get_ele_path(self, mode: str) -> str: ... + + +class ShadowRoot(BaseElement): + + def __init__(self, parent_ele: ChromiumElement, obj_id: str = None, backend_id: int = None): + self._obj_id: str = ... + self._node_id: int = ... + self._backend_id: int = ... + self.page: ChromiumPage = ... + self.parent_ele: ChromiumElement = ... + self._states: ShadowRootStates = ... + + def __repr__(self) -> str: ... + + def __call__(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> ChromiumElement: ... + + def __eq__(self, other: ShadowRoot) -> bool: ... + + @property + def states(self) -> ShadowRootStates: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ... + + def run_async_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> None: ... 
+ + def parent(self, level_or_loc: Union[str, int] = 1, index: int = 1) -> ChromiumElement: ... + + def child(self, filter_loc: Union[tuple, str] = '', + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def next(self, filter_loc: Union[tuple, str] = '', + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def before(self, filter_loc: Union[tuple, str] = '', + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def after(self, filter_loc: Union[tuple, str] = '', + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def children(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... + + def nexts(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... + + def befores(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... + + def afters(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ... + + def ele(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + def eles(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[ChromiumElement]: ... + + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + + def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, + single: bool = True, relative: bool = False, raise_err: bool = None) \ + -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, List[Union[ChromiumElement, + ChromiumFrame, str]]]: ... + + def _get_node_id(self, obj_id: str) -> int: ... + + def _get_obj_id(self, back_id: int) -> str: ... + + def _get_backend_id(self, node_id: int) -> int: ... 
+ + +def find_in_chromium_ele(ele: ChromiumElement, loc: Union[str, Tuple[str, str]], + single: bool = True, timeout: float = None, relative: bool = True) \ + -> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ... + + +def find_by_xpath(ele: ChromiumElement, xpath: str, single: bool, timeout: float, + relative: bool = True) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... + + +def find_by_css(ele: ChromiumElement, selector: str, single: bool, + timeout: float) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... + + +def make_chromium_ele(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], + node_id: int = ..., + obj_id: str = ...) -> Union[ChromiumElement, ChromiumFrame, str]: ... + + +def make_chromium_eles(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], + node_ids: Union[tuple, list] = None, + obj_ids: Union[tuple, list] = None, + single: bool = True, + ele_only: bool = True) -> Union[ChromiumElement, ChromiumFrame, NoneElement, +List[Union[ChromiumElement, ChromiumFrame]]]: ... + + +def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... + + +def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], script: str, + as_expr: bool = False, timeout: float = None, args: tuple = ...) -> Any: ... + + +def parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ... + + +def convert_argument(arg: Any) -> dict: ... + + +class Pseudo(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + @property + def before(self) -> str: ... + + @property + def after(self) -> str: ... 
class NoneElement(object):
    """Falsy placeholder object used when an element lookup finds nothing.

    The object either raises ``ElementNotFoundError`` on use, or (when the
    owning page enables it through ``_none_ele_return_value``) quietly returns
    itself / a configured default value so chained look-ups do not fail.
    """

    def __init__(self, page=None, method=None, args=None):
        """
        :param page: page object supplying ``_none_ele_value`` and ``_none_ele_return_value``; may be None
        :param method: name of the lookup method that failed, forwarded to the error
        :param args: arguments of the failed lookup, forwarded to the error
        """
        if page:
            self._none_ele_value = page._none_ele_value
            self._none_ele_return_value = page._none_ele_return_value
        else:
            self._none_ele_value = None
            self._none_ele_return_value = False
        self.method = method
        self.args = args

    def __call__(self, *args, **kwargs):
        """Return self when silent mode is on, otherwise raise ElementNotFoundError."""
        if not self._none_ele_return_value:
            raise ElementNotFoundError(None, self.method, self.args)
        return self

    def __getattr__(self, item):
        """Resolve attributes that are not set on the instance.

        Only called for names missing from the instance. In silent mode,
        navigation-style names return self, value-style names return the
        configured default, and anything else raises ElementNotFoundError.
        """
        if not self._none_ele_return_value:
            raise ElementNotFoundError(None, self.method, self.args)
        if item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before',
                    'after', 'get_frame', 'shadow_root', 'sr'):
            return self
        if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html',
                    'attrs', 'text', 'raw_text'):
            return self._none_ele_value
        raise ElementNotFoundError(None, self.method, self.args)

    def __eq__(self, other):
        """Compare equal to None only.

        Always returns a real bool; the previous implementation fell off the
        end and returned None for every operand other than None.
        """
        return other is None

    def __bool__(self):
        return False

    def __repr__(self):
        return 'None'
+@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from html import unescape -from re import match, DOTALL +from re import match, sub, DOTALL from lxml.etree import tostring from lxml.html import HtmlElement, fromstring -from .base import DrissionElement, BasePage, BaseElement -from .commons.constants import NoneElement -from .commons.locator import get_loc -from .commons.web import get_ele_txt, make_absolute_link +from .none_element import NoneElement +from .._base.base import DrissionElement, BasePage, BaseElement +from .._functions.locator import get_loc +from .._functions.web import get_ele_txt, make_absolute_link class SessionElement(DrissionElement): @@ -43,6 +45,9 @@ class SessionElement(DrissionElement): """ return self.ele(loc_or_str) + def __eq__(self, other): + return self.xpath == getattr(other, 'xpath', None) + @property def tag(self): """返回元素类型""" @@ -194,10 +199,10 @@ class SessionElement(DrissionElement): return link else: # 其它情况直接返回绝对url - return make_absolute_link(link, self.page) + return make_absolute_link(link, self.page.url) elif attr == 'src': - return make_absolute_link(self.inner_ele.get('src'), self.page) + return make_absolute_link(self.inner_ele.get('src'), self.page.url) elif attr == 'text': return self.text @@ -220,7 +225,7 @@ class SessionElement(DrissionElement): :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str) + return self._ele(loc_or_str, method='ele()') def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -235,7 +240,7 @@ class SessionElement(DrissionElement): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_str) + return self._ele(loc_or_str, method='s_ele()') def s_eles(self, loc_or_str): """返回当前元素下级所有符合条件的子元素、属性或节点文本 @@ -266,14 +271,14 @@ class SessionElement(DrissionElement): while ele: if mode == 'css': brothers = 
len(ele.eles(f'xpath:./preceding-sibling::*')) - path_str = f'>:nth-child({brothers + 1}){path_str}' + path_str = f'>{ele.tag}:nth-child({brothers + 1}){path_str}' else: brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' ele = ele.parent() - return f':root{path_str[1:]}' if mode == 'css' else path_str + return f'{path_str[1:]}' if mode == 'css' else path_str def make_session_ele(html_or_ele, loc=None, single=True): @@ -334,23 +339,26 @@ def make_session_ele(html_or_ele, loc=None, single=True): page = html_or_ele.page xpath = html_or_ele.xpath # ChromiumElement,兼容传入的元素在iframe内的情况 - html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele.ids.doc_id)['outerHTML'] \ - if html_or_ele.ids.doc_id else html_or_ele.page.html + html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML'] \ + if html_or_ele._doc_id else html_or_ele.page.html html_or_ele = fromstring(html) html_or_ele = html_or_ele.xpath(xpath)[0] # 各种页面对象 elif isinstance(html_or_ele, BasePage): page = html_or_ele - html_or_ele = fromstring(html_or_ele.html) + html = html_or_ele.html + if html.startswith('<?xml '): + html = sub(r'^<\?xml.*?>', '', html) + html_or_ele = fromstring(html) # 直接传入html文本 elif isinstance(html_or_ele, str): page = None html_or_ele = fromstring(html_or_ele) - # ShadowRootElement, ChromiumShadowRoot, ChromiumFrame - elif isinstance(html_or_ele, BaseElement) or the_type.endswith(".ChromiumFrame'>"): + # ShadowRoot + elif isinstance(html_or_ele, BaseElement): page = html_or_ele.page html_or_ele = fromstring(html_or_ele.html) @@ -375,7 +383,7 @@ def make_session_ele(html_or_ele, loc=None, single=True): elif isinstance(ele, str): return ele else: - return NoneElement() + return NoneElement(page) else: # 返回全部 return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] diff --git 
a/DrissionPage/_elements/session_element.pyi b/DrissionPage/_elements/session_element.pyi new file mode 100644 index 0000000..5c82e6f --- /dev/null +++ b/DrissionPage/_elements/session_element.pyi @@ -0,0 +1,145 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union, List, Tuple, Optional + +from lxml.html import HtmlElement + +from .none_element import NoneElement +from .._base.base import DrissionElement, BaseElement +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame +from .._pages.session_page import SessionPage + + +class SessionElement(DrissionElement): + + def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None): + self._inner_ele: HtmlElement = ... + self.page: SessionPage = ... + + @property + def inner_ele(self) -> HtmlElement: ... + + def __repr__(self) -> str: ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[SessionElement, NoneElement]: ... + + def __eq__(self, other: SessionElement) -> bool: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + @property + def attrs(self) -> dict: ... + + @property + def text(self) -> str: ... + + @property + def raw_text(self) -> str: ... + + def parent(self, + level_or_loc: Union[tuple, str, int] = 1, + index: int = 1) -> Union[SessionElement, NoneElement]: ... + + def child(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ... + + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ... 
+ + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ... + + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ... + + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ... + + def children(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[SessionElement, str]]: ... + + def attr(self, attr: str) -> Optional[str]: ... + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[SessionElement, NoneElement]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[SessionElement]: ... + + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... + + def s_eles(self, + loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... 
def connect_browser(option):
    """Connect to an already running browser, or launch a new one.

    :param option: ChromiumOptions object
    :return: True when an existing browser was taken over, False when a new process was started
    """
    address = option.address.replace('localhost', '127.0.0.1')
    # Remove a leading URL scheme as a *prefix*. str.lstrip() takes a set of
    # characters, so lstrip('http://') would also eat the first letters of a
    # host name such as 'test-host:9222'.
    for scheme in ('http://', 'https://'):
        if address.startswith(scheme):
            address = address[len(scheme):]
            break
    chrome_path = option.browser_path

    ip, port = address.split(':')
    if ip != '127.0.0.1' or port_is_using(ip, port) or option.is_existing_only:
        # Remote address, port already in use, or caller only wants an existing browser: attach to it.
        test_connect(ip, port)
        option._headless = any(arg.startswith('--headless') and not arg.endswith('=false')
                               for arg in option.arguments)
        return True

    # ---------- start a browser process ----------
    args = get_launch_args(option)
    set_prefs(option)
    set_flags(option)
    try:
        _run_browser(port, chrome_path, args)

    # The configured path does not exist: search for an executable ourselves.
    except FileNotFoundError:
        chrome_path = get_chrome_path()

        if not chrome_path:
            raise FileNotFoundError('无法找到浏览器可执行文件路径,请手动配置。')

        _run_browser(port, chrome_path, args)

    test_connect(ip, port)
    return False
def set_flags(opt):
    """Write the experiment flags of the launch options into the ``Local State`` file.

    Flags are stored under ``browser.enabled_labs_experiments`` as ``'name@value'``
    entries, or just ``'name'`` when the value is empty.
    :param opt: ChromiumOptions
    :return: None
    """
    if not opt.user_data_path or (not opt.clear_file_flags and not opt.flags):
        return

    state_file = Path(opt.user_data_path) / 'Local State'

    if state_file.exists():
        with open(state_file, 'r', encoding='utf-8') as f:
            try:
                states_dict = load(f)
            except JSONDecodeError:
                states_dict = {}
    else:
        state_file.parent.mkdir(parents=True, exist_ok=True)
        states_dict = {}

    # Always make sure the 'browser' section exists. Previously it was only
    # created when clear_file_flags was false, so clearing flags on a fresh
    # profile raised KeyError on the assignment below.
    browser_state = states_dict.setdefault('browser', {})
    flags_list = [] if opt.clear_file_flags else browser_state.setdefault('enabled_labs_experiments', [])

    flags_dict = {}
    for item in flags_list:
        name, sep, value = str(item).partition('@')
        flags_dict[name] = value if sep else None

    # Flags from the options override those already stored in the file.
    for k, i in opt.flags.items():
        flags_dict[k] = i

    browser_state['enabled_labs_experiments'] = [f'{k}@{i}' if i else k for k, i in flags_dict.items()]

    with open(state_file, 'w', encoding='utf-8') as f:
        dump(states_dict, f)
:param port: 浏览器端口 + :param timeout: 超时时间(秒) + :return: None + """ + end_time = perf_counter() + timeout + while perf_counter() < end_time: + try: + tabs = requests_get(f'http://{ip}:{port}/json', timeout=10, headers={'Connection': 'close'}, + proxies={'http': None, 'https': None}).json() + for tab in tabs: + if tab['type'] in ('page', 'webview'): + return + except Exception: + sleep(.2) + + raise BrowserConnectError(f'\n{ip}:{port}浏览器无法链接。\n请确认:\n1、该端口为浏览器\n' + f'2、已添加\'--remote-debugging-port={port}\'启动项\n' + f'3、用户文件夹没有和已打开的浏览器冲突\n' + f'4、如为无界面系统,请添加\'--headless=new\'参数\n' + f'5、如果是Linux系统,可能还要添加\'--no-sandbox\'启动参数\n' + f'可使用ChromiumOptions设置端口和用户文件夹路径。') + + +def _run_browser(port, path: str, args) -> Popen: + """创建chrome进程 + :param port: 端口号 + :param path: 浏览器路径 + :param args: 启动参数 + :return: 进程对象 + """ + p = Path(path) + p = str(p / 'chrome') if p.is_dir() else str(path) + arguments = [p, f'--remote-debugging-port={port}'] + arguments.extend(args) + try: + return Popen(arguments, shell=False, stdout=DEVNULL, stderr=DEVNULL) + except FileNotFoundError: + raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。') + + +def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None: + """把prefs中a.b.c形式的属性转为a['b']['c']形式 + :param target_dict: 要处理的字典 + :param src: 属性层级列表[a, b, c] + :param num: 当前处理第几个 + :param end: src长度 + :return: None + """ + if num == end: + return + if src[num] not in target_dict: + target_dict[src[num]] = {} + num += 1 + _make_leave_in_dict(target_dict[src[num - 1]], src, num, end) + + +def _set_value_to_dict(target_dict: dict, src: list, value) -> None: + """把a.b.c形式的属性的值赋值到a['b']['c']形式的字典中 + :param target_dict: 要处理的字典 + :param src: 属性层级列表[a, b, c] + :param value: 属性值 + :return: None + """ + src = "']['".join(src) + src = f"target_dict['{src}']=value" + exec(src) + + +def _remove_arg_from_dict(target_dict: dict, arg: str) -> None: + """把a.b.c形式的属性从字典中删除 + :param target_dict: 要处理的字典 + :param arg: 层级属性,形式'a.b.c' + :return: None + 
def get_chrome_path():
    """Find the path of a Chrome/Chromium executable.

    Sources are tried in order: the ini configuration, ``shutil.which``,
    well-known macOS/Linux install paths, then on Windows the registry and
    the PATH variable.
    :return: path of the executable as str, or None when nothing was found
    """
    # ----------- from the ini file --------------
    path = OptionsManager().chromium_options.get('browser_path', None)
    if path and Path(path).is_file():
        return str(path)

    # ----------- using which -----------
    from shutil import which
    for name in ('chrome', 'chromium', 'google-chrome', 'google-chrome-stable',
                 'google-chrome-unstable', 'google-chrome-beta'):
        path = which(name)
        if path:
            return path

    # ----------- default paths on macOS and Linux -----------
    from platform import system
    os_name = system().lower()
    if os_name in ('macos', 'darwin'):
        p = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
        return p if Path(p).exists() else None

    if os_name == 'linux':
        # The last entry used to read '/user/lib/...', a typo for '/usr/lib/...'.
        paths = ('/usr/bin/google-chrome', '/opt/google/chrome/google-chrome',
                 '/usr/lib/chromium-browser/chromium-browser')
        for p in paths:
            if Path(p).exists():
                return p
        return None

    if os_name != 'windows':
        return None

    # ----------- from the registry --------------
    import winreg
    try:
        # The with-block closes the key even when EnumValue fails.
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                            r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe',
                            reserved=0, access=winreg.KEY_READ) as key:
            return winreg.EnumValue(key, 0)[1]

    except OSError:
        # Key missing (FileNotFoundError) or unreadable: fall through to PATH.
        pass

    # ----------- from the PATH variable --------------
    try:
        with popen('set path') as pipe:
            paths = pipe.read().lower()
    except Exception:
        return None
    r = search(r'[^;]*chrome[^;]*', paths)

    if r:
        path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe'

        if path.exists():
            return str(path)

    for path in paths.split(';'):
        path = Path(path) / 'chrome.exe'

        try:
            if path.exists():
                return str(path)
        except OSError:
            pass

    return None
def set_paths(browser_path=None, user_data_path=None):
    """Store the browser executable path and/or user data path in the saved configuration.

    Arguments left as None are not changed; the configuration is saved in either case.
    :param browser_path: path of the browser executable
    :param user_data_path: path of the user data folder
    :return: None
    """
    options = ChromiumOptions()
    pending = ((browser_path, options.set_browser_path),
               (user_data_path, options.set_user_data_path))

    for value, setter in pending:
        if value is not None:
            setter(value)

    options.save()
def send_key(page, modifier, key):
    """Send one character: known keyboard keys are dispatched as key events, anything else is inserted as text."""
    if key not in keyDefinitions:
        page.run_cdp('Input.insertText', text=key, _ignore=AlertExistsError)
        return

    desc = keyDescriptionForString(modifier, key)
    char = desc['text']
    location = desc['location']
    event = dict(type='keyDown' if char else 'rawKeyDown',
                 modifiers=modifier,
                 windowsVirtualKeyCode=desc['keyCode'],
                 code=desc['code'],
                 key=desc['key'],
                 text=char,
                 autoRepeat=False,
                 unmodifiedText=char,
                 location=location,
                 isKeypad=location == 3,
                 _ignore=AlertExistsError)

    # Press first, then release with the same payload.
    for phase in (event['type'], 'keyUp'):
        event['type'] = phase
        page.run_cdp('Input.dispatchKeyEvent', **event)
"""输入文本,也可输入组合键,组合键用tuple形式输入 + :param page: ChromiumBase对象 + :param text_or_keys: 文本值或按键组合 + :return: self + """ + if not isinstance(text_or_keys, (tuple, list)): + text_or_keys = (str(text_or_keys),) + modifier, text_or_keys = keys_to_typing(text_or_keys) + + if modifier != 0: # 包含修饰符 + for key in text_or_keys: + send_key(page, modifier, key) + return + + if text_or_keys.endswith(('\n', '\ue007')): + page.run_cdp('Input.insertText', text=text_or_keys[:-1], _ignore=AlertExistsError) + send_key(page, modifier, '\n') + else: + page.run_cdp('Input.insertText', text=text_or_keys, _ignore=AlertExistsError) diff --git a/DrissionPage/_functions/keys.pyi b/DrissionPage/_functions/keys.pyi new file mode 100644 index 0000000..896bc42 --- /dev/null +++ b/DrissionPage/_functions/keys.pyi @@ -0,0 +1,99 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Tuple, Dict, Union, Any + +from .._pages.chromium_base import ChromiumBase + + +class Keys: + """特殊按键""" + + NULL: str + CANCEL: str + HELP: str + BACKSPACE: str + BACK_SPACE: str + TAB: str + CLEAR: str + RETURN: str + ENTER: str + SHIFT: str + LEFT_SHIFT: str + CONTROL: str + CTRL: str + LEFT_CONTROL: str + ALT: str + LEFT_ALT: str + PAUSE: str + ESCAPE: str + SPACE: str + PAGE_UP: str + PAGE_DOWN: str + END: str + HOME: str + LEFT: str + ARROW_LEFT: str + UP: str + ARROW_UP: str + RIGHT: str + ARROW_RIGHT: str + DOWN: str + ARROW_DOWN: str + INSERT: str + DELETE: str + DEL: str + SEMICOLON: str + EQUALS: str + + NUMPAD0: str + NUMPAD1: str + NUMPAD2: str + NUMPAD3: str + NUMPAD4: str + NUMPAD5: str + NUMPAD6: str + NUMPAD7: str + NUMPAD8: str + NUMPAD9: str + MULTIPLY: str + ADD: str + SUBTRACT: str + DECIMAL: str + DIVIDE: str + + F1: str + F2: str + F3: str + F4: str + F5: str + F6: str + F7: str + F8: str + F9: str + F10: str + F11: str + F12: str + + META: str + COMMAND: str + + 
def is_loc(text):
    """Return whether text begins with one of the locator syntax prefixes."""
    prefixes = ('.', '#', '@',
                't:', 't=', 'tag:', 'tag=',
                'tx:', 'tx=', 'tx^', 'tx$',
                'text:', 'text=', 'text^', 'text$',
                'xpath:', 'xpath=', 'x:', 'x=',
                'css:', 'css=', 'c:', 'c=')
    return any(text.startswith(prefix) for prefix in prefixes)
str_to_xpath_loc(loc): """处理元素查找语句 :param loc: 查找语法字符串 :return: 匹配符元组 @@ -61,39 +70,30 @@ def str_to_loc(loc): # ------------------------------------------------------------------ # 多属性查找 - if loc.startswith('@@') and loc != '@@': - loc_str = _make_multi_xpath_str('*', loc) - - elif loc.startswith('@|') and loc != '@|': - loc_str = _make_multi_xpath_str('*', loc, False) + if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'): + loc_str = _make_multi_xpath_str('*', loc)[1] # 单属性查找 elif loc.startswith('@') and loc != '@': - loc_str = _make_single_xpath_str('*', loc) + loc_str = _make_single_xpath_str('*', loc)[1] # 根据tag name查找 elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='): at_ind = loc.find('@') if at_ind == -1: loc_str = f'//*[name()="{loc[4:]}"]' + elif loc[at_ind:].startswith(('@@', '@|', '@!')): + loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:])[1] else: - if loc[at_ind:].startswith('@@'): - loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:]) - elif loc[at_ind:].startswith('@|'): - loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:], False) - else: - loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:]) + loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:])[1] # 根据文本查找 elif loc.startswith('text='): loc_str = f'//*[text()={_make_search_str(loc[5:])}]' - elif loc.startswith('text:') and loc != 'text:': loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..' - elif loc.startswith('text^') and loc != 'text^': loc_str = f'//*/text()[starts-with(., {_make_search_str(loc[5:])})]/..' - elif loc.startswith('text$') and loc != 'text$': loc_str = f'//*/text()[substring(., string-length(.) - string-length({_make_search_str(loc[5:])}) +1) = ' \ f'{_make_search_str(loc[5:])}]/..' 
@@ -121,8 +121,72 @@ def str_to_loc(loc): return loc_by, loc_str -def _make_single_xpath_str(tag: str, text: str) -> str: - """生成xpath语句 +def str_to_css_loc(loc): + """处理元素查找语句 + :param loc: 查找语法字符串 + :return: 匹配符元组 + """ + loc_by = 'css selector' + + if loc.startswith('.'): + if loc.startswith(('.=', '.:', '.^', '.$')): + loc = loc.replace('.', '@class', 1) + else: + loc = loc.replace('.', '@class=', 1) + + elif loc.startswith('#'): + if loc.startswith(('#=', '#:', '#^', '#$')): + loc = loc.replace('#', '@id', 1) + else: + loc = loc.replace('#', '@id=', 1) + + elif loc.startswith(('t:', 't=')): + loc = f'tag:{loc[2:]}' + + elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')): + loc = f'text{loc[2:]}' + + # ------------------------------------------------------------------ + # 多属性查找 + if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'): + loc_str = _make_multi_css_str('*', loc)[1] + + # 单属性查找 + elif loc.startswith('@') and loc != '@': + loc_by, loc_str = _make_single_css_str('*', loc) + + # 根据tag name查找 + elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='): + at_ind = loc.find('@') + if at_ind == -1: + loc_str = loc[4:] + elif loc[at_ind:].startswith(('@@', '@|', '@!')): + loc_by, loc_str = _make_multi_css_str(loc[4:at_ind], loc[at_ind:]) + else: + loc_by, loc_str = _make_single_css_str(loc[4:at_ind], loc[at_ind:]) + + # 根据文本查找 + elif loc.startswith(('text=', 'text:', 'text^', 'text$', 'xpath=', 'xpath:', 'x:', 'x=')): + loc_by, loc_str = str_to_xpath_loc(loc) + + # 用css selector查找 + elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='): + loc_str = loc[4:] + elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='): + loc_str = loc[2:] + + # 根据文本模糊查找 + elif loc: + loc_by, loc_str = str_to_xpath_loc(loc) + + else: + loc_str = '*' + + return loc_by, loc_str + + +def _make_single_xpath_str(tag: str, text: str) -> tuple: + """生成单属性xpath语句 :param tag: 标签名 :param text: 待处理的字符串 :return: xpath字符串 @@ -137,16 +201,13 @@ 
def _make_single_xpath_str(tag: str, text: str) -> str: r = split(r'([:=$^])', text, maxsplit=1) len_r = len(r) len_r0 = len(r[0]) - if len_r != 3 and len_r0 > 1: - arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}' - - elif len_r == 3 and len_r0 > 1: + if len_r == 3 and len_r0 > 1: symbol = r[1] if symbol == '=': # 精确查找 arg = '.' if r[0] in ('@text()', '@tx()') else r[0] arg_str = f'{arg}={_make_search_str(r[2])}' - elif symbol == '^': # 开头开头 + elif symbol == '^': # 匹配开头 if r[0] in ('@text()', '@tx()'): txt_str = f'/text()[starts-with(., {_make_search_str(r[2])})]/..' arg_str = '' @@ -172,24 +233,32 @@ def _make_single_xpath_str(tag: str, text: str) -> str: else: raise ValueError(f'符号不正确:{symbol}') + elif len_r != 3 and len_r0 > 1: + arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}' + if arg_str: arg_list.append(arg_str) arg_str = ' and '.join(arg_list) - return f'//*[{arg_str}]{txt_str}' if arg_str else f'//*{txt_str}' + return 'xpath', f'//*[{arg_str}]{txt_str}' if arg_str else f'//*{txt_str}' -def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str: +def _make_multi_xpath_str(tag: str, text: str) -> tuple: """生成多属性查找的xpath语句 :param tag: 标签名 :param text: 待处理的字符串 - :param _and: 是否与方式 :return: xpath字符串 """ arg_list = [] - args = text.split('@@') if _and else text.split('@|') + args = split(r'(@!|@@|@\|)', text)[1:] + if '@@' in args and '@|' in args: + raise ValueError('@@和@|不能同时出现在一个定位语句中。') + elif '@@' in args: + _and = True + else: # @| + _and = False - for arg in args[1:]: - r = split(r'([:=$^])', arg, maxsplit=1) + for k in range(0, len(args) - 1, 2): + r = split(r'([:=$^])', args[k + 1], maxsplit=1) arg_str = '' len_r = len(r) @@ -197,8 +266,7 @@ def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str: arg_str = 'not(@*)' else: - r[0], ignore = (r[0][1:], True) if r[0][0] == '-' else (r[0], None) # 是否去除某个属性 - + ignore = True if args[k] == '@!' 
else False # 是否去除某个属性 if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性 arg_str = 'normalize-space(text())' if r[0] in ('text()', 'tx()') else f'@{r[0]}' @@ -232,7 +300,7 @@ def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str: condition = f' and ({arg_str})' if arg_str else '' arg_str = f'name()="{tag}"{condition}' - return f'//*[{arg_str}]' if arg_str else f'//*' + return 'xpath', f'//*[{arg_str}]' if arg_str else f'//*' def _make_search_str(search_str: str) -> str: @@ -252,6 +320,68 @@ def _make_search_str(search_str: str) -> str: return search_str +def _make_multi_css_str(tag: str, text: str) -> tuple: + """生成多属性查找的css selector语句 + :param tag: 标签名 + :param text: 待处理的字符串 + :return: css selector字符串 + """ + arg_list = [] + args = split(r'(@!|@@|@\|)', text)[1:] + if '@@' in args and '@|' in args: + raise ValueError('@@和@|不能同时出现在一个定位语句中。') + elif '@@' in args: + _and = True + else: # @| + _and = False + + for k in range(0, len(args) - 1, 2): + r = split(r'([:=$^])', args[k + 1], maxsplit=1) + if not r[0] or r[0].startswith(('text()', 'tx()')): + return _make_multi_xpath_str(tag, text) + + arg_str = '' + len_r = len(r) + ignore = True if args[k] == '@!' 
else False # 是否去除某个属性 + if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性 + arg_str = f'[{r[0]}]' + + elif len_r == 3: # 属性名和内容都有 + d = {'=': '', '^': '^', '$': '$', ':': '*'} + arg_str = f'[{r[0]}{d[r[1]]}={css_trans(r[2])}]' + + if arg_str and ignore: + arg_str = f':not({arg_str})' + + if arg_str: + arg_list.append(arg_str) + + if _and: + return 'css selector', f'{tag}{"".join(arg_list)}' + + return 'css selector', f'{tag}{("," + tag).join(arg_list)}' + + +def _make_single_css_str(tag: str, text: str) -> tuple: + """生成单属性css selector语句 + :param tag: 标签名 + :param text: 待处理的字符串 + :return: css selector字符串 + """ + if text == '@' or text.startswith(('@text()', '@tx()')): + return _make_single_xpath_str(tag, text) + + r = split(r'([:=$^])', text, maxsplit=1) + if len(r) == 3: + d = {'=': '', '^': '^', '$': '$', ':': '*'} + arg_str = f'[{r[0][1:]}{d[r[1]]}={css_trans(r[2])}]' + + else: + arg_str = f'[{css_trans(r[0][1:])}]' + + return 'css selector', f'{tag}{arg_str}' + + def translate_loc(loc): """把By类型的loc元组转换为css selector或xpath类型的 :param loc: By类型的loc元组 @@ -276,7 +406,7 @@ def translate_loc(loc): elif loc_0 == By.CLASS_NAME: loc_str = f'//*[@class="{loc[1]}"]' - elif loc_0 == By.PARTIAL_LINK_TEXT: + elif loc_0 == By.LINK_TEXT: loc_str = f'//a[text()="{loc[1]}"]' elif loc_0 == By.NAME: @@ -292,3 +422,53 @@ def translate_loc(loc): raise ValueError('无法识别的定位符。') return loc_by, loc_str + + +def translate_css_loc(loc): + """把By类型的loc元组转换为css selector或xpath类型的 + :param loc: By类型的loc元组 + :return: css selector或xpath类型的loc元组 + """ + if len(loc) != 2: + raise ValueError('定位符长度必须为2。') + + loc_by = By.CSS_SELECTOR + loc_0 = loc[0].lower() + if loc_0 == By.XPATH: + loc_by = By.XPATH + loc_str = loc[1] + + elif loc_0 == By.CSS_SELECTOR: + loc_by = loc_0 + loc_str = loc[1] + + elif loc_0 == By.ID: + loc_str = f'#{css_trans(loc[1])}' + + elif loc_0 == By.CLASS_NAME: + loc_str = f'.{css_trans(loc[1])}' + + elif loc_0 == By.LINK_TEXT: + loc_by = By.XPATH + loc_str = 
f'//a[text()="{css_trans(loc[1])}"]' + + elif loc_0 == By.NAME: + loc_str = f'*[@name={css_trans(loc[1])}]' + + elif loc_0 == By.TAG_NAME: + loc_str = loc[1] + + elif loc_0 == By.PARTIAL_LINK_TEXT: + loc_by = By.XPATH + loc_str = f'//a[contains(text(),"{loc[1]}")]' + + else: + raise ValueError('无法识别的定位符。') + + return loc_by, loc_str + + +def css_trans(txt): + c = ('!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', + '[', '\\', ']', '^', '`', ',', '{', '|', '}', '~', ' ') + return ''.join([fr'\{i}' if i in c else i for i in txt]) diff --git a/DrissionPage/_functions/locator.pyi b/DrissionPage/_functions/locator.pyi new file mode 100644 index 0000000..2f79a69 --- /dev/null +++ b/DrissionPage/_functions/locator.pyi @@ -0,0 +1,26 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union + + +def is_loc(text: str) -> bool: ... + + +def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool = False) -> tuple: ... + + +def str_to_xpath_loc(loc: str) -> tuple: ... + + +def translate_loc(loc: tuple) -> tuple: ... + + +def translate_css_loc(loc: tuple) -> tuple: ... + + +def css_trans(txt: str) -> str: ... diff --git a/DrissionPage/_functions/settings.py b/DrissionPage/_functions/settings.py new file mode 100644 index 0000000..225190c --- /dev/null +++ b/DrissionPage/_functions/settings.py @@ -0,0 +1,13 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" + + +class Settings(object): + raise_when_ele_not_found = False + raise_when_click_failed = False + raise_when_wait_failed = False diff --git a/DrissionPage/_functions/tools.py b/DrissionPage/_functions/tools.py new file mode 100644 index 0000000..b785ed1 --- /dev/null +++ b/DrissionPage/_functions/tools.py @@ -0,0 +1,229 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from platform import system +from shutil import rmtree +from time import perf_counter, sleep + +from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess + +from .._configs.options_manage import OptionsManager +from ..errors import (ContextLostError, ElementLostError, CDPError, PageDisconnectedError, NoRectError, + AlertExistsError, WrongURLError, StorageError, CookieFormatError, JavaScriptError) + + +def port_is_using(ip, port): + """检查端口是否被占用 + :param ip: 浏览器地址 + :param port: 浏览器端口 + :return: bool + """ + from socket import socket, AF_INET, SOCK_STREAM + s = socket(AF_INET, SOCK_STREAM) + s.settimeout(.1) + result = s.connect_ex((ip, int(port))) + s.close() + return result == 0 + + +def clean_folder(folder_path, ignore=None): + """清空一个文件夹,除了ignore里的文件和文件夹 + :param folder_path: 要清空的文件夹路径 + :param ignore: 忽略列表 + :return: None + """ + ignore = [] if not ignore else ignore + p = Path(folder_path) + + for f in p.iterdir(): + if f.name not in ignore: + if f.is_file(): + f.unlink() + elif f.is_dir(): + rmtree(f, True) + + +def show_or_hide_browser(page, hide=True): + """执行显示或隐藏浏览器窗口 + :param page: ChromePage对象 + :param hide: 是否隐藏 + :return: None + """ + if not page.address.startswith(('127.0.0.1', 'localhost')): + return + + if system().lower() != 'windows': + raise OSError('该方法只能在Windows系统使用。') + + try: + from win32gui import ShowWindow + from win32con import SW_HIDE, SW_SHOW + except ImportError: + raise ImportError('请先安装:pip install 
pypiwin32') + + pid = page.process_id + if not pid: + return None + hds = get_chrome_hwnds_from_pid(pid, page.title) + sw = SW_HIDE if hide else SW_SHOW + for hd in hds: + ShowWindow(hd, sw) + + +def get_browser_progress_id(progress, address): + """获取浏览器进程id + :param progress: 已知的进程对象,没有时传入None + :param address: 浏览器管理地址,含端口 + :return: 进程id或None + """ + if progress: + return progress.pid + + from os import popen + port = address.split(':')[-1] + txt = '' + progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') + for progress in progresses: + if 'LISTENING' in progress: + txt = progress + break + if not txt: + return None + + return txt.split(' ')[-1] + + +def get_chrome_hwnds_from_pid(pid, title): + """通过PID查询句柄ID + :param pid: 进程id + :param title: 窗口标题 + :return: 进程句柄组成的列表 + """ + try: + from win32gui import IsWindow, GetWindowText, EnumWindows + from win32process import GetWindowThreadProcessId + except ImportError: + raise ImportError('请先安装win32gui,pip install pypiwin32') + + def callback(hwnd, hds): + if IsWindow(hwnd) and title in GetWindowText(hwnd): + _, found_pid = GetWindowThreadProcessId(hwnd) + if str(found_pid) == str(pid): + hds.append(hwnd) + return True + + hwnds = [] + EnumWindows(callback, hwnds) + return hwnds + + +def wait_until(page, condition, timeout=10, poll=0.1, raise_err=True): + """等待返回值不为False或空,直到超时 + :param page: DrissionPage对象 + :param condition: 等待条件,返回值不为False则停止等待 + :param timeout: 超时时间(秒) + :param poll: 轮询间隔 + :param raise_err: 是否抛出异常 + :return: DP Element or bool + """ + end_time = perf_counter() + timeout + if isinstance(condition, str) or isinstance(condition, tuple): + if not callable(getattr(page, 's_ele', None)): + raise AttributeError('page对象缺少s_ele方法') + condition_method = lambda page: page.s_ele(condition) + elif callable(condition): + condition_method = condition + else: + raise ValueError('condition必须是函数或者字符串或者元组') + while perf_counter() < end_time: + try: + value = condition_method(page) + if value: + 
return value + except Exception: + pass + + sleep(poll) + if perf_counter() > end_time: + break + + if raise_err: + raise TimeoutError(f'等待超时(等待{timeout}秒)。') + else: + return False + + +def stop_process_on_port(port): + """强制关闭某个端口内的进程 + :param port: 端口号 + :return: None + """ + for proc in process_iter(['pid', 'connections']): + try: + connections = proc.connections() + except (AccessDenied, NoSuchProcess): + continue + for conn in connections: + if conn.laddr.port == int(port): + try: + proc.terminate() + except (NoSuchProcess, AccessDenied, ZombieProcess): + pass + except Exception as e: + print(f"{proc.pid} {port}: {e}") + + +def configs_to_here(save_name=None): + """把默认ini文件复制到当前目录 + :param save_name: 指定文件名,为None则命名为'dp_configs.ini' + :return: None + """ + om = OptionsManager('default') + save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini' + om.save(save_name) + + +def raise_error(result, ignore=None): + """抛出error对应报错 + :param result: 包含error的dict + :param ignore: 要忽略的错误 + :return: None + """ + error = result['error'] + if error in ('Cannot find context with specified id', 'Inspected target navigated or closed'): + r = ContextLostError() + elif error in ('Could not find node with given id', 'Could not find object with given id', + 'No node with given id found', 'Node with given id does not belong to the document', + 'No node found for given backend id'): + r = ElementLostError() + elif error in ('connection disconnected', 'No target with given id found'): + r = PageDisconnectedError() + elif error == 'alert exists.': + r = AlertExistsError() + elif error in ('Node does not have a layout object', 'Could not compute box model.'): + r = NoRectError() + elif error == 'Cannot navigate to invalid URL': + r = WrongURLError(f'无效的url:{result["args"]["url"]}。也许要加上"http://"?') + elif error == 'Frame corresponds to an opaque origin and its storage key cannot be serialized': + r = StorageError() + elif error == 'Sanitizing cookie failed': + r = 
CookieFormatError(f'cookie格式不正确:{result["args"]}') + elif error == 'Given expression does not evaluate to a function': + r = JavaScriptError(f'传入的js无法解析成函数:\n{result["args"]["functionDeclaration"]}') + elif result['type'] in ('call_method_error', 'timeout'): + from DrissionPage import __version__ + from time import process_time + txt = f'\n错误:{result["error"]}\nmethod:{result["method"]}\nargs:{result["args"]}\n' \ + f'版本:{__version__}\n运行时间:{process_time()}\n出现这个错误可能意味着程序有bug,请把错误信息和重现方法' \ + '告知作者,谢谢。\n报告网站:https://gitee.com/g1879/DrissionPage/issues' + r = TimeoutError(txt) if result['type'] == 'timeout' else CDPError(txt) + else: + r = RuntimeError(result) + + if not ignore or not isinstance(r, ignore): + raise r diff --git a/DrissionPage/_functions/tools.pyi b/DrissionPage/_functions/tools.pyi new file mode 100644 index 0000000..057a43f --- /dev/null +++ b/DrissionPage/_functions/tools.pyi @@ -0,0 +1,40 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from os import popen +from pathlib import Path +from typing import Union +from types import FunctionType + +from .._pages.chromium_page import ChromiumPage + + +def port_is_using(ip: str, port: Union[str, int]) -> bool: ... + + +def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... + + +def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... + + +def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... + + +def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... + + +def wait_until(page, condition: Union[FunctionType, str, tuple], timeout: float, poll: float, raise_err: bool): ... + + +def stop_process_on_port(port: Union[int, str]) -> None: ... + + +def configs_to_here(file_name: Union[Path, str] = None) -> None: ... + + +def raise_error(result: dict, ignore=None) -> None: ... 
diff --git a/DrissionPage/commons/web.py b/DrissionPage/_functions/web.py similarity index 65% rename from DrissionPage/commons/web.py rename to DrissionPage/_functions/web.py index e6a24e6..9479c05 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/_functions/web.py @@ -1,105 +1,20 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ -from base64 import b64decode +from datetime import datetime from html import unescape from http.cookiejar import Cookie -from json import loads, JSONDecodeError from re import sub from urllib.parse import urlparse, urljoin, urlunparse from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict from tldextract import extract -class ResponseData(object): - """返回的数据包管理类""" - __slots__ = ('requestId', 'response', 'rawBody', 'tab', 'target', 'url', 'status', 'statusText', 'securityDetails', - 'headersText', 'mimeType', 'requestHeadersText', 'connectionReused', 'connectionId', 'remoteIPAddress', - 'remotePort', 'fromDiskCache', 'fromServiceWorker', 'fromPrefetchCache', 'encodedDataLength', 'timing', - 'serviceWorkerResponseSource', 'responseTime', 'cacheStorageCacheName', 'protocol', 'securityState', - '_requestHeaders', '_body', '_base64_body', '_rawPostData', '_postData', 'method') - - def __init__(self, request_id, response, body, tab, target): - """ - :param response: response的数据 - :param body: response包含的内容 - :param tab: 产生这个数据包的tab的id - :param target: 监听目标 - """ - self.requestId = request_id - self.response = CaseInsensitiveDict(response) - self.rawBody = body - self.tab = tab - self.target = target - self._requestHeaders = None - self._postData = None - self._body = None - self._base64_body = False - self._rawPostData = None - - def __getattr__(self, item): - return self.response.get(item, None) - - def __getitem__(self, item): - return 
self.response.get(item, None) - - def __repr__(self): - return f'<ResponseData target={self.target} request_id={self.requestId}>' - - @property - def headers(self): - """以大小写不敏感字典返回headers数据""" - headers = self.response.get('headers', None) - return CaseInsensitiveDict(headers) if headers else None - - @property - def requestHeaders(self): - """以大小写不敏感字典返回requestHeaders数据""" - if self._requestHeaders: - return self._requestHeaders - headers = self.response.get('requestHeaders', None) - return CaseInsensitiveDict(headers) if headers else None - - @requestHeaders.setter - def requestHeaders(self, val): - """设置requestHeaders""" - self._requestHeaders = val - - @property - def postData(self): - """返回postData数据""" - if self._postData is None and self._rawPostData: - try: - self._postData = loads(self._rawPostData) - except (JSONDecodeError, TypeError): - self._postData = self._rawPostData - return self._postData - - @postData.setter - def postData(self, val): - """设置postData""" - self._rawPostData = val - - @property - def body(self): - """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" - if self._body is None: - if self._base64_body: - self._body = b64decode(self.rawBody) - - else: - try: - self._body = loads(self.rawBody) - except (JSONDecodeError, TypeError): - self._body = self.rawBody - - return self._body - - def get_ele_txt(e): """获取元素内所有文本 :param e: 元素对象 @@ -142,7 +57,7 @@ def get_ele_txt(e): if sub('[ \n\t\r]', '', el) != '': # 字符除了回车和空格还有其它内容 txt = el if not pre: - txt = txt.replace('\n', ' ').strip(' ') + txt = txt.replace('\r\n', ' ').replace('\n', ' ').strip(' ') txt = sub(r' {2,}', ' ', txt) str_list.append(txt) @@ -190,8 +105,6 @@ def location_in_viewport(page, loc_x, loc_y): if (x< scrollLeft || y < scrollTop || x > vWidth + scrollLeft || y > vHeight + scrollTop){{return false;}} return true;}}''' return page.run_js(js) - # const vWidth = window.innerWidth || document.documentElement.clientWidth; - # const vHeight = window.innerHeight || 
document.documentElement.clientHeight; def offset_scroll(ele, offset_x, offset_y): @@ -202,39 +115,40 @@ def offset_scroll(ele, offset_x, offset_y): :param offset_y: 偏移量y :return: 视口中的坐标 """ - loc_x, loc_y = ele.location - cp_x, cp_y = ele.locations.click_point + loc_x, loc_y = ele.rect.location + cp_x, cp_y = ele.rect.click_point lx = loc_x + offset_x if offset_x else cp_x ly = loc_y + offset_y if offset_y else cp_y if not location_in_viewport(ele.page, lx, ly): clientWidth = ele.page.run_js('return document.body.clientWidth;') clientHeight = ele.page.run_js('return document.body.clientHeight;') ele.page.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2) - cl_x, cl_y = ele.locations.viewport_location - ccp_x, ccp_y = ele.locations.viewport_click_point + cl_x, cl_y = ele.rect.viewport_location + ccp_x, ccp_y = ele.rect.viewport_click_point cx = cl_x + offset_x if offset_x else ccp_x cy = cl_y + offset_y if offset_y else ccp_y return cx, cy -def make_absolute_link(link, page=None): +def make_absolute_link(link, baseURI=None): """获取绝对url :param link: 超链接 - :param page: 页面对象 + :param baseURI: 页面或iframe的url :return: 绝对链接 """ if not link: return link + link = link.strip() parsed = urlparse(link)._asdict() # 是相对路径,与页面url拼接并返回 if not parsed['netloc']: - return urljoin(page.url, link) if page else link + return urljoin(baseURI, link) if baseURI else link # 是绝对路径但缺少协议,从页面url获取协议并修复 - if not parsed['scheme'] and page: - parsed['scheme'] = urlparse(page.url).scheme + if not parsed['scheme'] and baseURI: + parsed['scheme'] = urlparse(baseURI).scheme parsed = tuple(v for v in parsed.values()) return urlunparse(parsed) @@ -254,20 +168,20 @@ def is_js_func(func): def cookie_to_dict(cookie): """把Cookie对象转为dict格式 - :param cookie: Cookie对象 + :param cookie: Cookie对象、字符串或字典 :return: cookie字典 """ if isinstance(cookie, Cookie): cookie_dict = cookie.__dict__.copy() - cookie_dict.pop('rfc2109') - cookie_dict.pop('_rest') + cookie_dict.pop('rfc2109', None) + 
cookie_dict.pop('_rest', None) return cookie_dict elif isinstance(cookie, dict): cookie_dict = cookie elif isinstance(cookie, str): - cookie = cookie.split(',' if ',' in cookie else ';') + cookie = cookie.rstrip(';,').split(',' if ',' in cookie else ';') cookie_dict = {} for key, attr in enumerate(cookie): @@ -296,7 +210,7 @@ def cookies_to_tuple(cookies): cookies = tuple(cookie_to_dict(cookie) for cookie in cookies) elif isinstance(cookies, str): - cookies = tuple(cookie_to_dict(cookie.lstrip()) for cookie in cookies.split(";")) + cookies = tuple(cookie_to_dict(c.lstrip()) for c in cookies.rstrip(';,').split(',' if ',' in cookies else ';')) elif isinstance(cookies, dict): cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) @@ -334,31 +248,51 @@ def set_browser_cookies(page, cookies): :param cookies: cookies信息 :return: None """ - cookies = cookies_to_tuple(cookies) - for cookie in cookies: + for cookie in cookies_to_tuple(cookies): if 'expiry' in cookie: cookie['expires'] = int(cookie['expiry']) cookie.pop('expiry') + if 'expires' in cookie: - cookie['expires'] = int(cookie['expires']) + if not cookie['expires']: + cookie.pop('expires') + + elif isinstance(cookie['expires'], str): + if cookie['expires'].isdigit(): + cookie['expires'] = int(cookie['expires']) + + elif cookie['expires'].replace('.', '').isdigit(): + cookie['expires'] = float(cookie['expires']) + + else: + try: + cookie['expires'] = datetime.strptime(cookie['expires'], + '%a, %d %b %Y %H:%M:%S GMT').timestamp() + except ValueError: + cookie['expires'] = datetime.strptime(cookie['expires'], + '%a, %d %b %y %H:%M:%S GMT').timestamp() + if cookie['value'] is None: cookie['value'] = '' - + elif not isinstance(cookie['value'], str): + cookie['value'] = str(cookie['value']) if cookie['name'].startswith('__Secure-'): cookie['secure'] = True if cookie['name'].startswith('__Host-'): cookie['path'] = '/' cookie['secure'] = True + cookie['url'] = page.url + 
page.run_cdp_loaded('Network.setCookie', **cookie) + continue # 不用设置域名,可退出 - else: - if cookie.get('domain', None): - try: - page.run_cdp_loaded('Network.setCookie', **cookie) - if is_cookie_in_driver(page, cookie): - continue - except Exception: - pass + if cookie.get('domain', None): + try: + page.run_cdp_loaded('Network.setCookie', **cookie) + if is_cookie_in_driver(page, cookie): + continue + except Exception: + pass ex_url = extract(page._browser_url) d_list = ex_url.subdomain.split('.') @@ -384,7 +318,13 @@ def is_cookie_in_driver(page, cookie): :param cookie: dict格式cookie :return: bool """ - for c in page.get_cookies(): - if cookie['name'] == c['name'] and cookie['value'] == c['value']: - return True + if 'domain' in cookie: + for c in page.get_cookies(all_domains=True): + if cookie['name'] == c['name'] and cookie['value'] == c['value'] and cookie['domain'] == c.get('domain', + None): + return True + else: + for c in page.get_cookies(all_domains=True): + if cookie['name'] == c['name'] and cookie['value'] == c['value']: + return True return False diff --git a/DrissionPage/_functions/web.pyi b/DrissionPage/_functions/web.pyi new file mode 100644 index 0000000..240bdab --- /dev/null +++ b/DrissionPage/_functions/web.pyi @@ -0,0 +1,49 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from http.cookiejar import Cookie +from typing import Union + +from requests import Session +from requests.cookies import RequestsCookieJar + +from .._base.base import BasePage, DrissionElement +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase + + +def get_ele_txt(e: DrissionElement) -> str: ... + + +def format_html(text: str) -> str: ... + + +def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool: ... 
+ + +def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> tuple: ... + + +def make_absolute_link(link: str, baseURI: str = None) -> str: ... + + +def is_js_func(func: str) -> bool: ... + + +def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ... + + +def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ... + + +def set_session_cookies(session: Session, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + +def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + +def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ... diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py new file mode 100644 index 0000000..dbaf4f8 --- /dev/null +++ b/DrissionPage/_pages/chromium_base.py @@ -0,0 +1,1180 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from json import loads, JSONDecodeError +from os.path import sep +from pathlib import Path +from re import findall +from threading import Thread +from time import perf_counter, sleep +from urllib.parse import quote + +from DataRecorder.tools import make_valid_name + +from .._base.base import BasePage +from .._elements.chromium_element import ChromiumElement, run_js, make_chromium_eles +from .._elements.none_element import NoneElement +from .._elements.session_element import make_session_ele +from .._functions.locator import get_loc, is_loc +from .._functions.settings import Settings +from .._functions.tools import raise_error +from .._functions.web import location_in_viewport +from .._units.actions import Actions +from .._units.listener import Listener +from .._units.rect import TabRect +from .._units.screencast import Screencast +from .._units.scroller import PageScroller +from .._units.setter import ChromiumBaseSetter +from .._units.states import PageStates +from .._units.waiter import BaseWaiter +from ..errors import ContextLostError, CDPError, PageDisconnectedError, ElementNotFoundError + +__ERROR__ = 'error' + + +class ChromiumBase(BasePage): + """标签页、frame、页面基类""" + + def __init__(self, address, tab_id=None, timeout=None): + """ + :param address: 浏览器 ip:port + :param tab_id: 要控制的标签页id,不指定默认为激活的 + :param timeout: 超时时间(秒) + """ + super().__init__() + self._is_loading = None + self._root_id = None # object id + self._set = None + self._screencast = None + self._actions = None + self._states = None + self._has_alert = False + self._ready_state = None + self._rect = None + self._wait = None + self._scroll = None + self._upload_list = None + self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc + self._download_path = None + self._load_end_time = 0 + self._init_jss = [] + if not hasattr(self, '_listener'): + self._listener = None + + if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): + address = 
f'127.0.0.1:{address}' + + self._d_set_start_options(address) + self._d_set_runtime_settings() + self._connect_browser(tab_id) + if timeout is not None: + self.timeout = timeout + + def _d_set_start_options(self, address): + """设置浏览器启动属性 + :param address: 'ip:port' + :return: None + """ + self.address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') + + def _d_set_runtime_settings(self): + self._timeouts = Timeout(self) + self._load_mode = 'normal' + + def _connect_browser(self, tab_id=None): + """连接浏览器,在第一次时运行 + :param tab_id: 要控制的标签页id,不指定默认为激活的 + :return: None + """ + self._is_reading = False + + if not tab_id: + tabs = self.browser.driver.get(f'http://{self.address}/json').json() + tabs = [(i['id'], i['url']) for i in tabs + if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')] + dialog = None + if len(tabs) > 1: + for k, t in enumerate(tabs): + if t[1] == 'chrome://privacy-sandbox-dialog/notice': + dialog = k + elif not tab_id: + tab_id = t[0] + + if tab_id and dialog is not None: + break + + if dialog is not None: + close_privacy_dialog(self, tabs[dialog][0]) + + else: + tab_id = tabs[0][0] + + self._driver_init(tab_id) + if self._js_ready_state == 'complete' and self._ready_state is None: + self._get_document() + self._ready_state = 'complete' + + def _driver_init(self, tab_id): + """新建页面、页面刷新、切换标签页后要进行的cdp参数初始化 + :param tab_id: 要跳转到的标签页id + :return: None + """ + self._is_loading = True + self._driver = self.browser._get_driver(tab_id) + + self._alert = Alert() + self._driver.set_callback('Page.javascriptDialogOpening', self._on_alert_open, immediate=True) + self._driver.set_callback('Page.javascriptDialogClosed', self._on_alert_close) + + self._driver.run('DOM.enable') + self._driver.run('Page.enable') + self._driver.run('Emulation.setFocusEmulationEnabled', enabled=True) + + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id + 
if not hasattr(self, '_frame_id'): + self._frame_id = r['frameTree']['frame']['id'] + + self._driver.set_callback('Page.frameStartedLoading', self._onFrameStartedLoading) + self._driver.set_callback('Page.frameNavigated', self._onFrameNavigated) + self._driver.set_callback('Page.domContentEventFired', self._onDomContentEventFired) + self._driver.set_callback('Page.loadEventFired', self._onLoadEventFired) + self._driver.set_callback('Page.frameStoppedLoading', self._onFrameStoppedLoading) + self._driver.set_callback('Page.frameAttached', self._onFrameAttached) + self._driver.set_callback('Page.frameDetached', self._onFrameDetached) + + def _get_document(self, timeout=10): + """获取页面文档 + :param timeout: 超时时间(秒) + :return: 是否获取成功 + """ + if self._is_reading: + return + self._is_reading = True + timeout = timeout if timeout >= .5 else .5 + end_time = perf_counter() + timeout + while perf_counter() < end_time: + try: + b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId'] + timeout = end_time - perf_counter() + timeout = .5 if timeout <= 0 else timeout + self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id, + _timeout=timeout)['object']['objectId'] + result = True + break + + except: + timeout = end_time - perf_counter() + timeout = .5 if timeout <= 0 else timeout + + else: + result = False + + if result: + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id + + self._is_loading = False + self._is_reading = False + return result + + def _onFrameDetached(self, **kwargs): + self.browser._frames.pop(kwargs['frameId'], None) + + def _onFrameAttached(self, **kwargs): + self.browser._frames[kwargs['frameId']] = self.tab_id + + def _onFrameStartedLoading(self, **kwargs): + """页面开始加载时执行""" + self.browser._frames[kwargs['frameId']] = self.tab_id + if kwargs['frameId'] == self._frame_id: + self._doc_got = False + self._ready_state = 'connecting' + self._is_loading = 
True + self._load_end_time = perf_counter() + self.timeouts.page_load + if self._load_mode == 'eager': + t = Thread(target=self._wait_to_stop) + t.daemon = True + t.start() + + def _onFrameNavigated(self, **kwargs): + """页面跳转时执行""" + if kwargs['frame']['id'] == self._frame_id: + self._doc_got = False + self._ready_state = 'loading' + self._is_loading = True + + def _onDomContentEventFired(self, **kwargs): + """在页面刷新、变化后重新读取页面内容""" + if self._load_mode == 'eager': + self.run_cdp('Page.stopLoading') + if self._get_document(self._load_end_time - perf_counter() - .1): + self._doc_got = True + self._ready_state = 'interactive' + + def _onLoadEventFired(self, **kwargs): + """在页面刷新、变化后重新读取页面内容""" + if self._doc_got is False and self._get_document(self._load_end_time - perf_counter() - .1): + self._doc_got = True + self._ready_state = 'complete' + + def _onFrameStoppedLoading(self, **kwargs): + """页面加载完成后执行""" + self.browser._frames[kwargs['frameId']] = self.tab_id + if kwargs['frameId'] == self._frame_id: + if self._doc_got is False: + self._get_document(self._load_end_time - perf_counter() - .1) + self._ready_state = 'complete' + + def _onFileChooserOpened(self, **kwargs): + """文件选择框打开时执行""" + if self._upload_list: + if 'backendNodeId' not in kwargs: + raise TypeError('该输入框无法接管,请改用对<input>元素输入路径的方法设置。') + files = self._upload_list if kwargs['mode'] == 'selectMultiple' else self._upload_list[:1] + self.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=kwargs['backendNodeId']) + + self.driver.set_callback('Page.fileChooserOpened', None) + self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) + self._upload_list = None + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele = page('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间(秒) + :return: ChromiumElement对象 + """ + return self.ele(loc_or_str, timeout) + + def _wait_to_stop(self): + """eager策略超时时使页面停止加载""" + end_time = perf_counter() + 
self.timeouts.page_load + while perf_counter() < end_time: + sleep(.1) + if self._ready_state in ('interactive', 'complete') and self._is_loading: + self.stop_loading() + + # ----------挂件---------- + + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = BaseWaiter(self) + return self._wait + + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = ChromiumBaseSetter(self) + return self._set + + @property + def screencast(self): + """返回用于录屏的对象""" + if self._screencast is None: + self._screencast = Screencast(self) + return self._screencast + + @property + def actions(self): + """返回用于执行动作链的对象""" + if self._actions is None: + self._actions = Actions(self) + self.wait.load_complete() + return self._actions + + @property + def listen(self): + """返回用于聆听数据包的对象""" + if self._listener is None: + self._listener = Listener(self) + return self._listener + + @property + def states(self): + """返回用于获取状态信息的对象""" + if self._states is None: + self._states = PageStates(self) + return self._states + + @property + def scroll(self): + """返回用于滚动滚动条的对象""" + self.wait.load_complete() + if self._scroll is None: + self._scroll = PageScroller(self) + return self._scroll + + @property + def rect(self): + """返回获取窗口坐标和大小的对象""" + # self.wait.load_complete() + if self._rect is None: + self._rect = TabRect(self) + return self._rect + + @property + def timeouts(self): + """返回timeouts设置""" + return self._timeouts + + # ----------挂件---------- + + @property + def browser(self): + return self._browser + + @property + def driver(self): + """返回用于控制浏览器的Driver对象""" + if self._driver is None: + raise RuntimeError('浏览器已关闭或链接已断开。') + return self._driver + + @property + def title(self): + """返回当前页面title""" + return self.run_cdp_loaded('Target.getTargetInfo', targetId=self._target_id)['targetInfo']['title'] + + @property + def url(self): + """返回当前页面url""" + return self.run_cdp_loaded('Target.getTargetInfo', 
targetId=self._target_id)['targetInfo']['url'] + + @property + def _browser_url(self): + """用于被WebPage覆盖""" + return self.url + + @property + def html(self): + """返回当前页面html文本""" + self.wait.load_complete() + return self.run_cdp('DOM.getOuterHTML', objectId=self._root_id)['outerHTML'] + + @property + def json(self): + """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" + try: + return loads(self('t:pre', timeout=.5).text) + except JSONDecodeError: + return None + + @property + def tab_id(self): + """返回当前标签页id""" + return self._target_id + + @property + def _target_id(self): + """返回当前标签页id""" + return self.driver.id if not self.driver._stopped.is_set() else '' + + @property + def active_ele(self): + """返回当前焦点所在元素""" + return self.run_js_loaded('return document.activeElement;') + + @property + def load_mode(self): + """返回页面加载策略,有3种:'none'、'normal'、'eager'""" + return self._load_mode + + @property + def user_agent(self): + """返回user agent""" + return self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + + @property + def upload_list(self): + """返回等待上传文件列表""" + return self._upload_list + + @property + def _js_ready_state(self): + """返回js获取的ready state信息""" + try: + return self.run_cdp('Runtime.evaluate', expression='document.readyState;', _timeout=3)['result']['value'] + except ContextLostError: + return None + except TimeoutError: + return 'timeout' + + def run_cdp(self, cmd, **cmd_args): + """执行Chrome DevTools Protocol语句 + :param cmd: 协议项目 + :param cmd_args: 参数 + :return: 执行的结果 + """ + ignore = cmd_args.pop('_ignore', None) + r = self.driver.run(cmd, **cmd_args) + return r if __ERROR__ not in r else raise_error(r, ignore) + + def run_cdp_loaded(self, cmd, **cmd_args): + """执行Chrome DevTools Protocol语句,执行前等待页面加载完毕 + :param cmd: 协议项目 + :param cmd_args: 参数 + :return: 执行的结果 + """ + self.wait.load_complete() + return self.run_cdp(cmd, **cmd_args) + + def run_js(self, script, *args, as_expr=False, timeout=None): + """运行javascript代码 + :param 
script: js文本 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... + :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script设置 + :return: 运行的结果 + """ + return run_js(self, script, as_expr, self.timeouts.script if timeout is None else timeout, args) + + def run_js_loaded(self, script, *args, as_expr=False, timeout=None): + """运行javascript代码,执行前等待页面加载完毕 + :param script: js文本 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... + :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script属性值 + :return: 运行的结果 + """ + self.wait.load_complete() + return run_js(self, script, as_expr, self.timeouts.script if timeout is None else timeout, args) + + def run_async_js(self, script, *args, as_expr=False): + """以异步方式执行js代码 + :param script: js文本 + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... + :param as_expr: 是否作为表达式运行,为True时args无效 + :return: None + """ + run_js(self, script, as_expr, 0, args) + + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None): + """访问url + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 + :param timeout: 连接超时时间(秒),为None时使用页面对象timeouts.page_load属性值 + :return: 目标url是否可用 + """ + retry, interval = self._before_connect(url, retry, interval) + self._url_available = self._d_connect(self._url, times=retry, interval=interval, + show_errmsg=show_errmsg, timeout=timeout) + return self._url_available + + def get_cookies(self, as_dict=False, all_domains=False, all_info=False): + """获取cookies信息 + :param as_dict: 为True时返回由{name: value}键值对组成的dict,为True时返回list且all_info无效 + :param all_domains: 是否返回所有域的cookies + :param all_info: 是否返回所有信息,为False时只返回name、value、domain + :return: cookies信息 + """ + txt = 'Storage' if all_domains else 'Network' + cookies = self.run_cdp_loaded(f'{txt}.getCookies')['cookies'] + + if as_dict: + return {cookie['name']: 
cookie['value'] for cookie in cookies} + elif all_info: + return cookies + else: + return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} + for cookie in cookies] + + def ele(self, loc_or_ele, timeout=None): + """获取第一个符合条件的元素对象 + :param loc_or_ele: 定位符或元素对象 + :param timeout: 查找超时时间(秒) + :return: ChromiumElement对象 + """ + return self._ele(loc_or_ele, timeout=timeout, method='ele()') + + def eles(self, loc_or_str, timeout=None): + """获取所有符合条件的元素对象 + :param loc_or_str: 定位符或元素对象 + :param timeout: 查找超时时间(秒) + :return: ChromiumElement对象组成的列表 + """ + return self._ele(loc_or_str, timeout=timeout, single=False) + + def s_ele(self, loc_or_ele=None): + """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + r = make_session_ele(self, loc_or_ele) + if isinstance(r, NoneElement): + if Settings.raise_when_ele_not_found: + raise ElementNotFoundError(None, 's_ele()', {'loc_or_ele': loc_or_ele}) + else: + r.method = 's_ele()' + r.args = {'loc_or_ele': loc_or_ele} + return r + + def s_eles(self, loc_or_str): + """查找所有符合条件的元素以SessionElement列表形式返回 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象组成的列表 + """ + return make_session_ele(self, loc_or_str, single=False) + + def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): + """执行元素查找 + :param loc_or_ele: 定位符或元素对象 + :param timeout: 查找超时时间(秒) + :param single: 是否只返回第一个 + :param relative: WebPage用的表示是否相对定位的参数 + :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 + :return: ChromiumElement对象或元素对象组成的列表 + """ + if isinstance(loc_or_ele, (str, tuple)): + loc = get_loc(loc_or_ele)[1] + elif isinstance(loc_or_ele, ChromiumElement) or str(type(loc_or_ele)).endswith(".ChromiumFrame'>"): + return loc_or_ele + else: + raise ValueError('loc_or_str参数只能是tuple、str、ChromiumElement类型。') + + self.wait.load_complete() + timeout = timeout if timeout is not None else self.timeout + end_time = 
perf_counter() + timeout + + search_ids = [] + timeout = .5 if timeout <= 0 else timeout + result = self.driver.run('DOM.performSearch', query=loc, _timeout=timeout, includeUserAgentShadowDOM=True) + if not result or __ERROR__ in result: + num = 0 + else: + num = result['resultCount'] + search_ids.append(result['searchId']) + + while True: + if num > 0: + num = 1 if single else num + nIds = self._driver.run('DOM.getSearchResults', searchId=result['searchId'], fromIndex=0, toIndex=num) + if __ERROR__ not in nIds: + if nIds['nodeIds'][0] != 0: + r = make_chromium_eles(self, node_ids=nIds['nodeIds'], single=single) + if r is not False: + break + + if perf_counter() >= end_time: + return NoneElement(self) if single else [] + + sleep(.1) + timeout = end_time - perf_counter() + timeout = .5 if timeout <= 0 else timeout + result = self.driver.run('DOM.performSearch', query=loc, _timeout=timeout, includeUserAgentShadowDOM=True) + if not result or __ERROR__ not in result: + num = result['resultCount'] + search_ids.append(result['searchId']) + + for _id in search_ids: + self._driver.run('DOM.discardSearchResults', searchId=_id) + + return r + + def refresh(self, ignore_cache=False): + """刷新当前页面 + :param ignore_cache: 是否忽略缓存 + :return: None + """ + self._is_loading = True + self.run_cdp('Page.reload', ignoreCache=ignore_cache) + self.wait.load_start() + + def forward(self, steps=1): + """在浏览历史中前进若干步 + :param steps: 前进步数 + :return: None + """ + self._forward_or_back(steps) + + def back(self, steps=1): + """在浏览历史中后退若干步 + :param steps: 后退步数 + :return: None + """ + self._forward_or_back(-steps) + + def _forward_or_back(self, steps): + """执行浏览器前进或后退,会跳过url相同的历史记录 + :param steps: 步数 + :return: None + """ + if steps == 0: + return + + history = self.run_cdp('Page.getNavigationHistory') + index = history['currentIndex'] + history = history['entries'] + direction = 1 if steps > 0 else -1 + curr_url = history[index]['url'] + nid = None + for num in range(abs(steps)): + for i in 
history[index::direction]: + index += direction + if i['url'] != curr_url: + nid = i['id'] + curr_url = i['url'] + break + + if nid: + self._is_loading = True + self.run_cdp('Page.navigateToHistoryEntry', entryId=nid) + + def stop_loading(self): + """页面停止加载""" + try: + self.run_cdp('Page.stopLoading') + except (PageDisconnectedError, CDPError): + pass + end_time = perf_counter() + self.timeouts.page_load + while self._ready_state != 'complete' and perf_counter() < end_time: + sleep(.1) + + def remove_ele(self, loc_or_ele): + """从页面上删除一个元素 + :param loc_or_ele: 元素对象或定位符 + :return: None + """ + if not loc_or_ele: + return + ele = self._ele(loc_or_ele, raise_err=False) + if ele: + self.run_cdp('DOM.removeNode', nodeId=ele._node_id) + + def get_frame(self, loc_ind_ele, timeout=None): + """获取页面中一个frame对象,可传入定位符、iframe序号、ChromiumFrame对象,序号从0开始 + :param loc_ind_ele: 定位符、iframe序号、ChromiumFrame对象 + :param timeout: 查找元素超时时间(秒) + :return: ChromiumFrame对象 + """ + if isinstance(loc_ind_ele, str): + if not is_loc(loc_ind_ele): + xpath = f'xpath://*[(name()="iframe" or name()="frame") and ' \ + f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]' + else: + xpath = loc_ind_ele + ele = self._ele(xpath, timeout=timeout) + if ele and not str(type(ele)).endswith(".ChromiumFrame'>"): + raise TypeError('该定位符不是指向frame元素。') + r = ele + + elif isinstance(loc_ind_ele, tuple): + ele = self._ele(loc_ind_ele, timeout=timeout) + if ele and not str(type(ele)).endswith(".ChromiumFrame'>"): + raise TypeError('该定位符不是指向frame元素。') + r = ele + + elif isinstance(loc_ind_ele, int): + if loc_ind_ele < 0: + raise ValueError('序号必须大于等于0。') + xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele + 1}]' + r = self._ele(xpath, timeout=timeout) + + elif str(type(loc_ind_ele)).endswith(".ChromiumFrame'>"): + r = loc_ind_ele + + else: + raise TypeError('必须传入定位符、iframe序号、id、name、ChromiumFrame对象其中之一。') + + if isinstance(r, NoneElement): + r.method = 'get_frame()' + r.args = {'loc_ind_ele': loc_ind_ele} 
+ return r + + def get_frames(self, loc=None, timeout=None): + """获取所有符合条件的frame对象 + :param loc: 定位符,为None时返回所有 + :param timeout: 查找超时时间(秒) + :return: ChromiumFrame对象组成的列表 + """ + loc = loc or 'xpath://*[name()="iframe" or name()="frame"]' + frames = self._ele(loc, timeout=timeout, single=False, raise_err=False) + return [i for i in frames if str(type(i)).endswith(".ChromiumFrame'>")] + + def get_session_storage(self, item=None): + """获取sessionStorage信息,不设置item则获取全部 + :param item: 要获取的项,不设置则返回全部 + :return: sessionStorage一个或所有项内容 + """ + if item: + js = f'sessionStorage.getItem("{item}");' + return self.run_js_loaded(js, as_expr=True) + else: + js = ''' + var dp_ls_len = sessionStorage.length; + var dp_ls_arr = new Array(); + for(var i = 0; i < dp_ls_len; i++) { + var getKey = sessionStorage.key(i); + var getVal = sessionStorage.getItem(getKey); + dp_ls_arr[i] = {'key': getKey, 'val': getVal} + } + return dp_ls_arr; + ''' + return {i['key']: i['val'] for i in self.run_js_loaded(js)} + + def get_local_storage(self, item=None): + """获取localStorage信息,不设置item则获取全部 + :param item: 要获取的项目,不设置则返回全部 + :return: localStorage一个或所有项内容 + """ + if item: + js = f'localStorage.getItem("{item}");' + return self.run_js_loaded(js, as_expr=True) + else: + js = ''' + var dp_ls_len = localStorage.length; + var dp_ls_arr = new Array(); + for(var i = 0; i < dp_ls_len; i++) { + var getKey = localStorage.key(i); + var getVal = localStorage.getItem(getKey); + dp_ls_arr[i] = {'key': getKey, 'val': getVal} + } + return dp_ls_arr; + ''' + return {i['key']: i['val'] for i in self.run_js_loaded(js)} + + def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None, + full_page=False, left_top=None, right_bottom=None): + """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 + :param path: 保存路径 + :param name: 完整文件名,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 + :param as_base64: 是否以base64字符串形式返回图片,可选 
'jpg','jpeg','png','webp',生效时path参数无效 + :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 + :param left_top: 截取范围左上角坐标 + :param right_bottom: 截取范围右下角角坐标 + :return: 图片完整路径或字节文本 + """ + return self._get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64, + full_page=full_page, left_top=left_top, right_bottom=right_bottom) + + def add_init_js(self, script): + """添加初始化脚本,在页面加载任何脚本前执行 + :param script: js文本 + :return: 添加的脚本的id + """ + js_id = self.run_cdp('Page.addScriptToEvaluateOnNewDocument', source=script, + includeCommandLineAPI=True)['identifier'] + self._init_jss.append(js_id) + return js_id + + def remove_init_js(self, script_id=None): + """删除初始化脚本,js_id传入None时删除所有 + :param script_id: 脚本的id + :return: None + """ + if script_id is None: + for js_id in self._init_jss: + self.run_cdp('Page.removeScriptToEvaluateOnNewDocument', identifier=js_id) + self._init_jss.clear() + + elif script_id in self._init_jss: + self.run_cdp('Page.removeScriptToEvaluateOnNewDocument', identifier=script_id) + self._init_jss.remove(script_id) + + def clear_cache(self, session_storage=True, local_storage=True, cache=True, cookies=True): + """清除缓存,可选要清除的项 + :param session_storage: 是否清除sessionStorage + :param local_storage: 是否清除localStorage + :param cache: 是否清除cache + :param cookies: 是否清除cookies + :return: None + """ + if session_storage or local_storage: + self.run_cdp_loaded('DOMStorage.enable') + i = self.run_cdp('Storage.getStorageKeyForFrame', frameId=self._frame_id)['storageKey'] + if session_storage: + self.run_cdp('DOMStorage.clear', storageId={'storageKey': i, 'isLocalStorage': False}) + if local_storage: + self.run_cdp('DOMStorage.clear', storageId={'storageKey': i, 'isLocalStorage': True}) + self.run_cdp_loaded('DOMStorage.disable') + + if cache: + self.run_cdp_loaded('Network.clearBrowserCache') + + if cookies: + self.run_cdp_loaded('Network.clearBrowserCookies') + + def disconnect(self): + """断开与页面的连接,不关闭页面""" + if self._driver: + self.driver.stop() + + def 
handle_alert(self, accept=True, send=None, timeout=None, next_one=False): + r = self._handle_alert(accept=accept, send=send, timeout=timeout, next_one=next_one) + while self._has_alert: + sleep(.1) + return r + + def _handle_alert(self, accept=True, send=None, timeout=None, next_one=False): + """处理提示框,可以自动等待提示框出现 + :param accept: True表示确认,False表示取消,其它值不会按按钮但依然返回文本值 + :param send: 处理prompt提示框时可输入文本 + :param timeout: 等待提示框出现的超时时间(秒),为None则使用self.timeout属性的值 + :param next_one: 是否处理下一个出现的提示框,为True时timeout参数无效 + :return: 提示框内容文本,未等到提示框则返回False + """ + if next_one: + self._alert.handle_next = accept + self._alert.next_text = send + return + + timeout = self.timeout if timeout is None else timeout + timeout = .1 if timeout <= 0 else timeout + end_time = perf_counter() + timeout + while not self._alert.activated and perf_counter() < end_time: + sleep(.1) + if not self._alert.activated: + return False + + res_text = self._alert.text + d = {'accept': accept, '_timeout': 0} + if self._alert.type == 'prompt' and send is not None: + d['promptText'] = send + self.driver.run('Page.handleJavaScriptDialog', **d) + return res_text + + def _on_alert_open(self, **kwargs): + """alert出现时触发的方法""" + self._alert.activated = True + self._alert.text = kwargs['message'] + self._alert.type = kwargs['message'] + self._alert.defaultPrompt = kwargs.get('defaultPrompt', None) + self._alert.response_accept = None + self._alert.response_text = None + self._has_alert = True + + if self._alert.auto is not None: + self._handle_alert(self._alert.auto) + elif self._alert.handle_next is not None: + self._handle_alert(self._alert.handle_next, self._alert.next_text) + self._alert.handle_next = None + + def _on_alert_close(self, **kwargs): + """alert关闭时触发的方法""" + self._alert.activated = False + self._alert.text = None + self._alert.type = None + self._alert.defaultPrompt = None + self._alert.response_accept = kwargs.get('result') + self._alert.response_text = kwargs['userInput'] + self._has_alert = False + + 
def _wait_loaded(self, timeout=None): + """等待页面加载完成,超时触发停止加载 + :param timeout: 超时时间(秒) + :return: 是否成功,超时返回False + """ + timeout = timeout if timeout is not None else self.timeouts.page_load + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._ready_state == 'complete': + return True + elif self._load_mode == 'eager' and self._ready_state in ('interactive', + 'complete') and not self._is_loading: + return True + + sleep(.1) + + try: + self.stop_loading() + except CDPError: + pass + return False + + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') or 'chrome://newtab/' + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + + def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None): + """尝试连接,重试若干次 + :param to_url: 要访问的url + :param times: 重试次数 + :param interval: 重试间隔(秒) + :param show_errmsg: 是否抛出异常 + :param timeout: 连接超时时间(秒) + :return: 是否成功,返回None表示不确定 + """ + err = None + self._is_loading = True + timeout = timeout if timeout is not None else self.timeouts.page_load + for t in range(times + 1): + err = None + end_time = perf_counter() + timeout + try: + result = self.run_cdp('Page.navigate', frameId=self._frame_id, url=to_url, _timeout=timeout) + if 'errorText' in result: + err = ConnectionError(result['errorText']) + except TimeoutError: + err = TimeoutError(f'页面连接超时(等待{timeout}秒)。') + + if err: + if t < times: + sleep(interval) + if show_errmsg: + print(f'重试{t + 1} {to_url}') + end_time1 = end_time - perf_counter() + while self._ready_state not in ('loading', 'complete') and perf_counter() < end_time1: # 等待出错信息显示 + sleep(.1) + self.stop_loading() + continue + + if self._load_mode == 'none': + return True + + yu = end_time - 
perf_counter() + ok = self._wait_loaded(1 if yu <= 0 else yu) + if not ok: + err = TimeoutError(f'页面连接超时(等待{timeout}秒)。') + if t < times: + sleep(interval) + if show_errmsg: + print(f'重试{t + 1} {to_url}') + continue + + if not err: + break + + if err: + if show_errmsg: + raise err if err is not None else ConnectionError('连接异常。') + return False + + return True + + def _get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None, + full_page=False, left_top=None, right_bottom=None, ele=None): + """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 + :param path: 保存路径 + :param name: 完整文件名,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 + :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 + :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 + :param left_top: 截取范围左上角坐标 + :param right_bottom: 截取范围右下角角坐标 + :param ele: 为异域iframe内元素截图设置 + :return: 图片完整路径或字节文本 + """ + if as_bytes: + if as_bytes is True: + pic_type = 'png' + else: + if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes + + elif as_base64: + if as_base64 is True: + pic_type = 'png' + else: + if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 + + else: + path = str(path).rstrip('\\/') if path else '.' 
+ if not path.endswith(('.jpg', '.jpeg', '.png', '.webp')): + if not name: + name = f'{self.title}.jpg' + elif not name.endswith(('.jpg', '.jpeg', '.png', '.webp')): + name = f'{name}.jpg' + path = f'{path}{sep}{name}' + + path = Path(path) + pic_type = path.suffix.lower() + pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] + + width, height = self.rect.size + if full_page: + vp = {'x': 0, 'y': 0, 'width': width, 'height': height, 'scale': 1} + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, + captureBeyondViewport=True, clip=vp)['data'] + else: + if left_top and right_bottom: + x, y = left_top + w = right_bottom[0] - x + h = right_bottom[1] - y + + v = not (location_in_viewport(self, x, y) and + location_in_viewport(self, right_bottom[0], right_bottom[1])) + if v and (self.run_js('return document.body.scrollHeight > window.innerHeight;') and + not self.run_js('return document.body.scrollWidth > window.innerWidth;')): + x += 10 + + vp = {'x': x, 'y': y, 'width': w, 'height': h, 'scale': 1} + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, + captureBeyondViewport=v, clip=vp)['data'] + + else: + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type)['data'] + + if as_base64: + return png + + from base64 import b64decode + png = b64decode(png) + + if as_bytes: + return png + + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, 'wb') as f: + f.write(png) + return str(path.absolute()) + + # --------------------即将废弃--------------------- + + @property + def page_load_strategy(self): + return self._load_mode + + @property + def is_alive(self): + return self.states.is_alive + + @property + def is_loading(self): + """返回页面是否正在加载状态""" + return self._is_loading + + @property + def ready_state(self): + return self._ready_state + + @property + def size(self): + """返回页面总宽高,格式:(宽, 高)""" + return self.rect.size + + +class Timeout(object): + """用于保存d模式timeout信息的类""" + + def __init__(self, page, base=None, 
page_load=None, script=None, implicit=None): + """ + :param page: ChromiumBase页面 + :param base: 默认超时时间 + :param page_load: 页面加载超时时间 + :param script: js超时时间 + """ + self._page = page + base = base if base is not None else implicit + self.base = 10 if base is None else base + self.page_load = 30 if page_load is None else page_load + self.script = 30 if script is None else script + + def __repr__(self): + return str({'base': self.base, 'page_load': self.page_load, 'script': self.script}) + + +class Alert(object): + """用于保存alert信息的类""" + + def __init__(self): + self.activated = False + self.text = None + self.type = None + self.defaultPrompt = None + self.response_accept = None + self.response_text = None + self.handle_next = None + self.next_text = None + self.auto = None + + +def close_privacy_dialog(page, tid): + """关闭隐私声明弹窗 + :param page: ChromiumBase对象 + :param tid: tab id + :return: None + """ + try: + driver = page.browser._get_driver(tid) + driver.run('Runtime.enable') + driver.run('DOM.enable') + driver.run('DOM.getDocument') + sid = driver.run('DOM.performSearch', query='//*[name()="privacy-sandbox-notice-dialog-app"]', + includeUserAgentShadowDOM=True)['searchId'] + r = driver.run('DOM.getSearchResults', searchId=sid, fromIndex=0, toIndex=1)['nodeIds'][0] + end_time = perf_counter() + 3 + while perf_counter() < end_time: + try: + r = driver.run('DOM.describeNode', nodeId=r)['node']['shadowRoots'][0]['backendNodeId'] + break + except KeyError: + pass + driver.run('DOM.discardSearchResults', searchId=sid) + r = driver.run('DOM.resolveNode', backendNodeId=r)['object']['objectId'] + r = driver.run('Runtime.callFunctionOn', objectId=r, + functionDeclaration='function(){return this.getElementById("ackButton");}')['result']['objectId'] + driver.run('Runtime.callFunctionOn', objectId=r, functionDeclaration='function(){return this.click();}') + driver.close() + + except: + pass + + +def get_mhtml(page, path=None, name=None): + 
"""把当前页面保存为mhtml文件,如果path和name参数都为None,只返回mhtml文本 + :param page: 要保存的页面对象 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :return: mhtml文本 + """ + r = page.run_cdp('Page.captureSnapshot')['data'] + if path is None and name is None: + return r + path = path or '.' + Path(path).mkdir(parents=True, exist_ok=True) + name = make_valid_name(name or page.title) + with open(f'{path}{sep}{name}.mhtml', 'w', encoding='utf-8') as f: + f.write(r) + return r + + +def get_pdf(page, path=None, name=None, kwargs=None): + """把当前页面保存为pdf文件,如果path和name参数都为None,只返回字节 + :param page: 要保存的页面对象 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :param kwargs: pdf生成参数 + :return: pdf文本 + """ + if not kwargs: + kwargs = {} + kwargs['transferMode'] = 'ReturnAsBase64' + if 'printBackground' not in kwargs: + kwargs['printBackground'] = True + try: + r = page.run_cdp('Page.printToPDF', **kwargs)['data'] + except: + raise RuntimeError('保存失败,可能浏览器版本不支持。') + from base64 import b64decode + r = b64decode(r) + if path is None and name is None: + return r + path = path or '.' + Path(path).mkdir(parents=True, exist_ok=True) + name = make_valid_name(name or page.title) + with open(f'{path}{sep}{name}.pdf', 'wb') as f: + f.write(r) + return r diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi new file mode 100644 index 0000000..d4993b1 --- /dev/null +++ b/DrissionPage/_pages/chromium_base.pyi @@ -0,0 +1,282 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from pathlib import Path +from typing import Union, Tuple, List, Any, Optional, Literal + +from .chromium_tab import ChromiumTab +from .._base.base import BasePage +from .._base.browser import Browser +from .._base.driver import Driver +from .._elements.chromium_element import ChromiumElement +from .._elements.none_element import NoneElement +from .._elements.session_element import SessionElement +from .._pages.chromium_frame import ChromiumFrame +from .._pages.chromium_page import ChromiumPage +from .._units.actions import Actions +from .._units.listener import Listener +from .._units.rect import TabRect +from .._units.screencast import Screencast +from .._units.scroller import Scroller, PageScroller +from .._units.setter import ChromiumBaseSetter +from .._units.states import PageStates +from .._units.waiter import BaseWaiter + +PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True] + + +class ChromiumBase(BasePage): + def __init__(self, + address: Union[str, int], + tab_id: str = None, + timeout: float = None): + self._browser: Browser = ... + self._page: ChromiumPage = ... + self.address: str = ... + self._driver: Driver = ... + self._frame_id: str = ... + self._is_reading: bool = ... + self._is_timeout: bool = ... + self._timeouts: Timeout = ... + self._first_run: bool = ... + self._is_loading: bool = ... + self._load_mode: str = ... + self._scroll: Scroller = ... + self._url: str = ... + self._root_id: str = ... + self._upload_list: list = ... + self._wait: BaseWaiter = ... + self._set: ChromiumBaseSetter = ... + self._screencast: Screencast = ... + self._actions: Actions = ... + self._listener: Listener = ... + self._states: PageStates = ... + self._alert: Alert = ... + self._has_alert: bool = ... + self._doc_got: bool = ... + self._load_end_time: float = ... + self._init_jss: list = ... + self._ready_state: Optional[str] = ... + self._rect: TabRect = ... + + def _connect_browser(self, tab_id: str = None) -> None: ... 
+ + def _driver_init(self, tab_id: str) -> None: ... + + def _get_document(self, timeout: float = 10) -> bool: ... + + def _wait_loaded(self, timeout: float = None) -> bool: ... + + def _onFrameDetached(self, **kwargs) -> None: ... + + def _onFrameAttached(self, **kwargs) -> None: ... + + def _onFrameStartedLoading(self, **kwargs): ... + + def _onFrameNavigated(self, **kwargs): ... + + def _onDomContentEventFired(self, **kwargs): ... + + def _onLoadEventFired(self, **kwargs): ... + + def _onFrameStoppedLoading(self, **kwargs): ... + + def _onFileChooserOpened(self, **kwargs): ... + + def _wait_to_stop(self): ... + + def _d_set_start_options(self, address) -> None: ... + + def _d_set_runtime_settings(self) -> None: ... + + def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + @property + def _js_ready_state(self) -> str: ... + + @property + def browser(self) -> Browser: ... + + @property + def title(self) -> str: ... + + @property + def driver(self) -> Driver: ... + + @property + def url(self) -> str: ... + + @property + def _browser_url(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def json(self) -> Union[dict, None]: ... + + @property + def _target_id(self) -> str: ... + + @property + def tab_id(self) -> str: ... + + @property + def active_ele(self) -> ChromiumElement: ... + + @property + def load_mode(self) -> str: ... + + @property + def user_agent(self) -> str: ... + + @property + def scroll(self) -> PageScroller: ... + + @property + def rect(self) -> TabRect: ... + + @property + def timeouts(self) -> Timeout: ... + + @property + def upload_list(self) -> list: ... + + @property + def wait(self) -> BaseWaiter: ... + + @property + def set(self) -> ChromiumBaseSetter: ... + + @property + def screencast(self) -> Screencast: ... + + @property + def actions(self) -> Actions: ... + + @property + def listen(self) -> Listener: ... 
+ + @property + def states(self) -> PageStates: ... + + def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ... + + def run_js_loaded(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ... + + def run_async_js(self, script: str, *args, as_expr: bool = False) -> None: ... + + def get(self, url: str, show_errmsg: bool = False, retry: int = None, + interval: float = None, timeout: float = None) -> Union[None, bool]: ... + + def get_cookies(self, as_dict: bool = False, all_domains: bool = False, + all_info: bool = False) -> Union[list, dict]: ... + + def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + def eles(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[ChromiumElement]: ... + + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ + -> Union[SessionElement, NoneElement]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... + + def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ + -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... + + def refresh(self, ignore_cache: bool = False) -> None: ... + + def forward(self, steps: int = 1) -> None: ... + + def back(self, steps: int = 1) -> None: ... + + def _forward_or_back(self, steps: int) -> None: ... + + def stop_loading(self) -> None: ... + + def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None: ... + + def get_frame(self, loc_ind_ele: Union[str, int, tuple, ChromiumFrame], timeout: float = None) -> ChromiumFrame: ... + + def get_frames(self, loc: Union[str, tuple] = None, timeout: float = None) -> List[ChromiumFrame]: ... 
+ + def run_cdp(self, cmd: str, **cmd_args) -> dict: ... + + def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict: ... + + def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ... + + def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ... + + def add_init_js(self, script: str) -> str: ... + + def remove_init_js(self, script_id: str = None) -> None: ... + + def get_screenshot(self, path: [str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None, + as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[int, int] = None, + right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ... + + def _get_screenshot(self, path: [str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None, + as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[float, float] = None, + right_bottom: Tuple[float, float] = None, ele: ChromiumElement = None) -> Union[str, bytes]: ... + + def clear_cache(self, session_storage: bool = True, local_storage: bool = True, cache: bool = True, + cookies: bool = True) -> None: ... + + def disconnect(self) -> None: ... + + def handle_alert(self, accept: bool = True, send: str = None, timeout: float = None, + next_one: bool = False) -> Union[str, False]: ... + + def _handle_alert(self, accept: bool = True, send: str = None, timeout: float = None, + next_one: bool = False) -> Union[str, False]: ... + + def _on_alert_close(self, **kwargs): ... + + def _on_alert_open(self, **kwargs): ... + + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + + def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False, + timeout: float = None) -> Union[bool, None]: ... + + +class Timeout(object): + + def __init__(self, page: ChromiumBase, base=None, page_load=None, script=None): + self._page: ChromiumBase = ... + self.base: float = ... + self.page_load: float = ... + self.script: float = ... 
+ + +class Alert(object): + + def __init__(self): + self.activated: bool = ... + self.text: str = ... + self.type: str = ... + self.defaultPrompt: str = ... + self.response_accept: str = ... + self.response_text: str = ... + self.handle_next: Optional[bool] = ... + self.next_text: str = ... + self.auto: Optional[bool] = ... + + +def get_mhtml(page: Union[ChromiumPage, ChromiumTab], + path: Union[str, Path] = None, + name: str = None) -> str: ... + + +def get_pdf(page: Union[ChromiumPage, ChromiumTab], + path: Union[str, Path] = None, + name: str = None, kwargs: dict=None) -> bytes: ... diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py similarity index 52% rename from DrissionPage/chromium_frame.py rename to DrissionPage/_pages/chromium_frame.py index b6b401c..a7aac92 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -1,67 +1,96 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" -from re import search -from threading import Thread +from copy import copy +from re import search, findall, DOTALL from time import sleep, perf_counter -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter -from .chromium_element import ChromiumElement, ChromiumElementWaiter -from .commons.tools import get_usable_path -from .errors import ContextLossError +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._units.listener import FrameListener +from .._units.rect import FrameRect +from .._units.scroller import FrameScroller +from .._units.setter import ChromiumFrameSetter +from .._units.states import FrameStates +from .._units.waiter import FrameWaiter +from ..errors import ContextLostError, ElementLostError, PageDisconnectedError, JavaScriptError class ChromiumFrame(ChromiumBase): - def __init__(self, page, ele): - self.page = page + def __init__(self, page, ele, info=None): + """ + :param page: frame所在的页面对象 + :param ele: frame所在元素 + :param info: frame所在元素信息 + """ + page_type = str(type(page)) + if 'ChromiumPage' in page_type or 'WebPage' in page_type: + self._page = self._target_page = self.tab = page + self._browser = page.browser + else: # Tab、Frame + self._page = page.page + self._browser = self._page.browser + self._target_page = page + self.tab = page.tab if 'ChromiumFrame' in page_type else page + self.address = page.address - node = page.run_cdp('DOM.describeNode', backendNodeId=ele.ids.backend_id)['node'] - self.frame_id = node['frameId'] - self._backend_id = ele.ids.backend_id + self._tab_id = page.tab_id + self._backend_id = ele._backend_id self._frame_ele = ele self._states = None + self._reloading = False + node = info['node'] if not info else page.run_cdp('DOM.describeNode', backendNodeId=ele._backend_id)['node'] + self._frame_id = node['frameId'] if self._is_inner_frame(): self._is_diff_domain = False - self.doc_ele = 
ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) + self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) super().__init__(page.address, page.tab_id, page.timeout) else: self._is_diff_domain = True - super().__init__(page.address, self.frame_id, page.timeout) + delattr(self, '_frame_id') + super().__init__(page.address, node['frameId'], page.timeout) obj_id = super().run_js('document;', as_expr=True)['objectId'] self.doc_ele = ChromiumElement(self, obj_id=obj_id) - self._ids = ChromiumFrameIds(self) - end_time = perf_counter() + 2 - while perf_counter() < end_time and self.url == 'about:blank': + self._rect = None + end_time = perf_counter() + 5 + while perf_counter() < end_time: + if self.url not in (None, 'about:blank'): + break sleep(.1) - t = Thread(target=self._check_alive) - t.daemon = True - t.start() - def __call__(self, loc_or_str, timeout=None): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ return self.ele(loc_or_str, timeout) + def __eq__(self, other): + return self._frame_id == getattr(other, '_frame_id', None) + def __repr__(self): - attrs = self.frame_ele.attrs + attrs = self._frame_ele.attrs attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'<ChromiumFrame {self.frame_ele.tag} {" ".join(attrs)}>' - def _runtime_settings(self): + def _d_set_runtime_settings(self): """重写设置浏览器运行参数方法""" - self._timeouts = self.page.timeouts - self._page_load_strategy = self.page.page_load_strategy + if not hasattr(self, '_timeouts'): + self._timeouts = copy(self._target_page.timeouts) + self.retry_times = self._target_page.retry_times + self.retry_interval = self._target_page.retry_interval + self._download_path = self._target_page.download_path + self._load_mode = self._target_page._load_mode if not self._is_diff_domain else 'normal' - def _driver_init(self, 
tab_id): + def _driver_init(self, tab_id, is_init=True): """避免出现服务器500错误 :param tab_id: 要跳转到的标签页id :return: None @@ -69,112 +98,174 @@ class ChromiumFrame(ChromiumBase): try: super()._driver_init(tab_id) except: - self._control_session.get(f'http://{self.address}/json') + self.browser.driver.get(f'http://{self.address}/json') super()._driver_init(tab_id) + self._driver.set_callback('Inspector.detached', self._onInspectorDetached, immediate=True) + self._driver.set_callback('Page.frameDetached', None) + self._driver.set_callback('Page.frameDetached', self._onFrameDetached, immediate=True) def _reload(self): """重新获取document""" - debug = self._debug - if debug: - print('reload') + self._is_loading = True + # d_debug = self.driver._debug + self._reloading = True + self._doc_got = False - self._frame_ele = ChromiumElement(self.page, backend_id=self._backend_id) - node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node'] + self._driver.stop() + try: + self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id) + end_time = perf_counter() + 2 + while perf_counter() < end_time: + node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node'] + if 'frameId' in node: + break + + else: + return + + except (ElementLostError, PageDisconnectedError): + return if self._is_inner_frame(): self._is_diff_domain = False - self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) - super().__init__(self.address, self.page.tab_id, self.page.timeout) - self._debug = debug + self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) + self._frame_id = node['frameId'] + if self._listener: + self._listener._to_target(self._target_page.tab_id, self.address, self) + super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout) + # self.driver._debug = d_debug + else: self._is_diff_domain = 
True - self._tab_obj.stop() - super().__init__(self.address, self.frame_id, self.page.timeout) - obj_id = super().run_js('document;', as_expr=True)['objectId'] - self.doc_ele = ChromiumElement(self, obj_id=obj_id) - self._debug = debug - - def _check_ok(self): - """用于应付同域异域之间跳转导致元素丢失问题""" - if self._tab_obj._stopped.is_set(): - self._reload() - - try: - self.page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id) - except Exception: - self._reload() - # sleep(2) - - def _get_new_document(self): - """刷新cdp使用的document数据""" - if not self._is_reading: - self._is_reading = True - - if self._debug: - print('---获取document') - - end_time = perf_counter() + 3 - while self.is_alive and perf_counter() < end_time: - try: - if self._is_diff_domain is False: - node = self.page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] - self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) - - else: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self.doc_ele = ChromiumElement(self, backend_id=b_id) - - break - - except Exception: - sleep(.1) - + if self._listener: + self._listener._to_target(node['frameId'], self.address, self) + end_time = perf_counter() + self.timeouts.page_load + super().__init__(self.address, node['frameId'], self._target_page.timeout) + timeout = end_time - perf_counter() + if timeout <= 0: + timeout = .5 + self._wait_loaded(timeout) + # while perf_counter() < end_time: + # try: + # obj_id = super().run_js('document;', as_expr=True)['objectId'] + # self.doc_ele = ChromiumElement(self, obj_id=obj_id) + # break + # except Exception as e: + # sleep(.1) + # if self._debug: + # print(f'获取doc失败,重试 {e}') # else: - # raise RuntimeError('获取document失败。') + # raise GetDocumentError - if self._debug: - print('---获取document结束') + # self.driver._debug = d_debug - self._is_loading = False + self._is_loading = False + self._reloading = False + + def _get_document(self, timeout=10): + 
"""刷新cdp使用的document数据 + :param timeout: 超时时间(秒) + :return: 是否获取成功 + """ + if self._is_reading: + return + + self._is_reading = True + try: + if self._is_diff_domain is False: + node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] + self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) + + else: + timeout = timeout if timeout >= .5 else .5 + b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId'] + self.doc_ele = ChromiumElement(self, backend_id=b_id) + + self._root_id = self.doc_ele._obj_id + + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id + return True + + except: + return False + + finally: + if not self._reloading: # 阻止reload时标识 + self._is_loading = False self._is_reading = False - def _onFrameNavigated(self, **kwargs): - """页面跳转时触发""" - if kwargs['frame']['id'] == self.frame_id and self._first_run is False and self._is_loading: - self._is_loading = True + def _onInspectorDetached(self, **kwargs): + """异域转同域或退出""" + self._reload() - if self._debug: - print('navigated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) + def _onFrameDetached(self, **kwargs): + """同域变异域""" + self.browser._frames.pop(kwargs['frameId'], None) + if kwargs['frameId'] == self._frame_id: + self._reload() - def _onLoadEventFired(self, **kwargs): - """在页面刷新、变化后重新读取页面内容""" - # 用于覆盖父类方法,不能删 - self._get_new_document() - - if self._debug: - print('loadEventFired') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired')) - - def _onFrameStartedLoading(self, **kwargs): - """页面开始加载时触发""" - if kwargs['frameId'] == self.frame_id: - self._is_loading = True - if self._debug: - print('页面开始加载 FrameStartedLoading') - - def _onFrameStoppedLoading(self, **kwargs): - """页面加载完成后触发""" - if kwargs['frameId'] == self.frame_id 
and self._first_run is False and self._is_loading: - if self._debug: - print('页面停止加载 FrameStoppedLoading') - self._get_new_document() + # ----------挂件---------- @property - def ids(self): - return self._ids + def scroll(self): + """返回用于滚动的对象""" + self.wait.load_complete() + if self._scroll is None: + self._scroll = FrameScroller(self) + return self._scroll + + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = ChromiumFrameSetter(self) + return self._set + + @property + def states(self): + """返回用于获取状态信息的对象""" + if self._states is None: + self._states = FrameStates(self) + return self._states + + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = FrameWaiter(self) + return self._wait + + @property + def rect(self): + """返回获取坐标和大小的对象""" + if self._rect is None: + self._rect = FrameRect(self) + return self._rect + + @property + def listen(self): + """返回用于聆听数据包的对象""" + if self._listener is None: + self._listener = FrameListener(self) + return self._listener + + # ----------挂件---------- + + @property + def _obj_id(self): + """返回frame元素的object id""" + return self.frame_ele._obj_id + + @property + def _node_id(self): + """返回cdp中的node id""" + return self.frame_ele._node_id + + @property + def page(self): + return self._page @property def frame_ele(self): @@ -184,150 +275,88 @@ class ChromiumFrame(ChromiumBase): @property def tag(self): """返回元素tag""" - self._check_ok() return self.frame_ele.tag @property def url(self): """返回frame当前访问的url""" - self._check_ok() - return self.doc_ele.run_js('return this.location.href;') + try: + return self.doc_ele.run_js('return this.location.href;') + except JavaScriptError: + return None @property def html(self): """返回元素outerHTML文本""" - self._check_ok() tag = self.tag - out_html = self.page.run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele.ids.backend_id)['outerHTML'] - sign = search(rf'<{tag}.*?>', out_html).group(0) + out_html = 
self._target_page.run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML'] + sign = search(rf'<{tag}.*?>', out_html, DOTALL).group(0) return f'{sign}{self.inner_html}</{tag}>' @property def inner_html(self): """返回元素innerHTML文本""" - self._check_ok() return self.doc_ele.run_js('return this.documentElement.outerHTML;') @property def title(self): """返回页面title""" - self._check_ok() r = self._ele('t:title', raise_err=False) return r.text if r else None @property def cookies(self): """以dict格式返回cookies""" - self._check_ok() return super().cookies if self._is_diff_domain else self.doc_ele.run_js('return this.cookie;') @property def attrs(self): """返回frame元素所有attribute属性""" - self._check_ok() return self.frame_ele.attrs - @property - def frame_size(self): - """返回frame内页面尺寸,格式:(长, 高)""" - self._check_ok() - w = self.doc_ele.run_js('return this.body.scrollWidth') - h = self.doc_ele.run_js('return this.body.scrollHeight') - return w, h - - @property - def size(self): - """返回frame元素大小""" - self._check_ok() - return self.frame_ele.size - @property def active_ele(self): """返回当前焦点所在元素""" - self._check_ok() return self.doc_ele.run_js('return this.activeElement;') - @property - def location(self): - """返回frame元素左上角的绝对坐标""" - self._check_ok() - return self.frame_ele.location - - @property - def locations(self): - """返回用于获取元素位置的对象""" - return self.frame_ele.locations - @property def xpath(self): """返回frame的xpath绝对路径""" - self._check_ok() return self.frame_ele.xpath @property def css_path(self): """返回frame的css selector绝对路径""" - self._check_ok() return self.frame_ele.css_path @property - def ready_state(self): + def tab_id(self): + """返回frame所在tab的id""" + return self._tab_id + + @property + def download_path(self): + return self._download_path + + @property + def _js_ready_state(self): """返回当前页面加载状态,'loading' 'interactive' 'complete'""" if self._is_diff_domain: - try: - return super().ready_state - except: - return 'complete' + return super()._js_ready_state 
else: - end_time = perf_counter() + 3 - while self.is_alive and perf_counter() < end_time: + try: + return self.doc_ele.run_js('return this.readyState;') + except ContextLostError: try: - return self.doc_ele.run_js('return this.readyState;') - except ContextLossError: - try: - node = self.run_cdp('DOM.describeNode', backendNodeId=self.frame_ele.ids.backend_id)['node'] - doc = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) - return doc.run_js('return this.readyState;') - except: - pass - - sleep(.1) - - # raise RuntimeError('获取document失败。') - - @property - def is_alive(self): - """返回是否仍可用""" - return self.states.is_alive - - @property - def scroll(self): - """返回用于等待的对象""" - return ChromiumFrameScroll(self) - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = ChromiumFrameSetter(self) - return self._set - - @property - def states(self): - """返回用于获取状态信息的对象""" - return self.frame_ele.states - - @property - def wait(self): - """返回用于等待的对象""" - if self._wait is None: - self._wait = FrameWaiter(self) - return self._wait + node = self.run_cdp('DOM.describeNode', backendNodeId=self.frame_ele._backend_id)['node'] + doc = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) + return doc.run_js('return this.readyState;') + except: + return None def refresh(self): """刷新frame页面""" - self._check_ok() self.doc_ele.run_js('this.location.reload();') def attr(self, attr): @@ -335,7 +364,6 @@ class ChromiumFrame(ChromiumBase): :param attr: 属性名 :return: 属性值文本,没有该属性返回None """ - self._check_ok() return self.frame_ele.attr(attr) def remove_attr(self, attr): @@ -343,50 +371,47 @@ class ChromiumFrame(ChromiumBase): :param attr: 属性名 :return: None """ - self._check_ok() self.frame_ele.remove_attr(attr) - def run_js(self, script, *args, as_expr=False): + def run_js(self, script, *args, as_expr=False, timeout=None): """运行javascript代码 :param script: js文本 - :param args: 
参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 + :param timeout: js超时时间(秒),为None则使用页面timeouts.script设置 :return: 运行的结果 """ - self._check_ok() if script.startswith('this.scrollIntoView'): - return self.frame_ele.run_js(script, *args, as_expr=as_expr) + return self.frame_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout) else: - return self.doc_ele.run_js(script, *args, as_expr=as_expr) + return self.doc_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout) - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - self._check_ok() - return self.frame_ele.parent(level_or_loc) + return self.frame_ele.parent(level_or_loc, index) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点 """ - self._check_ok() return self.frame_ele.prev(filter_loc, index, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点 """ - self._check_ok() return self.frame_ele.next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): @@ -394,11 +419,10 @@ class ChromiumFrame(ChromiumBase): 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ 
- self._check_ok() return self.frame_ele.before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): @@ -406,31 +430,28 @@ class ChromiumFrame(ChromiumBase): 查找范围不限同级元素,而是整个DOM文档 :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - self._check_ok() return self.frame_ele.after(filter_loc, index, timeout, ele_only=ele_only) def prevs(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点文本组成的列表 """ - self._check_ok() return self.frame_ele.prevs(filter_loc, timeout, ele_only=ele_only) def nexts(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 + :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素或节点文本组成的列表 """ - self._check_ok() return self.frame_ele.nexts(filter_loc, timeout, ele_only=ele_only) def befores(self, filter_loc='', timeout=None, ele_only=True): @@ -441,7 +462,6 @@ class ChromiumFrame(ChromiumBase): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - self._check_ok() return self.frame_ele.befores(filter_loc, timeout, ele_only=ele_only) def afters(self, filter_loc='', timeout=None, ele_only=True): @@ -452,22 +472,23 @@ class ChromiumFrame(ChromiumBase): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ - self._check_ok() return self.frame_ele.afters(filter_loc, timeout, ele_only=ele_only) - def get_screenshot(self, path=None, as_bytes=None, as_base64=None): + def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None): """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 - :param path: 完整路径,后缀可选 
'jpg','jpeg','png','webp' + :param path: 文件保存路径 + :param name: 完整文件名,后缀可选 'jpg','jpeg','png','webp' :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 :return: 图片完整路径或字节文本 """ - return self.frame_ele.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64) + return self.frame_ele.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64) - def _get_screenshot(self, path=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None, + def _get_screenshot(self, path=None, name=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None, full_page=False, left_top=None, right_bottom=None, ele=None): - """实现对元素截图 - :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' + """实现截图 + :param path: 文件保存路径 + :param name: 完整文件名,后缀可选 'jpg','jpeg','png','webp' :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 @@ -477,7 +498,7 @@ class ChromiumFrame(ChromiumBase): :return: 图片完整路径或字节文本 """ if not self._is_diff_domain: - return super().get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, + return super().get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64, full_page=full_page, left_top=left_top, right_bottom=right_bottom) if as_bytes: @@ -497,20 +518,25 @@ class ChromiumFrame(ChromiumBase): pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 else: - if not path: - path = f'{self.title}.jpg' - path = get_usable_path(path) - pic_type = path.suffix.lower() - if pic_type not in ('.jpg', '.jpeg', '.png', '.webp'): - raise TypeError(f'不支持的文件格式:{pic_type}。') - pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] + path = str(path).rstrip('\\/') if path else '.' 
+ if path and path.endswith(('.jpg', '.jpeg', '.png', '.webp')): + pic_type = path.rsplit('.', 1)[-1] + + elif name and name.endswith(('.jpg', '.jpeg', '.png', '.webp')): + pic_type = name.rsplit('.', 1)[-1] + + else: + pic_type = 'jpeg' + + if pic_type == 'jpg': + pic_type = 'jpeg' self.frame_ele.scroll.to_see(center=True) self.scroll.to_see(ele, center=True) - cx, cy = ele.locations.viewport_location - w, h = ele.size + cx, cy = ele.rect.viewport_location + w, h = ele.rect.size img_data = f'data:image/{pic_type};base64,{self.frame_ele.get_screenshot(as_base64=True)}' - body = self.page('t:body') + body = self.tab('t:body') first_child = body('c::first-child') if not isinstance(first_child, ChromiumElement): first_child = first_child.frame_ele @@ -526,13 +552,13 @@ class ChromiumFrame(ChromiumBase): top = int(self.frame_ele.style('border-top').split('px')[0]) left = int(self.frame_ele.style('border-left').split('px')[0]) - r = self.page.run_cdp('Page.getLayoutMetrics')['visualViewport'] + r = self.tab.run_cdp('Page.getLayoutMetrics')['visualViewport'] sx = r['pageX'] sy = r['pageY'] - r = self.page.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, - left_top=(cx + left + sx, cy + top + sy), - right_bottom=(cx + w + left + sx, cy + h + top + sy)) - self.page.remove_ele(new_ele) + r = self.tab.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64, + left_top=(cx + left + sx, cy + top + sy), + right_bottom=(cx + w + left + sx, cy + h + top + sy)) + self.tab.remove_ele(new_ele) return r def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): @@ -544,130 +570,39 @@ class ChromiumFrame(ChromiumBase): :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 :return: ChromiumElement对象 """ - self._check_ok() if isinstance(loc_or_ele, ChromiumElement): return loc_or_ele - self.wait.load_complete() - - return self.doc_ele._ele(loc_or_ele, timeout, raise_err=raise_err) \ - if single else 
self.doc_ele.eles(loc_or_ele, timeout) - - def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None): - """尝试连接,重试若干次 - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param show_errmsg: 是否抛出异常 - :param timeout: 连接超时时间 - :return: 是否成功,返回None表示不确定 - """ - self._check_ok() - err = None - timeout = timeout if timeout is not None else self.timeouts.page_load - - for t in range(times + 1): - err = None - result = self.driver.Page.navigate(url=to_url, frameId=self.frame_id) - - is_timeout = not self._wait_loaded(timeout) - sleep(.5) - self.wait.load_complete() - - if is_timeout: - err = TimeoutError('页面连接超时。') - if 'errorText' in result: - err = ConnectionError(result['errorText']) - - if not err: - break - - if t < times: - sleep(interval) - while self.ready_state not in ('complete', None): - sleep(.1) - if self._debug: - print('重试') - if show_errmsg: - print(f'重试 {to_url}') - - if err: - if show_errmsg: - raise err if err is not None else ConnectionError('连接异常。') - return False - - return True + return self.doc_ele._ele(loc_or_ele, timeout, + raise_err=raise_err) if single else self.doc_ele.eles(loc_or_ele, timeout) def _is_inner_frame(self): """返回当前frame是否同域""" - return self.frame_id in str(self.page.run_cdp('Page.getFrameTree')['frameTree']) + return self._frame_id in str(self._target_page.run_cdp('Page.getFrameTree')['frameTree']) - def _check_alive(self): - """检测iframe是否有效线程方法""" - while self.is_alive: - sleep(1) - self.driver.stop() - - -class ChromiumFrameIds(object): - def __init__(self, frame): - self._frame = frame + # ----------------即将废弃----------------- @property - def tab_id(self): - """返回当前标签页id""" - return self._frame.page.tab_id + def is_alive(self): + """返回是否仍可用""" + return self.states.is_alive @property - def backend_id(self): - """返回cdp中的node id""" - return self._frame._backend_id + def page_size(self): + """返回frame内页面尺寸,格式:(宽,, 高)""" + return self.rect.size @property - def obj_id(self): - 
"""返回frame元素的object id""" - return self._frame.frame_ele.ids.obj_id + def size(self): + """返回frame元素大小""" + return self.frame_ele.rect.size @property - def node_id(self): - """返回cdp中的node id""" - return self._frame.frame_ele.ids.node_id + def location(self): + """返回frame元素左上角的绝对坐标""" + return self.frame_ele.rect.location - -class ChromiumFrameScroll(ChromiumPageScroll): - def __init__(self, frame): - """ - :param frame: ChromiumFrame对象 - """ - self._driver = frame.doc_ele - self.t1 = self.t2 = 'this.documentElement' - self._wait_complete = False - - def to_see(self, loc_or_ele, center=None): - """滚动页面直到元素可见 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 - :return: None - """ - ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) - self._to_see(ele, center) - - -class ChromiumFrameSetter(ChromiumBaseSetter): - def attr(self, attr, value): - """设置frame元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._page._check_ok() - self._page.frame_ele.set.attr(attr, value) - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame): - """ - :param frame: ChromiumFrame对象 - """ - super().__init__(frame) - super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) + @property + def locations(self): + """返回用于获取元素位置的对象""" + return self.frame_ele.rect diff --git a/DrissionPage/_pages/chromium_frame.pyi b/DrissionPage/_pages/chromium_frame.pyi new file mode 100644 index 0000000..74c3314 --- /dev/null +++ b/DrissionPage/_pages/chromium_frame.pyi @@ -0,0 +1,217 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from pathlib import Path +from typing import Union, Tuple, List, Any + +from .chromium_base import ChromiumBase +from .chromium_page import ChromiumPage +from .chromium_tab import ChromiumTab +from .web_page import WebPage +from .._elements.chromium_element import ChromiumElement +from .._elements.none_element import NoneElement +from .._units.listener import FrameListener +from .._units.rect import FrameRect +from .._units.scroller import FrameScroller +from .._units.setter import ChromiumFrameSetter +from .._units.states import FrameStates +from .._units.waiter import FrameWaiter + + +class ChromiumFrame(ChromiumBase): + + def __init__(self, + page: Union[ChromiumPage, WebPage, ChromiumTab, ChromiumFrame], + ele: ChromiumElement, + info: dict = None): + self._page: ChromiumPage = ... + self._target_page: ChromiumBase = ... + self.tab: ChromiumTab = ... + self._tab_id: str = ... + self._frame_ele: ChromiumElement = ... + self._backend_id: int = ... + self._doc_ele: ChromiumElement = ... + self._is_diff_domain: bool = ... + self.doc_ele: ChromiumElement = ... + self._states: FrameStates = ... + self._reloading: bool = ... + self._rect: FrameRect = ... + self._listener: FrameListener = ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[ChromiumElement, NoneElement]: ... + + def __eq__(self, other: ChromiumFrame) -> bool: ... + + def _check_alive(self) -> None: ... + + def __repr__(self) -> str: ... + + def _d_set_runtime_settings(self) -> None: ... + + def _driver_init(self, tab_id: str) -> None: ... + + def _reload(self) -> None: ... + + def _get_document(self, timeout: float = 10) -> bool: ... + + def _onFrameStoppedLoading(self, **kwargs): ... + + def _onInspectorDetached(self, **kwargs): ... + + @property + def page(self) -> Union[ChromiumPage, WebPage]: ... + + @property + def frame_ele(self) -> ChromiumElement: ... + + @property + def tag(self) -> str: ... + + @property + def url(self) -> str: ... 
+ + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + @property + def title(self) -> str: ... + + @property + def cookies(self) -> dict: ... + + @property + def attrs(self) -> dict: ... + + @property + def rect(self) -> FrameRect: ... + + @property + def listen(self) -> FrameListener: ... + + @property + def _obj_id(self) -> str: ... + + @property + def _node_id(self) -> int: ... + + @property + def active_ele(self) -> ChromiumElement: ... + + @property + def xpath(self) -> str: ... + + @property + def css_path(self) -> str: ... + + @property + def scroll(self) -> FrameScroller: ... + + @property + def set(self) -> ChromiumFrameSetter: ... + + @property + def states(self) -> FrameStates: ... + + @property + def wait(self) -> FrameWaiter: ... + + @property + def tab_id(self) -> str: ... + + @property + def download_path(self) -> str: ... + + def refresh(self) -> None: ... + + def attr(self, attr: str) -> Union[str, None]: ... + + def remove_attr(self, attr: str) -> None: ... + + def run_js(self, + script: str, + *args, + as_expr: bool = False, + timeout: float = None) -> Any: ... + + def parent(self, + level_or_loc: Union[tuple, str, int] = 1, + index: int = 1) -> Union[ChromiumElement, NoneElement]: ... + + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = 0, + ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ... + + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = 0, + ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ... + + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ... + + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, + timeout: float = None, + ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ... 
+ + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None, + ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... + + def get_screenshot(self, + path: [str, Path] = None, + name: str = None, + as_bytes: [bool, str] = None, + as_base64: [bool, str] = None) -> Union[str, bytes]: ... + + def _get_screenshot(self, + path: [str, Path] = None, + name: str = None, + as_bytes: [bool, str] = None, + as_base64: [bool, str] = None, + full_page: bool = False, + left_top: Tuple[int, int] = None, + right_bottom: Tuple[int, int] = None, + ele: ChromiumElement = None) -> Union[str, bytes]: ... + + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None, + single: bool = True, + relative: bool = False, + raise_err: bool = None) \ + -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ... + + def _is_inner_frame(self) -> bool: ... diff --git a/DrissionPage/_pages/chromium_page.py b/DrissionPage/_pages/chromium_page.py new file mode 100644 index 0000000..820d085 --- /dev/null +++ b/DrissionPage/_pages/chromium_page.py @@ -0,0 +1,283 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from pathlib import Path +from time import sleep, perf_counter + +from requests import get + +from .._base.browser import Browser +from .._functions.browser import connect_browser +from .._configs.chromium_options import ChromiumOptions, PortFinder +from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf, Timeout +from .._pages.chromium_tab import ChromiumTab +from .._units.setter import ChromiumPageSetter +from .._units.waiter import PageWaiter +from ..errors import BrowserConnectError + + +class ChromiumPage(ChromiumBase): + """用于管理浏览器的类""" + + def __init__(self, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None): + """ + :param addr_or_opts: 浏览器地址:端口、ChromiumOptions对象或端口数字(int) + :param tab_id: 要控制的标签页id,不指定默认为激活的 + :param timeout: 超时时间(秒) + """ + addr_or_opts = addr_or_opts or addr_driver_opts + self._page = self + address = self._handle_options(addr_or_opts) + self._run_browser() + super().__init__(address, tab_id) + self.set.timeouts(base=timeout) + self._page_init() + + def _handle_options(self, addr_or_opts): + """设置浏览器启动属性 + :param addr_or_opts: 'ip:port'、ChromiumOptions、Driver + :return: 返回浏览器地址 + """ + if not addr_or_opts: + self._chromium_options = ChromiumOptions(addr_or_opts) + + elif isinstance(addr_or_opts, ChromiumOptions): + if addr_or_opts.is_auto_port: + port, path = PortFinder(addr_or_opts.tmp_path).get_port() + addr_or_opts.set_address(f'127.0.0.1:{port}') + addr_or_opts.set_user_data_path(path) + addr_or_opts.auto_port() + self._chromium_options = addr_or_opts + + elif isinstance(addr_or_opts, str): + self._chromium_options = ChromiumOptions() + self._chromium_options.set_address(addr_or_opts) + + elif isinstance(addr_or_opts, int): + self._chromium_options = ChromiumOptions() + self._chromium_options.set_local_port(addr_or_opts) + + else: + raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。') + + return self._chromium_options.address + + def _run_browser(self): + """连接浏览器""" + is_exist = 
connect_browser(self._chromium_options) + try: + ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'}) + if not ws: + raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') + ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + except KeyError: + raise BrowserConnectError('浏览器版本太旧,请升级。') + except: + raise BrowserConnectError('\n浏览器连接失败,如使用全局代理,须设置不代理127.0.0.1地址。') + + self._browser = Browser(self._chromium_options.address, ws, self) + if (is_exist and self._chromium_options._headless is False and + 'headless' in self._browser.run_cdp('Browser.getVersion')['userAgent'].lower()): + self._browser.quit(3) + connect_browser(self._chromium_options) + ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'}) + ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + self._browser = Browser(self._chromium_options.address, ws, self) + + def _d_set_runtime_settings(self): + """设置运行时用到的属性""" + self._timeouts = Timeout(self, page_load=self._chromium_options.timeouts['pageLoad'], + script=self._chromium_options.timeouts['script'], + base=self._chromium_options.timeouts['base']) + if self._chromium_options.timeouts['base'] is not None: + self._timeout = self._chromium_options.timeouts['base'] + self._load_mode = self._chromium_options.load_mode + self._download_path = None if self._chromium_options.download_path is None \ + else str(Path(self._chromium_options.download_path).absolute()) + self.retry_times = self._chromium_options.retry_times + self.retry_interval = self._chromium_options.retry_interval + + def _page_init(self): + """浏览器相关设置""" + self._browser.connect_to_page() + + # ----------挂件---------- + + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = ChromiumPageSetter(self) + return self._set + + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = PageWaiter(self) + return self._wait + + # 
----------挂件---------- + + @property + def browser(self): + """返回用于控制浏览器cdp的driver""" + return self._browser + + @property + def tabs_count(self): + """返回标签页数量""" + return self.browser.tabs_count + + @property + def tabs(self): + """返回所有标签页id组成的列表""" + return self.browser.tabs + + @property + def latest_tab(self): + """返回最新的标签页id,最新标签页指最后创建或最后被激活的""" + return self.tabs[0] + + @property + def process_id(self): + """返回浏览器进程id""" + return self.browser.process_id + + def save(self, path=None, name=None, as_pdf=False, **kwargs): + """把当前页面保存为文件,如果path和name参数都为None,只返回文本 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :param as_pdf: 为Ture保存为pdf,否则为mhtml且忽略kwargs参数 + :param kwargs: pdf生成参数 + :return: as_pdf为True时返回bytes,否则返回文件文本 + """ + return get_pdf(self, path, name, kwargs)if as_pdf else get_mhtml(self, path, name) + + def get_tab(self, id_or_num=None): + """获取一个标签页对象 + :param id_or_num: 要获取的标签页id或序号,为None时获取当前tab,序号不是视觉排列顺序,而是激活顺序 + :return: 标签页对象 + """ + if isinstance(id_or_num, str): + return ChromiumTab(self, id_or_num) + elif isinstance(id_or_num, int): + return ChromiumTab(self, self.tabs[id_or_num]) + elif id_or_num is None: + return ChromiumTab(self, self.tab_id) + elif isinstance(id_or_num, ChromiumTab): + return id_or_num + else: + raise TypeError(f'id_or_num需传入tab id或序号,非{id_or_num}。') + + def find_tabs(self, title=None, url=None, tab_type=None, single=True): + """查找符合条件的tab,返回它们的id组成的列表 + :param title: 要匹配title的文本 + :param url: 要匹配url的文本 + :param tab_type: tab类型,可用列表输入多个 + :param single: 是否返回首个结果的id,为False返回所有信息 + :return: tab id或tab列表 + """ + return self._browser.find_tabs(title, url, tab_type, single) + + def new_tab(self, url=None, new_window=False, background=False, new_context=False): + """新建一个标签页 + :param url: 新标签页跳转到的网址 + :param new_window: 是否在新窗口打开标签页 + :param background: 是否不激活新标签页,如new_window为True则无效 + :param new_context: 是否创建新的上下文 + :return: 新标签页对象 + """ + tab = ChromiumTab(self, 
tab_id=self._new_tab(new_window, background, new_context)) + if url: + tab.get(url) + return tab + + def _new_tab(self, new_window=False, background=False, new_context=False): + """新建一个标签页 + :param new_window: 是否在新窗口打开标签页 + :param background: 是否不激活新标签页,如new_window为True则无效 + :param new_context: 是否创建新的上下文 + :return: 新标签页对象 + """ + bid = None + if new_context: + bid = self.browser.run_cdp('Target.createBrowserContext')['browserContextId'] + + kwargs = {'url': ''} + if new_window: + kwargs['newWindow'] = True + if background: + kwargs['background'] = True + if bid: + kwargs['browserContextId'] = bid + + return self.browser.run_cdp('Target.createTarget', **kwargs)['targetId'] + + def close(self): + """关闭Page管理的标签页""" + self.browser.close_tab(self.tab_id) + + def close_tabs(self, tabs_or_ids=None, others=False): + """关闭传入的标签页,默认关闭当前页。可传入多个 + :param tabs_or_ids: 要关闭的标签页对象或id,可传入列表或元组,为None时关闭当前页 + :param others: 是否关闭指定标签页之外的 + :return: None + """ + all_tabs = set(self.tabs) + if isinstance(tabs_or_ids, str): + tabs = {tabs_or_ids} + elif isinstance(tabs_or_ids, ChromiumTab): + tabs = {tabs_or_ids.tab_id} + elif tabs_or_ids is None: + tabs = {self.tab_id} + elif isinstance(tabs_or_ids, (list, tuple)): + tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids) + else: + raise TypeError('tabs_or_ids参数只能传入标签页对象或id。') + + if others: + tabs = all_tabs - tabs + + end_len = len(set(all_tabs) - set(tabs)) + if end_len <= 0: + self.quit() + return + + for tab in tabs: + self.browser.close_tab(tab) + sleep(.2) + end_time = perf_counter() + 3 + while self.tabs_count != end_len and perf_counter() < end_time: + sleep(.1) + + def quit(self, timeout=5, force=True): + """关闭浏览器 + :param timeout: 等待浏览器关闭超时时间(秒) + :param force: 关闭超时是否强制终止进程 + :return: None + """ + self.browser.quit(timeout, force) + + def __repr__(self): + return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>' + + # ----------即将废弃----------- + def close_other_tabs(self, 
tabs_or_ids=None): + """关闭传入的标签页以外标签页,默认保留当前页。可传入多个 + :param tabs_or_ids: 要保留的标签页对象或id,可传入列表或元组,为None时保存当前页 + :return: None + """ + self.close_tabs(tabs_or_ids, True) + + +def get_rename(original, rename): + if '.' in rename: + return rename + else: + suffix = original[original.rfind('.'):] if '.' in original else '' + return f'{rename}{suffix}' diff --git a/DrissionPage/_pages/chromium_page.pyi b/DrissionPage/_pages/chromium_page.pyi new file mode 100644 index 0000000..407cd1a --- /dev/null +++ b/DrissionPage/_pages/chromium_page.pyi @@ -0,0 +1,99 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from typing import Union, Tuple, List, Optional + +from .._base.browser import Browser +from .._configs.chromium_options import ChromiumOptions +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_tab import ChromiumTab +from .._units.rect import TabRect +from .._units.setter import ChromiumPageSetter +from .._units.waiter import PageWaiter + + +class ChromiumPage(ChromiumBase): + + def __init__(self, + addr_or_opts: Union[str, int, ChromiumOptions] = None, + tab_id: str = None, + timeout: float = None): + self._chromium_options: ChromiumOptions = ... + self._browser: Browser = ... + self._rect: Optional[TabRect] = ... + + def _handle_options(self, addr_or_opts: Union[str, ChromiumOptions]) -> str: ... + + def _run_browser(self) -> None: ... + + def _page_init(self) -> None: ... + + @property + def browser(self) -> Browser: ... + + @property + def tabs_count(self) -> int: ... + + @property + def tabs(self) -> List[str]: ... + + @property + def wait(self) -> PageWaiter: ... + + @property + def main_tab(self) -> str: ... + + @property + def latest_tab(self) -> str: ... + + @property + def process_id(self) -> Optional[int]: ... + + @property + def set(self) -> ChromiumPageSetter: ... 
+ + def save(self, + path: Union[str, Path] = None, + name: str = None, + as_pdf: bool = False, + landscape: bool = ..., + displayHeaderFooter: bool = ..., + printBackground: bool = ..., + scale: float = ..., + paperWidth: float = ..., + paperHeight: float = ..., + marginTop: float = ..., + marginBottom: float = ..., + marginLeft: float = ..., + marginRight: float = ..., + pageRanges: str = ..., + headerTemplate: str = ..., + footerTemplate: str = ..., + preferCSSPageSize: bool = ..., + generateTaggedPDF: bool = ..., + generateDocumentOutline: bool = ...) -> Union[bytes, str]: ... + + def get_tab(self, tab_id: Union[str, ChromiumTab, int] = None) -> ChromiumTab: ... + + def find_tabs(self, title: str = None, url: str = None, + tab_type: Union[str, list, tuple] = None, single: bool = True) -> Union[str, List[str]]: ... + + def new_tab(self, url: str = None, new_window: bool = False, background: bool = False, + new_context: bool = False) -> ChromiumTab: ... + + def _new_tab(self, new_window: bool = False, background: bool = False, new_context: bool = False) -> str: ... + + def close(self) -> None: ... + + def close_tabs(self, tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]], + Tuple[Union[str, ChromiumTab]]] = None, others: bool = False) -> None: ... + + def quit(self, timeout: float = 5, force: bool = True) -> None: ... + + +def get_rename(original: str, rename: str) -> str: ... diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/_pages/chromium_tab.py similarity index 68% rename from DrissionPage/chromium_tab.py rename to DrissionPage/_pages/chromium_tab.py index 46716b8..cbab53e 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/_pages/chromium_tab.py @@ -1,13 +1,19 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" from copy import copy -from .chromium_base import ChromiumBase, ChromiumBaseSetter -from .commons.web import set_session_cookies, set_browser_cookies -from .session_page import SessionPage, SessionPageSetter, DownloadSetter +from .._base.base import BasePage +from .._configs.session_options import SessionOptions +from .._functions.web import set_session_cookies, set_browser_cookies +from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf +from .._pages.session_page import SessionPage +from .._units.setter import TabSetter, WebPageTabSetter +from .._units.waiter import TabWaiter class ChromiumTab(ChromiumBase): @@ -18,49 +24,74 @@ class ChromiumTab(ChromiumBase): :param page: ChromiumPage对象 :param tab_id: 要控制的标签页id,不指定默认为激活的 """ - self.page = page + self._page = page + self._browser = page.browser super().__init__(page.address, tab_id, page.timeout) + self._rect = None - def _set_runtime_settings(self): + def _d_set_runtime_settings(self): """重写设置浏览器运行参数方法""" - self._timeouts = self.page.timeouts + self._timeouts = copy(self.page.timeouts) self.retry_times = self.page.retry_times self.retry_interval = self.page.retry_interval - self._page_load_strategy = self.page.page_load_strategy + self._load_mode = self.page._load_mode + self._download_path = self.page.download_path + + def close(self): + """关闭当前标签页""" + self.page.close_tabs(self.tab_id) @property - def rect(self): - """返回获取窗口坐标和大小的对象""" - return self.page.rect + def page(self): + """返回总体page对象""" + return self._page + + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = TabSetter(self) + return self._set + + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = TabWaiter(self) + return self._wait + + def save(self, path=None, name=None, as_pdf=False, **kwargs): + """把当前页面保存为文件,如果path和name参数都为None,只返回文本 + :param path: 保存路径,为None且name不为None时保存在当前路径 + :param name: 文件名,为None且path不为None时用title属性值 + :param as_pdf: 
为Ture保存为pdf,否则为mhtml且忽略kwargs参数 + :param kwargs: pdf生成参数 + :return: as_pdf为True时返回bytes,否则返回文件文本 + """ + return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name) + + def __repr__(self): + return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>' -class WebPageTab(SessionPage, ChromiumTab): +class WebPageTab(SessionPage, ChromiumTab, BasePage): def __init__(self, page, tab_id): """ :param page: WebPage对象 :param tab_id: 要控制的标签页id """ - self.page = page - self.address = page.address - self._debug = page._debug - self._debug_recorder = page._debug_recorder self._mode = 'd' self._has_driver = True self._has_session = True - self._session = copy(page.session) - - self._response = None - self._download_set = None - self._download_path = None - self._set = None - super(SessionPage, self)._set_runtime_settings() - self._connect_browser(tab_id) + super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session), + page._headers)) + super(SessionPage, self).__init__(page=page, tab_id=tab_id) def __call__(self, loc_or_str, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :return: 子元素对象 """ if self._mode == 'd': @@ -68,6 +99,13 @@ class WebPageTab(SessionPage, ChromiumTab): elif self._mode == 's': return super().__call__(loc_or_str) + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = WebPageTabSetter(self) + return self._set + @property def url(self): """返回当前url""" @@ -79,7 +117,7 @@ class WebPageTab(SessionPage, ChromiumTab): @property def _browser_url(self): """返回浏览器当前url""" - return super(SessionPage, self).url if self._tab_obj else None + return super(SessionPage, self).url if self._driver else None @property def title(self): @@ -89,6 +127,14 @@ class WebPageTab(SessionPage, ChromiumTab): elif self._mode == 'd': return super(SessionPage, self).title + @property + def 
raw_data(self): + """返回页码原始数据数据""" + if self._mode == 's': + return super().raw_data + elif self._mode == 'd': + return super(SessionPage, self).html if self._has_driver else '' + @property def html(self): """返回页面html文本""" @@ -143,7 +189,7 @@ class WebPageTab(SessionPage, ChromiumTab): @property def timeout(self): """返回通用timeout设置""" - return self.timeouts.implicit + return self.timeouts.base @timeout.setter def timeout(self, second): @@ -151,34 +197,15 @@ class WebPageTab(SessionPage, ChromiumTab): :param second: 秒数 :return: None """ - self.set.timeouts(implicit=second) - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = WebPageTabSetter(self) - return self._set - - @property - def download_set(self): - """返回下载设置对象""" - if self._download_set is None: - self._download_set = WebPageTabDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit + self.set.timeouts(base=second) def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url :param url: 目标url :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间(秒) + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 + :param timeout: 连接超时时间(秒),为None时使用页面对象timeouts.page_load属性值 :param kwargs: 连接参数,s模式专用 :return: url是否可用,d模式返回None时表示不确定 """ @@ -189,24 +216,25 @@ class WebPageTab(SessionPage, ChromiumTab): timeout = self.timeouts.page_load if self._has_driver else self.timeout return super().get(url, show_errmsg, retry, interval, timeout, **kwargs) - def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): """用post方式跳转到url,会切换到s模式 :param url: 目标url - :param data: post方式时提交的数据 :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 
重试间隔(秒) + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 :param kwargs: 连接参数 - :return: url是否可用 + :return: s模式时返回url是否可用,d模式时返回获取到的Response对象 """ if self.mode == 'd': self.cookies_to_session() - return super().post(url, data, show_errmsg, retry, interval, **kwargs) + super().post(url, show_errmsg, retry, interval, **kwargs) + return self.response + return super().post(url, show_errmsg, retry, interval, **kwargs) def ele(self, loc_or_ele, timeout=None): """返回第一个符合条件的元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': @@ -217,7 +245,7 @@ class WebPageTab(SessionPage, ChromiumTab): def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本组成的列表 """ if self._mode == 's': @@ -261,8 +289,8 @@ class WebPageTab(SessionPage, ChromiumTab): # s模式转d模式 if self._mode == 'd': - if self._tab_obj is None: - self._connect_browser(self.page._driver_options) + if self._driver is None: + self._connect_browser(self.page._chromium_options) self._url = None if not self._has_driver else super(SessionPage, self).url self._has_driver = True @@ -289,7 +317,7 @@ class WebPageTab(SessionPage, ChromiumTab): self.get(url) def cookies_to_session(self, copy_user_agent=True): - """把driver对象的cookies复制到session对象 + """把浏览器的cookies复制到session对象 :param copy_user_agent: 是否复制ua信息 :return: None """ @@ -297,8 +325,8 @@ class WebPageTab(SessionPage, ChromiumTab): return if copy_user_agent: - selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": selenium_user_agent}) + user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + 
self._headers.update({"User-Agent": user_agent}) set_session_cookies(self.session, super(SessionPage, self).get_cookies()) @@ -306,9 +334,6 @@ class WebPageTab(SessionPage, ChromiumTab): """把session对象的cookies复制到浏览器""" if not self._has_driver: return - - # set_browser_cookies(self, super().get_cookies(as_dict=True)) - # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -323,10 +348,17 @@ class WebPageTab(SessionPage, ChromiumTab): elif self._mode == 'd': return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) + def close(self): + """关闭当前标签页""" + self.page.close_tabs(self.tab_id) + self._session.close() + if self._response is not None: + self._response.close() + def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,d模式专用 + :param timeout: 查找元素超时时间(秒),d模式专用 :param single: True则返回第一个,False则返回全部 :param relative: WebPage用的表示是否相对定位的参数 :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 @@ -338,53 +370,5 @@ class WebPageTab(SessionPage, ChromiumTab): return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, relative=relative) - -class WebPageTabSetter(ChromiumBaseSetter): - def __init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumBaseSetter(self._page) - - def cookies(self, cookies): - """添加cookies信息到浏览器或session对象 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - 
:param headers: dict格式的headers数据 - :return: None - """ - if self._page._has_session: - self._session_setter.headers(headers) - if self._page._has_driver: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page._has_session: - self._session_setter.user_agent(ua) - if self._page._has_driver: - self._chromium_setter.user_agent(ua, platform) - - -class WebPageTabDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page): - super().__init__(page) - self._session = page.session - - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - if self._page.mode == 'd': - ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self._page.session.headers.update({"User-Agent": ua}) - set_session_cookies(self._page.session, self._page.get_cookies(as_dict=False, all_domains=False)) - return self.DownloadKit + def __repr__(self): + return f'<WebPageTab browser_id={self.browser.id} tab_id={self.tab_id}>' diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/_pages/chromium_tab.pyi similarity index 63% rename from DrissionPage/chromium_tab.pyi rename to DrissionPage/_pages/chromium_tab.pyi index fc1b132..de61132 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/_pages/chromium_tab.pyi @@ -1,45 +1,85 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" -from typing import Union, Tuple, Any, List +from pathlib import Path +from typing import Union, Tuple, Any, List, Optional -from DownloadKit import DownloadKit from requests import Session, Response -from .chromium_base import ChromiumBase, ChromiumBaseSetter -from .chromium_element import ChromiumElement +from .chromium_base import ChromiumBase from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage, ChromiumTabRect -from .session_element import SessionElement -from .session_page import SessionPage, SessionPageSetter, DownloadSetter +from .chromium_page import ChromiumPage +from .session_page import SessionPage from .web_page import WebPage +from .._base.browser import Browser +from .._elements.chromium_element import ChromiumElement +from .._elements.none_element import NoneElement +from .._elements.session_element import SessionElement +from .._units.rect import TabRect +from .._units.setter import TabSetter, WebPageTabSetter +from .._units.waiter import TabWaiter class ChromiumTab(ChromiumBase): def __init__(self, page: ChromiumPage, tab_id: str = None): - self.page: ChromiumPage = ... + self._page: ChromiumPage = ... + self._browser: Browser = ... + self._rect: Optional[TabRect] = ... - def _set_runtime_settings(self) -> None: ... + def _d_set_runtime_settings(self) -> None: ... + + def close(self) -> None: ... @property - def rect(self) -> ChromiumTabRect: ... + def page(self) -> ChromiumPage: ... + + @property + def set(self) -> TabSetter: ... + + @property + def wait(self) -> TabWaiter: ... 
+ + def save(self, + path: Union[str, Path] = None, + name: str = None, + as_pdf: bool = False, + landscape: bool = ..., + displayHeaderFooter: bool = ..., + printBackground: bool = ..., + scale: float = ..., + paperWidth: float = ..., + paperHeight: float = ..., + marginTop: float = ..., + marginBottom: float = ..., + marginLeft: float = ..., + marginRight: float = ..., + pageRanges: str = ..., + headerTemplate: str = ..., + footerTemplate: str = ..., + preferCSSPageSize: bool = ..., + generateTaggedPDF: bool = ..., + generateDocumentOutline: bool = ...) -> Union[bytes, str]: ... class WebPageTab(SessionPage, ChromiumTab): def __init__(self, page: WebPage, tab_id: str): - self.page: WebPage = ... + self._page: WebPage = ... + self._browser: Browser = ... self._mode: str = ... self._has_driver = ... self._has_session = ... - self._download_set = ... - self._download_path = ... def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], - timeout: float = None) -> Union[ChromiumElement, SessionElement]: ... + timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... + + @property + def page(self) -> WebPage: ... @property def url(self) -> Union[str, None]: ... @@ -50,6 +90,9 @@ class WebPageTab(SessionPage, ChromiumTab): @property def title(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... @@ -102,16 +145,16 @@ class WebPageTab(SessionPage, ChromiumTab): def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement], - timeout: float = None) -> Union[ChromiumElement, SessionElement, str]: ... + timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, SessionElement, str]]: ... + timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... 
def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, str, None]: ... + -> Union[SessionElement, NoneElement]: ... - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... @@ -122,6 +165,8 @@ class WebPageTab(SessionPage, ChromiumTab): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... + def close(self) -> None: ... + # ----------------重写SessionPage的函数----------------------- def post(self, url: str, @@ -141,40 +186,12 @@ class WebPageTab(SessionPage, ChromiumTab): hooks: Any | None = ..., stream: Any | None = ..., verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... + cert: Any | None = ...) -> Union[bool, Response]: ... @property def set(self) -> WebPageTabSetter: ... - @property - def download(self) -> DownloadKit: ... - - @property - def download_set(self) -> WebPageTabDownloadSetter: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ - Union[ChromiumElement, str, ChromiumFrame]]]: ... - - -class WebPageTabSetter(ChromiumBaseSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumBaseSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... - - -class WebPageTabDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page: WebPageTab): - self._page: WebPageTab = ... 
- - @property - def _switched_DownloadKit(self) -> DownloadKit: ... + -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], List[ + Union[ChromiumElement, ChromiumFrame]]]: ... diff --git a/DrissionPage/session_page.py b/DrissionPage/_pages/session_page.py similarity index 52% rename from DrissionPage/session_page.py rename to DrissionPage/_pages/session_page.py index 9348554..aaba8b6 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -1,21 +1,24 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ -from re import search +from pathlib import Path +from re import search, DOTALL from time import sleep -from urllib.parse import urlparse +from urllib.parse import urlparse, quote -from DownloadKit import DownloadKit -from requests import Session +from requests import Session, Response from requests.structures import CaseInsensitiveDict from tldextract import extract -from .base import BasePage -from .commons.web import cookie_to_dict, set_session_cookies -from .configs.session_options import SessionOptions -from .session_element import SessionElement, make_session_ele +from .._base.base import BasePage +from .._configs.session_options import SessionOptions +from .._elements.session_element import SessionElement, make_session_ele +from .._functions.web import cookie_to_dict +from .._units.setter import SessionPageSetter class SessionPage(BasePage): @@ -24,22 +27,23 @@ class SessionPage(BasePage): def __init__(self, session_or_options=None, timeout=None): """ :param session_or_options: Session对象或SessionOptions对象 - :param timeout: 连接超时时间,为None时从ini文件读取 + :param timeout: 连接超时时间(秒),为None时从ini文件读取或默认10 """ + super(SessionPage, SessionPage).__init__(self) + self._headers = None self._response = None - self._download_set = None self._session = None self._set 
= None - self._set_start_options(session_or_options, None) - self._set_runtime_settings() + self._encoding = None + self._s_set_start_options(session_or_options) + self._s_set_runtime_settings() self._create_session() - timeout = timeout if timeout is not None else self.timeout - super().__init__(timeout) + if timeout is not None: + self.timeout = timeout - def _set_start_options(self, session_or_options, none): + def _s_set_start_options(self, session_or_options): """启动配置 - :param session_or_options: Session、SessionOptions - :param none: 用于后代继承 + :param session_or_options: Session、SessionOptions对象 :return: None """ if not session_or_options or isinstance(session_or_options, SessionOptions): @@ -47,17 +51,22 @@ class SessionPage(BasePage): elif isinstance(session_or_options, Session): self._session_options = SessionOptions() + self._headers = session_or_options.headers + session_or_options.headers = None self._session = session_or_options - def _set_runtime_settings(self): + def _s_set_runtime_settings(self): """设置运行时用到的属性""" self._timeout = self._session_options.timeout - self._download_path = self._session_options.download_path + self._download_path = None if self._session_options.download_path is None \ + else str(Path(self._session_options.download_path).absolute()) + self.retry_times = self._session_options.retry_times + self.retry_interval = self._session_options.retry_interval def _create_session(self): """创建内建Session对象""" if not self._session: - self._session = self._session_options.make_session() + self._session, self._headers = self._session_options.make_session() def __call__(self, loc_or_str, timeout=None): """在内部查找元素 @@ -85,6 +94,11 @@ class SessionPage(BasePage): """返回当前访问url""" return self._url + @property + def raw_data(self): + """返回页面原始数据""" + return self.response.content if self.response else b'' + @property def html(self): """返回页面的html文本""" @@ -101,53 +115,64 @@ class SessionPage(BasePage): @property def user_agent(self): """返回user agent""" - 
return self.session.headers.get('user-agent', '') - - @property - def download_path(self): - """返回下载路径""" - return self._download_path - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = DownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set.DownloadKit + return self._headers.get('user-agent', '') @property def session(self): - """返回session对象""" + """返回Session对象""" return self._session @property def response(self): - """返回访问url得到的response对象""" + """返回访问url得到的Response对象""" return self._response + @property + def encoding(self): + """返回设置的编码""" + return self._encoding + @property def set(self): - """返回用于等待的对象""" + """返回用于设置的对象""" if self._set is None: self._set = SessionPageSetter(self) return self._set def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): - """用get方式跳转到url - :param url: 目标url + """用get方式跳转到url,可输入文件路径 + :param url: 目标url,可指定本地文件路径 :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间(秒) + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 + :param timeout: 连接超时时间(秒),为None时使用页面对象timeout属性值 :param kwargs: 连接参数 :return: url是否可用 """ - return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs) + if isinstance(url, Path): + url = str(url.absolute()) + if not url.lower().startswith('http'): + if url.startswith('file:///'): + url = url[8:] + if Path(url).exists(): + with open(url, 'rb') as f: + r = Response() + r._content = f.read() + r.status_code = 200 + self._response = r + return + return self._s_connect(url, 'get', show_errmsg, retry, interval, **kwargs) + + def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): + """用post方式跳转到url + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param 
interval: 重试间隔(秒),为None时使用页面对象timeout属性值 + :param kwargs: 连接参数 + :return: url是否可用 + """ + return self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs) def ele(self, loc_or_ele, timeout=None): """返回页面中符合条件的第一个元素、属性或节点文本 @@ -155,7 +180,7 @@ class SessionPage(BasePage): :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 :return: SessionElement对象或属性、文本 """ - return self._ele(loc_or_ele) + return self._ele(loc_or_ele, method='ele()') def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 @@ -170,7 +195,7 @@ class SessionPage(BasePage): :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :return: SessionElement对象或属性、文本 """ - return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele) + return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, method='s_ele()') def s_eles(self, loc_or_str): """返回页面中符合条件的所有元素、属性或节点文本 @@ -218,23 +243,28 @@ class SessionPage(BasePage): r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']}) return r - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url - :param url: 目标url - :param data: 提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs) + def close(self): + """关闭Session对象""" + self._session.close() + if self._response is not None: + self._response.close() - def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + + 
def _s_connect(self, url, mode, show_errmsg=False, retry=None, interval=None, **kwargs): """执行get或post连接 :param url: 目标url :param mode: 'get' 或 'post' - :param data: 提交的数据 :param show_errmsg: 是否显示和抛出异常 :param retry: 重试次数 :param interval: 重试间隔(秒) @@ -242,7 +272,7 @@ class SessionPage(BasePage): :return: url是否可用 """ retry, interval = self._before_connect(url, retry, interval) - self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs) + self._response, info = self._make_response(self._url, mode, retry, interval, show_errmsg, **kwargs) if self._response is None: self._url_available = False @@ -258,14 +288,13 @@ class SessionPage(BasePage): return self._url_available - def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs): + def _make_response(self, url, mode='get', retry=None, interval=None, show_errmsg=False, **kwargs): """生成Response对象 :param url: 目标url :param mode: 'get' 或 'post' - :param data: post方式要提交的数据 :param show_errmsg: 是否显示和抛出异常 :param kwargs: 其它参数 - :return: tuple,第一位为Response或None,第二位为出错信息或'Success' + :return: tuple,第一位为Response或None,第二位为出错信息或 'Success' """ kwargs = CaseInsensitiveDict(kwargs) if 'headers' not in kwargs: @@ -277,14 +306,16 @@ class SessionPage(BasePage): parsed_url = urlparse(url) hostname = parsed_url.hostname scheme = parsed_url.scheme - if not check_headers(kwargs, self.session.headers, 'Referer'): + if not check_headers(kwargs, self._headers, 'Referer'): kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' if 'Host' not in kwargs['headers']: kwargs['headers']['Host'] = hostname - if not check_headers(kwargs, self.session.headers, 'timeout'): + if not check_headers(kwargs, self._headers, 'timeout'): kwargs['timeout'] = self.timeout + kwargs['headers'] = {**self._headers, **kwargs['headers']} + r = err = None retry = retry if retry is not None else self.retry_times interval = interval if interval is not None 
else self.retry_interval @@ -293,9 +324,12 @@ class SessionPage(BasePage): if mode == 'get': r = self.session.get(url, **kwargs) elif mode == 'post': - r = self.session.post(url, data=data, **kwargs) + r = self.session.post(url, **kwargs) - if r: + if r and r.content: + if self._encoding: + r.encoding = self._encoding + return r, 'Success' return set_charset(r), 'Success' except Exception as e: @@ -309,202 +343,22 @@ class SessionPage(BasePage): if show_errmsg: print(f'重试 {url}') - if r is None: - if show_errmsg: - if err: - raise err - else: - raise ConnectionError('连接失败') - return None, '连接失败' if err is None else err + if show_errmsg: + if err: + raise err + elif r is not None: + raise ConnectionError(f'状态码:{r.status_code}') if r.content else ConnectionError('返回内容为空。') + else: + raise ConnectionError('连接失败') - if not r.ok: - if show_errmsg: - raise ConnectionError(f'状态码:{r.status_code}') - return r, f'状态码:{r.status_code}' + else: + if r is not None: + return (r, f'状态码:{r.status_code}') if r.content else (None, '返回内容为空') + else: + return None, '连接失败' if err is None else err - -class SessionPageSetter(object): - def __init__(self, page): - self._page = page - - def retry_times(self, times): - """设置连接失败时重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败时重连间隔""" - self._page.retry_interval = interval - - def timeout(self, second): - """设置连接超时时间 - :param second: 秒数 - :return: None - """ - self._page.timeout = second - - def cookies(self, cookies): - """为Session对象设置cookies - :param cookies: cookies信息 - :return: None - """ - set_session_cookies(self._page.session, cookies) - - def headers(self, headers): - """设置通用的headers - :param headers: dict形式的headers - :return: None - """ - self._page.session.headers = CaseInsensitiveDict(headers) - - def header(self, attr, value): - """设置headers中一个项 - :param attr: 设置名称 - :param value: 设置值 - :return: None - """ - self._page.session.headers[attr.lower()] = value - - def user_agent(self, ua): 
- """设置user agent - :param ua: user agent - :return: None - """ - self._page.session.headers['user-agent'] = ua - - def proxies(self, http, https=None): - """设置proxies参数 - :param http: http代理地址 - :param https: https代理地址 - :return: None - """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._page.session.proxies = proxies - - def auth(self, auth): - """设置认证元组或对象 - :param auth: 认证元组或对象 - :return: None - """ - self._page.session.auth = auth - - def hooks(self, hooks): - """设置回调方法 - :param hooks: 回调方法 - :return: None - """ - self._page.session.hooks = hooks - - def params(self, params): - """设置查询参数字典 - :param params: 查询参数字典 - :return: None - """ - self._page.session.params = params - - def verify(self, on_off): - """设置是否验证SSL证书 - :param on_off: 是否验证 SSL 证书 - :return: None - """ - self._page.session.verify = on_off - - def cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 - :param cert: 证书路径或元组 - :return: None - """ - self._page.session.cert = cert - - def stream(self, on_off): - """设置是否使用流式响应内容 - :param on_off: 是否使用流式响应内容 - :return: None - """ - self._page.session.stream = on_off - - def trust_env(self, on_off): - """设置是否信任环境 - :param on_off: 是否信任环境 - :return: None - """ - self._page.session.trust_env = on_off - - def max_redirects(self, times): - """设置最大重定向次数 - :param times: 最大重定向次数 - :return: None - """ - self._page.session.max_redirects = times - - def add_adapter(self, url, adapter): - """添加适配器 - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: None - """ - self._page.session.mount(url, adapter) - - -class DownloadSetter(object): - """用于设置下载参数的类""" - - def __init__(self, page): - self._page = page - self._DownloadKit = None - - @property - def DownloadKit(self): - if self._DownloadKit is None: - self._DownloadKit = DownloadKit(session=self._page, goal_path=self._page.download_path) - return self._DownloadKit - - @property - def if_file_exists(self): - """返回用于设置存在同名文件时处理方法的对象""" - return 
FileExists(self) - - def split(self, on_off): - """设置是否允许拆分大文件用多线程下载 - :param on_off: 是否启用多线程下载大文件 - :return: None - """ - self.DownloadKit.split = on_off - - def save_path(self, path): - """设置下载保存路径 - :param path: 下载保存路径 - :return: None - """ - path = path if path is None else str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - -class FileExists(object): - """用于设置存在同名文件时处理方法""" - - def __init__(self, setter): - """ - :param setter: DownloadSetter对象 - """ - self._setter = setter - - def __call__(self, mode): - if mode not in ('skip', 'rename', 'overwrite'): - raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") - self._setter.DownloadKit.file_exists = mode - - def skip(self): - """设为跳过""" - self._setter.DownloadKit.file_exists = 'skip' - - def rename(self): - """设为重命名,文件名后加序号""" - self._setter.DownloadKit._file_exists = 'rename' - - def overwrite(self): - """设为覆盖""" - self._setter.DownloadKit._file_exists = 'overwrite' + def __repr__(self): + return f'<SessionPage url={self.url}>' def check_headers(kwargs, headers, arg): @@ -525,7 +379,7 @@ def set_charset(response): # 在headers中获取不到编码,且如果是网页 elif content_type.replace(' ', '').startswith('text/html'): - re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content) + re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content, DOTALL) if re_result: charset = re_result.group(1).decode() diff --git a/DrissionPage/session_page.pyi b/DrissionPage/_pages/session_page.pyi similarity index 53% rename from DrissionPage/session_page.pyi rename to DrissionPage/_pages/session_page.pyi index 3a32942..a6379ca 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -1,51 +1,48 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" from pathlib import Path -from typing import Any, Union, Tuple, List +from typing import Any, Union, Tuple, List, Optional -from DownloadKit import DownloadKit from requests import Session, Response -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict -from .commons.constants import NoneElement -from .base import BasePage -from .chromium_page import ChromiumPage -from .configs.session_options import SessionOptions -from .session_element import SessionElement -from .web_page import WebPage +from .._base.base import BasePage +from .._configs.session_options import SessionOptions +from .._elements.none_element import NoneElement +from .._elements.session_element import SessionElement +from .._units.setter import SessionPageSetter class SessionPage(BasePage): def __init__(self, session_or_options: Union[Session, SessionOptions] = None, timeout: float = None): + self._headers: Optional[CaseInsensitiveDict] = ... self._session: Session = ... self._session_options: SessionOptions = ... self._url: str = ... self._response: Response = ... - self._download_path: str = ... - self._download_set: DownloadSetter = ... self._url_available: bool = ... self.timeout: float = ... self.retry_times: int = ... self.retry_interval: float = ... self._set: SessionPageSetter = ... + self._encoding: str = ... - def _set_start_options(self, session_or_options, none) -> None: ... + def _s_set_start_options(self, session_or_options: Union[Session, SessionOptions]) -> None: ... + + def _s_set_runtime_settings(self) -> None: ... def _create_session(self) -> None: ... - def _set_runtime_settings(self) -> None: ... - def __call__(self, loc_or_str: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, NoneElement]: ... + timeout: float = None) -> Union[SessionElement, NoneElement]: ... 
# -----------------共有属性和方法------------------- @property @@ -57,6 +54,9 @@ class SessionPage(BasePage): @property def _session_url(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... @@ -69,11 +69,8 @@ class SessionPage(BasePage): @property def download_path(self) -> str: ... - @property - def download_set(self) -> DownloadSetter: ... - def get(self, - url: str, + url: Union[Path, str], show_errmsg: bool | None = False, retry: int | None = None, interval: float | None = None, @@ -94,23 +91,28 @@ class SessionPage(BasePage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, NoneElement]: ... + timeout: float = None) -> Union[SessionElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[SessionElement, str]]: ... + timeout: float = None) -> List[SessionElement]: ... def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \ - -> Union[SessionElement, str, NoneElement]: ... + -> Union[SessionElement, NoneElement]: ... - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None, single: bool = True, raise_err: bool = None) \ - -> Union[SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ... + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, SessionElement], + timeout: float = None, + single: bool = True, + raise_err: bool = None) \ + -> Union[SessionElement, NoneElement, List[SessionElement]]: ... 
- def get_cookies(self, as_dict: bool = False, all_domains: bool = False, + def get_cookies(self, + as_dict: bool = False, + all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... # ----------------session独有属性和方法----------------------- @@ -121,17 +123,17 @@ class SessionPage(BasePage): def response(self) -> Response: ... @property - def set(self) -> SessionPageSetter: ... + def encoding(self) -> str: ... @property - def download(self) -> DownloadKit: ... + def set(self) -> SessionPageSetter: ... def post(self, url: str, - data: Union[dict, str, None] = ..., show_errmsg: bool = False, retry: int | None = None, interval: float | None = None, + data: Union[dict, str, None] = ..., timeout: float | None = ..., params: dict | None = ..., json: Union[dict, str, None] = ..., @@ -146,10 +148,13 @@ class SessionPage(BasePage): verify: Any | None = ..., cert: Any | None = ...) -> bool: ... + def close(self) -> None: ... + + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + def _s_connect(self, url: str, mode: str, - data: Union[dict, str, None] = None, show_errmsg: bool = False, retry: int = None, interval: float = None, @@ -158,82 +163,14 @@ class SessionPage(BasePage): def _make_response(self, url: str, mode: str = 'get', - data: Union[dict, str] = None, retry: int = None, interval: float = None, show_errmsg: bool = False, **kwargs) -> tuple: ... -class SessionPageSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeout(self, second: float) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def header(self, attr: str, value: str) -> None: ... - - def user_agent(self, ua: str) -> None: ... - - def proxies(self, http, https=None) -> None: ... 
- - def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... - - def hooks(self, hooks: Union[dict, None]) -> None: ... - - def params(self, params: Union[dict, None]) -> None: ... - - def verify(self, on_off: Union[bool, None]) -> None: ... - - def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... - - def stream(self, on_off: Union[bool, None]) -> None: ... - - def trust_env(self, on_off: Union[bool, None]) -> None: ... - - def max_redirects(self, times: Union[int, None]) -> None: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... - - -class DownloadSetter(object): - def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]): - self._page: SessionPage = ... - self._DownloadKit: DownloadKit = ... - - @property - def DownloadKit(self) -> DownloadKit: ... - - @property - def if_file_exists(self) -> FileExists: ... - - def split(self, on_off: bool) -> None: ... - - def save_path(self, path: Union[str, Path]): ... - - -class FileExists(object): - def __init__(self, setter: DownloadSetter): - self._setter: DownloadSetter = ... - - def __call__(self, mode: str) -> None: ... - - def skip(self) -> None: ... - - def rename(self) -> None: ... - - def overwrite(self) -> None: ... - - -def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], +def check_headers(kwargs: Union[dict, CaseInsensitiveDict], + headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool: ... diff --git a/DrissionPage/web_page.py b/DrissionPage/_pages/web_page.py similarity index 51% rename from DrissionPage/web_page.py rename to DrissionPage/_pages/web_page.py index cee4c34..fd4e079 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/_pages/web_page.py @@ -1,133 +1,48 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" -from pathlib import Path -from warnings import warn - -from requests import Session - -from .base import BasePage -from .chromium_base import ChromiumBase, Timeout -from .chromium_driver import ChromiumDriver -from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter +from .chromium_page import ChromiumPage from .chromium_tab import WebPageTab -from .commons.web import set_session_cookies, set_browser_cookies -from .configs.chromium_options import ChromiumOptions -from .configs.session_options import SessionOptions -from .errors import CallMethodError -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .._base.base import BasePage +from .._configs.chromium_options import ChromiumOptions +from .._functions.web import set_session_cookies, set_browser_cookies +from .._units.setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): """整合浏览器和request的页面类""" - def __init__(self, mode='d', timeout=None, driver_or_options=None, session_or_options=None): + def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None): """初始化函数 :param mode: 'd' 或 's',即driver模式和session模式 - :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_or_options: ChromiumDriver对象或DriverOptions对象,只使用s模式时应传入False + :param timeout: 超时时间(秒),d模式时为寻找元素时间,s模式时为连接时间,默认10秒 + :param chromium_options: Driver对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ + chromium_options = chromium_options or driver_or_options self._mode = mode.lower() if self._mode not in ('s', 'd'): raise ValueError('mode参数只能是s或d。') self._has_driver = True self._has_session = True - self._debug = False - self._debug_recorder = None - self.address = None - - self._session = None - self._tab_obj = None - self._driver_options = None - self._session_options = None - self._response = None - self._download_set = None - self._set = None - 
self._screencast = None - self._DownloadKit = None - - self._set_start_options(driver_or_options, session_or_options) - self._set_runtime_settings() - self._connect_browser() - self._create_session() - - t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit - super(ChromiumBase, self).__init__(t) # 调用Base的__init__() - - def _set_start_options(self, dr_opt, se_opt): - """处理两种模式的设置 - :param dr_opt: ChromiumDriver或DriverOptions对象,为None则从ini读取,为False用默认信息创建 - :param se_opt: Session、SessionOptions对象或配置信息,为None则从ini读取,为False用默认信息创建 - :return: None - """ - # 浏览器配置 - if isinstance(dr_opt, ChromiumDriver): - self._tab_obj = dr_opt - self._driver_options = ChromiumOptions() - self._driver_options.debugger_address = dr_opt.address - dr_opt = False - - else: - if dr_opt is None: - self._driver_options = ChromiumOptions() - - elif dr_opt is False: - self._driver_options = ChromiumOptions(read_file=False) - - elif str(type(dr_opt)).endswith(("ChromiumOptions'>", "DriverOptions'>")): - self._driver_options = dr_opt - - else: - raise TypeError('driver_or_options参数只能接收ChromiumDriver, ChromiumOptions、None或False。') - - self.address = self._driver_options.debugger_address.replace('localhost', - '127.0.0.1').lstrip('http://').lstrip('https://') - - # Session配置 - if isinstance(se_opt, Session): - self._session = se_opt - self._session_options = SessionOptions() - se_opt = False - - else: - if se_opt is None: - self._session_options = SessionOptions() - - elif se_opt is False: - self._session_options = SessionOptions(read_file=False) - - elif isinstance(se_opt, SessionOptions): - self._session_options = se_opt - - else: - raise TypeError('session_or_options参数只能接收Session, SessionOptions、None或False。') - - self._timeouts = Timeout(self) - self._page_load_strategy = self._driver_options.page_load_strategy - self._download_path = None - - if se_opt is not False: - self.set.timeouts(implicit=self._session_options.timeout) - self._download_path = 
self._session_options.download_path - - if dr_opt is not False: - t = self._driver_options.timeouts - self.set.timeouts(t['implicit'], t['pageLoad'], t['script']) - self._download_path = self._driver_options.download_path - - def _set_runtime_settings(self): - """设置运行时用到的属性""" - pass + super().__init__(session_or_options=session_or_options) + if not chromium_options: + chromium_options = ChromiumOptions(read_file=chromium_options) + chromium_options.set_timeouts(base=self._timeout).set_paths(download_path=self.download_path) + super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout) + self.change_mode(self._mode, go=False, copy_cookies=False) def __call__(self, loc_or_str, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 + :param timeout: 超时时间(秒) :return: 子元素对象 """ if self._mode == 'd': @@ -135,6 +50,13 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 's': return super().__call__(loc_or_str) + @property + def set(self): + """返回用于设置的对象""" + if self._set is None: + self._set = WebPageSetter(self) + return self._set + @property def url(self): """返回当前url""" @@ -146,7 +68,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def _browser_url(self): """返回浏览器当前url""" - return super(SessionPage, self).url if self._tab_obj else None + return super(SessionPage, self).url if self._driver else None @property def title(self): @@ -156,6 +78,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 'd': return super(SessionPage, self).title + @property + def raw_data(self): + """返回页码原始数据数据""" + if self._mode == 's': + return super().raw_data + elif self._mode == 'd': + return super(SessionPage, self).html if self._has_driver else '' + @property def html(self): """返回页面html文本""" @@ -210,7 +140,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def timeout(self): """返回通用timeout设置""" - return self.timeouts.implicit + 
return self.timeouts.base @timeout.setter def timeout(self, second): @@ -218,39 +148,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): :param second: 秒数 :return: None """ - self.set.timeouts(implicit=second) - - @property - def download_path(self): - """返回默认下载路径""" - return super(SessionPage, self).download_path - - @property - def download_set(self): - """返回下载设置对象""" - if self._download_set is None: - self._download_set = WebPageDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = WebPageSetter(self) - return self._set + self.set.timeouts(base=second) def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url :param url: 目标url :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间(秒) + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 + :param timeout: 连接超时时间(秒),为None时使用页面对象timeouts.page_load属性值 :param kwargs: 连接参数,s模式专用 :return: url是否可用,d模式返回None时表示不确定 """ @@ -261,24 +167,25 @@ class WebPage(SessionPage, ChromiumPage, BasePage): timeout = self.timeouts.page_load if self._has_driver else self.timeout return super().get(url, show_errmsg, retry, interval, timeout, **kwargs) - def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): """用post方式跳转到url,会切换到s模式 :param url: 目标url - :param data: post方式时提交的数据 :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) + :param retry: 重试次数,为None时使用页面对象retry_times属性值 + :param interval: 重试间隔(秒),为None时使用页面对象retry_interval属性值 :param kwargs: 连接参数 - :return: url是否可用 + :return: s模式时返回url是否可用,d模式时返回获取到的Response对象 """ if self.mode == 'd': self.cookies_to_session() - return 
super().post(url, data, show_errmsg, retry, interval, **kwargs) + super().post(url, show_errmsg, retry, interval, **kwargs) + return self.response + return super().post(url, show_errmsg, retry, interval, **kwargs) def ele(self, loc_or_ele, timeout=None): """返回第一个符合条件的元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本节点文本 """ if self._mode == 's': @@ -289,7 +196,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def eles(self, loc_or_str, timeout=None): """返回页面中所有符合条件的元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :param timeout: 查找元素超时时间(秒),默认与页面等待时间一致 :return: 元素对象或属性、文本组成的列表 """ if self._mode == 's': @@ -333,8 +240,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage): # s模式转d模式 if self._mode == 'd': - if self._tab_obj is None: - self._connect_browser(self._driver_options) + if self._driver is None: + self._connect_browser(self._chromium_options) self._url = None if not self._has_driver else super(SessionPage, self).url self._has_driver = True @@ -370,7 +277,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if copy_user_agent: user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": user_agent}) + self._headers.update({"User-Agent": user_agent}) set_session_cookies(self.session, super(SessionPage, self).get_cookies()) @@ -378,9 +285,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """把session对象的cookies复制到浏览器""" if not self._has_driver: return - - # set_browser_cookies(self, super().get_cookies(as_dict=True)) - # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -395,24 +299,45 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 'd': return 
super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) - def get_tab(self, tab_id=None): + def get_tab(self, id_or_num=None): """获取一个标签页对象 - :param tab_id: 要获取的标签页id,为None时获取当前tab + :param id_or_num: 要获取的标签页id或序号,为None时获取当前tab,序号不是视觉排列顺序,而是激活顺序 :return: 标签页对象 """ - tab_id = tab_id or self.tab_id - return WebPageTab(self, tab_id) + if isinstance(id_or_num, str): + return WebPageTab(self, id_or_num) + elif isinstance(id_or_num, int): + return WebPageTab(self, self.tabs[id_or_num]) + elif id_or_num is None: + return WebPageTab(self, self.tab_id) + elif isinstance(id_or_num, WebPageTab): + return id_or_num + else: + raise TypeError(f'id_or_num需传入tab id或序号,非{id_or_num}。') + + def new_tab(self, url=None, new_window=False, background=False, new_context=False): + """新建一个标签页 + :param url: 新标签页跳转到的网址 + :param new_window: 是否在新窗口打开标签页 + :param background: 是否不激活新标签页,如new_window为True则无效 + :param new_context: 是否创建新的上下文 + :return: 新标签页对象 + """ + tab = WebPageTab(self, tab_id=self._new_tab(new_window, background, new_context)) + if url: + tab.get(url) + return tab def close_driver(self): """关闭driver及浏览器""" if self._has_driver: self.change_mode('s') try: - self.driver.Browser.close() + self.driver.run('Browser.close') except Exception: pass - self._tab_obj.stop() - self._tab_obj = None + self._driver.stop() + self._driver = None self._has_driver = None def close_session(self): @@ -420,10 +345,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._has_session: self.change_mode('d') self._session.close() + if self._response is not None: + self._response.close() self._session = None self._response = None self._has_session = None + def close(self): + """关闭标签页和Session""" + if self._has_driver: + self.close_tabs(self.tab_id) + if self._session: + self._session.close() + if self._response is not None: + self._response.close() + def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param 
loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 @@ -439,111 +375,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage): return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, relative=relative) - def quit(self): - """关闭浏览器,关闭session""" + def quit(self, timeout=5, force=True): + """关闭浏览器和Session + :param timeout: 等待浏览器关闭超时时间 + :param force: 关闭超时是否强制终止进程 + :return: None + """ if self._has_session: self._session.close() self._session = None self._response = None self._has_session = None if self._has_driver: - super(SessionPage, self).quit() - self._tab_obj = None + super(SessionPage, self).quit(timeout, force) + self._driver = None self._has_driver = None - -class WebPageSetter(ChromiumPageSetter): - def __init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumPageSetter(self._page) - - def cookies(self, cookies): - """添加cookies信息到浏览器或session对象 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - if self._page.mode == 's': - self._session_setter.headers(headers) - else: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page.mode == 's': - self._session_setter.user_agent(ua) - else: - self._chromium_setter.user_agent(ua, platform) - - -class WebPageDownloadSetter(ChromiumDownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page): - super().__init__(page) - self._session = page.session - - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - if self._page.mode == 'd': - 
self._cookies_to_session() - return self.DownloadKit - - def save_path(self, path): - """设置下载路径 - :param path: 下载路径 - :return: None - """ - path = path or '' - path = Path(path).absolute() - path.mkdir(parents=True, exist_ok=True) - path = str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - if self._page._has_driver: - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior=self._behavior, downloadPath=path, - eventsEnabled=True) - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path) - - def by_browser(self): - """设置使用浏览器下载文件""" - if not self._page._has_driver: - raise RuntimeError('浏览器未连接。') - - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser - - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) - self._page.driver.Page.downloadWillBegin = self._download_by_browser - - self._behavior = 'allow' - - def by_DownloadKit(self): - """设置使用DownloadKit下载文件""" - if self._page._has_driver: - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit - except CallMethodError: - raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') - - self._behavior = 'deny' + def __repr__(self): + return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>' diff --git a/DrissionPage/web_page.pyi b/DrissionPage/_pages/web_page.pyi similarity index 60% rename from DrissionPage/web_page.pyi rename to DrissionPage/_pages/web_page.pyi index d9ddda2..20bcdf0 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/_pages/web_page.pyi @@ 
-1,24 +1,26 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. """ from typing import Union, Tuple, List, Any -from DownloadKit import DownloadKit from requests import Session, Response -from .base import BasePage -from .chromium_driver import ChromiumDriver -from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter +from .chromium_page import ChromiumPage from .chromium_tab import WebPageTab -from .configs.chromium_options import ChromiumOptions -from .configs.driver_options import DriverOptions -from .configs.session_options import SessionOptions -from .session_element import SessionElement -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .._base.base import BasePage +from .._base.driver import Driver +from .._configs.chromium_options import ChromiumOptions +from .._configs.session_options import SessionOptions +from .._elements.chromium_element import ChromiumElement +from .._elements.none_element import NoneElement +from .._elements.session_element import SessionElement +from .._units.setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): @@ -26,21 +28,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __init__(self, mode: str = 'd', timeout: float = None, - driver_or_options: Union[ChromiumDriver, ChromiumOptions, DriverOptions, bool] = None, + chromium_options: Union[ChromiumOptions, bool] = None, session_or_options: Union[Session, SessionOptions, bool] = None) -> None: self._mode: str = ... self._has_driver: bool = ... self._has_session: bool = ... - self.address: str = ... self._session_options: Union[SessionOptions, None] = ... - self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ... 
- self._download_set: WebPageDownloadSetter = ... - self._download_path: str = ... - self._tab_obj: ChromiumDriver = ... + self._chromium_options: Union[ChromiumOptions, None] = ... def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], - timeout: float = None) -> Union[ChromiumElement, SessionElement]: ... + timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... # -----------------共有属性和方法------------------- @property @@ -52,6 +50,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def title(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... @@ -82,12 +83,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @timeout.setter def timeout(self, second: float) -> None: ... - @property - def download_path(self) -> str: ... - - @property - def download_set(self) -> WebPageDownloadSetter: ... - def get(self, url: str, show_errmsg: bool = False, @@ -110,16 +105,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement], - timeout: float = None) -> Union[ChromiumElement, SessionElement, str]: ... + timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ... def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, SessionElement, str]]: ... + timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ... - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, str, None]: ... + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ... - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ... 
def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... @@ -127,15 +121,25 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def cookies_to_browser(self) -> None: ... - def get_cookies(self, as_dict: bool = False, all_domains: bool = False, + def get_cookies(self, + as_dict: bool = False, + all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... - def get_tab(self, tab_id: str = None) -> WebPageTab: ... + def get_tab(self, id_or_num: Union[str, WebPageTab, int] = None) -> WebPageTab: ... + + def new_tab(self, + url: str = None, + new_window: bool = False, + background: bool = False, + new_context: bool = False) -> WebPageTab: ... def close_driver(self) -> None: ... def close_session(self) -> None: ... + def close(self) -> None: ... + # ----------------重写SessionPage的函数----------------------- def post(self, url: str, @@ -155,52 +159,22 @@ class WebPage(SessionPage, ChromiumPage, BasePage): hooks: Any | None = ..., stream: Any | None = ..., verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - @property - def download(self) -> DownloadKit: ... + cert: Any | None = ...) -> Union[bool, Response]: ... @property def set(self) -> WebPageSetter: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ - Union[ChromiumElement, str, ChromiumFrame]]]: ... + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], + timeout: float = None, + single: bool = True, + relative: bool = False, + raise_err: bool = None) \ + -> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], + List[Union[ChromiumElement, ChromiumFrame]]]: ... 
- def _set_start_options(self, dr_opt: Union[ChromiumDriver, DriverOptions, bool, None], + def _set_start_options(self, + dr_opt: Union[Driver, bool, None], se_opt: Union[Session, SessionOptions, bool, None]) -> None: ... - def quit(self) -> None: ... - - def _on_download_begin(self, **kwargs): ... - - -class WebPageSetter(ChromiumPageSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumPageSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... - - -class WebPageDownloadSetter(ChromiumDownloadSetter): - def __init__(self, page: WebPage): - self._page: WebPage = ... - self._behavior: str = ... - self._session: Session = None - - @property - def _switched_DownloadKit(self) -> DownloadKit: ... - - def save_path(self, path) -> None: ... - - def by_browser(self) -> None: ... - - def by_DownloadKit(self) -> None: ... - - def _download_by_DownloadKit(self, **kwargs) -> None: ... + def quit(self, timeout: float = 5, force: bool = True) -> None: ... diff --git a/DrissionPage/action_chains.py b/DrissionPage/_units/actions.py similarity index 70% rename from DrissionPage/action_chains.py rename to DrissionPage/_units/actions.py index b9edcaa..0a41be9 100644 --- a/DrissionPage/action_chains.py +++ b/DrissionPage/_units/actions.py @@ -1,20 +1,23 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" -from time import sleep +from time import sleep, perf_counter -from .commons.keys import modifierBit, keyDescriptionForString -from .commons.web import location_in_viewport +from ..errors import AlertExistsError +from .._functions.keys import modifierBit, keyDescriptionForString, input_text_or_keys, Keys +from .._functions.web import location_in_viewport -class ActionChains: +class Actions: """用于实现动作链的类""" def __init__(self, page): """ - :param page: ChromiumPage对象 + :param page: ChromiumBase对象 """ self.page = page self._dr = page.driver @@ -22,12 +25,13 @@ class ActionChains: self.curr_x = 0 # 视口坐标 self.curr_y = 0 - def move_to(self, ele_or_loc, offset_x=0, offset_y=0): + def move_to(self, ele_or_loc, offset_x=0, offset_y=0, duration=.5): """鼠标移动到元素中点,或页面上的某个绝对坐标。可设置偏移量 当带偏移量时,偏移量相对于元素左上角坐标 :param ele_or_loc: 元素对象、绝对坐标或文本定位符,坐标为tuple(int, int)形式 :param offset_x: 偏移量x :param offset_y: 偏移量y + :param duration: 拖动用时,传入0即瞬间到达 :return: self """ is_loc = False @@ -38,7 +42,7 @@ class ActionChains: elif isinstance(ele_or_loc, str) or 'ChromiumElement' in str(type(ele_or_loc)): ele_or_loc = self.page(ele_or_loc) self.page.scroll.to_see(ele_or_loc) - x, y = ele_or_loc.location if offset_x or offset_y else ele_or_loc.locations.midpoint + x, y = ele_or_loc.rect.location if offset_x or offset_y else ele_or_loc.rect.midpoint lx = x + offset_x ly = y + offset_y else: @@ -50,29 +54,44 @@ class ActionChains: clientHeight = self.page.run_js('return document.body.clientHeight;') self.page.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2) - # # 这样设计为了应付那些不随滚动条滚动的元素 + # 这样设计为了应付那些不随滚动条滚动的元素 if is_loc: cx, cy = location_to_client(self.page, lx, ly) else: - x, y = ele_or_loc.locations.viewport_location if offset_x or offset_y \ - else ele_or_loc.locations.viewport_midpoint + x, y = ele_or_loc.rect.viewport_location if offset_x or offset_y \ + else ele_or_loc.rect.viewport_midpoint cx = x + offset_x cy = y + offset_y - 
self._dr.Input.dispatchMouseEvent(type='mouseMoved', x=cx, y=cy, modifiers=self.modifier) - self.curr_x = cx - self.curr_y = cy + ox = cx - self.curr_x + oy = cy - self.curr_y + self.move(ox, oy, duration) return self - def move(self, offset_x=0, offset_y=0): + def move(self, offset_x=0, offset_y=0, duration=.5): """鼠标相对当前位置移动若干位置 :param offset_x: 偏移量x :param offset_y: 偏移量y + :param duration: 拖动用时,传入0即瞬间到达 :return: self """ - self.curr_x += offset_x - self.curr_y += offset_y - self._dr.Input.dispatchMouseEvent(type='mouseMoved', x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + duration = .02 if duration < .02 else duration + num = int(duration * 50) + + points = [(self.curr_x + i * (offset_x / num), + self.curr_y + i * (offset_y / num)) for i in range(1, num)] + points.append((self.curr_x + offset_x, self.curr_y + offset_y)) + + for x, y in points: + t = perf_counter() + self.curr_x = x + self.curr_y = y + self._dr.run('Input.dispatchMouseEvent', type='mouseMoved', + x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + ss = .02 - perf_counter() + t + if ss > 0: + sleep(ss) + return self def click(self, on_ele=None): @@ -121,7 +140,7 @@ class ActionChains: :return: self """ if on_ele: - self.move_to(on_ele) + self.move_to(on_ele, duration=0) self._release('left') return self @@ -139,7 +158,7 @@ class ActionChains: :return: self """ if on_ele: - self.move_to(on_ele) + self.move_to(on_ele, duration=0) self._release('right') return self @@ -157,7 +176,7 @@ class ActionChains: :return: self """ if on_ele: - self.move_to(on_ele) + self.move_to(on_ele, duration=0) self._release('middle') return self @@ -169,9 +188,9 @@ class ActionChains: :return: self """ if on_ele: - self.move_to(on_ele) - self._dr.Input.dispatchMouseEvent(type='mousePressed', button=button, clickCount=count, - x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + self.move_to(on_ele, duration=0) + self._dr.run('Input.dispatchMouseEvent', type='mousePressed', button=button, 
clickCount=count, + x=self.curr_x, y=self.curr_y, modifiers=self.modifier) return self def _release(self, button): @@ -179,8 +198,8 @@ class ActionChains: :param button: 要释放的按键 :return: self """ - self._dr.Input.dispatchMouseEvent(type='mouseReleased', button=button, clickCount=1, - x=self.curr_x, y=self.curr_y, modifiers=self.modifier) + self._dr.run('Input.dispatchMouseEvent', type='mouseReleased', button=button, clickCount=1, + x=self.curr_x, y=self.curr_y, modifiers=self.modifier) return self def scroll(self, delta_x=0, delta_y=0, on_ele=None): @@ -191,9 +210,9 @@ class ActionChains: :return: self """ if on_ele: - self.move_to(on_ele) - self._dr.Input.dispatchMouseEvent(type='mouseWheel', x=self.curr_x, y=self.curr_y, - deltaX=delta_x, deltaY=delta_y, modifiers=self.modifier) + self.move_to(on_ele, duration=0) + self._dr.run('Input.dispatchMouseEvent', type='mouseWheel', x=self.curr_x, y=self.curr_y, + deltaX=delta_x, deltaY=delta_y, modifiers=self.modifier) return self def up(self, pixel): @@ -225,15 +244,17 @@ class ActionChains: return self.move(pixel, 0) def key_down(self, key): - """按下键盘上的按键 - :param key: 按键,特殊字符见Keys + """按下键盘上的按键, + :param key: 使用Keys获取的按键,或'DEL'形式按键名称 :return: self """ + key = getattr(Keys, key.upper(), key) if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量 self.modifier |= modifierBit.get(key, 0) return self data = self._get_key_data(key, 'keyDown') + data['_ignore'] = AlertExistsError self.page.run_cdp('Input.dispatchKeyEvent', **data) return self @@ -242,24 +263,39 @@ class ActionChains: :param key: 按键,特殊字符见Keys :return: self """ + key = getattr(Keys, key.upper(), key) if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量 self.modifier ^= modifierBit.get(key, 0) return self data = self._get_key_data(key, 'keyUp') + data['_ignore'] = AlertExistsError self.page.run_cdp('Input.dispatchKeyEvent', **data) return self - def type(self, text): - """输入文本 - :param text: 要输入的文本,特殊字符和多个文本可用list或tuple传入 + def 
type(self, keys): + """用模拟键盘按键方式输入文本,可输入字符串,也可输入组合键,只能输入键盘上有的字符 + :param keys: 要按下的按键,特殊字符和多个文本可用list或tuple传入 :return: self """ - for i in text: + modifiers = [] + for i in keys: for character in i: self.key_down(character) - sleep(.05) - self.key_up(character) + if character in ('\ue009', '\ue008', '\ue00a', '\ue03d'): + modifiers.append(character) + else: + self.key_up(character) + for m in modifiers: + self.key_up(m) + return self + + def input(self, text): + """输入文本,也可输入组合键,组合键用tuple形式输入 + :param text: 文本值或按键组合 + :return: self + """ + input_text_or_keys(self.page, text) return self def wait(self, second): diff --git a/DrissionPage/_units/actions.pyi b/DrissionPage/_units/actions.pyi new file mode 100644 index 0000000..55f863c --- /dev/null +++ b/DrissionPage/_units/actions.pyi @@ -0,0 +1,108 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union, Tuple, Any, Literal + +from .._base.driver import Driver +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase + +KEYS = Literal['NULL', 'CANCEL', 'HELP', 'BACKSPACE', 'BACK_SPACE', 'meta', +'TAB', 'CLEAR', 'RETURN', 'ENTER', 'SHIFT', 'LEFT_SHIFT', 'CONTROL', 'command ', +'CTRL', 'LEFT_CONTROL', 'ALT', 'LEFT_ALT', 'PAUSE', 'ESCAPE', 'SPACE', +'PAGE_UP', 'PAGE_DOWN', 'END', 'HOME', 'LEFT', 'ARROW_LEFT', 'UP', +'ARROW_UP', 'RIGHT', 'ARROW_RIGHT', 'DOWN', 'ARROW_DOWN', 'INSERT', +'DELETE', 'DEL', 'SEMICOLON', 'EQUALS', 'NUMPAD0', 'NUMPAD1', 'NUMPAD2', +'NUMPAD3', 'NUMPAD4', 'NUMPAD5', 'NUMPAD6', 'NUMPAD7', 'NUMPAD8', 'NUMPAD9', +'MULTIPLY', 'ADD', 'SUBTRACT', 'DECIMAL', 'DIVIDE', 'F1', 'F2', +'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', 'META', 'COMMAND ', +'null', 'cancel', 'help', 'backspace', 'back_space', 'tab', 'clear', 'return', 'enter', +'shift', 'left_shift', 'control', 'ctrl', 'left_control', 'alt', 'left_alt', 
'pause', +'escape', 'space', 'page_up', 'page_down', 'end', 'home', 'left', 'arrow_left', 'up', +'arrow_up', 'right', 'arrow_right', 'down', 'arrow_down', 'insert', 'delete', 'del', +'semicolon', 'equals', 'numpad0', 'numpad1', 'numpad2', 'numpad3', 'numpad4', 'numpad5', +'numpad6', 'numpad7', 'numpad8', 'numpad9', 'multiply', 'add', 'subtract', 'decimal', +'divide', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', +'\ue000', '\ue002', '\ue003', '\ue004', '\ue005', '\ue006', '\ue007', '\ue008', '\ue009', +'\ue009', '\ue00a', '\ue00b', '\ue00c', '\ue00d', '\ue00e', '\ue00f', '\ue010', '\ue011', +'\ue012', '\ue013', '\ue014', '\ue015', '\ue016', '\ue017', '\ue017', '\ue018', '\ue019', +'\ue01a', '\ue01b', '\ue01c', '\ue01d', '\ue01e', '\ue01f', '\ue020', '\ue021', '\ue022', +'\ue023', '\ue024', '\ue025', '\ue027', '\ue028', '\ue029', '\ue031', '\ue032', '\ue033', '\ue034', +'\ue035', '\ue036', '\ue037', '\ue038', '\ue039', '\ue03a', '\ue03b', '\ue03c', '\ue03d', '\ue03d', +'`', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '=', 'q', 'w', +'e', 'r', 't', 'y', 'u', 'i', 'o', 'p', '[', ']', '\\', 'a', 's', 'd', 'f', +'g', 'h', 'j', 'k', 'l', ';', '\'', 'z', 'x', 'c', 'v', 'b', 'n', 'm', ',', +'.', '/', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', '+', +'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P', '{', '}', 'A', 'S', 'D', +'F', 'G', 'H', 'J', 'K', 'L', ':', '"', 'Z', 'X', 'C', 'V', 'B', 'N', 'M', '<', '>', '?' +] + + +class Actions: + + def __init__(self, page: ChromiumBase): + self.page: ChromiumBase = ... + self._dr: Driver = ... + self.modifier: int = ... + self.curr_x: int = ... + self.curr_y: int = ... + + def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[int, int], str], + offset_x: int = 0, offset_y: int = 0, duration: float = .5) -> Actions: ... + + def move(self, offset_x: int = 0, offset_y: int = 0, duration: float = .5) -> Actions: ... 
+ + def click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def r_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def m_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def db_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def _hold(self, on_ele: Union[ChromiumElement, str] = None, button: str = 'left', + count: int = 1) -> Actions: ... + + def _release(self, button: str) -> Actions: ... + + def scroll(self, delta_x: int = 0, delta_y: int = 0, + on_ele: Union[ChromiumElement, str] = None) -> Actions: ... + + def up(self, pixel: int) -> Actions: ... + + def down(self, pixel: int) -> Actions: ... + + def left(self, pixel: int) -> Actions: ... + + def right(self, pixel: int) -> Actions: ... + + def key_down(self, key: Union[KEYS, str]) -> Actions: ... + + def key_up(self, key: Union[KEYS, str]) -> Actions: ... + + def type(self, keys: Union[KEYS, str, list, tuple]) -> Actions: ... + + def input(self, text: Any) -> Actions: ... + + def wait(self, second: float) -> Actions: ... + + def _get_key_data(self, key: str, action: str) -> dict: ... + + +def location_to_client(page, lx: int, ly: int) -> tuple: ... 
diff --git a/DrissionPage/_units/clicker.py b/DrissionPage/_units/clicker.py
new file mode 100644
index 0000000..1c397ac
--- /dev/null
+++ b/DrissionPage/_units/clicker.py
@@ -0,0 +1,161 @@
+# -*- coding:utf-8 -*-
+"""
+@Author   : g1879
+@Contact  : g1879@qq.com
+@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
+@License  : BSD 3-Clause.
+"""
+from time import perf_counter, sleep
+
+from .._functions.settings import Settings
+from .._functions.web import offset_scroll
+from ..errors import CanNotClickError, CDPError, NoRectError, AlertExistsError
+
+
+class Clicker(object):
+    def __init__(self, ele):
+        """
+        :param ele: ChromiumElement
+        """
+        self._ele = ele
+
+    def __call__(self, by_js=False, timeout=1.5, wait_stop=True):
+        """点击元素
+        如果遇到遮挡,可选择是否用js点击
+        :param by_js: 是否用js点击,为None时先用模拟点击,遇到遮挡改用js,为True时直接用js点击,为False时只用模拟点击
+        :param timeout: 模拟点击的超时时间(秒),等待元素可见、可用、进入视口
+        :param wait_stop: 是否等待元素运动结束再执行点击
+        :return: 是否点击成功
+        """
+        return self.left(by_js, timeout, wait_stop)
+
+    def left(self, by_js=False, timeout=1.5, wait_stop=True):
+        """点击元素,可选择是否用js点击
+        :param by_js: 是否用js点击,为None时先用模拟点击,遇到遮挡改用js,为True时直接用js点击,为False时只用模拟点击
+        :param timeout: 模拟点击的超时时间(秒),等待元素可见、可用、进入视口
+        :param wait_stop: 是否等待元素运动结束再执行点击
+        :return: 是否点击成功
+        """
+        if self._ele.tag == 'option':
+            if self._ele.states.is_selected:
+                self._ele.parent('t:select').select.cancel_by_option(self._ele)
+            else:
+                self._ele.parent('t:select').select.by_option(self._ele)
+            return
+
+        if not by_js:  # 模拟点击
+            can_click = False
+            timeout = self._ele.page.timeout if timeout is None else timeout
+            rect = None
+            if timeout == 0:
+                try:
+                    self._ele.scroll.to_see()
+                    if self._ele.states.is_enabled and self._ele.states.is_displayed:
+                        rect = self._ele.rect.viewport_corners
+                        can_click = True
+                except NoRectError:
+                    if by_js is False:
+                        raise
+
+            else:
+                rect = self._ele.states.has_rect
+                end_time = perf_counter() + timeout
+                while not rect and perf_counter() < end_time:
+                    rect = self._ele.states.has_rect
+                    sleep(.001)
+
+                if wait_stop and rect:
+                    self._ele.wait.stop_moving(timeout=end_time - perf_counter())
+                if rect:
+                    self._ele.scroll.to_see()
+                    rect = self._ele.rect.corners
+                    while perf_counter() < end_time:
+                        if self._ele.states.is_enabled and self._ele.states.is_displayed:
+                            can_click = True
+                            break
+                        sleep(.001)
+
+                elif by_js is False:
+                    raise NoRectError
+
+            if can_click and not self._ele.states.is_in_viewport:
+                by_js = True
+
+            elif can_click and (by_js is False or not self._ele.states.is_covered):
+                x = rect[1][0] - (rect[1][0] - rect[0][0]) / 2
+                y = rect[0][1] + 3  # y of the top-left corner (index 1): probe 3px below the top edge
+                try:
+                    r = self._ele.page.run_cdp('DOM.getNodeForLocation', x=x, y=y, includeUserAgentShadowDOM=True,
+                                               ignorePointerEventsNone=True)
+                    if r['backendNodeId'] != self._ele._backend_id:
+                        vx, vy = self._ele.rect.viewport_midpoint
+                    else:
+                        vx, vy = self._ele.rect.viewport_click_point
+
+                except CDPError:
+                    vx, vy = self._ele.rect.viewport_midpoint
+
+                self._click(vx, vy)
+                return True
+
+        if by_js is not False:
+            self._ele.run_js('this.click();')
+            return True
+        if Settings.raise_when_click_failed:
+            raise CanNotClickError
+        return False
+
+    def right(self):
+        """右键单击"""
+        self._ele.page.scroll.to_see(self._ele)
+        x, y = self._ele.rect.viewport_click_point
+        self._click(x, y, 'right')
+
+    def middle(self):
+        """中键单击"""
+        self._ele.page.scroll.to_see(self._ele)
+        x, y = self._ele.rect.viewport_click_point
+        self._click(x, y, 'middle')
+
+    def at(self, offset_x=None, offset_y=None, button='left', count=1):
+        """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中间点
+        :param offset_x: 相对元素左上角坐标的x轴偏移量
+        :param offset_y: 相对元素左上角坐标的y轴偏移量
+        :param button: 点击哪个键,可选 left, middle, right, back, forward
+        :param count: 点击次数
+        :return: None
+        """
+        self._ele.page.scroll.to_see(self._ele)
+        if offset_x is None and offset_y is None:
+            w, h = self._ele.rect.size
+            offset_x = w // 2
+            offset_y = h // 2
+        x, y = offset_scroll(self._ele, offset_x, offset_y)
+        self._click(x, y, button, count)
+
+    def multiple(self, times=2):
+        """多次点击
+        :param times: 默认双击
+        :return: None
+        """
+        self.at(count=times)
+
+    def _click(self, client_x, client_y, button='left', count=1):
+        """实施点击
+        :param client_x: 视口中的x坐标
+        :param client_y: 视口中的y坐标
+        :param button: 'left' 'right' 'middle' 'back' 'forward'
+        :param count: 点击次数
+        :return: None
+        """
+        self._ele.page.run_cdp('Input.dispatchMouseEvent', type='mousePressed', x=client_x,
+                               y=client_y, button=button, clickCount=count, _ignore=AlertExistsError)
+        # sleep(.05)
+        self._ele.page.run_cdp('Input.dispatchMouseEvent', type='mouseReleased', x=client_x,
+                               y=client_y, button=button, _ignore=AlertExistsError)
+
+    # -------------即将废弃--------------
+
+    def twice(self):
+        """双击元素"""
+        self.at(count=2)
diff --git a/DrissionPage/_units/clicker.pyi b/DrissionPage/_units/clicker.pyi
new file mode 100644
index 0000000..895f762
--- /dev/null
+++ b/DrissionPage/_units/clicker.pyi
@@ -0,0 +1,29 @@
+# -*- coding:utf-8 -*-
+"""
+@Author   : g1879
+@Contact  : g1879@qq.com
+@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
+@License  : BSD 3-Clause.
+"""
+from typing import Union, Optional
+
+from .._elements.chromium_element import ChromiumElement
+
+
+class Clicker(object):
+    def __init__(self, ele: ChromiumElement):
+        self._ele: ChromiumElement = ...
+
+    def __call__(self, by_js: Optional[bool] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...
+
+    def left(self, by_js: Optional[bool] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...
+
+    def right(self) -> None: ...
+
+    def middle(self) -> None: ...
+
+    def at(self, offset_x: float = None, offset_y: float = None, button: str = 'left', count: int = 1) -> None: ...
+
+    def multiple(self, times: int = 2) -> None: ...
+
+    def _click(self, client_x: float, client_y: float, button: str = 'left', count: int = 1) -> None: ...
def _as_cookie_list(cookies):
    """Wrap a lone cookie in a list; return any other input untouched.

    A lone cookie is either an ``http.cookiejar.Cookie`` or a dict that has
    both a 'name' and a 'value' key.
    """
    if isinstance(cookies, Cookie):
        return [cookies]
    if isinstance(cookies, dict) and 'name' in cookies and 'value' in cookies:
        return [cookies]
    return cookies


class CookiesSetter(object):
    """Sets and deletes cookies of a browser page through CDP commands."""

    def __init__(self, page):
        self._page = page

    def __call__(self, cookies):
        """Set one or several cookies.

        :param cookies: cookie data; a single cookie is wrapped in a list first
        :return: None
        """
        set_browser_cookies(self._page, _as_cookie_list(cookies))

    def remove(self, name, url=None, domain=None, path=None):
        """Delete one cookie.

        :param name: the cookie's name field
        :param url: the cookie's url field, optional
        :param domain: the cookie's domain field, optional
        :param path: the cookie's path field, optional
        :return: None
        """
        optional = {'url': url, 'domain': domain, 'path': path}
        params = {'name': name}
        # Only the filters that were actually given are sent to the browser.
        params.update((key, value) for key, value in optional.items() if value is not None)
        self._page.run_cdp('Network.deleteCookies', **params)

    def clear(self):
        """Remove all browser cookies."""
        self._page.run_cdp('Network.clearBrowserCookies')


class SessionCookiesSetter(object):
    """Sets and deletes cookies stored on the page's session object."""

    def __init__(self, page):
        self._page = page

    def __call__(self, cookies):
        """Set one or several cookies on the session.

        :param cookies: cookie data; a single cookie is wrapped in a list first
        :return: None
        """
        set_session_cookies(self._page.session, _as_cookie_list(cookies))

    def remove(self, name):
        """Delete one cookie.

        :param name: the cookie's name field
        :return: None
        """
        jar = self._page.session.cookies
        jar.set(name, None)

    def clear(self):
        """Remove all session cookies."""
        jar = self._page.session.cookies
        jar.clear()


class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter):
    """Cookie setter that dispatches to the browser or the session by page mode."""

    def _active_mode(self):
        """Return 'd' or 's' when that mode is selected and its backend exists, else None."""
        page = self._page
        if page.mode == 'd' and page._has_driver:
            return 'd'
        if page.mode == 's' and page._has_session:
            return 's'
        return None

    def __call__(self, cookies):
        """Set one or several cookies on the backend of the current mode.

        :param cookies: cookie data
        :return: None
        """
        mode = self._active_mode()
        if mode == 'd':
            super().__call__(cookies)
        elif mode == 's':
            # Skip CookiesSetter in the MRO to reach the session implementation.
            super(CookiesSetter, self).__call__(cookies)

    def remove(self, name, url=None, domain=None, path=None):
        """Delete one cookie.

        :param name: the cookie's name field
        :param url: the cookie's url field, optional, only valid in d mode
        :param domain: the cookie's domain field, optional, only valid in d mode
        :param path: the cookie's path field, optional, only valid in d mode
        :return: None
        """
        mode = self._active_mode()
        if mode == 'd':
            super().remove(name, url, domain, path)
            return
        if mode != 's':
            return
        if url or domain or path:
            raise AttributeError('url、domain、path参数只有d模式下有效。')
        super(CookiesSetter, self).remove(name)

    def clear(self):
        """Remove all cookies of the backend of the current mode."""
        mode = self._active_mode()
        if mode == 'd':
            super().clear()
        elif mode == 's':
            super(CookiesSetter, self).clear()
+ + def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ... + + def clear(self) -> None: ... + + +class SessionCookiesSetter(object): + _page: SessionPage + + def __init__(self, page: SessionPage): ... + + def __call__(self, cookies: Union[RequestsCookieJar, Cookie, list, tuple, str, dict]) -> None: ... + + def remove(self, name: str) -> None: ... + + def clear(self) -> None: ... + + +class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter): + _page: Union[WebPage, WebPageTab] + + def __init__(self, page: SessionPage): ... + + def __call__(self, cookies: Union[RequestsCookieJar, Cookie, list, tuple, str, dict]) -> None: ... + + def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ... + + def clear(self) -> None: ... diff --git a/DrissionPage/_units/downloader.py b/DrissionPage/_units/downloader.py new file mode 100644 index 0000000..2d0cdb4 --- /dev/null +++ b/DrissionPage/_units/downloader.py @@ -0,0 +1,330 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
class DownloadManager(object):
    """Tracks browser downloads and applies per-tab path / rename settings."""

    def __init__(self, browser):
        """
        :param browser: Browser object
        """
        self._browser = browser
        self._page = browser.page
        self._when_download_file_exists = 'rename'

        t = TabDownloadSettings(self._page.tab_id)
        t.path = self._page.download_path
        self._missions = {}  # {guid: DownloadMission}
        self._tab_missions = {}  # {tab_id: [guid, ...]}
        self._flags = {}  # {tab_id: bool or DownloadMission}

        if self._page.download_path:
            self._enable_download_events(self._page.download_path)
        else:
            self._running = False

    def _enable_download_events(self, path):
        """Register the download callbacks and tell the browser where to save files.

        :param path: download directory handed to the browser
        :return: None
        """
        self._browser.driver.set_callback('Browser.downloadProgress', self._onDownloadProgress)
        self._browser.driver.set_callback('Browser.downloadWillBegin', self._onDownloadWillBegin)
        r = self._browser.run_cdp('Browser.setDownloadBehavior', downloadPath=path,
                                  behavior='allowAndName', eventsEnabled=True)
        if 'error' in r:
            print('浏览器版本太低无法使用下载管理功能。')
        # The manager is flagged as running even when the browser reported an error.
        self._running = True

    @property
    def missions(self):
        """Return the unfinished download missions as {guid: DownloadMission}."""
        return self._missions

    def set_path(self, tab_id, path):
        """Set the download path of one tab.

        :param tab_id: tab id
        :param path: download path (absolute path as str)
        :return: None
        """
        TabDownloadSettings(tab_id).path = path
        if tab_id == self._page.tab_id or not self._running:
            self._enable_download_events(path)

    def set_rename(self, tab_id, rename=None, suffix=None):
        """Set the file name to use for the next download of one tab.

        :param tab_id: tab id
        :param rename: file name; may omit the extension, the remote one is then appended
        :param suffix: explicit extension that replaces the remote file's extension
        :return: None
        """
        ts = TabDownloadSettings(tab_id)
        ts.rename = rename
        ts.suffix = suffix

    def set_file_exists(self, tab_id, mode):
        """Set what one tab does when the target file already exists.

        :param tab_id: tab id
        :param mode: strategy name; this class acts on 'skip' and 'overwrite',
                     the default stored by TabDownloadSettings is 'rename'
        :return: None
        """
        TabDownloadSettings(tab_id).when_file_exists = mode

    def set_flag(self, tab_id, flag):
        """Set the download waiting flag of one tab.

        :param tab_id: tab id
        :param flag: waiting flag
        :return: None
        """
        self._flags[tab_id] = flag

    def get_flag(self, tab_id):
        """Return the download waiting flag of one tab.

        :param tab_id: tab id
        :return: the stored flag (bool or mission object), or None when none is set
        """
        return self._flags.get(tab_id, None)

    def get_tab_missions(self, tab_id):
        """Return the running downloads of one tab.

        :param tab_id: tab id
        :return: list of the mission ids (guids) registered for the tab
        """
        return self._tab_missions.get(tab_id, [])

    def set_done(self, mission, state, final_path=None):
        """Mark a mission as finished and drop it from the bookkeeping.

        :param mission: mission object
        :param state: final state; ignored when the mission is already canceled or skipped
        :param final_path: final file path
        :return: None
        """
        if mission.state not in ('canceled', 'skipped'):
            mission.state = state
        mission.final_path = final_path
        tab_guids = self._tab_missions.get(mission.tab_id)
        if tab_guids is not None and mission.id in tab_guids:
            tab_guids.remove(mission.id)
        self._missions.pop(mission.id, None)
        mission._is_done = True

    def cancel(self, mission):
        """Cancel a mission; delete its file when one was already saved.

        :param mission: mission object
        :return: None
        """
        mission.state = 'canceled'
        self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)
        if mission.final_path:
            Path(mission.final_path).unlink(True)

    def skip(self, mission):
        """Skip a mission.

        :param mission: mission object
        :return: None
        """
        mission.state = 'skipped'
        self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)

    def clear_tab_info(self, tab_id):
        """Forget everything stored for a tab; called when the tab is closed.

        :param tab_id: tab id
        :return: None
        """
        self._tab_missions.pop(tab_id, None)
        self._flags.pop(tab_id, None)
        TabDownloadSettings.TABS.pop(tab_id, None)

    def _onDownloadWillBegin(self, **kwargs):
        """Callback for Browser.downloadWillBegin: create and register the mission."""
        guid = kwargs['guid']
        suggested = kwargs['suggestedFilename']
        tab_id = self._browser._frames.get(kwargs['frameId'], self._page.tab_id)

        # Tabs without their own settings fall back to the main page's settings.
        settings = TabDownloadSettings(tab_id if tab_id in TabDownloadSettings.TABS else self._page.tab_id)
        if settings.rename:
            if settings.suffix is not None:
                name = f'{settings.rename}.{settings.suffix}' if settings.suffix else settings.rename

            else:
                parts = suggested.rsplit('.', 1)
                ext_name = parts[-1] if len(parts) > 1 else ''
                parts = settings.rename.rsplit('.', 1)
                ext_rename = parts[-1] if len(parts) > 1 else ''
                # Without a remote extension there is nothing to append; appending
                # would leave a dangling '.' at the end of the name.
                if not ext_name or ext_rename == ext_name:
                    name = settings.rename
                else:
                    name = f'{settings.rename}.{ext_name}'

            # rename / suffix apply to a single download only.
            settings.rename = None
            settings.suffix = None

        elif settings.suffix is not None:
            name = suggested.rsplit('.', 1)[0]
            if settings.suffix:
                name = f'{name}.{settings.suffix}'
            settings.suffix = None

        else:
            name = suggested

        skip = False
        goal_path = Path(settings.path) / name
        if goal_path.exists():
            if settings.when_file_exists == 'skip':
                skip = True
            elif settings.when_file_exists == 'overwrite':
                goal_path.unlink()

        m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._page.download_path)
        self._missions[guid] = m

        if self.get_flag(tab_id) is False:  # the tab asked for downloads to be canceled
            self.cancel(m)
        elif skip:
            self.skip(m)
        else:
            self._tab_missions.setdefault(tab_id, []).append(guid)

        # Hand the mission to whoever set a waiting flag for this tab.
        if self.get_flag(tab_id) is not None:
            self._flags[tab_id] = m

    def _onDownloadProgress(self, **kwargs):
        """Callback for Browser.downloadProgress: update or finish the mission."""
        mission = self._missions.get(kwargs['guid'])
        if mission is None:
            return

        state = kwargs['state']
        if state == 'inProgress':
            mission.received_bytes = kwargs['receivedBytes']
            mission.total_bytes = kwargs['totalBytes']

        elif state == 'completed':
            if mission.state == 'skipped':
                Path(f'{mission.save_path}{sep}{mission.id}').unlink(True)
                self.set_done(mission, 'skipped')
                return
            mission.received_bytes = kwargs['receivedBytes']
            mission.total_bytes = kwargs['totalBytes']
            # NOTE(review): the skipped branch above reads the temp file from
            # mission.save_path while this one uses mission.path - confirm which
            # directory the browser really writes to when the two differ.
            form_path = f'{mission.path}{sep}{mission.id}'
            to_path = str(get_usable_path(f'{mission.path}{sep}{mission.name}'))
            move(form_path, to_path)
            self.set_done(mission, 'completed', final_path=to_path)

        else:  # 'canceled'
            self.set_done(mission, 'canceled')


class TabDownloadSettings(object):
    """Per-tab download settings; one shared instance per tab id."""

    TABS = {}  # {tab_id: TabDownloadSettings}

    def __new__(cls, tab_id):
        """
        :param tab_id: tab id
        """
        existing = cls.TABS.get(tab_id)
        if existing is not None:
            return existing
        return object.__new__(cls)

    def __init__(self, tab_id):
        """
        :param tab_id: tab id
        """
        # __init__ runs on every TabDownloadSettings(tab_id) call, including the
        # ones that returned a cached instance; never reset those.
        if hasattr(self, '_created'):
            return
        self._created = True
        self.tab_id = tab_id
        self.rename = None
        self.suffix = None
        self.path = ''
        self.when_file_exists = 'rename'

        TabDownloadSettings.TABS[tab_id] = self


class DownloadMission(object):
    """State of a single browser download."""

    def __init__(self, mgr, tab_id, _id, path, name, url, save_path):
        """
        :param mgr: DownloadManager object
        :param tab_id: tab id
        :param _id: mission id (download guid)
        :param path: target directory
        :param name: file name
        :param url: url
        :param save_path: directory the browser downloads into
        """
        self._mgr = mgr
        self.url = url
        self.tab_id = tab_id
        self.id = _id
        self.path = path
        self.name = name
        self.state = 'running'
        self.total_bytes = None
        self.received_bytes = 0
        self.final_path = None
        self.save_path = save_path
        self._is_done = False

    def __repr__(self):
        return f'<DownloadMission {id(self)} {self.rate}>'

    @property
    def rate(self):
        """Return the progress as a percentage, or None while the total size is unknown."""
        return round((self.received_bytes / self.total_bytes) * 100, 2) if self.total_bytes else None

    @property
    def is_done(self):
        """Return whether the mission has finished (completed, canceled or skipped)."""
        return self._is_done

    def cancel(self):
        """Cancel the mission; when it already finished, the downloaded file is deleted."""
        self._mgr.cancel(self)

    def wait(self, show=True, timeout=None, cancel_if_timeout=True):
        """Wait until the mission finishes.

        :param show: whether to print download information
        :param timeout: timeout in seconds; None waits without limit
        :param cancel_if_timeout: whether to cancel the mission when the timeout expires
        :return: the final file path on success, otherwise False
        """
        if show:
            print(f'url:{self.url}')
            print(f'文件名:{self.name}')
            print(f'目标路径:{self.path}')

        if timeout is None:
            while not self.is_done:
                if show:
                    print(f'\r{self.rate}% ', end='')
                sleep(.2)

        else:
            end_time = perf_counter() + timeout
            # Leave as soon as the mission is done rather than always sleeping
            # through the whole timeout.
            while not self.is_done and perf_counter() < end_time:
                if show:
                    print(f'\r{self.rate}% ', end='')
                sleep(.2)

            if not self.is_done and cancel_if_timeout:
                self.cancel()

        if show:
            if self.state == 'completed':
                print(f'下载完成 {self.final_path}')
            elif self.state == 'canceled':
                print(f'下载取消')
            elif self.state == 'skipped':
                print(f'已跳过')
            print()

        return self.final_path if self.final_path else False
+ + def set_done(self, mission: DownloadMission, state: str, final_path: str = None) -> None: ... + + def cancel(self, mission: DownloadMission) -> None: ... + + def skip(self, mission: DownloadMission) -> None: ... + + def clear_tab_info(self, tab_id: str) -> None: ... + + def _onDownloadWillBegin(self, **kwargs) -> None: ... + + def _onDownloadProgress(self, **kwargs) -> None: ... + + +class TabDownloadSettings(object): + TABS: dict = ... + tab_id: str = ... + waiting_flag: Optional[bool, dict] = ... + rename: Optional[str] = ... + suffix: Optional[str] = ... + path: Optional[str] = ... + when_file_exists: str = ... + + def __init__(self, tab_id: str): ... + + +class DownloadMission(object): + tab_id: str = ... + _mgr: DownloadManager = ... + url: str = ... + id: str = ... + path: str = ... + name: str = ... + state: str = ... + total_bytes: Optional[int] = ... + received_bytes: int = ... + final_path: Optional[str] = ... + save_path: str = ... + _is_done: bool = ... + + def __init__(self, mgr: DownloadManager, tab_id: str, _id: str, path: str, name: str, url: str, + save_path: str): ... + + @property + def rate(self) -> float: ... + + @property + def is_done(self) -> bool: ... + + def cancel(self) -> None: ... + + def wait(self, show: bool = True, timeout=None, cancel_if_timeout=True) -> Union[bool, str]: ... diff --git a/DrissionPage/_units/listener.py b/DrissionPage/_units/listener.py new file mode 100644 index 0000000..9063b53 --- /dev/null +++ b/DrissionPage/_units/listener.py @@ -0,0 +1,595 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
class Listener(object):
    """Base class of the network listener."""

    def __init__(self, page):
        """
        :param page: ChromiumBase object
        """
        self._page = page
        self._address = page.address
        self._target_id = page._target_id
        self._driver = None
        self._running_requests = 0

        self._caught = None
        self._request_ids = None
        self._extra_info_ids = None

        self.listening = False
        self.tab_id = None

        self._targets = True
        self._is_regex = False
        self._method = ('GET', 'POST')
        self._res_type = True

    @property
    def targets(self):
        """Return the listening targets (True or a set of url fragments / patterns)."""
        return self._targets

    def set_targets(self, targets=True, is_regex=False, method=('GET', 'POST'), res_type=True):
        """Define which packets are caught. An argument of None keeps the current setting.

        :param targets: url features to match; str or list/tuple/set of str, True catches everything
        :param is_regex: whether the targets are regular expressions
        :param method: request methods to listen to; several allowed, True listens to all
        :param res_type: resource types to listen to; several allowed, True listens to all. Values:
            Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource,
            WebSocket, Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
        :return: None
        """
        if targets is not None:
            if not isinstance(targets, (str, list, tuple, set)) and targets is not True:
                raise TypeError('targets只能是str、list、tuple、set、True。')
            if targets is True:
                self._targets = True
            else:
                self._targets = {targets} if isinstance(targets, str) else set(targets)

        if is_regex is not None:
            self._is_regex = is_regex

        if method is not None:
            if isinstance(method, str):
                self._method = {method.upper()}
            elif isinstance(method, (list, tuple, set)):
                self._method = set(i.upper() for i in method)
            elif method is True:
                self._method = True
            else:
                raise TypeError('method参数只能是str、list、tuple、set、True类型。')

        if res_type is not None:
            # Stored upper-cased; incoming event types are upper-cased before comparison.
            if isinstance(res_type, str):
                self._res_type = {res_type.upper()}
            elif isinstance(res_type, (list, tuple, set)):
                self._res_type = set(i.upper() for i in res_type)
            elif res_type is True:
                self._res_type = True
            else:
                raise TypeError('res_type参数只能是str、list、tuple、set、True类型。')

    def start(self, targets=None, is_regex=None, method=None, res_type=None):
        """Start listening; results caught so far are cleared on every call.

        :param targets: url features to match; several allowed, True catches everything
        :param is_regex: whether the targets are regular expressions; None keeps the setting
        :param method: request methods to listen to; True listens to all, None keeps the setting
        :param res_type: resource types to listen to; True listens to all, None keeps the setting
        :return: None
        """
        if targets or is_regex is not None or method or res_type:
            self.set_targets(targets, is_regex, method, res_type)
        self.clear()

        if self.listening:
            return

        self._driver = Driver(self._target_id, 'page', self._address)
        self._driver.run('Network.enable')

        self._set_callback()
        self.listening = True

    def wait(self, count=1, timeout=None, fit_count=True, raise_err=None):
        """Wait until the requested number of matching packets has been caught.

        :param count: number of packets to catch
        :param timeout: timeout in seconds; None (or 0) waits without limit
        :param fit_count: whether the full count is required; on timeout True yields False,
                          False yields the packets caught so far
        :param raise_err: whether to raise on timeout; None follows the Settings value
        :return: a packet when count is 1, a list when larger, False on timeout with fit_count
        """
        if not self.listening:
            raise RuntimeError('监听未启动或已暂停。')
        if not timeout:
            while self._caught.qsize() < count:
                sleep(.05)
            fail = False

        else:
            end = perf_counter() + timeout
            while True:
                if self._caught.qsize() >= count:
                    fail = False
                    break
                remaining = end - perf_counter()
                if remaining <= 0:
                    fail = True
                    break
                # Yield the CPU between polls; never sleep past the deadline.
                sleep(min(.05, remaining))

        if fail:
            if fit_count or not self._caught.qsize():
                # An explicit raise_err wins; Settings only decides when it is None.
                if raise_err is True or (raise_err is None and Settings.raise_when_wait_failed is True):
                    raise WaitTimeoutError(f'等待数据包失败(等待{timeout}秒)。')
                else:
                    return False
            else:
                return [self._caught.get_nowait() for _ in range(self._caught.qsize())]

        if count == 1:
            return self._caught.get_nowait()

        return [self._caught.get_nowait() for _ in range(count)]

    def steps(self, count=None, timeout=None, gap=1):
        """Yield caught packets step by step, e.g. to act (turn a page) after every few packets.

        :param count: total number of packets to catch; None means unlimited
        :param timeout: waiting time per step in seconds; None means unlimited
        :param gap: how many packets are returned per step
        :return: generator yielding a packet (gap == 1) or a list of gap packets
        """
        caught = 0
        end = perf_counter() + timeout if timeout else None
        while True:
            if timeout and perf_counter() > end:
                return
            if self._caught.qsize() >= gap:
                yield self._caught.get_nowait() if gap == 1 else [self._caught.get_nowait() for _ in range(gap)]
                if timeout:
                    end = perf_counter() + timeout
                if count:
                    caught += gap
                    if caught >= count:
                        return
            sleep(.05)

    def stop(self):
        """Stop listening, clear the caught packets and shut the driver down."""
        if self.listening:
            self.pause()
            self.clear()
        # No driver exists before start() or after an earlier stop().
        if self._driver is not None:
            self._driver.stop()
            self._driver = None

    def pause(self, clear=True):
        """Pause listening.

        :param clear: whether to clear the caught queue
        :return: None
        """
        if self.listening:
            self._driver.set_callback('Network.requestWillBeSent', None)
            self._driver.set_callback('Network.responseReceived', None)
            self._driver.set_callback('Network.loadingFinished', None)
            self._driver.set_callback('Network.loadingFailed', None)
            self.listening = False
        if clear:
            self.clear()

    def resume(self):
        """Resume a paused listening."""
        if self.listening:
            return
        self._set_callback()
        self.listening = True

    def clear(self):
        """Drop all caught packets and bookkeeping of requests in flight."""
        self._request_ids = {}
        self._extra_info_ids = {}
        self._caught = Queue(maxsize=0)
        self._running_requests = 0

    def wait_silent(self, timeout=None):
        """Wait until no request is running any more.

        :param timeout: timeout in seconds; None waits without limit
        :return: whether the wait succeeded
        """
        if not self.listening:
            raise RuntimeError('监听未启动,用listen.start()启动。')
        if timeout is None:
            while self._running_requests > 0:
                sleep(.1)
            return True

        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            if self._running_requests <= 0:
                return True
            sleep(.1)
        return False

    def _to_target(self, target_id, address, page):
        """Switch the listener to another page object.

        :param target_id: _target_id of the new page object
        :param address: address of the new page object
        :param page: the new page object
        :return: None
        """
        self._target_id = target_id
        self._address = address
        self._page = page
        debug = False
        if self._driver:
            debug = self._driver._debug
            self._driver.stop()
        if self.listening:
            self._driver = Driver(self._target_id, 'page', self._address)
            self._driver._debug = debug
            self._driver.run('Network.enable')
            self._set_callback()

    def _set_callback(self):
        """Register the callbacks for the network events."""
        self._driver.set_callback('Network.requestWillBeSent', self._requestWillBeSent)
        self._driver.set_callback('Network.requestWillBeSentExtraInfo', self._requestWillBeSentExtraInfo)
        self._driver.set_callback('Network.responseReceived', self._response_received)
        self._driver.set_callback('Network.responseReceivedExtraInfo', self._responseReceivedExtraInfo)
        self._driver.set_callback('Network.loadingFinished', self._loading_finished)
        self._driver.set_callback('Network.loadingFailed', self._loading_failed)

    def _method_and_type_match(self, kwargs):
        """Return whether a requestWillBeSent event passes the method and resource type filters."""
        return ((self._method is True or kwargs['request']['method'] in self._method)
                and (self._res_type is True or kwargs.get('type', '').upper() in self._res_type))

    def _requestWillBeSent(self, **kwargs):
        """Callback for Network.requestWillBeSent: create a packet for matching requests."""
        self._running_requests += 1
        rid = kwargs['requestId']
        p = None
        if self._targets is True:
            if self._method_and_type_match(kwargs):
                p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, True))
                p._raw_request = kwargs
                request = kwargs['request']
                if request.get('hasPostData', None) and not request.get('postData', None):
                    p._raw_post_data = self._driver.run('Network.getRequestPostData',
                                                        requestId=rid).get('postData', None)

        else:
            url = kwargs['request']['url']
            for target in self._targets:
                hit = search(target, url) if self._is_regex else target in url
                if hit and self._method_and_type_match(kwargs):
                    p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, target))
                    p._raw_request = kwargs
                    break

        # 'obj' is False for requests that are not listened to, so the extra-info
        # callbacks can tell "not a target" apart from "request not seen yet".
        self._extra_info_ids.setdefault(rid, {})['obj'] = p if p else False

    def _requestWillBeSentExtraInfo(self, **kwargs):
        """Callback for Network.requestWillBeSentExtraInfo: store the extra request info."""
        self._running_requests += 1
        self._extra_info_ids.setdefault(kwargs['requestId'], {})['request'] = kwargs

    def _response_received(self, **kwargs):
        """Callback for Network.responseReceived: attach the response to its packet."""
        request = self._request_ids.get(kwargs['requestId'], None)
        if request:
            request._raw_response = kwargs['response']
            request._resource_type = kwargs['type']

    def _responseReceivedExtraInfo(self, **kwargs):
        """Callback for Network.responseReceivedExtraInfo: attach or store the extra response info."""
        self._running_requests -= 1
        r = self._extra_info_ids.get(kwargs['requestId'], None)
        if r:
            obj = r.get('obj', None)
            if obj is False:
                self._extra_info_ids.pop(kwargs['requestId'], None)
            elif isinstance(obj, DataPacket):
                obj._requestExtraInfo = r.get('request', None)
                obj._responseExtraInfo = kwargs
                self._extra_info_ids.pop(kwargs['requestId'], None)
            else:
                # requestWillBeSent has not arrived yet; keep the info for later.
                r['response'] = kwargs

    def _loading_finished(self, **kwargs):
        """Callback for Network.loadingFinished: fetch the body and queue the packet."""
        self._running_requests -= 1
        rid = kwargs['requestId']
        packet = self._request_ids.get(rid)
        if packet:
            r = self._driver.run('Network.getResponseBody', requestId=rid)
            if 'body' in r:
                packet._raw_body = r['body']
                packet._base64_body = r['base64Encoded']
            else:
                packet._raw_body = ''
                packet._base64_body = False

            if (packet._raw_request['request'].get('hasPostData', None)
                    and not packet._raw_request['request'].get('postData', None)):
                r = self._driver.run('Network.getRequestPostData', requestId=rid, _timeout=1)
                packet._raw_post_data = r.get('postData', None)

        r = self._extra_info_ids.get(rid, None)
        if r:
            obj = r.get('obj', None)
            # The 'request' / 'response' entries live in this request's record r;
            # _extra_info_ids itself is keyed by request id.
            if obj is False or (isinstance(obj, DataPacket) and not r.get('request')):
                self._extra_info_ids.pop(rid, None)
            elif isinstance(obj, DataPacket) and r.get('response'):
                obj._requestExtraInfo = r['request']
                obj._responseExtraInfo = r.get('response')
                self._extra_info_ids.pop(rid, None)
            # Otherwise the record stays so _responseReceivedExtraInfo can attach
            # the extra response info once it arrives.

        self._request_ids.pop(rid, None)

        if packet:
            self._caught.put(packet)

    def _loading_failed(self, **kwargs):
        """Callback for Network.loadingFailed: mark the packet as failed and queue it."""
        self._running_requests -= 1
        r_id = kwargs['requestId']
        dp = self._request_ids.get(r_id, None)
        if dp:
            dp._raw_fail_info = kwargs
            dp._resource_type = kwargs['type']
            dp.is_failed = True

        r = self._extra_info_ids.get(r_id, None)
        if r:
            obj = r.get('obj', None)
            if obj is False and r.get('response'):
                self._extra_info_ids.pop(r_id, None)
            elif isinstance(obj, DataPacket):
                response = r.get('response')
                if response:
                    obj._requestExtraInfo = r['request']
                    obj._responseExtraInfo = response
                    self._extra_info_ids.pop(r_id, None)

        self._request_ids.pop(r_id, None)

        if dp:
            self._caught.put(dp)


class FrameListener(Listener):
    """Listener for a frame; ignores events of other frames unless the frame is cross-domain."""

    def _requestWillBeSent(self, **kwargs):
        """Callback for Network.requestWillBeSent, limited to this frame."""
        if not self._page._is_diff_domain and kwargs.get('frameId', None) != self._page._frame_id:
            return
        super()._requestWillBeSent(**kwargs)

    def _response_received(self, **kwargs):
        """Callback for Network.responseReceived, limited to this frame."""
        if not self._page._is_diff_domain and kwargs.get('frameId', None) != self._page._frame_id:
            return
        super()._response_received(**kwargs)


class DataPacket(object):
    """A caught packet: request, response and failure data of one network request."""

    def __init__(self, tab_id, target):
        """
        :param tab_id: id of the tab that produced the packet
        :param target: the listening target that matched (True when everything is caught)
        """
        self.tab_id = tab_id
        self.target = target
        self.is_failed = False

        self._raw_request = None
        self._raw_post_data = None
        self._raw_response = None
        self._raw_body = None
        self._raw_fail_info = None

        # Wrapper objects, built lazily by the properties below.
        self._request = None
        self._response = None
        self._fail_info = None

        self._base64_body = False
        self._requestExtraInfo = None
        self._responseExtraInfo = None
        self._resource_type = None

    def __repr__(self):
        t = f'"{self.target}"' if self.target is not True else True
        return f'<DataPacket target={t} url="{self.url}">'

    @property
    def _request_extra_info(self):
        return self._requestExtraInfo

    @property
    def _response_extra_info(self):
        return self._responseExtraInfo

    @property
    def url(self):
        """Return the request url."""
        return self.request.url

    @property
    def method(self):
        """Return the request method."""
        return self.request.method

    @property
    def frameId(self):
        """Return the frameId recorded in the raw request event."""
        return self._raw_request.get('frameId')

    @property
    def resourceType(self):
        """Return the resource type reported by the response or failure event."""
        return self._resource_type

    @property
    def request(self):
        """Return the Request wrapper, created on first access."""
        if self._request is None:
            self._request = Request(self, self._raw_request['request'], self._raw_post_data)
        return self._request

    @property
    def response(self):
        """Return the Response wrapper, created on first access."""
        if self._response is None:
            self._response = Response(self, self._raw_response, self._raw_body, self._base64_body)
        return self._response

    @property
    def fail_info(self):
        """Return the FailInfo wrapper, created on first access."""
        if self._fail_info is None:
            self._fail_info = FailInfo(self, self._raw_fail_info)
        return self._fail_info

    def wait_extra_info(self, timeout=None):
        """Wait until the extra response info has been attached.

        :param timeout: timeout in seconds; None waits without limit
        :return: whether the wait succeeded
        """
        if timeout is None:
            while self._responseExtraInfo is None:
                sleep(.1)
            return True

        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            if self._responseExtraInfo is not None:
                return True
            sleep(.1)
        return False


class Request(object):
    """Request part of a packet; unknown attributes are looked up in the raw request dict."""

    def __init__(self, data_packet, raw_request, post_data):
        self._data_packet = data_packet
        self._request = raw_request
        self._raw_post_data = post_data
        self._postData = None
        self._headers = None

    def __getattr__(self, item):
        return self._request.get(item, None)

    @property
    def headers(self):
        """Return the headers as a case-insensitive dict."""
        if self._headers is None:
            self._headers = CaseInsensitiveDict(self._request['headers'])
        return self._headers

    @property
    def postData(self):
        """Return the post data, parsed as JSON when possible, False when there is none."""
        if self._postData is None:
            if self._raw_post_data:
                postData = self._raw_post_data
            elif self._request.get('postData', None):
                postData = self._request['postData']
            else:
                postData = False
            try:
                self._postData = loads(postData)
            except (JSONDecodeError, TypeError):
                # Not JSON (or no data at all): keep the raw value.
                self._postData = postData
        return self._postData

    @property
    def extra_info(self):
        """Return the extra request info wrapped in a RequestExtraInfo."""
        return RequestExtraInfo(self._data_packet._request_extra_info or {})


class Response(object):
    """Response part of a packet; unknown attributes are looked up in the raw response dict."""

    def __init__(self, data_packet, raw_response, raw_body, base64_body):
        self._data_packet = data_packet
        self._response = raw_response
        self._raw_body = raw_body
        self._is_base64_body = base64_body
        self._body = None
        self._headers = None

    def __getattr__(self, item):
        return self._response.get(item, None) if self._response else None

    @property
    def headers(self):
        """Return the headers as a case-insensitive dict."""
        if self._headers is None:
            self._headers = CaseInsensitiveDict(self._response['headers'])
        return self._headers

    @property
    def raw_body(self):
        """Return the unprocessed body text."""
        return self._raw_body

    @property
    def body(self):
        """Return the body: base64 bodies are decoded to bytes, JSON text is parsed, other text is returned as is."""
        if self._body is None:
            if self._is_base64_body:
                self._body = b64decode(self._raw_body)

            else:
                try:
                    self._body = loads(self._raw_body)
                except (JSONDecodeError, TypeError):
                    self._body = self._raw_body

        return self._body

    @property
    def extra_info(self):
        """Return the extra response info wrapped in a ResponseExtraInfo."""
        return ResponseExtraInfo(self._data_packet._response_extra_info or {})


class ExtraInfo(object):
    """Read-only view of an extra-info dict; unknown attributes are looked up in it."""

    def __init__(self, extra_info):
        self._extra_info = extra_info

    @property
    def all_info(self):
        """Return all extra info as a dict."""
        return self._extra_info

    def __getattr__(self, item):
        return self._extra_info.get(item, None)


class RequestExtraInfo(ExtraInfo):
    pass


class ResponseExtraInfo(ExtraInfo):
    pass


class FailInfo(object):
    """Failure part of a packet; unknown attributes are looked up in the raw failure dict."""

    def __init__(self, data_packet, fail_info):
        self._data_packet = data_packet
        self._fail_info = fail_info

    def __getattr__(self, item):
        return self._fail_info.get(item, None) if self._fail_info else None
# -*- coding:utf-8 -*-
"""
@Author   : g1879
@Contact  : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License  : BSD 3-Clause.
"""
from queue import Queue
from typing import Union, Dict, List, Iterable, Optional, Literal

from requests.structures import CaseInsensitiveDict

from .._base.driver import Driver
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame

__RES_TYPE__ = Literal['Document', 'Stylesheet', 'Image', 'Media', 'Font', 'Script', 'TextTrack', 'XHR', 'Fetch',
'Prefetch', 'EventSource', 'WebSocket', 'Manifest', 'SignedExchange', 'Ping', 'CSPViolationReport', 'Preflight', 'Other']


class Listener(object):
    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...
        self._address: str = ...
        self._target_id: str = ...
        self._targets: Union[str, dict] = ...
        self._method: set = ...
        self._res_type: set = ...
        self._caught: Queue = ...
        self._is_regex: bool = ...
        self._driver: Driver = ...
        self._request_ids: dict = ...
        self._extra_info_ids: dict = ...
        self.listening: bool = ...
        self._running_requests: int = ...

    @property
    def targets(self) -> Optional[set]: ...

    # Optional[] accepts exactly one type, so "several types or None" is spelled with Union.
    def set_targets(self,
                    targets: Union[str, list, tuple, set, bool, None] = True,
                    is_regex: Optional[bool] = False,
                    method: Union[str, list, tuple, set, bool, None] = ('GET', 'POST'),
                    res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = True) -> None: ...

    def start(self,
              targets: Union[str, list, tuple, set, bool, None] = None,
              is_regex: Optional[bool] = None,
              method: Union[str, list, tuple, set, bool, None] = None,
              res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = None) -> None: ...

    def stop(self) -> None: ...

    def pause(self, clear: bool = True) -> None: ...

    def resume(self) -> None: ...

    def wait(self,
             count: int = 1,
             timeout: float = None,
             fit_count: bool = True,
             raise_err: bool = None) -> Union[List[DataPacket], DataPacket, None]: ...

    @property
    def results(self) -> Union[DataPacket, Dict[str, List[DataPacket]], Literal[False]]: ...

    def clear(self) -> None: ...

    def wait_silent(self, timeout: float = None) -> bool: ...

    def _to_target(self, target_id: str, address: str, page: ChromiumBase) -> None: ...

    def _requestWillBeSent(self, **kwargs) -> None: ...

    def _requestWillBeSentExtraInfo(self, **kwargs) -> None: ...

    def _response_received(self, **kwargs) -> None: ...

    def _responseReceivedExtraInfo(self, **kwargs) -> None: ...

    def _loading_finished(self, **kwargs) -> None: ...

    def _loading_failed(self, **kwargs) -> None: ...

    def steps(self,
              count: int = None,
              timeout: float = None,
              gap: int = 1) -> Iterable[Union[DataPacket, List[DataPacket]]]: ...

    def _set_callback(self) -> None: ...


class FrameListener(Listener):
    def __init__(self, page: ChromiumFrame):
        self._page: ChromiumFrame = ...
        self._is_diff: bool = ...


class DataPacket(object):
    """Manages one returned data packet."""

    def __init__(self, tab_id: str, target: Union[str, bool]):
        self.tab_id: str = ...
        self.target: Union[str, bool] = ...
        self.is_failed: bool = ...
        self._raw_request: Optional[dict] = ...
        self._raw_response: Optional[dict] = ...
        self._raw_post_data: str = ...
        self._raw_body: str = ...
        self._raw_fail_info: Optional[dict] = ...
        self._base64_body: bool = ...
        self._request: Request = ...
        self._response: Response = ...
        self._fail_info: Optional[FailInfo] = ...
        self._resource_type: str = ...
        self._requestExtraInfo: Optional[dict] = ...
        self._responseExtraInfo: Optional[dict] = ...

    @property
    def _request_extra_info(self) -> Optional[dict]: ...

    @property
    def _response_extra_info(self) -> Optional[dict]: ...

    @property
    def url(self) -> str: ...

    @property
    def method(self) -> str: ...

    @property
    def frameId(self) -> str: ...

    @property
    def resourceType(self) -> str: ...

    @property
    def request(self) -> Request: ...

    @property
    def response(self) -> Response: ...

    @property
    def fail_info(self) -> Optional[FailInfo]: ...

    def wait_extra_info(self, timeout: float = None) -> bool: ...


class Request(object):
    url: str = ...
    _headers: Union[CaseInsensitiveDict, None] = ...
    method: str = ...

    urlFragment = ...
    hasPostData = ...
    postDataEntries = ...
    mixedContentType = ...
    initialPriority = ...
    referrerPolicy = ...
    isLinkPreload = ...
    trustTokenParams = ...
    isSameSite = ...

    def __init__(self, data_packet: DataPacket, raw_request: dict, post_data: str):
        self._data_packet: DataPacket = ...
        self._request: dict = ...
        self._raw_post_data: str = ...
        self._postData: Union[str, dict, bool, None] = ...

    @property
    def headers(self) -> CaseInsensitiveDict: ...

    # the implementation yields False when the request carried no post data
    @property
    def postData(self) -> Union[str, dict, bool]: ...

    @property
    def extra_info(self) -> RequestExtraInfo: ...


class Response(object):
    url = ...
    status = ...
    statusText = ...
    headersText = ...
    mimeType = ...
    requestHeaders = ...
    requestHeadersText = ...
    connectionReused = ...
    connectionId = ...
    remoteIPAddress = ...
    remotePort = ...
    fromDiskCache = ...
    fromServiceWorker = ...
    fromPrefetchCache = ...
    encodedDataLength = ...
    timing = ...
    serviceWorkerResponseSource = ...
    responseTime = ...
    cacheStorageCacheName = ...
    protocol = ...
    alternateProtocolUsage = ...
    securityState = ...
    securityDetails = ...

    def __init__(self, data_packet: DataPacket, raw_response: dict, raw_body: str, base64_body: bool):
        self._data_packet: DataPacket = ...
        self._response: dict = ...
        self._raw_body: str = ...
        self._is_base64_body: bool = ...
        self._body: Union[str, dict, bytes, None] = ...
        self._headers: Union[CaseInsensitiveDict, None] = ...

    @property
    def extra_info(self) -> ResponseExtraInfo: ...

    @property
    def headers(self) -> CaseInsensitiveDict: ...

    @property
    def raw_body(self) -> str: ...

    # a base64 body is decoded to bytes by the implementation
    @property
    def body(self) -> Union[str, dict, bytes]: ...


class ExtraInfo(object):
    def __init__(self, extra_info: dict):
        self._extra_info: dict = ...

    @property
    def all_info(self) -> dict: ...


class RequestExtraInfo(ExtraInfo):
    requestId: str = ...
    associatedCookies: List[dict] = ...
    headers: dict = ...
    connectTiming: dict = ...
    clientSecurityState: dict = ...
    siteHasCookieInOtherPartition: bool = ...


class ResponseExtraInfo(ExtraInfo):
    requestId: str = ...
    blockedCookies: List[dict] = ...
    headers: dict = ...
    resourceIPAddressSpace: str = ...
    statusCode: int = ...
    headersText: str = ...
    cookiePartitionKey: str = ...
    cookiePartitionKeyOpaque: bool = ...


class FailInfo(object):
    _data_packet: DataPacket
    _fail_info: Optional[dict]
    errorText: str
    canceled: bool
    blockedReason: Optional[str]
    corsErrorStatus: Optional[str]

    def __init__(self, data_packet: DataPacket, fail_info: dict): ...
class ElementRect(object):
    """Position and size information of an element."""

    def __init__(self, ele):
        """
        :param ele: ChromiumElement
        """
        self._ele = ele

    @property
    def corners(self):
        """Return the four corners in page coordinates.

        Order: top-left, top-right, bottom-right, bottom-left.
        """
        vr = self._get_viewport_rect('border')
        # one offset lookup is shared by all four corners
        sx, sy = self._get_scroll_offset()
        return [(vr[0] + sx, vr[1] + sy), (vr[2] + sx, vr[3] + sy), (vr[4] + sx, vr[5] + sy), (vr[6] + sx, vr[7] + sy)]

    @property
    def viewport_corners(self):
        """Return the four corners in viewport coordinates.

        Order: top-left, top-right, bottom-right, bottom-left.
        """
        r = self._get_viewport_rect('border')
        return (r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7])

    @property
    def size(self):
        """Return the element size as (width, height), measured on the border box."""
        border = self._get_viewport_rect('border')
        return border[2] - border[0], border[5] - border[1]

    @property
    def location(self):
        """Return the page coordinates of the top-left corner."""
        cl = self.viewport_location
        return self._get_page_coord(cl[0], cl[1])

    @property
    def midpoint(self):
        """Return the page coordinates of the middle point."""
        cl = self.viewport_midpoint
        return self._get_page_coord(cl[0], cl[1])

    @property
    def click_point(self):
        """Return the page coordinates of the point that receives clicks."""
        cl = self.viewport_click_point
        return self._get_page_coord(cl[0], cl[1])

    @property
    def viewport_location(self):
        """Return the viewport coordinates of the top-left corner."""
        m = self._get_viewport_rect('border')
        return m[0], m[1]

    @property
    def viewport_midpoint(self):
        """Return the viewport coordinates of the middle point."""
        m = self._get_viewport_rect('border')
        return m[0] + (m[2] - m[0]) // 2, m[3] + (m[5] - m[3]) // 2

    @property
    def viewport_click_point(self):
        """Return the viewport coordinates of the point that receives clicks.

        The point is horizontally centred and 3 px below the top edge of the padding box.
        """
        m = self._get_viewport_rect('padding')
        return self.viewport_midpoint[0], m[1] + 3

    @property
    def screen_location(self):
        """Return the screen coordinates of the top-left corner; screen origin is (0, 0)."""
        return self._get_screen_coord(*self.viewport_location)

    @property
    def screen_midpoint(self):
        """Return the screen coordinates of the middle point; screen origin is (0, 0)."""
        return self._get_screen_coord(*self.viewport_midpoint)

    @property
    def screen_click_point(self):
        """Return the screen coordinates of the point that receives clicks; screen origin is (0, 0)."""
        return self._get_screen_coord(*self.viewport_click_point)

    def _get_viewport_rect(self, quad):
        """Return one box-model quad of the element in viewport coordinates.

        :param quad: box type: margin, border or padding
        :return: the quad as returned by DOM.getBoxModel (x, y of each of the four corners)
        """
        return self._ele.page.run_cdp('DOM.getBoxModel', backendNodeId=self._ele._backend_id,
                                      nodeId=self._ele._node_id, objectId=self._ele._obj_id)['model'][quad]

    def _get_scroll_offset(self):
        """Return (pageX, pageY) of the visual viewport reported by Page.getLayoutMetrics."""
        r = self._ele.page.run_cdp_loaded('Page.getLayoutMetrics')['visualViewport']
        return r['pageX'], r['pageY']

    def _get_page_coord(self, x, y):
        """Convert viewport coordinates to page coordinates."""
        sx, sy = self._get_scroll_offset()
        return x + sx, y + sy

    def _get_screen_coord(self, x, y):
        """Convert viewport coordinates to screen coordinates.

        Adds the screen position of the viewport and scales by window.devicePixelRatio.
        """
        vx, vy = self._ele.page.rect.viewport_location
        pr = self._ele.page.run_js('return window.devicePixelRatio;')
        return (vx + x) * pr, (y + vy) * pr


class TabRect(object):
    """Position and size information of a tab: window, viewport and page."""

    def __init__(self, page):
        """
        :param page: page or tab object
        """
        self._page = page

    @property
    def window_state(self):
        """Return the window state: normal, fullscreen, maximized or minimized."""
        return self._get_window_rect()['windowState']

    @property
    def window_location(self):
        """Return the window position on screen; screen origin is (0, 0)."""
        r = self._get_window_rect()
        if r['windowState'] in ('maximized', 'fullscreen'):
            return 0, 0
        # NOTE(review): the 7 px shift presumably skips an invisible window border - confirm
        return r['left'] + 7, r['top']

    @property
    def window_size(self):
        """Return the window size as (width, height)."""
        r = self._get_window_rect()
        if r['windowState'] == 'fullscreen':
            return r['width'], r['height']
        # NOTE(review): the constants below presumably remove window borders - confirm
        elif r['windowState'] == 'maximized':
            return r['width'] - 16, r['height'] - 16
        else:
            return r['width'] - 16, r['height'] - 7

    @property
    def page_location(self):
        """Return the screen coordinates of the page's top-left corner; screen origin is (0, 0)."""
        w, h = self.viewport_location
        r = self._get_page_rect()['layoutViewport']
        return w - r['pageX'], h - r['pageY']

    @property
    def viewport_location(self):
        """Return the screen coordinates of the viewport; screen origin is (0, 0)."""
        w_bl, h_bl = self.window_location
        w_bs, h_bs = self.window_size
        w_vs, h_vs = self.viewport_size_with_scrollbar
        return w_bl + w_bs - w_vs, h_bl + h_bs - h_vs

    @property
    def size(self):
        """Return the total page size as (width, height)."""
        r = self._get_page_rect()['contentSize']
        return r['width'], r['height']

    @property
    def viewport_size(self):
        """Return the viewport size without scrollbars as (width, height)."""
        r = self._get_page_rect()['visualViewport']
        return r['clientWidth'], r['clientHeight']

    @property
    def viewport_size_with_scrollbar(self):
        """Return the viewport size including scrollbars as (width, height)."""
        r = self._page.run_js('return window.innerWidth.toString() + " " + window.innerHeight.toString();')
        w, h = r.split(' ')
        return int(w), int(h)

    def _get_page_rect(self):
        """Return the result of Page.getLayoutMetrics."""
        return self._page.run_cdp_loaded('Page.getLayoutMetrics')

    def _get_window_rect(self):
        """Return the window bounds of this tab as reported by the browser object."""
        return self._page.browser.get_window_bounds(self._page.tab_id)


class FrameRect(object):
    """Position and size information of a cross-domain iframe."""

    def __init__(self, frame):
        """
        :param frame: frame object
        """
        self._frame = frame

    @property
    def location(self):
        """Return the page coordinates of the iframe element's top-left corner."""
        return self._frame.frame_ele.rect.location

    @property
    def viewport_location(self):
        """Return the viewport coordinates of the iframe element."""
        return self._frame.frame_ele.rect.viewport_location

    @property
    def screen_location(self):
        """Return the screen coordinates of the iframe element's top-left corner."""
        return self._frame.frame_ele.rect.screen_location

    @property
    def size(self):
        """Return the size of the page inside the frame as (width, height)."""
        w = self._frame.doc_ele.run_js('return this.body.scrollWidth')
        h = self._frame.doc_ele.run_js('return this.body.scrollHeight')
        return w, h

    @property
    def viewport_size(self):
        """Return the viewport size as (width, height), i.e. the size of the iframe element."""
        return self._frame.frame_ele.rect.size

    @property
    def corners(self):
        """Return the four corners of the iframe element in page coordinates."""
        return self._frame.frame_ele.rect.corners

    @property
    def viewport_corners(self):
        """Return the four corners of the iframe element in viewport coordinates."""
        return self._frame.frame_ele.rect.viewport_corners
self._frame.frame_ele.rect.viewport_corners diff --git a/DrissionPage/_units/rect.pyi b/DrissionPage/_units/rect.pyi new file mode 100644 index 0000000..2e442ce --- /dev/null +++ b/DrissionPage/_units/rect.pyi @@ -0,0 +1,120 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" + +from typing import Tuple, Union, List + +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame +from .._pages.chromium_page import ChromiumPage +from .._pages.chromium_tab import ChromiumTab, WebPageTab +from .._pages.web_page import WebPage + + +class ElementRect(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + @property + def size(self) -> Tuple[float, float]: ... + + @property + def location(self) -> Tuple[float, float]: ... + + @property + def midpoint(self) -> Tuple[float, float]: ... + + @property + def click_point(self) -> Tuple[float, float]: ... + + @property + def viewport_location(self) -> Tuple[float, float]: ... + + @property + def viewport_midpoint(self) -> Tuple[float, float]: ... + + @property + def viewport_click_point(self) -> Tuple[float, float]: ... + + @property + def screen_location(self) -> Tuple[float, float]: ... + + @property + def screen_midpoint(self) -> Tuple[float, float]: ... + + @property + def screen_click_point(self) -> Tuple[float, float]: ... + + @property + def corners(self) -> Tuple[Tuple[float, float], ...]: ... + + @property + def viewport_corners(self) -> Tuple[Tuple[float, float], ...]: ... + + def _get_viewport_rect(self, quad: str) -> Union[list, None]: ... + + def _get_page_coord(self, x: float, y: float) -> Tuple[float, float]: ... + + +class TabRect(object): + def __init__(self, page: ChromiumBase): + self._page: Union[ChromiumPage, ChromiumTab, WebPage, WebPageTab] = ... 
+ + @property + def window_state(self) -> str: ... + + @property + def window_location(self) -> Tuple[int, int]: ... + + @property + def page_location(self) -> Tuple[int, int]: ... + + @property + def viewport_location(self) -> Tuple[int, int]: ... + + @property + def window_size(self) -> Tuple[int, int]: ... + + @property + def size(self) -> Tuple[int, int]: ... + + @property + def viewport_size(self) -> Tuple[int, int]: ... + + @property + def viewport_size_with_scrollbar(self) -> Tuple[int, int]: ... + + def _get_page_rect(self) -> dict: ... + + def _get_window_rect(self) -> dict: ... + + +class FrameRect(object): + def __init__(self, frame: ChromiumFrame): + self._frame: ChromiumFrame = ... + + @property + def location(self) -> Tuple[float, float]: ... + + @property + def viewport_location(self) -> Tuple[float, float]: ... + + @property + def screen_location(self) -> Tuple[float, float]: ... + + @property + def size(self) -> Tuple[float, float]: ... + + @property + def viewport_size(self) -> Tuple[float, float]: ... + + @property + def corners(self) -> Tuple[Tuple[float, float], ...]: ... + + @property + def viewport_corners(self) -> Tuple[Tuple[float, float], ...]: ... diff --git a/DrissionPage/_units/screencast.py b/DrissionPage/_units/screencast.py new file mode 100644 index 0000000..958dbfc --- /dev/null +++ b/DrissionPage/_units/screencast.py @@ -0,0 +1,192 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
from base64 import b64decode
from os.path import sep
from pathlib import Path
from random import randint
from shutil import rmtree
from tempfile import gettempdir
from threading import Thread
from time import sleep, time


class Screencast(object):
    """Records a page as a series of screenshots or as a video file."""

    def __init__(self, page):
        """
        :param page: page object to record
        """
        self._page = page
        self._path = None
        self._tmp_path = None
        self._running = False
        self._enable = False
        self._mode = 'video'

    @property
    def set_mode(self):
        """Return the object used to choose the recording mode."""
        return ScreencastMode(self)

    def start(self, save_path=None):
        """Start recording.

        :param save_path: folder the recording is saved to
        :return: None
        :raises ValueError: when no save path has been set
        """
        self.set_save_path(save_path)
        if self._path is None:
            raise ValueError('save_path必须设置。')

        if self._mode in ('frugal_video', 'video'):
            # video modes collect their frames in a throw-away folder
            tmp_name = f'screencast_tmp_{time()}_{randint(0, 100)}'
            custom_tmp = self._page.browser.page._chromium_options.tmp_path
            if custom_tmp:
                self._tmp_path = Path(custom_tmp) / tmp_name
            else:
                self._tmp_path = Path(gettempdir()) / 'DrissionPage' / tmp_name
            self._tmp_path.mkdir(parents=True, exist_ok=True)

        if self._mode.startswith('frugal'):
            self._page.driver.set_callback('Page.screencastFrame', self._onScreencastFrame)
            self._page.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100)

        elif not self._mode.startswith('js'):
            self._running = True
            self._enable = True
            Thread(target=self._run).start()

        else:  # js mode
            js = '''
            async function () {
                stream = await navigator.mediaDevices.getDisplayMedia({video: true, audio: true})
                mime = MediaRecorder.isTypeSupported("video/webm; codecs=vp9")
                               ? "video/webm; codecs=vp9"
                               : "video/webm"
                mediaRecorder = new MediaRecorder(stream, {mimeType: mime})
                DrissionPage_Screencast_chunks = []
                mediaRecorder.addEventListener('dataavailable', function(e) {
                    DrissionPage_Screencast_blob_ok = false;
                    DrissionPage_Screencast_chunks.push(e.data);
                    DrissionPage_Screencast_blob_ok = true;
                })
                mediaRecorder.start()

                mediaRecorder.addEventListener('stop', function(){
                    while(DrissionPage_Screencast_blob_ok==false){}
                    DrissionPage_Screencast_blob = new Blob(DrissionPage_Screencast_chunks,
                        {type: DrissionPage_Screencast_chunks[0].type});
                })
            }
            '''
            print('请手动选择要录制的目标。')
            self._page.run_js('var DrissionPage_Screencast_blob;var DrissionPage_Screencast_blob_ok=false;')
            self._page.run_js(js)

    def stop(self, video_name=None):
        """Stop recording.

        :param video_name: name of the video file; the current time is used when None
        :return: path of the video file, or of the image folder in the imgs modes
        :raises TypeError: when the video path contains non-ASCII characters
        :raises ModuleNotFoundError: when cv2 is not installed
        :raises FileNotFoundError: when no frame was captured
        """
        if video_name and not video_name.endswith('.mp4'):
            video_name = f'{video_name}.mp4'
        name = video_name or f'{time()}.mp4'
        path = f'{self._path}{sep}{name}'

        if self._mode.startswith('js'):
            return self._stop_js(path)

        if self._mode.startswith('frugal'):
            self._page.driver.set_callback('Page.screencastFrame', None)
            self._page.run_cdp('Page.stopScreencast')
        else:
            self._enable = False
            while self._running:  # wait for the screenshot thread to finish its last shot
                sleep(.1)

        if self._mode.endswith('imgs'):
            return str(Path(self._path).absolute())

        # the message promises a check of folder and file name, so test the whole path
        if not path.isascii():
            raise TypeError('转换成视频仅支持英文路径和文件名。')

        try:
            from cv2 import VideoWriter, imread, VideoWriter_fourcc
        except ModuleNotFoundError:
            raise ModuleNotFoundError('请先安装cv2,pip install opencv-python')

        try:
            # glob() yields files in arbitrary order; frames must be written chronologically
            frames = sorted(Path(self._tmp_path or self._path).glob('*.jpg'), key=self._frame_order)
            if not frames:
                raise FileNotFoundError('未找到可用于生成视频的截图。')

            first = imread(str(frames[0]))
            height, width = first.shape[:2]
            writer = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, (width, height))
            try:
                writer.write(first)  # the frame used for sizing is part of the video too
                for frame in frames[1:]:
                    writer.write(imread(str(frame)))
            finally:
                writer.release()  # without release the file may be left unfinished

        finally:
            if self._tmp_path is not None:
                rmtree(self._tmp_path)
                self._tmp_path = None

        return path

    def set_save_path(self, save_path=None):
        """Set the folder recordings are saved to; a falsy value leaves the current one untouched.

        :param save_path: folder path, created when missing
        :return: None
        :raises TypeError: when save_path is an existing file
        """
        if save_path:
            save_path = Path(save_path)
            if save_path.exists() and save_path.is_file():
                raise TypeError('save_path必须指定文件夹。')
            save_path.mkdir(parents=True, exist_ok=True)
            self._path = save_path

    @staticmethod
    def _frame_order(frame):
        """Sort key that orders frame files by the timestamp used as their file name."""
        try:
            return 0, float(frame.stem), frame.name
        except ValueError:
            # not a timestamp name: keep it after the timestamped frames, ordered by name
            return 1, 0.0, frame.name

    def _stop_js(self, path):
        """Stop the in-page MediaRecorder and write the recorded blob to ``path``."""
        self._page.run_js('mediaRecorder.stop();', as_expr=True)
        while not self._page.run_js('return DrissionPage_Screencast_blob_ok;'):
            sleep(.1)
        blob = self._page.run_js('return DrissionPage_Screencast_blob;')
        uuid = self._page.run_cdp('IO.resolveBlob', objectId=blob['result']['objectId'])['uuid']
        data = self._page.run_cdp('IO.read', handle=f'blob:{uuid}')['data']
        with open(path, 'wb') as f:
            f.write(b64decode(data))
        return path

    def _run(self):
        """Worker of the non-frugal modes: take screenshots until recording is disabled."""
        self._running = True
        path = self._tmp_path or self._path
        try:
            while self._enable:
                self._page.get_screenshot(path=path, name=f'{time()}.jpg')
                sleep(.04)
        finally:
            # stop() waits on this flag; it must drop even when a screenshot fails
            self._running = False

    def _onScreencastFrame(self, **kwargs):
        """Callback of the frugal modes: save the received frame and acknowledge it."""
        path = self._tmp_path or self._path
        with open(f'{path}{sep}{kwargs["metadata"]["timestamp"]}.jpg', 'wb') as f:
            f.write(b64decode(kwargs['data']))
        self._page.run_cdp('Page.screencastFrameAck', sessionId=kwargs['sessionId'])


class ScreencastMode(object):
    """Chooses the recording mode of a Screencast object."""

    def __init__(self, screencast):
        """
        :param screencast: Screencast object to configure
        """
        self._screencast = screencast

    def video_mode(self):
        """Continuous video mode; the video has no sound."""
        self._screencast._mode = 'video'

    def frugal_video_mode(self):
        """Frugal video mode: frames are recorded only when the page changes; no sound."""
        self._screencast._mode = 'frugal_video'

    def js_video_mode(self):
        """Record through js; the video can have sound but recording must be started by hand."""
        self._screencast._mode = 'js_video'

    def frugal_imgs_mode(self):
        """Frugal image mode: a screenshot is saved only when the page changes."""
        self._screencast._mode = 'frugal_imgs'

    def imgs_mode(self):
        """Image mode: screenshots are taken continuously."""
        self._screencast._mode = 'imgs'
100644 index 0000000..92d7d0f --- /dev/null +++ b/DrissionPage/_units/screencast.pyi @@ -0,0 +1,49 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from typing import Union + +from .._pages.chromium_base import ChromiumBase + + +class Screencast(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + self._path: Path = ... + self._tmp_path: Path = ... + self._running: bool = ... + self._enable: bool = ... + self._mode: str = ... + + @property + def set_mode(self) -> ScreencastMode: ... + + def start(self, save_path: Union[str, Path] = None) -> None: ... + + def stop(self, video_name: str = None) -> str: ... + + def set_save_path(self, save_path: Union[str, Path] = None) -> None: ... + + def _run(self) -> None: ... + + def _onScreencastFrame(self, **kwargs) -> None: ... + + +class ScreencastMode(object): + def __init__(self, screencast: Screencast): + self._screencast: Screencast = ... + + def video_mode(self) -> None: ... + + def frugal_video_mode(self) -> None: ... + + def js_video_mode(self) -> None: ... + + def frugal_imgs_mode(self) -> None: ... + + def imgs_mode(self) -> None: ... diff --git a/DrissionPage/_units/scroller.py b/DrissionPage/_units/scroller.py new file mode 100644 index 0000000..545b3f7 --- /dev/null +++ b/DrissionPage/_units/scroller.py @@ -0,0 +1,177 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
from time import sleep, perf_counter


class Scroller(object):
    """Scrolls an element or a page by running js on it."""

    def __init__(self, ele):
        """
        :param ele: element object
        """
        self._driver = ele
        self.t1 = self.t2 = 'this'
        self._wait_complete = False

    def _run_js(self, js):
        """Fill the scroll targets into the js template, run it, then wait for the scroll to settle."""
        script = js.format(self.t1, self.t2, self.t2)
        self._driver.run_js(script)
        self._wait_scrolled()

    def to_top(self):
        """Scroll to the top; horizontal position is kept."""
        self._run_js('{}.scrollTo({}.scrollLeft, 0);')

    def to_bottom(self):
        """Scroll to the bottom; horizontal position is kept."""
        self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight);')

    def to_half(self):
        """Scroll to the vertical middle; horizontal position is kept."""
        self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight/2);')

    def to_rightmost(self):
        """Scroll to the right end; vertical position is kept."""
        self._run_js('{}.scrollTo({}.scrollWidth, {}.scrollTop);')

    def to_leftmost(self):
        """Scroll to the left end; vertical position is kept."""
        self._run_js('{}.scrollTo(0, {}.scrollTop);')

    def to_location(self, x, y):
        """Scroll to the given position.

        :param x: horizontal distance
        :param y: vertical distance
        :return: None
        """
        self._run_js(f'{{}}.scrollTo({x}, {y});')

    def up(self, pixel=300):
        """Scroll up by some pixels; horizontal position is kept.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js(f'{{}}.scrollBy(0, {-pixel});')

    def down(self, pixel=300):
        """Scroll down by some pixels; horizontal position is kept.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js(f'{{}}.scrollBy(0, {pixel});')

    def left(self, pixel=300):
        """Scroll left by some pixels; vertical position is kept.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js(f'{{}}.scrollBy({-pixel}, 0);')

    def right(self, pixel=300):
        """Scroll right by some pixels; vertical position is kept.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js(f'{{}}.scrollBy({pixel}, 0);')

    def _wait_scrolled(self):
        """Wait until two consecutive position readings agree, or the page timeout passes.

        Does nothing unless waiting has been switched on through ``_wait_complete``.
        """
        if not self._wait_complete:
            return

        if 'ChromiumElement' in str(type(self._driver)):
            page = self._driver.page
        else:
            page = self._driver

        def read_position():
            metrics = page.run_cdp('Page.getLayoutMetrics')
            return metrics['layoutViewport']['pageX'], metrics['layoutViewport']['pageY']

        previous = read_position()
        deadline = perf_counter() + page.timeout
        while perf_counter() < deadline:
            sleep(.1)
            current = read_position()
            if current == previous:
                break
            previous = current


class ElementScroller(Scroller):
    """Scroller bound to an element."""

    def to_see(self, center=None):
        """Scroll the page until the element is visible.

        :param center: whether to scroll the element towards the middle; when None, only if it is covered
        :return: None
        """
        self._driver.page.scroll.to_see(self._driver, center=center)

    def to_center(self):
        """Scroll the element as close to the middle of the viewport as possible."""
        self._driver.page.scroll.to_see(self._driver, center=True)


class PageScroller(Scroller):
    """Scroller bound to a page."""

    def __init__(self, page):
        """
        :param page: page object
        """
        super().__init__(page)
        self.t1 = 'window'
        self.t2 = 'document.documentElement'

    def to_see(self, loc_or_ele, center=None):
        """Scroll the page until the element is visible.

        :param loc_or_ele: locator of the element: loc tuple or query string
        :param center: whether to scroll the element towards the middle; when None, only if it is covered
        :return: None
        """
        target = self._driver._ele(loc_or_ele)
        self._to_see(target, center)

    def _to_see(self, ele, center):
        """Do the actual scrolling until the element is visible.

        :param ele: element object
        :param center: whether to scroll the element towards the middle; when None, only if it is covered
        :return: None
        """
        if center:
            flag = 'true'
        else:
            flag = 'false'
        ele.run_js(f'this.scrollIntoViewIfNeeded({flag});')

        # an explicit False never centres; None centres only a covered element
        needs_centering = bool(center) or (center is not False and ele.states.is_covered)
        if needs_centering:
            ele.run_js('''function getWindowScrollTop() {var scroll_top = 0;
                    if (document.documentElement && document.documentElement.scrollTop) {
                      scroll_top = document.documentElement.scrollTop;
                    } else if (document.body) {scroll_top = document.body.scrollTop;}
                    return scroll_top;}
            const { top, height } = this.getBoundingClientRect();
                    const elCenter = top + height / 2;
                    const center = window.innerHeight / 2;
                    window.scrollTo({top: getWindowScrollTop() - (center - elCenter),
                    behavior: 'instant'});''')
        self._wait_scrolled()


class FrameScroller(PageScroller):
    """Scroller bound to the document element of a frame."""

    def __init__(self, frame):
        """
        :param frame: ChromiumFrame object
        """
        super().__init__(frame.doc_ele)
        self.t1 = self.t2 = 'this.documentElement'

    def to_see(self, loc_or_ele, center=None):
        """Scroll the page until the element is visible.

        :param loc_or_ele: an element object, or its locator: loc tuple or query string
        :param center: whether to scroll the element towards the middle; when None, only if it is covered
        :return: None
        """
        if 'ChromiumElement' in str(type(loc_or_ele)):
            target = loc_or_ele
        else:
            target = self._driver._ele(loc_or_ele)
        self._to_see(target, center)
center=None): + """滚动页面直到元素可见 + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :return: None + """ + ele = loc_or_ele if 'ChromiumElement' in str(type(loc_or_ele)) else self._driver._ele(loc_or_ele) + self._to_see(ele, center) diff --git a/DrissionPage/_units/scroller.pyi b/DrissionPage/_units/scroller.pyi new file mode 100644 index 0000000..8233948 --- /dev/null +++ b/DrissionPage/_units/scroller.pyi @@ -0,0 +1,77 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union + +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase + + +class Scroller(object): + def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement]): + self.t1: str = ... + self.t2: str = ... + self._driver: Union[ChromiumBase, ChromiumElement] = ... + self._wait_complete: bool = ... + + def _run_js(self, js: str): ... + + def to_top(self) -> None: ... + + def to_bottom(self) -> None: ... + + def to_half(self) -> None: ... + + def to_rightmost(self) -> None: ... + + def to_leftmost(self) -> None: ... + + def to_location(self, x: int, y: int) -> None: ... + + def up(self, pixel: int = 300) -> None: ... + + def down(self, pixel: int = 300) -> None: ... + + def left(self, pixel: int = 300) -> None: ... + + def right(self, pixel: int = 300) -> None: ... + + def _wait_scrolled(self) -> None: ... + + +class ElementScroller(Scroller): + + def to_see(self, center: Union[bool, None] = None) -> None: ... + + def to_center(self) -> None: ... + + +class PageScroller(Scroller): + def __init__(self, page: ChromiumBase): ... + + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ... + + def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... 
+ + +class FrameScroller(PageScroller): + def __init__(self, frame): + """ + :param frame: ChromiumFrame对象 + """ + self._driver = frame.doc_ele + self.t1 = self.t2 = 'this.documentElement' + self._wait_complete = False + + def to_see(self, loc_or_ele, center=None): + """滚动页面直到元素可见 + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :return: None + """ + ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) + self._to_see(ele, center) diff --git a/DrissionPage/_units/selector.py b/DrissionPage/_units/selector.py new file mode 100644 index 0000000..1b94ed7 --- /dev/null +++ b/DrissionPage/_units/selector.py @@ -0,0 +1,267 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from time import perf_counter + + +class SelectElement(object): + """用于处理 select 标签""" + + def __init__(self, ele): + """ + :param ele: select 元素对象 + """ + if ele.tag != 'select': + raise TypeError("select方法只能在<select>元素使用。") + + self._ele = ele + + def __call__(self, text_or_index, timeout=None): + """选定下拉列表中子元素 + :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: None + """ + para_type = 'index' if isinstance(text_or_index, int) else 'text' + timeout = timeout if timeout is not None else self._ele.page.timeout + return self._select(text_or_index, para_type, timeout=timeout) + + @property + def is_multi(self): + """返回是否多选表单""" + return self._ele.attr('multiple') is not None + + @property + def options(self): + """返回所有选项元素组成的列表""" + return [i for i in self._ele.eles('xpath://option') if not isinstance(i, int)] + + @property + def selected_option(self): + """返回第一个被选中的option元素 + :return: ChromiumElement对象或None + """ + ele = self._ele.run_js('return this.options[this.selectedIndex];') + return ele + + @property + def selected_options(self): + 
"""返回所有被选中的option元素列表 + :return: ChromiumElement对象组成的列表 + """ + return [x for x in self.options if x.states.is_selected] + + def all(self): + """全选""" + if not self.is_multi: + raise TypeError("只能在多选菜单执行此操作。") + return self._by_loc('tag:option', 1, False) + + def invert(self): + """反选""" + if not self.is_multi: + raise TypeError("只能对多项选框执行反选。") + change = False + for i in self.options: + change = True + mode = 'false' if i.states.is_selected else 'true' + i.run_js(f'this.selected={mode};') + if change: + self._dispatch_change() + + def clear(self): + """清除所有已选项""" + if not self.is_multi: + raise TypeError("只能在多选菜单执行此操作。") + return self._by_loc('tag:option', 1, True) + + def by_text(self, text, timeout=None): + """此方法用于根据text值选择项。当元素是多选列表时,可以接收list或tuple + :param text: text属性值,传入list或tuple可选择多项 + :param timeout: 超时时间,为None默认使用页面超时时间 + :return: 是否选择成功 + """ + return self._select(text, 'text', False, timeout) + + def by_value(self, value, timeout=None): + """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple + :param value: value属性值,传入list或tuple可选择多项 + :param timeout: 超时时间,为None默认使用页面超时时间 + :return: 是否选择成功 + """ + return self._select(value, 'value', False, timeout) + + def by_index(self, index, timeout=None): + """此方法用于根据index值选择项。当元素是多选列表时,可以接收list或tuple + :param index: 序号,0开始,传入list或tuple可选择多项 + :param timeout: 超时时间,为None默认使用页面超时时间 + :return: 是否选择成功 + """ + return self._select(index, 'index', False, timeout) + + def by_loc(self, loc, timeout=None): + """用定位符选择指定的项 + :param loc: 定位符 + :param timeout: 超时时间 + :return: 是否选择成功 + """ + return self._by_loc(loc, timeout) + + def by_option(self, option): + """选中单个或多个option元素 + :param option: option元素或它们组成的列表 + :return: None + """ + self._select_options(option, 'true') + + def cancel_by_text(self, text, timeout=None): + """此方法用于根据text值取消选择项。当元素是多选列表时,可以接收list或tuple + :param text: 文本,传入list或tuple可取消多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: 是否取消成功 + """ + return self._select(text, 'text', True, timeout) + + def 
cancel_by_value(self, value, timeout=None): + """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple + :param value: value属性值,传入list或tuple可取消多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: 是否取消成功 + """ + return self._select(value, 'value', True, timeout) + + def cancel_by_index(self, index, timeout=None): + """此方法用于根据index值取消选择项。当元素是多选列表时,可以接收list或tuple + :param index: 序号,0开始,传入list或tuple可取消多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: 是否取消成功 + """ + return self._select(index, 'index', True, timeout) + + def cancel_by_loc(self, loc, timeout=None): + """用定位符取消选择指定的项 + :param loc: 定位符 + :param timeout: 超时时间 + :return: 是否选择成功 + """ + return self._by_loc(loc, timeout, True) + + def cancel_by_option(self, option): + """取消选中单个或多个option元素 + :param option: option元素或它们组成的列表 + :return: None + """ + self._select_options(option, 'false') + + def _by_loc(self, loc, timeout=None, cancel=False): + """用定位符取消选择指定的项 + :param loc: 定位符 + :param timeout: 超时时间 + :param cancel: 是否取消选择 + :return: 是否选择成功 + """ + eles = self._ele.eles(loc, timeout) + if not eles: + return False + + mode = 'false' if cancel else 'true' + if self.is_multi: + self._select_options(eles, mode) + else: + self._select_options(eles[0], mode) + return True + + def _select(self, condition, para_type='text', cancel=False, timeout=None): + """选定或取消选定下拉列表中子元素 + :param condition: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param para_type: 参数类型,可选 'text'、'value'、'index' + :param cancel: 是否取消选择 + :return: 是否选择成功 + """ + if not self.is_multi and isinstance(condition, (list, tuple)): + raise TypeError('单选列表只能传入str格式。') + + mode = 'false' if cancel else 'true' + timeout = timeout if timeout is not None else self._ele.page.timeout + condition = set(condition) if isinstance(condition, (list, tuple)) else {condition} + + if para_type in ('text', 'value'): + return self._text_value([str(i) for i in condition], para_type, mode, timeout) + elif para_type == 'index': + return self._index(condition, mode, timeout) + + def 
_text_value(self, condition, para_type, mode, timeout): + """执行text和value搜索 + :param condition: 条件set + :param para_type: 参数类型,可选 'text'、'value' + :param mode: 'true' 或 'false' + :param timeout: 超时时间 + :return: 是否选择成功 + """ + ok = False + text_len = len(condition) + eles = [] + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if para_type == 'text': + eles = [i for i in self.options if i.text in condition] + elif para_type == 'value': + eles = [i for i in self.options if i.attr('value') in condition] + + if len(eles) >= text_len: + ok = True + break + + if ok: + self._select_options(eles, mode) + return True + + return False + + def _index(self, condition, mode, timeout): + """执行index搜索 + :param condition: 条件set + :param mode: 'true' 或 'false' + :param timeout: 超时时间 + :return: 是否选择成功 + """ + ok = False + condition = [int(i) for i in condition] + text_len = max(condition) + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if len(self.options) >= text_len: + ok = True + break + + if ok: + eles = self.options + eles = [eles[i - 1] for i in condition] + self._select_options(eles, mode) + return True + + return False + + def _select_options(self, option, mode): + """选中或取消某个选项 + :param option: options元素对象 + :param mode: 选中还是取消 + :return: None + """ + if isinstance(option, (list, tuple, set)): + if not self.is_multi and len(option) > 1: + option = option[:1] + for o in option: + o.run_js(f'this.selected={mode};') + self._dispatch_change() + else: + option.run_js(f'this.selected={mode};') + self._dispatch_change() + + def _dispatch_change(self): + """触发修改动作""" + self._ele.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') diff --git a/DrissionPage/_units/selector.pyi b/DrissionPage/_units/selector.pyi new file mode 100644 index 0000000..8e74554 --- /dev/null +++ b/DrissionPage/_units/selector.pyi @@ -0,0 +1,73 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by 
g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union, Tuple, List + +from .._elements.chromium_element import ChromiumElement + + +class SelectElement(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... + + @property + def is_multi(self) -> bool: ... + + @property + def options(self) -> List[ChromiumElement]: ... + + @property + def selected_option(self) -> Union[ChromiumElement, None]: ... + + @property + def selected_options(self) -> List[ChromiumElement]: ... + + def clear(self) -> None: ... + + def all(self) -> None: ... + + def by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ... + + def by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... + + def by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ... + + def by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ... + + def by_option(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ... + + def cancel_by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ... + + def cancel_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... + + def cancel_by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ... + + def cancel_by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ... + + def cancel_by_option(self, + option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ... + + def invert(self) -> None: ... + + def _by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None, cancel: bool = False) -> bool: ... 
+ + def _select(self, + condition: Union[str, int, list, tuple] = None, + para_type: str = 'text', + cancel: bool = False, + timeout: float = None) -> bool: ... + + def _text_value(self, condition: Union[list, set], para_type: str, mode: str, timeout: float) -> bool: ... + + def _index(self, condition: set, mode: str, timeout: float) -> bool: ... + + def _select_options(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]], + mode: str) -> None: ... + + def _dispatch_change(self) -> None: ... diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py new file mode 100644 index 0000000..7928a93 --- /dev/null +++ b/DrissionPage/_units/setter.py @@ -0,0 +1,642 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path + +from requests.structures import CaseInsensitiveDict + +from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter +from .._functions.tools import show_or_hide_browser + + +class BasePageSetter(object): + def __init__(self, page): + self._page = page + + def NoneElement_value(self, value=None, on_off=True): + """设置空元素是否返回设定值 + :param value: 返回的设定值 + :param on_off: 是否启用 + :return: None + """ + self._page._none_ele_return_value = on_off + self._page._none_ele_value = value + + +class ChromiumBaseSetter(BasePageSetter): + def __init__(self, page): + super().__init__(page) + self._cookies_setter = None + + @property + def load_mode(self): + """返回用于设置页面加载策略的对象""" + return LoadMode(self._page) + + @property + def scroll(self): + """返回用于设置页面滚动设置的对象""" + return PageScrollSetter(self._page.scroll) + + @property + def cookies(self): + """返回用于设置cookies的对象""" + if self._cookies_setter is None: + self._cookies_setter = CookiesSetter(self._page) + return self._cookies_setter + + def retry_times(self, times): + """设置连接失败重连次数""" + self._page.retry_times = times + + def 
retry_interval(self, interval): + """设置连接失败重连间隔""" + self._page.retry_interval = interval + + def timeouts(self, base=None, page_load=None, script=None, implicit=None): + """设置超时时间,单位为秒 + :param base: 基本等待时间,除页面加载和脚本超时,其它等待默认使用 + :param page_load: 页面加载超时时间 + :param script: 脚本运行超时时间 + :return: None + """ + base = base if base is not None else implicit + if base is not None: + self._page.timeouts.base = base + self._page._timeout = base + + if page_load is not None: + self._page.timeouts.page_load = page_load + + if script is not None: + self._page.timeouts.script = script + + def user_agent(self, ua, platform=None): + """为当前tab设置user agent,只在当前tab有效 + :param ua: user agent字符串 + :param platform: platform字符串 + :return: None + """ + keys = {'userAgent': ua} + if platform: + keys['platform'] = platform + self._page.run_cdp('Emulation.setUserAgentOverride', **keys) + + def session_storage(self, item, value): + """设置或删除某项sessionStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + self._page.run_cdp_loaded('DOMStorage.enable') + i = self._page.run_cdp('Storage.getStorageKeyForFrame', frameId=self._page._frame_id)['storageKey'] + if value is False: + self._page.run_cdp('DOMStorage.removeDOMStorageItem', + storageId={'storageKey': i, 'isLocalStorage': False}, key=item) + else: + self._page.run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': False}, + key=item, value=value) + self._page.run_cdp_loaded('DOMStorage.disable') + + def local_storage(self, item, value): + """设置或删除某项localStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + self._page.run_cdp_loaded('DOMStorage.enable') + i = self._page.run_cdp('Storage.getStorageKeyForFrame', frameId=self._page._frame_id)['storageKey'] + if value is False: + self._page.run_cdp('DOMStorage.removeDOMStorageItem', + storageId={'storageKey': i, 'isLocalStorage': True}, key=item) + else: + 
self._page.run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': True}, + key=item, value=value) + self._page.run_cdp_loaded('DOMStorage.disable') + + def upload_files(self, files): + """等待上传的文件路径 + :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 + :return: None + """ + if not self._page._upload_list: + self._page.driver.set_callback('Page.fileChooserOpened', self._page._onFileChooserOpened) + self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) + + if isinstance(files, str): + files = files.split('\n') + self._page._upload_list = [str(Path(i).absolute()) for i in files] + + def headers(self, headers: dict) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + self._page.run_cdp('Network.enable') + self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) + + def auto_handle_alert(self, on_off=True, accept=True): + """设置是否启用自动处理弹窗 + :param on_off: bool表示开或关 + :param accept: bool表示确定还是取消 + :return: None + """ + self._page._alert.auto = accept if on_off else None + + def blocked_urls(self, urls): + """设置要忽略的url + :param urls: 要忽略的url,可用*通配符,可输入多个,传入None时清空已设置的内容 + :return: None + """ + if not urls: + urls = [] + elif isinstance(urls, str): + urls = (urls,) + if not isinstance(urls, (list, tuple)): + raise TypeError('urls需传入str、list或tuple类型。') + self._page.run_cdp('Network.enable') + self._page.run_cdp('Network.setBlockedURLs', urls=urls) + + # --------------即将废弃--------------- + + @property + def load_strategy(self): + """返回用于设置页面加载策略的对象""" + return LoadMode(self._page) + + +class TabSetter(ChromiumBaseSetter): + def __init__(self, page): + super().__init__(page) + + @property + def window(self): + """返回用于设置浏览器窗口的对象""" + return WindowSetter(self._page) + + def download_path(self, path): + """设置下载路径 + :param path: 下载路径 + :return: None + """ + path = str(Path(path).absolute()) + self._page._download_path = path + self._page.browser._dl_mgr.set_path(self._page.tab_id, path) + if 
self._page._DownloadKit: + self._page._DownloadKit.set.goal_path(path) + + def download_file_name(self, name=None, suffix=None): + """设置下一个被下载文件的名称 + :param name: 文件名,可不含后缀,会自动使用远程文件后缀 + :param suffix: 后缀名,显式设置后缀名,不使用远程文件后缀 + :return: None + """ + self._page.browser._dl_mgr.set_rename(self._page.tab_id, name, suffix) + + def when_download_file_exists(self, mode): + """设置当存在同名文件时的处理方式 + :param mode: 可在 'rename', 'overwrite', 'skip', 'r', 'o', 's'中选择 + :return: None + """ + types = {'rename': 'rename', 'overwrite': 'overwrite', 'skip': 'skip', 'r': 'rename', 'o': 'overwrite', + 's': 'skip'} + mode = types.get(mode, mode) + if mode not in types: + raise ValueError(f'''mode参数只能是 '{"', '".join(types.keys())}' 之一,现在是:{mode}''') + + self._page.browser._dl_mgr.set_file_exists(self._page.tab_id, mode) + + def activate(self): + """使标签页处于最前面""" + self._page.browser.activate_tab(self._page.tab_id) + + +class ChromiumPageSetter(TabSetter): + + def tab_to_front(self, tab_or_id=None): + """激活标签页使其处于最前面 + :param tab_or_id: 标签页对象或id,为None表示当前标签页 + :return: None + """ + if not tab_or_id: + tab_or_id = self._page.tab_id + elif not isinstance(tab_or_id, str): # 传入Tab对象 + tab_or_id = tab_or_id.tab_id + self._page.browser.activate_tab(tab_or_id) + + @property + def window(self): + """返回用于设置浏览器窗口的对象""" + return PageWindowSetter(self._page) + + +class SessionPageSetter(BasePageSetter): + def __init__(self, page): + """ + :param page: SessionPage对象 + """ + super().__init__(page) + self._cookies_setter = None + + @property + def cookies(self): + """返回用于设置cookies的对象""" + if self._cookies_setter is None: + self._cookies_setter = SessionCookiesSetter(self._page) + return self._cookies_setter + + def retry_times(self, times): + """设置连接失败时重连次数""" + self._page.retry_times = times + + def retry_interval(self, interval): + """设置连接失败时重连间隔""" + self._page.retry_interval = interval + + def download_path(self, path): + """设置下载路径 + :param path: 下载路径 + :return: None + """ + path = 
str(Path(path).absolute()) + self._page._download_path = path + if self._page._DownloadKit: + self._page._DownloadKit.set.goal_path(path) + + def timeout(self, second): + """设置连接超时时间 + :param second: 秒数 + :return: None + """ + self._page.timeout = second + + def encoding(self, encoding, set_all=True): + """设置编码 + :param encoding: 编码名称,如果要取消之前的设置,传入None + :param set_all: 是否设置对象参数,为False则只设置当前Response + :return: None + """ + if set_all: + self._page._encoding = encoding if encoding else None + if self._page.response: + self._page.response.encoding = encoding + + def headers(self, headers): + """设置通用的headers + :param headers: dict形式的headers + :return: None + """ + self._page._headers = CaseInsensitiveDict(headers) + + def header(self, attr, value): + """设置headers中一个项 + :param attr: 设置名称 + :param value: 设置值 + :return: None + """ + self._page._headers[attr] = value + + def user_agent(self, ua): + """设置user agent + :param ua: user agent + :return: None + """ + self._page._headers['user-agent'] = ua + + def proxies(self, http=None, https=None): + """设置proxies参数 + :param http: http代理地址 + :param https: https代理地址 + :return: None + """ + self._page.session.proxies = {'http': http, 'https': https} + + def auth(self, auth): + """设置认证元组或对象 + :param auth: 认证元组或对象 + :return: None + """ + self._page.session.auth = auth + + def hooks(self, hooks): + """设置回调方法 + :param hooks: 回调方法 + :return: None + """ + self._page.session.hooks = hooks + + def params(self, params): + """设置查询参数字典 + :param params: 查询参数字典 + :return: None + """ + self._page.session.params = params + + def verify(self, on_off): + """设置是否验证SSL证书 + :param on_off: 是否验证 SSL 证书 + :return: None + """ + self._page.session.verify = on_off + + def cert(self, cert): + """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 + :param cert: 证书路径或元组 + :return: None + """ + self._page.session.cert = cert + + def stream(self, on_off): + """设置是否使用流式响应内容 + :param on_off: 是否使用流式响应内容 + :return: None + """ + self._page.session.stream = on_off + + def 
trust_env(self, on_off): + """设置是否信任环境 + :param on_off: 是否信任环境 + :return: None + """ + self._page.session.trust_env = on_off + + def max_redirects(self, times): + """设置最大重定向次数 + :param times: 最大重定向次数 + :return: None + """ + self._page.session.max_redirects = times + + def add_adapter(self, url, adapter): + """添加适配器 + :param url: 适配器对应url + :param adapter: 适配器对象 + :return: None + """ + self._page.session.mount(url, adapter) + + +class WebPageSetter(ChromiumPageSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumPageSetter(self._page) + + @property + def cookies(self): + """返回用于设置cookies的对象""" + if self._cookies_setter is None: + self._cookies_setter = WebPageCookiesSetter(self._page) + return self._cookies_setter + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page.mode == 's': + self._session_setter.headers(headers) + else: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if self._page.mode == 's': + self._session_setter.user_agent(ua) + else: + self._chromium_setter.user_agent(ua, platform) + + +class WebPageTabSetter(TabSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumBaseSetter(self._page) + + @property + def cookies(self): + """返回用于设置cookies的对象""" + if self._cookies_setter is None: + self._cookies_setter = WebPageCookiesSetter(self._page) + return self._cookies_setter + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page._has_session: + self._session_setter.headers(headers) + if self._page._has_driver: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if 
self._page._has_session: + self._session_setter.user_agent(ua) + if self._page._has_driver: + self._chromium_setter.user_agent(ua, platform) + + +class ChromiumElementSetter(object): + def __init__(self, ele): + """ + :param ele: ChromiumElement + """ + self._ele = ele + + def attr(self, attr, value): + """设置元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele._node_id, name=attr, value=str(value)) + + def prop(self, prop, value): + """设置元素property属性 + :param prop: 属性名 + :param value: 属性值 + :return: None + """ + value = value.replace('"', r'\"') + self._ele.run_js(f'this.{prop}="{value}";') + + def innerHTML(self, html): + """设置元素innerHTML + :param html: html文本 + :return: None + """ + self.prop('innerHTML', html) + + +class ChromiumFrameSetter(ChromiumBaseSetter): + def attr(self, attr, value): + """设置frame元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._page.frame_ele.set.attr(attr, value) + + +class LoadMode(object): + """用于设置页面加载策略的类""" + + def __init__(self, page): + """ + :param page: ChromiumBase对象 + """ + self._page = page + + def __call__(self, value): + """设置加载策略 + :param value: 可选 'normal', 'eager', 'none' + :return: None + """ + if value.lower() not in ('normal', 'eager', 'none'): + raise ValueError("只能选择 'normal', 'eager', 'none'。") + self._page._load_mode = value + + def normal(self): + """设置页面加载策略为normal""" + self._page._load_mode = 'normal' + + def eager(self): + """设置页面加载策略为eager""" + self._page._load_mode = 'eager' + + def none(self): + """设置页面加载策略为none""" + self._page._load_mode = 'none' + + +class PageScrollSetter(object): + def __init__(self, scroll): + self._scroll = scroll + + def wait_complete(self, on_off=True): + """设置滚动命令后是否等待完成 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + self._scroll._wait_complete = on_off + + def smooth(self, on_off=True): + 
"""设置页面滚动是否平滑滚动 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + b = 'smooth' if on_off else 'auto' + self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') + self._scroll._wait_complete = on_off + + +class WindowSetter(object): + """用于设置窗口大小的类""" + + def __init__(self, page): + """ + :param page: 页面对象 + """ + self._page = page + self._window_id = self._get_info()['windowId'] + + def max(self): + """窗口最大化""" + s = self._get_info()['bounds']['windowState'] + if s in ('fullscreen', 'minimized'): + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'maximized'}) + + def mini(self): + """窗口最小化""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'minimized'}) + + def full(self): + """设置窗口为全屏""" + s = self._get_info()['bounds']['windowState'] + if s == 'minimized': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'fullscreen'}) + + def normal(self): + """设置窗口为常规模式""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'normal'}) + + def size(self, width=None, height=None): + """设置窗口大小 + :param width: 窗口宽度 + :param height: 窗口高度 + :return: None + """ + if width or height: + s = self._get_info()['bounds']['windowState'] + if s != 'normal': + self._perform({'windowState': 'normal'}) + info = self._get_info()['bounds'] + width = width - 16 if width else info['width'] + height = height + 7 if height else info['height'] + self._perform({'width': width, 'height': height}) + + def location(self, x=None, y=None): + """设置窗口在屏幕中的位置,相对左上角坐标 + :param x: 距离顶部距离 + :param y: 距离左边距离 + :return: None + """ + if x is not None or y is not None: + self.normal() + info = self._get_info()['bounds'] + x = x if x is not None else info['left'] + y = y 
if y is not None else info['top'] + self._perform({'left': x - 8, 'top': y}) + + def _get_info(self): + """获取窗口位置及大小信息""" + return self._page.run_cdp('Browser.getWindowForTarget') + + def _perform(self, bounds): + """执行改变窗口大小操作 + :param bounds: 控制数据 + :return: None + """ + self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) + + # ------------即将废除---------- + + def maximized(self): + """窗口最大化""" + self.max() + + def minimized(self): + """窗口最小化""" + self.mini() + + def fullscreen(self): + """设置窗口为全屏""" + self.full() + + +class PageWindowSetter(WindowSetter): + def hide(self): + """隐藏浏览器窗口,只在Windows系统可用""" + show_or_hide_browser(self._page, hide=True) + + def show(self): + """显示浏览器窗口,只在Windows系统可用""" + show_or_hide_browser(self._page, hide=False) diff --git a/DrissionPage/_units/setter.pyi b/DrissionPage/_units/setter.pyi new file mode 100644 index 0000000..a5ece0b --- /dev/null +++ b/DrissionPage/_units/setter.pyi @@ -0,0 +1,233 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from pathlib import Path +from typing import Union, Tuple, Literal, Any, Optional + +from requests.adapters import HTTPAdapter +from requests.auth import HTTPBasicAuth + +from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter +from .scroller import PageScroller +from .._base.base import BasePage +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame +from .._pages.chromium_page import ChromiumPage +from .._pages.chromium_tab import ChromiumTab +from .._pages.session_page import SessionPage +from .._pages.web_page import WebPage + +FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o'] + + +class BasePageSetter(object): + def __init__(self, page: BasePage): + self._page: BasePage = ... 
+ + def NoneElement_value(self, value: Any = None, on_off: bool = True) -> None: ... + + +class ChromiumBaseSetter(BasePageSetter): + def __init__(self, page): + self._page: ChromiumBase = ... + self._cookies_setter: CookiesSetter = ... + + @property + def load_mode(self) -> LoadMode: ... + + @property + def scroll(self) -> PageScrollSetter: ... + + @property + def cookies(self) -> CookiesSetter: ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def timeouts(self, base: float = None, page_load: float = None, script: float = None) -> None: ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def session_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def local_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def auto_handle_alert(self, on_off: bool = True, accept: bool = True) -> None: ... + + def upload_files(self, files: Union[str, list, tuple]) -> None: ... + + def blocked_urls(self, urls: Optional[list, tuple, str]) -> None: ... + + +class TabSetter(ChromiumBaseSetter): + def __init__(self, page): ... + + @property + def window(self) -> WindowSetter: ... + + def download_path(self, path: Union[str, Path]) -> None: ... + + def download_file_name(self, name: str = None, suffix: str = None) -> None: ... + + def when_download_file_exists(self, mode: FILE_EXISTS) -> None: ... + + def activate(self) -> None: ... + + +class ChromiumPageSetter(TabSetter): + _page: ChromiumPage = ... + + @property + def window(self) -> PageWindowSetter: ... + + def main_tab(self, tab_id: str = None) -> None: ... + + def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... + + +class SessionPageSetter(BasePageSetter): + def __init__(self, page: SessionPage): + self._page: SessionPage = ... + self._cookies_setter: SessionCookiesSetter = ... 
+ + @property + def cookies(self) -> SessionCookiesSetter: ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def download_path(self, path: Union[str, Path]) -> None: ... + + def timeout(self, second: float) -> None: ... + + def encoding(self, encoding: Optional[str, None], set_all: bool = True) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def header(self, attr: str, value: str) -> None: ... + + def user_agent(self, ua: str) -> None: ... + + def proxies(self, http: str = None, https: str = None) -> None: ... + + def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... + + def hooks(self, hooks: Union[dict, None]) -> None: ... + + def params(self, params: Union[dict, None]) -> None: ... + + def verify(self, on_off: Union[bool, None]) -> None: ... + + def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... + + def stream(self, on_off: Union[bool, None]) -> None: ... + + def trust_env(self, on_off: Union[bool, None]) -> None: ... + + def max_redirects(self, times: Union[int, None]) -> None: ... + + def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... + + +class WebPageSetter(ChromiumPageSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumPageSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + @property + def cookies(self) -> WebPageCookiesSetter: ... + + +class WebPageTabSetter(TabSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumBaseSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + @property + def cookies(self) -> WebPageCookiesSetter: ... + + +class ChromiumElementSetter(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... 
+ + def attr(self, attr: str, value: str) -> None: ... + + def prop(self, prop: str, value: str) -> None: ... + + def innerHTML(self, html: str) -> None: ... + + +class ChromiumFrameSetter(ChromiumBaseSetter): + _page: ChromiumFrame = ... + + def attr(self, attr: str, value: str) -> None: ... + + +class LoadMode(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + + def __call__(self, value: str) -> None: ... + + def normal(self) -> None: ... + + def eager(self) -> None: ... + + def none(self) -> None: ... + + +class PageScrollSetter(object): + def __init__(self, scroll: PageScroller): + self._scroll: PageScroller = ... + + def wait_complete(self, on_off: bool = True): ... + + def smooth(self, on_off: bool = True): ... + + +class WindowSetter(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + self._window_id: str = ... + + def max(self) -> None: ... + + def mini(self) -> None: ... + + def full(self) -> None: ... + + def normal(self) -> None: ... + + def size(self, width: int = None, height: int = None) -> None: ... + + def location(self, x: int = None, y: int = None) -> None: ... + + def _get_info(self) -> dict: ... + + def _perform(self, bounds: dict) -> None: ... + + +class PageWindowSetter(WindowSetter): + _page: ChromiumPage = ... + + def hide(self) -> None: ... + + def show(self) -> None: ... diff --git a/DrissionPage/_units/states.py b/DrissionPage/_units/states.py new file mode 100644 index 0000000..bdf99cc --- /dev/null +++ b/DrissionPage/_units/states.py @@ -0,0 +1,175 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
+""" +from .._functions.web import location_in_viewport +from ..errors import CDPError, NoRectError, PageDisconnectedError, ElementLostError + + +class ElementStates(object): + def __init__(self, ele): + """ + :param ele: ChromiumElement + """ + self._ele = ele + + @property + def is_selected(self): + """返回元素是否被选择""" + return self._ele.run_js('return this.selected;') + + @property + def is_checked(self): + """返回元素是否被选择""" + return self._ele.run_js('return this.checked;') + + @property + def is_displayed(self): + """返回元素是否显示""" + return not (self._ele.style('visibility') == 'hidden' or self._ele.run_js('return this.offsetParent === null;') + or self._ele.style('display') == 'none' or self._ele.prop('hidden')) + + @property + def is_enabled(self): + """返回元素是否可用""" + return not self._ele.run_js('return this.disabled;') + + @property + def is_alive(self): + """返回元素是否仍在DOM中""" + try: + self._ele.attrs + return True + except Exception: + return False + + @property + def is_in_viewport(self): + """返回元素是否出现在视口中,以元素click_point为判断""" + x, y = self._ele.rect.click_point + return location_in_viewport(self._ele.page, x, y) if x else False + + @property + def is_whole_in_viewport(self): + """返回元素是否整个都在视口内""" + x1, y1 = self._ele.rect.location + w, h = self._ele.rect.size + x2, y2 = x1 + w, y1 + h + return location_in_viewport(self._ele.page, x1, y1) and location_in_viewport(self._ele.page, x2, y2) + + @property + def is_covered(self): + """返回元素是否被覆盖,与是否在视口中无关,如被覆盖返回覆盖元素的backend id,否则返回False""" + lx, ly = self._ele.rect.click_point + try: + bid = self._ele.page.run_cdp('DOM.getNodeForLocation', x=int(lx), y=int(ly)).get('backendNodeId') + return bid if bid != self._ele._backend_id else False + except CDPError: + return False + + @property + def has_rect(self): + """返回元素是否拥有位置和大小,没有返回False,有返回四个角在页面中坐标组成的列表""" + try: + return self._ele.rect.corners + except NoRectError: + return False + + +class ShadowRootStates(object): + def __init__(self, ele): + """ + :param ele: 
ChromiumElement + """ + self._ele = ele + + @property + def is_enabled(self): + """返回元素是否可用""" + return not self._ele.run_js('return this.disabled;') + + @property + def is_alive(self): + """返回元素是否仍在DOM中""" + try: + self._ele.page.run_cdp('DOM.describeNode', backendNodeId=self._ele._backend_id) + return True + except Exception: + return False + + +class PageStates(object): + """Page对象、Tab对象使用""" + + def __init__(self, page): + """ + :param page: ChromiumBase对象 + """ + self._page = page + + @property + def is_loading(self): + """返回页面是否在加载状态""" + return self._page._is_loading + + @property + def is_alive(self): + """返回页面对象是否仍然可用""" + try: + self._page.run_cdp('Page.getLayoutMetrics') + return True + except PageDisconnectedError: + return False + + @property + def ready_state(self): + """返回当前页面加载状态,'connecting' 'loading' 'interactive' 'complete'""" + return self._page._ready_state + + @property + def has_alert(self): + """返回当前页面是否存在弹窗""" + return self._page._has_alert + + +class FrameStates(object): + def __init__(self, frame): + """ + :param frame: ChromiumFrame对象 + """ + self._frame = frame + + @property + def is_loading(self): + """返回页面是否在加载状态""" + return self._frame._is_loading + + @property + def is_alive(self): + """返回frame元素是否可用,且里面仍挂载有frame""" + try: + node = self._frame._target_page.run_cdp('DOM.describeNode', + backendNodeId=self._frame._frame_ele._backend_id)['node'] + except (ElementLostError, PageDisconnectedError): + return False + return 'frameId' in node + + @property + def ready_state(self): + """返回加载状态""" + return self._frame._ready_state + + @property + def is_displayed(self): + """返回iframe是否显示""" + return not (self._frame.frame_ele.style('visibility') == 'hidden' + or self._frame.frame_ele.run_js('return this.offsetParent === null;') + or self._frame.frame_ele.style('display') == 'none') + + @property + def has_alert(self): + """返回当前页面是否存在弹窗""" + return self._frame._has_alert diff --git a/DrissionPage/_units/states.pyi 
b/DrissionPage/_units/states.pyi new file mode 100644 index 0000000..067c0bd --- /dev/null +++ b/DrissionPage/_units/states.pyi @@ -0,0 +1,95 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union, Tuple, List, Optional, Literal + +from .._elements.chromium_element import ShadowRoot, ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame + + +class ElementStates(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + @property + def is_selected(self) -> bool: ... + + @property + def is_checked(self) -> bool: ... + + @property + def is_displayed(self) -> bool: ... + + @property + def is_enabled(self) -> bool: ... + + @property + def is_alive(self) -> bool: ... + + @property + def is_in_viewport(self) -> bool: ... + + @property + def is_whole_in_viewport(self) -> bool: ... + + @property + def is_covered(self) -> Union[Literal[False], int]: ... + + @property + def has_rect(self) -> Union[bool, List[Tuple[float, float]]]: ... + + +class ShadowRootStates(object): + def __init__(self, ele: ShadowRoot): + """ + :param ele: ChromiumElement + """ + self._ele: ShadowRoot = ... + + @property + def is_enabled(self) -> bool: ... + + @property + def is_alive(self) -> bool: ... + + +class PageStates(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + + @property + def is_loading(self) -> bool: ... + + @property + def is_alive(self) -> bool: ... + + @property + def ready_state(self) -> Optional[str]: ... + + @property + def has_alert(self) -> bool: ... + + +class FrameStates(object): + def __init__(self, frame: ChromiumFrame): + self._frame: ChromiumFrame = ... + + @property + def is_loading(self) -> bool: ... + + @property + def is_alive(self) -> bool: ... + + @property + def ready_state(self) -> str: ... 
+ + @property + def is_displayed(self) -> bool: ... + + @property + def has_alert(self) -> bool: ... diff --git a/DrissionPage/_units/waiter.py b/DrissionPage/_units/waiter.py new file mode 100644 index 0000000..1569b0e --- /dev/null +++ b/DrissionPage/_units/waiter.py @@ -0,0 +1,469 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from time import sleep, perf_counter + +from .._functions.settings import Settings +from ..errors import WaitTimeoutError, NoRectError + + +class BaseWaiter(object): + def __init__(self, page_or_ele): + """ + :param page_or_ele: 页面对象或元素对象 + """ + self._driver = page_or_ele + + def __call__(self, second): + """等待若干秒 + :param second: 秒数 + :return: None + """ + sleep(second) + + def ele_deleted(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素从DOM中删除 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.deleted(timeout, raise_err=raise_err) if ele else True + + def ele_displayed(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素变成显示状态 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._driver.timeout + end_time = perf_counter() + timeout + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout) + timeout = end_time - perf_counter() + if timeout <= 0: + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待元素显示失败(等待{timeout}秒)。') + else: + return False + return ele.wait.displayed(timeout, raise_err=raise_err) + + def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素变成隐藏状态 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 
超时时间,默认读取页面超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._driver.timeout + end_time = perf_counter() + timeout + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout) + timeout = end_time - perf_counter() + if timeout <= 0: + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待元素显示失败(等待{timeout}秒)。') + else: + return False + return ele.wait.hidden(timeout, raise_err=raise_err) + + def ele_loaded(self, loc, timeout=None, raise_err=None): + """等待元素加载到DOM + :param loc: 要等待的元素,输入定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 成功返回元素对象,失败返回False + """ + ele = self._driver._ele(loc, raise_err=False, timeout=timeout) + if ele: + return ele + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待元素加载失败(等待{timeout}秒)。') + else: + return False + + def load_start(self, timeout=None, raise_err=None): + """等待页面开始加载 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) + + def load_complete(self, timeout=None, raise_err=None): + """等待页面加载完成 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, start=False, raise_err=raise_err) + + def upload_paths_inputted(self): + """等待自动填写上传文件路径""" + end_time = perf_counter() + self._driver.timeout + while perf_counter() < end_time: + if not self._driver._upload_list: + return True + sleep(.01) + return False + + def download_begin(self, timeout=None, cancel_it=False): + """等待浏览器下载开始,可将其拦截 + :param timeout: 超时时间,None使用页面对象超时时间 + :param cancel_it: 是否取消该任务 + :return: 成功返回任务对象,失败返回False + """ + self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, False if cancel_it else True) + if timeout is 
None: + timeout = self._driver.timeout + + r = False + end_time = perf_counter() + timeout + while perf_counter() < end_time: + v = self._driver.browser._dl_mgr.get_flag(self._driver.tab_id) + if not isinstance(v, bool): + r = v + break + + self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, None) + return r + + def url_change(self, text, exclude=False, timeout=None, raise_err=None): + """等待url变成包含或不包含指定文本 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当url不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._change('url', text, exclude, timeout, raise_err) + + def title_change(self, text, exclude=False, timeout=None, raise_err=None): + """等待title变成包含或不包含指定文本 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当title不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._change('title', text, exclude, timeout, raise_err) + + def _change(self, arg, text, exclude=False, timeout=None, raise_err=None): + """等待指定属性变成包含或不包含指定文本 + :param arg: 要被匹配的属性 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当属性不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._driver.timeout + + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if arg == 'url': + val = self._driver.url + elif arg == 'title': + val = self._driver.title + else: + raise ValueError + if (not exclude and text in val) or (exclude and text not in val): + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待{arg}改变失败(等待{timeout}秒)。') + else: + return False + + def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): + """等待页面开始加载或加载完成 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param start: 等待开始还是结束 + :param gap: 间隔秒数 + :param raise_err: 
等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout != 0: + if timeout is None or timeout is True: + timeout = self._driver.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._driver._is_loading == start: + return True + sleep(gap) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待页面加载失败(等待{timeout}秒)。') + else: + return False + + # -----------即将废弃----------- + + def data_packets(self, count=1, timeout=None, fix_count: bool = True): + """等待符合要求的数据包到达指定数量 + :param count: 需要捕捉的数据包数量 + :param timeout: 超时时间,为None无限等待 + :param fix_count: 是否必须满足总数要求,发生超时,为True返回False,为False返回已捕捉到的数据包 + :return: count为1时返回数据包对象,大于1时返回列表,超时且fix_count为True时返回False""" + return self._driver.listen.wait(count, timeout, fix_count) + + +class TabWaiter(BaseWaiter): + + def downloads_done(self, timeout=None, cancel_if_timeout=True): + """等待所有浏览器下载任务结束 + :param timeout: 超时时间,为None时无限等待 + :param cancel_if_timeout: 超时时是否取消剩余任务 + :return: 是否等待成功 + """ + if not timeout: + while self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + sleep(.5) + return True + + else: + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + return True + sleep(.5) + + if self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + if cancel_if_timeout: + for m in self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + m.cancel() + return False + else: + return True + + def alert_closed(self): + """等待弹出框关闭""" + while not self._driver.states.has_alert: + sleep(.2) + while self._driver.states.has_alert: + sleep(.2) + + +class PageWaiter(TabWaiter): + def __init__(self, page): + super().__init__(page) + # self._listener = None + + def new_tab(self, timeout=None, raise_err=None): + """等待新标签页出现 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + 
:return: 等到新标签页返回其id,否则返回False + """ + timeout = timeout if timeout is not None else self._driver.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + latest_tab = self._driver.latest_tab + if self._driver.tab_id != latest_tab: + return latest_tab + sleep(.01) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待新标签页失败(等待{timeout}秒)。') + else: + return False + + def all_downloads_done(self, timeout=None, cancel_if_timeout=True): + """等待所有浏览器下载任务结束 + :param timeout: 超时时间,为None时无限等待 + :param cancel_if_timeout: 超时时是否取消剩余任务 + :return: 是否等待成功 + """ + if not timeout: + while self._driver.browser._dl_mgr._missions: + sleep(.5) + return True + + else: + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if not self._driver.browser._dl_mgr._missions: + return True + sleep(.5) + + if self._driver.browser._dl_mgr._missions: + if cancel_if_timeout: + for m in list(self._driver.browser._dl_mgr._missions.values()): + m.cancel() + return False + else: + return True + + +class ElementWaiter(object): + """等待元素在dom中某种状态,如删除、显示、隐藏""" + + def __init__(self, page, ele): + """等待元素在dom中某种状态,如删除、显示、隐藏 + :param page: 元素所在页面 + :param ele: 要等待的元素 + """ + self._page = page + self._ele = ele + + def __call__(self, second): + """等待若干秒 + :param second: 秒数 + :return: None + """ + sleep(second) + + def deleted(self, timeout=None, raise_err=None): + """等待元素从dom删除 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_alive', False, timeout, raise_err, err_text='等待元素被删除失败。') + + def displayed(self, timeout=None, raise_err=None): + """等待元素从dom显示 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', True, timeout, raise_err, err_text='等待元素显示失败。') + + def hidden(self, timeout=None, raise_err=None): + 
"""等待元素从dom隐藏 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', False, timeout, raise_err, err_text='等待元素隐藏失败。') + + def covered(self, timeout=None, raise_err=None): + """等待当前元素被遮盖 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', True, timeout, raise_err, err_text='等待元素被覆盖失败。') + + def not_covered(self, timeout=None, raise_err=None): + """等待当前元素不被遮盖 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', False, timeout, raise_err, err_text='等待元素不被覆盖失败。') + + def enabled(self, timeout=None, raise_err=None): + """等待当前元素变成可用 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', True, timeout, raise_err, err_text='等待元素变成可用失败。') + + def disabled(self, timeout=None, raise_err=None): + """等待当前元素变成不可用 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', False, timeout, raise_err, err_text='等待元素变成不可用失败。') + + def disabled_or_deleted(self, timeout=None, raise_err=None): + """等待当前元素变成不可用或从DOM移除 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if not self._ele.states.is_enabled or not self._ele.states.is_alive: + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待元素隐藏或被删除失败(等待{timeout}秒)。') + else: + return False + + def stop_moving(self, gap=.1, timeout=None, raise_err=None): + 
"""等待当前元素停止运动 + :param gap: 检测间隔时间 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + try: + size = self._ele.states.has_rect + location = self._ele.rect.location + break + except NoRectError: + pass + else: + raise NoRectError + + while perf_counter() < end_time: + sleep(gap) + if self._ele.rect.size == size and self._ele.rect.location == location: + return True + size = self._ele.rect.size + location = self._ele.rect.location + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待元素停止运动失败(等待{timeout}秒)。') + else: + return False + + def has_rect(self, timeout=None, raise_err=None): + """等待当前元素有大小及位置属性 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('has_rect', True, timeout, raise_err, err_text='等待元素拥有大小及位置属性失败(等待{}秒)。') + + def _wait_state(self, attr, mode=False, timeout=None, raise_err=None, err_text=None): + """等待元素某个元素状态到达指定状态 + :param attr: 状态名称 + :param mode: True或False + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :param err_text: 抛出错误时显示的信息 + :return: 是否等待成功 + """ + err_text = err_text or '等待元素状态改变失败(等待{}秒)。' + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._ele.states.__getattribute__(attr) == mode: + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(err_text.format(timeout)) + else: + return False + + +class FrameWaiter(BaseWaiter, ElementWaiter): + def __init__(self, frame): + """ + :param frame: ChromiumFrame对象 + """ + super().__init__(frame) + super(BaseWaiter, self).__init__(frame, frame.frame_ele) diff --git 
a/DrissionPage/_units/waiter.pyi b/DrissionPage/_units/waiter.pyi new file mode 100644 index 0000000..a0c8603 --- /dev/null +++ b/DrissionPage/_units/waiter.pyi @@ -0,0 +1,112 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" +from typing import Union + +from .downloader import DownloadMission +from .._elements.chromium_element import ChromiumElement +from .._pages.chromium_base import ChromiumBase +from .._pages.chromium_frame import ChromiumFrame +from .._pages.chromium_page import ChromiumPage + + +class BaseWaiter(object): + def __init__(self, page: ChromiumBase): + self._driver: ChromiumBase = ... + + def __call__(self, second: float) -> None: ... + + def ele_deleted(self, + loc_or_ele: Union[str, tuple, ChromiumElement], + timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_displayed(self, + loc_or_ele: Union[str, tuple, ChromiumElement], + timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_loaded(self, + loc: Union[str, tuple], + timeout: float = None, + raise_err: bool = None) -> Union[bool, ChromiumElement]: ... + + def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ... + + def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def upload_paths_inputted(self) -> bool: ... + + def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ... + + def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... 
+ + def url_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... + + def title_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... + + def _change(self, arg: str, text: str, exclude: bool = False, timeout: float = None, + raise_err: bool = None) -> bool: ... + + +class TabWaiter(BaseWaiter): + + def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... + + def alert_closed(self) -> None: ... + + +class PageWaiter(TabWaiter): + _driver: ChromiumPage = ... + + def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ... + + def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... + + +class ElementWaiter(object): + def __init__(self, page: ChromiumBase, ele: ChromiumElement): + self._ele: ChromiumElement = ... + self._page: ChromiumBase = ... + + def __call__(self, second: float) -> None: ... + + def deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def displayed(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def has_rect(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def stop_moving(self, gap: float = .1, timeout: float = None, raise_err: bool = None) -> bool: ... 
+ + def _wait_state(self, + attr: str, + mode: bool = False, + timeout: float = None, + raise_err: bool = None, + err_text: str = None) -> bool: ... + + +class FrameWaiter(BaseWaiter, ElementWaiter): + def __init__(self, frame: ChromiumFrame): ... diff --git a/DrissionPage/action_chains.pyi b/DrissionPage/action_chains.pyi deleted file mode 100644 index 3b54a49..0000000 --- a/DrissionPage/action_chains.pyi +++ /dev/null @@ -1,75 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, Tuple - -from .chromium_base import ChromiumBase -from .chromium_driver import ChromiumDriver -from .chromium_element import ChromiumElement -from .chromium_page import ChromiumPage - - -class ActionChains: - - def __init__(self, page: ChromiumBase): - self.page: ChromiumPage = ... - self._dr: ChromiumDriver = ... - self.modifier: int = ... - self.curr_x: int = ... - self.curr_y: int = ... - - def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[int, int], str], - offset_x: int = 0, offset_y: int = 0) -> ActionChains: ... - - def move(self, offset_x: int = 0, offset_y: int = 0) -> ActionChains: ... - - def click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def r_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def m_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def db_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... 
- - def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def _hold(self, on_ele: Union[ChromiumElement, str] = None, button: str = 'left', - count: int = 1) -> ActionChains: ... - - def _release(self, button: str) -> ActionChains: ... - - def scroll(self, delta_x: int = 0, delta_y: int = 0, - on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ... - - def up(self, pixel: int) -> ActionChains: ... - - def down(self, pixel: int) -> ActionChains: ... - - def left(self, pixel: int) -> ActionChains: ... - - def right(self, pixel: int) -> ActionChains: ... - - def key_down(self, key: str) -> ActionChains: ... - - def key_up(self, key: str) -> ActionChains: ... - - def type(self, text: Union[str, list, tuple]) -> ActionChains: ... - - def wait(self, second: float) -> ActionChains: ... - - def _get_key_data(self, key: str, action: str) -> dict: ... - - -def location_to_client(page, lx: int, ly: int) -> tuple: ... diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py deleted file mode 100644 index 368af2a..0000000 --- a/DrissionPage/chromium_base.py +++ /dev/null @@ -1,1459 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from base64 import b64decode -from json import loads, JSONDecodeError -from os import sep -from pathlib import Path -from re import search -from threading import Thread -from time import perf_counter, sleep, time - -from requests import Session - -from .base import BasePage -from .chromium_driver import ChromiumDriver -from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chromium_ele -from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement -from .commons.locator import get_loc -from .commons.tools import get_usable_path, clean_folder -from .commons.web import set_browser_cookies, ResponseData, location_in_viewport -from .errors import ContextLossError, ElementLossError, AlertExistsError, CallMethodError, 
TabClosedError, \ - NoRectError, BrowserConnectError -from .session_element import make_session_ele - - -class ChromiumBase(BasePage): - """标签页、frame、页面基类""" - - def __init__(self, address, tab_id=None, timeout=None): - """ - :param address: 浏览器 ip:port - :param tab_id: 要控制的标签页id,不指定默认为激活的 - :param timeout: 超时时间 - """ - self._is_loading = None - self._root_id = None # object id - self._debug = False - self._debug_recorder = None - self._tab_obj = None - self._set = None - self._screencast = None - - if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): - address = f'127.0.0.1:{address}' - - self._set_start_options(address, None) - self._set_runtime_settings() - self._connect_browser(tab_id) - timeout = timeout if timeout is not None else self.timeouts.implicit - super().__init__(timeout) - - def _set_start_options(self, address, none): - """设置浏览器启动属性 - :param address: 'ip:port' - :param none: 用于后代继承 - :return: None - """ - self.address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') - - def _set_runtime_settings(self): - self._timeouts = Timeout(self) - self._page_load_strategy = 'normal' - - def _connect_browser(self, tab_id=None): - """连接浏览器,在第一次时运行 - :param tab_id: 要控制的标签页id,不指定默认为激活的 - :return: None - """ - self._chromium_init() - if not tab_id: - json = self._control_session.get(f'http://{self.address}/json').json() - tab_id = [i['id'] for i in json if i['type'] == 'page'] - if not tab_id: - raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') - tab_id = tab_id[0] - self._driver_init(tab_id) - self._get_document() - self._first_run = False - - def _chromium_init(self): - """浏览器初始设置""" - self._control_session = Session() - self._control_session.keep_alive = False - self._first_run = True - self._is_reading = False - self._upload_list = None - self._wait = None - self._scroll = None - - def _driver_init(self, tab_id): - """新建页面、页面刷新、切换标签页后要进行的cdp参数初始化 - :param tab_id: 要跳转到的标签页id - :return: None - """ - 
self._is_loading = True - self._tab_obj = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) - - self._tab_obj.start() - self._tab_obj.DOM.enable() - self._tab_obj.Page.enable() - self._tab_obj.call_method('Emulation.setFocusEmulationEnabled', enabled=True) - - self._tab_obj.Page.frameStoppedLoading = self._onFrameStoppedLoading - self._tab_obj.Page.frameStartedLoading = self._onFrameStartedLoading - self._tab_obj.DOM.documentUpdated = self._onDocumentUpdated - self._tab_obj.Page.loadEventFired = self._onLoadEventFired - self._tab_obj.Page.frameNavigated = self._onFrameNavigated - - def _get_document(self): - """刷新cdp使用的document数据""" - if not self._is_reading: - self._is_reading = True - - if self._debug: - print('获取document') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) - - try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉 - self._wait_loaded() - except TabClosedError: - return - - end_time = perf_counter() + 10 - while perf_counter() < end_time: - try: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) - break - - except Exception: - if self._debug: - print('重试获取document') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错')) - - sleep(.1) - - else: - raise RuntimeError('获取document失败。') - - if self._debug: - print('获取document结束') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) - - self._is_loading = False - self._is_reading = False - - def _wait_loaded(self, timeout=None): - """等待页面加载完成 - :param timeout: 超时时间 - :return: 是否成功,超时返回False - """ - timeout = timeout if timeout is not None else self.timeouts.page_load - - end_time = perf_counter() + timeout - while perf_counter() < end_time: - state = 
self.ready_state - if state is None: # 存在alert的情况 - return None - - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), 'waiting', state)) - - if state == 'complete': - return True - elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'): - self.stop_loading() - return True - elif self.page_load_strategy == 'none': - self.stop_loading() - return True - sleep(.1) - - self.stop_loading() - return False - - def _onFrameStartedLoading(self, **kwargs): - """页面开始加载时触发""" - if kwargs['frameId'] == self.tab_id: - self._is_loading = True - - if self._debug: - print('页面开始加载 FrameStartedLoading') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading')) - - def _onFrameStoppedLoading(self, **kwargs): - """页面加载完成后触发""" - if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading: - if self._debug: - print('页面停止加载 FrameStoppedLoading') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading')) - - self._get_document() - - def _onLoadEventFired(self, **kwargs): - """在页面刷新、变化后重新读取页面内容""" - if self._debug: - print('loadEventFired') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired')) - - self._get_document() - - def _onDocumentUpdated(self, **kwargs): - """页面跳转时触发""" - if self._debug: - print('documentUpdated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated')) - - def _onFrameNavigated(self, **kwargs): - """页面跳转时触发""" - if kwargs['frame'].get('parentId', None) == self.tab_id and self._first_run is False and self._is_loading: - self._is_loading = True - if self._debug: - print('navigated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) - - def _onFileChooserOpened(self, **kwargs): - """文件选择框打开时触发""" - if self._upload_list: - files = self._upload_list if 
kwargs['mode'] == 'selectMultiple' else self._upload_list[:1] - self.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=kwargs['backendNodeId']) - - self.driver.Page.fileChooserOpened = None - self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) - self._upload_list = None - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele = page('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: ChromiumElement对象 - """ - return self.ele(loc_or_str, timeout) - - @property - def driver(self): - """返回用于控制浏览器的ChromiumDriver对象""" - if self._tab_obj is None: - raise RuntimeError('浏览器已关闭或链接已断开。') - return self._tab_obj - - @property - def is_loading(self): - """返回页面是否正在加载状态""" - return self._is_loading - - @property - def is_alive(self): - """返回页面对象是否仍然可用""" - try: - self.run_cdp('Page.getLayoutMetrics') - return True - except TabClosedError: - return False - - @property - def title(self): - """返回当前页面title""" - return self.run_cdp_loaded('Target.getTargetInfo', targetId=self.tab_id)['targetInfo']['title'] - - @property - def url(self): - """返回当前页面url""" - return self.run_cdp_loaded('Target.getTargetInfo', targetId=self.tab_id)['targetInfo']['url'] - - @property - def _browser_url(self): - """用于被WebPage覆盖""" - return self.url - - @property - def html(self): - """返回当前页面html文本""" - self.wait.load_complete() - return self.run_cdp('DOM.getOuterHTML', objectId=self._root_id)['outerHTML'] - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" - try: - return loads(self('t:pre', timeout=.5).text) - except JSONDecodeError: - return None - - @property - def tab_id(self): - """返回当前标签页id""" - return self.driver.id if self.driver.status == 'started' else '' - - @property - def ready_state(self): - """返回当前页面加载状态,'loading' 'interactive' 'complete',有弹出框时返回None""" - while True: - try: - return self.run_cdp('Runtime.evaluate', expression='document.readyState;')['result']['value'] - except 
(AlertExistsError, TypeError): - return None - except ContextLossError: - continue - - @property - def size(self): - """返回页面总宽高,格式:(宽, 高)""" - r = self.run_cdp_loaded('Page.getLayoutMetrics')['contentSize'] - return r['width'], r['height'] - - @property - def active_ele(self): - """返回当前焦点所在元素""" - return self.run_js_loaded('return document.activeElement;') - - @property - def page_load_strategy(self): - """返回页面加载策略,有3种:'none'、'normal'、'eager'""" - return self._page_load_strategy - - @property - def user_agent(self): - """返回user agent""" - return self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - - @property - def scroll(self): - """返回用于滚动滚动条的对象""" - self.wait.load_complete() - if self._scroll is None: - self._scroll = ChromiumPageScroll(self) - return self._scroll - - @property - def timeouts(self): - """返回timeouts设置""" - return self._timeouts - - @property - def upload_list(self): - """返回等待上传文件列表""" - return self._upload_list - - @property - def wait(self): - """返回用于等待的对象""" - if self._wait is None: - self._wait = ChromiumBaseWaiter(self) - return self._wait - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = ChromiumBaseSetter(self) - return self._set - - @property - def screencast(self): - """返回用于录屏的对象""" - if self._screencast is None: - self._screencast = Screencast(self) - return self._screencast - - def run_cdp(self, cmd, **cmd_args): - """执行Chrome DevTools Protocol语句 - :param cmd: 协议项目 - :param cmd_args: 参数 - :return: 执行的结果 - """ - if self.driver.has_alert and cmd != HANDLE_ALERT_METHOD: - raise AlertExistsError - - r = self.driver.call_method(cmd, **cmd_args) - if ERROR not in r: - return r - - error = r[ERROR] - if error in ('Cannot find context with specified id', 'Inspected target navigated or closed'): - raise ContextLossError - elif error in ('Could not find node with given id', 'Could not find object with given id', - 'No node with given id found', 'Node with given id does not 
belong to the document', - 'No node found for given backend id'): - raise ElementLossError - elif error == 'tab closed': - raise TabClosedError - elif error == 'alert exists': - pass - elif error in ('Node does not have a layout object', 'Could not compute box model.'): - raise NoRectError - elif r['type'] == 'call_method_error': - raise CallMethodError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}') - else: - raise RuntimeError(r) - - def run_cdp_loaded(self, cmd, **cmd_args): - """执行Chrome DevTools Protocol语句,执行前等待页面加载完毕 - :param cmd: 协议项目 - :param cmd_args: 参数 - :return: 执行的结果 - """ - self.wait.load_complete() - return self.run_cdp(cmd, **cmd_args) - - def run_js(self, script, *args, as_expr=False): - """运行javascript代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... - :param as_expr: 是否作为表达式运行,为True时args无效 - :return: 运行的结果 - """ - return run_js(self, script, as_expr, self.timeouts.script, args) - - def run_js_loaded(self, script, *args, as_expr=False): - """运行javascript代码,执行前等待页面加载完毕 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... - :param as_expr: 是否作为表达式运行,为True时args无效 - :return: 运行的结果 - """ - self.wait.load_complete() - return run_js(self, script, as_expr, self.timeouts.script, args) - - def run_async_js(self, script, *args, as_expr=False): - """以异步方式执行js代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... 
- :param as_expr: 是否作为表达式运行,为True时args无效 - :return: None - """ - from threading import Thread - Thread(target=run_js, args=(self, script, as_expr, self.timeouts.script, args)).start() - - def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None): - """访问url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间 - :return: 目标url是否可用 - """ - retry, interval = self._before_connect(url, retry, interval) - self._url_available = self._d_connect(self._url, - times=retry, - interval=interval, - show_errmsg=show_errmsg, - timeout=timeout) - return self._url_available - - def get_cookies(self, as_dict=False, all_domains=False, all_info=False): - """获取cookies信息 - :param as_dict: 为True时返回由{name: value}键值对组成的dict,为True时返回list且all_info无效 - :param all_domains: 是否返回所有域的cookies - :param all_info: 是否返回所有信息,为False时只返回name、value、domain - :return: cookies信息 - """ - txt = 'Storage' if all_domains else 'Network' - cookies = self.run_cdp_loaded(f'{txt}.getCookies')['cookies'] - - if as_dict: - return {cookie['name']: cookie['value'] for cookie in cookies} - elif all_info: - return cookies - else: - return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} - for cookie in cookies] - - def ele(self, loc_or_ele, timeout=None): - """获取第一个符合条件的元素对象 - :param loc_or_ele: 定位符或元素对象 - :param timeout: 查找超时时间 - :return: ChromiumElement对象 - """ - return self._ele(loc_or_ele, timeout=timeout) - - def eles(self, loc_or_str, timeout=None): - """获取所有符合条件的元素对象 - :param loc_or_str: 定位符或元素对象 - :param timeout: 查找超时时间 - :return: ChromiumElement对象组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_ele) - - def s_eles(self, loc_or_str): - 
"""查找所有符合条件的元素以SessionElement列表形式返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象组成的列表 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): - """执行元素查找 - :param loc_or_ele: 定位符或元素对象 - :param timeout: 查找超时时间 - :param single: 是否只返回第一个 - :param relative: WebPage用的表示是否相对定位的参数 - :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 - :return: ChromiumElement对象或元素对象组成的列表 - """ - if isinstance(loc_or_ele, (str, tuple)): - loc = get_loc(loc_or_ele)[1] - elif isinstance(loc_or_ele, ChromiumElement) or str(type(loc_or_ele)).endswith(".ChromiumFrame'>"): - return loc_or_ele - else: - raise ValueError('loc_or_str参数只能是tuple、str、ChromiumElement类型。') - - ok = False - nodeIds = None - - timeout = timeout if timeout is not None else self.timeout - end_time = perf_counter() + timeout - - try: - search_result = self.run_cdp_loaded('DOM.performSearch', query=loc, includeUserAgentShadowDOM=True) - count = search_result['resultCount'] - except ContextLossError: - search_result = None - count = 0 - - while True: - if count > 0: - count = 1 if single else count - try: - nodeIds = self.run_cdp_loaded('DOM.getSearchResults', searchId=search_result['searchId'], - fromIndex=0, toIndex=count) - if nodeIds['nodeIds'][0] != 0: - ok = True - - except Exception: - pass - - if ok: - try: - if single: - r = make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) - break - else: - r = [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] - break - - except ElementLossError: - ok = False - - try: - search_result = self.run_cdp_loaded('DOM.performSearch', query=loc, includeUserAgentShadowDOM=True) - count = search_result['resultCount'] - except ContextLossError: - pass - - if perf_counter() >= end_time: - return NoneElement() if single else [] - - sleep(.1) - - try: - self.run_cdp('DOM.discardSearchResults', searchId=search_result['searchId']) - except: - pass - 
return r - - def refresh(self, ignore_cache=False): - """刷新当前页面 - :param ignore_cache: 是否忽略缓存 - :return: None - """ - self._is_loading = True - self.run_cdp('Page.reload', ignoreCache=ignore_cache) - self.wait.load_start() - - def forward(self, steps=1): - """在浏览历史中前进若干步 - :param steps: 前进步数 - :return: None - """ - self._forward_or_back(steps) - - def back(self, steps=1): - """在浏览历史中后退若干步 - :param steps: 后退步数 - :return: None - """ - self._forward_or_back(-steps) - - def _forward_or_back(self, steps): - """执行浏览器前进或后退,会跳过url相同的历史记录 - :param steps: 步数 - :return: None - """ - if steps == 0: - return - - history = self.run_cdp('Page.getNavigationHistory') - index = history['currentIndex'] - history = history['entries'] - direction = 1 if steps > 0 else -1 - curr_url = history[index]['url'] - nid = None - for num in range(abs(steps)): - for i in history[index::direction]: - index += direction - if i['url'] != curr_url: - nid = i['id'] - curr_url = i['url'] - break - - if nid: - self._is_loading = True - self.run_cdp('Page.navigateToHistoryEntry', entryId=nid) - - def stop_loading(self): - """页面停止加载""" - if self._debug: - print('停止页面加载') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载')) - - self.run_cdp('Page.stopLoading') - while self.ready_state not in ('complete', None): - sleep(.1) - - def remove_ele(self, loc_or_ele): - """从页面上删除一个元素 - :param loc_or_ele: 元素对象或定位符 - :return: None - """ - if not loc_or_ele: - return - ele = self._ele(loc_or_ele, raise_err=False) - if ele: - self.run_cdp('DOM.removeNode', nodeId=ele.ids.node_id) - - def get_frame(self, loc_ind_ele, timeout=None): - """获取页面中一个frame对象,可传入定位符、iframe序号、ChromiumFrame对象,序号从1开始 - :param loc_ind_ele: 定位符、iframe序号、ChromiumFrame对象 - :param timeout: 查找元素超时时间 - :return: ChromiumFrame对象 - """ - if isinstance(loc_ind_ele, str): - if not loc_ind_ele.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', - 'text:', 'text=', 'text^', 'text$', 
'xpath:', 'xpath=', 'x:', 'x=', 'css:', - 'css=', 'c:', 'c=')): - loc_ind_ele = f'xpath://*[(name()="iframe" or name()="frame") and ' \ - f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]' - ele = self._ele(loc_ind_ele, timeout=timeout) - if ele and not str(type(ele)).endswith(".ChromiumFrame'>"): - raise TypeError('该定位符不是指向frame元素。') - return ele - - elif isinstance(loc_ind_ele, tuple): - ele = self._ele(loc_ind_ele, timeout=timeout) - if ele and not str(type(ele)).endswith(".ChromiumFrame'>"): - raise TypeError('该定位符不是指向frame元素。') - return ele - - elif isinstance(loc_ind_ele, int): - if loc_ind_ele < 1: - raise ValueError('序号必须大于0。') - xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele}]' - return self._ele(xpath, timeout=timeout) - - elif str(type(loc_ind_ele)).endswith(".ChromiumFrame'>"): - return loc_ind_ele - - else: - raise TypeError('必须传入定位符、iframe序号、id、name、ChromiumFrame对象其中之一。') - - def get_frames(self, loc=None, timeout=None): - """获取所有符号条件的frame对象 - :param loc: 定位符,为None时返回所有 - :param timeout: 查找超时时间 - :return: ChromiumFrame对象组成的列表 - """ - loc = loc or 'xpath://*[name()="iframe" or name()="frame"]' - frames = self._ele(loc, timeout=timeout, single=False, raise_err=False) - return [i for i in frames if str(type(i)).endswith(".ChromiumFrame'>")] - - def get_session_storage(self, item=None): - """获取sessionStorage信息,不设置item则获取全部 - :param item: 要获取的项,不设置则返回全部 - :return: sessionStorage一个或所有项内容 - """ - if item: - js = f'sessionStorage.getItem("{item}");' - return self.run_js_loaded(js, as_expr=True) - else: - js = ''' - var dp_ls_len = sessionStorage.length; - var dp_ls_arr = new Array(); - for(var i = 0; i < dp_ls_len; i++) { - var getKey = sessionStorage.key(i); - var getVal = sessionStorage.getItem(getKey); - dp_ls_arr[i] = {'key': getKey, 'val': getVal} - } - return dp_ls_arr; - ''' - return {i['key']: i['val'] for i in self.run_js_loaded(js)} - - def get_local_storage(self, item=None): - """获取localStorage信息,不设置item则获取全部 - :param item: 
要获取的项目,不设置则返回全部 - :return: localStorage一个或所有项内容 - """ - if item: - js = f'localStorage.getItem("{item}");' - return self.run_js_loaded(js, as_expr=True) - else: - js = ''' - var dp_ls_len = localStorage.length; - var dp_ls_arr = new Array(); - for(var i = 0; i < dp_ls_len; i++) { - var getKey = localStorage.key(i); - var getVal = localStorage.getItem(getKey); - dp_ls_arr[i] = {'key': getKey, 'val': getVal} - } - return dp_ls_arr; - ''' - return {i['key']: i['val'] for i in self.run_js_loaded(js)} - - def get_screenshot(self, path=None, as_bytes=None, as_base64=None, - full_page=False, left_top=None, right_bottom=None): - """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 - :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' - :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 - :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 - :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 - :param left_top: 截取范围左上角坐标 - :param right_bottom: 截取范围右下角角坐标 - :return: 图片完整路径或字节文本 - """ - return self._get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, - full_page=full_page, left_top=left_top, right_bottom=right_bottom) - - def clear_cache(self, session_storage=True, local_storage=True, cache=True, cookies=True): - """清除缓存,可选要清除的项 - :param session_storage: 是否清除sessionStorage - :param local_storage: 是否清除localStorage - :param cache: 是否清除cache - :param cookies: 是否清除cookies - :return: None - """ - if session_storage: - self.run_js('sessionStorage.clear();', as_expr=True) - if local_storage: - self.run_js('localStorage.clear();', as_expr=True) - if cache: - self.run_cdp_loaded('Network.clearBrowserCache') - if cookies: - self.run_cdp_loaded('Network.clearBrowserCookies') - - def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None): - """尝试连接,重试若干次 - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param show_errmsg: 是否抛出异常 - :param timeout: 连接超时时间 - :return: 
是否成功,返回None表示不确定 - """ - err = None - timeout = timeout if timeout is not None else self.timeouts.page_load - - for t in range(times + 1): - err = None - result = self.run_cdp('Page.navigate', url=to_url) - - is_timeout = self._wait_loaded(timeout) - if is_timeout is None: - return None - is_timeout = not is_timeout - self.wait.load_complete() - - if is_timeout: - err = TimeoutError('页面连接超时。') - if 'errorText' in result: - err = ConnectionError(result['errorText']) - - if not err: - break - - if t < times: - sleep(interval) - while self.ready_state not in ('complete', None): - sleep(.1) - if self._debug or show_errmsg: - print(f'重试 {to_url}') - - if err: - if show_errmsg: - raise err if err is not None else ConnectionError('连接异常。') - return False - - return True - - def _get_screenshot(self, path=None, as_bytes=None, as_base64=None, - full_page=False, left_top=None, right_bottom=None, ele=None): - """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 - :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' - :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 - :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 - :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 - :param left_top: 截取范围左上角坐标 - :param right_bottom: 截取范围右下角角坐标 - :param ele: 为异域iframe内元素截图设置 - :return: 图片完整路径或字节文本 - """ - if as_bytes: - if as_bytes is True: - pic_type = 'png' - else: - if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): - raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") - pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes - - elif as_base64: - if as_base64 is True: - pic_type = 'png' - else: - if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): - raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") - pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 - - else: - if not path: - path = f'{self.title}.jpg' - path = get_usable_path(path) - pic_type = path.suffix.lower() - if pic_type not in ('.jpg', '.jpeg', '.png', 
'.webp'): - raise TypeError(f'不支持的文件格式:{pic_type}。') - pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] - - width, height = self.size - if full_page: - vp = {'x': 0, 'y': 0, 'width': width, 'height': height, 'scale': 1} - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, - captureBeyondViewport=True, clip=vp)['data'] - else: - if left_top and right_bottom: - x, y = left_top - w = right_bottom[0] - x - h = right_bottom[1] - y - v = not (location_in_viewport(self, x, y) and - location_in_viewport(self, right_bottom[0], right_bottom[1])) - - if v: - if (self.run_js('return document.body.scrollHeight > window.innerHeight;') and - not self.run_js('return document.body.scrollWidth > window.innerWidth;')): - x += 10 - # elif heng and not shu: - # y += 5 - - vp = {'x': x, 'y': y, 'width': w, 'height': h, 'scale': 1} - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, - captureBeyondViewport=v, clip=vp)['data'] - - else: - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type)['data'] - - if as_base64: - return png - - from base64 import b64decode - png = b64decode(png) - - if as_bytes: - return png - - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, 'wb') as f: - f.write(png) - return str(path.absolute()) - - -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page = page - - @property - def load_strategy(self): - """返回用于设置页面加载策略的对象""" - return PageLoadStrategy(self._page) - - @property - def scroll(self): - """返回用于设置页面滚动设置的对象""" - return PageScrollSetter(self._page.scroll) - - def retry_times(self, times): - """设置连接失败重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败重连间隔""" - self._page.retry_interval = interval - - def timeouts(self, implicit=None, page_load=None, script=None): - """设置超时时间,单位为秒 - :param implicit: 查找元素超时时间 - :param page_load: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: None - """ - if implicit is not None: - 
self._page.timeouts.implicit = implicit - self._page._timeout = implicit - - if page_load is not None: - self._page.timeouts.page_load = page_load - - if script is not None: - self._page.timeouts.script = script - - def user_agent(self, ua, platform=None): - """为当前tab设置user agent,只在当前tab有效 - :param ua: user agent字符串 - :param platform: platform字符串 - :return: None - """ - keys = {'userAgent': ua} - if platform: - keys['platform'] = platform - self._page.run_cdp('Emulation.setUserAgentOverride', **keys) - - def session_storage(self, item, value): - """设置或删除某项sessionStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def local_storage(self, item, value): - """设置或删除某项localStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def cookies(self, cookies): - """设置cookies值 - :param cookies: cookies信息 - :return: None - """ - set_browser_cookies(self._page, cookies) - - def upload_files(self, files): - """等待上传的文件路径 - :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 - :return: None - """ - if not self._page._upload_list: - self._page.driver.Page.fileChooserOpened = self._page._onFileChooserOpened - self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) - - if isinstance(files, str): - files = files.split('\n') - self._page._upload_list = [str(Path(i).absolute()) for i in files] - - def headers(self, headers: dict) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - self._page.run_cdp('Network.enable') - self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) - - -class ChromiumBaseWaiter(object): - def __init__(self, 
page_or_ele): - """ - :param page_or_ele: 页面对象或元素对象 - """ - self._driver = page_or_ele - self._listener = None - - def ele_delete(self, loc_or_ele, timeout=None): - """等待元素从DOM中删除 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :return: 是否等待成功 - """ - if isinstance(loc_or_ele, (str, tuple)): - ele = self._driver._ele(loc_or_ele, timeout=.3, raise_err=False) - return ele.wait.delete(timeout) if ele else True - return loc_or_ele.wait.delete(timeout) - - def ele_display(self, loc_or_ele, timeout=None): - """等待元素变成显示状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False) - return ele.wait.display(timeout) if ele else False - - def ele_hidden(self, loc_or_ele, timeout=None): - """等待元素变成隐藏状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False) - return ele.wait.hidden(timeout) - - def load_start(self, timeout=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, gap=.002) - - def load_complete(self, timeout=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, start=False) - - def upload_paths_inputted(self): - """等待自动填写上传文件路径""" - while self._driver._upload_list: - sleep(.01) - - def _loading(self, timeout=None, start=True, gap=.01): - """等待页面开始加载或加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param start: 等待开始还是结束 - :param gap: 间隔秒数 - :return: 是否等待成功 - """ - if timeout != 0: - if timeout is None or timeout is True: - timeout = self._driver.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._driver.is_loading == start: - return True - sleep(gap) - return False - - def set_targets(self, targets, is_regex=False): - """指定要等待的数据包 - :param targets: 
要匹配的数据包url特征,可用list等传入多个 - :param is_regex: 设置的target是否正则表达式 - :return: None - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.set_targets(targets, is_regex) - - def data_packets(self, timeout=None, any_one=False): - """等待指定数据包加载完成 - :param timeout: 超时时间,为None则使用页面对象timeout - :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 - :return: ResponseData对象或监听结果字典 - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - return self._listener.listen(timeout, any_one) - - def stop_listening(self): - """停止监听数据包""" - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.stop() - - -class NetworkListener(object): - def __init__(self, page): - self._page = page - self._targets = None - self._is_regex = False - self._results = {} - self._single = False - self._requests = {} - - def set_targets(self, targets, is_regex=False): - """指定要等待的数据包 - :param targets: 要匹配的数据包url特征,可用list等传入多个 - :param is_regex: 设置的target是否正则表达式 - :return: None - """ - if not isinstance(targets, (str, list, tuple, set)): - raise TypeError('targets只能是str、list、tuple、set。') - self._is_regex = is_regex - if isinstance(targets, str): - self._targets = {targets} - self._single = True - else: - self._targets = set(targets) - self._single = False - self._page.run_cdp('Network.enable') - if targets is not None: - self._page.driver.Network.requestWillBeSent = self._requestWillBeSent - self._page.driver.Network.responseReceived = self._response_received - self._page.driver.Network.loadingFinished = self._loading_finished - else: - self.stop() - - def stop(self): - """停止监听数据包""" - self._page.run_cdp('Network.disable') - self._page.driver.Network.requestWillBeSent = None - self._page.driver.Network.responseReceived = None - self._page.driver.Network.loadingFinished = None - - def listen(self, timeout=None, any_one=False): - """等待指定数据包加载完成 - :param timeout: 超时时间,为None则使用页面对象timeout - :param any_one: 
多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 - :return: ResponseData对象或监听结果字典 - """ - if self._targets is None: - raise RuntimeError('必须先用set_targets()设置等待目标。') - - timeout = timeout if timeout is not None else self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._results and (any_one or set(self._results) == self._targets): - break - sleep(.1) - - self._requests = {} - if not self._results: - return False - r = list(self._results.values())[0] if self._single else self._results - self._results = {} - return r - - def _response_received(self, **kwargs): - """接收到返回信息时处理方法""" - if kwargs['requestId'] in self._requests: - self._requests[kwargs['requestId']]['response'] = kwargs['response'] - - def _loading_finished(self, **kwargs): - """请求完成时处理方法""" - request_id = kwargs['requestId'] - request = self._requests.get(request_id) - if request: - try: - r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) - body = r['body'] - is_base64 = r['base64Encoded'] - except CallMethodError: - body = '' - is_base64 = False - - target = request['target'] - rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) - rd.method = request['method'] - rd.postData = request['post_data'] - rd._base64_body = is_base64 - rd.requestHeaders = request['request_headers'] - self._results[target] = rd - - def _requestWillBeSent(self, **kwargs): - """接收到请求时的回调函数""" - for target in self._targets: - if (self._is_regex and search(target, kwargs['request']['url'])) or ( - not self._is_regex and target in kwargs['request']['url']): - self._requests[kwargs['requestId']] = {'target': target, - 'method': kwargs['request']['method'], - 'post_data': kwargs['request'].get('postData', None), - 'request_headers': kwargs['request']['headers']} - break - - -class ChromiumPageScroll(ChromiumScroll): - def __init__(self, page): - """ - :param page: 页面对象 - """ - super().__init__(page) - self.t1 = 'window' - self.t2 = 
'document.documentElement' - - def to_see(self, loc_or_ele, center=None): - """滚动页面直到元素可见 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 - :return: None - """ - ele = self._driver._ele(loc_or_ele) - self._to_see(ele, center) - - def _to_see(self, ele, center): - """执行滚动页面直到元素可见 - :param ele: 元素对象 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 - :return: None - """ - txt = 'true' if center else 'false' - ele.run_js(f'this.scrollIntoViewIfNeeded({txt});') - if center or (center is not False and ele.states.is_covered): - ele.run_js('''function getWindowScrollTop() {var scroll_top = 0; - if (document.documentElement && document.documentElement.scrollTop) { - scroll_top = document.documentElement.scrollTop; - } else if (document.body) {scroll_top = document.body.scrollTop;} - return scroll_top;} - const { top, height } = this.getBoundingClientRect(); - const elCenter = top + height / 2; - const center = window.innerHeight / 2; - window.scrollTo({top: getWindowScrollTop() - (center - elCenter), - behavior: 'instant'});''') - self._wait_scrolled() - - -class Timeout(object): - """用于保存d模式timeout信息的类""" - - def __init__(self, page, implicit=None, page_load=None, script=None): - """ - :param page: ChromiumBase页面 - :param implicit: 默认超时时间 - :param page_load: 页面加载超时时间 - :param script: js超时时间 - """ - self._page = page - self.implicit = 10 if implicit is None else implicit - self.page_load = 30 if page_load is None else page_load - self.script = 30 if script is None else script - - def __repr__(self): - return str({'implicit': self.implicit, 'page_load': self.page_load, 'script': self.script}) - - -class PageLoadStrategy(object): - """用于设置页面加载策略的类""" - - def __init__(self, page): - """ - :param page: ChromiumBase对象 - """ - self._page = page - - def __call__(self, value): - """设置加载策略 - :param value: 可选 'normal', 'eager', 'none' - :return: None - """ - if value.lower() not in ('normal', 'eager', 'none'): - raise ValueError("只能选择 
'normal', 'eager', 'none'。") - self._page._page_load_strategy = value - - def normal(self): - """设置页面加载策略为normal""" - self._page._page_load_strategy = 'normal' - - def eager(self): - """设置页面加载策略为eager""" - self._page._page_load_strategy = 'eager' - - def none(self): - """设置页面加载策略为none""" - self._page._page_load_strategy = 'none' - - -class PageScrollSetter(object): - def __init__(self, scroll): - self._scroll = scroll - - def wait_complete(self, on_off=True): - """设置滚动命令后是否等待完成 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - self._scroll._wait_complete = on_off - - def smooth(self, on_off=True): - """设置页面滚动是否平滑滚动 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - b = 'smooth' if on_off else 'auto' - self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') - self._scroll._wait_complete = on_off - - -class Screencast(object): - def __init__(self, page): - self._page = page - self._path = None - self._running = False - self._enable = False - self._mode = 'video' - - @property - def set_mode(self): - """返回用于设置录屏幕式的对象""" - return ScreencastMode(self) - - def start(self, save_path=None): - """开始录屏 - :param save_path: 录屏保存位置 - :return: None - """ - self.set_save_path(save_path) - if self._path is None: - raise ValueError('save_path必须设置。') - clean_folder(self._path) - if self._mode.startswith('frugal'): - self._page.driver.Page.screencastFrame = self._onScreencastFrame - self._page.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100) - - elif not self._mode.startswith('js'): - self._running = True - self._enable = True - Thread(target=self._run).start() - - else: - js = ''' - async function () { - stream = await navigator.mediaDevices.getDisplayMedia({video: true, audio: true}) - mime = MediaRecorder.isTypeSupported("video/webm; codecs=vp9") - ? 
"video/webm; codecs=vp9" - : "video/webm" - mediaRecorder = new MediaRecorder(stream, {mimeType: mime}) - DrissionPage_Screencast_chunks = [] - mediaRecorder.addEventListener('dataavailable', function(e) { - DrissionPage_Screencast_blob_ok = false; - DrissionPage_Screencast_chunks.push(e.data); - DrissionPage_Screencast_blob_ok = true; - }) - mediaRecorder.start() - - mediaRecorder.addEventListener('stop', function(){ - while(DrissionPage_Screencast_blob_ok==false){} - DrissionPage_Screencast_blob = new Blob(DrissionPage_Screencast_chunks, - {type: DrissionPage_Screencast_chunks[0].type}); - }) - } - ''' - print('请手动选择要录制的目标。') - self._page.run_js('var DrissionPage_Screencast_blob;var DrissionPage_Screencast_blob_ok=false;') - self._page.run_js(js) - - def stop(self, video_name=None): - """停止录屏 - :param video_name: 视频文件名,为None时以当前时间名命 - :return: 文件路径 - """ - if video_name and not video_name.endswith('mp4'): - video_name = f'{video_name}.mp4' - name = f'{time()}.mp4' if not video_name else video_name - path = f'{self._path}{sep}{name}' - - if self._mode.startswith('js'): - self._page.run_js('mediaRecorder.stop();', as_expr=True) - while not self._page.run_js('return DrissionPage_Screencast_blob_ok;'): - sleep(.1) - blob = self._page.run_js('return DrissionPage_Screencast_blob;') - uuid = self._page.run_cdp('IO.resolveBlob', objectId=blob['result']['objectId'])['uuid'] - data = self._page.run_cdp('IO.read', handle=f'blob:{uuid}')['data'] - with open(path, 'wb') as f: - f.write(b64decode(data)) - return path - - if self._mode.startswith('frugal'): - self._page.driver.Page.screencastFrame = None - self._page.run_cdp('Page.stopScreencast') - else: - self._enable = False - while self._running: - sleep(.1) - - if self._mode.endswith('imgs'): - return str(Path(self._path).absolute()) - - if not str(video_name).isascii() or not str(self._path).isascii(): - raise TypeError('转换成视频仅支持英文路径和文件名。') - - try: - from cv2 import VideoWriter, imread, VideoWriter_fourcc - from numpy 
import fromfile, uint8 - except ModuleNotFoundError: - raise ModuleNotFoundError('请先安装cv2,pip install opencv-python') - - pic_list = Path(self._path).glob('*.jpg') - img = imread(str(next(pic_list))) - imgInfo = img.shape - size = (imgInfo[1], imgInfo[0]) - - videoWrite = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size) - - for i in pic_list: - img = imread(str(i)) - videoWrite.write(img) - - clean_folder(self._path, ignore=(name,)) - return f'{self._path}{sep}{name}' - - def set_save_path(self, save_path=None): - """设置保存路径 - :param save_path: 保存路径 - :return: None - """ - if save_path: - save_path = Path(save_path) - if save_path.exists() and save_path.is_file(): - raise TypeError('save_path必须指定文件夹。') - save_path.mkdir(parents=True, exist_ok=True) - self._path = save_path - - def _run(self): - """非节俭模式运行方法""" - self._running = True - while self._enable: - p = self._path / f'{time()}.jpg' - self._page.get_screenshot(path=p) - sleep(.04) - self._running = False - - def _onScreencastFrame(self, **kwargs): - """节俭模式运行方法""" - with open(f'{self._path}\\{kwargs["metadata"]["timestamp"]}.jpg', 'wb') as f: - f.write(b64decode(kwargs['data'])) - self._page.run_cdp('Page.screencastFrameAck', sessionId=kwargs['sessionId']) - - -class ScreencastMode(object): - def __init__(self, screencast): - self._screencast = screencast - - def video_mode(self): - self._screencast._mode = 'video' - - def frugal_video_mode(self): - self._screencast._mode = 'frugal_video' - - def js_video_mode(self): - self._screencast._mode = 'js_video' - - def frugal_imgs_mode(self): - self._screencast._mode = 'frugal_imgs' - - def imgs_mode(self): - self._screencast._mode = 'imgs' diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi deleted file mode 100644 index 94d962d..0000000 --- a/DrissionPage/chromium_base.pyi +++ /dev/null @@ -1,372 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union, 
Tuple, List, Any, Dict - -from DataRecorder import Recorder -from requests import Session -from requests.cookies import RequestsCookieJar - -from .base import BasePage -from .chromium_driver import ChromiumDriver -from .chromium_element import ChromiumElement, ChromiumScroll -from .chromium_frame import ChromiumFrame -from .commons.constants import NoneElement -from .commons.web import ResponseData -from .session_element import SessionElement - - -class ChromiumBase(BasePage): - - def __init__(self, - address: Union[str, int], - tab_id: str = None, - timeout: float = None): - self._control_session: Session = ... - self.address: str = ... - self._tab_obj: ChromiumDriver = ... - self._is_reading: bool = ... - self._timeouts: Timeout = ... - self._first_run: bool = ... - self._is_loading: bool = ... - self._page_load_strategy: str = ... - self._scroll: ChromiumScroll = ... - self._url: str = ... - self._root_id: str = ... - self._debug: bool = ... - self._debug_recorder: Recorder = ... - self._upload_list: list = ... - self._wait: ChromiumBaseWaiter = ... - self._set: ChromiumBaseSetter = ... - self._screencast: Screencast = ... - - def _connect_browser(self, tab_id: str = None) -> None: ... - - def _chromium_init(self): ... - - def _driver_init(self, tab_id: str) -> None: ... - - def _get_document(self) -> None: ... - - def _wait_loaded(self, timeout: float = None) -> bool: ... - - def _onFrameStartedLoading(self, **kwargs): ... - - def _onFrameStoppedLoading(self, **kwargs): ... - - def _onLoadEventFired(self, **kwargs): ... - - def _onDocumentUpdated(self, **kwargs): ... - - def _onFrameNavigated(self, **kwargs): ... - - def _onFileChooserOpened(self, **kwargs): ... - - def _set_start_options(self, address, none) -> None: ... - - def _set_runtime_settings(self) -> None: ... - - def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement], - timeout: float = None) -> ChromiumElement: ... - - @property - def title(self) -> str: ... 
- - @property - def driver(self) -> ChromiumDriver: ... - - @property - def is_loading(self) -> bool: ... - - @property - def is_alive(self) -> bool: ... - - @property - def url(self) -> str: ... - - @property - def _browser_url(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> Union[dict, None]: ... - - @property - def tab_id(self) -> str: ... - - @property - def ready_state(self) -> Union[str, None]: ... - - @property - def size(self) -> Tuple[int, int]: ... - - @property - def active_ele(self) -> ChromiumElement: ... - - @property - def page_load_strategy(self) -> str: ... - - @property - def user_agent(self) -> str: ... - - @property - def scroll(self) -> ChromiumPageScroll: ... - - @property - def timeouts(self) -> Timeout: ... - - @property - def upload_list(self) -> list: ... - - @property - def wait(self) -> ChromiumBaseWaiter: ... - - @property - def set(self) -> ChromiumBaseSetter: ... - - @property - def screencast(self) -> Screencast: ... - - def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - - def run_js_loaded(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - - def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None, - timeout: float = None) -> Union[None, bool]: ... - - def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[ - list, dict]: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None) -> ChromiumElement: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ - -> Union[SessionElement, str, NoneElement]: ... 
- - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _find_elements(self, - loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... - - def refresh(self, ignore_cache: bool = False) -> None: ... - - def forward(self, steps: int = 1) -> None: ... - - def back(self, steps: int = 1) -> None: ... - - def _forward_or_back(self, steps: int) -> None: ... - - def stop_loading(self) -> None: ... - - def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None: ... - - def get_frame(self, loc_ind_ele: Union[str, int, tuple, ChromiumFrame], timeout: float = None) -> ChromiumFrame: ... - - def get_frames(self, loc: Union[str, tuple] = None, timeout: float = None) -> List[ChromiumFrame]: ... - - def run_cdp(self, cmd: str, **cmd_args) -> dict: ... - - def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict: ... - - def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def get_screenshot(self, path: [str, Path] = None, - as_bytes: [bool, str] = None, as_base64: [bool, str] = None, - full_page: bool = False, - left_top: Tuple[int, int] = None, - right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ... - - def _get_screenshot(self, path: [str, Path] = None, - as_bytes: [bool, str] = None, as_base64: [bool, str] = None, - full_page: bool = False, - left_top: Tuple[int, int] = None, - right_bottom: Tuple[int, int] = None, - ele: ChromiumElement = None) -> Union[str, bytes]: ... - - def clear_cache(self, - session_storage: bool = True, - local_storage: bool = True, - cache: bool = True, - cookies: bool = True) -> None: ... 
- - def _d_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - show_errmsg: bool = False, - timeout: float = None) -> Union[bool, None]: ... - - -class ChromiumBaseWaiter(object): - def __init__(self, page: ChromiumBase): - self._driver: ChromiumBase = ... - self._listener: NetworkListener = ... - - def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... - - def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... - - def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... - - def _loading(self, timeout: float = None, start: bool = True, gap: float = .01) -> bool: ... - - def load_start(self, timeout: float = None) -> bool: ... - - def load_complete(self, timeout: float = None) -> bool: ... - - def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... - - def stop_listening(self) -> None: ... - - def data_packets(self, timeout: float = None, - any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... - - def upload_paths_inputted(self) -> None: ... - - -class NetworkListener(object): - def __init__(self, page): - self._page: ChromiumBase = ... - self._targets: Union[str, dict] = ... - self._single: bool = ... - self._results: Union[ResponseData, Dict[str, ResponseData], False] = ... - self._is_regex: bool = ... - self._requests: dict = ... - - def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... - - def stop(self) -> None: ... - - def listen(self, timeout: float = None, - any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... - - def _response_received(self, **kwargs) -> None: ... - - def _loading_finished(self, **kwargs) -> None: ... - - def _requestWillBeSent(self, **kwargs) -> None: ... 
- - -class ChromiumPageScroll(ChromiumScroll): - def __init__(self, page: ChromiumBase): ... - - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ... - - def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... - - -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page: ChromiumBase = ... - - @property - def load_strategy(self) -> PageLoadStrategy: ... - - @property - def scroll(self) -> PageScrollSetter: ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def session_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def local_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def upload_files(self, files: Union[str, list, tuple]) -> None: ... - - -class Timeout(object): - - def __init__(self, page: ChromiumBase, implicit=None, page_load=None, script=None): - self._page: ChromiumBase = ... - self.implicit: float = ... - self.page_load: float = ... - self.script: float = ... - - -class PageLoadStrategy(object): - def __init__(self, page: ChromiumBase): - self._page: ChromiumBase = ... - - def __call__(self, value: str) -> None: ... - - def normal(self) -> None: ... - - def eager(self) -> None: ... - - def none(self) -> None: ... - - -class PageScrollSetter(object): - def __init__(self, scroll: ChromiumPageScroll): - self._scroll: ChromiumPageScroll = ... - - def wait_complete(self, on_off: bool = True): ... - - def smooth(self, on_off: bool = True): ... 
- - -class Screencast(object): - def __init__(self, page: ChromiumBase): - self._page: ChromiumBase = ... - self._path: Path = ... - self._running: bool = ... - self._enable: bool = ... - self._mode: str = ... - - @property - def set_mode(self) -> ScreencastMode: ... - - def start(self, save_path: Union[str, Path] = None) -> None: ... - - def stop(self, video_name: str = None) -> str: ... - - def set_save_path(self, save_path: Union[str, Path] = None) -> None: ... - - def _run(self) -> None: ... - - def _onScreencastFrame(self, **kwargs) -> None: ... - - -class ScreencastMode(object): - def __init__(self, screencast: Screencast): - self._screencast: Screencast = ... - - def video_mode(self) -> None: ... - - def frugal_video_mode(self) -> None: ... - - def js_video_mode(self) -> None: ... - - def frugal_imgs_mode(self) -> None: ... - - def imgs_mode(self) -> None: ... diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py deleted file mode 100644 index f1a09a3..0000000 --- a/DrissionPage/chromium_driver.py +++ /dev/null @@ -1,250 +0,0 @@ -# -*- coding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from functools import partial -from json import dumps, loads -from queue import Queue, Empty -from threading import Thread, Event - -from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ - create_connection - -from .errors import CallMethodError - - -class GenericAttr(object): - def __init__(self, name, tab): - self.__dict__['name'] = name - self.__dict__['tab'] = tab - - def __getattr__(self, item): - method_name = f"{self.name}.{item}" - event_listener = self.tab.get_listener(method_name) - - if event_listener: - return event_listener - - return partial(self.tab.call_method, method_name) - - def __setattr__(self, key, value): - self.tab.set_listener(f"{self.name}.{key}", value) - - -class ChromiumDriver(object): - _INITIAL_ = 'initial' - _STARTED_ = 'started' - _STOPPED_ = 
'stopped' - - def __init__(self, tab_id, tab_type, address): - """ - :param tab_id: 标签页id - :param tab_type: 标签页类型 - :param address: 浏览器连接地址 - """ - self.id = tab_id - self.address = address - self.type = tab_type - self.debug = False - self.has_alert = False - - self._websocket_url = f'ws://{address}/devtools/{tab_type}/{tab_id}' - self._cur_id = 0 - self._ws = None - - self._recv_th = Thread(target=self._recv_loop) - self._handle_event_th = Thread(target=self._handle_event_loop) - self._recv_th.daemon = True - self._handle_event_th.daemon = True - - self._stopped = Event() - self._started = False - self.status = self._INITIAL_ - - self.event_handlers = {} - self.method_results = {} - self.event_queue = Queue() - - def _send(self, message, timeout=None): - """发送信息到浏览器,并返回浏览器返回的信息 - :param message: 发送给浏览器的数据 - :param timeout: 超时时间 - :return: 浏览器返回的数据 - """ - if 'id' not in message: - self._cur_id += 1 - message['id'] = self._cur_id - - message_json = dumps(message) - - if self.debug: - print(f"发> {message_json}") - - if not isinstance(timeout, (int, float)) or timeout > 1: - q_timeout = 1 - else: - q_timeout = timeout / 2.0 - - try: - self.method_results[message['id']] = Queue() - self._ws.send(message_json) - - while not self._stopped.is_set(): - try: - if isinstance(timeout, (int, float)): - if timeout < q_timeout: - q_timeout = timeout - timeout -= q_timeout - - return self.method_results[message['id']].get(timeout=q_timeout) - - except Empty: - if self.has_alert: - return {'error': {'message': 'alert exists'}, 'type': 'alert_exists'} - - if isinstance(timeout, (int, float)) and timeout <= 0: - raise TimeoutError(f"调用{message['method']}超时。") - - continue - - except Exception: - return None - - finally: - self.method_results.pop(message['id'], None) - - def _recv_loop(self): - """接收浏览器信息的守护线程方法""" - while not self._stopped.is_set(): - try: - self._ws.settimeout(1) - message_json = self._ws.recv() - message = loads(message_json) - except WebSocketTimeoutException: 
- continue - except (WebSocketException, OSError, WebSocketConnectionClosedException): - self.stop() - return - - if self.debug: - print(f'<收 {message_json}') - - if "method" in message: - self.event_queue.put(message) - - elif "id" in message: - if message["id"] in self.method_results: - self.method_results[message['id']].put(message) - - elif self.debug: - print(f'未知信息:{message}') - - def _handle_event_loop(self): - """当接收到浏览器信息,执行已绑定的方法""" - while not self._stopped.is_set(): - try: - event = self.event_queue.get(timeout=1) - except Empty: - continue - - if event['method'] in self.event_handlers: - try: - self.event_handlers[event['method']](**event['params']) - except Exception as e: - raise - # raise RuntimeError(f"\n回调函数错误:\n{e}") - - self.event_queue.task_done() - - def __getattr__(self, item): - attr = GenericAttr(item, self) - setattr(self, item, attr) - return attr - - def call_method(self, _method, *args, **kwargs): - """执行cdp方法 - :param _method: cdp方法名 - :param args: cdp参数 - :param kwargs: cdp参数 - :return: 执行结果 - """ - if not self._started: - self.start() - # raise RuntimeError("不能在启动前调用方法。") - if args: - raise CallMethodError("参数必须是key=value形式。") - - if self._stopped.is_set(): - return {'error': 'tab closed', 'type': 'tab_closed'} - - timeout = kwargs.pop("_timeout", None) - result = self._send({"method": _method, "params": kwargs}, timeout=timeout) - if result is None: - return {'error': 'tab closed', 'type': 'tab_closed'} - if 'result' not in result and 'error' in result: - return {'error': result['error']['message'], - 'type': result.get('type', 'call_method_error'), - 'method': _method, - 'args': kwargs} - - return result['result'] - - def start(self): - """启动连接""" - if self._started: - return False - if not self._websocket_url: - raise RuntimeError("已存在另一个连接。") - - self._started = True - self.status = self._STARTED_ - self._stopped.clear() - self._ws = create_connection(self._websocket_url, enable_multithread=True, - suppress_origin=True) - 
self._recv_th.start() - self._handle_event_th.start() - return True - - def stop(self): - """中断连接""" - if self._stopped.is_set(): - return False - if not self._started: - return True - - self.status = self._STOPPED_ - self._stopped.set() - if self._ws: - self._ws.close() - self._ws = None - self.event_handlers.clear() - self.method_results.clear() - self.event_queue.queue.clear() - return True - - def set_listener(self, event, callback): - """绑定cdp event和回调方法 - :param event: cdp event - :param callback: 绑定到cdp event的回调方法 - :return: 回调方法 - """ - if not callback: - return self.event_handlers.pop(event, None) - if not callable(callback): - raise RuntimeError("方法不能调用。") - - self.event_handlers[event] = callback - return True - - def get_listener(self, event): - """获取cdp event对应的回调方法 - :param event: cdp event - :return: 回调方法 - """ - return self.event_handlers.get(event, None) - - def __str__(self): - return f"<ChromiumDriver {self.id}>" - - __repr__ = __str__ diff --git a/DrissionPage/chromium_driver.pyi b/DrissionPage/chromium_driver.pyi deleted file mode 100644 index df1bf9e..0000000 --- a/DrissionPage/chromium_driver.pyi +++ /dev/null @@ -1,60 +0,0 @@ -# -*- coding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from queue import Queue -from threading import Thread, Event -from typing import Union, Callable - - -class GenericAttr(object): - def __init__(self, name: str, tab: ChromiumDriver): ... - - def __getattr__(self, item: str) -> Callable: ... - - def __setattr__(self, key: str, value: Callable) -> None: ... - - -class ChromiumDriver(object): - _INITIAL_: str - _STARTED_: str - _STOPPED_: str - id: str - address: str - type: str - debug: bool - has_alert: bool - _websocket_url: str - _cur_id: int - _ws = None - _recv_th: Thread - _handle_event_th: Thread - _stopped: Event - _started: bool - status: str - event_handlers: dict - method_results: dict - event_queue: Queue - - def __init__(self, tab_id: str, tab_type: str, address: str): ... 
- - def _send(self, message: dict, timeout: float = None) -> dict: ... - - def _recv_loop(self) -> None: ... - - def _handle_event_loop(self) -> None: ... - - def __getattr__(self, item: str) -> Callable: ... - - def call_method(self, _method: str, *args, **kwargs) -> dict: ... - - def start(self) -> bool: ... - - def stop(self) -> bool: ... - - def set_listener(self, event: str, callback: Union[Callable, None]) -> Union[Callable, None, bool]: ... - - def get_listener(self, event: str) -> Union[Callable, None]: ... - - def __str__(self) -> str: ... diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py deleted file mode 100644 index 353182b..0000000 --- a/DrissionPage/chromium_element.py +++ /dev/null @@ -1,2134 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from os import sep -from os.path import basename -from pathlib import Path -from time import perf_counter, sleep - -from .base import DrissionElement, BaseElement -from .commons.constants import FRAME_ELEMENT, NoneElement, Settings -from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinitions -from .commons.locator import get_loc -from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll -from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ - CallMethodError, NoResourceError, CanNotClickError -from .session_element import make_session_ele - - -class ChromiumElement(DrissionElement): - """控制浏览器元素的对象""" - - def __init__(self, page, node_id=None, obj_id=None, backend_id=None): - """node_id、obj_id和backend_id必须至少传入一个 - :param page: 元素所在ChromePage页面对象 - :param node_id: cdp中的node id - :param obj_id: js中的object id - :param backend_id: backend id - """ - super().__init__(page) - self._select = None - self._scroll = None - self._locations = None - self._set = None - self._states = None - self._pseudo = None - self._click 
= None - self._tag = None - self._wait = None - - if node_id and obj_id and backend_id: - self._node_id = node_id - self._obj_id = obj_id - self._backend_id = backend_id - elif node_id: - self._node_id = node_id - self._obj_id = self._get_obj_id(node_id) - self._backend_id = self._get_backend_id(self._node_id) - elif obj_id: - self._node_id = self._get_node_id(obj_id) - self._obj_id = obj_id - self._backend_id = self._get_backend_id(self._node_id) - elif backend_id: - self._obj_id = self._get_obj_id(backend_id=backend_id) - self._node_id = self._get_node_id(obj_id=self._obj_id) - self._backend_id = backend_id - else: - raise ElementLossError - - self._ids = ChromiumElementIds(self) - doc = self.run_js('return this.ownerDocument;') - self._doc_id = doc['objectId'] if doc else None - - def __repr__(self): - attrs = self.attrs - attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] - return f'<ChromiumElement {self.tag} {" ".join(attrs)}>' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: ChromiumElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - @property - def tag(self): - """返回元素tag""" - if self._tag is None: - self._tag = self.page.run_cdp('DOM.describeNode', - backendNodeId=self._backend_id)['node']['localName'].lower() - return self._tag - - @property - def html(self): - """返回元素outerHTML文本""" - return self.page.run_cdp('DOM.getOuterHTML', backendNodeId=self._backend_id)['outerHTML'] - - @property - def inner_html(self): - """返回元素innerHTML文本""" - return self.run_js('return this.innerHTML;') - - @property - def attrs(self): - """返回元素所有attribute属性""" - try: - attrs = self.page.run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] - return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} - except CallMethodError: # 文档根元素不能调用此方法 - return {} - - @property - def text(self): - """返回元素内所有文本,文本已格式化""" - return 
get_ele_txt(make_session_ele(self.html)) - - @property - def raw_text(self): - """返回未格式化处理的元素内文本""" - return self.prop('innerText') - - # -----------------d模式独有属性------------------- - @property - def ids(self): - """返回获取内置id的对象""" - return self._ids - - @property - def size(self): - """返回元素宽和高组成的元组""" - border = self.page.run_cdp('DOM.getBoxModel', backendNodeId=self._backend_id)['model']['border'] - return int(border[2] - border[0]), int(border[5] - border[1]) - - @property - def set(self): - """返回用于设置元素属性的对象""" - if self._set is None: - self._set = ChromiumElementSetter(self) - return self._set - - @property - def states(self): - """返回用于获取元素状态的对象""" - if self._states is None: - self._states = ChromiumElementStates(self) - return self._states - - @property - def pseudo(self): - """返回用于获取伪元素内容的对象""" - if self._pseudo is None: - self._pseudo = Pseudo(self) - return self._pseudo - - @property - def location(self): - """返回元素左上角的绝对坐标""" - return self.locations.location - - @property - def locations(self): - """返回用于获取元素位置的对象""" - if self._locations is None: - self._locations = Locations(self) - return self._locations - - @property - def shadow_root(self): - """返回当前元素的shadow_root元素对象""" - info = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] - if not info.get('shadowRoots', None): - return None - - return ChromiumShadowRoot(self, backend_id=info['shadowRoots'][0]['backendNodeId']) - - @property - def sr(self): - """返回当前元素的shadow_root元素对象""" - return self.shadow_root - - @property - def scroll(self): - """用于滚动滚动条的对象""" - if self._scroll is None: - self._scroll = ChromiumElementScroll(self) - return self._scroll - - @property - def click(self): - """返回用于点击的对象""" - if self._click is None: - self._click = Click(self) - return self._click - - @property - def wait(self): - """返回用于等待的对象""" - if self._wait is None: - self._wait = ChromiumElementWaiter(self.page, self) - return self._wait - - @property - def select(self): - 
"""返回专门处理下拉列表的Select类,非下拉列表元素返回False""" - if self._select is None: - if self.tag != 'select': - self._select = False - else: - self._select = ChromiumSelect(self) - - return self._select - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - return super().parent(level_or_loc) - - def child(self, filter_loc='', index=1, timeout=0, ele_only=True): - """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 直接子元素或节点文本 - """ - return super().child(index, filter_loc, timeout, ele_only=ele_only) - - def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): - """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 兄弟元素或节点文本 - """ - return super().prev(index, filter_loc, timeout, ele_only=ele_only) - - def next(self, filter_loc='', index=1, timeout=0, ele_only=True): - """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 兄弟元素或节点文本 - """ - return super().next(index, filter_loc, timeout, ele_only=ele_only) - - def before(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果,1开始 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 本元素前面的某个元素或节点 - """ - return super().before(index, filter_loc, timeout, ele_only=ele_only) - - def after(self, filter_loc='', index=1, timeout=None, ele_only=True): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param index: 第几个查询结果,1开始 - :param 
timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 本元素后面的某个元素或节点 - """ - return super().after(index, filter_loc, timeout, ele_only=ele_only) - - def children(self, filter_loc='', timeout=0, ele_only=True): - """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 直接子元素或节点文本组成的列表 - """ - return super().children(filter_loc, timeout, ele_only=ele_only) - - def prevs(self, filter_loc='', timeout=0, ele_only=True): - """返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().prevs(filter_loc, timeout, ele_only=ele_only) - - def nexts(self, filter_loc='', timeout=0, ele_only=True): - """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().nexts(filter_loc, timeout, ele_only=ele_only) - - def befores(self, filter_loc='', timeout=None, ele_only=True): - """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 本元素前面的元素或节点组成的列表 - """ - return super().befores(filter_loc, timeout, ele_only=ele_only) - - def afters(self, filter_loc='', timeout=None, ele_only=True): - """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 - :return: 本元素后面的元素或节点组成的列表 - """ - return super().afters(filter_loc, timeout, ele_only=ele_only) - - def attr(self, attr): - """返回一个attribute属性值 - :param attr: 属性名 - :return: 属性值文本,没有该属性返回None - """ - attrs = self.attrs - if attr == 'href': # 获取href属性时返回绝对url - link = attrs.get('href', None) - if not link or link.lower().startswith(('javascript:', 
'mailto:')): - return link - else: - return make_absolute_link(link, self.page) - - elif attr == 'src': - return make_absolute_link(attrs.get('src', None), self.page) - - elif attr == 'text': - return self.text - - elif attr == 'innerText': - return self.raw_text - - elif attr in ('html', 'outerHTML'): - return self.html - - elif attr == 'innerHTML': - return self.inner_html - - else: - return attrs.get(attr, None) - - def remove_attr(self, attr): - """删除元素一个attribute属性 - :param attr: 属性名 - :return: None - """ - self.run_js(f'this.removeAttribute("{attr}");') - - def prop(self, prop): - """获取一个property属性值 - :param prop: 属性名 - :return: 属性值文本 - """ - p = self.page.run_cdp('Runtime.getProperties', objectId=self._obj_id)['result'] - for i in p: - if i['name'] == prop: - if 'value' not in i or 'value' not in i['value']: - return None - - value = i['value']['value'] - return format_html(value) if isinstance(value, str) else value - - def run_js(self, script, *args, as_expr=False): - """对本元素执行javascript代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... - :param as_expr: 是否作为表达式运行,为True时args无效 - :return: 运行的结果 - """ - return run_js(self, script, as_expr, self.page.timeouts.script, args) - - def run_async_js(self, script, *args, as_expr=False): - """以异步方式对本元素执行javascript代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... 
- :param as_expr: 是否作为表达式运行,为True时args无效 - :return: None - """ - from threading import Thread - Thread(target=run_js, args=(self, script, as_expr, self.page.timeouts.script, args, True)).start() - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: ChromiumElement对象或属性、文本 - """ - return self._ele(loc_or_str, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: ChromiumElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素,以SessionElement形式返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - if self.tag in FRAME_ELEMENT: - return make_session_ele(self.inner_html, loc_or_str) - return make_session_ele(self, loc_or_str) - - def s_eles(self, loc_or_str=None): - """查找所有符合条件的元素,以SessionElement列表形式返回 - :param loc_or_str: 定位符 - :return: SessionElement或属性、文本组成的列表 - """ - if self.tag in FRAME_ELEMENT: - return make_session_ele(self.inner_html, loc_or_str, single=False) - return make_session_ele(self, loc_or_str, single=False) - - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 - :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 - :return: ChromiumElement对象或文本、属性或其组成的列表 - """ - return find_in_chromium_ele(self, loc_or_str, single, timeout, relative=relative) - - def style(self, style, pseudo_ele=''): - """返回元素样式属性值,可获取伪元素属性值 - :param style: 样式属性名称 - :param pseudo_ele: 伪元素名称(如有) - :return: 样式属性的值 - """ - if pseudo_ele: - pseudo_ele = f', "{pseudo_ele}"' if 
pseudo_ele.startswith(':') else f', "::{pseudo_ele}"' - js = f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");' - return self.run_js(js) - - def get_src(self, timeout=None, base64_to_bytes=True): - """返回元素src资源,base64的会转为bytes返回,其它返回str - :param timeout: 等待资源加载的超时时间 - :param base64_to_bytes: 为True时,如果是base64数据,转换为bytes格式 - :return: 资源内容 - """ - timeout = self.page.timeout if timeout is None else timeout - if self.tag == 'img': # 等待图片加载完成 - js = ('return this.complete && typeof this.naturalWidth != "undefined" ' - '&& this.naturalWidth > 0 && typeof this.naturalHeight != "undefined" ' - '&& this.naturalHeight > 0') - end_time = perf_counter() + timeout - while not self.run_js(js) and perf_counter() < end_time: - sleep(.1) - - result = None - end_time = perf_counter() + timeout - while perf_counter() < end_time: - src = self.prop('currentSrc') - if not src: - continue - - node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] - frame = node.get('frameId', None) - frame = frame or self.page.tab_id - - try: - result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) - break - except CallMethodError: - sleep(.1) - - if not result: - return None - - if result['base64Encoded']: - if base64_to_bytes: - from base64 import b64decode - data = b64decode(result['content']) - else: - data = result['content'] - else: - data = result['content'] - return data - - def save(self, path=None, rename=None, timeout=None): - """保存图片或其它有src属性的元素的资源 - :param path: 文件保存路径,为None时保存到当前文件夹 - :param rename: 文件名称,为None时从资源url获取 - :param timeout: 等待资源加载的超时时间 - :return: None - """ - data = self.get_src(timeout=timeout) - if not data: - raise NoResourceError - - path = path or '.' 
- rename = rename or basename(self.prop('currentSrc')) - write_type = 'wb' if isinstance(data, bytes) else 'w' - - Path(path).mkdir(parents=True, exist_ok=True) - with open(f'{path}{sep}{rename}', write_type) as f: - f.write(data) - - def get_screenshot(self, path=None, as_bytes=None, as_base64=None): - """对当前元素截图,可保存到文件,或以字节方式返回 - :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' - :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 - :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 - :return: 图片完整路径或字节文本 - """ - if self.tag == 'img': # 等待图片加载完成 - js = ('return this.complete && typeof this.naturalWidth != "undefined" ' - '&& this.naturalWidth > 0 && typeof this.naturalHeight != "undefined" ' - '&& this.naturalHeight > 0') - end_time = perf_counter() + self.page.timeout - while not self.run_js(js) and perf_counter() < end_time: - sleep(.1) - - self.scroll.to_see(True) - left, top = self.location - width, height = self.size - left_top = (left, top) - right_bottom = (left + width, top + height) - if not path: - path = f'{self.tag}.jpg' - return self.page._get_screenshot(path, as_bytes=as_bytes, as_base64=as_base64, full_page=False, - left_top=left_top, right_bottom=right_bottom, ele=self) - - def input(self, vals, clear=True): - """输入文本或组合键,也可用于输入文件路径到input元素(路径间用\n间隔) - :param vals: 文本值或按键组合 - :param clear: 输入前是否清空文本框 - :return: None - """ - if self.tag == 'input' and self.attr('type') == 'file': - return self._set_file_input(vals) - - if clear and vals not in ('\n', '\ue007'): - self.clear(by_js=False) - else: - self._input_focus() - - # ------------处理字符------------- - if not isinstance(vals, (tuple, list)): - vals = (str(vals),) - modifier, vals = keys_to_typing(vals) - - if modifier != 0: # 包含修饰符 - for key in vals: - send_key(self, modifier, key) - return - - if vals.endswith(('\n', '\ue007')): - self.page.run_cdp('Input.insertText', text=vals[:-1]) - send_key(self, modifier, '\n') - else: - 
self.page.run_cdp('Input.insertText', text=vals) - - def clear(self, by_js=False): - """清空元素文本 - :param by_js: 是否用js方式清空,为False则用全选+del模拟输入删除 - :return: None - """ - if by_js: - self.run_js("this.value='';") - - else: - self._input_focus() - self.input(('\ue009', 'a', '\ue017'), clear=False) - - def _input_focus(self): - """输入前使元素获取焦点""" - try: - self.page.run_cdp('DOM.focus', backendNodeId=self._backend_id) - except Exception: - self.click(by_js=None) - - def focus(self): - """使元素获取焦点""" - try: - self.page.run_cdp('DOM.focus', backendNodeId=self._backend_id) - except Exception: - self.run_js('this.focus();') - - def hover(self, offset_x=None, offset_y=None): - """鼠标悬停,可接受偏移量,偏移量相对于元素左上角坐标。不传入x或y值时悬停在元素中点 - :param offset_x: 相对元素左上角坐标的x轴偏移量 - :param offset_y: 相对元素左上角坐标的y轴偏移量 - :return: None - """ - self.page.scroll.to_see(self) - x, y = offset_scroll(self, offset_x, offset_y) - self.page.run_cdp('Input.dispatchMouseEvent', type='mouseMoved', x=x, y=y) - - def drag(self, offset_x=0, offset_y=0, duration=.5): - """拖拽当前元素到相对位置 - :param offset_x: x变化值 - :param offset_y: y变化值 - :param duration: 拖动用时,传入0即瞬间到j达 - :return: None - """ - curr_x, curr_y = self.locations.midpoint - offset_x += curr_x - offset_y += curr_y - self.drag_to((offset_x, offset_y), duration) - - def drag_to(self, ele_or_loc, duration=.5): - """拖拽当前元素,目标为另一个元素或坐标元组(x, y) - :param ele_or_loc: 另一个元素或坐标元组,坐标为元素中点的坐标 - :param duration: 拖动用时,传入0即瞬间到j达 - :return: None - """ - # x, y:目标点坐标 - if isinstance(ele_or_loc, ChromiumElement): - target_x, target_y = ele_or_loc.locations.midpoint - elif isinstance(ele_or_loc, (list, tuple)): - target_x, target_y = ele_or_loc - else: - raise TypeError('需要ChromiumElement对象或坐标。') - - current_x, current_y = self.locations.midpoint - width = target_x - current_x - height = target_y - current_y - - duration = .02 if duration < .02 else duration - num = int(duration * 50) - - # 将要经过的点存入列表 - points = [(int(current_x + i * (width / num)), int(current_y + i * (height / num))) for 
i in range(1, num)] - points.append((target_x, target_y)) - - from .action_chains import ActionChains - actions = ActionChains(self.page) - actions.hold(self) - - # 逐个访问要经过的点 - for x, y in points: - t = perf_counter() - actions.move(x - current_x, y - current_y) - current_x, current_y = x, y - ss = .02 - perf_counter() + t - if ss > 0: - sleep(ss) - actions.release() - - def _get_obj_id(self, node_id=None, backend_id=None): - """根据传入node id或backend id获取js中的object id - :param node_id: cdp中的node id - :param backend_id: backend id - :return: js中的object id - """ - if node_id: - return self.page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] - else: - return self.page.run_cdp('DOM.resolveNode', backendNodeId=backend_id)['object']['objectId'] - - def _get_node_id(self, obj_id=None, backend_id=None): - """根据传入object id或backend id获取cdp中的node id - :param obj_id: js中的object id - :param backend_id: backend id - :return: cdp中的node id - """ - if obj_id: - return self.page.run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] - else: - return self.page.run_cdp('DOM.describeNode', backendNodeId=backend_id)['node']['nodeId'] - - def _get_backend_id(self, node_id): - """根据传入node id获取backend id - :param node_id: - :return: backend id - """ - return self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node']['backendNodeId'] - - def _get_ele_path(self, mode): - """返获取绝对的css路径或xpath路径""" - if mode == 'xpath': - txt1 = 'var tag = el.nodeName.toLowerCase();' - txt3 = ''' && sib.nodeName.toLowerCase()==tag''' - txt4 = ''' - if(nth>1){path = '/' + tag + '[' + nth + ']' + path;} - else{path = '/' + tag + path;}''' - txt5 = '''return path;''' - - elif mode == 'css': - txt1 = '' - txt3 = '' - txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' - txt5 = '''return path.substr(1);''' - - else: - raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") - - js = '''function(){ - function e(el) { - if (!(el instanceof Element)) return; - var path = ''; - while 
(el.nodeType === Node.ELEMENT_NODE) { - ''' + txt1 + ''' - var sib = el, nth = 0; - while (sib) { - if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} - sib = sib.previousSibling; - } - ''' + txt4 + ''' - el = el.parentNode; - } - ''' + txt5 + ''' - } - return e(this);} - ''' - t = self.run_js(js) - return f':root{t}' if mode == 'css' else t - - def _set_file_input(self, files): - """对上传控件写入路径 - :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 - :return: None - """ - if isinstance(files, str): - files = files.split('\n') - files = [str(Path(i).absolute()) for i in files] - self.page.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=self._backend_id) - - -class ChromiumShadowRoot(BaseElement): - """ChromiumShadowRoot是用于处理ShadowRoot的类,使用方法和ChromiumElement基本一致""" - - def __init__(self, parent_ele, obj_id=None, backend_id=None): - """ - :param parent_ele: shadow root 所在父元素 - :param obj_id: js中的object id - :param backend_id: cdp中的backend id - """ - super().__init__(parent_ele.page) - self.parent_ele = parent_ele - if backend_id: - self._backend_id = backend_id - self._obj_id = self._get_obj_id(backend_id) - self._node_id = self._get_node_id(self._obj_id) - elif obj_id: - self._obj_id = obj_id - self._node_id = self._get_node_id(obj_id) - self._backend_id = self._get_backend_id(self._node_id) - self._ids = Ids(self) - self._states = None - - def __repr__(self): - return f'<ChromiumShadowRoot in {self.parent_ele}>' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - @property - def tag(self): - """返回元素标签名""" - return 'shadow-root' - - @property - def html(self): - """返回outerHTML文本""" - return f'<shadow_root>{self.inner_html}</shadow_root>' - - @property - def inner_html(self): - """返回内部的html文本""" - return self.run_js('return this.innerHTML;') - - @property - def 
ids(self): - """返回获取内置id的对象""" - return self._ids - - @property - def states(self): - """返回用于获取元素状态的对象""" - if self._states is None: - self._states = ShadowRootStates(self) - return self._states - - def run_js(self, script, *args, as_expr=False): - """运行javascript代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... - :param as_expr: 是否作为表达式运行,为True时args无效 - :return: 运行的结果 - """ - return run_js(self, script, as_expr, self.page.timeouts.script, args) - - def run_async_js(self, script, *args, as_expr=False): - """以异步方式执行js代码 - :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... - :param as_expr: 是否作为表达式运行,为True时args无效 - :return: None - """ - from threading import Thread - Thread(target=run_js, args=(self, script, as_expr, self.page.timeouts.script, args)).start() - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: ChromiumElement对象 - """ - if isinstance(level_or_loc, int): - loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' - - elif isinstance(level_or_loc, (tuple, str)): - loc = get_loc(level_or_loc, True) - - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". 
/ ")}' - - else: - raise TypeError('level_or_loc参数只能是tuple、int或str。') - - return self.parent_ele._ele(loc, timeout=0, relative=True, raise_err=False) - - def child(self, filter_loc='', index=1): - """返回直接子元素元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param index: 第几个查询结果,1开始 - :return: 直接子元素或节点文本组成的列表 - """ - nodes = self.children(filter_loc=filter_loc) - if not nodes: - if Settings.raise_ele_not_found: - raise ElementNotFoundError - else: - return NoneElement() - - try: - return nodes[index - 1] - except IndexError: - if Settings.raise_ele_not_found: - raise ElementNotFoundError - else: - return NoneElement() - - def next(self, filter_loc='', index=1): - """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 第几个查询结果,1开始 - :return: ChromiumElement对象 - """ - nodes = self.nexts(filter_loc=filter_loc) - if nodes: - return nodes[index - 1] - if Settings.raise_ele_not_found: - raise ElementNotFoundError - else: - return NoneElement() - - def before(self, filter_loc='', index=1): - """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果,1开始 - :return: 本元素前面的某个元素或节点 - """ - nodes = self.befores(filter_loc=filter_loc) - if nodes: - return nodes[index - 1] - if Settings.raise_ele_not_found: - raise ElementNotFoundError - else: - return NoneElement() - - def after(self, filter_loc='', index=1): - """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param index: 后面第几个查询结果,1开始 - :return: 本元素后面的某个元素或节点 - """ - nodes = self.afters(filter_loc=filter_loc) - if nodes: - return nodes[index - 1] - if Settings.raise_ele_not_found: - raise ElementNotFoundError - else: - return NoneElement() - - def children(self, filter_loc=''): - """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: 直接子元素或节点文本组成的列表 - """ - if not filter_loc: - loc = '*' - else: - loc = get_loc(filter_loc, True) # 
把定位符转换为xpath - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = loc[1].lstrip('./') - - loc = f'xpath:./{loc}' - return self._ele(loc, single=False, relative=True) - - def nexts(self, filter_loc=''): - """返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: ChromiumElement对象组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) - - def befores(self, filter_loc=''): - """返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素前面的元素或节点组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./preceding::{loc}' - return self.parent_ele._ele(xpath, single=False, relative=True) - - def afters(self, filter_loc=''): - """返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选 - 查找范围不限同级元素,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素后面的元素或节点组成的列表 - """ - eles1 = self.nexts(filter_loc) - loc = get_loc(filter_loc, True)[1].lstrip('./') - xpath = f'xpath:./following::{loc}' - return eles1 + self.parent_ele._ele(xpath, single=False, relative=True) - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: ChromiumElement对象 - """ - return self._ele(loc_or_str, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: ChromiumElement对象组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def s_ele(self, loc_or_str=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - 
:return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_str) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement列表形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _find_elements(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 - :param raise_err: 找不到元素是是否抛出异常,为None时根据全局设置 - :return: ChromiumElement对象或其组成的列表 - """ - loc = get_loc(loc_or_str) - if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): - loc = loc[0], loc[1][5:] - - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - eles = make_session_ele(self.html).eles(loc) - while not eles and perf_counter() - t1 <= timeout: - eles = make_session_ele(self.html).eles(loc) - - if not eles: - return NoneElement() if single else eles - - css_paths = [i.css_path[47:] for i in eles] - if single: - node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css_paths[0])['nodeId'] - return make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement() - - else: - results = [] - for i in css_paths: - node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] - if node_id: - results.append(make_chromium_ele(self.page, node_id=node_id)) - return results - - def _get_node_id(self, obj_id): - """返回元素node id""" - return self.page.run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] - - def _get_obj_id(self, back_id): - """返回元素object id""" - return self.page.run_cdp('DOM.resolveNode', backendNodeId=back_id)['object']['objectId'] - - def _get_backend_id(self, node_id): - """返回元素object id""" - r = self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node'] - 
self._tag = r['localName'].lower() - return r['backendNodeId'] - - -class Ids(object): - def __init__(self, ele): - self._ele = ele - - @property - def node_id(self): - """返回元素cdp中的node id""" - return self._ele._node_id - - @property - def obj_id(self): - """返回元素js中的object id""" - return self._ele._obj_id - - @property - def backend_id(self): - """返回backend id""" - return self._ele._backend_id - - -class ChromiumElementIds(Ids): - @property - def doc_id(self): - """返回所在document的object id""" - return self._ele._doc_id - - -def find_in_chromium_ele(ele, loc, single=True, timeout=None, relative=True): - """在chromium元素中查找 - :param ele: ChromiumElement对象 - :param loc: 元素定位元组 - :param single: True则返回第一个,False则返回全部 - :param timeout: 查找元素超时时间 - :param relative: WebPage用于标记是否相对定位使用 - :return: 返回ChromiumElement元素或它们组成的列表 - """ - # ---------------处理定位符--------------- - if isinstance(loc, (str, tuple)): - loc = get_loc(loc) - else: - raise ValueError(f"定位符必须为str或长度为2的tuple对象。现在是:{loc}") - - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc_str}' - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): - loc_str = f'{ele.css_path}{loc[1]}' - loc = loc[0], loc_str - - timeout = timeout if timeout is not None else ele.page.timeout - - # ---------------执行查找----------------- - if loc[0] == 'xpath': - return find_by_xpath(ele, loc[1], single, timeout, relative=relative) - - else: - return find_by_css(ele, loc[1], single, timeout) - - -def find_by_xpath(ele, xpath, single, timeout, relative=True): - """执行用xpath在元素中查找元素 - :param ele: 在此元素中查找 - :param xpath: 查找语句 - :param single: 是否只返回第一个结果 - :param timeout: 超时时间 - :param relative: 是否相对定位 - :return: ChromiumElement或其组成的列表 - """ - type_txt = '9' if single else '7' - node_txt = 'this.contentDocument' if ele.tag in FRAME_ELEMENT and not relative else 'this' - js = make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt) - r = ele.page.run_cdp_loaded('Runtime.callFunctionOn', 
functionDeclaration=js, objectId=ele.ids.obj_id, - returnByValue=False, awaitPromise=True, userGesture=True) - if r['result']['type'] == 'string': - return r['result']['value'] - - if 'exceptionDetails' in r: - if 'The result is not a node set' in r['result']['description']: - js = make_js_for_find_ele_by_xpath(xpath, '1', node_txt) - r = ele.page.run_cdp_loaded('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele.ids.obj_id, - returnByValue=False, awaitPromise=True, userGesture=True) - return r['result']['value'] - else: - raise SyntaxError(f'查询语句错误:\n{r}') - - end_time = perf_counter() + timeout - while (r['result']['subtype'] == 'null' - or r['result']['description'] == 'NodeList(0)') and perf_counter() < end_time: - r = ele.page.run_cdp_loaded('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele.ids.obj_id, - returnByValue=False, awaitPromise=True, userGesture=True) - - if single: - return NoneElement() if r['result']['subtype'] == 'null' \ - else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) - - if r['result']['description'] == 'NodeList(0)': - return [] - else: - r = ele.page.run_cdp_loaded('Runtime.getProperties', objectId=r['result']['objectId'], - ownProperties=True)['result'] - return [make_chromium_ele(ele.page, obj_id=i['value']['objectId']) - if i['value']['type'] == 'object' else i['value']['value'] - for i in r[:-1]] - - -def find_by_css(ele, selector, single, timeout): - """执行用css selector在元素中查找元素 - :param ele: 在此元素中查找 - :param selector: 查找语句 - :param single: 是否只返回第一个结果 - :param timeout: 超时时间 - :return: ChromiumElement或其组成的列表 - """ - selector = selector.replace('"', r'\"') - find_all = '' if single else 'All' - node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame', 'shadow-root') else 'this' - js = f'function(){{return {node_txt}.querySelector{find_all}("{selector}");}}' - r = ele.page.run_cdp_loaded('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele.ids.obj_id, - returnByValue=False, 
awaitPromise=True, userGesture=True) - - end_time = perf_counter() + timeout - while ('exceptionDetails' in r or r['result']['subtype'] == 'null' or - r['result']['description'] == 'NodeList(0)') and perf_counter() < end_time: - r = ele.page.run_cdp_loaded('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele.ids.obj_id, - returnByValue=False, awaitPromise=True, userGesture=True) - - if 'exceptionDetails' in r: - raise SyntaxError(f'查询语句错误:\n{r}') - - if single: - return NoneElement() if r['result']['subtype'] == 'null' \ - else make_chromium_ele(ele.page, obj_id=r['result']['objectId']) - - if r['result']['description'] == 'NodeList(0)': - return [] - else: - r = ele.page.run_cdp_loaded('Runtime.getProperties', objectId=r['result']['objectId'], - ownProperties=True)['result'] - return [make_chromium_ele(ele.page, obj_id=i['value']['objectId']) for i in r] - - -def make_chromium_ele(page, node_id=None, obj_id=None): - """根据node id或object id生成相应元素对象 - :param page: ChromiumPage对象 - :param node_id: 元素的node id - :param obj_id: 元素的object id - :return: ChromiumElement对象或ChromiumFrame对象 - """ - if node_id: - node = page.run_cdp('DOM.describeNode', nodeId=node_id) - if node['node']['nodeName'] in ('#text', '#comment'): - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - obj_id = page.run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] - - elif obj_id: - node = page.run_cdp('DOM.describeNode', objectId=obj_id) - if node['node']['nodeName'] in ('#text', '#comment'): - return node['node']['nodeValue'] - backend_id = node['node']['backendNodeId'] - node_id = node['node']['nodeId'] - - else: - raise ElementLossError - - ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=backend_id) - if ele.tag in FRAME_ELEMENT: - from .chromium_frame import ChromiumFrame - ele = ChromiumFrame(page, ele) - - return ele - - -def make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt): - """生成用xpath在元素中查找元素的js文本 - :param 
xpath: xpath文本 - :param type_txt: 查找类型 - :param node_txt: 节点类型 - :return: js文本 - """ - for_txt = '' - - # 获取第一个元素、节点或属性 - if type_txt == '9': - return_txt = ''' -if(e.singleNodeValue==null){return null;} -else if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} -else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} -else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} -else{return e.singleNodeValue;}''' - - # 按顺序获取所有元素、节点或属性 - elif type_txt == '7': - for_txt = """ -var a=new Array(); -for(var i = 0; i <e.snapshotLength ; i++){ -if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);} -else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);} -else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);} -else{a.push(e.snapshotItem(i));}}""" - return_txt = 'return a;' - - elif type_txt == '2': - return_txt = 'return e.stringValue;' - elif type_txt == '1': - return_txt = 'return e.numberValue;' - else: - return_txt = 'return e.singleNodeValue;' - - xpath = xpath.replace(r"'", r"\'") - js = f'function(){{var e=document.evaluate(\'{xpath}\',{node_txt},null,{type_txt},null);\n{for_txt}\n{return_txt}}}' - - return js - - -def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): - """运行javascript代码 - :param page_or_ele: 页面对象或元素对象 - :param script: js文本 - :param as_expr: 是否作为表达式运行,为True时args无效 - :param timeout: 超时时间 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... 
- :return: js执行结果 - """ - if isinstance(page_or_ele, (ChromiumElement, ChromiumShadowRoot)): - page = page_or_ele.page - obj_id = page_or_ele.ids.obj_id - is_page = False - else: - page = page_or_ele - obj_id = page_or_ele._root_id - is_page = True - - try: - if as_expr: - res = page.run_cdp('Runtime.evaluate', expression=script, returnByValue=False, - awaitPromise=True, userGesture=True, timeout=timeout * 1000) - - else: - args = args or () - if not is_js_func(script): - script = f'function(){{{script}}}' - res = page.run_cdp('Runtime.callFunctionOn', functionDeclaration=script, objectId=obj_id, - arguments=[convert_argument(arg) for arg in args], returnByValue=False, - awaitPromise=True, userGesture=True) - - except ContextLossError: - if is_page: - raise ContextLossError('页面已被刷新,请尝试等待页面加载完成再执行操作。') - else: - raise ElementLossError('原来获取到的元素对象已不在页面内。') - - if res is None and page.driver.has_alert: # 存在alert的情况 - return None - - exceptionDetails = res.get('exceptionDetails') - if exceptionDetails: - raise JavaScriptError(f'\njavascript运行错误:\n{script}\n错误信息: \n{exceptionDetails}') - - try: - return parse_js_result(page, page_or_ele, res.get('result')) - except Exception: - return res - - -def parse_js_result(page, ele, result): - """解析js返回的结果""" - if 'unserializableValue' in result: - return result['unserializableValue'] - - the_type = result['type'] - - if the_type == 'object': - sub_type = result.get('subtype', None) - if sub_type == 'null': - return None - - elif sub_type == 'node': - class_name = result['className'] - if class_name == 'ShadowRoot': - return ChromiumShadowRoot(ele, obj_id=result['objectId']) - elif class_name == 'HTMLDocument': - return result - else: - return make_chromium_ele(page, obj_id=result['objectId']) - - elif sub_type == 'array': - r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], - ownProperties=True)['result'] - return [parse_js_result(page, ele, result=i['value']) for i in r[:-1]] - - elif 'objectId' in result 
and result['className'].lower() == 'object': # dict - r = page.run_cdp('Runtime.getProperties', objectId=result['objectId'], - ownProperties=True)['result'] - return {i['name']: parse_js_result(page, ele, result=i['value']) for i in r} - - else: - return result['value'] - - elif the_type == 'undefined': - return None - - else: - return result['value'] - - -def convert_argument(arg): - """把参数转换成js能够接收的形式""" - if isinstance(arg, ChromiumElement): - return {'objectId': arg.ids.obj_id} - - elif isinstance(arg, (int, float, str, bool)): - return {'value': arg} - - from math import inf - if arg == inf: - return {'unserializableValue': 'Infinity'} - if arg == -inf: - return {'unserializableValue': '-Infinity'} - - -def send_enter(ele): - """发送回车""" - data = {'type': 'keyDown', 'modifiers': 0, 'windowsVirtualKeyCode': 13, 'code': 'Enter', 'key': 'Enter', - 'text': '\r', 'autoRepeat': False, 'unmodifiedText': '\r', 'location': 0, 'isKeypad': False} - - ele.page.run_cdp('Input.dispatchKeyEvent', **data) - data['type'] = 'keyUp' - ele.page.run_cdp('Input.dispatchKeyEvent', **data) - - -def send_key(ele, modifier, key): - """发送一个字,在键盘中的字符触发按键,其它直接发送文本""" - if key not in keyDefinitions: - ele.page.run_cdp('Input.insertText', text=key) - - else: - description = keyDescriptionForString(modifier, key) - text = description['text'] - data = {'type': 'keyDown' if text else 'rawKeyDown', - 'modifiers': modifier, - 'windowsVirtualKeyCode': description['keyCode'], - 'code': description['code'], - 'key': description['key'], - 'text': text, - 'autoRepeat': False, - 'unmodifiedText': text, - 'location': description['location'], - 'isKeypad': description['location'] == 3} - - ele.page.run_cdp('Input.dispatchKeyEvent', **data) - data['type'] = 'keyUp' - ele.page.run_cdp('Input.dispatchKeyEvent', **data) - - -class ChromiumElementStates(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - @property - def is_selected(self): - """返回元素是否被选择""" - 
return self._ele.run_js('return this.selected;') - - @property - def is_checked(self): - """返回元素是否被选择""" - return self._ele.run_js('return this.checked;') - - @property - def is_displayed(self): - """返回元素是否显示""" - return not (self._ele.style('visibility') == 'hidden' - or self._ele.run_js('return this.offsetParent === null;') - or self._ele.style('display') == 'none') - - @property - def is_enabled(self): - """返回元素是否可用""" - return not self._ele.run_js('return this.disabled;') - - @property - def is_alive(self): - """返回元素是否仍在DOM中""" - try: - d = self._ele.attrs - return True - except Exception: - return False - - @property - def is_in_viewport(self): - """返回元素是否出现在视口中,以元素可以接受点击的点为判断""" - x, y = self._ele.locations.click_point - return location_in_viewport(self._ele.page, x, y) if x else False - - @property - def is_covered(self): - """返回元素是否被覆盖,与是否在视口中无关""" - lx, ly = self._ele.locations.click_point - try: - r = self._ele.page.run_cdp('DOM.getNodeForLocation', x=lx, y=ly) - except CallMethodError: - return False - - if r.get('backendNodeId') != self._ele.ids.backend_id: - return True - - return False - - -class ShadowRootStates(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - @property - def is_enabled(self): - """返回元素是否可用""" - return not self._ele.run_js('return this.disabled;') - - @property - def is_alive(self): - """返回元素是否仍在DOM中""" - try: - self._ele.page.run_cdp('DOM.describeNode', backendNodeId=self._ele.ids.backend_id) - return True - except Exception: - return False - - -class ChromiumElementSetter(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - def attr(self, attr, value): - """设置元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele.ids.node_id, name=attr, value=str(value)) - - def prop(self, prop, value): - """设置元素property属性 - :param prop: 属性名 - :param value: 属性值 - 
:return: None - """ - value = value.replace('"', r'\"') - self._ele.run_js(f'this.{prop}="{value}";') - - def innerHTML(self, html): - """设置元素innerHTML - :param html: html文本 - :return: None - """ - self.prop('innerHTML', html) - - -class Locations(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - @property - def location(self): - """返回元素左上角的绝对坐标""" - cl = self.viewport_location - return self._get_page_coord(cl[0], cl[1]) - - @property - def midpoint(self): - """返回元素中间点的绝对坐标""" - cl = self.viewport_midpoint - return self._get_page_coord(cl[0], cl[1]) - - @property - def click_point(self): - """返回元素接受点击的点的绝对坐标""" - cl = self.viewport_click_point - return self._get_page_coord(cl[0], cl[1]) - - @property - def viewport_location(self): - """返回元素左上角在视口中的坐标""" - m = self._get_viewport_rect('border') - return int(m[0]), int(m[1]) - - @property - def viewport_midpoint(self): - """返回元素中间点在视口中的坐标""" - m = self._get_viewport_rect('border') - return int(m[0] + (m[2] - m[0]) // 2), int(m[3] + (m[5] - m[3]) // 2) - - @property - def viewport_click_point(self): - """返回元素接受点击的点视口坐标""" - m = self._get_viewport_rect('padding') - return int(self.viewport_midpoint[0]), int(m[1]) + 1 - - @property - def screen_location(self): - """返回元素左上角在屏幕上坐标,左上角为(0, 0)""" - vx, vy = self._ele.page.rect.viewport_location - ex, ey = self.viewport_location - pr = self._ele.page.run_js('return window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) - - @property - def screen_midpoint(self): - """返回元素中点在屏幕上坐标,左上角为(0, 0)""" - vx, vy = self._ele.page.rect.viewport_location - ex, ey = self.viewport_midpoint - pr = self._ele.page.run_js('return window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) - - @property - def screen_click_point(self): - """返回元素中点在屏幕上坐标,左上角为(0, 0)""" - vx, vy = self._ele.page.rect.viewport_location - ex, ey = self.viewport_click_point - pr = self._ele.page.run_js('return 
window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) - - def _get_viewport_rect(self, quad): - """按照类型返回在可视窗口中的范围 - :param quad: 方框类型,margin border padding - :return: 四个角坐标,大小为0时返回None - """ - return self._ele.page.run_cdp('DOM.getBoxModel', backendNodeId=self._ele.ids.backend_id)['model'][quad] - - def _get_page_coord(self, x, y): - """根据视口坐标获取绝对坐标""" - # js = 'return document.documentElement.scrollLeft+" "+document.documentElement.scrollTop;' - # xy = self._ele.run_js(js) - # sx, sy = xy.split(' ') - r = self._ele.page.run_cdp_loaded('Page.getLayoutMetrics')['visualViewport'] - sx = r['pageX'] - sy = r['pageY'] - return x + sx, y + sy - - -class Click(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - def __call__(self, by_js=False, timeout=1): - """点击元素 - 如果遇到遮挡,可选择是否用js点击 - :param by_js: 是否用js点击,为None时先用模拟点击,遇到遮挡改用js,为True时直接用js点击,为False时只用模拟点击 - :param timeout: 模拟点击的超时时间,等待元素可见、不被遮挡、进入视口 - :return: 是否点击成功 - """ - return self.left(by_js, timeout) - - def left(self, by_js=False, timeout=1): - """点击元素,可选择是否用js点击 - :param by_js: 是否用js点击,为None时先用模拟点击,遇到遮挡改用js,为True时直接用js点击,为False时只用模拟点击 - :param timeout: 模拟点击的超时时间,等待元素可见、不被遮挡、进入视口 - :return: 是否点击成功 - """ - if not by_js: - try: - self._ele.scroll.to_see() - can_click = False - - timeout = self._ele.page.timeout if timeout is None else timeout - if timeout == 0: - if self._ele.states.is_in_viewport and self._ele.states.is_enabled and self._ele.states.is_displayed: - can_click = True - else: - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._ele.states.is_in_viewport and self._ele.states.is_enabled and self._ele.states.is_displayed: - can_click = True - break - - if not self._ele.states.is_in_viewport: - by_js = True - - elif can_click and (by_js is False or not self._ele.states.is_covered): - client_x, client_y = self._ele.locations.viewport_midpoint if self._ele.tag == 'input' \ - else 
self._ele.locations.viewport_click_point - self._click(client_x, client_y) - return True - - except NoRectError: - by_js = True - - if by_js is not False: - self._ele.run_js('this.click();') - return True - - if Settings.raise_click_failed: - raise CanNotClickError - return False - - def right(self): - """右键单击""" - self._ele.page.scroll.to_see(self._ele) - x, y = self._ele.locations.viewport_click_point - self._click(x, y, 'right') - - def middle(self): - """中键单击""" - self._ele.page.scroll.to_see(self._ele) - x, y = self._ele.locations.viewport_click_point - self._click(x, y, 'middle') - - def at(self, offset_x=None, offset_y=None, button='left', count=1): - """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中间点 - :param offset_x: 相对元素左上角坐标的x轴偏移量 - :param offset_y: 相对元素左上角坐标的y轴偏移量 - :param button: 点击哪个键,可选 left, middle, right, back, forward - :param count: 点击次数 - :return: None - """ - self._ele.page.scroll.to_see(self._ele) - if offset_x is None and offset_y is None: - w, h = self._ele.size - offset_x = w // 2 - offset_y = h // 2 - x, y = offset_scroll(self._ele, offset_x, offset_y) - self._click(x, y, button, count) - - def twice(self): - """双击元素""" - self.at(count=2) - - def _click(self, client_x, client_y, button='left', count=1): - """实施点击 - :param client_x: 视口中的x坐标 - :param client_y: 视口中的y坐标 - :param button: 'left' 'right' 'middle' 'back' 'forward' - :param count: 点击次数 - :return: None - """ - self._ele.page.run_cdp('Input.dispatchMouseEvent', type='mousePressed', - x=client_x, y=client_y, button=button, clickCount=count) - # sleep(.05) - self._ele.page.run_cdp('Input.dispatchMouseEvent', type='mouseReleased', - x=client_x, y=client_y, button=button) - - -class ChromiumScroll(object): - """用于滚动的对象""" - - def __init__(self, ele): - """ - :param ele: 元素对象 - """ - self._driver = ele - self.t1 = self.t2 = 'this' - self._wait_complete = False - - def _run_js(self, js): - js = js.format(self.t1, self.t2, self.t2) - self._driver.run_js(js) - self._wait_scrolled() - - def to_top(self): 
- """滚动到顶端,水平位置不变""" - self._run_js('{}.scrollTo({}.scrollLeft, 0);') - - def to_bottom(self): - """滚动到底端,水平位置不变""" - self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight);') - - def to_half(self): - """滚动到垂直中间位置,水平位置不变""" - self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight/2);') - - def to_rightmost(self): - """滚动到最右边,垂直位置不变""" - self._run_js('{}.scrollTo({}.scrollWidth, {}.scrollTop);') - - def to_leftmost(self): - """滚动到最左边,垂直位置不变""" - self._run_js('{}.scrollTo(0, {}.scrollTop);') - - def to_location(self, x, y): - """滚动到指定位置 - :param x: 水平距离 - :param y: 垂直距离 - :return: None - """ - self._run_js(f'{{}}.scrollTo({x}, {y});') - - def up(self, pixel=300): - """向上滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self._run_js(f'{{}}.scrollBy(0, {pixel});') - - def down(self, pixel=300): - """向下滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self._run_js(f'{{}}.scrollBy(0, {pixel});') - - def left(self, pixel=300): - """向左滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self._run_js(f'{{}}.scrollBy({pixel}, 0);') - - def right(self, pixel=300): - """向右滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self._run_js(f'{{}}.scrollBy({pixel}, 0);') - - def _wait_scrolled(self): - if not self._wait_complete: - return - - page = self._driver.page if isinstance(self._driver, ChromiumElement) else self._driver - r = page.run_cdp('Page.getLayoutMetrics') - x = r['layoutViewport']['pageX'] - y = r['layoutViewport']['pageY'] - - while True: - sleep(.1) - r = page.run_cdp('Page.getLayoutMetrics') - x1 = r['layoutViewport']['pageX'] - y1 = r['layoutViewport']['pageY'] - - if x == x1 and y == y1: - break - - x = x1 - y = y1 - - -class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center=None): - """滚动页面直到元素可见 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 - :return: None - """ - self._driver.page.scroll.to_see(self._driver, center=center) - - -class ChromiumSelect(object): - 
"""ChromiumSelect 类专门用于处理 d 模式下 select 标签""" - - def __init__(self, ele): - """ - :param ele: select 元素对象 - """ - if ele.tag != 'select': - raise TypeError("select方法只能在<select>元素使用。") - - self._ele = ele - - def __call__(self, text_or_index, timeout=None): - """选定下拉列表中子元素 - :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - para_type = 'index' if isinstance(text_or_index, int) else 'text' - timeout = timeout if timeout is not None else self._ele.page.timeout - return self._select(text_or_index, para_type, timeout=timeout) - - @property - def is_multi(self): - """返回是否多选表单""" - return self._ele.attr('multiple') is not None - - @property - def options(self): - """返回所有选项元素组成的列表""" - return [e for e in self._ele.eles('xpath://option') if isinstance(e, ChromiumElement)] - - @property - def selected_option(self): - """返回第一个被选中的option元素 - :return: ChromiumElement对象或None - """ - ele = self._ele.run_js('return this.options[this.selectedIndex];') - return ele - - @property - def selected_options(self): - """返回所有被选中的option元素列表 - :return: ChromiumElement对象组成的列表 - """ - return [x for x in self.options if x.states.is_selected] - - def all(self): - """全选""" - if not self.is_multi: - raise TypeError("只能在多选菜单执行此操作。") - return self._by_loc('tag:option', 1, False) - - def invert(self): - """反选""" - if not self.is_multi: - raise TypeError("只能对多项选框执行反选。") - change = False - for i in self.options: - change = True - mode = 'false' if i.states.is_selected else 'true' - i.run_js(f'this.selected={mode};') - if change: - self._dispatch_change() - - def clear(self): - """清除所有已选项""" - if not self.is_multi: - raise TypeError("只能在多选菜单执行此操作。") - return self._by_loc('tag:option', 1, True) - - def by_text(self, text, timeout=None): - """此方法用于根据text值选择项。当元素是多选列表时,可以接收list或tuple - :param text: text属性值,传入list或tuple可选择多项 - :param timeout: 超时时间,为None默认使用页面超时时间 - :return: 是否选择成功 - """ - return self._select(text, 'text', False, timeout) - 
- def by_value(self, value, timeout=None): - """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可选择多项 - :param timeout: 超时时间,为None默认使用页面超时时间 - :return: 是否选择成功 - """ - return self._select(value, 'value', False, timeout) - - def by_index(self, index, timeout=None): - """此方法用于根据index值选择项。当元素是多选列表时,可以接收list或tuple - :param index: 序号,0开始,传入list或tuple可选择多项 - :param timeout: 超时时间,为None默认使用页面超时时间 - :return: 是否选择成功 - """ - return self._select(index, 'index', False, timeout) - - def by_loc(self, loc, timeout=None): - """用定位符选择指定的项 - :param loc: 定位符 - :param timeout: 超时时间 - :return: 是否选择成功 - """ - return self._by_loc(loc, timeout) - - def cancel_by_text(self, text, timeout=None): - """此方法用于根据text值取消选择项。当元素是多选列表时,可以接收list或tuple - :param text: 文本,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: 是否取消成功 - """ - return self._select(text, 'text', True, timeout) - - def cancel_by_value(self, value, timeout=None): - """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: 是否取消成功 - """ - return self._select(value, 'value', True, timeout) - - def cancel_by_index(self, index, timeout=None): - """此方法用于根据index值取消选择项。当元素是多选列表时,可以接收list或tuple - :param index: 序号,0开始,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: 是否取消成功 - """ - return self._select(index, 'index', True, timeout) - - def cancel_by_loc(self, loc, timeout=None): - """用定位符取消选择指定的项 - :param loc: 定位符 - :param timeout: 超时时间 - :return: 是否选择成功 - """ - return self._by_loc(loc, timeout, True) - - def _by_loc(self, loc, timeout=None, cancel=False): - """用定位符取消选择指定的项 - :param loc: 定位符 - :param timeout: 超时时间 - :param cancel: 是否取消选择 - :return: 是否选择成功 - """ - eles = self._ele.eles(loc, timeout) - if not eles: - return False - - mode = 'false' if cancel else 'true' - if self.is_multi: - for ele in eles: - ele.run_js(f'this.selected={mode};') - self._dispatch_change() - return True - - 
eles[0].run_js(f'this.selected={mode};') - self._dispatch_change() - return True - - def _select(self, condition, para_type='text', cancel=False, timeout=None): - """选定或取消选定下拉列表中子元素 - :param condition: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选 'text'、'value'、'index' - :param cancel: 是否取消选择 - :return: 是否选择成功 - """ - if not self.is_multi and isinstance(condition, (list, tuple)): - raise TypeError('单选列表只能传入str格式。') - - mode = 'false' if cancel else 'true' - timeout = timeout if timeout is not None else self._ele.page.timeout - condition = {condition} if isinstance(condition, (str, int)) else set(condition) - - if para_type in ('text', 'value'): - return self._text_value(condition, para_type, mode, timeout) - elif para_type == 'index': - return self._index(condition, mode, timeout) - - def _text_value(self, condition, para_type, mode, timeout): - """执行text和value搜索 - :param condition: 条件set - :param para_type: 参数类型,可选 'text'、'value' - :param mode: 'true' 或 'false' - :param timeout: 超时时间 - :return: 是否选择成功 - """ - ok = False - text_len = len(condition) - eles = [] - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if para_type == 'text': - eles = [i for i in self.options if i.text in condition] - elif para_type == 'value': - eles = [i for i in self.options if i.attr('value') in condition] - - if len(eles) >= text_len: - ok = True - break - - if ok: - for i in eles: - i.run_js(f'this.selected={mode};') - - self._dispatch_change() - return True - - return False - - def _index(self, condition, mode, timeout): - """执行index搜索 - :param condition: 条件set - :param mode: 'true' 或 'false' - :param timeout: 超时时间 - :return: 是否选择成功 - """ - ok = False - condition = [int(i) for i in condition] - text_len = max(condition) - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if len(self.options) >= text_len: - ok = True - break - - if ok: - eles = self.options - for i in condition: - eles[i - 
1].run_js(f'this.selected={mode};') - - self._dispatch_change() - return True - - return False - - def _dispatch_change(self): - """触发修改动作""" - self._ele.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') - - -class ChromiumElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" - - def __init__(self, page, ele): - """等待元素在dom中某种状态,如删除、显示、隐藏 - :param page: 元素所在页面 - :param ele: 要等待的元素 - """ - self._page = page - self._ele = ele - - def delete(self, timeout=None): - """等待元素从dom删除 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_alive', False, timeout) - - def display(self, timeout=None): - """等待元素从dom显示 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', True, timeout) - - def hidden(self, timeout=None): - """等待元素从dom隐藏 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', False, timeout) - - def covered(self, timeout=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', True, timeout) - - def not_covered(self, timeout=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', False, timeout) - - def enabled(self, timeout=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_enabled', True, timeout) - - def disabled(self, timeout=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - return self._wait_state('is_enabled', False, timeout) - - def disabled_or_delete(self, timeout=None): - """等待当前元素变成不可用或从DOM移除 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if not self._ele.states.is_enabled 
or not self._ele.states.is_alive: - return True - sleep(.05) - - return False - - def _wait_state(self, attr, mode=False, timeout=None): - """等待元素某个bool状态到达指定状态 - :param attr: 状态名称 - :param mode: True或False - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._ele.states.__getattribute__(attr) == mode: - return True - sleep(.05) - - return False - - -class Pseudo(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - @property - def before(self): - """返回当前元素的::before伪元素内容""" - return self._ele.style('content', 'before') - - @property - def after(self): - """返回当前元素的::after伪元素内容""" - return self._ele.style('content', 'after') diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi deleted file mode 100644 index 37de21f..0000000 --- a/DrissionPage/chromium_element.pyi +++ /dev/null @@ -1,591 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union, Tuple, List, Any - -from .base import DrissionElement, BaseElement -from .chromium_base import ChromiumBase -from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage -from .commons.constants import NoneElement -from .session_element import SessionElement -from .web_page import WebPage - - -class ChromiumElement(DrissionElement): - - def __init__(self, - page: ChromiumBase, - node_id: str = None, obj_id: str = None, backend_id: str = None): - self._tag: str = ... - self.page: Union[ChromiumPage, WebPage] = ... - self._node_id: str = ... - self._obj_id: str = ... - self._backend_id: str = ... - self._doc_id: str = ... - self._ids: ChromiumElementIds = ... - self._scroll: ChromiumElementScroll = ... - self._click: Click = ... - self._select: ChromiumSelect = ... - self._wait: ChromiumElementWaiter = ... 
- self._locations: Locations = ... - self._set: ChromiumElementSetter = ... - self._states: ChromiumElementStates = ... - self._pseudo: Pseudo = ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[ChromiumElement, str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - # -----------------d模式独有属性------------------- - @property - def ids(self) -> ChromiumElementIds: ... - - @property - def size(self) -> Tuple[int, int]: ... - - @property - def set(self) -> ChromiumElementSetter: ... - - @property - def states(self) -> ChromiumElementStates: ... - - @property - def location(self) -> Tuple[int, int]: ... - - @property - def locations(self) -> Locations: ... - - @property - def pseudo(self) -> Pseudo: ... - - @property - def shadow_root(self) -> Union[None, ChromiumShadowRoot]: ... - - @property - def sr(self) -> Union[None, ChromiumShadowRoot]: ... - - @property - def scroll(self) -> ChromiumElementScroll: ... - - @property - def click(self) -> Click: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... - - def child(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = 0, - ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - - def prev(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = 0, - ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - - def next(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = 0, - ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... 
- - def before(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - - def after(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - - def children(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def prevs(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - @property - def wait(self) -> ChromiumElementWaiter: ... - - @property - def select(self) -> ChromiumSelect: ... - - def attr(self, attr: str) -> Union[str, None]: ... - - def remove_attr(self, attr: str) -> None: ... - - def prop(self, prop: str) -> Union[str, int, None]: ... - - def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - - def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[ChromiumElement, str]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, str]]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, NoneElement]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[Union[SessionElement, str]]: ... 
- - def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, relative: bool = False, raise_err: bool = False) \ - -> Union[ChromiumElement, ChromiumFrame, str, NoneElement, - List[Union[ChromiumElement, ChromiumFrame, str]]]: ... - - def style(self, style: str, pseudo_ele: str = '') -> str: ... - - def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ... - - def save(self, path: [str, bool] = None, rename: str = None, timeout: float = None) -> None: ... - - def get_screenshot(self, path: [str, Path] = None, as_bytes: [bool, str] = None, - as_base64: [bool, str] = None) -> Union[str, bytes]: ... - - def input(self, vals: Any, clear: bool = True) -> None: ... - - def _set_file_input(self, files: Union[str, list, tuple]) -> None: ... - - def clear(self, by_js: bool = False) -> None: ... - - def _input_focus(self) -> None: ... - - def focus(self) -> None: ... - - def hover(self, offset_x: int = None, offset_y: int = None) -> None: ... - - def drag(self, offset_x: int = 0, offset_y: int = 0, duration: float = 0.5) -> None: ... - - def drag_to(self, ele_or_loc: Union[tuple, ChromiumElement], duration: float = 0.5) -> None: ... - - def _get_obj_id(self, node_id: str = None, backend_id: str = None) -> str: ... - - def _get_node_id(self, obj_id: str = None, backend_id: str = None) -> str: ... - - def _get_backend_id(self, node_id: str) -> str: ... - - def _get_ele_path(self, mode: str) -> str: ... - - -class ChromiumElementStates(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - @property - def is_selected(self) -> bool: ... - - @property - def is_checked(self) -> bool: ... - - @property - def is_displayed(self) -> bool: ... - - @property - def is_enabled(self) -> bool: ... - - @property - def is_alive(self) -> bool: ... - - @property - def is_in_viewport(self) -> bool: ... - - @property - def is_covered(self) -> bool: ... 
- - -class ChromiumShadowRoot(BaseElement): - - def __init__(self, - parent_ele: ChromiumElement, - obj_id: str = None, - backend_id: str = None): - self._obj_id: str = ... - self._ids: Ids = ... - self._node_id: str = ... - self._backend_id: str = ... - self.page: ChromiumPage = ... - self.parent_ele: ChromiumElement = ... - self._states: ShadowRootStates = ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> ChromiumElement: ... - - @property - def ids(self) -> Ids: ... - - @property - def states(self) -> ShadowRootStates: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - - def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - - def parent(self, level_or_loc: Union[str, int] = 1) -> ChromiumElement: ... - - def child(self, filter_loc: Union[tuple, str] = '', - index: int = 1) -> Union[ChromiumElement, str, None]: ... - - def next(self, filter_loc: Union[tuple, str] = '', - index: int = 1) -> Union[ChromiumElement, str, None]: ... - - def before(self, filter_loc: Union[tuple, str] = '', - index: int = 1) -> Union[ChromiumElement, str, None]: ... - - def after(self, filter_loc: Union[tuple, str] = '', - index: int = 1) -> Union[ChromiumElement, str, None]: ... - - def children(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[ChromiumElement]: ... 
- - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, NoneElement]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, - single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, NoneElement, str, List[Union[ChromiumElement, - ChromiumFrame, str]]]: ... - - def _get_node_id(self, obj_id: str) -> str: ... - - def _get_obj_id(self, back_id: str) -> str: ... - - def _get_backend_id(self, node_id: str) -> str: ... - - -class Ids(object): - def __init__(self, ele: Union[ChromiumElement, ChromiumShadowRoot]): - self._ele: Union[ChromiumElement, ChromiumShadowRoot] = ... - - @property - def node_id(self) -> str: ... - - @property - def obj_id(self) -> str: ... - - @property - def backend_id(self) -> str: ... - - -class ChromiumElementIds(Ids): - @property - def doc_id(self) -> str: ... - - -def find_in_chromium_ele(ele: ChromiumElement, - loc: Union[str, Tuple[str, str]], - single: bool = True, - timeout: float = None, - relative: bool = True) \ - -> Union[ChromiumElement, str, NoneElement, List[Union[ChromiumElement, str]]]: ... - - -def find_by_xpath(ele: ChromiumElement, - xpath: str, - single: bool, - timeout: float, - relative: bool = True) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... - - -def find_by_css(ele: ChromiumElement, - selector: str, - single: bool, - timeout: float) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ... - - -def make_chromium_ele(page: ChromiumBase, node_id: str = ..., obj_id: str = ...) \ - -> Union[ChromiumElement, ChromiumFrame, str]: ... - - -def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... 
- - -def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ChromiumShadowRoot], script: str, - as_expr: bool = False, timeout: float = None, args: tuple = ...) -> Any: ... - - -def parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ... - - -def convert_argument(arg: Any) -> dict: ... - - -def send_enter(ele: ChromiumElement) -> None: ... - - -def send_key(ele: ChromiumElement, modifier: int, key: str) -> None: ... - - -class ChromiumElementSetter(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - def attr(self, attr: str, value: str) -> None: ... - - def prop(self, prop: str, value: str) -> None: ... - - def innerHTML(self, html: str) -> None: ... - - -class ShadowRootStates(object): - def __init__(self, ele: ChromiumShadowRoot): - """ - :param ele: ChromiumElement - """ - self._ele: ChromiumShadowRoot = ... - - @property - def is_enabled(self) -> bool: ... - - @property - def is_alive(self) -> bool: ... - - -class Locations(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - @property - def location(self) -> Tuple[int, int]: ... - - @property - def midpoint(self) -> Tuple[int, int]: ... - - @property - def click_point(self) -> Tuple[int, int]: ... - - @property - def viewport_location(self) -> Tuple[int, int]: ... - - @property - def viewport_midpoint(self) -> Tuple[int, int]: ... - - @property - def viewport_click_point(self) -> Tuple[int, int]: ... - - @property - def screen_location(self) -> Tuple[int, int]: ... - - @property - def screen_midpoint(self) -> Tuple[int, int]: ... - - @property - def screen_click_point(self) -> Tuple[int, int]: ... - - def _get_viewport_rect(self, quad: str) -> Union[list, None]: ... - - def _get_page_coord(self, x: int, y: int) -> Tuple[int, int]: ... - - -class Click(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... 
- - def __call__(self, by_js: Union[None, bool] = False, timeout: float = 1) -> bool: ... - - def left(self, by_js: Union[None, bool] = False, timeout: float = 1) -> bool: ... - - def right(self) -> None: ... - - def middle(self) -> None: ... - - def at(self, offset_x: int = None, offset_y: int = None, button: str = 'left', count: int = 1) -> None: ... - - def twice(self, by_js: bool = False) -> None: ... - - def _click(self, client_x: int, client_y: int, button: str = 'left', count: int = 1) -> None: ... - - -class ChromiumScroll(object): - def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement, ChromiumFrame]): - self.t1: str = ... - self.t2: str = ... - self._driver: Union[ChromiumPage, ChromiumElement, ChromiumFrame] = ... - self._wait_complete: bool = ... - - def _run_js(self, js: str): ... - - def to_top(self) -> None: ... - - def to_bottom(self) -> None: ... - - def to_half(self) -> None: ... - - def to_rightmost(self) -> None: ... - - def to_leftmost(self) -> None: ... - - def to_location(self, x: int, y: int) -> None: ... - - def up(self, pixel: int = 300) -> None: ... - - def down(self, pixel: int = 300) -> None: ... - - def left(self, pixel: int = 300) -> None: ... - - def right(self, pixel: int = 300) -> None: ... - - def _wait_scrolled(self) -> None: ... - - -class ChromiumElementScroll(ChromiumScroll): - - def to_see(self, center: Union[bool, None] = None) -> None: ... - - -class ChromiumSelect(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - @property - def is_multi(self) -> bool: ... - - @property - def options(self) -> List[ChromiumElement]: ... - - @property - def selected_option(self) -> Union[ChromiumElement, None]: ... - - @property - def selected_options(self) -> List[ChromiumElement]: ... - - def clear(self) -> None: ... - - def all(self) -> None: ... 
- - def by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ... - - def by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ... - - def cancel_by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def cancel_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def cancel_by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ... - - def cancel_by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ... - - def invert(self) -> None: ... - - def _by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None, cancel: bool = False) -> bool: ... - - def _select(self, - condition: Union[str, int, list, tuple] = None, - para_type: str = 'text', - cancel: bool = False, - timeout: float = None) -> bool: ... - - def _text_value(self, condition: set, para_type: str, mode: str, timeout: float) -> bool: ... - - def _index(self, condition: set, mode: str, timeout: float) -> bool: ... - - def _dispatch_change(self) -> None: ... - - -class ChromiumElementWaiter(object): - def __init__(self, - page: ChromiumBase, - ele: ChromiumElement): - self._ele: ChromiumElement = ... - self._page: ChromiumBase = ... - - def delete(self, timeout: float = None) -> bool: ... - - def display(self, timeout: float = None) -> bool: ... - - def hidden(self, timeout: float = None) -> bool: ... - - def covered(self, timeout: float = None) -> bool: ... - - def not_covered(self, timeout: float = None) -> bool: ... - - def enabled(self, timeout: float = None) -> bool: ... - - def disabled(self, timeout: float = None) -> bool: ... - - def disabled_or_delete(self, timeout: float = None) -> bool: ... 
- - def _wait_state(self, attr: str, mode: bool = False, timeout: float = None) -> bool: ... - - -class Pseudo(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - @property - def before(self) -> str: ... - - @property - def after(self) -> str: ... diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi deleted file mode 100644 index d8232f7..0000000 --- a/DrissionPage/chromium_frame.pyi +++ /dev/null @@ -1,215 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union, Tuple, List, Any - -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter -from .chromium_element import ChromiumElement, Locations, ChromiumElementStates, ChromiumElementWaiter - - -class ChromiumFrame(ChromiumBase): - - def __init__(self, page: ChromiumBase, ele: ChromiumElement): - self.page: ChromiumBase = ... - self.frame_id: str = ... - self._frame_ele: ChromiumElement = ... - self._backend_id: str = ... - self._doc_ele: ChromiumElement = ... - self._is_diff_domain: bool = ... - self.doc_ele: ChromiumElement = ... - self._states: ChromiumElementStates = ... - self._ids: ChromiumFrameIds = ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[ChromiumElement, str]: ... - - def _check_alive(self) -> None: ... - - def __repr__(self) -> str: ... - - def _runtime_settings(self) -> None: ... - - def _driver_init(self, tab_id: str) -> None: ... - - def _reload(self) -> None: ... - - def _check_ok(self) -> None: ... - - def _get_new_document(self) -> None: ... - - def _onFrameAttached(self, **kwargs): ... - - def _onFrameDetached(self, **kwargs): ... - - @property - def ids(self) -> ChromiumFrameIds: ... - - @property - def frame_ele(self) -> ChromiumElement: ... - - @property - def tag(self) -> str: ... - - @property - def url(self) -> str: ... 
- - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def title(self) -> str: ... - - @property - def cookies(self) -> dict: ... - - @property - def attrs(self) -> dict: ... - - @property - def frame_size(self) -> Tuple[int, int]: ... - - @property - def size(self) -> Tuple[int, int]: ... - - @property - def active_ele(self) -> ChromiumElement: ... - - @property - def location(self) -> Tuple[int, int]: ... - - @property - def locations(self) -> Locations: ... - - @property - def xpath(self) -> str: ... - - @property - def css_path(self) -> str: ... - - @property - def ready_state(self) -> str: ... - - @property - def is_alive(self) -> bool: ... - - @property - def scroll(self) -> ChromiumFrameScroll: ... - - @property - def set(self) -> ChromiumFrameSetter: ... - - @property - def states(self) -> ChromiumElementStates: ... - - @property - def wait(self) -> FrameWaiter: ... - - def refresh(self) -> None: ... - - def attr(self, attr: str) -> Union[str, None]: ... - - def remove_attr(self, attr: str) -> None: ... - - def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... - - def prev(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = 0, - ele_only: bool = True) -> Union[ChromiumElement, str]: ... - - def next(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = 0, - ele_only: bool = True) -> Union[ChromiumElement, str]: ... - - def before(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union[ChromiumElement, str]: ... - - def after(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union[ChromiumElement, str]: ... 
- - def prevs(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '', - timeout: float = 0, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... - - def get_screenshot(self, path: [str, Path] = None, - as_bytes: [bool, str] = None, - as_base64: [bool, str] = None) -> Union[str, bytes]: ... - - def _get_screenshot(self, path: [str, Path] = None, - as_bytes: [bool, str] = None, as_base64: [bool, str] = None, - full_page: bool = False, - left_top: Tuple[int, int] = None, - right_bottom: Tuple[int, int] = None, - ele: ChromiumElement = None) -> Union[str, bytes]: ... - - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ - -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ... - - def _d_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - show_errmsg: bool = False, - timeout: float = None) -> Union[bool, None]: ... - - def _is_inner_frame(self) -> bool: ... - - -class ChromiumFrameIds(object): - def __init__(self, frame: ChromiumFrame): - self._frame: ChromiumFrame = ... - - @property - def tab_id(self) -> str: ... - - @property - def backend_id(self) -> str: ... - - @property - def obj_id(self) -> str: ... - - @property - def node_id(self) -> str: ... - - -class ChromiumFrameScroll(ChromiumPageScroll): - def __init__(self, frame: ChromiumFrame) -> None: ... 
- - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[None, bool] = None) -> None: ... - - -class ChromiumFrameSetter(ChromiumBaseSetter): - _page: ChromiumFrame = ... - - def attr(self, attr: str, value: str) -> None: ... - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame: ChromiumFrame): ... diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py deleted file mode 100644 index c654f95..0000000 --- a/DrissionPage/chromium_page.py +++ /dev/null @@ -1,800 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from platform import system -from threading import Thread -from time import perf_counter, sleep -from warnings import warn - -from requests import Session - -from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBaseWaiter -from .chromium_driver import ChromiumDriver -from .chromium_tab import ChromiumTab -from .commons.browser import connect_browser -from .commons.web import set_session_cookies -from .configs.chromium_options import ChromiumOptions -from .errors import CallMethodError, BrowserConnectError -from .session_page import DownloadSetter - - -class ChromiumPage(ChromiumBase): - """用于管理浏览器的类""" - - def __init__(self, addr_driver_opts=None, tab_id=None, timeout=None): - """ - :param addr_driver_opts: 浏览器地址:端口、ChromiumDriver对象或ChromiumOptions对象 - :param tab_id: 要控制的标签页id,不指定默认为激活的 - :param timeout: 超时时间 - """ - self._download_set = None - self._download_path = None - super().__init__(addr_driver_opts, tab_id, timeout) - - def _set_start_options(self, addr_driver_opts, none): - """设置浏览器启动属性 - :param addr_driver_opts: 'ip:port'、ChromiumDriver、ChromiumOptions - :param none: 用于后代继承 - :return: None - """ - if not addr_driver_opts or str(type(addr_driver_opts)).endswith(("ChromiumOptions'>", "DriverOptions'>")): - self._driver_options = addr_driver_opts or ChromiumOptions(addr_driver_opts) - 
- # 接收浏览器地址和端口 - elif isinstance(addr_driver_opts, str): - self._driver_options = ChromiumOptions() - self._driver_options.debugger_address = addr_driver_opts - - # 接收传递过来的ChromiumDriver,浏览器 - elif isinstance(addr_driver_opts, ChromiumDriver): - self._driver_options = ChromiumOptions(read_file=False) - self._driver_options.debugger_address = addr_driver_opts.address - self._tab_obj = addr_driver_opts - - else: - raise TypeError('只能接收ChromiumDriver或ChromiumOptions类型参数。') - - self.address = self._driver_options.debugger_address.replace('localhost', - '127.0.0.1').lstrip('http://').lstrip('https://') - - def _set_runtime_settings(self): - """设置运行时用到的属性""" - self._timeouts = Timeout(self, - page_load=self._driver_options.timeouts['pageLoad'], - script=self._driver_options.timeouts['script'], - implicit=self._driver_options.timeouts['implicit']) - self._page_load_strategy = self._driver_options.page_load_strategy - self._download_path = self._driver_options.download_path - - def _connect_browser(self, tab_id=None): - """连接浏览器,在第一次时运行 - :param tab_id: 要控制的标签页id,不指定默认为激活的 - :return: None - """ - self._chromium_init() - - if not self._tab_obj: # 不是传入driver的情况 - connect_browser(self._driver_options) - if not tab_id: - json = self._control_session.get(f'http://{self.address}/json').json() - tab_id = [i['id'] for i in json if i['type'] == 'page'] - if not tab_id: - raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') - tab_id = tab_id[0] - - self._driver_init(tab_id) - - self._page_init() - self._get_document() - self._first_run = False - - def _page_init(self): - """页面相关设置""" - ws = self._control_session.get(f'http://{self.address}/json/version').json()['webSocketDebuggerUrl'] - self._browser_driver = ChromiumDriver(ws.split('/')[-1], 'browser', self.address) - self._browser_driver.start() - - self._alert = Alert() - self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open - self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close - - self._rect = None - 
self._main_tab = self.tab_id - try: - self.download_set.by_browser() - except CallMethodError: - pass - - self._process_id = None - r = self.browser_driver.SystemInfo.getProcessInfo() - if 'processInfo' not in r: - return None - for i in r['processInfo']: - if i['type'] == 'browser': - self._process_id = i['id'] - break - - @property - def browser_driver(self): - """返回用于控制浏览器cdp的driver""" - return self._browser_driver - - @property - def tabs_count(self): - """返回标签页数量""" - return len(self.tabs) - - @property - def tabs(self): - """返回所有标签页id组成的列表""" - j = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp - return [i['id'] for i in j if i['type'] == 'page' and not i['url'].startswith('devtools://') and i[ - 'url'] != 'chrome://privacy-sandbox-dialog/notice'] - - @property - def main_tab(self): - return self._main_tab - - @property - def latest_tab(self): - """返回最新的标签页id,最新标签页指最后创建或最后被激活的""" - return self.tabs[0] - - @property - def process_id(self): - """返回浏览器进程id""" - return self._process_id - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = ChromiumPageSetter(self) - return self._set - - @property - def download_path(self): - """返回默认下载路径""" - p = self._download_path or '' - return str(Path(p).absolute()) - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = ChromiumDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit - - @property - def rect(self): - if self._rect is None: - self._rect = ChromiumTabRect(self) - return self._rect - - @property - def wait(self): - """返回用于等待的对象""" - if self._wait is None: - self._wait = ChromiumPageWaiter(self) - return self._wait - - def get_tab(self, tab_id=None): - """获取一个标签页对象 - :param tab_id: 要获取的标签页id,为None时获取当前tab - :return: 标签页对象 - """ - tab_id = tab_id or self.tab_id - return ChromiumTab(self, tab_id) - - def 
find_tabs(self, title=None, url=None, tab_type=None, single=True): - """查找符合条件的tab,返回它们的id组成的列表 - :param title: 要匹配title的文本 - :param url: 要匹配url的文本 - :param tab_type: tab类型,可用列表输入多个 - :param single: 是否返回首个结果的id,为False返回所有信息 - :return: tab id或tab dict - """ - tabs = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp - if isinstance(tab_type, str): - tab_type = {tab_type} - elif isinstance(tab_type, (list, tuple, set)): - tab_type = set(tab_type) - elif tab_type is not None: - raise TypeError('tab_type只能是set、list、tuple、str、None。') - - r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) - and (tab_type is None or i['type'] in tab_type))] - return r[0]['id'] if r and single else r - - def new_tab(self, url=None, switch_to=False): - """新建一个标签页,该标签页在最后面 - :param url: 新标签页跳转到的网址 - :param switch_to: 新建标签页后是否把焦点移过去 - :return: 新标签页的id - """ - if switch_to: - begin_tabs = set(self.tabs) - len_tabs = len(begin_tabs) - tid = self.run_cdp('Target.createTarget', url='')['targetId'] - - tabs = self.tabs - while len(tabs) == len_tabs: - tabs = self.tabs - sleep(.005) - - new_tab = set(tabs) - begin_tabs - self._to_tab(new_tab.pop(), read_doc=False) - if url: - self.get(url) - - elif url: - tid = self.run_cdp('Target.createTarget', url=url)['targetId'] - - else: - tid = self.run_cdp('Target.createTarget', url='')['targetId'] - - return tid - - def to_main_tab(self): - """跳转到主标签页""" - self.to_tab(self._main_tab) - - def to_tab(self, tab_or_id=None, activate=True): - """跳转到标签页 - :param tab_or_id: 标签页对象或id,默认跳转到main_tab - :param activate: 切换后是否变为活动状态 - :return: None - """ - self._to_tab(tab_or_id, activate) - - def _to_tab(self, tab_or_id=None, activate=True, read_doc=True): - """跳转到标签页 - :param tab_or_id: 标签页对象或id,默认跳转到main_tab - :param activate: 切换后是否变为活动状态 - :param read_doc: 切换后是否读取文档 - :return: None - """ - tabs = self.tabs - if not tab_or_id: - tab_id = self._main_tab - elif isinstance(tab_or_id, ChromiumTab): 
- tab_id = tab_or_id.tab_id - else: - tab_id = tab_or_id - - if tab_id not in tabs: - tab_id = self.latest_tab - - if activate: - self._control_session.get(f'http://{self.address}/json/activate/{tab_id}') - - if tab_id == self.tab_id: - return - - self.driver.stop() - self._driver_init(tab_id) - if read_doc and self.ready_state in ('complete', None): - self._get_document() - - def close_tabs(self, tabs_or_ids=None, others=False): - """关闭传入的标签页,默认关闭当前页。可传入多个 - :param tabs_or_ids: 要关闭的标签页对象或id,可传入列表或元组,为None时关闭当前页 - :param others: 是否关闭指定标签页之外的 - :return: None - """ - all_tabs = set(self.tabs) - if isinstance(tabs_or_ids, str): - tabs = {tabs_or_ids} - elif isinstance(tabs_or_ids, ChromiumTab): - tabs = {tabs_or_ids.tab_id} - elif tabs_or_ids is None: - tabs = {self.tab_id} - elif isinstance(tabs_or_ids, (list, tuple)): - tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids) - else: - raise TypeError('tabs_or_ids参数只能传入标签页对象或id。') - - if others: - tabs = all_tabs - tabs - - end_len = len(all_tabs) - len(tabs) - if end_len <= 0: - self.quit() - return - - if self.tab_id in tabs: - self.driver.stop() - - for tab in tabs: - self._control_session.get(f'http://{self.address}/json/close/{tab}') - while len(self.tabs) != end_len: - sleep(.1) - - if self._main_tab in tabs: - self._main_tab = self.tabs[0] - - self.to_tab() - - def close_other_tabs(self, tabs_or_ids=None): - """关闭传入的标签页以外标签页,默认保留当前页。可传入多个 - :param tabs_or_ids: 要保留的标签页对象或id,可传入列表或元组,为None时保存当前页 - :return: None - """ - self.close_tabs(tabs_or_ids, True) - - def handle_alert(self, accept=True, send=None, timeout=None): - """处理提示框,可以自动等待提示框出现 - :param accept: True表示确认,False表示取消,其它值不会按按钮但依然返回文本值 - :param send: 处理prompt提示框时可输入文本 - :param timeout: 等待提示框出现的超时时间,为None则使用self.timeout属性的值 - :return: 提示框内容文本,未等到提示框则返回False - """ - timeout = self.timeout if timeout is None else timeout - timeout = .1 if timeout <= 0 else timeout - end_time = perf_counter() + timeout - while not self._alert.activated 
and perf_counter() < end_time: - sleep(.1) - if not self._alert.activated: - return False - - res_text = self._alert.text - if self._alert.type == 'prompt': - self.driver.Page.handleJavaScriptDialog(accept=accept, promptText=send) - else: - self.driver.Page.handleJavaScriptDialog(accept=accept) - return res_text - - def quit(self): - """关闭浏览器""" - self._tab_obj.Browser.close() - self._tab_obj.stop() - - if self.process_id: - from os import popen - from platform import system - txt = f'tasklist | findstr {self.process_id}' if system().lower() == 'windows' \ - else f'ps -ef | grep {self.process_id}' - while True: - p = popen(txt) - if f' {self.process_id} ' not in p.read(): - break - - def _on_alert_close(self, **kwargs): - """alert关闭时触发的方法""" - self._alert.activated = False - self._alert.text = None - self._alert.type = None - self._alert.defaultPrompt = None - self._alert.response_accept = kwargs.get('result') - self._alert.response_text = kwargs['userInput'] - self._tab_obj.has_alert = False - - def _on_alert_open(self, **kwargs): - """alert出现时触发的方法""" - self._alert.activated = True - self._alert.text = kwargs['message'] - self._alert.type = kwargs['message'] - self._alert.defaultPrompt = kwargs.get('defaultPrompt', None) - self._alert.response_accept = None - self._alert.response_text = None - self._tab_obj.has_alert = True - - -class ChromiumPageWaiter(ChromiumBaseWaiter): - def __init__(self, page: ChromiumBase): - super().__init__(page) - self._listener = None - - def download_begin(self, timeout=None): - """等待浏览器下载开始 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 - """ - return self._driver.download_set.wait_download_begin(timeout) - - def new_tab(self, timeout=None): - """等待新标签页出现 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 - """ - timeout = timeout if timeout is not None else self._driver.timeout - end_time = perf_counter() + timeout - while self._driver.tab_id == self._driver.latest_tab and perf_counter() < 
end_time: - sleep(.01) - - -class ChromiumTabRect(object): - def __init__(self, page): - self._page = page - - @property - def window_state(self): - """返回窗口状态:normal、fullscreen、maximized、 minimized""" - return self._get_browser_rect()['windowState'] - - @property - def browser_location(self): - """返回浏览器在屏幕上的坐标,左上角为(0, 0)""" - r = self._get_browser_rect() - if r['windowState'] in ('maximized', 'fullscreen'): - return 0, 0 - return r['left'] + 7, r['top'] - - @property - def page_location(self): - """返回页面左上角在屏幕中坐标,左上角为(0, 0)""" - w, h = self.viewport_location - r = self._get_page_rect()['layoutViewport'] - return w - r['pageX'], h - r['pageY'] - - @property - def viewport_location(self): - """返回视口在屏幕中坐标,左上角为(0, 0)""" - w_bl, h_bl = self.browser_location - w_bs, h_bs = self.browser_size - w_vs, h_vs = self.viewport_size_with_scrollbar - return w_bl + w_bs - w_vs, h_bl + h_bs - h_vs - - @property - def browser_size(self): - """返回浏览器大小""" - r = self._get_browser_rect() - if r['windowState'] == 'fullscreen': - return r['width'], r['height'] - elif r['windowState'] == 'maximized': - return r['width'] - 16, r['height'] - 16 - else: - return r['width'] - 16, r['height'] - 7 - - @property - def page_size(self): - """返回页面总宽高,格式:(宽, 高)""" - r = self._get_page_rect()['contentSize'] - return r['width'], r['height'] - - @property - def viewport_size(self): - """返回视口宽高,不包括滚动条,格式:(宽, 高)""" - r = self._get_page_rect()['visualViewport'] - return r['clientWidth'], r['clientHeight'] - - @property - def viewport_size_with_scrollbar(self): - """返回视口宽高,包括滚动条,格式:(宽, 高)""" - r = self._page.run_js('return window.innerWidth.toString() + " " + window.innerHeight.toString();') - w, h = r.split(' ') - return int(w), int(h) - - def _get_page_rect(self): - """获取页面范围信息""" - return self._page.run_cdp_loaded('Page.getLayoutMetrics') - - def _get_browser_rect(self): - """获取浏览器范围信息""" - return self._page.browser_driver.Browser.getWindowForTarget(targetId=self._page.tab_id)['bounds'] - - -class 
ChromiumDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page): - """ - :param page: ChromiumPage对象 - """ - super().__init__(page) - self._behavior = 'allow' - self._download_th = None - self._session = None - self._waiting_download = False - self._download_begin = False - - @property - def session(self): - """返回用于DownloadKit的Session对象""" - if self._session is None: - self._session = Session() - return self._session - - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - self._cookies_to_session() - return self.DownloadKit - - def save_path(self, path): - """设置下载路径 - :param path: 下载路径 - :return: None - """ - path = path or '' - path = Path(path).absolute() - path.mkdir(parents=True, exist_ok=True) - path = str(path) - self._page._download_path = path - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=path, - eventsEnabled=True) - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path) - - self.DownloadKit.goal_path = path - - def by_browser(self): - """设置使用浏览器下载文件""" - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser - except CallMethodError: - self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) - self._page.driver.Page.downloadWillBegin = self._download_by_browser - - self._behavior = 'allow' - - def by_DownloadKit(self): - """设置使用DownloadKit下载文件""" - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit - except CallMethodError: - raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') - self._behavior = 'deny' - - def 
wait_download_begin(self, timeout=None): - """等待浏览器下载开始 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 - """ - self._waiting_download = True - result = False - timeout = timeout if timeout is not None else self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._download_begin: - result = True - break - sleep(.05) - self._download_begin = False - self._waiting_download = False - return result - - def _cookies_to_session(self): - """把driver对象的cookies复制到session对象""" - ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": ua}) - set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False)) - - def _download_by_DownloadKit(self, **kwargs): - """拦截浏览器下载并用downloadKit下载""" - url = kwargs['url'] - if url.startswith('blob:'): - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - sleep(2) - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - - else: - self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid']) - self._page.download.add(file_url=url, goal_path=self._page.download_path, - rename=kwargs['suggestedFilename']) - if self._download_th is None or not self._download_th.is_alive(): - self._download_th = Thread(target=self._wait_download_complete, daemon=False) - self._download_th.start() - - if self._waiting_download: - self._download_begin = True - - def _download_by_browser(self, **kwargs): - """使用浏览器下载时调用""" - if self._waiting_download: - self._download_begin = True - - def _wait_download_complete(self): - """等待下载完成""" - self._page.download.wait() - - -class Alert(object): - """用于保存alert信息的类""" - - def __init__(self): - self.activated = False - self.text = None - self.type = None - self.defaultPrompt = None - self.response_accept = None - 
self.response_text = None - - -class WindowSetter(object): - """用于设置窗口大小的类""" - - def __init__(self, page): - """ - :param page: 页面对象 - """ - self._page = page - self._window_id = self._get_info()['windowId'] - - def maximized(self): - """窗口最大化""" - s = self._get_info()['bounds']['windowState'] - if s in ('fullscreen', 'minimized'): - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'maximized'}) - - def minimized(self): - """窗口最小化""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'minimized'}) - - def fullscreen(self): - """设置窗口为全屏""" - s = self._get_info()['bounds']['windowState'] - if s == 'minimized': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'fullscreen'}) - - def normal(self): - """设置窗口为常规模式""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'normal'}) - - def size(self, width=None, height=None): - """设置窗口大小 - :param width: 窗口宽度 - :param height: 窗口高度 - :return: None - """ - if width or height: - s = self._get_info()['bounds']['windowState'] - if s != 'normal': - self._perform({'windowState': 'normal'}) - info = self._get_info()['bounds'] - width = width - 16 if width else info['width'] - height = height + 7 if height else info['height'] - self._perform({'width': width, 'height': height}) - - def location(self, x=None, y=None): - """设置窗口在屏幕中的位置,相对左上角坐标 - :param x: 距离顶部距离 - :param y: 距离左边距离 - :return: None - """ - if x is not None or y is not None: - self.normal() - info = self._get_info()['bounds'] - x = x if x is not None else info['left'] - y = y if y is not None else info['top'] - self._perform({'left': x - 8, 'top': y}) - - def hide(self): - """隐藏浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, hide=True) - - def show(self): - """显示浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, 
hide=False) - - def _get_info(self): - """获取窗口位置及大小信息""" - return self._page.run_cdp('Browser.getWindowForTarget') - - def _perform(self, bounds): - """执行改变窗口大小操作 - :param bounds: 控制数据 - :return: None - """ - self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) - - -class ChromiumPageSetter(ChromiumBaseSetter): - def main_tab(self, tab_id=None): - """设置主tab - :param tab_id: 标签页id,不传入则设置当前tab - :return: None - """ - self._page._main_tab = tab_id or self._page.tab_id - - @property - def window(self): - """返回用于设置浏览器窗口的对象""" - return WindowSetter(self._page) - - def tab_to_front(self, tab_or_id=None): - """激活标签页使其处于最前面 - :param tab_or_id: 标签页对象或id,为None表示当前标签页 - :return: None - """ - if not tab_or_id: - tab_or_id = self._page.tab_id - elif isinstance(tab_or_id, ChromiumTab): - tab_or_id = tab_or_id.tab_id - self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') - - -def show_or_hide_browser(page, hide=True): - """执行显示或隐藏浏览器窗口 - :param page: ChromePage对象 - :param hide: 是否隐藏 - :return: None - """ - if not page.address.startswith(('127.0.0.1', 'localhost')): - return - - if system().lower() != 'windows': - raise OSError('该方法只能在Windows系统使用。') - - try: - from win32gui import ShowWindow - from win32con import SW_HIDE, SW_SHOW - except ImportError: - raise ImportError('请先安装:pip install pypiwin32') - - pid = page.process_id - if not pid: - return None - hds = get_chrome_hwnds_from_pid(pid, page.title) - sw = SW_HIDE if hide else SW_SHOW - for hd in hds: - ShowWindow(hd, sw) - - -def get_browser_progress_id(progress, address): - """获取浏览器进程id - :param progress: 已知的进程对象,没有时传入None - :param address: 浏览器管理地址,含端口 - :return: 进程id或None - """ - if progress: - return progress.pid - - from os import popen - port = address.split(':')[-1] - txt = '' - progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') - for progress in progresses: - if 'LISTENING' in progress: - txt = progress - break - if not txt: - 
return None - - return txt.split(' ')[-1] - - -def get_chrome_hwnds_from_pid(pid, title): - """通过PID查询句柄ID - :param pid: 进程id - :param title: 窗口标题 - :return: 进程句柄组成的列表 - """ - try: - from win32gui import IsWindow, GetWindowText, EnumWindows - from win32process import GetWindowThreadProcessId - except ImportError: - raise ImportError('请先安装win32gui,pip install pypiwin32') - - def callback(hwnd, hds): - if IsWindow(hwnd) and title in GetWindowText(hwnd): - _, found_pid = GetWindowThreadProcessId(hwnd) - if str(found_pid) == str(pid): - hds.append(hwnd) - return True - - hwnds = [] - EnumWindows(callback, hwnds) - return hwnds diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi deleted file mode 100644 index 5fde86a..0000000 --- a/DrissionPage/chromium_page.pyi +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from os import popen -from pathlib import Path -from threading import Thread -from typing import Union, Tuple, List - -from DownloadKit import DownloadKit -from requests import Session - -from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter, NetworkListener -from .chromium_driver import ChromiumDriver -from .chromium_tab import ChromiumTab -from .configs.chromium_options import ChromiumOptions -from .configs.driver_options import DriverOptions -from .session_page import DownloadSetter - - -class ChromiumPage(ChromiumBase): - - def __init__(self, - addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver, DriverOptions] = None, - tab_id: str = None, - timeout: float = None): - self._driver_options: [ChromiumDriver, DriverOptions] = ... - self._process_id: str = ... - self._window_setter: WindowSetter = ... - self._main_tab: str = ... - self._alert: Alert = ... - self._download_path: str = ... - self._download_set: ChromiumDownloadSetter = ... - self._browser_driver: ChromiumDriver = ... - self._rect: ChromiumTabRect = ... 
- - def _connect_browser(self, - addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, - tab_id: str = None) -> None: ... - - def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions], none) -> None: ... - - def _page_init(self) -> None: ... - - @property - def browser_driver(self) -> ChromiumDriver: ... - - @property - def tabs_count(self) -> int: ... - - @property - def tabs(self) -> List[str]: ... - - @property - def rect(self) -> ChromiumTabRect: ... - - @property - def wait(self) -> ChromiumPageWaiter: ... - - @property - def main_tab(self) -> str: ... - - @property - def latest_tab(self) -> str: ... - - @property - def process_id(self) -> Union[None, int]: ... - - @property - def set(self) -> ChromiumPageSetter: ... - - @property - def download_set(self) -> ChromiumDownloadSetter: ... - - @property - def download(self) -> DownloadKit: ... - - @property - def download_path(self) -> str: ... - - def get_tab(self, tab_id: str = None) -> ChromiumTab: ... - - def find_tabs(self, title: str = None, url: str = None, - tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ... - - def new_tab(self, url: str = None, switch_to: bool = False) -> str: ... - - def to_main_tab(self) -> None: ... - - def to_tab(self, tab_or_id: Union[str, ChromiumTab] = None, activate: bool = True) -> None: ... - - def _to_tab(self, tab_or_id: Union[str, ChromiumTab] = None, activate: bool = True, - read_doc: bool = True) -> None: ... - - def close_tabs(self, tabs_or_ids: Union[ - str, ChromiumTab, List[Union[str, ChromiumTab]], Tuple[Union[str, ChromiumTab]]] = None, - others: bool = False) -> None: ... - - def close_other_tabs(self, tabs_or_ids: Union[ - str, ChromiumTab, List[Union[str, ChromiumTab]], Tuple[Union[str, ChromiumTab]]] = None) -> None: ... - - def handle_alert(self, accept: bool = True, send: str = None, timeout: float = None) -> Union[str, False]: ... - - def quit(self) -> None: ... 
- - def _on_alert_close(self, **kwargs): ... - - def _on_alert_open(self, **kwargs): ... - - -class ChromiumPageWaiter(ChromiumBaseWaiter): - _driver: ChromiumPage = ... - _listener: Union[NetworkListener, None] = ... - - def download_begin(self, timeout: float = None) -> bool: ... - - def new_tab(self, timeout: float = None) -> bool: ... - - -class ChromiumTabRect(object): - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - - @property - def window_state(self) -> str: ... - - @property - def browser_location(self) -> Tuple[int, int]: ... - - @property - def page_location(self) -> Tuple[int, int]: ... - - @property - def viewport_location(self) -> Tuple[int, int]: ... - - @property - def browser_size(self) -> Tuple[int, int]: ... - - @property - def page_size(self) -> Tuple[int, int]: ... - - @property - def viewport_size(self) -> Tuple[int, int]: ... - - @property - def viewport_size_with_scrollbar(self) -> Tuple[int, int]: ... - - def _get_page_rect(self) -> dict: ... - - def _get_browser_rect(self) -> dict: ... - - -class ChromiumDownloadSetter(DownloadSetter): - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - self._behavior: str = ... - self._download_th: Thread = ... - self._session: Session = None - self._waiting_download: bool = ... - self._download_begin: bool = ... - - @property - def session(self) -> Session: ... - - @property - def _switched_DownloadKit(self) -> DownloadKit: ... - - def save_path(self, path: Union[str, Path]) -> None: ... - - def by_browser(self) -> None: ... - - def by_DownloadKit(self) -> None: ... - - def wait_download_begin(self, timeout: float = None) -> bool: ... - - def _cookies_to_session(self) -> None: ... - - def _download_by_DownloadKit(self, **kwargs) -> None: ... - - def _download_by_browser(self, **kwargs) -> None: ... - - def _wait_download_complete(self) -> None: ... - - -class Alert(object): - - def __init__(self): - self.activated: bool = ... - self.text: str = ... 
- self.type: str = ... - self.defaultPrompt: str = ... - self.response_accept: str = ... - self.response_text: str = ... - - -class WindowSetter(object): - - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - self._window_id: str = ... - - def maximized(self) -> None: ... - - def minimized(self) -> None: ... - - def fullscreen(self) -> None: ... - - def normal(self) -> None: ... - - def size(self, width: int = None, height: int = None) -> None: ... - - def location(self, x: int = None, y: int = None) -> None: ... - - def hide(self) -> None: ... - - def show(self) -> None: ... - - def _get_info(self) -> dict: ... - - def _perform(self, bounds: dict) -> None: ... - - -def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... - - -def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... - - -def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... - - -class ChromiumPageSetter(ChromiumBaseSetter): - _page: ChromiumPage = ... - - def main_tab(self, tab_id: str = None) -> None: ... - - @property - def window(self) -> WindowSetter: ... - - def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... diff --git a/DrissionPage/common.py b/DrissionPage/common.py index f1225cc..df2b6aa 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -1,14 +1,15 @@ # -*- coding:utf-8 -*- """ -@Author : g1879 -@Contact : g1879@qq.com -实用工具 +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. 
""" -from FlowViewer import Listener, RequestMan +from ._elements.session_element import make_session_ele +from ._functions.by import By +from ._functions.keys import Keys +from ._functions.settings import Settings +from ._functions.tools import wait_until, configs_to_here +from ._units.actions import Actions -from .session_element import make_session_ele - -from .action_chains import ActionChains -from .commons.keys import Keys -from .commons.by import By -from .commons.constants import Settings +__all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here'] diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py deleted file mode 100644 index 9a7cbec..0000000 --- a/DrissionPage/commons/browser.py +++ /dev/null @@ -1,235 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from json import load, dump, JSONDecodeError -from pathlib import Path -from platform import system -from subprocess import Popen, DEVNULL -from tempfile import gettempdir -from time import perf_counter, sleep - -from requests import get as requests_get - -from .tools import port_is_using -from ..configs.chromium_options import ChromiumOptions -from ..errors import BrowserConnectError - - -def connect_browser(option): - """连接或启动浏览器 - :param option: DriverOptions对象 - :return: chrome 路径和进程对象组成的元组 - """ - debugger_address = option.debugger_address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') - chrome_path = option.browser_path - - ip, port = debugger_address.split(':') - if ip != '127.0.0.1': - test_connect(ip, port) - return None, None - - if port_is_using(ip, port): - test_connect(ip, port) - return None, None - - args = get_launch_args(option) - set_prefs(option) - - # ----------创建浏览器进程---------- - try: - debugger = _run_browser(port, chrome_path, args) - - # 传入的路径找不到,主动在ini文件、注册表、系统变量中找 - except FileNotFoundError: - from ..easy_set import get_chrome_path - chrome_path = 
get_chrome_path(show_msg=False) - - if not chrome_path: - raise FileNotFoundError('无法找到chrome路径,请手动配置。') - - debugger = _run_browser(port, chrome_path, args) - - test_connect(ip, port) - return chrome_path, debugger - - -def get_launch_args(opt): - """从DriverOptions获取命令行启动参数 - :param opt: DriverOptions或ChromiumOptions - :return: 启动参数列表 - """ - # ----------处理arguments----------- - result = set() - has_user_path = False - headless = False - for i in opt.arguments: - if i.startswith(('--load-extension=', '--remote-debugging-port=')): - continue - elif i.startswith('--user-data-dir') and not opt.system_user_path: - result.add(f'--user-data-dir={Path(i[16:]).absolute()}') - has_user_path = True - continue - elif i.startswith('--headless'): - headless = True - - result.add(i) - - if not has_user_path and not opt.system_user_path: - port = opt.debugger_address.split(':')[-1] if opt.debugger_address else '0' - path = Path(gettempdir()) / 'DrissionPage' / f'userData_{port}' - path.mkdir(parents=True, exist_ok=True) - result.add(f'--user-data-dir={path}') - - if not headless and system().lower() == 'linux': - from os import popen - r = popen('systemctl list-units | grep graphical.target') - if 'graphical.target' not in r.read(): - result.add('--headless=new') - - result = list(result) - - # ----------处理插件extensions------------- - ext = opt.extensions if isinstance(opt, ChromiumOptions) else opt._extension_files - if ext: - ext = ','.join(set(ext)) - ext = f'--load-extension={ext}' - result.append(ext) - - return result - - -def set_prefs(opt): - """处理启动配置中的prefs项,目前只能对已存在文件夹配置 - :param opt: DriverOptions或ChromiumOptions - :return: None - """ - if isinstance(opt, ChromiumOptions): - prefs = opt.preferences - del_list = opt._prefs_to_del - else: - prefs = opt.experimental_options.get('prefs', []) - del_list = [] - - if not opt.user_data_path: - return - - args = opt.arguments - user = 'Default' - for arg in args: - if arg.startswith('--profile-directory'): - user = 
arg.split('=')[-1].strip() - break - - prefs_file = Path(opt.user_data_path) / user / 'Preferences' - - if not prefs_file.exists(): - prefs_file.parent.mkdir(parents=True, exist_ok=True) - with open(prefs_file, 'w') as f: - f.write('{}') - - with open(prefs_file, "r", encoding='utf-8') as f: - try: - prefs_dict = load(f) - except JSONDecodeError: - prefs_dict = {} - - for pref in prefs: - value = prefs[pref] - pref = pref.split('.') - _make_leave_in_dict(prefs_dict, pref, 0, len(pref)) - _set_value_to_dict(prefs_dict, pref, value) - - for pref in del_list: - _remove_arg_from_dict(prefs_dict, pref) - - with open(prefs_file, 'w', encoding='utf-8') as f: - dump(prefs_dict, f) - - -def test_connect(ip, port): - """测试浏览器是否可用 - :param ip: 浏览器ip - :param port: 浏览器端口 - :return: None - """ - end_time = perf_counter() + 30 - while perf_counter() < end_time: - try: - tabs = requests_get(f'http://{ip}:{port}/json', timeout=10).json() - for tab in tabs: - if tab['type'] == 'page': - return - except Exception: - sleep(.2) - - if ip in ('127.0.0.1', 'localhost'): - raise BrowserConnectError(f'\n连接浏览器失败,可能原因:\n1、浏览器未启动\n2、{port}端口不是Chromium内核浏览器\n' - f'3、该浏览器未允许控制\n4、和已打开的浏览器冲突\n' - f'请尝试用ChromiumOptions指定别的端口和指定浏览器路径') - raise BrowserConnectError(f'{ip}:{port}浏览器无法链接。') - - -def _run_browser(port, path: str, args) -> Popen: - """创建chrome进程 - :param port: 端口号 - :param path: 浏览器路径 - :param args: 启动参数 - :return: 进程对象 - """ - p = Path(path) - p = str(p / 'chrome') if p.is_dir() else str(path) - arguments = [p, f'--remote-debugging-port={port}'] - arguments.extend(args) - try: - return Popen(arguments, shell=False, stdout=DEVNULL, stderr=DEVNULL) - except FileNotFoundError: - raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。') - - -def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None: - """把prefs中a.b.c形式的属性转为a['b']['c']形式 - :param target_dict: 要处理的字典 - :param src: 属性层级列表[a, b, c] - :param num: 当前处理第几个 - :param end: src长度 - :return: None - """ - if num == 
end: - return - if src[num] not in target_dict: - target_dict[src[num]] = {} - num += 1 - _make_leave_in_dict(target_dict[src[num - 1]], src, num, end) - - -def _set_value_to_dict(target_dict: dict, src: list, value) -> None: - """把a.b.c形式的属性的值赋值到a['b']['c']形式的字典中 - :param target_dict: 要处理的字典 - :param src: 属性层级列表[a, b, c] - :param value: 属性值 - :return: None - """ - src = "']['".join(src) - src = f"target_dict['{src}']=value" - exec(src) - - -def _remove_arg_from_dict(target_dict: dict, arg: str) -> None: - """把a.b.c形式的属性从字典中删除 - :param target_dict: 要处理的字典 - :param arg: 层级属性,形式'a.b.c' - :return: None - """ - args = arg.split('.') - args = [f"['{i}']" for i in args] - src = ''.join(args) - src = f"target_dict{src}" - try: - exec(src) - src = ''.join(args[:-1]) - src = f"target_dict{src}.pop({args[-1][1:-1]})" - exec(src) - except: - pass diff --git a/DrissionPage/commons/browser.pyi b/DrissionPage/commons/browser.pyi deleted file mode 100644 index 2324000..0000000 --- a/DrissionPage/commons/browser.pyi +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union - -from DrissionPage.configs.chromium_options import ChromiumOptions -from DrissionPage.configs.driver_options import DriverOptions - - -def connect_browser(option: Union[ChromiumOptions, DriverOptions]) -> tuple: ... - - -def get_launch_args(opt: Union[ChromiumOptions, DriverOptions]) -> list: ... - - -def set_prefs(opt: Union[ChromiumOptions, DriverOptions]) -> None: ... 
diff --git a/DrissionPage/commons/constants.py b/DrissionPage/commons/constants.py deleted file mode 100644 index 612beb3..0000000 --- a/DrissionPage/commons/constants.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from ..errors import ElementNotFoundError - -HANDLE_ALERT_METHOD = 'Page.handleJavaScriptDialog' -FRAME_ELEMENT = ('iframe', 'frame') -ERROR = 'error' - - -class Settings(object): - raise_ele_not_found = False - raise_click_failed = False - - -class NoneElement(object): - _instance = None - - def __new__(cls): - if not cls._instance: - cls._instance = super(NoneElement, cls).__new__(cls) - return cls._instance - - def __call__(self, *args, **kwargs): - raise ElementNotFoundError - - def __getattr__(self, item): - raise ElementNotFoundError - - def __eq__(self, other): - if other is None: - return True - - def __bool__(self): - return False - - def __repr__(self): - return 'None' diff --git a/DrissionPage/commons/locator.pyi b/DrissionPage/commons/locator.pyi deleted file mode 100644 index 1038890..0000000 --- a/DrissionPage/commons/locator.pyi +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union - - -def get_loc(loc: Union[tuple, str], translate_css: bool = False) -> tuple: ... - - -def str_to_loc(loc: str) -> tuple: ... - - -def translate_loc(loc: tuple) -> tuple: ... 
diff --git a/DrissionPage/commons/tools.py b/DrissionPage/commons/tools.py deleted file mode 100644 index 1a70f15..0000000 --- a/DrissionPage/commons/tools.py +++ /dev/null @@ -1,152 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from re import search, sub -from shutil import rmtree -from zipfile import ZipFile - - -def get_exe_from_port(port): - """获取端口号第一条进程的可执行文件路径 - :param port: 端口号 - :return: 可执行文件的绝对路径 - """ - from os import popen - - pid = get_pid_from_port(port) - if not pid: - return - else: - file_lst = popen(f'wmic process where processid={pid} get executablepath').read().split('\n') - return file_lst[2].strip() if len(file_lst) > 2 else None - - -def get_pid_from_port(port): - """获取端口号第一条进程的pid - :param port: 端口号 - :return: 进程id - """ - from platform import system - if system().lower() != 'windows' or port is None: - return None - - from os import popen - from time import perf_counter - - try: # 避免Anaconda中可能产生的报错 - process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] - - t = perf_counter() - while not process and perf_counter() - t < 5: - process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] - - return process.split(' ')[-1] or None - - except Exception: - return None - - -def get_usable_path(path): - """检查文件或文件夹是否有重名,并返回可以使用的路径 - :param path: 文件或文件夹路径 - :return: 可用的路径,Path对象 - """ - path = Path(path) - parent = path.parent - path = parent / make_valid_name(path.name) - name = path.stem if path.is_file() else path.name - ext = path.suffix if path.is_file() else '' - - first_time = True - - while path.exists(): - r = search(r'(.*)_(\d+)$', name) - - if not r or (r and first_time): - src_name, num = name, '1' - else: - src_name, num = r.group(1), int(r.group(2)) + 1 - - name = f'{src_name}_{num}' - path = parent / f'{name}{ext}' - first_time = None - - return path - - -def make_valid_name(full_name): - """获取有效的文件名 - :param full_name: 文件名 - :return: 可用的文件名 - 
""" - # ----------------去除前后空格---------------- - full_name = full_name.strip() - - # ----------------使总长度不大于255个字符(一个汉字是2个字符)---------------- - r = search(r'(.*)(\.[^.]+$)', full_name) # 拆分文件名和后缀名 - if r: - name, ext = r.group(1), r.group(2) - ext_long = len(ext) - else: - name, ext = full_name, '' - ext_long = 0 - - while get_long(name) > 255 - ext_long: - name = name[:-1] - - full_name = f'{name}{ext}' - - # ----------------去除不允许存在的字符---------------- - return sub(r'[<>/\\|:*?\n]', '', full_name) - - -def get_long(txt): - """返回字符串中字符个数(一个汉字是2个字符) - :param txt: 字符串 - :return: 字符个数 - """ - txt_len = len(txt) - return int((len(txt.encode('utf-8')) - txt_len) / 2 + txt_len) - - -def port_is_using(ip, port): - """检查端口是否被占用 - :param ip: 浏览器地址 - :param port: 浏览器端口 - :return: bool - """ - from socket import socket, AF_INET, SOCK_STREAM - s = socket(AF_INET, SOCK_STREAM) - s.settimeout(.1) - result = s.connect_ex((ip, int(port))) - s.close() - return result == 0 - - -def clean_folder(folder_path, ignore=None): - """清空一个文件夹,除了ignore里的文件和文件夹 - :param folder_path: 要清空的文件夹路径 - :param ignore: 忽略列表 - :return: None - """ - ignore = [] if not ignore else ignore - p = Path(folder_path) - - for f in p.iterdir(): - if f.name not in ignore: - if f.is_file(): - f.unlink() - elif f.is_dir(): - rmtree(f, True) - - -def unzip(zip_path, to_path): - """解压下载的chromedriver.zip文件""" - if not zip_path: - return - - with ZipFile(zip_path, 'r') as f: - return [f.extract(f.namelist()[0], path=to_path)] diff --git a/DrissionPage/commons/tools.pyi b/DrissionPage/commons/tools.pyi deleted file mode 100644 index a95722d..0000000 --- a/DrissionPage/commons/tools.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union - - -def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... - - -def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ... 
- - -def get_usable_path(path: Union[str, Path]) -> Path: ... - - -def make_valid_name(full_name: str) -> str: ... - - -def get_long(txt) -> int: ... - - -def port_is_using(ip: str, port: Union[str, int]) -> bool: ... - - -def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... - - -def unzip(zip_path: str, to_path: str) -> Union[list, None]: ... diff --git a/DrissionPage/commons/web.pyi b/DrissionPage/commons/web.pyi deleted file mode 100644 index 6e22253..0000000 --- a/DrissionPage/commons/web.pyi +++ /dev/null @@ -1,108 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from http.cookiejar import Cookie -from typing import Union - -from requests import Session -from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict - -from DrissionPage.base import DrissionElement, BasePage -from DrissionPage.chromium_element import ChromiumElement -from DrissionPage.chromium_base import ChromiumBase - - -class ResponseData(object): - - def __init__(self, request_id: str, response: dict, body: str, tab: str, target: str): - self.requestId: str = ... - self.response: CaseInsensitiveDict = ... - self.rawBody: str = ... - self._body: Union[str, dict, bytes] = ... - self._base64_body: bool = ... - self.tab: str = ... - self.target: str = ... - self.method: str = ... - self._postData: dict = ... - self._rawPostData: str = ... - self.url: str = ... - self.status: str = ... - self.statusText: str = ... - self.headersText: str = ... - self.mimeType: str = ... - self.requestHeadersText: str = ... - self.connectionReused: str = ... - self.connectionId: str = ... - self.remoteIPAddress: str = ... - self.remotePort: str = ... - self.fromDiskCache: str = ... - self.fromServiceWorker: str = ... - self.fromPrefetchCache: str = ... - self.encodedDataLength: str = ... - self.timing: str = ... - self.serviceWorkerResponseSource: str = ... - self.responseTime: str = ... 
- self.cacheStorageCacheName: str = ... - self.protocol: str = ... - self.securityState: str = ... - self.securityDetails: str = ... - - def __getattr__(self, item: str) -> Union[str, None]: ... - - def __getitem__(self, item: str) -> Union[str, None]: ... - - def __repr__(self) -> str: ... - - @property - def headers(self) -> Union[CaseInsensitiveDict, None]: ... - - @property - def requestHeaders(self) -> Union[CaseInsensitiveDict, None]: ... - - @requestHeaders.setter - def requestHeaders(self, val: dict) -> None: ... - - @property - def postData(self) -> Union[dict, str, None]: ... - - @postData.setter - def postData(self, val: Union[str, dict]) -> None: ... - - @property - def body(self) -> Union[str, dict, bytes]: ... - - -def get_ele_txt(e: DrissionElement) -> str: ... - - -def format_html(text: str) -> str: ... - - -def location_in_viewport(page: ChromiumBase, loc_x: int, loc_y: int) -> bool: ... - - -def offset_scroll(ele: ChromiumElement, offset_x: int, offset_y: int) -> tuple: ... - - -def make_absolute_link(link, page: BasePage = None) -> str: ... - - -def is_js_func(func: str) -> bool: ... - - -def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ... - - -def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ... - - -def set_session_cookies(session: Session, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - -def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - -def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ... 
diff --git a/DrissionPage/configs/chromium_options.pyi b/DrissionPage/configs/chromium_options.pyi deleted file mode 100644 index 68937fc..0000000 --- a/DrissionPage/configs/chromium_options.pyi +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union, Tuple, Any - - -class ChromiumOptions(object): - def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None): - self.ini_path: str = ... - self._driver_path: str = ... - self._user_data_path: str = ... - self._download_path: str = ... - self._arguments: list = ... - self._binary_location: str = ... - self._user: str = ... - self._page_load_strategy: str = ... - self._timeouts: dict = ... - self._proxy: str = ... - self._debugger_address: str = ... - self._extensions: list = ... - self._prefs: dict = ... - self._prefs_to_del: list = ... - self._auto_port: bool = ... - self._system_user_path: bool = ... - - @property - def download_path(self) -> str: ... - - @property - def browser_path(self) -> str: ... - - @property - def user_data_path(self) -> str: ... - - @property - def user(self) -> str: ... - - @property - def page_load_strategy(self) -> str: ... - - @property - def timeouts(self) -> dict: ... - - @property - def proxy(self) -> str: ... - - @property - def debugger_address(self) -> str: ... - - @property - def arguments(self) -> list: ... - - @debugger_address.setter - def debugger_address(self, address: str): ... - - @property - def extensions(self) -> list: ... - - @property - def preferences(self) -> dict: ... - - @property - def system_user_path(self) -> bool: ... - - def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: ... - - def remove_argument(self, value: str) -> ChromiumOptions: ... - - def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: ... - - def remove_extensions(self) -> ChromiumOptions: ... 
- - def set_pref(self, arg: str, value: Any) -> ChromiumOptions: ... - - def remove_pref(self, arg: str) -> ChromiumOptions: ... - - def remove_pref_from_file(self, arg: str) -> ChromiumOptions: ... - - def set_timeouts(self, implicit: float = None, pageLoad: float = None, - script: float = None) -> ChromiumOptions: ... - - def set_user(self, user: str = 'Default') -> ChromiumOptions: ... - - def set_headless(self, on_off: bool = True) -> ChromiumOptions: ... - - def set_no_imgs(self, on_off: bool = True) -> ChromiumOptions: ... - - def set_no_js(self, on_off: bool = True) -> ChromiumOptions: ... - - def set_mute(self, on_off: bool = True) -> ChromiumOptions: ... - - def set_user_agent(self, user_agent: str) -> ChromiumOptions: ... - - def set_proxy(self, proxy: str) -> ChromiumOptions: ... - - def set_page_load_strategy(self, value: str) -> ChromiumOptions: ... - - def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None, - debugger_address: str = None, download_path: Union[str, Path] = None, - user_data_path: Union[str, Path] = None, cache_path: Union[str, Path] = None) -> ChromiumOptions: ... - - def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: ... - - def auto_port(self, on_off: bool = True) -> ChromiumOptions: ... - - def save(self, path: Union[str, Path] = None) -> str: ... - - def save_to_default(self) -> str: ... - - -class PortFinder(object): - used_port: list = ... - - @staticmethod - def get_port() -> Tuple[int, str]: ... 
diff --git a/DrissionPage/configs/driver_options.py b/DrissionPage/configs/driver_options.py deleted file mode 100644 index ce30964..0000000 --- a/DrissionPage/configs/driver_options.py +++ /dev/null @@ -1,364 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path - -from selenium.webdriver.chrome.options import Options - -from .options_manage import OptionsManager - - -class DriverOptions(Options): - """chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类, - 增加了删除配置和保存到文件方法。 - """ - - def __init__(self, read_file=True, ini_path=None): - """初始化,默认从文件读取设置 - :param read_file: 是否从默认ini文件中读取配置信息 - :param ini_path: ini文件路径,为None则读取默认ini文件 - """ - super().__init__() - self._user_data_path = None - - if read_file: - self.ini_path = str(ini_path) if ini_path else str(Path(__file__).parent / 'configs.ini') - om = OptionsManager(self.ini_path) - options_dict = om.chrome_options - - self._driver_path = om.paths.get('chromedriver_path', None) - self._download_path = om.paths.get('download_path', None) - self._binary_location = options_dict.get('binary_location', '') - self._arguments = options_dict.get('arguments', []) - self._extensions = options_dict.get('extensions', []) - self._experimental_options = options_dict.get('experimental_options', {}) - self._debugger_address = options_dict.get('debugger_address', None) - self.page_load_strategy = options_dict.get('page_load_strategy', 'normal') - self.system_user_path = options_dict.get('system_user_path', False) - - for arg in self._arguments: - if arg.startswith('--user-data-dir='): - self.set_paths(user_data_path=arg[16:]) - break - - self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30}) - return - - self._driver_path = None - self._download_path = None - self.ini_path = None - self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} - self._debugger_address = '127.0.0.1:9222' - self.system_user_path = False - - @property 
- def driver_path(self): - """chromedriver文件路径""" - return self._driver_path - - @property - def download_path(self): - """默认下载路径文件路径""" - return self._download_path - - @property - def chrome_path(self): - """浏览器启动文件路径""" - return self.browser_path - - @property - def browser_path(self): - """浏览器启动文件路径""" - return self.binary_location or 'chrome' - - @property - def user_data_path(self): - """返回用户文件夹路径""" - return self._user_data_path - - # -------------重写父类方法,实现链式操作------------- - def add_argument(self, argument): - """添加一个配置项 - :param argument: 配置项内容 - :return: 当前对象 - """ - super().add_argument(argument) - return self - - def set_capability(self, name, value): - """设置一个capability - :param name: capability名称 - :param value: capability值 - :return: 当前对象 - """ - super().set_capability(name, value) - return self - - def add_extension(self, extension): - """添加插件 - :param extension: crx文件路径 - :return: 当前对象 - """ - super().add_extension(extension) - return self - - def add_encoded_extension(self, extension): - """将带有扩展数据的 Base64 编码字符串添加到将用于将其提取到 ChromeDriver 的列表中 - :param extension: 带有扩展数据的 Base64 编码字符串 - :return: 当前对象 - """ - super().add_encoded_extension(extension) - return self - - def add_experimental_option(self, name, value): - """添加一个实验选项到浏览器 - :param name: 选项名称 - :param value: 选项值 - :return: 当前对象 - """ - super().add_experimental_option(name, value) - return self - - # -------------重写父类方法结束------------- - - def save(self, path=None): - """保存设置到文件 - :param path: ini文件的路径, None 保存到当前读取的配置文件,传入 'default' 保存到默认ini文件 - :return: 保存文件的绝对路径 - """ - if path == 'default': - path = (Path(__file__).parent / 'configs.ini').absolute() - - elif path is None: - if self.ini_path: - path = Path(self.ini_path).absolute() - else: - path = (Path(__file__).parent / 'configs.ini').absolute() - - else: - path = Path(path).absolute() - - path = path / 'config.ini' if path.is_dir() else path - - if path.exists(): - om = OptionsManager(str(path)) - else: - om = OptionsManager(self.ini_path 
or str(Path(__file__).parent / 'configs.ini')) - - options = self.as_dict() - - for i in options: - if i == 'driver_path': - om.set_item('paths', 'chromedriver_path', options[i]) - elif i == 'download_path': - om.set_item('paths', 'download_path', options[i]) - else: - om.set_item('chrome_options', i, options[i]) - - path = str(path) - om.save(path) - - return path - - def save_to_default(self): - """保存当前配置到默认ini文件""" - return self.save('default') - - def remove_argument(self, value): - """移除一个argument项 - :param value: 设置项名,有值的设置项传入设置名称即可 - :return: 当前对象 - """ - del_list = [] - - for argument in self._arguments: - if argument.startswith(value): - del_list.append(argument) - - for del_arg in del_list: - self._arguments.remove(del_arg) - - return self - - def remove_experimental_option(self, key): - """移除一个实验设置,传入key值删除 - :param key: 实验设置的名称 - :return: 当前对象 - """ - if key in self._experimental_options: - self._experimental_options.pop(key) - - return self - - def remove_all_extensions(self): - """移除所有插件 - :return: 当前对象 - """ - # 因插件是以整个文件储存,难以移除其中一个,故如须设置则全部移除再重设 - self._extensions = [] - return self - - def set_argument(self, arg, value): - """设置浏览器配置的argument属性 - :param arg: 属性名 - :param value: 属性值,有值的属性传入值,没有的传入bool - :return: 当前对象 - """ - self.remove_argument(arg) - - if value: - arg_str = arg if isinstance(value, bool) else f'{arg}={value}' - self.add_argument(arg_str) - - return self - - def set_timeouts(self, implicit=None, pageLoad=None, script=None): - """设置超时时间,设置单位为秒,selenium4以上版本有效 - :param implicit: 查找元素超时时间 - :param pageLoad: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: 当前对象 - """ - if implicit is not None: - self.timeouts['implicit'] = implicit - if pageLoad is not None: - self.timeouts['pageLoad'] = pageLoad - if script is not None: - self.timeouts['script'] = script - - return self - - def set_headless(self, on_off=True): - """设置是否隐藏浏览器界面 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return 
self.set_argument('--headless', on_off) - - def set_no_imgs(self, on_off=True): - """设置是否加载图片 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--blink-settings=imagesEnabled=false', on_off) - - def set_no_js(self, on_off=True): - """设置是否禁用js - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--disable-javascript', on_off) - - def set_mute(self, on_off=True): - """设置是否静音 - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--mute-audio', on_off) - - def set_user_agent(self, user_agent): - """设置user agent - :param user_agent: user agent文本 - :return: 当前对象 - """ - return self.set_argument('--user-agent', user_agent) - - def set_proxy(self, proxy): - """设置代理 - :param proxy: 代理url和端口 - :return: 当前对象 - """ - return self.set_argument('--proxy-server', proxy) - - def set_page_load_strategy(self, value): - """设置page_load_strategy,可接收 'normal', 'eager', 'none' - selenium4以上版本才支持此功能 - normal:默认情况下使用, 等待所有资源下载完成 - eager:DOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中 - none:完全不阻塞WebDriver - :param value: 可接收 'normal', 'eager', 'none' - :return: 当前对象 - """ - if value not in ('normal', 'eager', 'none'): - raise ValueError("只能选择'normal', 'eager', 'none'。") - self.page_load_strategy = value.lower() - return self - - def set_paths(self, driver_path=None, chrome_path=None, browser_path=None, local_port=None, - debugger_address=None, download_path=None, user_data_path=None, cache_path=None): - """快捷的路径设置函数 - :param driver_path: chromedriver.exe路径 - :param chrome_path: chrome.exe路径 - :param browser_path: 浏览器可执行文件路径 - :param local_port: 本地端口号 - :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 - :param download_path: 下载文件路径 - :param user_data_path: 用户数据路径 - :param cache_path: 缓存路径 - :return: 当前对象 - """ - if driver_path is not None: - self._driver_path = str(driver_path) - - if chrome_path is not None: - self.binary_location = 
str(chrome_path) - - if browser_path is not None: - self.binary_location = str(browser_path) - - if local_port is not None: - self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}' - - if debugger_address is not None: - self.debugger_address = debugger_address - - if download_path is not None: - self._download_path = str(download_path) - - if user_data_path is not None: - self.set_argument('--user-data-dir', str(user_data_path)) - self._user_data_path = user_data_path - - if cache_path is not None: - self.set_argument('--disk-cache-dir', str(cache_path)) - - return self - - def as_dict(self): - """已dict方式返回所有配置信息""" - return chrome_options_to_dict(self) - - -def chrome_options_to_dict(options): - """把chrome配置对象转换为字典 - :param options: chrome配置对象,字典或DriverOptions对象 - :return: 配置字典 - """ - if options in (False, None): - return DriverOptions(read_file=False).as_dict() - - if isinstance(options, dict): - return options - - re_dict = dict() - attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path', - 'page_load_strategy', 'download_path'] - - options_dir = options.__dir__() - for attr in attrs: - try: - re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None - except Exception: - pass - - if 'timeouts' in options_dir and 'timeouts' in options._caps: - timeouts = options.__getattribute__('timeouts') - re_dict['timeouts'] = timeouts - - return re_dict diff --git a/DrissionPage/configs/driver_options.pyi b/DrissionPage/configs/driver_options.pyi deleted file mode 100644 index cb16b21..0000000 --- a/DrissionPage/configs/driver_options.pyi +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union, List - -from selenium.webdriver.chrome.options import Options - - -class DriverOptions(Options): - - def __init__(self, read_file: bool = True, ini_path: Union[str, Path] = None): 
- self.ini_path: str = ... - self._driver_path: str = ... - self._user_data_path: str = ... - self._download_path: str = ... - - @property - def driver_path(self) -> str: ... - - @property - def download_path(self) -> str: ... - - @property - def chrome_path(self) -> str: ... - - @property - def browser_path(self) -> str: ... - - @property - def user_data_path(self) -> str: ... - - # -------------重写父类方法,实现链式操作------------- - def add_argument(self, argument: str) -> DriverOptions: ... - - def set_capability(self, name: str, value: str) -> DriverOptions: ... - - def add_extension(self, extension: str) -> DriverOptions: ... - - def add_encoded_extension(self, extension: str) -> DriverOptions: ... - - def add_experimental_option(self, name: str, value: Union[str, int, dict, List[str]]) -> DriverOptions: ... - - # -------------重写父类方法结束------------- - - def save(self, path: str = None) -> str: ... - - def save_to_default(self) -> str: ... - - def remove_argument(self, value: str) -> DriverOptions: ... - - def remove_experimental_option(self, key: str) -> DriverOptions: ... - - def remove_all_extensions(self) -> DriverOptions: ... - - def set_argument(self, arg: str, value: Union[bool, str]) -> DriverOptions: ... - - def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> DriverOptions: ... - - def set_headless(self, on_off: bool = True) -> DriverOptions: ... - - def set_no_imgs(self, on_off: bool = True) -> DriverOptions: ... - - def set_no_js(self, on_off: bool = True) -> DriverOptions: ... - - def set_mute(self, on_off: bool = True) -> DriverOptions: ... - - def set_user_agent(self, user_agent: str) -> DriverOptions: ... - - def set_proxy(self, proxy: str) -> DriverOptions: ... - - def set_page_load_strategy(self, value: str) -> DriverOptions: ... 
- - def set_paths(self, - driver_path: Union[str, Path] = None, - chrome_path: Union[str, Path] = None, - browser_path: Union[str, Path] = None, - local_port: Union[int, str] = None, - debugger_address: str = None, - download_path: str = None, - user_data_path: str = None, - cache_path: str = None) -> DriverOptions: ... - - def as_dict(self) -> dict: ... - - -def chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]: ... diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py deleted file mode 100644 index b8defe1..0000000 --- a/DrissionPage/easy_set.py +++ /dev/null @@ -1,430 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from os import popen -from pathlib import Path -from re import search -from typing import Union - -from .commons.constants import Settings -from .commons.tools import unzip -from .configs.chromium_options import ChromiumOptions -from .configs.options_manage import OptionsManager -from .session_page import SessionPage - -try: - from selenium import webdriver - from .mixpage.drission import Drission - from .configs.driver_options import DriverOptions -except ModuleNotFoundError: - pass - - -def raise_when_ele_not_found(on_off=True): - """设置全局变量,找不到元素时是否抛出异常 - :param on_off: True 或 False - :return: None - """ - Settings.raise_ele_not_found = on_off - - -def configs_to_here(save_name=None): - """把默认ini文件复制到当前目录 - :param save_name: 指定文件名,为None则命名为'dp_configs.ini' - :return: None - """ - om = OptionsManager('default') - save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini' - om.save(save_name) - - -def show_settings(ini_path=None): - """打印ini文件内容 - :param ini_path: ini文件路径 - :return: None - """ - OptionsManager(ini_path).show() - - -def set_paths(driver_path=None, - chrome_path=None, - browser_path=None, - local_port=None, - debugger_address=None, - download_path=None, - user_data_path=None, - cache_path=None, - ini_path=None, - 
check_version=False): - """快捷的路径设置函数 - :param driver_path: chromedriver.exe路径 - :param chrome_path: 浏览器可执行文件路径 - :param browser_path: 浏览器可执行文件路径 - :param local_port: 本地端口号 - :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 - :param download_path: 下载文件路径 - :param user_data_path: 用户数据路径 - :param cache_path: 缓存路径 - :param ini_path: 要修改的ini文件路径 - :param check_version: 是否检查chromedriver和chrome是否匹配 - :return: None - """ - om = OptionsManager(ini_path) - - def format_path(path: str) -> str: - return str(path) if path else '' - - if driver_path is not None: - om.set_item('paths', 'chromedriver_path', format_path(driver_path)) - - if chrome_path is not None: - om.set_item('chrome_options', 'binary_location', format_path(chrome_path)) - - if browser_path is not None: - om.set_item('chrome_options', 'binary_location', format_path(browser_path)) - - if local_port is not None: - om.set_item('chrome_options', 'debugger_address', f'127.0.0.1:{local_port}') - - if debugger_address is not None: - address = debugger_address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') - om.set_item('chrome_options', 'debugger_address', address) - - if download_path is not None: - om.set_item('paths', 'download_path', format_path(download_path)) - - om.save() - - if user_data_path is not None: - set_argument('--user-data-dir', format_path(user_data_path), ini_path) - - if cache_path is not None: - set_argument('--disk-cache-dir', format_path(cache_path), ini_path) - - if check_version: - check_driver_version(format_path(driver_path), format_path(browser_path)) - - -def use_auto_port(on_off=True, ini_path=None): - """设置启动浏览器时使用自动分配的端口和临时文件夹 - :param on_off: 是否开启自动端口 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off参数只能输入bool值。') - om = OptionsManager(ini_path) - om.set_item('chrome_options', 'auto_port', on_off) - om.save() - - -def use_system_user_path(on_off=True, ini_path=None): - """设置是否使用系统安装的浏览器默认用户文件夹 - 
:param on_off: 开或关 - :param ini_path: 要修改的ini文件路径 - :return: 当前对象 - """ - if not isinstance(on_off, bool): - raise TypeError('on_off参数只能输入bool值。') - om = OptionsManager(ini_path) - om.set_item('chrome_options', 'system_user_path', on_off) - om.save() - - -def set_argument(arg, value=None, ini_path=None): - """设置浏览器配置argument属性 - :param arg: 属性名 - :param value: 属性值,有值的属性传入值,没有的传入None - :param ini_path: 要修改的ini文件路径 - :return: None - """ - co = ChromiumOptions(ini_path=ini_path) - co.set_argument(arg, value) - co.save() - - -def set_headless(on_off=True, ini_path=None): - """设置是否隐藏浏览器界面 - :param on_off: 开或关 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - on_off = 'new' if on_off else False - set_argument('--headless', on_off, ini_path) - - -def set_no_imgs(on_off=True, ini_path=None): - """设置是否禁止加载图片 - :param on_off: 开或关 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - on_off = None if on_off else False - set_argument('--blink-settings=imagesEnabled=false', on_off, ini_path) - - -def set_no_js(on_off=True, ini_path=None): - """设置是否禁用js - :param on_off: 开或关 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - on_off = None if on_off else False - set_argument('--disable-javascript', on_off, ini_path) - - -def set_mute(on_off=True, ini_path=None): - """设置是否静音 - :param on_off: 开或关 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - on_off = None if on_off else False - set_argument('--mute-audio', on_off, ini_path) - - -def set_user_agent(user_agent, ini_path=None): - """设置user agent - :param user_agent: user agent文本 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - set_argument('--user-agent', user_agent, ini_path) - - -def set_proxy(proxy, ini_path=None): - """设置代理 - :param proxy: 代理网址和端口 - :param ini_path: 要修改的ini文件路径 - :return: None - """ - set_argument('--proxy-server', proxy, ini_path) - - -def check_driver_version(driver_path=None, chrome_path=None): - """检查传入的chrome和chromedriver是否匹配 - :param driver_path: chromedriver.exe路径 - :param 
chrome_path: chrome.exe路径 - :return: 是否匹配 - """ - print('正在检测可用性...') - om = OptionsManager() - driver_path = driver_path or om.get_value('paths', 'chromedriver_path') or 'chromedriver' - chrome_path = str(chrome_path or om.get_value('chrome_options', 'binary_location')) - do = DriverOptions(read_file=False) - do.add_argument('--headless') - - if chrome_path: - do.binary_location = chrome_path - - try: - driver = webdriver.Chrome(driver_path, options=do) - driver.quit() - print('版本匹配,可正常使用。') - - return True - - except Exception as e: - print(f'出现异常:\n{e}\n可执行easy_set.get_match_driver()自动下载匹配的版本。\n' - f'或自行从以下网址下载:http://npm.taobao.org/mirrors/chromedriver/') - - return False - - -# -------------------------自动识别chrome版本号并下载对应driver------------------------ -def get_match_driver(ini_path='default', - save_path=None, - chrome_path=None, - show_msg=True, - check_version=True): - """自动识别chrome版本并下载匹配的driver - :param ini_path: 要读取和修改的ini文件路径 - :param save_path: chromedriver保存路径 - :param chrome_path: 指定chrome.exe位置 - :param show_msg: 是否打印信息 - :param check_version: 是否检查版本匹配 - :return: None - """ - save_path = save_path or str(Path(__file__).parent) - - chrome_path = chrome_path or get_chrome_path(ini_path, show_msg) - chrome_path = Path(chrome_path).absolute() if chrome_path else None - if show_msg: - print('chrome.exe路径', chrome_path) - - ver = _get_chrome_version(str(chrome_path)) - if show_msg: - print('version', ver) - - zip_path = _download_driver(ver, save_path, show_msg=show_msg) - - if not zip_path and show_msg: - print('没有找到对应版本的driver。') - - try: - driver_path = unzip(zip_path, save_path)[0] - except TypeError: - driver_path = None - - if show_msg: - print('解压路径', driver_path) - - if driver_path: - Path(zip_path).unlink() - if ini_path: - set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False) - - if check_version: - if not check_driver_version(driver_path, chrome_path) and show_msg: - print('获取失败,请手动配置。') - else: 
- if show_msg: - print('获取失败,请手动配置。') - - return driver_path - - -def get_chrome_path(ini_path=None, - show_msg=True, - from_ini=True, - from_regedit=True, - from_system_path=True): - """从ini文件或系统变量中获取chrome.exe的路径 - :param ini_path: ini文件路径 - :param show_msg: 是否打印信息 - :param from_ini: 是否从ini文件获取 - :param from_regedit: 是否从注册表获取 - :param from_system_path: 是否从系统路径获取 - :return: chrome.exe路径 - """ - # -----------从ini文件中获取-------------- - if ini_path and from_ini: - try: - path = OptionsManager(ini_path).chrome_options['binary_location'] - except KeyError: - path = None - else: - path = None - - if path and Path(path).is_file(): - if show_msg: - print('ini文件中', end='') - return str(path) - - from platform import system - sys = system().lower() - if sys in ('macos', 'darwin'): - return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' - - elif sys == 'linux': - paths = ('/usr/bin/google-chrome', '/opt/google/chrome/google-chrome', - '/user/lib/chromium-browser/chromium-browser') - for p in paths: - if Path(p).exists(): - return p - return None - - elif sys != 'windows': - return None - - # -----------从注册表中获取-------------- - if from_regedit: - import winreg - try: - key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, - r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', - reserved=0, access=winreg.KEY_READ) - # key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Google\Chrome\BLBeacon\version', - # reserved=0, access=winreg.KEY_READ) - k = winreg.EnumValue(key, 0) - winreg.CloseKey(key) - - if show_msg: - print('注册表中', end='') - - return k[1] - - except FileNotFoundError: - pass - - # -----------从系统变量中获取-------------- - if from_system_path: - try: - paths = popen('set path').read().lower() - except: - return None - r = search(r'[^;]*chrome[^;]*', paths) - - if r: - path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe' - - if path.exists(): - if show_msg: - print('系统变量中', end='') - return str(path) - - 
paths = paths.split(';') - - for path in paths: - path = Path(path) / 'chrome.exe' - - try: - if path.exists(): - if show_msg: - print('系统变量中', end='') - return str(path) - except OSError: - pass - - -def _get_chrome_version(path: str) -> Union[str, None]: - """根据文件路径获取版本号 - :param path: chrome.exe文件路径 - :return: 版本号 - """ - if not path: - return - - path = str(path).replace('\\', '\\\\') - - try: - return (popen(f'wmic datafile where "name=\'{path}\'" get version').read() - .lower().split('\n')[2].replace(' ', '')) - except Exception: - return None - - -def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]: - """根据传入的版本号到镜像网站查找,下载最相近的 - :param version: 本地版本号 - :return: 保存地址 - """ - if not version: - return - - main_ver = version.split('.')[0] - remote_ver = None - - page = SessionPage(Drission().session) - page.get('https://registry.npmmirror.com/-/binary/chromedriver/') - - for version in page.json: - # 遍历所有版本,跳过大版本不一致的,如果有完全匹配的,获取url,如果没有,获取最后一个版本的url - if not version['name'].startswith(f'{main_ver}.'): - continue - - remote_ver = version['name'] - if version['name'] == f'{version}/': - break - - if remote_ver: - url = f'https://cdn.npmmirror.com/binaries/chromedriver/{remote_ver}chromedriver_win32.zip' - save_path = save_path or str(Path(__file__).parent) - result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg) - - if result[0]: - return result[1] - - return None diff --git a/DrissionPage/easy_set.pyi b/DrissionPage/easy_set.pyi deleted file mode 100644 index 7ea52ca..0000000 --- a/DrissionPage/easy_set.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Union - - -def raise_when_ele_not_found(on_off: bool = True) -> None: ... - - -def configs_to_here(file_name: Union[Path, str] = None) -> None: ... - - -def show_settings(ini_path: Union[str, Path] = None) -> None: ... 
- - -def set_paths(driver_path: Union[str, Path] = None, - chrome_path: Union[str, Path] = None, - browser_path: Union[str, Path] = None, - local_port: Union[int, str] = None, - debugger_address: str = None, - download_path: Union[str, Path] = None, - user_data_path: Union[str, Path] = None, - cache_path: Union[str, Path] = None, - ini_path: Union[str, Path] = None, - check_version: bool = False) -> None: ... - - -def use_auto_port(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def use_system_user_path(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def set_argument(arg: str, value: Union[bool, str] = None, ini_path: Union[str, Path] = None) -> None: ... - - -def set_headless(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def set_no_imgs(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def set_no_js(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def set_mute(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... - - -def set_user_agent(user_agent: str, ini_path: Union[str, Path] = None) -> None: ... - - -def set_proxy(proxy: str, ini_path: Union[str, Path] = None) -> None: ... - - -def check_driver_version(driver_path: Union[str, Path] = None, chrome_path: str = None) -> bool: ... - - -# -------------------------自动识别chrome版本号并下载对应driver------------------------ -def get_match_driver(ini_path: Union[str, None] = 'default', - save_path: str = None, - chrome_path: str = None, - show_msg: bool = True, - check_version: bool = True) -> Union[str, None]: ... - - -def get_chrome_path(ini_path: str = None, - show_msg: bool = True, - from_ini: bool = True, - from_regedit: bool = True, - from_system_path: bool = True, ) -> Union[str, None]: ... 
diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 3bcfca0..9ed94df 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -1,39 +1,54 @@ # -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved. +@License : BSD 3-Clause. +""" class BaseError(Exception): _info = None def __init__(self, ErrorInfo=None): - super().__init__(self) # 初始化父类 self._info = ErrorInfo or self._info def __str__(self): return self._info +class ElementNotFoundError(BaseError): + _info = '\n没有找到元素。' + + def __init__(self, ErrorInfo=None, method=None, arguments=None): + super().__init__(ErrorInfo=ErrorInfo) + self.method = method + self.arguments = arguments + + def __str__(self): + method = f'\nmethod: {self.method}' if self.method else '' + arguments = f'\nargs: {self.arguments}' if self.arguments else '' + return f'{self._info}{method}{arguments}' + + class AlertExistsError(BaseError): _info = '存在未处理的提示框。' -class ContextLossError(BaseError): +class ContextLostError(BaseError): _info = '页面被刷新,请操作前尝试等待页面刷新或加载完成。' -class ElementLossError(BaseError): - _info = '元素对象因刷新已失效。' +class ElementLostError(BaseError): + _info = '元素对象已失效。可能是页面整体刷新,或js局部刷新把元素替换或去除了。' -class CallMethodError(BaseError): +class CDPError(BaseError): _info = '方法调用错误。' -class TabClosedError(BaseError): - _info = '标签页已关闭。' - - -class ElementNotFoundError(BaseError): - _info = '没有找到元素。' +class PageDisconnectedError(BaseError): + _info = '与页面的连接已断开。' class JavaScriptError(BaseError): @@ -54,3 +69,23 @@ class NoResourceError(BaseError): class CanNotClickError(BaseError): _info = '该元素无法滚动到视口或被遮挡,无法点击。' + + +class GetDocumentError(BaseError): + _info = '获取文档失败。' + + +class WaitTimeoutError(BaseError): + _info = '等待失败。' + + +class WrongURLError(BaseError): + _info = '无效的url。' + + +class StorageError(BaseError): + _info = '无法操作当前存储数据。' + + +class CookieFormatError(BaseError): + _info = 'cookie格式不正确。' diff --git 
a/DrissionPage/session_element.pyi b/DrissionPage/session_element.pyi deleted file mode 100644 index 4d455e1..0000000 --- a/DrissionPage/session_element.pyi +++ /dev/null @@ -1,131 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Tuple - -from lxml.html import HtmlElement - -from .base import DrissionElement, BaseElement -from .chromium_base import ChromiumBase -from .chromium_element import ChromiumElement -from .chromium_frame import ChromiumFrame -from .commons.constants import NoneElement -from mixpage.driver_element import DriverElement -from mixpage.driver_page import DriverPage -from .session_page import SessionPage - - -class SessionElement(DrissionElement): - - def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None): - self._inner_ele: HtmlElement = ... - self.page: SessionPage = ... - - @property - def inner_ele(self) -> HtmlElement: ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... - - def child(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union['SessionElement', str, None]: ... - - def prev(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union['SessionElement', str, None]: ... - - def next(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union['SessionElement', str, None]: ... 
- - def before(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union['SessionElement', str, None]: ... - - def after(self, filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None, - ele_only: bool = True) -> Union['SessionElement', str, None]: ... - - def children(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union['SessionElement', str]]: ... - - def prevs(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union['SessionElement', str]]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union['SessionElement', str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union['SessionElement', str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '', - timeout: float = None, - ele_only: bool = True) -> List[Union['SessionElement', str]]: ... - - def attr(self, attr: str) -> Union[str, None]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, NoneElement]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def s_ele(self, - loc_or_str: Union[Tuple[str, str], str] = None) -> Union['SessionElement', str, NoneElement]: ... - - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[Union['SessionElement', str]]: ... - - def _find_elements(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True, - relative: bool = False, - raise_err: bool = None) \ - -> Union['SessionElement', str, NoneElement, List[Union['SessionElement', str]]]: ... - - def _get_ele_path(self, mode: str) -> str: ... 
- - -def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, DriverElement, BaseElement, -ChromiumFrame, ChromiumBase, DriverPage], - loc: Union[str, Tuple[str, str]] = None, - single: bool = True) -> Union[ - SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ... diff --git a/MANIFEST.in b/MANIFEST.in index 4c619d1..962ba91 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include DrissionPage/configs/configs.ini +include DrissionPage/_configs/configs.ini include DrissionPage/*.pyi include DrissionPage/*/*.py include DrissionPage/*/*.pyi \ No newline at end of file diff --git a/README.md b/README.md index 022a82c..9dfc9ec 100644 --- a/README.md +++ b/README.md @@ -22,34 +22,20 @@ DrissionPage 是一个基于 python 的网页自动化工具。 支持系统:Windows、Linux、Mac -python 版本:3.6 及以上 +python 版本:3.8 及以上 支持浏览器:Chromium 内核浏览器(如 Chrome 和 Edge),electron 应用 --- +# 🛠 如何使用 + **📖 使用文档:** [点击查看](https://g1879.gitee.io/drissionpagedocs) **交流 QQ 群:** 636361957 --- -# 🔥 新版尝鲜 - -4.0 在 3.x 的基础上对底层进行了大幅重构,新增大量功能,改善运行效率和稳定性,优化项目结构,解决很多存在的问题。对比旧版本有质的提高。 - -现已发布 beta 版,欢迎尝鲜。 - -[4.0功能介绍](https://g1879.gitee.io/drissionpagedocs/whatsnew/4_0/) - -安装(目前是b14,关注文档,可能会有更新版本): - -```console -pip install DrissionPage==4.0.0b14 -``` - ---- - # 📕 背景 用 requests 做数据采集面对要登录的网站时,要分析数据包、JS 源码,构造复杂的请求,往往还要应付验证码、JS 混淆、签名参数等反爬手段,门槛较高,开发效率不高。 @@ -64,7 +50,7 @@ pip install DrissionPage==4.0.0b14 # 💡 理念 -简洁!易用 !方便! +简洁而强大! 
--- @@ -118,7 +104,7 @@ pip install DrissionPage==4.0.0b14 - 还有很多细节,这里不一一列举,欢迎实际使用中体验:) ---- +--- # 🔖 版本历史 diff --git a/requirements.txt b/requirements.txt index 4e712ad..8a430e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ requests lxml cssselect -DownloadKit>=1.0.0 -FlowViewer>=0.3.0 -websocket-client +DownloadKit>=2.0.0 +websocket-client>=1.7.0 click -tldextract \ No newline at end of file +tldextract +psutil \ No newline at end of file diff --git a/setup.py b/setup.py index dcaef44..4e39d05 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.35", + version="4.0.1", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", @@ -22,19 +22,19 @@ setup( 'lxml', 'requests', 'cssselect', - 'DownloadKit>=1.0.0', - 'FlowViewer>=0.3.0', - 'websocket-client', + 'DownloadKit>=2.0.0', + 'websocket-client>=1.7.0', 'click', - 'tldextract' + 'tldextract', + 'psutil' ], classifiers=[ - "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.8", "Development Status :: 4 - Beta", "Topic :: Utilities", "License :: OSI Approved :: BSD License", ], - python_requires='>=3.6', + python_requires='>=3.8', entry_points={ 'console_scripts': [ 'dp = DrissionPage.commons.cli:main',