Merge branch 'dev'

# Conflicts:
#	.gitee/ISSUE_TEMPLATE.zh-CN.md
#	DrissionPage/__init__.py
#	DrissionPage/_base/base.py
#	DrissionPage/_base/base.pyi
#	DrissionPage/_configs/chromium_options.py
#	DrissionPage/_configs/configs.ini
#	DrissionPage/_configs/driver_options.py
#	DrissionPage/_configs/driver_options.pyi
#	DrissionPage/_configs/session_options.py
#	DrissionPage/_elements/chromium_element.py
#	DrissionPage/_functions/cli.py
#	DrissionPage/_functions/tools.pyi
#	DrissionPage/_functions/web.py
#	DrissionPage/_functions/web.pyi
#	DrissionPage/_pages/chromium_frame.py
#	DrissionPage/_pages/chromium_tab.py
#	DrissionPage/_pages/chromium_tab.pyi
#	DrissionPage/_pages/session_page.py
#	DrissionPage/_pages/session_page.pyi
#	DrissionPage/_pages/web_page.py
#	DrissionPage/_pages/web_page.pyi
#	DrissionPage/_units/setter.py
#	DrissionPage/_units/setter.pyi
#	DrissionPage/_units/waiter.py
#	DrissionPage/chromium_base.py
#	DrissionPage/chromium_base.pyi
#	DrissionPage/chromium_driver.py
#	DrissionPage/chromium_driver.pyi
#	DrissionPage/chromium_element.pyi
#	DrissionPage/chromium_frame.pyi
#	DrissionPage/chromium_page.py
#	DrissionPage/chromium_page.pyi
#	DrissionPage/commons/browser.py
#	DrissionPage/commons/browser.pyi
#	DrissionPage/commons/constants.py
#	DrissionPage/commons/tools.py
#	DrissionPage/easy_set.py
#	DrissionPage/easy_set.pyi
#	DrissionPage/errors.py
#	DrissionPage/session_element.pyi
#	README.md
#	setup.py
This commit is contained in:
g1879 2024-01-08 20:23:14 +08:00
commit a1ec25dc44
98 changed files with 12391 additions and 9814 deletions

View File

@ -1,3 +1,3 @@
- 使用上的问题请先查看文档[使用文档](http://g1879.gitee.io/drissionpagedocs)
- 遇到bug请详细描述如何重现并附上代码
- 提问前先给本库打个星,谢谢
1. 使用上的问题请先查看文档[使用文档](http://g1879.gitee.io/drissionpagedocs)
2. 遇到bug请详细描述如何重现并附上代码
3. 提问前先给本库打个星,谢谢

View File

@ -1,23 +1,17 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
# 常用页面类
from .chromium_page import ChromiumPage
from .session_page import SessionPage
from .web_page import WebPage
from ._pages.chromium_page import ChromiumPage
from ._pages.session_page import SessionPage
from ._pages.web_page import WebPage
# 启动配置类
from .configs.chromium_options import ChromiumOptions
from .configs.session_options import SessionOptions
from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
# 旧版页面类和启动配置类
try:
from .mixpage.mix_page import MixPage
from .mixpage.drission import Drission
from .configs.driver_options import DriverOptions
except ModuleNotFoundError:
pass
__version__ = '3.2.35'
__all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__']
__version__ = '4.0.1'

View File

@ -1,16 +1,20 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from abc import abstractmethod
from re import sub
from urllib.parse import quote
from .commons.constants import Settings, NoneElement
from .commons.locator import get_loc
from .commons.web import format_html
from .errors import ElementNotFoundError
from DownloadKit import DownloadKit
from .._functions.settings import Settings
from .._functions.locator import get_loc
from .._functions.web import format_html
from .._elements.none_element import NoneElement
from ..errors import ElementNotFoundError
class BaseParser(object):
@ -20,7 +24,7 @@ class BaseParser(object):
return self.ele(loc_or_str)
def ele(self, loc_or_ele, timeout=None):
return self._ele(loc_or_ele, timeout, True)
return self._ele(loc_or_ele, timeout, True, method='ele()')
def eles(self, loc_or_str, timeout=None):
return self._ele(loc_or_str, timeout, False)
@ -36,7 +40,7 @@ class BaseParser(object):
def s_eles(self, loc_or_str):
pass
def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None):
def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None):
pass
@abstractmethod
@ -58,24 +62,21 @@ class BaseElement(BaseParser):
def parent(self, level_or_loc=1):
pass
def prev(self, index=1):
return None # ShadowRootElement直接继承
def prevs(self) -> None:
return None # ShadowRootElement直接继承
def next(self, index=1):
pass
def nexts(self):
pass
def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None):
def _ele(self, loc_or_str, timeout=None, single=True, relative=False, raise_err=None, method=None):
r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err)
if not single or raise_err is False:
if r or isinstance(r, list):
return r
if not r and (Settings.raise_ele_not_found or raise_err is True):
raise ElementNotFoundError
if Settings.raise_when_ele_not_found or raise_err is True:
raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_str})
r.method = method
r.args = {'loc_or_str': loc_or_str}
return r
@abstractmethod
@ -84,8 +85,8 @@ class BaseElement(BaseParser):
class DrissionElement(BaseElement):
"""DriverElement、ChromiumElement 和 SessionElement的基类
但不是ShadowRootElement的基类"""
"""ChromiumElement 和 SessionElement的基类
但不是ShadowRoot的基类"""
@property
def link(self):
@ -119,9 +120,10 @@ class DrissionElement(BaseElement):
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
def parent(self, level_or_loc=1):
def parent(self, level_or_loc=1, index=1):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果
:return: 上级元素对象
"""
if isinstance(level_or_loc, int):
@ -133,104 +135,127 @@ class DrissionElement(BaseElement):
if loc[0] == 'css selector':
raise ValueError('此css selector语法不受支持请换成xpath。')
loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}'
loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}[{index}]'
else:
raise TypeError('level_or_loc参数只能是tuple、int或str。')
return self._ele(loc, timeout=0, relative=True, raise_err=False)
return self._ele(loc, timeout=0, relative=True, raise_err=False, method='parent()')
def child(self, index=1, filter_loc='', timeout=None, ele_only=True):
def child(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param index: 第几个查询结果1开始
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param index: 第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
if isinstance(filter_loc, int):
index = filter_loc
filter_loc = ''
nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only)
if not nodes:
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
try:
return nodes[index - 1]
except IndexError:
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'child()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
def prev(self, index=1, filter_loc='', timeout=0, ele_only=True):
def prev(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param index: 前面第几个查询结果1开始
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素
"""
if isinstance(filter_loc, int):
index = filter_loc
filter_loc = ''
nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only)
if nodes:
return nodes[-1]
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'prev()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'prev()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def next(self, index=1, filter_loc='', timeout=0, ele_only=True):
def next(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param index: 后面第几个查询结果1开始
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素
"""
if isinstance(filter_loc, int):
index = filter_loc
filter_loc = ''
nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only)
if nodes:
return nodes[0]
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'next()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'next()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def before(self, index=1, filter_loc='', timeout=None, ele_only=True):
def before(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param index: 前面第几个查询结果1开始
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
if isinstance(filter_loc, int):
index = filter_loc
filter_loc = ''
nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only)
if nodes:
return nodes[-1]
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'before()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'before()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def after(self, index=1, filter_loc='', timeout=None, ele_only=True):
def after(self, filter_loc='', index=1, timeout=None, ele_only=True):
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param index: 后面第几个查询结果1开始
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
if isinstance(filter_loc, int):
index = filter_loc
filter_loc = ''
nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only)
if nodes:
return nodes[0]
if Settings.raise_ele_not_found:
raise ElementNotFoundError
if Settings.raise_when_ele_not_found:
raise ElementNotFoundError(None, 'after()', {'filter_loc': filter_loc,
'index': index, 'ele_only': ele_only})
else:
return NoneElement()
return NoneElement(self.page, 'after()', {'filter_loc': filter_loc, 'index': index, 'ele_only': ele_only})
def children(self, filter_loc='', timeout=None, ele_only=True):
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
@ -246,19 +271,19 @@ class DrissionElement(BaseElement):
nodes = self._ele(loc, timeout=timeout, single=False, relative=True)
return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
def prevs(self, filter_loc='', timeout=0, ele_only=True):
def prevs(self, filter_loc='', timeout=None, ele_only=True):
"""返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素或节点文本组成的列表
"""
return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout, ele_only=ele_only)
def nexts(self, filter_loc='', timeout=0, ele_only=True):
def nexts(self, filter_loc='', timeout=None, ele_only=True):
"""返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素或节点文本组成的列表
"""
@ -267,7 +292,7 @@ class DrissionElement(BaseElement):
def befores(self, filter_loc='', timeout=None, ele_only=True):
"""返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
@ -277,7 +302,7 @@ class DrissionElement(BaseElement):
def afters(self, filter_loc='', timeout=None, ele_only=True):
"""返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的元素或节点组成的列表
"""
@ -291,8 +316,8 @@ class DrissionElement(BaseElement):
:param filter_loc: 用于筛选的查询语法
:param direction: 'following' 'preceding'查找的方向
:param brother: 查找范围在同级查找还是整个dom前后查找
:param timeout: 查找等待时间
:return: DriverElement对象或字符串
:param timeout: 查找等待时间
:return: 元素对象或字符串
"""
if index is not None and index < 1:
raise ValueError('index必须大于等于1。')
@ -346,18 +371,22 @@ class DrissionElement(BaseElement):
class BasePage(BaseParser):
"""页面类的基类"""
def __init__(self, timeout=None):
def __init__(self):
"""初始化函数"""
self._url = None
self.timeout = timeout if timeout is not None else 10
self._timeout = 10
self._url_available = None
self.retry_times = 3
self.retry_interval = 2
self._url_available = None
self._DownloadKit = None
self._download_path = None
self._none_ele_return_value = False
self._none_ele_value = None
@property
def title(self):
"""返回网页title"""
ele = self._ele('xpath://title', raise_err=False)
ele = self._ele('xpath://title', raise_err=False, method='title')
return ele.text if ele else None
@property
@ -380,17 +409,17 @@ class BasePage(BaseParser):
"""返回当前访问的url有效性"""
return self._url_available
def _before_connect(self, url, retry, interval):
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数和间隔组成的tuple
"""
self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
return retry, interval
@property
def download_path(self):
"""返回默认下载路径"""
return self._download_path
@property
def download(self):
"""返回下载器对象"""
if self._DownloadKit is None:
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
return self._DownloadKit
# ----------------以下属性或方法由后代实现----------------
@property
@ -401,6 +430,10 @@ class BasePage(BaseParser):
def json(self):
return
@property
def user_agent(self):
return
@abstractmethod
def get_cookies(self, as_dict=False, all_info=False):
return {}
@ -409,16 +442,19 @@ class BasePage(BaseParser):
def get(self, url, show_errmsg=False, retry=None, interval=None):
pass
def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None):
def _ele(self, loc_or_ele, timeout=None, single=True, raise_err=None, method=None):
if not loc_or_ele:
raise ElementNotFoundError
raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele})
r = self._find_elements(loc_or_ele, timeout=timeout, single=single, raise_err=raise_err)
if not single or raise_err is False:
if r or isinstance(r, list):
return r
if not r and (Settings().raise_ele_not_found is True or raise_err is True):
raise ElementNotFoundError
if Settings.raise_when_ele_not_found or raise_err is True:
raise ElementNotFoundError(None, method, {'loc_or_str': loc_or_ele})
r.method = method
r.args = {'loc_or_str': loc_or_ele}
return r
@abstractmethod

View File

@ -1,12 +1,16 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from abc import abstractmethod
from typing import Union, Tuple, List
from typing import Union, Tuple, List, Any
from .commons.constants import NoneElement
from DownloadKit import DownloadKit
from .._elements.none_element import NoneElement
class BaseParser(object):
@ -25,7 +29,8 @@ class BaseParser(object):
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ...
def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ...
def _ele(self, loc_or_ele, timeout: float = None, single: bool = True,
raise_err: bool = None, method: str = None): ...
@abstractmethod
def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ...
@ -41,7 +46,7 @@ class BaseElement(BaseParser):
def tag(self) -> str: ...
def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True,
relative: bool = False, raise_err: bool = None): ...
relative: bool = False, raise_err: bool = None, method: str = None): ...
@abstractmethod
def _find_elements(self, loc_or_str, timeout: float = None, single: bool = True, relative: bool = False,
@ -78,59 +83,41 @@ class DrissionElement(BaseElement):
def texts(self, text_node_only: bool = False) -> list: ...
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ...
def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ...
def child(self, index: int = 1,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1,
timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def prev(self, index: int = 1,
filter_loc: Union[tuple, str] = '',
timeout: float = 0,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1,
timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def next(self, index: int = 1,
filter_loc: Union[tuple, str] = '',
timeout: float = 0,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1,
timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def before(self, index: int = 1,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1,
timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def after(self, index: int = 1,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1,
timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
def children(self, filter_loc: Union[tuple, str] = '',
timeout: float = None,
def children(self, filter_loc: Union[tuple, str] = '', timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def prevs(self, filter_loc: Union[tuple, str] = '',
timeout: float = 0,
def prevs(self, filter_loc: Union[tuple, str] = '', timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def nexts(self, filter_loc: Union[tuple, str] = '',
timeout: float = 0,
def nexts(self, filter_loc: Union[tuple, str] = '', timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def befores(self, filter_loc: Union[tuple, str] = '',
timeout: float = None,
def befores(self, filter_loc: Union[tuple, str] = '', timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def afters(self, filter_loc: Union[tuple, str] = '',
timeout: float = None,
def afters(self, filter_loc: Union[tuple, str] = '', timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def _get_brothers(self, index: int = None,
filter_loc: Union[tuple, str] = '',
direction: str = 'following',
brother: bool = True,
timeout: float = 0.5,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
def _get_brothers(self, index: int = None, filter_loc: Union[tuple, str] = '',
direction: str = 'following', brother: bool = True,
timeout: float = 0.5, ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
# ----------------以下属性或方法由后代实现----------------
@property
@ -150,11 +137,15 @@ class DrissionElement(BaseElement):
class BasePage(BaseParser):
def __init__(self, timeout: float = None):
def __init__(self):
self._url_available: bool = ...
self.retry_times: int = ...
self.retry_interval: float = ...
self._timeout = float = ...
self._timeout: float = ...
self._download_path: str = ...
self._DownloadKit: DownloadKit = ...
self._none_ele_return_value: bool = ...
self._none_ele_value: Any = ...
@property
def title(self) -> Union[str, None]: ...
@ -171,7 +162,11 @@ class BasePage(BaseParser):
@property
def url_available(self) -> bool: ...
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
@property
def download_path(self) -> str: ...
@property
def download(self) -> DownloadKit: ...
# ----------------以下属性或方法由后代实现----------------
@property
@ -180,17 +175,17 @@ class BasePage(BaseParser):
@property
def json(self) -> dict: ...
@property
def user_agent(self) -> str: ...
@abstractmethod
def get_cookies(self, as_dict: bool = False, all_info: bool = False) -> Union[list, dict]: ...
@abstractmethod
def get(self,
url: str,
show_errmsg: bool = False,
retry: int = None,
interval: float = None): ...
def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ...
def _ele(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ...
def _ele(self, loc_or_ele, timeout: float = None, single: bool = True,
raise_err: bool = None, method: str = None): ...
@abstractmethod
def _find_elements(self, loc_or_ele, timeout: float = None, single: bool = True, raise_err: bool = None): ...

View File

@ -0,0 +1,216 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from shutil import rmtree
from time import sleep, perf_counter
from websocket import WebSocketBadStatusException
from .driver import BrowserDriver, Driver
from .._functions.tools import stop_process_on_port, raise_error
from .._units.downloader import DownloadManager
from ..errors import PageDisconnectedError
__ERROR__ = 'error'
class Browser(object):
    """Browser-level controller shared by the page objects attached to one browser.

    Instances are cached per browser id in ``BROWSERS``: constructing a
    ``Browser`` with an id that is already registered returns the existing
    object and skips re-initialisation.
    """
    BROWSERS = {}  # browser_id -> Browser instance

    def __new__(cls, address, browser_id, page):
        """Return the cached instance for ``browser_id`` or allocate a new one.

        :param address: browser address
        :param browser_id: browser id
        :param page: ChromiumPage object
        """
        if browser_id in cls.BROWSERS:
            return cls.BROWSERS[browser_id]
        return object.__new__(cls)

    def __init__(self, address, browser_id, page):
        """Connect to the browser target and subscribe to target events.

        :param address: browser address
        :param browser_id: browser id
        :param page: ChromiumPage object
        """
        # __new__ may hand back an already initialised instance; do not redo the setup.
        if hasattr(self, '_created'):
            return
        self._created = True
        Browser.BROWSERS[browser_id] = self

        self.page = page
        self.address = address
        self._driver = BrowserDriver(browser_id, 'browser', address, self)
        self.id = browser_id
        self._frames = {}
        self._drivers = {}  # tab id -> Driver created ahead of time by _onTargetCreated
        self._connected = False

        # Remember the id of the entry whose type is 'browser' so quit() can poll for it.
        self._process_id = None
        r = self.run_cdp('SystemInfo.getProcessInfo')
        for i in r.get('processInfo', []):
            if i['type'] == 'browser':
                self._process_id = i['id']
                break

        self.run_cdp('Target.setDiscoverTargets', discover=True)
        self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed)
        self._driver.set_callback('Target.targetCreated', self._onTargetCreated)

    def _get_driver(self, tab_id):
        """Return the Driver for a tab id, reusing a pre-created one when available.

        :param tab_id: tab id
        :return: Driver object
        """
        # Look the cached driver up first: passing Driver(...) as the pop() default
        # would construct (and connect) a new Driver even when one is already cached.
        driver = self._drivers.pop(tab_id, None)
        if driver is None:
            driver = Driver(tab_id, 'page', self.address)
        return driver

    def _onTargetCreated(self, **kwargs):
        """Callback for Target.targetCreated: pre-create a Driver for new page/webview targets."""
        info = kwargs['targetInfo']
        if info['type'] in ('page', 'webview') and not info['url'].startswith('devtools://'):
            try:
                self._drivers[info['targetId']] = Driver(info['targetId'], 'page', self.address)
            except WebSocketBadStatusException:
                # Best effort: _get_driver() creates the Driver on demand if this fails.
                pass

    def _onTargetDestroyed(self, **kwargs):
        """Callback for Target.targetDestroyed: drop every record kept for the closed tab."""
        tab_id = kwargs['targetId']
        # _dl_mgr only exists after connect_to_page() has run.
        if hasattr(self, '_dl_mgr'):
            self._dl_mgr.clear_tab_info(tab_id)
        for key in [k for k, i in self._frames.items() if i == tab_id]:
            self._frames.pop(key, None)
        self._drivers.pop(tab_id, None)

    def connect_to_page(self):
        """Run the page-related setup once (creates the download manager)."""
        if not self._connected:
            self._dl_mgr = DownloadManager(self)
            self._connected = True

    def run_cdp(self, cmd, **cmd_args):
        """Execute a Chrome DevTools Protocol command on the browser target.

        :param cmd: protocol method name
        :param cmd_args: command parameters; the special key ``_ignore`` is removed
                         and forwarded to ``raise_error`` instead of being sent
        :return: the command result, or whatever ``raise_error`` returns for an error reply
        """
        ignore = cmd_args.pop('_ignore', None)
        r = self._driver.run(cmd, **cmd_args)
        return r if __ERROR__ not in r else raise_error(r, ignore)

    @property
    def driver(self):
        """Return the BrowserDriver connected to the browser target."""
        return self._driver

    @property
    def tabs_count(self):
        """Return the number of page/webview tabs, excluding devtools windows."""
        # Upstream note: do not switch this to the HTTP get() call, to avoid hanging.
        j = self.run_cdp('Target.getTargets')['targetInfos']
        return len([i for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')])

    @property
    def tabs(self):
        """Return the ids of all page/webview tabs, excluding devtools windows."""
        # Upstream note: do not switch this to CDP, because the order would be wrong.
        j = self._driver.get(f'http://{self.address}/json').json()
        return [i['id'] for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')]

    @property
    def process_id(self):
        """Return the browser process id recorded at start-up (None if it was not found)."""
        return self._process_id

    def find_tabs(self, title=None, url=None, tab_type=None, single=True):
        """Find tabs whose title, url and type match the given filters.

        :param title: text that must be contained in the tab title
        :param url: text that must be contained in the tab url
        :param tab_type: tab type, or a list/tuple/set of types
        :param single: True returns the id of the first match; False returns the
                       info dicts of all matches
        :return: a tab id, or a list of tab info dicts (an empty list when nothing matches)
        :raises TypeError: if tab_type is not a set, list, tuple, str or None
        """
        # Upstream note: do not switch this to CDP.
        tabs = self._driver.get(f'http://{self.address}/json').json()

        if isinstance(tab_type, str):
            tab_type = {tab_type}
        elif isinstance(tab_type, (list, tuple, set)):
            tab_type = set(tab_type)
        elif tab_type is not None:
            raise TypeError('tab_type只能是set、list、tuple、str、None。')

        r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url'])
                                 and (tab_type is None or i['type'] in tab_type))]
        return r[0]['id'] if r and single else r

    def close_tab(self, tab_id):
        """Close a tab; a PageDisconnectedError reply is passed to raise_error as ignorable.

        :param tab_id: tab id
        :return: None
        """
        self.run_cdp('Target.closeTarget', targetId=tab_id, _ignore=PageDisconnectedError)

    def activate_tab(self, tab_id):
        """Make a tab the active one.

        :param tab_id: tab id
        :return: None
        """
        self.run_cdp('Target.activateTarget', targetId=tab_id)

    def get_window_bounds(self, tab_id=None):
        """Return the position and size of the browser window.

        :param tab_id: tab id; the browser id is used when omitted
        :return: the 'bounds' dict of the Browser.getWindowForTarget reply
        """
        return self.run_cdp('Browser.getWindowForTarget', targetId=tab_id or self.id)['bounds']

    def quit(self, timeout=5, force=False):
        """Close the browser.

        :param timeout: seconds to wait for the browser process to disappear
        :param force: whether to terminate the process on the browser port right away
                      (only done when the address is 127.0.0.1 or localhost)
        :return: None
        """
        try:
            self.run_cdp('Browser.close')
        except PageDisconnectedError:
            return

        if force:
            ip, port = self.address.split(':')
            if ip not in ('127.0.0.1', 'localhost'):
                return
            stop_process_on_port(port)
            return

        if self.process_id:
            from os import popen
            from platform import system
            txt = f'tasklist | findstr {self.process_id}' if system().lower() == 'windows' \
                else f'ps -ef | grep {self.process_id}'
            end_time = perf_counter() + timeout
            while perf_counter() < end_time:
                # Close the pipe after every poll so handles do not pile up while waiting.
                with popen(txt) as p:
                    sleep(.1)
                    try:
                        if f' {self.process_id} ' not in p.read():
                            return
                    except TypeError:
                        pass

    def _on_quit(self):
        """Unregister this browser and remove the user data folder of an auto-port browser."""
        Browser.BROWSERS.pop(self.id, None)
        if self.page._chromium_options.is_auto_port and self.page._chromium_options.user_data_path:
            path = Path(self.page._chromium_options.user_data_path)
            end_time = perf_counter() + 7
            while perf_counter() < end_time:
                if not path.exists():
                    break
                try:
                    rmtree(path)
                    break
                except OSError:
                    # PermissionError and FileNotFoundError are OSError subclasses.
                    # Pause before retrying instead of spinning for the full 7 seconds.
                    sleep(.1)

View File

@ -0,0 +1,64 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import List, Optional, Union
from .driver import BrowserDriver, Driver
from .._pages.chromium_page import ChromiumPage
from .._units.downloader import DownloadManager
class Browser(object):
    # Type stub for the browser-level controller.
    BROWSERS: dict = ...  # browser_id -> Browser instance
    page: ChromiumPage = ...
    _driver: BrowserDriver = ...
    id: str = ...
    address: str = ...
    _frames: dict = ...
    _drivers: dict = ...
    _process_id: Optional[int] = ...
    _dl_mgr: DownloadManager = ...
    _connected: bool = ...

    def __new__(cls, address: str, browser_id: str, page: ChromiumPage): ...

    def __init__(self, address: str, browser_id: str, page: ChromiumPage): ...

    def _get_driver(self, tab_id: str) -> Driver: ...

    def run_cdp(self, cmd, **cmd_args) -> dict: ...

    @property
    def driver(self) -> BrowserDriver: ...

    @property
    def tabs_count(self) -> int: ...

    @property
    def tabs(self) -> List[str]: ...

    @property
    def process_id(self) -> Optional[int]: ...

    # tab_type also accepts a set; with single=False the implementation returns
    # the tab info dicts rather than their ids.
    def find_tabs(self, title: str = None, url: str = None,
                  tab_type: Union[str, list, tuple, set] = None,
                  single: bool = True) -> Union[str, List[dict]]: ...

    def close_tab(self, tab_id: str) -> None: ...

    def activate_tab(self, tab_id: str) -> None: ...

    def get_window_bounds(self, tab_id: str = None) -> dict: ...

    def connect_to_page(self) -> None: ...

    def _onTargetCreated(self, **kwargs) -> None: ...

    def _onTargetDestroyed(self, **kwargs) -> None: ...

    def quit(self, timeout: float = 5, force: bool = False) -> None: ...

    def _on_quit(self) -> None: ...

View File

@ -0,0 +1,273 @@
# -*- coding: utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from json import dumps, loads, JSONDecodeError
from queue import Queue, Empty
from threading import Thread, Event
from time import perf_counter, sleep
from requests import get
from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection,
WebSocketException)
from ..errors import PageDisconnectedError
class Driver(object):
def __init__(self, tab_id, tab_type, address):
"""
:param tab_id: 标签页id
:param tab_type: 标签页类型
:param address: 浏览器连接地址
"""
self.id = tab_id
self.address = address
self.type = tab_type
self._debug = False
self.alert_flag = False # 标记alert出现跳过一条请求后复原
self._websocket_url = f'ws://{address}/devtools/{tab_type}/{tab_id}'
self._cur_id = 0
self._ws = None
self._recv_th = Thread(target=self._recv_loop)
self._handle_event_th = Thread(target=self._handle_event_loop)
self._recv_th.daemon = True
self._handle_event_th.daemon = True
self._handle_immediate_event_th = None
self._stopped = Event()
self.event_handlers = {}
self.immediate_event_handlers = {}
self.method_results = {}
self.event_queue = Queue()
self.immediate_event_queue = Queue()
self.start()
def _send(self, message, timeout=None):
"""发送信息到浏览器,并返回浏览器返回的信息
:param message: 发送给浏览器的数据
:param timeout: 超时时间为None表示无限
:return: 浏览器返回的数据
"""
self._cur_id += 1
ws_id = self._cur_id
message['id'] = ws_id
message_json = dumps(message)
# if self._debug:
# if self._debug is True or (isinstance(self._debug, str) and
# message.get('method', '').startswith(self._debug)):
# print(f'发> {message_json}')
# elif isinstance(self._debug, (list, tuple, set)):
# for m in self._debug:
# if message.get('method', '').startswith(m):
# print(f'发> {message_json}')
# break
end_time = perf_counter() + timeout if timeout is not None else None
self.method_results[ws_id] = Queue()
try:
self._ws.send(message_json)
if timeout == 0:
self.method_results.pop(ws_id, None)
return {'id': ws_id, 'result': {}}
except (OSError, WebSocketConnectionClosedException):
self.method_results.pop(ws_id, None)
return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'}
while not self._stopped.is_set():
try:
result = self.method_results[ws_id].get(timeout=.2)
self.method_results.pop(ws_id, None)
return result
except Empty:
if self.alert_flag and message['method'].startswith(('Input.', 'Runtime.')):
return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'}
if timeout is not None and perf_counter() > end_time:
self.method_results.pop(ws_id, None)
return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'} \
if self.alert_flag else {'error': {'message': 'timeout'}, 'type': 'timeout'}
continue
return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'}
def _recv_loop(self):
    """Body of the receiving daemon thread: read browser messages and dispatch them.

    Events (messages carrying ``method``) go to the immediate handler bound to them, if any,
    otherwise to ``event_queue``. Replies (messages carrying a pending ``id``) go to the queue
    registered in ``method_results``. Any connection or decoding failure stops the driver.
    """
    while not self._stopped.is_set():
        try:
            msg = loads(self._ws.recv())
        except WebSocketTimeoutException:
            continue
        except (WebSocketException, OSError, WebSocketConnectionClosedException, JSONDecodeError):
            self._stop()
            return

        if 'method' not in msg:
            # A reply to a call; replies nobody is waiting for are dropped.
            if msg.get('id') in self.method_results:
                self.method_results[msg['id']].put(msg)
            continue

        method = msg['method']
        if method.startswith('Page.javascriptDialog'):
            # True for the "...Opening" event, False for any other javascriptDialog event.
            self.alert_flag = method.endswith('Opening')

        immediate = self.immediate_event_handlers.get(method)
        if immediate:
            self._handle_immediate_event(immediate, msg['params'])
        else:
            self.event_queue.put(msg)
def _handle_event_loop(self):
    """Body of the event thread: run the bound callback for every queued browser event."""
    while not self._stopped.is_set():
        try:
            # Wake up once a second so the stop flag is re-checked.
            queued = self.event_queue.get(timeout=1)
        except Empty:
            continue

        callback = self.event_handlers.get(queued['method'])
        if callback:
            callback(**queued['params'])

        self.event_queue.task_done()
def _handle_immediate_event_loop(self):
    """Run queued immediate callbacks until the queue is empty or the driver is stopped.

    A PageDisconnectedError raised by a callback is ignored.
    """
    pending = self.immediate_event_queue
    while not (self._stopped.is_set() or pending.empty()):
        callback, params = pending.get(timeout=1)
        try:
            callback(**params)
        except PageDisconnectedError:
            pass
def _handle_immediate_event(self, function, kwargs):
    """Queue a callback that must run immediately and make sure a worker thread is running.

    :param function: the callable to run
    :param kwargs: dict of keyword arguments for the callable
    :return: None
    """
    self.immediate_event_queue.put((function, kwargs))

    worker = self._handle_immediate_event_th
    if worker is not None and worker.is_alive():
        return  # the running worker is reused

    worker = Thread(target=self._handle_immediate_event_loop)
    self._handle_immediate_event_th = worker
    worker.daemon = True
    worker.start()
def run(self, _method, **kwargs):
    """Execute a CDP method.

    :param _method: name of the CDP method
    :param kwargs: CDP parameters; the special key ``_timeout`` (default 30) is removed
                   and used as the wait time
    :return: the ``result`` part of the reply, or a dict describing the error
    """
    if self._stopped.is_set():
        return {'error': 'connection disconnected', 'type': 'connection_error'}

    timeout = kwargs.pop('_timeout', 30)
    reply = self._send({'method': _method, 'params': kwargs}, timeout=timeout)

    if 'result' in reply or 'error' not in reply:
        return reply['result']

    return {'error': reply['error']['message'], 'type': reply.get('type', 'call_method_error'),
            'method': _method, 'args': kwargs, 'timeout': timeout}
def start(self):
    """Open the websocket connection and start the worker threads.

    :return: True
    """
    self._stopped.clear()
    self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True)

    # A Thread object can be started only once. When restarting after stop(), the old
    # workers have already run and finished, so build fresh ones instead of reusing them.
    if self._recv_th.ident is not None and not self._recv_th.is_alive():
        self._recv_th = Thread(target=self._recv_loop)
        self._recv_th.daemon = True
    if self._handle_event_th.ident is not None and not self._handle_event_th.is_alive():
        self._handle_event_th = Thread(target=self._handle_event_loop)
        self._handle_event_th.daemon = True

    self._recv_th.start()
    self._handle_event_th.start()
    return True
def stop(self):
    """Disconnect and block until both worker threads have finished.

    :return: True
    """
    self._stop()
    # Poll every 0.1s; attributes are re-read on each pass, event thread first.
    while any(th.is_alive() for th in (self._handle_event_th, self._recv_th)):
        sleep(.1)
    return True
def _stop(self):
    """Disconnect: close the websocket, flush queued events and drop all bound state.

    :return: False when the driver was already stopped, otherwise None
    """
    if self._stopped.is_set():
        return False

    self._stopped.set()
    if self._ws:
        self._ws.close()
        self._ws = None

    # Best effort: deliver the events that are still queued. Each event is isolated so
    # one failing callback does not prevent the remaining ones from running, and only
    # ordinary exceptions are swallowed (not KeyboardInterrupt / SystemExit).
    while True:
        try:
            event = self.event_queue.get_nowait()
        except Empty:
            break
        try:
            function = self.event_handlers.get(event['method'])
            if function:
                function(**event['params'])
        except Exception:
            continue

    self.event_handlers.clear()
    self.method_results.clear()
    self.event_queue.queue.clear()
def set_callback(self, event, callback, immediate=False):
    """Bind a callback to a CDP event, or unbind it.

    :param event: name of the CDP event
    :param callback: callable to bind; a falsy value removes the current binding
    :param immediate: whether the callback belongs to the immediate handlers
    :return: None
    """
    registry = self.immediate_event_handlers if immediate else self.event_handlers
    if not callback:
        registry.pop(event, None)
        return
    registry[event] = callback
class BrowserDriver(Driver):
    """Driver bound to a browser object; one instance is kept per tab id."""

    # Registry of created instances, keyed by tab id.
    BROWSERS = {}

    def __new__(cls, tab_id, tab_type, address, browser):
        """Return the instance already registered for ``tab_id``, or allocate a new one."""
        try:
            return cls.BROWSERS[tab_id]
        except KeyError:
            return object.__new__(cls)

    def __init__(self, tab_id, tab_type, address, browser):
        """Initialise the driver once; repeated construction of a registered instance is a no-op.

        :param tab_id: id used as the registry key and passed to the parent initialiser
        :param tab_type: passed to the parent initialiser
        :param address: passed to the parent initialiser
        :param browser: object stored as ``self.browser``; its ``_on_quit()`` runs on ``_stop()``
        """
        if not hasattr(self, '_created'):
            self._created = True
            BrowserDriver.BROWSERS[tab_id] = self
            super().__init__(tab_id, tab_type, address)
            self.browser = browser

    def __repr__(self):
        return f'<BrowserDriver {self.id}>'

    def get(self, url):
        """Request ``url`` with a ``Connection: close`` header, close the response and return it."""
        response = get(url, headers={'Connection': 'close'})
        response.close()
        return response

    def _stop(self):
        """Stop the driver, then notify the browser object through ``_on_quit()``."""
        super()._stop()
        self.browser._on_quit()

View File

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from queue import Queue
from threading import Thread, Event
from typing import Union, Callable, Dict, Optional
from requests import Response
from websocket import WebSocket
from .browser import Browser
class GenericAttr(object):
    """Type stub for GenericAttr."""

    def __init__(self, name: str, tab: Driver): ...

    def __getattr__(self, item: str) -> Callable: ...

    def __setattr__(self, key: str, value: Callable) -> None: ...
class Driver(object):
    """Type stub for the websocket driver that talks to the browser."""
    id: str
    address: str
    type: str
    # _debug: bool
    alert_flag: bool
    _websocket_url: str
    _cur_id: int
    _ws: Optional[WebSocket]
    _recv_th: Thread
    _handle_event_th: Thread
    _handle_immediate_event_th: Optional[Thread]
    _stopped: Event
    event_handlers: dict
    immediate_event_handlers: dict
    method_results: dict
    event_queue: Queue
    immediate_event_queue: Queue

    def __init__(self, tab_id: str, tab_type: str, address: str): ...

    # timeout accepts None (wait until the driver stops)
    def _send(self, message: dict, timeout: Optional[float] = None) -> dict: ...

    def _recv_loop(self) -> None: ...

    def _handle_event_loop(self) -> None: ...

    def _handle_immediate_event_loop(self) -> None: ...

    def _handle_immediate_event(self, function: Callable, kwargs: dict) -> None: ...

    def run(self, _method: str, **kwargs) -> dict: ...

    def start(self) -> bool: ...

    def stop(self) -> bool: ...

    # returns False when the driver was already stopped, otherwise None
    def _stop(self) -> Optional[bool]: ...

    def set_callback(self, event: str, callback: Union[Callable, None], immediate: bool = False) -> None: ...
class BrowserDriver(Driver):
    """Type stub for the per-tab browser driver."""
    BROWSERS: Dict[str, Driver] = ...
    browser: Browser = ...

    def __new__(cls, tab_id: str, tab_type: str, address: str, browser: Browser): ...

    def __init__(self, tab_id: str, tab_type: str, address: str, browser: Browser): ...

    def __repr__(self) -> str: ...

    def get(self, url: str) -> Response: ...

    # the override does not return the parent's result
    def _stop(self) -> None: ...

View File

@ -1,13 +1,18 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from re import search
from shutil import rmtree
from tempfile import gettempdir, TemporaryDirectory
from threading import Lock
from .options_manage import OptionsManager
from ..commons.tools import port_is_using, clean_folder
from .._functions.tools import port_is_using, clean_folder
class ChromiumOptions(object):
@ -19,22 +24,28 @@ class ChromiumOptions(object):
self._user_data_path = None
self._user = 'Default'
self._prefs_to_del = []
self.clear_file_flags = False
self._headless = None
if read_file is not False:
ini_path = str(ini_path) if ini_path else None
om = OptionsManager(ini_path)
self.ini_path = om.ini_path
options = om.chrome_options
self._download_path = om.paths.get('download_path', None)
options = om.chromium_options
self._download_path = om.paths.get('download_path', None) or None
self._tmp_path = om.paths.get('tmp_path', None) or None
self._arguments = options.get('arguments', [])
self._binary_location = options.get('binary_location', '')
self._browser_path = options.get('browser_path', '')
self._extensions = options.get('extensions', [])
self._prefs = options.get('experimental_options', {}).get('prefs', {})
self._debugger_address = options.get('debugger_address', None)
self._page_load_strategy = options.get('page_load_strategy', 'normal')
self._proxy = om.proxies.get('http', None)
self._prefs = options.get('prefs', {})
self._flags = options.get('flags', {})
self._address = options.get('address', None)
self._load_mode = options.get('load_mode', 'normal')
self._system_user_path = options.get('system_user_path', False)
self._existing_only = options.get('existing_only', False)
self._proxy = om.proxies.get('http', None) or om.proxies.get('https', None)
user_path = user = False
for arg in self._arguments:
@ -48,29 +59,39 @@ class ChromiumOptions(object):
break
timeouts = om.timeouts
self._timeouts = {'implicit': timeouts['implicit'],
self._timeouts = {'base': timeouts['base'],
'pageLoad': timeouts['page_load'],
'script': timeouts['script']}
self._auto_port = options.get('auto_port', False)
if self._auto_port:
port, path = PortFinder().get_port()
self._debugger_address = f'127.0.0.1:{port}'
self._address = f'127.0.0.1:{port}'
self.set_argument('--user-data-dir', path)
others = om.others
self._retry_times = others.get('retry_times', 3)
self._retry_interval = others.get('retry_interval', 2)
return
self.ini_path = None
self._binary_location = "chrome"
self._browser_path = "chrome"
self._arguments = []
self._download_path = None
self._tmp_path = None
self._extensions = []
self._prefs = {}
self._timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
self._debugger_address = '127.0.0.1:9222'
self._page_load_strategy = 'normal'
self._flags = {}
self._timeouts = {'base': 10, 'pageLoad': 30, 'script': 30}
self._address = '127.0.0.1:9222'
self._load_mode = 'normal'
self._proxy = None
self._auto_port = False
self._system_user_path = False
self._existing_only = False
self._retry_times = 3
self._retry_interval = 2
@property
def download_path(self):
@ -80,22 +101,27 @@ class ChromiumOptions(object):
@property
def browser_path(self):
"""浏览器启动文件路径"""
return self._binary_location
return self._browser_path
@property
def user_data_path(self):
"""返回用户数据文件夹路径"""
return self._user_data_path
@property
def tmp_path(self):
"""返回临时文件夹路径"""
return self._tmp_path
@property
def user(self):
"""返回用户配置文件夹名称"""
return self._user
@property
def page_load_strategy(self):
def load_mode(self):
"""返回页面加载策略,'normal', 'eager', 'none'"""
return self._page_load_strategy
return self._load_mode
@property
def timeouts(self):
@ -108,15 +134,9 @@ class ChromiumOptions(object):
return self._proxy
@property
def debugger_address(self):
def address(self):
"""返回浏览器地址ip:port"""
return self._debugger_address
@debugger_address.setter
def debugger_address(self, address):
"""设置浏览器地址格式ip:port"""
address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://')
self._debugger_address = address
return self._address
@property
def arguments(self):
@ -133,11 +153,48 @@ class ChromiumOptions(object):
"""返回用户首选项配置"""
return self._prefs
@property
def flags(self):
"""返回实验项配置"""
return self._flags
@property
def system_user_path(self):
"""返回是否使用系统安装的浏览器所使用的用户数据文件夹"""
return self._system_user_path
@property
def is_existing_only(self):
"""返回是否只接管现有浏览器方式"""
return self._existing_only
@property
def is_auto_port(self):
"""返回是否使用自动端口和用户文件"""
return self._auto_port
@property
def retry_times(self):
"""返回连接失败时的重试次数"""
return self._retry_times
@property
def retry_interval(self):
"""返回连接失败时的重试间隔(秒)"""
return self._retry_interval
def set_retry(self, times=None, interval=None):
    """Configure what happens when a connection attempt fails.

    :param times: number of retries; None keeps the current value
    :param interval: seconds between retries; None keeps the current value
    :return: the current object
    """
    for attr, value in (('_retry_times', times), ('_retry_interval', interval)):
        if value is not None:
            setattr(self, attr, value)
    return self
def set_argument(self, arg, value=None):
"""设置浏览器配置的argument属性
:param arg: 属性名
@ -201,7 +258,7 @@ class ChromiumOptions(object):
:param arg: 设置项名称
:return: 当前对象
"""
self._prefs.pop(arg)
self._prefs.pop(arg, None)
return self
def remove_pref_from_file(self, arg):
@ -212,15 +269,33 @@ class ChromiumOptions(object):
self._prefs_to_del.append(arg)
return self
def set_timeouts(self, implicit=None, pageLoad=None, script=None):
def set_flag(self, flag, value=None):
    """Set an experimental flag.

    :param flag: name of the flag
    :param value: value of the flag; exactly False removes the flag instead
    :return: the current object
    """
    if value is not False:
        self._flags[flag] = value
        return self

    self._flags.pop(flag, None)
    return self
def clear_flags_in_file(self):
    """Request removal of the experimental flags already stored in the browser's config file.

    Only raises the ``clear_file_flags`` marker.
    :return: the current object
    """
    setattr(self, 'clear_file_flags', True)
    return self
def set_timeouts(self, base=None, pageLoad=None, script=None, implicit=None):
"""设置超时时间,单位为秒
:param implicit: 默认超时时间
:param base: 默认超时时间
:param pageLoad: 页面加载超时时间
:param script: 脚本运行超时时间
:return: 当前对象
"""
if implicit is not None:
self._timeouts['implicit'] = implicit
base = base if base is not None else implicit
if base is not None:
self._timeouts['base'] = base
if pageLoad is not None:
self._timeouts['pageLoad'] = pageLoad
if script is not None:
@ -237,15 +312,15 @@ class ChromiumOptions(object):
self._user = user
return self
def set_headless(self, on_off=True):
def headless(self, on_off=True):
"""设置是否隐藏浏览器界面
:param on_off: 开或关
:return: 当前对象
"""
on_off = 'new' if on_off else False
on_off = 'new' if on_off else 'false'
return self.set_argument('--headless', on_off)
def set_no_imgs(self, on_off=True):
def no_imgs(self, on_off=True):
"""设置是否加载图片
:param on_off: 开或关
:return: 当前对象
@ -253,7 +328,7 @@ class ChromiumOptions(object):
on_off = None if on_off else False
return self.set_argument('--blink-settings=imagesEnabled=false', on_off)
def set_no_js(self, on_off=True):
def no_js(self, on_off=True):
"""设置是否禁用js
:param on_off: 开或关
:return: 当前对象
@ -261,7 +336,7 @@ class ChromiumOptions(object):
on_off = None if on_off else False
return self.set_argument('--disable-javascript', on_off)
def set_mute(self, on_off=True):
def mute(self, on_off=True):
"""设置是否静音
:param on_off: 开或关
:return: 当前对象
@ -269,6 +344,22 @@ class ChromiumOptions(object):
on_off = None if on_off else False
return self.set_argument('--mute-audio', on_off)
def incognito(self, on_off=True):
    """Choose whether the browser starts in incognito mode.

    :param on_off: truthy to enable, falsy to disable
    :return: whatever ``set_argument`` returns
    """
    if on_off:
        value = None
    else:
        value = False
    return self.set_argument('--incognito', value)
def ignore_certificate_errors(self, on_off=True):
    """Choose whether certificate errors are ignored.

    :param on_off: truthy to enable, falsy to disable
    :return: whatever ``set_argument`` returns
    """
    if on_off:
        value = None
    else:
        value = False
    return self.set_argument('--ignore-certificate-errors', value)
def set_user_agent(self, user_agent):
"""设置user agent
:param user_agent: user agent文本
@ -281,11 +372,15 @@ class ChromiumOptions(object):
:param proxy: 代理url和端口
:return: 当前对象
"""
if search(r'.*?:.*?@.*?\..*', proxy):
print('你似乎在设置使用账号密码的代理,暂时不支持这种代理,可自行用插件实现需求。')
if proxy.lower().startswith('socks'):
print('你似乎在设置使用socks代理暂时不支持这种代理可自行用插件实现需求。')
self._proxy = proxy
return self.set_argument('--proxy-server', proxy)
def set_page_load_strategy(self, value):
"""设置page_load_strategy,可接收 'normal', 'eager', 'none'
def set_load_mode(self, value):
"""设置load_mode,可接收 'normal', 'eager', 'none'
normal默认情况下使用, 等待所有资源下载完成
eagerDOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中
none完全不阻塞
@ -293,46 +388,104 @@ class ChromiumOptions(object):
:return: 当前对象
"""
if value not in ('normal', 'eager', 'none'):
raise ValueError("只能选择'normal', 'eager', 'none'")
self._page_load_strategy = value.lower()
raise ValueError("只能选择 'normal', 'eager', 'none'")
self._load_mode = value.lower()
return self
def set_paths(self, browser_path=None, local_port=None, debugger_address=None, download_path=None,
user_data_path=None, cache_path=None):
def set_paths(self, browser_path=None, local_port=None, address=None, download_path=None,
user_data_path=None, cache_path=None, debugger_address=None):
"""快捷的路径设置函数
:param browser_path: 浏览器可执行文件路径
:param local_port: 本地端口号
:param debugger_address: 调试浏览器地址127.0.0.1:9222
:param address: 调试浏览器地址127.0.0.1:9222
:param download_path: 下载文件路径
:param user_data_path: 用户数据路径
:param cache_path: 缓存路径
:return: 当前对象
"""
address = address or debugger_address
if browser_path is not None:
self._binary_location = str(browser_path)
self._auto_port = False
self.set_browser_path(browser_path)
if local_port is not None:
self._debugger_address = f'127.0.0.1:{local_port}'
self._auto_port = False
self.set_local_port(local_port)
if debugger_address is not None:
self.debugger_address = debugger_address
if address is not None:
self.set_address(address)
if download_path is not None:
self._download_path = str(download_path)
self.set_download_path(download_path)
if user_data_path is not None:
u = str(user_data_path)
self.set_argument('--user-data-dir', u)
self._user_data_path = u
self._auto_port = False
self.set_user_data_path(user_data_path)
if cache_path is not None:
self.set_argument('--disk-cache-dir', str(cache_path))
self.set_cache_path(cache_path)
return self
def set_local_port(self, port):
    """Set the local port the browser is started on and turn automatic port selection off.

    :param port: port number
    :return: the current object
    """
    self._auto_port = False
    self._address = '127.0.0.1:{}'.format(port)
    return self
def set_address(self, address):
    """Set the browser address in 'ip:port' form.

    'localhost' is replaced by '127.0.0.1' and a leading 'http://' or 'https://' is removed.
    :param address: browser address
    :return: the current object
    """
    address = address.replace('localhost', '127.0.0.1')
    # str.lstrip() removes a set of characters, not a prefix, so it would also eat leading
    # 'h', 't', 'p', 's', ':' or '/' characters of the host itself. Cut the scheme explicitly.
    for scheme in ('http://', 'https://'):
        if address.startswith(scheme):
            address = address[len(scheme):]
            break
    self._address = address
    return self
def set_browser_path(self, path):
    """Set the path of the browser executable and turn automatic port selection off.

    :param path: browser path; stored as ``str(path)``
    :return: the current object
    """
    self._browser_path, self._auto_port = str(path), False
    return self
def set_download_path(self, path):
    """Set the folder downloaded files are saved to.

    :param path: download path; stored as ``str(path)``
    :return: the current object
    """
    target = str(path)
    self._download_path = target
    return self
def set_tmp_path(self, path):
    """Set the folder temporary files are saved to.

    :param path: temporary-file path; stored as ``str(path)``
    :return: the current object
    """
    target = str(path)
    self._tmp_path = target
    return self
def set_user_data_path(self, path):
    """Set the user data folder and turn automatic port selection off.

    The path is also passed to the browser through the '--user-data-dir' argument.
    :param path: user data folder path; stored as ``str(path)``
    :return: the current object
    """
    folder = str(path)
    self.set_argument('--user-data-dir', folder)
    self._user_data_path, self._auto_port = folder, False
    return self
def set_cache_path(self, path):
    """Set the cache folder through the '--disk-cache-dir' argument.

    :param path: cache path; passed on as ``str(path)``
    :return: the current object
    """
    cache_dir = str(path)
    self.set_argument('--disk-cache-dir', cache_dir)
    return self
def use_system_user_path(self, on_off=True):
"""设置是否使用系统安装的浏览器默认用户文件夹
:param on_off: 开或关
@ -341,19 +494,28 @@ class ChromiumOptions(object):
self._system_user_path = on_off
return self
def auto_port(self, on_off=True):
def auto_port(self, on_off=True, tmp_path=None):
"""自动获取可用端口
:param on_off: 是否开启自动获取端口号
:param tmp_path: 临时文件保存路径为None时保存到系统临时文件夹on_off为False时此参数无效
:return: 当前对象
"""
if on_off:
port, path = PortFinder().get_port()
self.set_paths(local_port=port, user_data_path=path)
self._auto_port = True
if tmp_path:
self._tmp_path = str(tmp_path)
else:
self._auto_port = False
return self
def existing_only(self, on_off=True):
    """Choose whether only an already running browser is taken over, without launching a new one.

    :param on_off: value stored as the existing-only switch
    :return: the current object
    """
    setattr(self, '_existing_only', on_off)
    return self
def save(self, path=None):
"""保存设置到文件
:param path: ini文件的路径 None 保存到当前读取的配置文件传入 'default' 保存到默认ini文件
@ -378,24 +540,26 @@ class ChromiumOptions(object):
else:
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
# 设置chrome_options
attrs = ('debugger_address', 'binary_location', 'arguments', 'extensions', 'user', 'page_load_strategy',
'auto_port', 'system_user_path')
# 设置chromium_options
attrs = ('address', 'browser_path', 'arguments', 'extensions', 'user', 'load_mode',
'auto_port', 'system_user_path', 'existing_only', 'flags')
for i in attrs:
om.set_item('chrome_options', i, self.__getattribute__(f'_{i}'))
om.set_item('chromium_options', i, self.__getattribute__(f'_{i}'))
# 设置代理
om.set_item('proxies', 'http', self._proxy)
om.set_item('proxies', 'https', self._proxy)
# 设置路径
om.set_item('paths', 'download_path', self._download_path)
om.set_item('paths', 'download_path', self._download_path or '')
om.set_item('paths', 'tmp_path', self._tmp_path or '')
# 设置timeout
om.set_item('timeouts', 'implicit', self._timeouts['implicit'])
om.set_item('timeouts', 'base', self._timeouts['base'])
om.set_item('timeouts', 'page_load', self._timeouts['pageLoad'])
om.set_item('timeouts', 'script', self._timeouts['script'])
# 设置重试
om.set_item('others', 'retry_times', self.retry_times)
om.set_item('others', 'retry_interval', self.retry_interval)
# 设置prefs
eo = om.chrome_options.get('experimental_options', {})
eo['prefs'] = self._prefs
om.set_item('chrome_options', 'experimental_options', eo)
om.set_item('chromium_options', 'prefs', self._prefs)
path = str(path)
om.save(path)
@ -406,12 +570,67 @@ class ChromiumOptions(object):
"""保存当前配置到默认ini文件"""
return self.save('default')
def __repr__(self):
return f'<ChromiumOptions at {id(self)}>'
# ---------------即将废弃--------------
@property
def debugger_address(self):
"""返回浏览器地址ip:port"""
return self._address
@debugger_address.setter
def debugger_address(self, address):
"""设置浏览器地址格式ip:port"""
self.set_address(address)
def set_page_load_strategy(self, value):
return self.set_load_mode(value)
def set_headless(self, on_off=True):
"""设置是否隐藏浏览器界面
:param on_off: 开或关
:return: 当前对象
"""
on_off = 'new' if on_off else 'false'
return self.set_argument('--headless', on_off)
def set_no_imgs(self, on_off=True):
"""设置是否加载图片
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--blink-settings=imagesEnabled=false', on_off)
def set_no_js(self, on_off=True):
"""设置是否禁用js
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--disable-javascript', on_off)
def set_mute(self, on_off=True):
"""设置是否静音
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--mute-audio', on_off)
class PortFinder(object):
used_port = []
used_port = {}
lock = Lock()
def __init__(self):
self.tmp_dir = Path(gettempdir()) / 'DrissionPage' / 'TempFolder'
def __init__(self, path=None):
"""
:param path: 临时文件保存路径为None时使用系统临时文件夹
"""
tmp = Path(path) if path else Path(gettempdir()) / 'DrissionPage'
self.tmp_dir = tmp / 'UserTempFolder'
self.tmp_dir.mkdir(parents=True, exist_ok=True)
if not PortFinder.used_port:
clean_folder(self.tmp_dir)
@ -420,12 +639,21 @@ class PortFinder(object):
"""查找一个可用端口
:return: 可以使用的端口和用户文件夹路径组成的元组
"""
for i in range(9600, 19800):
if i in PortFinder.used_port or port_is_using('127.0.0.1', i):
continue
with PortFinder.lock:
for i in range(9600, 19600):
if i in PortFinder.used_port:
continue
elif port_is_using('127.0.0.1', i):
PortFinder.used_port[i] = None
continue
path = TemporaryDirectory(dir=self.tmp_dir).name
PortFinder.used_port[i] = path
return i, path
path = TemporaryDirectory(dir=self.tmp_dir)
PortFinder.used_port.append(i)
return i, path.name
for i in range(9600, 19600):
if port_is_using('127.0.0.1', i):
continue
rmtree(PortFinder.used_port[i], ignore_errors=True)
return i, TemporaryDirectory(dir=self.tmp_dir).name
raise OSError('未找到可用端口。')

View File

@ -0,0 +1,173 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from threading import Lock
from typing import Union, Tuple, Any, Literal, Optional
class ChromiumOptions(object):
    """Type stub for the Chromium browser options object."""

    # read_file was annotated with a list literal, which is not a valid type
    def __init__(self, read_file: Optional[bool] = True, ini_path: Union[str, Path] = None):
        self.ini_path: str = ...
        self._driver_path: str = ...
        self._user_data_path: str = ...
        self._download_path: str = ...
        self._tmp_path: str = ...
        self._arguments: list = ...
        self._browser_path: str = ...
        self._user: str = ...
        self._load_mode: str = ...
        self._timeouts: dict = ...
        self._proxy: str = ...
        self._address: str = ...
        self._extensions: list = ...
        self._prefs: dict = ...
        self._flags: dict = ...
        self._prefs_to_del: list = ...
        self.clear_file_flags: bool = ...
        self._auto_port: bool = ...
        self._system_user_path: bool = ...
        self._existing_only: bool = ...
        self._headless: bool = ...
        self._retry_times: int = ...
        self._retry_interval: float = ...

    # None when no download path is configured
    @property
    def download_path(self) -> Optional[str]: ...

    @property
    def browser_path(self) -> str: ...

    # None until a user data path has been set
    @property
    def user_data_path(self) -> Optional[str]: ...

    @property
    def tmp_path(self) -> Optional[str]: ...

    @property
    def user(self) -> str: ...

    @property
    def load_mode(self) -> str: ...

    @property
    def timeouts(self) -> dict: ...

    # None when no proxy is configured
    @property
    def proxy(self) -> Optional[str]: ...

    @property
    def address(self) -> str: ...

    @property
    def arguments(self) -> list: ...

    @property
    def extensions(self) -> list: ...

    @property
    def preferences(self) -> dict: ...

    @property
    def flags(self) -> dict: ...

    @property
    def system_user_path(self) -> bool: ...

    @property
    def is_existing_only(self) -> bool: ...

    @property
    def is_auto_port(self) -> bool: ...

    @property
    def retry_times(self) -> int: ...

    @property
    def retry_interval(self) -> float: ...

    def set_retry(self, times: int = None, interval: float = None) -> ChromiumOptions: ...

    def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: ...

    def remove_argument(self, value: str) -> ChromiumOptions: ...

    def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def remove_extensions(self) -> ChromiumOptions: ...

    def set_pref(self, arg: str, value: Any) -> ChromiumOptions: ...

    def remove_pref(self, arg: str) -> ChromiumOptions: ...

    def remove_pref_from_file(self, arg: str) -> ChromiumOptions: ...

    def set_flag(self, flag: str, value: Union[int, str, bool] = None) -> ChromiumOptions: ...

    def clear_flags_in_file(self) -> ChromiumOptions: ...

    def set_timeouts(self, base: float = None, pageLoad: float = None,
                     script: float = None) -> ChromiumOptions: ...

    def set_user(self, user: str = 'Default') -> ChromiumOptions: ...

    def headless(self, on_off: bool = True) -> ChromiumOptions: ...

    def no_imgs(self, on_off: bool = True) -> ChromiumOptions: ...

    def no_js(self, on_off: bool = True) -> ChromiumOptions: ...

    def mute(self, on_off: bool = True) -> ChromiumOptions: ...

    def incognito(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_user_agent(self, user_agent: str) -> ChromiumOptions: ...

    def set_proxy(self, proxy: str) -> ChromiumOptions: ...

    def ignore_certificate_errors(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_load_mode(self, value: Literal['normal', 'eager', 'none']) -> ChromiumOptions: ...

    def set_browser_path(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def set_local_port(self, port: Union[str, int]) -> ChromiumOptions: ...

    def set_address(self, address: str) -> ChromiumOptions: ...

    def set_download_path(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def set_tmp_path(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def set_user_data_path(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def set_cache_path(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None,
                  address: str = None, download_path: Union[str, Path] = None,
                  user_data_path: Union[str, Path] = None,
                  cache_path: Union[str, Path] = None) -> ChromiumOptions: ...

    def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: ...

    def auto_port(self, on_off: bool = True, tmp_path: Union[str, Path] = None) -> ChromiumOptions: ...

    def existing_only(self, on_off: bool = True) -> ChromiumOptions: ...

    def save(self, path: Union[str, Path] = None) -> str: ...

    def save_to_default(self) -> str: ...
class PortFinder(object):
    """Type stub for the helper that finds a free port and a temporary user data folder."""
    used_port: dict = ...
    lock: Lock = ...
    tmp_dir: Path = ...

    def __init__(self, path: Union[str, Path] = None): ...

    # Instance method: the implementation reads self.tmp_dir, so it cannot be a staticmethod.
    def get_port(self) -> Tuple[int, str]: ...

View File

@ -1,22 +1,25 @@
[paths]
download_path =
tmp_path =
[chrome_options]
debugger_address = 127.0.0.1:9222
binary_location = chrome
arguments = ['--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking']
[chromium_options]
address = 127.0.0.1:9222
browser_path = chrome
arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking']
extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}}
page_load_strategy = normal
prefs = {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}
flags = {}
load_mode = normal
user = Default
auto_port = False
system_user_path = False
existing_only = False
[session_options]
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': 'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7'}
[timeouts]
implicit = 10
base = 10
page_load = 30
script = 30
@ -24,3 +27,6 @@ script = 30
http =
https =
[others]
retry_times = 3
retry_interval = 2

View File

@ -1,7 +1,9 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from configparser import RawConfigParser, NoSectionError, NoOptionError
from pathlib import Path
@ -26,7 +28,8 @@ class OptionsManager(object):
self.ini_path = str(path)
if not Path(self.ini_path).exists():
raise FileNotFoundError('ini文件不存在。')
input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
'pagedocs/advance/packaging/')
self._conf = RawConfigParser()
self._conf.read(self.ini_path, encoding='utf-8')

View File

@ -1,7 +1,9 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from configparser import RawConfigParser
from typing import Any

View File

@ -1,7 +1,9 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
@ -9,7 +11,7 @@ from requests import Session
from requests.structures import CaseInsensitiveDict
from .options_manage import OptionsManager
from ..commons.web import cookies_to_tuple, set_session_cookies
from .._functions.web import cookies_to_tuple, set_session_cookies
class SessionOptions(object):
@ -22,6 +24,9 @@ class SessionOptions(object):
"""
self.ini_path = None
self._download_path = None
self._timeout = 10
self._del_set = set() # 记录要从ini文件删除的参数
self._headers = None
self._cookies = None
self._auth = None
@ -34,46 +39,51 @@ class SessionOptions(object):
self._stream = None
self._trust_env = None
self._max_redirects = None
self._timeout = 10
self._retry_times = 3
self._retry_interval = 2
self._del_set = set() # 记录要从ini文件删除的参数
if read_file is False:
return
if read_file is not False:
ini_path = str(ini_path) if ini_path else None
om = OptionsManager(ini_path)
self.ini_path = om.ini_path
options_dict = om.session_options
ini_path = str(ini_path) if ini_path else None
om = OptionsManager(ini_path)
self.ini_path = om.ini_path
if options_dict.get('headers', None) is not None:
self.set_headers(options_dict['headers'])
options = om.session_options
if options.get('headers', None) is not None:
self.set_headers(options['headers'])
if options_dict.get('cookies', None) is not None:
self.set_cookies(options_dict['cookies'])
if options.get('cookies', None) is not None:
self.set_cookies(options['cookies'])
if options_dict.get('auth', None) is not None:
self._auth = options_dict['auth']
if options.get('auth', None) is not None:
self._auth = options['auth']
if options_dict.get('params', None) is not None:
self._params = options_dict['params']
if options.get('params', None) is not None:
self._params = options['params']
if options_dict.get('verify', None) is not None:
self._verify = options_dict['verify']
if options.get('verify', None) is not None:
self._verify = options['verify']
if options_dict.get('cert', None) is not None:
self._cert = options_dict['cert']
if options.get('cert', None) is not None:
self._cert = options['cert']
if options_dict.get('stream', None) is not None:
self._stream = options_dict['stream']
if options.get('stream', None) is not None:
self._stream = options['stream']
if options_dict.get('trust_env', None) is not None:
self._trust_env = options_dict['trust_env']
if options.get('trust_env', None) is not None:
self._trust_env = options['trust_env']
if options_dict.get('max_redirects', None) is not None:
self._max_redirects = options_dict['max_redirects']
if options.get('max_redirects', None) is not None:
self._max_redirects = options['max_redirects']
self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None))
self._timeout = om.timeouts.get('implicit', 10)
self._download_path = om.paths.get('download_path', None)
self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None))
self._timeout = om.timeouts.get('base', 10)
self._download_path = om.paths.get('download_path', None) or None
others = om.others
self._retry_times = others.get('retry_times', 3)
self._retry_interval = others.get('retry_interval', 2)
# ===========须独立处理的项开始============
@property
@ -81,13 +91,12 @@ class SessionOptions(object):
"""返回默认下载路径属性信息"""
return self._download_path
def set_paths(self, download_path=None):
def set_download_path(self, path):
"""设置默认下载路径
:param download_path: 下载路径
:param path: 下载路径
:return: 返回当前对象
"""
if download_path is not None:
self._download_path = str(download_path)
self._download_path = str(path)
return self
@property
@ -110,14 +119,35 @@ class SessionOptions(object):
self._proxies = {}
return self._proxies
def set_proxies(self, http, https=None):
def set_proxies(self, http=None, https=None):
"""设置proxies参数
:param http: http代理地址
:param https: https代理地址
:return: 返回当前对象
"""
proxies = None if http == https is None else {'http': http, 'https': https or http}
self._sets('proxies', proxies)
self._sets('proxies', {'http': http, 'https': https})
return self
@property
def retry_times(self):
"""返回连接失败时的重试次数"""
return self._retry_times
@property
def retry_interval(self):
"""返回连接失败时的重试间隔(秒)"""
return self._retry_interval
def set_retry(self, times=None, interval=None):
    """Configure the retry behaviour used when a connection fails.

    An argument left as None keeps the value currently stored.

    :param times: number of retries
    :param interval: pause between retries, in seconds
    :return: the current object
    """
    updates = (('_retry_times', times), ('_retry_interval', interval))
    for attr_name, new_value in updates:
        if new_value is not None:
            setattr(self, attr_name, new_value)
    return self
# ===========须独立处理的项结束============
@ -162,8 +192,7 @@ class SessionOptions(object):
return self
attr = attr.lower()
if attr in self._headers:
self._headers.pop(attr)
self._headers.pop(attr, None)
return self
@ -351,10 +380,12 @@ class SessionOptions(object):
if i not in ('download_path', 'timeout', 'proxies'):
om.set_item('session_options', i, options[i])
om.set_item('paths', 'download_path', self.download_path)
om.set_item('timeouts', 'implicit', self.timeout)
om.set_item('paths', 'download_path', self.download_path or '')
om.set_item('timeouts', 'base', self.timeout)
om.set_item('proxies', 'http', self.proxies.get('http', None))
om.set_item('proxies', 'https', self.proxies.get('https', None))
om.set_item('others', 'retry_times', self.retry_times)
om.set_item('others', 'retry_interval', self.retry_interval)
for i in self._del_set:
if i == 'download_path':
@ -379,25 +410,57 @@ class SessionOptions(object):
return session_options_to_dict(self)
def make_session(self):
"""根据内在的配置生成Session对象"""
"""根据内在的配置生成Session对象ua从对象中分离"""
s = Session()
h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict()
if self.headers:
s.headers = CaseInsensitiveDict(self.headers)
if self.cookies:
set_session_cookies(s, self.cookies)
if self.adapters:
for url, adapter in self.adapters:
s.mount(url, adapter)
attrs = ['auth', 'proxies', 'hooks', 'params', 'verify',
'cert', 'stream', 'trust_env', 'max_redirects']
for i in attrs:
for i in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']:
attr = self.__getattribute__(i)
if attr:
s.__setattr__(i, attr)
return s
return s, h
def from_session(self, session, headers=None):
    """Copy the configuration of a Session object into this options object.

    :param session: Session object to read from
    :param headers: extra headers merged over the session's headers; on a
                    (case-insensitive) key clash the value from ``headers`` wins
    :return: the current object
    """
    if headers:
        # Merge through update() rather than ``**`` unpacking: unpacking two
        # mappings that share a key raises TypeError, and a header called
        # 'data' would be swallowed by the constructor's first parameter.
        merged = CaseInsensitiveDict(session.headers)
        merged.update(headers)
        self._headers = merged
    else:
        # No extra headers: the session's own headers object is stored as-is.
        self._headers = session.headers

    self._cookies = session.cookies
    self._auth = session.auth
    self._proxies = session.proxies
    self._hooks = session.hooks
    self._params = session.params
    self._verify = session.verify
    self._cert = session.cert
    self._stream = session.stream
    self._trust_env = session.trust_env
    self._max_redirects = session.max_redirects
    if session.adapters:
        # Stored as a list of (url_prefix, adapter) pairs.
        self._adapters = list(session.adapters.items())
    return self
# --------------即将废弃---------------
def set_paths(self, download_path=None):
    """Set the default download path (kept in the section marked as soon to be deprecated).

    :param download_path: download path; None leaves the stored path untouched
    :return: the current object
    """
    if download_path is None:
        return self
    self._download_path = str(download_path)
    return self
def __repr__(self):
return f'<SessionOptions at {id(self)}>'
def session_options_to_dict(options):

View File

@ -1,15 +1,18 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Any, Union, Tuple
from typing import Any, Union, Tuple, Optional
from requests import Session
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
class SessionOptions(object):
@ -30,11 +33,13 @@ class SessionOptions(object):
self._max_redirects: int = ...
self._timeout: float = ...
self._del_set: set = ...
self._retry_times: int = ...
self._retry_interval: float = ...
@property
def download_path(self) -> str: ...
def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ...
def set_download_path(self, path: Union[str, Path]) -> SessionOptions: ...
@property
def timeout(self) -> float: ...
@ -65,6 +70,14 @@ class SessionOptions(object):
# http is optional here to match the implementation's signature set_proxies(self, http=None, https=None).
def set_proxies(self, http: Union[str, None] = None, https: Union[str, None] = None) -> SessionOptions: ...
@property
def retry_times(self) -> int: ...
@property
def retry_interval(self) -> float: ...
def set_retry(self, times: int = None, interval: float = None) -> SessionOptions: ...
@property
def hooks(self) -> dict: ...
@ -113,7 +126,9 @@ class SessionOptions(object):
def as_dict(self) -> dict: ...
def make_session(self) -> Session: ...
def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: ...
def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: ...
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ...

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,358 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union, Tuple, List, Any, Literal
from .none_element import NoneElement
from .._base.base import DrissionElement, BaseElement
from .._elements.session_element import SessionElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab
from .._pages.web_page import WebPage
from .._units.clicker import Clicker
from .._units.rect import ElementRect
from .._units.scroller import ElementScroller
from .._units.selector import SelectElement
from .._units.setter import ChromiumElementSetter
from .._units.states import ShadowRootStates, ElementStates
from .._units.waiter import ElementWaiter
PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True]
class ChromiumElement(DrissionElement):
def __init__(self, page: ChromiumBase, node_id: int = None, obj_id: str = None, backend_id: int = None):
self._tag: str = ...
self.page: Union[ChromiumPage, WebPage] = ...
self._node_id: int = ...
self._obj_id: str = ...
self._backend_id: int = ...
self._doc_id: str = ...
self._scroll: ElementScroller = ...
self._clicker: Clicker = ...
self._select: SelectElement = ...
self._wait: ElementWaiter = ...
self._rect: ElementRect = ...
self._set: ChromiumElementSetter = ...
self._states: ElementStates = ...
self._pseudo: Pseudo = ...
def __repr__(self) -> str: ...
def __call__(self, loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...
def __eq__(self, other: ChromiumElement) -> bool: ...
@property
def tag(self) -> str: ...
@property
def html(self) -> str: ...
@property
def inner_html(self) -> str: ...
@property
def attrs(self) -> dict: ...
@property
def text(self) -> str: ...
@property
def raw_text(self) -> str: ...
# -----------------d模式独有属性-------------------
@property
def set(self) -> ChromiumElementSetter: ...
@property
def states(self) -> ElementStates: ...
@property
def rect(self) -> ElementRect: ...
@property
def pseudo(self) -> Pseudo: ...
@property
def shadow_root(self) -> Union[None, ShadowRoot]: ...
@property
def sr(self) -> Union[None, ShadowRoot]: ...
@property
def scroll(self) -> ElementScroller: ...
@property
def click(self) -> Clicker: ...
def parent(self,
level_or_loc: Union[tuple, str, int] = 1,
index: int = 1) -> Union[ChromiumElement, NoneElement]: ...
def child(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ...
def prev(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ...
def next(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ...
def before(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ...
def after(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str, NoneElement]: ...
def children(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
def prevs(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
def nexts(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
def befores(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
def afters(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
@property
def wait(self) -> ElementWaiter: ...
@property
def select(self) -> SelectElement: ...
def check(self, uncheck: bool = False, by_js: bool = False) -> None: ...
def attr(self, attr: str) -> Union[str, None]: ...
def remove_attr(self, attr: str) -> None: ...
def prop(self, prop: str) -> Union[str, int, None]: ...
def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ...
def run_async_js(self, script: str, *args, as_expr: bool = False) -> None: ...
def ele(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[ChromiumElement]: ...
def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[SessionElement]: ...
def _find_elements(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None,
single: bool = True,
relative: bool = False,
raise_err: bool = False) -> Union[ChromiumElement, ChromiumFrame, NoneElement,
List[Union[ChromiumElement, ChromiumFrame]]]: ...
def style(self, style: str, pseudo_ele: str = '') -> str: ...
def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ...
# path is annotated as a Union; a bare list literal such as [str, bool] is not a valid type.
def save(self, path: Union[str, bool] = None, name: str = None, timeout: float = None) -> str: ...
# path is annotated as a Union; a bare list literal such as [str, Path] is not a valid type.
def get_screenshot(self,
                   path: Union[str, Path] = None,
                   name: str = None,
                   as_bytes: PIC_TYPE = None,
                   as_base64: PIC_TYPE = None,
                   scroll_to_center: bool = True) -> Union[str, bytes]: ...
def input(self, vals: Any, clear: bool = True, by_js: bool = False) -> None: ...
def _set_file_input(self, files: Union[str, list, tuple]) -> None: ...
def clear(self, by_js: bool = False) -> None: ...
def _input_focus(self) -> None: ...
def focus(self) -> None: ...
def hover(self, offset_x: int = None, offset_y: int = None) -> None: ...
def drag(self, offset_x: int = 0, offset_y: int = 0, duration: float = 0.5) -> None: ...
def drag_to(self, ele_or_loc: Union[tuple, ChromiumElement], duration: float = 0.5) -> None: ...
def _get_obj_id(self, node_id: int = None, backend_id: int = None) -> str: ...
def _get_node_id(self, obj_id: str = None, backend_id: int = None) -> int: ...
def _get_backend_id(self, node_id: int) -> int: ...
def _get_ele_path(self, mode: str) -> str: ...
class ShadowRoot(BaseElement):
def __init__(self, parent_ele: ChromiumElement, obj_id: str = None, backend_id: int = None):
self._obj_id: str = ...
self._node_id: int = ...
self._backend_id: int = ...
self.page: ChromiumPage = ...
self.parent_ele: ChromiumElement = ...
self._states: ShadowRootStates = ...
def __repr__(self) -> str: ...
def __call__(self, loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> ChromiumElement: ...
def __eq__(self, other: ShadowRoot) -> bool: ...
@property
def states(self) -> ShadowRootStates: ...
@property
def tag(self) -> str: ...
@property
def html(self) -> str: ...
@property
def inner_html(self) -> str: ...
def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ...
def run_async_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> None: ...
def parent(self, level_or_loc: Union[str, int] = 1, index: int = 1) -> ChromiumElement: ...
def child(self, filter_loc: Union[tuple, str] = '',
index: int = 1) -> Union[ChromiumElement, NoneElement]: ...
def next(self, filter_loc: Union[tuple, str] = '',
index: int = 1) -> Union[ChromiumElement, NoneElement]: ...
def before(self, filter_loc: Union[tuple, str] = '',
index: int = 1) -> Union[ChromiumElement, NoneElement]: ...
def after(self, filter_loc: Union[tuple, str] = '',
index: int = 1) -> Union[ChromiumElement, NoneElement]: ...
def children(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ...
def nexts(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ...
def befores(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ...
def afters(self, filter_loc: Union[tuple, str] = '') -> List[ChromiumElement]: ...
def ele(self, loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...
def eles(self, loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[ChromiumElement]: ...
def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...
def _find_elements(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None,
single: bool = True, relative: bool = False, raise_err: bool = None) \
-> Union[ChromiumElement, ChromiumFrame, NoneElement, str, List[Union[ChromiumElement,
ChromiumFrame, str]]]: ...
def _get_node_id(self, obj_id: str) -> int: ...
def _get_obj_id(self, back_id: int) -> str: ...
def _get_backend_id(self, node_id: int) -> int: ...
def find_in_chromium_ele(ele: ChromiumElement, loc: Union[str, Tuple[str, str]],
single: bool = True, timeout: float = None, relative: bool = True) \
-> Union[ChromiumElement, NoneElement, List[ChromiumElement]]: ...
def find_by_xpath(ele: ChromiumElement, xpath: str, single: bool, timeout: float,
relative: bool = True) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ...
def find_by_css(ele: ChromiumElement, selector: str, single: bool,
timeout: float) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]: ...
def make_chromium_ele(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame],
node_id: int = ...,
obj_id: str = ...) -> Union[ChromiumElement, ChromiumFrame, str]: ...
def make_chromium_eles(page: Union[ChromiumBase, ChromiumPage, WebPage, ChromiumTab, ChromiumFrame],
node_ids: Union[tuple, list] = None,
obj_ids: Union[tuple, list] = None,
single: bool = True,
ele_only: bool = True) -> Union[ChromiumElement, ChromiumFrame, NoneElement,
List[Union[ChromiumElement, ChromiumFrame]]]: ...
def make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ...
def run_js(page_or_ele: Union[ChromiumBase, ChromiumElement, ShadowRoot], script: str,
as_expr: bool = False, timeout: float = None, args: tuple = ...) -> Any: ...
def parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ...
def convert_argument(arg: Any) -> dict: ...
class Pseudo(object):
def __init__(self, ele: ChromiumElement):
self._ele: ChromiumElement = ...
@property
def before(self) -> str: ...
@property
def after(self) -> str: ...

View File

@ -0,0 +1,49 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from ..errors import ElementNotFoundError
class NoneElement(object):
def __init__(self, page=None, method=None, args=None):
if page:
self._none_ele_value = page._none_ele_value
self._none_ele_return_value = page._none_ele_return_value
else:
self._none_ele_value = None
self._none_ele_return_value = False
self.method = method
self.args = args
def __call__(self, *args, **kwargs):
if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args)
else:
return self
def __getattr__(self, item):
if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args)
elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before',
'after', 'get_frame', 'shadow_root', 'sr'):
return self
else:
if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html',
'attrs', 'text', 'raw_text'):
return self._none_ele_value
else:
raise ElementNotFoundError(None, self.method, self.args)
def __eq__(self, other):
if other is None:
return True
def __bool__(self):
return False
def __repr__(self):
return 'None'

View File

@ -1,18 +1,20 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from html import unescape
from re import match, DOTALL
from re import match, sub, DOTALL
from lxml.etree import tostring
from lxml.html import HtmlElement, fromstring
from .base import DrissionElement, BasePage, BaseElement
from .commons.constants import NoneElement
from .commons.locator import get_loc
from .commons.web import get_ele_txt, make_absolute_link
from .none_element import NoneElement
from .._base.base import DrissionElement, BasePage, BaseElement
from .._functions.locator import get_loc
from .._functions.web import get_ele_txt, make_absolute_link
class SessionElement(DrissionElement):
@ -43,6 +45,9 @@ class SessionElement(DrissionElement):
"""
return self.ele(loc_or_str)
def __eq__(self, other):
return self.xpath == getattr(other, 'xpath', None)
@property
def tag(self):
"""返回元素类型"""
@ -194,10 +199,10 @@ class SessionElement(DrissionElement):
return link
else: # 其它情况直接返回绝对url
return make_absolute_link(link, self.page)
return make_absolute_link(link, self.page.url)
elif attr == 'src':
return make_absolute_link(self.inner_ele.get('src'), self.page)
return make_absolute_link(self.inner_ele.get('src'), self.page.url)
elif attr == 'text':
return self.text
@ -220,7 +225,7 @@ class SessionElement(DrissionElement):
:param timeout: 不起实际作用用于和DriverElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
return self._ele(loc_or_str)
return self._ele(loc_or_str, method='ele()')
def eles(self, loc_or_str, timeout=None):
"""返回当前元素下级所有符合条件的子元素、属性或节点文本
@ -235,7 +240,7 @@ class SessionElement(DrissionElement):
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:return: SessionElement对象或属性文本
"""
return self._ele(loc_or_str)
return self._ele(loc_or_str, method='s_ele()')
def s_eles(self, loc_or_str):
"""返回当前元素下级所有符合条件的子元素、属性或节点文本
@ -266,14 +271,14 @@ class SessionElement(DrissionElement):
while ele:
if mode == 'css':
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
path_str = f'>:nth-child({brothers + 1}){path_str}'
path_str = f'>{ele.tag}:nth-child({brothers + 1}){path_str}'
else:
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
ele = ele.parent()
return f':root{path_str[1:]}' if mode == 'css' else path_str
return f'{path_str[1:]}' if mode == 'css' else path_str
def make_session_ele(html_or_ele, loc=None, single=True):
@ -334,23 +339,26 @@ def make_session_ele(html_or_ele, loc=None, single=True):
page = html_or_ele.page
xpath = html_or_ele.xpath
# ChromiumElement兼容传入的元素在iframe内的情况
html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele.ids.doc_id)['outerHTML'] \
if html_or_ele.ids.doc_id else html_or_ele.page.html
html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML'] \
if html_or_ele._doc_id else html_or_ele.page.html
html_or_ele = fromstring(html)
html_or_ele = html_or_ele.xpath(xpath)[0]
# 各种页面对象
elif isinstance(html_or_ele, BasePage):
page = html_or_ele
html_or_ele = fromstring(html_or_ele.html)
html = html_or_ele.html
if html.startswith('<?xml '):
html = sub(r'^<\?xml.*?>', '', html)
html_or_ele = fromstring(html)
# 直接传入html文本
elif isinstance(html_or_ele, str):
page = None
html_or_ele = fromstring(html_or_ele)
# ShadowRootElement, ChromiumShadowRoot, ChromiumFrame
elif isinstance(html_or_ele, BaseElement) or the_type.endswith(".ChromiumFrame'>"):
# ShadowRoot
elif isinstance(html_or_ele, BaseElement):
page = html_or_ele.page
html_or_ele = fromstring(html_or_ele.html)
@ -375,7 +383,7 @@ def make_session_ele(html_or_ele, loc=None, single=True):
elif isinstance(ele, str):
return ele
else:
return NoneElement()
return NoneElement(page)
else: # 返回全部
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n']

View File

@ -0,0 +1,145 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, List, Tuple, Optional
from lxml.html import HtmlElement
from .none_element import NoneElement
from .._base.base import DrissionElement, BaseElement
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.session_page import SessionPage
class SessionElement(DrissionElement):
def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None):
self._inner_ele: HtmlElement = ...
self.page: SessionPage = ...
@property
def inner_ele(self) -> HtmlElement: ...
def __repr__(self) -> str: ...
def __call__(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[SessionElement, NoneElement]: ...
def __eq__(self, other: SessionElement) -> bool: ...
@property
def tag(self) -> str: ...
@property
def html(self) -> str: ...
@property
def inner_html(self) -> str: ...
@property
def attrs(self) -> dict: ...
@property
def text(self) -> str: ...
@property
def raw_text(self) -> str: ...
def parent(self,
level_or_loc: Union[tuple, str, int] = 1,
index: int = 1) -> Union[SessionElement, NoneElement]: ...
def child(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ...
def prev(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ...
def next(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ...
def before(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ...
def after(self,
filter_loc: Union[tuple, str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str, NoneElement]: ...
def children(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[SessionElement, str]]: ...
def prevs(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[SessionElement, str]]: ...
def nexts(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[SessionElement, str]]: ...
def befores(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[SessionElement, str]]: ...
def afters(self,
filter_loc: Union[tuple, str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[SessionElement, str]]: ...
def attr(self, attr: str) -> Optional[str]: ...
def ele(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[SessionElement, NoneElement]: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[SessionElement]: ...
def s_ele(self,
loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ...
def s_eles(self,
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...
def _find_elements(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None,
single: bool = True,
relative: bool = False,
raise_err: bool = None) \
-> Union[SessionElement, NoneElement, List[SessionElement]]: ...
def _get_ele_path(self, mode: str) -> str: ...
def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame,
ChromiumBase],
loc: Union[str, Tuple[str, str]] = None,
single: bool = True) -> Union[
SessionElement, NoneElement, List[SessionElement]]: ...

View File

@ -0,0 +1,354 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from json import load, dump, JSONDecodeError
from os import popen
from pathlib import Path
from platform import system
from re import search
from subprocess import Popen, DEVNULL
from tempfile import gettempdir
from time import perf_counter, sleep
from requests import get as requests_get
from .tools import port_is_using
from .._configs.options_manage import OptionsManager
from ..errors import BrowserConnectError
def connect_browser(option):
    """Attach to a running browser, or launch a new one, as described by the options.

    :param option: ChromiumOptions object
    :return: True if an existing browser was taken over, False if a new one was launched
    :raises FileNotFoundError: when no browser executable can be found
    """
    address = option.address.replace('localhost', '127.0.0.1')
    # Remove a leading scheme as a prefix. str.lstrip() takes a *set of characters*,
    # so lstrip('http://') would also eat the start of a host beginning with h, t or p.
    for scheme in ('http://', 'https://'):
        if address.startswith(scheme):
            address = address[len(scheme):]
            break

    chrome_path = option.browser_path
    ip, port = address.split(':')

    if ip != '127.0.0.1' or port_is_using(ip, port) or option.is_existing_only:
        test_connect(ip, port)
        # Any --headless argument other than an explicit '=false' counts as headless.
        option._headless = any(i.startswith('--headless') and not i.endswith('=false')
                               for i in option.arguments)
        return True

    # ---------- create the browser process ----------
    args = get_launch_args(option)
    set_prefs(option)
    set_flags(option)
    try:
        _run_browser(port, chrome_path, args)

    # The configured path was not found: fall back to get_chrome_path() and retry once.
    except FileNotFoundError:
        chrome_path = get_chrome_path()
        if not chrome_path:
            raise FileNotFoundError('无法找到浏览器可执行文件路径,请手动配置。')
        _run_browser(port, chrome_path, args)

    test_connect(ip, port)
    return False
def get_launch_args(opt):
    """Build the command-line launch arguments from a ChromiumOptions object.

    Side effects: sets ``opt._headless``; when no user data dir is given and the
    system profile is not requested, creates a ``userData_<port>`` folder and
    registers it through ``opt.set_user_data_path()``.

    :param opt: ChromiumOptions
    :return: list of launch arguments
    """
    # ---------- arguments ----------
    result = set()
    has_user_path = False
    headless = None
    for i in opt.arguments:
        # These two are generated elsewhere, so user-supplied copies are dropped.
        if i.startswith(('--load-extension=', '--remote-debugging-port=')):
            continue
        elif i.startswith('--user-data-dir') and not opt.system_user_path:
            # i[16:] skips the '--user-data-dir=' prefix.
            result.add(f'--user-data-dir={Path(i[16:]).absolute()}')
            has_user_path = True
            continue
        elif i.startswith('--headless'):
            if i == '--headless=false':
                headless = False
                continue
            elif i == '--headless':
                i = '--headless=new'
                headless = True
            else:
                headless = True

        result.add(i)

    if not has_user_path and not opt.system_user_path:
        port = opt.address.split(':')[-1] if opt.address else '0'
        p = Path(opt.tmp_path) if opt.tmp_path else Path(gettempdir()) / 'DrissionPage'
        path = p / f'userData_{port}'
        path.mkdir(parents=True, exist_ok=True)
        opt.set_user_data_path(path)
        result.add(f'--user-data-dir={path}')

    # Headless not specified on Linux: enable it when systemd reports no graphical target.
    if headless is None and system().lower() == 'linux':
        # Context manager so the pipe to the shell command is closed promptly.
        with popen('systemctl list-units | grep graphical.target') as r:
            has_graphical_target = 'graphical.target' in r.read()
        if not has_graphical_target:
            headless = True
            result.add('--headless=new')

    result = list(result)
    opt._headless = headless

    # ---------- extensions ----------
    ext = [str(Path(e).absolute()) for e in opt.extensions]
    if ext:
        ext = ','.join(set(ext))
        ext = f'--load-extension={ext}'
        result.append(ext)

    return result
def set_prefs(opt):
    """Apply the preference changes from the launch options to the profile's ``Preferences`` file.

    Nothing happens without a user data path, or when there is nothing to set or delete.
    The file is created (containing ``{}``) when it does not exist yet.

    :param opt: ChromiumOptions
    :return: None
    """
    if not opt.user_data_path:
        return
    if not (opt.preferences or opt._prefs_to_del):
        return

    to_set = opt.preferences
    to_delete = opt._prefs_to_del

    # The profile folder is 'Default' unless the first --profile-directory argument names another.
    profile = next((a.split('=')[-1].strip() for a in opt.arguments if a.startswith('--profile-directory')),
                   'Default')

    prefs_file = Path(opt.user_data_path) / profile / 'Preferences'
    if not prefs_file.exists():
        prefs_file.parent.mkdir(parents=True, exist_ok=True)
        with open(prefs_file, 'w') as new_file:
            new_file.write('{}')

    with open(prefs_file, "r", encoding='utf-8') as source:
        try:
            prefs_dict = load(source)
        except JSONDecodeError:
            # Unreadable content is replaced by a fresh mapping.
            prefs_dict = {}

        for dotted_name in to_set:
            new_value = to_set[dotted_name]
            parts = dotted_name.split('.')
            # Create the nested levels first, then assign the leaf value.
            _make_leave_in_dict(prefs_dict, parts, 0, len(parts))
            _set_value_to_dict(prefs_dict, parts, new_value)

        for dotted_name in to_delete:
            _remove_arg_from_dict(prefs_dict, dotted_name)

    with open(prefs_file, 'w', encoding='utf-8') as target:
        dump(prefs_dict, target)
def set_flags(opt):
    """Write the flags from the launch options into the ``Local State`` file of the user data folder.

    Flags are stored under ``browser.enabled_labs_experiments`` as ``name@value``
    entries (just ``name`` when the value is empty). Nothing happens without a
    user data path, or when there are no flags to write and no request to clear
    the stored ones. The file is created (containing ``{}``) when it does not exist.

    :param opt: ChromiumOptions
    :return: None
    """
    if not opt.user_data_path or (not opt.clear_file_flags and not opt.flags):
        return

    state_file = Path(opt.user_data_path) / 'Local State'

    if not state_file.exists():
        state_file.parent.mkdir(parents=True, exist_ok=True)
        with open(state_file, 'w') as f:
            f.write('{}')

    with open(state_file, "r", encoding='utf-8') as f:
        try:
            states_dict = load(f)
        except JSONDecodeError:
            states_dict = {}

    # Always make sure the 'browser' section exists: when the stored flags are
    # being cleared it was never created, and the assignment below would fail
    # with KeyError on a file that has no such section yet.
    browser = states_dict.setdefault('browser', {})
    flags_list = [] if opt.clear_file_flags else browser.setdefault('enabled_labs_experiments', [])

    # Start from the flags already in the file, then let the options override them.
    flags_dict = {}
    for entry in flags_list:
        name, sep, value = str(entry).partition('@')
        flags_dict[name] = value if sep else None
    flags_dict.update(opt.flags)

    browser['enabled_labs_experiments'] = [f'{k}@{i}' if i else k for k, i in flags_dict.items()]

    with open(state_file, 'w', encoding='utf-8') as f:
        dump(states_dict, f)
def test_connect(ip, port, timeout=30):
    """Poll the browser's ``/json`` endpoint until it lists a usable tab.

    :param ip: browser ip
    :param port: browser port
    :param timeout: overall time limit for the polling, in seconds
    :return: None
    :raises BrowserConnectError: when no tab of type 'page' or 'webview' is reported in time
    """
    end_time = perf_counter() + timeout
    while perf_counter() < end_time:
        try:
            tabs = requests_get(f'http://{ip}:{port}/json', timeout=10, headers={'Connection': 'close'},
                                proxies={'http': None, 'https': None}).json()
            if any(tab['type'] in ('page', 'webview') for tab in tabs):
                return
        except Exception:
            # Deliberate best effort: any failure just means "not ready yet".
            pass
        # Pause after every unsuccessful attempt, including a valid answer that
        # lists no page tab, so the endpoint is not polled in a tight loop.
        sleep(.2)

    raise BrowserConnectError(f'\n{ip}:{port}浏览器无法链接。\n请确认:\n1、该端口为浏览器\n'
                              f'2、已添加\'--remote-debugging-port={port}\'启动项\n'
                              f'3、用户文件夹没有和已打开的浏览器冲突\n'
                              f'4、如为无界面系统请添加\'--headless=new\'参数\n'
                              f'5、如果是Linux系统可能还要添加\'--no-sandbox\'启动参数\n'
                              f'可使用ChromiumOptions设置端口和用户文件夹路径。')
def _run_browser(port, path: str, args) -> Popen:
    """Start the browser process.

    :param port: remote debugging port number
    :param path: browser path; when it is a directory, ``chrome`` inside it is used
    :param args: launch arguments
    :return: the process object
    :raises FileNotFoundError: when the executable cannot be started because it does not exist
    """
    location = Path(path)
    executable = str(location / 'chrome') if location.is_dir() else str(path)
    command = [executable, f'--remote-debugging-port={port}', *args]
    try:
        # Output of the browser process is discarded.
        return Popen(command, shell=False, stdout=DEVNULL, stderr=DEVNULL)
    except FileNotFoundError:
        raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。')
def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None:
    """Create the nested levels for a dotted preference name (a.b.c -> a['b']['c']).

    Every key of ``src`` from position ``num`` up to ``end`` that is missing at
    its level is created as an empty dict, the last one included; existing
    entries are left as they are.

    :param target_dict: dictionary to work on
    :param src: list of the levels, e.g. [a, b, c]
    :param num: index of the level to start with
    :param end: length of src
    :return: None
    """
    node = target_dict
    position = num
    while position != end:
        key = src[position]
        if key not in node:
            node[key] = {}
        # Step down one level for the next key.
        node = node[key]
        position += 1
def _set_value_to_dict(target_dict: dict, src: list, value) -> None:
"""把a.b.c形式的属性的值赋值到a['b']['c']形式的字典中
:param target_dict: 要处理的字典
:param src: 属性层级列表[a, b, c]
:param value: 属性值
:return: None
"""
src = "']['".join(src)
src = f"target_dict['{src}']=value"
exec(src)
def _remove_arg_from_dict(target_dict: dict, arg: str) -> None:
"""把a.b.c形式的属性从字典中删除
:param target_dict: 要处理的字典
:param arg: 层级属性形式'a.b.c'
:return: None
"""
args = arg.split('.')
args = [f"['{i}']" for i in args]
src = ''.join(args)
src = f"target_dict{src}"
try:
exec(src)
src = ''.join(args[:-1])
src = f"target_dict{src}.pop({args[-1][1:-1]})"
exec(src)
except:
pass
def get_chrome_path():
    """Find the chrome executable from the ini file or from the system.

    Sources are tried in this order: the ``browser_path`` entry of the ini
    file, ``shutil.which`` for common executable names, the default install
    locations on macOS and Linux, and on Windows the App Paths registry key
    followed by the folders listed in the PATH variable.

    :return: path of the executable as str, or None when nothing was found
    """
    # ----------- from the ini file --------------
    path = OptionsManager().chromium_options.get('browser_path', None)
    if path and Path(path).is_file():
        return str(path)

    # ----------- using which -----------
    from shutil import which
    path = (which('chrome') or which('chromium') or which('google-chrome') or which('google-chrome-stable')
            or which('google-chrome-unstable') or which('google-chrome-beta'))
    if path:
        return path

    # ----------- default locations on macOS and Linux -----------
    from platform import system
    os_name = system().lower()
    if os_name in ('macos', 'darwin'):
        p = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
        return p if Path(p).exists() else None

    elif os_name == 'linux':
        paths = ('/usr/bin/google-chrome', '/opt/google/chrome/google-chrome',
                 '/usr/lib/chromium-browser/chromium-browser')
        for p in paths:
            if Path(p).exists():
                return p
        return None

    elif os_name != 'windows':
        return None

    # ----------- from the registry --------------
    import winreg
    try:
        # the with-block closes the key even when EnumValue fails
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                            r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe',
                            reserved=0, access=winreg.KEY_READ) as key:
            return winreg.EnumValue(key, 0)[1]
    except OSError:
        # key missing or unreadable (FileNotFoundError included): fall through to PATH
        pass

    # ----------- from the PATH variable --------------
    try:
        with popen('set path') as pipe:
            paths = pipe.read().lower()
    except Exception:
        return None

    r = search(r'[^;]*chrome[^;]*', paths)
    if r:
        path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe'
        if path.exists():
            return str(path)

    for folder in paths.split(';'):
        candidate = Path(folder) / 'chrome.exe'
        try:
            if candidate.exists():
                return str(candidate)
        except OSError:
            pass

    return None

View File

@ -0,0 +1,28 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union
from .._configs.chromium_options import ChromiumOptions
# Type stubs for the browser helper functions.
# NOTE(review): the bodies of connect_browser, get_launch_args and set_prefs are
# not visible next to these stubs; confirm their contracts against the implementation.
def connect_browser(option: ChromiumOptions) -> bool: ...


def get_launch_args(opt: ChromiumOptions) -> list: ...


def set_prefs(opt: ChromiumOptions) -> None: ...


# presumably writes opt.flags into the browser state file -- TODO confirm
def set_flags(opt: ChromiumOptions) -> None: ...


# Polls http://{ip}:{port}/json until a 'page' or 'webview' tab is listed;
# raises BrowserConnectError when the timeout elapses first.
def test_connect(ip: str, port: Union[int, str], timeout: float = 30) -> None: ...


# Returns the chrome executable path found via the ini file or the system, else None.
def get_chrome_path() -> Union[str, None]: ...

View File

@ -1,4 +1,12 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
class By:
ID = 'id'
XPATH = 'xpath'

View File

@ -1,7 +1,15 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from click import command, option
from ..chromium_page import ChromiumPage
from ..easy_set import set_paths, configs_to_here as ch
from .._functions.tools import configs_to_here as ch
from .._configs.chromium_options import ChromiumOptions
from .._pages.chromium_page import ChromiumPage
@command()
@ -24,5 +32,22 @@ def main(set_browser_path, set_user_path, configs_to_here, launch_browser):
ChromiumPage(port)
def set_paths(browser_path=None, user_data_path=None):
    """Shortcut for storing browser related paths in the configuration.

    Only the paths that are given (not None) are changed; the options are
    then saved.

    :param browser_path: path of the browser executable
    :param user_data_path: path of the user data folder
    :return: None
    """
    options = ChromiumOptions()
    setters = ((browser_path, options.set_browser_path),
               (user_data_path, options.set_user_data_path))
    for value, setter in setters:
        if value is not None:
            setter(value)
    options.save()
if __name__ == '__main__':
main()

View File

@ -1,9 +1,11 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import List, Tuple, Dict
from ..errors import AlertExistsError
class Keys:
@ -339,10 +341,10 @@ modifierBit = {'\ue00a': 1,
'\ue008': 8}
def keys_to_typing(value) -> Tuple[int, str]:
def keys_to_typing(value):
"""把要输入的内容连成字符串,去掉其中 ctrl 等键。
返回的modifier表示是否有按下组合键"""
typing: List[str] = []
typing = []
modifier = 0
for val in value:
if val in ('\ue009', '\ue008', '\ue00a', '\ue03d'):
@ -359,7 +361,7 @@ def keys_to_typing(value) -> Tuple[int, str]:
return modifier, ''.join(typing)
def keyDescriptionForString(_modifiers: int, keyString: str) -> Dict: # noqa: C901
def keyDescriptionForString(_modifiers, keyString): # noqa: C901
shift = _modifiers & 8
description = {'key': '',
'keyCode': 0,
@ -367,7 +369,7 @@ def keyDescriptionForString(_modifiers: int, keyString: str) -> Dict: # noqa: C
'text': '',
'location': 0}
definition: Dict = keyDefinitions.get(keyString) # type: ignore
definition = keyDefinitions.get(keyString) # type: ignore
if not definition:
raise ValueError(f'未知按键:{keyString}')
@ -399,3 +401,50 @@ def keyDescriptionForString(_modifiers: int, keyString: str) -> Dict: # noqa: C
description['text'] = ''
return description
def send_key(page, modifier, key):
    """Send one character to the page.

    A character that has an entry in keyDefinitions is dispatched as a key
    press (key down followed by key up); anything else is inserted as text.

    :param page: page object providing run_cdp()
    :param modifier: modifier bit mask passed along with the key events
    :param key: the character to send
    :return: None
    """
    if key not in keyDefinitions:
        page.run_cdp('Input.insertText', text=key, _ignore=AlertExistsError)
        return

    description = keyDescriptionForString(modifier, key)
    text = description['text']
    location = description['location']
    event = {'type': None,
             'modifiers': modifier,
             'windowsVirtualKeyCode': description['keyCode'],
             'code': description['code'],
             'key': description['key'],
             'text': text,
             'autoRepeat': False,
             'unmodifiedText': text,
             'location': location,
             'isKeypad': location == 3,
             '_ignore': AlertExistsError}

    # keys that produce no text are sent as raw key-down events
    for event_type in ('keyDown' if text else 'rawKeyDown', 'keyUp'):
        event['type'] = event_type
        page.run_cdp('Input.dispatchKeyEvent', **event)
def input_text_or_keys(page, text_or_keys):
    """Type text, or press a key combination given as a tuple/list.

    When a modifier key is part of the input every remaining character is
    sent as a key press with that modifier. Otherwise the text is inserted in
    one go; a trailing '\\n' or '\\ue007' is sent as a separate key press.

    :param page: ChromiumBase object
    :param text_or_keys: text value or key combination
    :return: None
    """
    if not isinstance(text_or_keys, (tuple, list)):
        text_or_keys = (str(text_or_keys),)
    modifier, typed = keys_to_typing(text_or_keys)

    if modifier != 0:  # contains modifier keys
        for key in typed:
            send_key(page, modifier, key)
        return

    ends_with_enter = typed.endswith(('\n', '\ue007'))
    content = typed[:-1] if ends_with_enter else typed
    page.run_cdp('Input.insertText', text=content, _ignore=AlertExistsError)
    if ends_with_enter:
        send_key(page, modifier, '\n')

View File

@ -0,0 +1,99 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Tuple, Dict, Union, Any
from .._pages.chromium_base import ChromiumBase
class Keys:
    """Special keys.

    Every attribute is declared as ``str``; the actual values live in the
    implementation module.
    """
    NULL: str
    CANCEL: str
    HELP: str
    BACKSPACE: str
    BACK_SPACE: str
    TAB: str
    CLEAR: str
    RETURN: str
    ENTER: str
    SHIFT: str
    LEFT_SHIFT: str
    CONTROL: str
    CTRL: str
    LEFT_CONTROL: str
    ALT: str
    LEFT_ALT: str
    PAUSE: str
    ESCAPE: str
    SPACE: str
    PAGE_UP: str
    PAGE_DOWN: str
    END: str
    HOME: str
    LEFT: str
    ARROW_LEFT: str
    UP: str
    ARROW_UP: str
    RIGHT: str
    ARROW_RIGHT: str
    DOWN: str
    ARROW_DOWN: str
    INSERT: str
    DELETE: str
    DEL: str
    SEMICOLON: str
    EQUALS: str

    # numeric keypad
    NUMPAD0: str
    NUMPAD1: str
    NUMPAD2: str
    NUMPAD3: str
    NUMPAD4: str
    NUMPAD5: str
    NUMPAD6: str
    NUMPAD7: str
    NUMPAD8: str
    NUMPAD9: str
    MULTIPLY: str
    ADD: str
    SUBTRACT: str
    DECIMAL: str
    DIVIDE: str

    # function keys
    F1: str
    F2: str
    F3: str
    F4: str
    F5: str
    F6: str
    F7: str
    F8: str
    F9: str
    F10: str
    F11: str
    F12: str

    META: str
    COMMAND: str
# Lookup table consulted by send_key() to decide whether a character is a known key.
keyDefinitions: dict = ...
# NOTE(review): appears to map modifier key characters to their bit values -- confirm.
modifierBit: dict = ...


# Joins the content to type into one string with ctrl-like keys removed;
# returns (modifier, text) where modifier tells whether combination keys are held.
def keys_to_typing(value: Union[str, int, list, tuple]) -> Tuple[int, str]: ...


# Builds the key description dict for one key; raises ValueError for an unknown key.
def keyDescriptionForString(_modifiers: int, keyString: str) -> Dict: ...


# Sends one character: a key press when it is in keyDefinitions, plain text otherwise.
def send_key(page: ChromiumBase, modifier: int, key: str) -> None: ...


# Types text, or presses a key combination passed as tuple/list.
def input_text_or_keys(page: ChromiumBase, text_or_keys: Any) -> None: ...

View File

@ -1,23 +1,32 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from re import split
from .by import By
def get_loc(loc, translate_css=False):
"""接收selenium定位元组或本库定位语法转换为标准定位元组可翻译css selector为xpath
:param loc: selenium定位元组或本库定位语法
:param translate_css: 是否翻译css selector为xpath
def is_loc(text):
    """Tell whether text begins with one of the locator prefixes.

    :param text: string to check
    :return: True when text starts with a locator prefix, else False
    """
    prefixes = (
        '.', '#', '@',
        't:', 't=', 'tag:', 'tag=',
        'tx:', 'tx=', 'tx^', 'tx$',
        'text:', 'text=', 'text^', 'text$',
        'xpath:', 'xpath=', 'x:', 'x=',
        'css:', 'css=', 'c:', 'c=',
    )
    return text.startswith(prefixes)
def get_loc(loc, translate_css=False, css_mode=False):
"""接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath
:param loc: 本库定位语法或selenium定位元组
:param translate_css: 是否翻译css selector为xpath用于相对定位
:param css_mode: 是否尽量用css selector方式
:return: DrissionPage定位元组
"""
if isinstance(loc, tuple):
loc = translate_loc(loc)
loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
elif isinstance(loc, str):
loc = str_to_loc(loc)
loc = str_to_css_loc(loc) if css_mode else str_to_xpath_loc(loc)
else:
raise TypeError('loc参数只能是tuple或str。')
@ -34,7 +43,7 @@ def get_loc(loc, translate_css=False):
return loc
def str_to_loc(loc):
def str_to_xpath_loc(loc):
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
@ -61,39 +70,30 @@ def str_to_loc(loc):
# ------------------------------------------------------------------
# 多属性查找
if loc.startswith('@@') and loc != '@@':
loc_str = _make_multi_xpath_str('*', loc)
elif loc.startswith('@|') and loc != '@|':
loc_str = _make_multi_xpath_str('*', loc, False)
if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'):
loc_str = _make_multi_xpath_str('*', loc)[1]
# 单属性查找
elif loc.startswith('@') and loc != '@':
loc_str = _make_single_xpath_str('*', loc)
loc_str = _make_single_xpath_str('*', loc)[1]
# 根据tag name查找
elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='):
at_ind = loc.find('@')
if at_ind == -1:
loc_str = f'//*[name()="{loc[4:]}"]'
elif loc[at_ind:].startswith(('@@', '@|', '@!')):
loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:])[1]
else:
if loc[at_ind:].startswith('@@'):
loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:])
elif loc[at_ind:].startswith('@|'):
loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:], False)
else:
loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:])
loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:])[1]
# 根据文本查找
elif loc.startswith('text='):
loc_str = f'//*[text()={_make_search_str(loc[5:])}]'
elif loc.startswith('text:') and loc != 'text:':
loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..'
elif loc.startswith('text^') and loc != 'text^':
loc_str = f'//*/text()[starts-with(., {_make_search_str(loc[5:])})]/..'
elif loc.startswith('text$') and loc != 'text$':
loc_str = f'//*/text()[substring(., string-length(.) - string-length({_make_search_str(loc[5:])}) +1) = ' \
f'{_make_search_str(loc[5:])}]/..'
@ -121,8 +121,72 @@ def str_to_loc(loc):
return loc_by, loc_str
def _make_single_xpath_str(tag: str, text: str) -> str:
"""生成xpath语句
def str_to_css_loc(loc):
    """Convert a locator string into a locator tuple, preferring css selector.

    Locators that cannot be expressed as css selector (text and xpath
    locators, plain text search) are delegated to the xpath converter, in
    which case the returned method is the one that converter reports.

    :param loc: locator syntax string
    :return: (locating method, locator string) tuple
    """
    loc_by = 'css selector'

    # ---------- expand the shorthand prefixes ----------
    if loc.startswith('.'):
        if loc.startswith(('.=', '.:', '.^', '.$')):
            loc = loc.replace('.', '@class', 1)
        else:
            loc = loc.replace('.', '@class=', 1)

    elif loc.startswith('#'):
        if loc.startswith(('#=', '#:', '#^', '#$')):
            loc = loc.replace('#', '@id', 1)
        else:
            loc = loc.replace('#', '@id=', 1)

    elif loc.startswith(('t:', 't=')):
        loc = f'tag:{loc[2:]}'

    elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')):
        loc = f'text{loc[2:]}'

    # ------------------------------------------------------------------
    # multiple attributes
    if loc.startswith(('@@', '@|', '@!')) and loc not in ('@@', '@|', '@!'):
        # keep the returned method: the helper falls back to xpath for text() conditions
        loc_by, loc_str = _make_multi_css_str('*', loc)

    # single attribute
    elif loc.startswith('@') and loc != '@':
        loc_by, loc_str = _make_single_css_str('*', loc)

    # by tag name
    elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='):
        at_ind = loc.find('@')
        if at_ind == -1:
            loc_str = loc[4:]
        elif loc[at_ind:].startswith(('@@', '@|', '@!')):
            loc_by, loc_str = _make_multi_css_str(loc[4:at_ind], loc[at_ind:])
        else:
            loc_by, loc_str = _make_single_css_str(loc[4:at_ind], loc[at_ind:])

    # by text or xpath: handled by the xpath converter
    elif loc.startswith(('text=', 'text:', 'text^', 'text$', 'xpath=', 'xpath:', 'x:', 'x=')):
        loc_by, loc_str = str_to_xpath_loc(loc)

    # explicit css selector
    elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='):
        loc_str = loc[4:]
    elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='):
        loc_str = loc[2:]

    # anything else: fuzzy text search
    elif loc:
        loc_by, loc_str = str_to_xpath_loc(loc)

    else:
        loc_str = '*'

    return loc_by, loc_str
def _make_single_xpath_str(tag: str, text: str) -> tuple:
"""生成单属性xpath语句
:param tag: 标签名
:param text: 待处理的字符串
:return: xpath字符串
@ -137,16 +201,13 @@ def _make_single_xpath_str(tag: str, text: str) -> str:
r = split(r'([:=$^])', text, maxsplit=1)
len_r = len(r)
len_r0 = len(r[0])
if len_r != 3 and len_r0 > 1:
arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}'
elif len_r == 3 and len_r0 > 1:
if len_r == 3 and len_r0 > 1:
symbol = r[1]
if symbol == '=': # 精确查找
arg = '.' if r[0] in ('@text()', '@tx()') else r[0]
arg_str = f'{arg}={_make_search_str(r[2])}'
elif symbol == '^': # 开头开头
elif symbol == '^': # 匹配开头
if r[0] in ('@text()', '@tx()'):
txt_str = f'/text()[starts-with(., {_make_search_str(r[2])})]/..'
arg_str = ''
@ -172,24 +233,32 @@ def _make_single_xpath_str(tag: str, text: str) -> str:
else:
raise ValueError(f'符号不正确:{symbol}')
elif len_r != 3 and len_r0 > 1:
arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}'
if arg_str:
arg_list.append(arg_str)
arg_str = ' and '.join(arg_list)
return f'//*[{arg_str}]{txt_str}' if arg_str else f'//*{txt_str}'
return 'xpath', f'//*[{arg_str}]{txt_str}' if arg_str else f'//*{txt_str}'
def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str:
def _make_multi_xpath_str(tag: str, text: str) -> tuple:
"""生成多属性查找的xpath语句
:param tag: 标签名
:param text: 待处理的字符串
:param _and: 是否与方式
:return: xpath字符串
"""
arg_list = []
args = text.split('@@') if _and else text.split('@|')
args = split(r'(@!|@@|@\|)', text)[1:]
if '@@' in args and '@|' in args:
raise ValueError('@@和@|不能同时出现在一个定位语句中。')
elif '@@' in args:
_and = True
else: # @|
_and = False
for arg in args[1:]:
r = split(r'([:=$^])', arg, maxsplit=1)
for k in range(0, len(args) - 1, 2):
r = split(r'([:=$^])', args[k + 1], maxsplit=1)
arg_str = ''
len_r = len(r)
@ -197,8 +266,7 @@ def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str:
arg_str = 'not(@*)'
else:
r[0], ignore = (r[0][1:], True) if r[0][0] == '-' else (r[0], None) # 是否去除某个属性
ignore = True if args[k] == '@!' else False # 是否去除某个属性
if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性
arg_str = 'normalize-space(text())' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
@ -232,7 +300,7 @@ def _make_multi_xpath_str(tag: str, text: str, _and: bool = True) -> str:
condition = f' and ({arg_str})' if arg_str else ''
arg_str = f'name()="{tag}"{condition}'
return f'//*[{arg_str}]' if arg_str else f'//*'
return 'xpath', f'//*[{arg_str}]' if arg_str else f'//*'
def _make_search_str(search_str: str) -> str:
@ -252,6 +320,68 @@ def _make_search_str(search_str: str) -> str:
return search_str
def _make_multi_css_str(tag: str, text: str) -> tuple:
"""生成多属性查找的css selector语句
:param tag: 标签名
:param text: 待处理的字符串
:return: css selector字符串
"""
arg_list = []
args = split(r'(@!|@@|@\|)', text)[1:]
if '@@' in args and '@|' in args:
raise ValueError('@@和@|不能同时出现在一个定位语句中。')
elif '@@' in args:
_and = True
else: # @|
_and = False
for k in range(0, len(args) - 1, 2):
r = split(r'([:=$^])', args[k + 1], maxsplit=1)
if not r[0] or r[0].startswith(('text()', 'tx()')):
return _make_multi_xpath_str(tag, text)
arg_str = ''
len_r = len(r)
ignore = True if args[k] == '@!' else False # 是否去除某个属性
if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性
arg_str = f'[{r[0]}]'
elif len_r == 3: # 属性名和内容都有
d = {'=': '', '^': '^', '$': '$', ':': '*'}
arg_str = f'[{r[0]}{d[r[1]]}={css_trans(r[2])}]'
if arg_str and ignore:
arg_str = f':not({arg_str})'
if arg_str:
arg_list.append(arg_str)
if _and:
return 'css selector', f'{tag}{"".join(arg_list)}'
return 'css selector', f'{tag}{("," + tag).join(arg_list)}'
def _make_single_css_str(tag: str, text: str) -> tuple:
    """Build a css selector for a single-attribute locator.

    A bare '@' and text()/tx() conditions are handed to the xpath builder.

    :param tag: tag name
    :param text: the attribute part of the locator, starting with '@'
    :return: (locating method, locator string) tuple
    """
    if text == '@' or text.startswith(('@text()', '@tx()')):
        return _make_single_xpath_str(tag, text)

    parts = split(r'([:=$^])', text, maxsplit=1)
    name = parts[0][1:]  # drop the leading '@'
    if len(parts) != 3:
        selector = f'[{css_trans(name)}]'
    else:
        operators = {'=': '', '^': '^', '$': '$', ':': '*'}
        selector = f'[{name}{operators[parts[1]]}={css_trans(parts[2])}]'

    return 'css selector', f'{tag}{selector}'
def translate_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
@ -276,7 +406,7 @@ def translate_loc(loc):
elif loc_0 == By.CLASS_NAME:
loc_str = f'//*[@class="{loc[1]}"]'
elif loc_0 == By.PARTIAL_LINK_TEXT:
elif loc_0 == By.LINK_TEXT:
loc_str = f'//a[text()="{loc[1]}"]'
elif loc_0 == By.NAME:
@ -292,3 +422,53 @@ def translate_loc(loc):
raise ValueError('无法识别的定位符。')
return loc_by, loc_str
def translate_css_loc(loc):
    """Convert a By-style locator tuple, using css selector where possible.

    Link-text locators have no css selector equivalent and are returned as
    xpath; xpath locators are passed through unchanged.

    :param loc: By-style locator tuple (method, value)
    :return: (css selector or xpath, locator string) tuple
    :raises ValueError: the tuple does not have two items or the method is unknown
    """
    if len(loc) != 2:
        raise ValueError('定位符长度必须为2。')

    loc_by = By.CSS_SELECTOR
    loc_0 = loc[0].lower()
    if loc_0 == By.XPATH:
        loc_by = By.XPATH
        loc_str = loc[1]

    elif loc_0 == By.CSS_SELECTOR:
        loc_by = loc_0
        loc_str = loc[1]

    elif loc_0 == By.ID:
        loc_str = f'#{css_trans(loc[1])}'

    elif loc_0 == By.CLASS_NAME:
        loc_str = f'.{css_trans(loc[1])}'

    elif loc_0 == By.LINK_TEXT:
        loc_by = By.XPATH
        # this is an xpath string literal: css escaping would corrupt the text
        loc_str = f'//a[text()="{loc[1]}"]'

    elif loc_0 == By.NAME:
        # css attribute selectors take the bare attribute name ('@name' is xpath syntax)
        loc_str = f'*[name={css_trans(loc[1])}]'

    elif loc_0 == By.TAG_NAME:
        loc_str = loc[1]

    elif loc_0 == By.PARTIAL_LINK_TEXT:
        loc_by = By.XPATH
        loc_str = f'//a[contains(text(),"{loc[1]}")]'

    else:
        raise ValueError('无法识别的定位符。')

    return loc_by, loc_str
def css_trans(txt):
    """Escape characters that are special in css selectors with a backslash.

    :param txt: text to escape
    :return: the escaped text
    """
    special = frozenset('!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~ ')
    escaped = []
    for char in txt:
        escaped.append('\\' + char if char in special else char)
    return ''.join(escaped)

View File

@ -0,0 +1,26 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union
# Returns whether text starts with one of the locator prefixes.
def is_loc(text: str) -> bool: ...


# Converts a locator string or a By-style tuple into a locator tuple;
# css_mode chooses the css-oriented converters, anything else raises TypeError.
def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool = False) -> tuple: ...


# Converts a locator string into a (by, locator string) tuple.
def str_to_xpath_loc(loc: str) -> tuple: ...


# Converts a By-style locator tuple into a css selector or xpath locator tuple.
def translate_loc(loc: tuple) -> tuple: ...


# Like translate_loc, but produces css selectors where possible.
def translate_css_loc(loc: tuple) -> tuple: ...


# Escapes css special characters in txt with a backslash.
def css_trans(txt: str) -> str: ...

View File

@ -0,0 +1,13 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
class Settings:
    """Global switches; all of them are off by default."""

    # NOTE(review): judging by the names, each flag makes the corresponding
    # failure raise an exception instead of being reported quietly -- confirm
    # against the code that reads them.
    raise_when_ele_not_found = False
    raise_when_click_failed = False
    raise_when_wait_failed = False

View File

@ -0,0 +1,229 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from platform import system
from shutil import rmtree
from time import perf_counter, sleep
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
from .._configs.options_manage import OptionsManager
from ..errors import (ContextLostError, ElementLostError, CDPError, PageDisconnectedError, NoRectError,
AlertExistsError, WrongURLError, StorageError, CookieFormatError, JavaScriptError)
def port_is_using(ip, port):
    """Check whether something is listening on the given address.

    A TCP connection is attempted with a 0.1 second timeout. The socket is
    always closed, including when the port value is invalid or the connection
    attempt raises.

    :param ip: browser address
    :param port: browser port (int or numeric str)
    :return: True when the connection succeeded, else False
    """
    from socket import socket, AF_INET, SOCK_STREAM
    with socket(AF_INET, SOCK_STREAM) as s:
        s.settimeout(.1)
        return s.connect_ex((ip, int(port))) == 0
def clean_folder(folder_path, ignore=None):
    """Empty a folder, keeping the entries whose names are listed in ignore.

    Files are deleted; sub-folders are removed recursively with errors ignored.

    :param folder_path: path of the folder to empty
    :param ignore: names of files and folders to keep
    :return: None
    """
    kept_names = ignore if ignore else []
    for entry in Path(folder_path).iterdir():
        if entry.name in kept_names:
            continue
        if entry.is_file():
            entry.unlink()
        elif entry.is_dir():
            rmtree(entry, True)
def show_or_hide_browser(page, hide=True):
    """Show or hide the windows of a locally running browser.

    Nothing is done when the page address is not local or when the page has
    no process id.

    :param page: ChromePage object
    :param hide: True to hide the windows, False to show them
    :return: None
    :raises OSError: the system is not Windows
    :raises ImportError: pypiwin32 is not installed
    """
    if not page.address.startswith(('127.0.0.1', 'localhost')):
        return

    if system().lower() != 'windows':
        raise OSError('该方法只能在Windows系统使用。')

    try:
        from win32gui import ShowWindow
        from win32con import SW_HIDE, SW_SHOW
    except ImportError:
        raise ImportError('请先安装pip install pypiwin32')

    pid = page.process_id
    if not pid:
        return None

    if hide:
        command = SW_HIDE
    else:
        command = SW_SHOW
    for handle in get_chrome_hwnds_from_pid(pid, page.title):
        ShowWindow(handle, command)
def get_browser_progress_id(progress, address):
    """Get the process id of the browser.

    With a known process object its pid is returned. Otherwise the output of
    ``netstat -nao | findstr`` is searched for the first LISTENING line of the
    port, and the last space-separated field of that line is returned.

    :param progress: known process object, or None when there is none
    :param address: browser address including the port
    :return: the process id, or None when no listening line was found
    """
    if progress:
        return progress.pid

    from os import popen
    port = address.split(':')[-1]
    output_lines = popen(f'netstat -nao | findstr :{port}').read().split('\n')
    listening = next((line for line in output_lines if 'LISTENING' in line), '')
    if not listening:
        return None
    return listening.split(' ')[-1]
def get_chrome_hwnds_from_pid(pid, title):
    """Collect the handles of the windows owned by a process.

    A window is collected when its caption contains title and it belongs to
    the process pid.

    :param pid: process id
    :param title: window title
    :return: list of window handles
    :raises ImportError: pypiwin32 is not installed
    """
    try:
        from win32gui import IsWindow, GetWindowText, EnumWindows
        from win32process import GetWindowThreadProcessId
    except ImportError:
        raise ImportError('请先安装win32guipip install pypiwin32')

    wanted_pid = str(pid)

    def collect(hwnd, found):
        if IsWindow(hwnd) and title in GetWindowText(hwnd):
            owner_pid = GetWindowThreadProcessId(hwnd)[1]
            if str(owner_pid) == wanted_pid:
                found.append(hwnd)
        # always report True, for every window visited
        return True

    handles = []
    EnumWindows(collect, handles)
    return handles
def wait_until(page, condition, timeout=10, poll=0.1, raise_err=True):
    """Wait until the condition yields a truthy value, or until the timeout.

    A str or tuple condition is treated as a locator and looked up with
    page.s_ele(); a callable condition is called with page. Exceptions raised
    by the condition count as "not yet" and are swallowed.

    :param page: DrissionPage object
    :param condition: locator (str or tuple) or callable taking page
    :param timeout: time limit in seconds
    :param poll: pause between two attempts, in seconds
    :param raise_err: whether to raise on timeout
    :return: the truthy value produced by the condition, or False on timeout when raise_err is False
    :raises AttributeError: a locator was given but page has no s_ele method
    :raises ValueError: condition is neither callable, str nor tuple
    :raises TimeoutError: the wait timed out and raise_err is True
    """
    end_time = perf_counter() + timeout

    if isinstance(condition, (str, tuple)):
        if not callable(getattr(page, 's_ele', None)):
            raise AttributeError('page对象缺少s_ele方法')

        def check(target):
            return target.s_ele(condition)

    elif callable(condition):
        check = condition
    else:
        raise ValueError('condition必须是函数或者字符串或者元组')

    while perf_counter() < end_time:
        try:
            outcome = check(page)
        except Exception:
            outcome = None
        if outcome:
            return outcome
        sleep(poll)

    if not raise_err:
        return False
    raise TimeoutError(f'等待超时(等待{timeout}秒)。')
def stop_process_on_port(port):
    """Terminate the processes that have a connection on the given local port.

    Processes whose connections cannot be read are skipped. Failures to
    terminate are ignored when the process is gone, inaccessible or a zombie;
    other failures are printed.

    :param port: port number
    :return: None
    """
    for process in process_iter(['pid', 'connections']):
        try:
            open_connections = process.connections()
        except (AccessDenied, NoSuchProcess):
            continue

        for connection in open_connections:
            if connection.laddr.port != int(port):
                continue
            try:
                process.terminate()
            except (NoSuchProcess, AccessDenied, ZombieProcess):
                pass
            except Exception as e:
                print(f"{process.pid} {port}: {e}")
def configs_to_here(save_name=None):
    """Save the default ini configuration under a local file name.

    :param save_name: file name without the '.ini' suffix; None means 'dp_configs.ini'
    :return: None
    """
    if save_name is None:
        target = 'dp_configs.ini'
    else:
        target = f'{save_name}.ini'
    OptionsManager('default').save(target)
def raise_error(result, ignore=None):
    """Raise the exception that corresponds to an error result.

    The 'error' text of the result selects the exception type. Unrecognised
    texts become TimeoutError or CDPError when the result type is 'timeout'
    or 'call_method_error', and RuntimeError otherwise.

    :param result: dict containing the error
    :param ignore: exception type (or tuple of types) that should not be raised
    :return: None, when the selected exception is an instance of ignore
    """

    def pick_exception():
        error = result['error']
        if error in ('Cannot find context with specified id', 'Inspected target navigated or closed'):
            return ContextLostError()
        if error in ('Could not find node with given id', 'Could not find object with given id',
                     'No node with given id found', 'Node with given id does not belong to the document',
                     'No node found for given backend id'):
            return ElementLostError()
        if error in ('connection disconnected', 'No target with given id found'):
            return PageDisconnectedError()
        if error == 'alert exists.':
            return AlertExistsError()
        if error in ('Node does not have a layout object', 'Could not compute box model.'):
            return NoRectError()
        if error == 'Cannot navigate to invalid URL':
            return WrongURLError(f'无效的url{result["args"]["url"]}。也许要加上"http://"')
        if error == 'Frame corresponds to an opaque origin and its storage key cannot be serialized':
            return StorageError()
        if error == 'Sanitizing cookie failed':
            return CookieFormatError(f'cookie格式不正确{result["args"]}')
        if error == 'Given expression does not evaluate to a function':
            return JavaScriptError(f'传入的js无法解析成函数\n{result["args"]["functionDeclaration"]}')

        if result['type'] not in ('call_method_error', 'timeout'):
            return RuntimeError(result)

        # unexpected protocol failure: build a report with version and run time
        from DrissionPage import __version__
        from time import process_time
        txt = (f'\n错误:{result["error"]}\nmethod{result["method"]}\nargs{result["args"]}\n'
               f'版本:{__version__}\n运行时间:{process_time()}\n出现这个错误可能意味着程序有bug请把错误信息和重现方法'
               '告知作者,谢谢。\n报告网站https://gitee.com/g1879/DrissionPage/issues')
        return TimeoutError(txt) if result['type'] == 'timeout' else CDPError(txt)

    exc = pick_exception()
    if ignore and isinstance(exc, ignore):
        return
    raise exc

View File

@ -0,0 +1,40 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from os import popen
from pathlib import Path
from typing import Union
from types import FunctionType
from .._pages.chromium_page import ChromiumPage
# Returns True when a TCP connection to ip:port succeeds.
def port_is_using(ip: str, port: Union[str, int]) -> bool: ...


# Empties a folder, keeping the entries named in ignore.
def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ...


# Shows or hides the windows of a local browser (Windows only).
def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ...


# Returns progress.pid when a process object is given (an int for a Popen object),
# otherwise the pid text parsed from netstat output, or None.
def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, int, None]: ...


# Returns the handles of the windows owned by pid whose caption contains title.
def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ...


# Defaults mirror the implementation: timeout=10, poll=0.1, raise_err=True.
def wait_until(page, condition: Union[FunctionType, str, tuple], timeout: float = 10, poll: float = 0.1,
               raise_err: bool = True): ...


# Terminates the processes that have a connection on the given local port.
def stop_process_on_port(port: Union[int, str]) -> None: ...


# The parameter is called save_name in the implementation; '.ini' is appended to it.
def configs_to_here(save_name: Union[Path, str] = None) -> None: ...


# Raises the exception matching result['error'] unless it is an instance of ignore.
def raise_error(result: dict, ignore=None) -> None: ...

View File

@ -1,105 +1,20 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from base64 import b64decode
from datetime import datetime
from html import unescape
from http.cookiejar import Cookie
from json import loads, JSONDecodeError
from re import sub
from urllib.parse import urlparse, urljoin, urlunparse
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
from tldextract import extract
class ResponseData(object):
    """Container for one captured response data packet.

    Names that are not real attributes are looked up in the response data,
    both with attribute access and with item access; missing names give None.
    """
    __slots__ = ('requestId', 'response', 'rawBody', 'tab', 'target', 'url', 'status', 'statusText', 'securityDetails',
                 'headersText', 'mimeType', 'requestHeadersText', 'connectionReused', 'connectionId', 'remoteIPAddress',
                 'remotePort', 'fromDiskCache', 'fromServiceWorker', 'fromPrefetchCache', 'encodedDataLength', 'timing',
                 'serviceWorkerResponseSource', 'responseTime', 'cacheStorageCacheName', 'protocol', 'securityState',
                 '_requestHeaders', '_body', '_base64_body', '_rawPostData', '_postData', 'method')

    def __init__(self, request_id, response, body, tab, target):
        """
        :param request_id: id of the request this packet belongs to
        :param response: the response data
        :param body: the content carried by the response
        :param tab: id of the tab that produced this packet
        :param target: the listening target
        """
        self.requestId = request_id
        self.tab = tab
        self.target = target
        self.response = CaseInsensitiveDict(response)
        self.rawBody = body

        # lazily filled caches and values set later by the owner
        self._body = None
        self._base64_body = False
        self._requestHeaders = None
        self._rawPostData = None
        self._postData = None

    def __getattr__(self, item):
        # only reached when normal attribute lookup fails
        return self.response.get(item, None)

    def __getitem__(self, item):
        return self.response.get(item, None)

    def __repr__(self):
        return f'<ResponseData target={self.target} request_id={self.requestId}>'

    @property
    def headers(self):
        """Return the headers as a case-insensitive dict, or None when there are none"""
        raw_headers = self.response.get('headers', None)
        if not raw_headers:
            return None
        return CaseInsensitiveDict(raw_headers)

    @property
    def requestHeaders(self):
        """Return the request headers: the explicitly set value if any, else those of the response data"""
        if self._requestHeaders:
            return self._requestHeaders
        raw_headers = self.response.get('requestHeaders', None)
        if not raw_headers:
            return None
        return CaseInsensitiveDict(raw_headers)

    @requestHeaders.setter
    def requestHeaders(self, val):
        """Set the request headers"""
        self._requestHeaders = val

    @property
    def postData(self):
        """Return the post data, parsed as json when possible, else as it was set"""
        if self._postData is None and self._rawPostData:
            try:
                self._postData = loads(self._rawPostData)
            except (JSONDecodeError, TypeError):
                self._postData = self._rawPostData
        return self._postData

    @postData.setter
    def postData(self, val):
        """Set the raw post data"""
        self._rawPostData = val

    @property
    def body(self):
        """Return the body: base64-decoded bytes when flagged as base64, parsed json when possible,
        otherwise the raw body unchanged"""
        if self._body is not None:
            return self._body

        if self._base64_body:
            self._body = b64decode(self.rawBody)
            return self._body

        try:
            self._body = loads(self.rawBody)
        except (JSONDecodeError, TypeError):
            self._body = self.rawBody
        return self._body
def get_ele_txt(e):
"""获取元素内所有文本
:param e: 元素对象
@ -142,7 +57,7 @@ def get_ele_txt(e):
if sub('[ \n\t\r]', '', el) != '': # 字符除了回车和空格还有其它内容
txt = el
if not pre:
txt = txt.replace('\n', ' ').strip(' ')
txt = txt.replace('\r\n', ' ').replace('\n', ' ').strip(' ')
txt = sub(r' {2,}', ' ', txt)
str_list.append(txt)
@ -190,8 +105,6 @@ def location_in_viewport(page, loc_x, loc_y):
if (x< scrollLeft || y < scrollTop || x > vWidth + scrollLeft || y > vHeight + scrollTop){{return false;}}
return true;}}'''
return page.run_js(js)
# const vWidth = window.innerWidth || document.documentElement.clientWidth;
# const vHeight = window.innerHeight || document.documentElement.clientHeight;
def offset_scroll(ele, offset_x, offset_y):
@ -202,39 +115,40 @@ def offset_scroll(ele, offset_x, offset_y):
:param offset_y: 偏移量y
:return: 视口中的坐标
"""
loc_x, loc_y = ele.location
cp_x, cp_y = ele.locations.click_point
loc_x, loc_y = ele.rect.location
cp_x, cp_y = ele.rect.click_point
lx = loc_x + offset_x if offset_x else cp_x
ly = loc_y + offset_y if offset_y else cp_y
if not location_in_viewport(ele.page, lx, ly):
clientWidth = ele.page.run_js('return document.body.clientWidth;')
clientHeight = ele.page.run_js('return document.body.clientHeight;')
ele.page.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2)
cl_x, cl_y = ele.locations.viewport_location
ccp_x, ccp_y = ele.locations.viewport_click_point
cl_x, cl_y = ele.rect.viewport_location
ccp_x, ccp_y = ele.rect.viewport_click_point
cx = cl_x + offset_x if offset_x else ccp_x
cy = cl_y + offset_y if offset_y else ccp_y
return cx, cy
def make_absolute_link(link, page=None):
def make_absolute_link(link, baseURI=None):
"""获取绝对url
:param link: 超链接
:param page: 页面对象
:param baseURI: 页面或iframe的url
:return: 绝对链接
"""
if not link:
return link
link = link.strip()
parsed = urlparse(link)._asdict()
# 是相对路径与页面url拼接并返回
if not parsed['netloc']:
return urljoin(page.url, link) if page else link
return urljoin(baseURI, link) if baseURI else link
# 是绝对路径但缺少协议从页面url获取协议并修复
if not parsed['scheme'] and page:
parsed['scheme'] = urlparse(page.url).scheme
if not parsed['scheme'] and baseURI:
parsed['scheme'] = urlparse(baseURI).scheme
parsed = tuple(v for v in parsed.values())
return urlunparse(parsed)
@ -254,20 +168,20 @@ def is_js_func(func):
def cookie_to_dict(cookie):
"""把Cookie对象转为dict格式
:param cookie: Cookie对象
:param cookie: Cookie对象字符串或字典
:return: cookie字典
"""
if isinstance(cookie, Cookie):
cookie_dict = cookie.__dict__.copy()
cookie_dict.pop('rfc2109')
cookie_dict.pop('_rest')
cookie_dict.pop('rfc2109', None)
cookie_dict.pop('_rest', None)
return cookie_dict
elif isinstance(cookie, dict):
cookie_dict = cookie
elif isinstance(cookie, str):
cookie = cookie.split(',' if ',' in cookie else ';')
cookie = cookie.rstrip(';,').split(',' if ',' in cookie else ';')
cookie_dict = {}
for key, attr in enumerate(cookie):
@ -296,7 +210,7 @@ def cookies_to_tuple(cookies):
cookies = tuple(cookie_to_dict(cookie) for cookie in cookies)
elif isinstance(cookies, str):
cookies = tuple(cookie_to_dict(cookie.lstrip()) for cookie in cookies.split(";"))
cookies = tuple(cookie_to_dict(c.lstrip()) for c in cookies.rstrip(';,').split(',' if ',' in cookies else ';'))
elif isinstance(cookies, dict):
cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies)
@ -334,31 +248,51 @@ def set_browser_cookies(page, cookies):
:param cookies: cookies信息
:return: None
"""
cookies = cookies_to_tuple(cookies)
for cookie in cookies:
for cookie in cookies_to_tuple(cookies):
if 'expiry' in cookie:
cookie['expires'] = int(cookie['expiry'])
cookie.pop('expiry')
if 'expires' in cookie:
cookie['expires'] = int(cookie['expires'])
if not cookie['expires']:
cookie.pop('expires')
elif isinstance(cookie['expires'], str):
if cookie['expires'].isdigit():
cookie['expires'] = int(cookie['expires'])
elif cookie['expires'].replace('.', '').isdigit():
cookie['expires'] = float(cookie['expires'])
else:
try:
cookie['expires'] = datetime.strptime(cookie['expires'],
'%a, %d %b %Y %H:%M:%S GMT').timestamp()
except ValueError:
cookie['expires'] = datetime.strptime(cookie['expires'],
'%a, %d %b %y %H:%M:%S GMT').timestamp()
if cookie['value'] is None:
cookie['value'] = ''
elif not isinstance(cookie['value'], str):
cookie['value'] = str(cookie['value'])
if cookie['name'].startswith('__Secure-'):
cookie['secure'] = True
if cookie['name'].startswith('__Host-'):
cookie['path'] = '/'
cookie['secure'] = True
cookie['url'] = page.url
page.run_cdp_loaded('Network.setCookie', **cookie)
continue # 不用设置域名,可退出
else:
if cookie.get('domain', None):
try:
page.run_cdp_loaded('Network.setCookie', **cookie)
if is_cookie_in_driver(page, cookie):
continue
except Exception:
pass
if cookie.get('domain', None):
try:
page.run_cdp_loaded('Network.setCookie', **cookie)
if is_cookie_in_driver(page, cookie):
continue
except Exception:
pass
ex_url = extract(page._browser_url)
d_list = ex_url.subdomain.split('.')
@ -384,7 +318,13 @@ def is_cookie_in_driver(page, cookie):
:param cookie: dict格式cookie
:return: bool
"""
for c in page.get_cookies():
if cookie['name'] == c['name'] and cookie['value'] == c['value']:
return True
if 'domain' in cookie:
for c in page.get_cookies(all_domains=True):
if cookie['name'] == c['name'] and cookie['value'] == c['value'] and cookie['domain'] == c.get('domain',
None):
return True
else:
for c in page.get_cookies(all_domains=True):
if cookie['name'] == c['name'] and cookie['value'] == c['value']:
return True
return False

View File

@ -0,0 +1,49 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from http.cookiejar import Cookie
from typing import Union
from requests import Session
from requests.cookies import RequestsCookieJar
from .._base.base import BasePage, DrissionElement
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
# Presumably returns the text content of an element — implementation not visible here; confirm.
def get_ele_txt(e: DrissionElement) -> str: ...
# Presumably normalises HTML text — implementation not visible here; confirm.
def format_html(text: str) -> str: ...
# Tells whether the point (loc_x, loc_y) lies inside the page's viewport; offset_scroll uses it to decide
# whether the page must be scrolled first.
def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool: ...
# Scrolls the page when the target point of ``ele`` is outside the viewport and returns a two-item
# (x, y) tuple of viewport coordinates. An offset of 0/None falls back to the element's click point.
def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> tuple: ...
# Returns an absolute URL: a relative link is joined onto ``baseURI``; a link that has a host but no
# scheme borrows the scheme of ``baseURI``. An empty link is returned as-is.
def make_absolute_link(link: str, baseURI: str = None) -> str: ...
# Presumably reports whether ``func`` is JavaScript function source — implementation not visible; confirm.
def is_js_func(func: str) -> bool: ...
# Converts a single cookie given as a Cookie object, a string or a dict into a dict.
def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ...
# Normalises any supported cookie container into a tuple of cookie dicts.
def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ...
# Presumably stores the given cookies on a requests Session — implementation not visible; confirm.
def set_session_cookies(session: Session, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...
# Writes the given cookies into the browser by issuing the CDP command 'Network.setCookie' for each one.
def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...
# Returns True when the browser already holds a cookie with the same name and value
# (and the same domain, when ``cookie`` carries a 'domain' key).
def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ...

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,282 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union, Tuple, List, Any, Optional, Literal
from .chromium_tab import ChromiumTab
from .._base.base import BasePage
from .._base.browser import Browser
from .._base.driver import Driver
from .._elements.chromium_element import ChromiumElement
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._units.actions import Actions
from .._units.listener import Listener
from .._units.rect import TabRect
from .._units.screencast import Screencast
from .._units.scroller import Scroller, PageScroller
from .._units.setter import ChromiumBaseSetter
from .._units.states import PageStates
from .._units.waiter import BaseWaiter
PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True]
class ChromiumBase(BasePage):
    """Type stub for ChromiumBase, the BasePage subclass shared by browser-backed page objects.

    Declares the private state created in ``__init__``, the loading/frame event callbacks,
    the read-only properties and the public methods (navigation, element lookup, JavaScript
    and CDP execution, screenshots, alert handling).
    """

    def __init__(self,
                 address: Union[str, int],
                 tab_id: str = None,
                 timeout: float = None):
        # ---- private state declared for type checkers ----
        self._browser: Browser = ...
        self._page: ChromiumPage = ...
        self.address: str = ...
        self._driver: Driver = ...
        self._frame_id: str = ...
        self._is_reading: bool = ...
        self._is_timeout: bool = ...
        self._timeouts: Timeout = ...
        self._first_run: bool = ...
        self._is_loading: bool = ...
        self._load_mode: str = ...
        self._scroll: Scroller = ...
        self._url: str = ...
        self._root_id: str = ...
        self._upload_list: list = ...
        self._wait: BaseWaiter = ...
        self._set: ChromiumBaseSetter = ...
        self._screencast: Screencast = ...
        self._actions: Actions = ...
        self._listener: Listener = ...
        self._states: PageStates = ...
        self._alert: Alert = ...
        self._has_alert: bool = ...
        self._doc_got: bool = ...
        self._load_end_time: float = ...
        self._init_jss: list = ...
        self._ready_state: Optional[str] = ...
        self._rect: TabRect = ...

    # ---- connection and document bootstrap ----

    def _connect_browser(self, tab_id: str = None) -> None: ...

    def _driver_init(self, tab_id: str) -> None: ...

    def _get_document(self, timeout: float = 10) -> bool: ...

    def _wait_loaded(self, timeout: float = None) -> bool: ...

    # ---- event callbacks (named after the browser events they handle) ----

    def _onFrameDetached(self, **kwargs) -> None: ...

    def _onFrameAttached(self, **kwargs) -> None: ...

    def _onFrameStartedLoading(self, **kwargs): ...

    def _onFrameNavigated(self, **kwargs): ...

    def _onDomContentEventFired(self, **kwargs): ...

    def _onLoadEventFired(self, **kwargs): ...

    def _onFrameStoppedLoading(self, **kwargs): ...

    def _onFileChooserOpened(self, **kwargs): ...

    def _wait_to_stop(self): ...

    def _d_set_start_options(self, address) -> None: ...

    def _d_set_runtime_settings(self) -> None: ...

    def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement],
                 timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...

    # ---- read-only properties ----

    @property
    def _js_ready_state(self) -> str: ...

    @property
    def browser(self) -> Browser: ...

    @property
    def title(self) -> str: ...

    @property
    def driver(self) -> Driver: ...

    @property
    def url(self) -> str: ...

    @property
    def _browser_url(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def json(self) -> Union[dict, None]: ...

    @property
    def _target_id(self) -> str: ...

    @property
    def tab_id(self) -> str: ...

    @property
    def active_ele(self) -> ChromiumElement: ...

    @property
    def load_mode(self) -> str: ...

    @property
    def user_agent(self) -> str: ...

    @property
    def scroll(self) -> PageScroller: ...

    @property
    def rect(self) -> TabRect: ...

    @property
    def timeouts(self) -> Timeout: ...

    @property
    def upload_list(self) -> list: ...

    @property
    def wait(self) -> BaseWaiter: ...

    @property
    def set(self) -> ChromiumBaseSetter: ...

    @property
    def screencast(self) -> Screencast: ...

    @property
    def actions(self) -> Actions: ...

    @property
    def listen(self) -> Listener: ...

    @property
    def states(self) -> PageStates: ...

    # ---- JavaScript execution ----

    def run_js(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ...

    def run_js_loaded(self, script: str, *args, as_expr: bool = False, timeout: float = None) -> Any: ...

    def run_async_js(self, script: str, *args, as_expr: bool = False) -> None: ...

    # ---- navigation, cookies and element lookup ----

    def get(self, url: str, show_errmsg: bool = False, retry: int = None,
            interval: float = None, timeout: float = None) -> Union[None, bool]: ...

    def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
                    all_info: bool = False) -> Union[list, dict]: ...

    def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
            timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...

    def eles(self, loc_or_str: Union[Tuple[str, str], str],
             timeout: float = None) -> List[ChromiumElement]: ...

    def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \
            -> Union[SessionElement, NoneElement]: ...

    def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...

    def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
                       timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \
            -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ...

    def refresh(self, ignore_cache: bool = False) -> None: ...

    def forward(self, steps: int = 1) -> None: ...

    def back(self, steps: int = 1) -> None: ...

    def _forward_or_back(self, steps: int) -> None: ...

    def stop_loading(self) -> None: ...

    def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None: ...

    def get_frame(self, loc_ind_ele: Union[str, int, tuple, ChromiumFrame], timeout: float = None) -> ChromiumFrame: ...

    def get_frames(self, loc: Union[str, tuple] = None, timeout: float = None) -> List[ChromiumFrame]: ...

    # ---- raw CDP access, storage and init scripts ----

    def run_cdp(self, cmd: str, **cmd_args) -> dict: ...

    def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict: ...

    def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ...

    def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ...

    def add_init_js(self, script: str) -> str: ...

    def remove_init_js(self, script_id: str = None) -> None: ...

    # ---- screenshots ----
    # ``path`` is annotated as Union[str, Path]; a bare ``[str, Path]`` list literal is not a valid type.

    def get_screenshot(self, path: Union[str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None,
                       as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[int, int] = None,
                       right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ...

    def _get_screenshot(self, path: Union[str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None,
                        as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[float, float] = None,
                        right_bottom: Tuple[float, float] = None, ele: ChromiumElement = None) -> Union[str, bytes]: ...

    def clear_cache(self, session_storage: bool = True, local_storage: bool = True, cache: bool = True,
                    cookies: bool = True) -> None: ...

    def disconnect(self) -> None: ...

    # ---- alert handling ----
    # The "no alert" result is the value False, which must be spelled Literal[False] in an annotation.

    def handle_alert(self, accept: bool = True, send: str = None, timeout: float = None,
                     next_one: bool = False) -> Union[str, Literal[False]]: ...

    def _handle_alert(self, accept: bool = True, send: str = None, timeout: float = None,
                      next_one: bool = False) -> Union[str, Literal[False]]: ...

    def _on_alert_close(self, **kwargs): ...

    def _on_alert_open(self, **kwargs): ...

    # ---- connection helpers used by get() ----

    def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...

    def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
                   timeout: float = None) -> Union[bool, None]: ...
class Timeout(object):
    # Stub for the timeout settings object exposed through ChromiumBase.timeouts.
    def __init__(self, page: ChromiumBase, base=None, page_load=None, script=None):
        self._page: ChromiumBase = ...  # page object these settings belong to
        self.base: float = ...  # presumably the default timeout for general operations — confirm
        self.page_load: float = ...  # page-load timeout; added to perf_counter() readings, i.e. seconds
        self.script: float = ...  # timeout for JavaScript execution (run_js falls back to it)
class Alert(object):
    # Stub for the record held in ChromiumBase._alert describing a browser dialog.
    # NOTE(review): field meanings below are inferred from names only — confirm against the implementation.
    def __init__(self):
        self.activated: bool = ...  # presumably True while a dialog is open
        self.text: str = ...  # presumably the dialog's message text
        self.type: str = ...  # presumably the dialog kind
        self.defaultPrompt: str = ...  # presumably the prompt's default input value
        self.response_accept: str = ...
        self.response_text: str = ...
        self.handle_next: Optional[bool] = ...
        self.next_text: str = ...
        self.auto: Optional[bool] = ...
# Stub: takes a page or tab plus an optional save path and file name, and returns a str.
# NOTE(review): presumably the page serialised as MHTML — implementation not visible here; confirm.
def get_mhtml(page: Union[ChromiumPage, ChromiumTab],
path: Union[str, Path] = None,
name: str = None) -> str: ...
# Stub: takes a page or tab, an optional save path and file name, and an optional dict of extra
# options, and returns bytes.
# NOTE(review): presumably the page rendered as PDF — implementation not visible here; confirm.
def get_pdf(page: Union[ChromiumPage, ChromiumTab],
path: Union[str, Path] = None,
name: str = None, kwargs: dict=None) -> bytes: ...

View File

@ -1,67 +1,96 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from re import search
from threading import Thread
from copy import copy
from re import search, findall, DOTALL
from time import sleep, perf_counter
from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter
from .chromium_element import ChromiumElement, ChromiumElementWaiter
from .commons.tools import get_usable_path
from .errors import ContextLossError
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._units.listener import FrameListener
from .._units.rect import FrameRect
from .._units.scroller import FrameScroller
from .._units.setter import ChromiumFrameSetter
from .._units.states import FrameStates
from .._units.waiter import FrameWaiter
from ..errors import ContextLostError, ElementLostError, PageDisconnectedError, JavaScriptError
class ChromiumFrame(ChromiumBase):
def __init__(self, page, ele, info=None):
    """Create a frame object for the frame/iframe element ``ele``.

    :param page: page object that contains the frame (a page, tab or parent frame)
    :param ele: the frame element itself
    :param info: result of a previous 'DOM.describeNode' call for ``ele``; when omitted
                 the node description is fetched from the browser
    """
    # The owner is classified by its type name: top-level pages own themselves,
    # tabs and frames delegate to the page they belong to.
    page_type = str(type(page))
    if 'ChromiumPage' in page_type or 'WebPage' in page_type:
        self._page = self._target_page = self.tab = page
        self._browser = page.browser
    else:  # Tab or Frame
        self._page = page.page
        self._browser = self._page.browser
        self._target_page = page
        self.tab = page.tab if 'ChromiumFrame' in page_type else page

    self.address = page.address
    self._tab_id = page.tab_id
    self._backend_id = ele._backend_id
    self._frame_ele = ele
    self._states = None
    self._reloading = False

    # Use the caller-supplied node description when there is one; otherwise ask the browser.
    # (The condition used to be inverted, which subscripted None whenever ``info`` was omitted
    # and discarded ``info`` whenever it was given.)
    if info:
        node = info['node']
    else:
        node = page.run_cdp('DOM.describeNode', backendNodeId=ele._backend_id)['node']
    self._frame_id = node['frameId']

    if self._is_inner_frame():
        # The frame's document is reached through the owner page's connection.
        self._is_diff_domain = False
        self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
        super().__init__(page.address, page.tab_id, page.timeout)
    else:
        # The frame is connected to as its own target, addressed by its frame id.
        self._is_diff_domain = True
        # NOTE(review): _frame_id is removed before the base initialiser runs, presumably so
        # the base class can set its own value — confirm.
        delattr(self, '_frame_id')
        super().__init__(page.address, node['frameId'], page.timeout)
        obj_id = super().run_js('document;', as_expr=True)['objectId']
        self.doc_ele = ChromiumElement(self, obj_id=obj_id)

    self._rect = None

    # Wait up to 5 seconds for the frame to leave the initial blank document.
    end_time = perf_counter() + 5
    while perf_counter() < end_time:
        if self.url not in (None, 'about:blank'):
            break
        sleep(.1)
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
ele2 = ele1('@id=ele_id')
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 超时时间
:param timeout: 超时时间
:return: ChromiumElement对象或属性文本
"""
return self.ele(loc_or_str, timeout)
def __eq__(self, other):
return self._frame_id == getattr(other, '_frame_id', None)
def __repr__(self):
attrs = self.frame_ele.attrs
attrs = self._frame_ele.attrs
attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs]
return f'<ChromiumFrame {self.frame_ele.tag} {" ".join(attrs)}>'
def _runtime_settings(self):
def _d_set_runtime_settings(self):
"""重写设置浏览器运行参数方法"""
self._timeouts = self.page.timeouts
self._page_load_strategy = self.page.page_load_strategy
if not hasattr(self, '_timeouts'):
self._timeouts = copy(self._target_page.timeouts)
self.retry_times = self._target_page.retry_times
self.retry_interval = self._target_page.retry_interval
self._download_path = self._target_page.download_path
self._load_mode = self._target_page._load_mode if not self._is_diff_domain else 'normal'
def _driver_init(self, tab_id):
def _driver_init(self, tab_id, is_init=True):
"""避免出现服务器500错误
:param tab_id: 要跳转到的标签页id
:return: None
@ -69,112 +98,174 @@ class ChromiumFrame(ChromiumBase):
try:
super()._driver_init(tab_id)
except:
self._control_session.get(f'http://{self.address}/json')
self.browser.driver.get(f'http://{self.address}/json')
super()._driver_init(tab_id)
self._driver.set_callback('Inspector.detached', self._onInspectorDetached, immediate=True)
self._driver.set_callback('Page.frameDetached', None)
self._driver.set_callback('Page.frameDetached', self._onFrameDetached, immediate=True)
def _reload(self):
"""重新获取document"""
debug = self._debug
if debug:
print('reload')
self._is_loading = True
# d_debug = self.driver._debug
self._reloading = True
self._doc_got = False
self._frame_ele = ChromiumElement(self.page, backend_id=self._backend_id)
node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node']
self._driver.stop()
try:
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
end_time = perf_counter() + 2
while perf_counter() < end_time:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
if 'frameId' in node:
break
else:
return
except (ElementLostError, PageDisconnectedError):
return
if self._is_inner_frame():
self._is_diff_domain = False
self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId'])
super().__init__(self.address, self.page.tab_id, self.page.timeout)
self._debug = debug
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
self._frame_id = node['frameId']
if self._listener:
self._listener._to_target(self._target_page.tab_id, self.address, self)
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
# self.driver._debug = d_debug
else:
self._is_diff_domain = True
self._tab_obj.stop()
super().__init__(self.address, self.frame_id, self.page.timeout)
obj_id = super().run_js('document;', as_expr=True)['objectId']
self.doc_ele = ChromiumElement(self, obj_id=obj_id)
self._debug = debug
def _check_ok(self):
"""用于应付同域异域之间跳转导致元素丢失问题"""
if self._tab_obj._stopped.is_set():
self._reload()
try:
self.page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id)
except Exception:
self._reload()
# sleep(2)
def _get_new_document(self):
"""刷新cdp使用的document数据"""
if not self._is_reading:
self._is_reading = True
if self._debug:
print('---获取document')
end_time = perf_counter() + 3
while self.is_alive and perf_counter() < end_time:
try:
if self._is_diff_domain is False:
node = self.page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId'])
else:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
break
except Exception:
sleep(.1)
if self._listener:
self._listener._to_target(node['frameId'], self.address, self)
end_time = perf_counter() + self.timeouts.page_load
super().__init__(self.address, node['frameId'], self._target_page.timeout)
timeout = end_time - perf_counter()
if timeout <= 0:
timeout = .5
self._wait_loaded(timeout)
# while perf_counter() < end_time:
# try:
# obj_id = super().run_js('document;', as_expr=True)['objectId']
# self.doc_ele = ChromiumElement(self, obj_id=obj_id)
# break
# except Exception as e:
# sleep(.1)
# if self._debug:
# print(f'获取doc失败重试 {e}')
# else:
# raise RuntimeError('获取document失败。')
# raise GetDocumentError
if self._debug:
print('---获取document结束')
# self.driver._debug = d_debug
self._is_loading = False
self._is_loading = False
self._reloading = False
def _get_document(self, timeout=10):
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._is_reading:
return
self._is_reading = True
try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
else:
timeout = timeout if timeout >= .5 else .5
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
self._root_id = self.doc_ele._obj_id
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
return True
except:
return False
finally:
if not self._reloading: # 阻止reload时标识
self._is_loading = False
self._is_reading = False
def _onFrameNavigated(self, **kwargs):
"""页面跳转时触发"""
if kwargs['frame']['id'] == self.frame_id and self._first_run is False and self._is_loading:
self._is_loading = True
def _onInspectorDetached(self, **kwargs):
"""异域转同域或退出"""
self._reload()
if self._debug:
print('navigated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
def _onFrameDetached(self, **kwargs):
"""同域变异域"""
self.browser._frames.pop(kwargs['frameId'], None)
if kwargs['frameId'] == self._frame_id:
self._reload()
def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
# 用于覆盖父类方法,不能删
self._get_new_document()
if self._debug:
print('loadEventFired')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时触发"""
if kwargs['frameId'] == self.frame_id:
self._is_loading = True
if self._debug:
print('页面开始加载 FrameStartedLoading')
def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后触发"""
if kwargs['frameId'] == self.frame_id and self._first_run is False and self._is_loading:
if self._debug:
print('页面停止加载 FrameStoppedLoading')
self._get_new_document()
# ----------挂件----------
@property
def ids(self):
return self._ids
def scroll(self):
"""返回用于滚动的对象"""
self.wait.load_complete()
if self._scroll is None:
self._scroll = FrameScroller(self)
return self._scroll
@property
def set(self):
"""返回用于设置的对象"""
if self._set is None:
self._set = ChromiumFrameSetter(self)
return self._set
@property
def states(self):
"""返回用于获取状态信息的对象"""
if self._states is None:
self._states = FrameStates(self)
return self._states
@property
def wait(self):
"""返回用于等待的对象"""
if self._wait is None:
self._wait = FrameWaiter(self)
return self._wait
@property
def rect(self):
"""返回获取坐标和大小的对象"""
if self._rect is None:
self._rect = FrameRect(self)
return self._rect
@property
def listen(self):
"""返回用于聆听数据包的对象"""
if self._listener is None:
self._listener = FrameListener(self)
return self._listener
# ----------挂件----------
@property
def _obj_id(self):
"""返回frame元素的object id"""
return self.frame_ele._obj_id
@property
def _node_id(self):
"""返回cdp中的node id"""
return self.frame_ele._node_id
@property
def page(self):
return self._page
@property
def frame_ele(self):
@ -184,150 +275,88 @@ class ChromiumFrame(ChromiumBase):
@property
def tag(self):
"""返回元素tag"""
self._check_ok()
return self.frame_ele.tag
@property
def url(self):
"""返回frame当前访问的url"""
self._check_ok()
return self.doc_ele.run_js('return this.location.href;')
try:
return self.doc_ele.run_js('return this.location.href;')
except JavaScriptError:
return None
@property
def html(self):
"""返回元素outerHTML文本"""
self._check_ok()
tag = self.tag
out_html = self.page.run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele.ids.backend_id)['outerHTML']
sign = search(rf'<{tag}.*?>', out_html).group(0)
out_html = self._target_page.run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML']
sign = search(rf'<{tag}.*?>', out_html, DOTALL).group(0)
return f'{sign}{self.inner_html}</{tag}>'
@property
def inner_html(self):
"""返回元素innerHTML文本"""
self._check_ok()
return self.doc_ele.run_js('return this.documentElement.outerHTML;')
@property
def title(self):
"""返回页面title"""
self._check_ok()
r = self._ele('t:title', raise_err=False)
return r.text if r else None
@property
def cookies(self):
"""以dict格式返回cookies"""
self._check_ok()
return super().cookies if self._is_diff_domain else self.doc_ele.run_js('return this.cookie;')
@property
def attrs(self):
"""返回frame元素所有attribute属性"""
self._check_ok()
return self.frame_ele.attrs
@property
def frame_size(self):
"""返回frame内页面尺寸格式(长, 高)"""
self._check_ok()
w = self.doc_ele.run_js('return this.body.scrollWidth')
h = self.doc_ele.run_js('return this.body.scrollHeight')
return w, h
@property
def size(self):
"""返回frame元素大小"""
self._check_ok()
return self.frame_ele.size
@property
def active_ele(self):
"""返回当前焦点所在元素"""
self._check_ok()
return self.doc_ele.run_js('return this.activeElement;')
@property
def location(self):
"""返回frame元素左上角的绝对坐标"""
self._check_ok()
return self.frame_ele.location
@property
def locations(self):
"""返回用于获取元素位置的对象"""
return self.frame_ele.locations
@property
def xpath(self):
"""返回frame的xpath绝对路径"""
self._check_ok()
return self.frame_ele.xpath
@property
def css_path(self):
"""返回frame的css selector绝对路径"""
self._check_ok()
return self.frame_ele.css_path
@property
def ready_state(self):
def tab_id(self):
"""返回frame所在tab的id"""
return self._tab_id
@property
def download_path(self):
return self._download_path
@property
def _js_ready_state(self):
"""返回当前页面加载状态,'loading' 'interactive' 'complete'"""
if self._is_diff_domain:
try:
return super().ready_state
except:
return 'complete'
return super()._js_ready_state
else:
end_time = perf_counter() + 3
while self.is_alive and perf_counter() < end_time:
try:
return self.doc_ele.run_js('return this.readyState;')
except ContextLostError:
try:
return self.doc_ele.run_js('return this.readyState;')
except ContextLossError:
try:
node = self.run_cdp('DOM.describeNode', backendNodeId=self.frame_ele.ids.backend_id)['node']
doc = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId'])
return doc.run_js('return this.readyState;')
except:
pass
sleep(.1)
# raise RuntimeError('获取document失败。')
@property
def is_alive(self):
"""返回是否仍可用"""
return self.states.is_alive
@property
def scroll(self):
"""返回用于等待的对象"""
return ChromiumFrameScroll(self)
@property
def set(self):
"""返回用于等待的对象"""
if self._set is None:
self._set = ChromiumFrameSetter(self)
return self._set
@property
def states(self):
"""返回用于获取状态信息的对象"""
return self.frame_ele.states
@property
def wait(self):
"""返回用于等待的对象"""
if self._wait is None:
self._wait = FrameWaiter(self)
return self._wait
node = self.run_cdp('DOM.describeNode', backendNodeId=self.frame_ele._backend_id)['node']
doc = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
return doc.run_js('return this.readyState;')
except:
return None
def refresh(self):
"""刷新frame页面"""
self._check_ok()
self.doc_ele.run_js('this.location.reload();')
def attr(self, attr):
@ -335,7 +364,6 @@ class ChromiumFrame(ChromiumBase):
:param attr: 属性名
:return: 属性值文本没有该属性返回None
"""
self._check_ok()
return self.frame_ele.attr(attr)
def remove_attr(self, attr):
@ -343,50 +371,47 @@ class ChromiumFrame(ChromiumBase):
:param attr: 属性名
:return: None
"""
self._check_ok()
self.frame_ele.remove_attr(attr)
def run_js(self, script, *args, as_expr=False):
def run_js(self, script, *args, as_expr=False, timeout=None):
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]...
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
self._check_ok()
if script.startswith('this.scrollIntoView'):
return self.frame_ele.run_js(script, *args, as_expr=as_expr)
return self.frame_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout)
else:
return self.doc_ele.run_js(script, *args, as_expr=as_expr)
return self.doc_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout)
def parent(self, level_or_loc=1):
def parent(self, level_or_loc=1, index=1):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果
:return: 上级元素对象
"""
self._check_ok()
return self.frame_ele.parent(level_or_loc)
return self.frame_ele.parent(level_or_loc, index)
def prev(self, filter_loc='', index=1, timeout=0, ele_only=True):
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param filter_loc: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
self._check_ok()
return self.frame_ele.prev(filter_loc, index, timeout, ele_only=ele_only)
def next(self, filter_loc='', index=1, timeout=0, ele_only=True):
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param filter_loc: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
self._check_ok()
return self.frame_ele.next(filter_loc, index, timeout, ele_only=ele_only)
def before(self, filter_loc='', index=1, timeout=None, ele_only=True):
@ -394,11 +419,10 @@ class ChromiumFrame(ChromiumBase):
查找范围不限同级元素而是整个DOM文档
:param filter_loc: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
self._check_ok()
return self.frame_ele.before(filter_loc, index, timeout, ele_only=ele_only)
def after(self, filter_loc='', index=1, timeout=None, ele_only=True):
@ -406,31 +430,28 @@ class ChromiumFrame(ChromiumBase):
查找范围不限同级元素而是整个DOM文档
:param filter_loc: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
self._check_ok()
return self.frame_ele.after(filter_loc, index, timeout, ele_only=ele_only)
def prevs(self, filter_loc='', timeout=0, ele_only=True):
"""返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
self._check_ok()
return self.frame_ele.prevs(filter_loc, timeout, ele_only=ele_only)
def nexts(self, filter_loc='', timeout=0, ele_only=True):
"""返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param filter_loc: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
self._check_ok()
return self.frame_ele.nexts(filter_loc, timeout, ele_only=ele_only)
def befores(self, filter_loc='', timeout=None, ele_only=True):
@ -441,7 +462,6 @@ class ChromiumFrame(ChromiumBase):
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
self._check_ok()
return self.frame_ele.befores(filter_loc, timeout, ele_only=ele_only)
def afters(self, filter_loc='', timeout=None, ele_only=True):
@ -452,22 +472,23 @@ class ChromiumFrame(ChromiumBase):
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
self._check_ok()
return self.frame_ele.afters(filter_loc, timeout, ele_only=ele_only)
def get_screenshot(self, path=None, as_bytes=None, as_base64=None):
def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None):
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 完整路径后缀可选 'jpg','jpeg','png','webp'
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:return: 图片完整路径或字节文本
"""
return self.frame_ele.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64)
return self.frame_ele.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64)
def _get_screenshot(self, path=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
def _get_screenshot(self, path=None, name=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
full_page=False, left_top=None, right_bottom=None, ele=None):
"""实现对元素截图
:param path: 完整路径后缀可选 'jpg','jpeg','png','webp'
"""实现截图
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
@ -477,7 +498,7 @@ class ChromiumFrame(ChromiumBase):
:return: 图片完整路径或字节文本
"""
if not self._is_diff_domain:
return super().get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64,
return super().get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64,
full_page=full_page, left_top=left_top, right_bottom=right_bottom)
if as_bytes:
@ -497,20 +518,25 @@ class ChromiumFrame(ChromiumBase):
pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64
else:
if not path:
path = f'{self.title}.jpg'
path = get_usable_path(path)
pic_type = path.suffix.lower()
if pic_type not in ('.jpg', '.jpeg', '.png', '.webp'):
raise TypeError(f'不支持的文件格式:{pic_type}')
pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:]
path = str(path).rstrip('\\/') if path else '.'
if path and path.endswith(('.jpg', '.jpeg', '.png', '.webp')):
pic_type = path.rsplit('.', 1)[-1]
elif name and name.endswith(('.jpg', '.jpeg', '.png', '.webp')):
pic_type = name.rsplit('.', 1)[-1]
else:
pic_type = 'jpeg'
if pic_type == 'jpg':
pic_type = 'jpeg'
self.frame_ele.scroll.to_see(center=True)
self.scroll.to_see(ele, center=True)
cx, cy = ele.locations.viewport_location
w, h = ele.size
cx, cy = ele.rect.viewport_location
w, h = ele.rect.size
img_data = f'data:image/{pic_type};base64,{self.frame_ele.get_screenshot(as_base64=True)}'
body = self.page('t:body')
body = self.tab('t:body')
first_child = body('c::first-child')
if not isinstance(first_child, ChromiumElement):
first_child = first_child.frame_ele
@ -526,13 +552,13 @@ class ChromiumFrame(ChromiumBase):
top = int(self.frame_ele.style('border-top').split('px')[0])
left = int(self.frame_ele.style('border-left').split('px')[0])
r = self.page.run_cdp('Page.getLayoutMetrics')['visualViewport']
r = self.tab.run_cdp('Page.getLayoutMetrics')['visualViewport']
sx = r['pageX']
sy = r['pageY']
r = self.page.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64,
left_top=(cx + left + sx, cy + top + sy),
right_bottom=(cx + w + left + sx, cy + h + top + sy))
self.page.remove_ele(new_ele)
r = self.tab.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64,
left_top=(cx + left + sx, cy + top + sy),
right_bottom=(cx + w + left + sx, cy + h + top + sy))
self.tab.remove_ele(new_ele)
return r
def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None):
@ -544,130 +570,39 @@ class ChromiumFrame(ChromiumBase):
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象
"""
self._check_ok()
if isinstance(loc_or_ele, ChromiumElement):
return loc_or_ele
self.wait.load_complete()
return self.doc_ele._ele(loc_or_ele, timeout, raise_err=raise_err) \
if single else self.doc_ele.eles(loc_or_ele, timeout)
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None):
"""尝试连接,重试若干次
:param to_url: 要访问的url
:param times: 重试次数
:param interval: 重试间隔
:param show_errmsg: 是否抛出异常
:param timeout: 连接超时时间
:return: 是否成功返回None表示不确定
"""
self._check_ok()
err = None
timeout = timeout if timeout is not None else self.timeouts.page_load
for t in range(times + 1):
err = None
result = self.driver.Page.navigate(url=to_url, frameId=self.frame_id)
is_timeout = not self._wait_loaded(timeout)
sleep(.5)
self.wait.load_complete()
if is_timeout:
err = TimeoutError('页面连接超时。')
if 'errorText' in result:
err = ConnectionError(result['errorText'])
if not err:
break
if t < times:
sleep(interval)
while self.ready_state not in ('complete', None):
sleep(.1)
if self._debug:
print('重试')
if show_errmsg:
print(f'重试 {to_url}')
if err:
if show_errmsg:
raise err if err is not None else ConnectionError('连接异常。')
return False
return True
return self.doc_ele._ele(loc_or_ele, timeout,
raise_err=raise_err) if single else self.doc_ele.eles(loc_or_ele, timeout)
def _is_inner_frame(self):
"""返回当前frame是否同域"""
return self.frame_id in str(self.page.run_cdp('Page.getFrameTree')['frameTree'])
return self._frame_id in str(self._target_page.run_cdp('Page.getFrameTree')['frameTree'])
def _check_alive(self):
"""检测iframe是否有效线程方法"""
while self.is_alive:
sleep(1)
self.driver.stop()
class ChromiumFrameIds(object):
def __init__(self, frame):
self._frame = frame
# ----------------即将废弃-----------------
@property
def tab_id(self):
"""返回当前标签页id"""
return self._frame.page.tab_id
def is_alive(self):
"""返回是否仍可用"""
return self.states.is_alive
@property
def backend_id(self):
"""返回cdp中的node id"""
return self._frame._backend_id
def page_size(self):
"""返回frame内页面尺寸格式(宽,, 高)"""
return self.rect.size
@property
def obj_id(self):
"""返回frame元素的object id"""
return self._frame.frame_ele.ids.obj_id
def size(self):
"""返回frame元素大小"""
return self.frame_ele.rect.size
@property
def node_id(self):
"""返回cdp中的node id"""
return self._frame.frame_ele.ids.node_id
def location(self):
"""返回frame元素左上角的绝对坐标"""
return self.frame_ele.rect.location
class ChromiumFrameScroll(ChromiumPageScroll):
def __init__(self, frame):
"""
:param frame: ChromiumFrame对象
"""
self._driver = frame.doc_ele
self.t1 = self.t2 = 'this.documentElement'
self._wait_complete = False
def to_see(self, loc_or_ele, center=None):
"""滚动页面直到元素可见
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele)
self._to_see(ele, center)
class ChromiumFrameSetter(ChromiumBaseSetter):
def attr(self, attr, value):
"""设置frame元素attribute属性
:param attr: 属性名
:param value: 属性值
:return: None
"""
self._page._check_ok()
self._page.frame_ele.set.attr(attr, value)
class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter):
def __init__(self, frame):
"""
:param frame: ChromiumFrame对象
"""
super().__init__(frame)
super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele)
@property
def locations(self):
"""返回用于获取元素位置的对象"""
return self.frame_ele.rect

View File

@ -0,0 +1,217 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union, Tuple, List, Any
from .chromium_base import ChromiumBase
from .chromium_page import ChromiumPage
from .chromium_tab import ChromiumTab
from .web_page import WebPage
from .._elements.chromium_element import ChromiumElement
from .._elements.none_element import NoneElement
from .._units.listener import FrameListener
from .._units.rect import FrameRect
from .._units.scroller import FrameScroller
from .._units.setter import ChromiumFrameSetter
from .._units.states import FrameStates
from .._units.waiter import FrameWaiter
class ChromiumFrame(ChromiumBase):
    """Type declarations for ChromiumFrame, the page object wrapping a frame element.

    This is a stub: every body is ``...`` and only signatures matter.
    """

    def __init__(self,
                 page: Union[ChromiumPage, WebPage, ChromiumTab, ChromiumFrame],
                 ele: ChromiumElement,
                 info: dict = None):
        self._page: ChromiumPage = ...
        self._target_page: ChromiumBase = ...
        self.tab: ChromiumTab = ...
        self._tab_id: str = ...
        self._frame_ele: ChromiumElement = ...
        self._backend_id: int = ...
        self._doc_ele: ChromiumElement = ...
        self._is_diff_domain: bool = ...
        self.doc_ele: ChromiumElement = ...
        self._states: FrameStates = ...
        self._reloading: bool = ...
        self._rect: FrameRect = ...
        self._listener: FrameListener = ...

    def __call__(self,
                 loc_or_str: Union[Tuple[str, str], str],
                 timeout: float = None) -> Union[ChromiumElement, NoneElement]: ...

    def __eq__(self, other: ChromiumFrame) -> bool: ...

    def _check_alive(self) -> None: ...

    def __repr__(self) -> str: ...

    def _d_set_runtime_settings(self) -> None: ...

    def _driver_init(self, tab_id: str) -> None: ...

    def _reload(self) -> None: ...

    def _get_document(self, timeout: float = 10) -> bool: ...

    def _onFrameStoppedLoading(self, **kwargs): ...

    def _onInspectorDetached(self, **kwargs): ...

    # ---------- read-only properties ----------

    @property
    def page(self) -> Union[ChromiumPage, WebPage]: ...

    @property
    def frame_ele(self) -> ChromiumElement: ...

    @property
    def tag(self) -> str: ...

    @property
    def url(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def inner_html(self) -> str: ...

    @property
    def title(self) -> str: ...

    @property
    def cookies(self) -> dict: ...

    @property
    def attrs(self) -> dict: ...

    @property
    def rect(self) -> FrameRect: ...

    @property
    def listen(self) -> FrameListener: ...

    @property
    def _obj_id(self) -> str: ...

    @property
    def _node_id(self) -> int: ...

    @property
    def active_ele(self) -> ChromiumElement: ...

    @property
    def xpath(self) -> str: ...

    @property
    def css_path(self) -> str: ...

    @property
    def scroll(self) -> FrameScroller: ...

    @property
    def set(self) -> ChromiumFrameSetter: ...

    @property
    def states(self) -> FrameStates: ...

    @property
    def wait(self) -> FrameWaiter: ...

    @property
    def tab_id(self) -> str: ...

    @property
    def download_path(self) -> str: ...

    # ---------- methods ----------

    def refresh(self) -> None: ...

    def attr(self, attr: str) -> Union[str, None]: ...

    def remove_attr(self, attr: str) -> None: ...

    def run_js(self,
               script: str,
               *args,
               as_expr: bool = False,
               timeout: float = None) -> Any: ...

    def parent(self,
               level_or_loc: Union[tuple, str, int] = 1,
               index: int = 1) -> Union[ChromiumElement, NoneElement]: ...

    # Relative lookups. `ele_only=False` also admits text and comment nodes,
    # which is why `str` appears in the return types.
    def prev(self,
             filter_loc: Union[tuple, str, int] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ...

    def next(self,
             filter_loc: Union[tuple, str, int] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ...

    def before(self,
               filter_loc: Union[tuple, str, int] = '',
               index: int = 1,
               timeout: float = None,
               ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ...

    def after(self,
              filter_loc: Union[tuple, str, int] = '',
              index: int = 1,
              timeout: float = None,
              ele_only: bool = True) -> Union[ChromiumElement, NoneElement, str]: ...

    def prevs(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def nexts(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def befores(self,
                filter_loc: Union[tuple, str] = '',
                timeout: float = None,
                ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def afters(self,
               filter_loc: Union[tuple, str] = '',
               timeout: float = None,
               ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    # Screenshot signatures: `path` is the save location, `name` the file name
    # (suffix 'jpg', 'jpeg', 'png' or 'webp'); `as_bytes` / `as_base64` accept
    # either a bool or one of those format names.
    # The annotations use Union[...]; a bare list such as `[str, Path]` is not
    # a valid type expression.
    def get_screenshot(self,
                       path: Union[str, Path] = None,
                       name: str = None,
                       as_bytes: Union[bool, str] = None,
                       as_base64: Union[bool, str] = None) -> Union[str, bytes]: ...

    def _get_screenshot(self,
                        path: Union[str, Path] = None,
                        name: str = None,
                        as_bytes: Union[bool, str] = None,
                        as_base64: Union[bool, str] = None,
                        full_page: bool = False,
                        left_top: Tuple[int, int] = None,
                        right_bottom: Tuple[int, int] = None,
                        ele: ChromiumElement = None) -> Union[str, bytes]: ...

    def _find_elements(self,
                       loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
                       timeout: float = None,
                       single: bool = True,
                       relative: bool = False,
                       raise_err: bool = None) \
            -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ...

    def _is_inner_frame(self) -> bool: ...

View File

@ -0,0 +1,283 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from time import sleep, perf_counter
from requests import get
from .._base.browser import Browser
from .._functions.browser import connect_browser
from .._configs.chromium_options import ChromiumOptions, PortFinder
from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf, Timeout
from .._pages.chromium_tab import ChromiumTab
from .._units.setter import ChromiumPageSetter
from .._units.waiter import PageWaiter
from ..errors import BrowserConnectError
class ChromiumPage(ChromiumBase):
    """Class used to manage a browser."""

    def __init__(self, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None):
        """
        :param addr_or_opts: browser 'address:port' string, a ChromiumOptions object, or an int port number
        :param tab_id: id of the tab to control; when omitted the active tab is used
        :param timeout: timeout, forwarded to ``self.set.timeouts(base=...)``
        :param addr_driver_opts: fallback that is used only when ``addr_or_opts`` is falsy
        """
        addr_or_opts = addr_or_opts or addr_driver_opts
        self._page = self
        address = self._handle_options(addr_or_opts)
        self._run_browser()
        super().__init__(address, tab_id)
        self.set.timeouts(base=timeout)
        self._page_init()

    def _handle_options(self, addr_or_opts):
        """Store the browser start-up options on ``self._chromium_options``.

        :param addr_or_opts: falsy value, 'ip:port' string, int port, or ChromiumOptions object
        :return: the browser address taken from the resulting options
        :raises TypeError: for any other argument type
        """
        if not addr_or_opts:
            # The falsy value is passed through unchanged as the first argument.
            self._chromium_options = ChromiumOptions(addr_or_opts)

        elif isinstance(addr_or_opts, ChromiumOptions):
            if addr_or_opts.is_auto_port:
                port, path = PortFinder(addr_or_opts.tmp_path).get_port()
                addr_or_opts.set_address(f'127.0.0.1:{port}')
                addr_or_opts.set_user_data_path(path)
                addr_or_opts.auto_port()
            # NOTE(review): indentation was lost in SOURCE; the assignment is placed
            # outside the auto-port branch so every options object is kept - confirm.
            self._chromium_options = addr_or_opts

        elif isinstance(addr_or_opts, str):
            self._chromium_options = ChromiumOptions()
            self._chromium_options.set_address(addr_or_opts)

        elif isinstance(addr_or_opts, int):
            self._chromium_options = ChromiumOptions()
            self._chromium_options.set_local_port(addr_or_opts)

        else:
            raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。')

        return self._chromium_options.address

    def _run_browser(self):
        """Connect to the browser and create ``self._browser``.

        :raises BrowserConnectError: when the version endpoint cannot be read or
            lacks 'webSocketDebuggerUrl'
        """
        is_exist = connect_browser(self._chromium_options)
        try:
            ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'})
            if not ws:
                raise BrowserConnectError('\n浏览器连接失败如使用全局代理须设置不代理127.0.0.1地址。')
            ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
        except KeyError:
            raise BrowserConnectError('浏览器版本太旧,请升级。')
        except Exception:
            # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
            # SystemExit propagate instead of being reported as a connection failure.
            raise BrowserConnectError('\n浏览器连接失败如使用全局代理须设置不代理127.0.0.1地址。')

        self._browser = Browser(self._chromium_options.address, ws, self)

        # An already-running browser that reports a headless user agent although the
        # options have `_headless` set to False is quit and started again.
        if (is_exist and self._chromium_options._headless is False and
                'headless' in self._browser.run_cdp('Browser.getVersion')['userAgent'].lower()):
            self._browser.quit(3)
            connect_browser(self._chromium_options)
            ws = get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'})
            ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
            self._browser = Browser(self._chromium_options.address, ws, self)

    def _d_set_runtime_settings(self):
        """Copy the attributes used at run time from ``self._chromium_options``."""
        self._timeouts = Timeout(self, page_load=self._chromium_options.timeouts['pageLoad'],
                                 script=self._chromium_options.timeouts['script'],
                                 base=self._chromium_options.timeouts['base'])
        if self._chromium_options.timeouts['base'] is not None:
            self._timeout = self._chromium_options.timeouts['base']
        self._load_mode = self._chromium_options.load_mode
        self._download_path = None if self._chromium_options.download_path is None \
            else str(Path(self._chromium_options.download_path).absolute())
        self.retry_times = self._chromium_options.retry_times
        self.retry_interval = self._chromium_options.retry_interval

    def _page_init(self):
        """Browser-related set-up: let the browser object connect to the page."""
        self._browser.connect_to_page()

    # ---------- attached helper objects ----------
    @property
    def set(self):
        """Return the object used for settings (created on first access)."""
        if self._set is None:
            self._set = ChromiumPageSetter(self)
        return self._set

    @property
    def wait(self):
        """Return the object used for waiting (created on first access)."""
        if self._wait is None:
            self._wait = PageWaiter(self)
        return self._wait

    # ---------- browser-level properties ----------
    @property
    def browser(self):
        """Return the browser object stored in ``self._browser``."""
        return self._browser

    @property
    def tabs_count(self):
        """Return the number of tabs."""
        return self.browser.tabs_count

    @property
    def tabs(self):
        """Return the list of all tab ids."""
        return self.browser.tabs

    @property
    def latest_tab(self):
        """Return the id of the latest tab, i.e. the first entry of ``self.tabs``.

        "Latest" means the tab created or activated last.
        """
        return self.tabs[0]

    @property
    def process_id(self):
        """Return the browser process id."""
        return self.browser.process_id

    def save(self, path=None, name=None, as_pdf=False, **kwargs):
        """Save the current page to a file; with both path and name None only the content is returned.

        :param path: save path; if None while name is given, the current directory is used
        :param name: file name; if None while path is given, the title is used
        :param as_pdf: True saves as pdf, otherwise as mhtml and kwargs is ignored
        :param kwargs: pdf generation parameters
        :return: bytes when as_pdf is True, otherwise the file text
        """
        return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name)

    def get_tab(self, id_or_num=None):
        """Get a tab object.

        :param id_or_num: tab id (str), index into ``self.tabs`` (int; activation order, not
            visual order), a ChromiumTab (returned as is), or None for the current tab
        :return: tab object
        :raises TypeError: for any other argument type
        """
        if isinstance(id_or_num, str):
            return ChromiumTab(self, id_or_num)
        elif isinstance(id_or_num, int):
            return ChromiumTab(self, self.tabs[id_or_num])
        elif id_or_num is None:
            return ChromiumTab(self, self.tab_id)
        elif isinstance(id_or_num, ChromiumTab):
            return id_or_num
        else:
            raise TypeError(f'id_or_num需传入tab id或序号{id_or_num}')

    def find_tabs(self, title=None, url=None, tab_type=None, single=True):
        """Find tabs matching the conditions; delegates to the browser object.

        :param title: text to match against the title
        :param url: text to match against the url
        :param tab_type: tab type; several may be given as a list
        :param single: whether to return only the id of the first result
        :return: a tab id, or a list of tabs
        """
        return self._browser.find_tabs(title, url, tab_type, single)

    def new_tab(self, url=None, new_window=False, background=False, new_context=False):
        """Create a new tab.

        :param url: address the new tab navigates to; skipped when falsy
        :param new_window: whether to open the tab in a new window
        :param background: whether to leave the new tab inactive (no effect if new_window is True)
        :param new_context: whether to create a new browser context
        :return: the new tab object
        """
        tab = ChromiumTab(self, tab_id=self._new_tab(new_window, background, new_context))
        if url:
            tab.get(url)
        return tab

    def _new_tab(self, new_window=False, background=False, new_context=False):
        """Create a new browser target and return its id.

        :param new_window: whether to open the tab in a new window
        :param background: whether to leave the new tab inactive (no effect if new_window is True)
        :param new_context: whether to create a new browser context
        :return: the 'targetId' returned by 'Target.createTarget'
        """
        bid = None
        if new_context:
            bid = self.browser.run_cdp('Target.createBrowserContext')['browserContextId']

        # Only the options that were requested are sent to the browser.
        kwargs = {'url': ''}
        if new_window:
            kwargs['newWindow'] = True
        if background:
            kwargs['background'] = True
        if bid:
            kwargs['browserContextId'] = bid

        return self.browser.run_cdp('Target.createTarget', **kwargs)['targetId']

    def close(self):
        """Close the tab managed by this page object."""
        self.browser.close_tab(self.tab_id)

    def close_tabs(self, tabs_or_ids=None, others=False):
        """Close the given tabs; by default the current one.

        :param tabs_or_ids: tab object or id, or a list/tuple of them; None means the current tab
        :param others: if True, close every tab except the given ones
        :return: None
        :raises TypeError: when tabs_or_ids has an unsupported type
        """
        all_tabs = set(self.tabs)
        if isinstance(tabs_or_ids, str):
            tabs = {tabs_or_ids}
        elif isinstance(tabs_or_ids, ChromiumTab):
            tabs = {tabs_or_ids.tab_id}
        elif tabs_or_ids is None:
            tabs = {self.tab_id}
        elif isinstance(tabs_or_ids, (list, tuple)):
            tabs = {i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids}
        else:
            raise TypeError('tabs_or_ids参数只能传入标签页对象或id。')

        if others:
            tabs = all_tabs - tabs

        # Number of tabs expected to remain; when nothing would remain, quit the browser.
        end_len = len(all_tabs - tabs)
        if end_len <= 0:
            self.quit()
            return

        for tab in tabs:
            self.browser.close_tab(tab)
            # NOTE(review): indentation was lost in SOURCE; the pause is assumed
            # to follow each close - confirm.
            sleep(.2)

        # Wait up to 3 seconds for the tab count to reach the expected value.
        end_time = perf_counter() + 3
        while self.tabs_count != end_len and perf_counter() < end_time:
            sleep(.1)

    def quit(self, timeout=5, force=True):
        """Close the browser.

        :param timeout: how long to wait for the browser to close
        :param force: whether to kill the process when closing times out
        :return: None
        """
        self.browser.quit(timeout, force)

    def __repr__(self):
        return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'

    # ---------- to be deprecated ----------
    def close_other_tabs(self, tabs_or_ids=None):
        """Close every tab except the given ones; by default keep the current one.

        :param tabs_or_ids: tab object or id to keep, or a list/tuple of them; None keeps the current tab
        :return: None
        """
        self.close_tabs(tabs_or_ids, True)
def get_rename(original, rename):
    """Return the file name to use when renaming ``original`` to ``rename``.

    :param original: original file name
    :param rename: requested new name
    :return: ``rename`` unchanged when it contains a '.'; otherwise ``rename``
        followed by the part of ``original`` from its last '.' (nothing when
        ``original`` has no '.')
    """
    if '.' in rename:
        return rename

    # rpartition yields an empty separator when `original` has no dot at all.
    _, dot, extension = original.rpartition('.')
    suffix = f'{dot}{extension}' if dot else ''
    return f'{rename}{suffix}'

View File

@ -0,0 +1,99 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union, Tuple, List, Optional
from .._base.browser import Browser
from .._configs.chromium_options import ChromiumOptions
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_tab import ChromiumTab
from .._units.rect import TabRect
from .._units.setter import ChromiumPageSetter
from .._units.waiter import PageWaiter
class ChromiumPage(ChromiumBase):
    """Type declarations for ChromiumPage, the class used to manage a browser.

    This is a stub: every body is ``...`` and only signatures matter.
    """

    def __init__(self,
                 addr_or_opts: Union[str, int, ChromiumOptions] = None,
                 tab_id: str = None,
                 timeout: float = None):
        self._chromium_options: ChromiumOptions = ...
        self._browser: Browser = ...
        self._rect: Optional[TabRect] = ...

    # Takes the same kinds of value as `addr_or_opts` in __init__,
    # including an int port and None.
    def _handle_options(self, addr_or_opts: Union[str, int, ChromiumOptions, None]) -> str: ...

    def _run_browser(self) -> None: ...

    def _page_init(self) -> None: ...

    @property
    def browser(self) -> Browser: ...

    @property
    def tabs_count(self) -> int: ...

    @property
    def tabs(self) -> List[str]: ...

    @property
    def wait(self) -> PageWaiter: ...

    @property
    def main_tab(self) -> str: ...

    @property
    def latest_tab(self) -> str: ...

    @property
    def process_id(self) -> Optional[int]: ...

    @property
    def set(self) -> ChromiumPageSetter: ...

    # The keyword arguments after `as_pdf` are the pdf generation parameters.
    def save(self,
             path: Union[str, Path] = None,
             name: str = None,
             as_pdf: bool = False,
             landscape: bool = ...,
             displayHeaderFooter: bool = ...,
             printBackground: bool = ...,
             scale: float = ...,
             paperWidth: float = ...,
             paperHeight: float = ...,
             marginTop: float = ...,
             marginBottom: float = ...,
             marginLeft: float = ...,
             marginRight: float = ...,
             pageRanges: str = ...,
             headerTemplate: str = ...,
             footerTemplate: str = ...,
             preferCSSPageSize: bool = ...,
             generateTaggedPDF: bool = ...,
             generateDocumentOutline: bool = ...) -> Union[bytes, str]: ...

    def get_tab(self, tab_id: Union[str, ChromiumTab, int] = None) -> ChromiumTab: ...

    def find_tabs(self, title: str = None, url: str = None,
                  tab_type: Union[str, list, tuple] = None, single: bool = True) -> Union[str, List[str]]: ...

    def new_tab(self, url: str = None, new_window: bool = False, background: bool = False,
                new_context: bool = False) -> ChromiumTab: ...

    def _new_tab(self, new_window: bool = False, background: bool = False, new_context: bool = False) -> str: ...

    def close(self) -> None: ...

    # `Tuple[X, ...]` means a tuple of any length; `Tuple[X]` would mean exactly one item.
    def close_tabs(self, tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]],
                   Tuple[Union[str, ChromiumTab], ...]] = None, others: bool = False) -> None: ...

    def quit(self, timeout: float = 5, force: bool = True) -> None: ...
# Stub for get_rename. The implementation added in this commit returns `rename`
# unchanged when it contains a '.', and otherwise appends to it the part of
# `original` starting at its last '.' (nothing if `original` has no '.').
def get_rename(original: str, rename: str) -> str: ...

View File

@ -1,13 +1,19 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from copy import copy
from .chromium_base import ChromiumBase, ChromiumBaseSetter
from .commons.web import set_session_cookies, set_browser_cookies
from .session_page import SessionPage, SessionPageSetter, DownloadSetter
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._functions.web import set_session_cookies, set_browser_cookies
from .._pages.chromium_base import ChromiumBase, get_mhtml, get_pdf
from .._pages.session_page import SessionPage
from .._units.setter import TabSetter, WebPageTabSetter
from .._units.waiter import TabWaiter
class ChromiumTab(ChromiumBase):
@ -18,49 +24,74 @@ class ChromiumTab(ChromiumBase):
:param page: ChromiumPage对象
:param tab_id: 要控制的标签页id不指定默认为激活的
"""
self.page = page
self._page = page
self._browser = page.browser
super().__init__(page.address, tab_id, page.timeout)
self._rect = None
def _set_runtime_settings(self):
def _d_set_runtime_settings(self):
"""重写设置浏览器运行参数方法"""
self._timeouts = self.page.timeouts
self._timeouts = copy(self.page.timeouts)
self.retry_times = self.page.retry_times
self.retry_interval = self.page.retry_interval
self._page_load_strategy = self.page.page_load_strategy
self._load_mode = self.page._load_mode
self._download_path = self.page.download_path
def close(self):
"""关闭当前标签页"""
self.page.close_tabs(self.tab_id)
@property
def rect(self):
"""返回获取窗口坐标和大小的对象"""
return self.page.rect
def page(self):
"""返回总体page对象"""
return self._page
@property
def set(self):
"""返回用于设置的对象"""
if self._set is None:
self._set = TabSetter(self)
return self._set
@property
def wait(self):
"""返回用于等待的对象"""
if self._wait is None:
self._wait = TabWaiter(self)
return self._wait
def save(self, path=None, name=None, as_pdf=False, **kwargs):
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param kwargs: pdf生成参数
:return: as_pdf为True时返回bytes否则返回文件文本
"""
return get_pdf(self, path, name, kwargs) if as_pdf else get_mhtml(self, path, name)
def __repr__(self):
return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>'
class WebPageTab(SessionPage, ChromiumTab):
class WebPageTab(SessionPage, ChromiumTab, BasePage):
def __init__(self, page, tab_id):
"""
:param page: WebPage对象
:param tab_id: 要控制的标签页id
"""
self.page = page
self.address = page.address
self._debug = page._debug
self._debug_recorder = page._debug_recorder
self._mode = 'd'
self._has_driver = True
self._has_session = True
self._session = copy(page.session)
self._response = None
self._download_set = None
self._download_path = None
self._set = None
super(SessionPage, self)._set_runtime_settings()
self._connect_browser(tab_id)
super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session),
page._headers))
super(SessionPage, self).__init__(page=page, tab_id=tab_id)
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
ele = page('@id=ele_id')
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 超时时间
:param timeout: 超时时间
:return: 子元素对象
"""
if self._mode == 'd':
@ -68,6 +99,13 @@ class WebPageTab(SessionPage, ChromiumTab):
elif self._mode == 's':
return super().__call__(loc_or_str)
@property
def set(self):
"""返回用于设置的对象"""
if self._set is None:
self._set = WebPageTabSetter(self)
return self._set
@property
def url(self):
"""返回当前url"""
@ -79,7 +117,7 @@ class WebPageTab(SessionPage, ChromiumTab):
@property
def _browser_url(self):
"""返回浏览器当前url"""
return super(SessionPage, self).url if self._tab_obj else None
return super(SessionPage, self).url if self._driver else None
@property
def title(self):
@ -89,6 +127,14 @@ class WebPageTab(SessionPage, ChromiumTab):
elif self._mode == 'd':
return super(SessionPage, self).title
@property
def raw_data(self):
"""返回页码原始数据数据"""
if self._mode == 's':
return super().raw_data
elif self._mode == 'd':
return super(SessionPage, self).html if self._has_driver else ''
@property
def html(self):
"""返回页面html文本"""
@ -143,7 +189,7 @@ class WebPageTab(SessionPage, ChromiumTab):
@property
def timeout(self):
"""返回通用timeout设置"""
return self.timeouts.implicit
return self.timeouts.base
@timeout.setter
def timeout(self, second):
@ -151,34 +197,15 @@ class WebPageTab(SessionPage, ChromiumTab):
:param second: 秒数
:return: None
"""
self.set.timeouts(implicit=second)
@property
def set(self):
"""返回用于等待的对象"""
if self._set is None:
self._set = WebPageTabSetter(self)
return self._set
@property
def download_set(self):
"""返回下载设置对象"""
if self._download_set is None:
self._download_set = WebPageTabDownloadSetter(self)
return self._download_set
@property
def download(self):
"""返回下载器对象"""
return self.download_set._switched_DownloadKit
self.set.timeouts(base=second)
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param timeout: 连接超时时间
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间为None时使用页面对象timeouts.page_load属性值
:param kwargs: 连接参数s模式专用
:return: url是否可用d模式返回None时表示不确定
"""
@ -189,24 +216,25 @@ class WebPageTab(SessionPage, ChromiumTab):
timeout = self.timeouts.page_load if self._has_driver else self.timeout
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url会切换到s模式
:param url: 目标url
:param data: post方式时提交的数据
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param kwargs: 连接参数
:return: url是否可用
:return: s模式时返回url是否可用d模式时返回获取到的Response对象
"""
if self.mode == 'd':
self.cookies_to_session()
return super().post(url, data, show_errmsg, retry, interval, **kwargs)
super().post(url, show_errmsg, retry, interval, **kwargs)
return self.response
return super().post(url, show_errmsg, retry, interval, **kwargs)
def ele(self, loc_or_ele, timeout=None):
"""返回第一个符合条件的元素、属性或节点文本
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本节点文本
"""
if self._mode == 's':
@ -217,7 +245,7 @@ class WebPageTab(SessionPage, ChromiumTab):
def eles(self, loc_or_str, timeout=None):
"""返回页面中所有符合条件的元素、属性或节点文本
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本组成的列表
"""
if self._mode == 's':
@ -261,8 +289,8 @@ class WebPageTab(SessionPage, ChromiumTab):
# s模式转d模式
if self._mode == 'd':
if self._tab_obj is None:
self._connect_browser(self.page._driver_options)
if self._driver is None:
self._connect_browser(self.page._chromium_options)
self._url = None if not self._has_driver else super(SessionPage, self).url
self._has_driver = True
@ -289,7 +317,7 @@ class WebPageTab(SessionPage, ChromiumTab):
self.get(url)
def cookies_to_session(self, copy_user_agent=True):
"""driver对象的cookies复制到session对象
"""浏览器的cookies复制到session对象
:param copy_user_agent: 是否复制ua信息
:return: None
"""
@ -297,8 +325,8 @@ class WebPageTab(SessionPage, ChromiumTab):
return
if copy_user_agent:
selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self.session.headers.update({"User-Agent": selenium_user_agent})
user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self._headers.update({"User-Agent": user_agent})
set_session_cookies(self.session, super(SessionPage, self).get_cookies())
@ -306,9 +334,6 @@ class WebPageTab(SessionPage, ChromiumTab):
"""把session对象的cookies复制到浏览器"""
if not self._has_driver:
return
# set_browser_cookies(self, super().get_cookies(as_dict=True))
# set_browser_cookies(self, super().get_cookies(all_domains=True))
set_browser_cookies(self, super().get_cookies())
def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
@ -323,10 +348,17 @@ class WebPageTab(SessionPage, ChromiumTab):
elif self._mode == 'd':
return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info)
def close(self):
"""关闭当前标签页"""
self.page.close_tabs(self.tab_id)
self._session.close()
if self._response is not None:
self._response.close()
def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间d模式专用
:param timeout: 查找元素超时时间d模式专用
:param single: True则返回第一个False则返回全部
:param relative: WebPage用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
@ -338,53 +370,5 @@ class WebPageTab(SessionPage, ChromiumTab):
return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single,
relative=relative)
class WebPageTabSetter(ChromiumBaseSetter):
    """Setter for a tab that has both a browser side and a Session side.

    Each setting is forwarded to a ChromiumBaseSetter, a SessionPageSetter,
    or both, depending on the page's mode and on which sides it has.
    """

    def __init__(self, page):
        """
        :param page: page object whose settings are changed
        """
        super().__init__(page)
        self._session_setter = SessionPageSetter(self._page)
        self._chromium_setter = ChromiumBaseSetter(self._page)

    def cookies(self, cookies):
        """Add cookies to the browser (d mode) or to the Session object (s mode).
        :param cookies: accepts `CookieJar`, `list`, `tuple`, `str` or `dict` cookies
        :return: None
        """
        page = self._page
        if page.mode == 'd' and page._has_driver:
            self._chromium_setter.cookies(cookies)
        elif page.mode == 's' and page._has_session:
            self._session_setter.cookies(cookies)

    def headers(self, headers) -> None:
        """Set the headers that are always sent, on every side the page has.
        :param headers: headers as a dict
        :return: None
        """
        page = self._page
        if page._has_session:
            self._session_setter.headers(headers)
        if page._has_driver:
            self._chromium_setter.headers(headers)

    def user_agent(self, ua, platform=None):
        """Set the user agent on every side the page has.
        :param ua: user agent string
        :param platform: forwarded to the browser-side setter only
        :return: None
        """
        page = self._page
        if page._has_session:
            self._session_setter.user_agent(ua)
        if page._has_driver:
            self._chromium_setter.user_agent(ua, platform)
class WebPageTabDownloadSetter(DownloadSetter):
    """Holds the download settings of a tab."""

    def __init__(self, page):
        """
        :param page: page object; its `session` attribute is kept as `_session`
        """
        super().__init__(page)
        self._session = page.session

    @property
    def _switched_DownloadKit(self):
        """Return the DownloadKit object, syncing the Session from the browser first in d mode.

        In d mode the browser's `navigator.userAgent` is written into the
        Session headers and the page's cookies are copied into the Session.
        """
        page = self._page
        if page.mode == 'd':
            evaluated = page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')
            browser_ua = evaluated['result']['value']
            page.session.headers.update({"User-Agent": browser_ua})
            browser_cookies = page.get_cookies(as_dict=False, all_domains=False)
            set_session_cookies(page.session, browser_cookies)
        return self.DownloadKit
def __repr__(self):
    """Return a debug string showing the browser id and the tab id."""
    browser_id = self.browser.id
    tab_id = self.tab_id
    return f'<WebPageTab browser_id={browser_id} tab_id={tab_id}>'

View File

@ -1,45 +1,85 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Tuple, Any, List
from pathlib import Path
from typing import Union, Tuple, Any, List, Optional
from DownloadKit import DownloadKit
from requests import Session, Response
from .chromium_base import ChromiumBase, ChromiumBaseSetter
from .chromium_element import ChromiumElement
from .chromium_base import ChromiumBase
from .chromium_frame import ChromiumFrame
from .chromium_page import ChromiumPage, ChromiumTabRect
from .session_element import SessionElement
from .session_page import SessionPage, SessionPageSetter, DownloadSetter
from .chromium_page import ChromiumPage
from .session_page import SessionPage
from .web_page import WebPage
from .._base.browser import Browser
from .._elements.chromium_element import ChromiumElement
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._units.rect import TabRect
from .._units.setter import TabSetter, WebPageTabSetter
from .._units.waiter import TabWaiter
class ChromiumTab(ChromiumBase):
def __init__(self, page: ChromiumPage, tab_id: str = None):
self.page: ChromiumPage = ...
self._page: ChromiumPage = ...
self._browser: Browser = ...
self._rect: Optional[TabRect] = ...
def _set_runtime_settings(self) -> None: ...
def _d_set_runtime_settings(self) -> None: ...
def close(self) -> None: ...
@property
def rect(self) -> ChromiumTabRect: ...
def page(self) -> ChromiumPage: ...
@property
def set(self) -> TabSetter: ...
@property
def wait(self) -> TabWaiter: ...
def save(self,
path: Union[str, Path] = None,
name: str = None,
as_pdf: bool = False,
landscape: bool = ...,
displayHeaderFooter: bool = ...,
printBackground: bool = ...,
scale: float = ...,
paperWidth: float = ...,
paperHeight: float = ...,
marginTop: float = ...,
marginBottom: float = ...,
marginLeft: float = ...,
marginRight: float = ...,
pageRanges: str = ...,
headerTemplate: str = ...,
footerTemplate: str = ...,
preferCSSPageSize: bool = ...,
generateTaggedPDF: bool = ...,
generateDocumentOutline: bool = ...) -> Union[bytes, str]: ...
class WebPageTab(SessionPage, ChromiumTab):
def __init__(self, page: WebPage, tab_id: str):
self.page: WebPage = ...
self._page: WebPage = ...
self._browser: Browser = ...
self._mode: str = ...
self._has_driver = ...
self._has_session = ...
self._download_set = ...
self._download_path = ...
def __call__(self,
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
timeout: float = None) -> Union[ChromiumElement, SessionElement]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ...
@property
def page(self) -> WebPage: ...
@property
def url(self) -> Union[str, None]: ...
@ -50,6 +90,9 @@ class WebPageTab(SessionPage, ChromiumTab):
@property
def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...
@ -102,16 +145,16 @@ class WebPageTab(SessionPage, ChromiumTab):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
timeout: float = None) -> Union[ChromiumElement, SessionElement, str]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[Union[ChromiumElement, SessionElement, str]]: ...
timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ...
def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \
-> Union[SessionElement, str, None]: ...
-> Union[SessionElement, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...
def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ...
@ -122,6 +165,8 @@ class WebPageTab(SessionPage, ChromiumTab):
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
def close(self) -> None: ...
# ----------------重写SessionPage的函数-----------------------
def post(self,
url: str,
@ -141,40 +186,12 @@ class WebPageTab(SessionPage, ChromiumTab):
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
cert: Any | None = ...) -> Union[bool, Response]: ...
@property
def set(self) -> WebPageTabSetter: ...
@property
def download(self) -> DownloadKit: ...
@property
def download_set(self) -> WebPageTabDownloadSetter: ...
def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \
-> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[
Union[ChromiumElement, str, ChromiumFrame]]]: ...
class WebPageTabSetter(ChromiumBaseSetter):
    # page object the settings are applied to
    _page: WebPage = ...
    # setter used for the Session side
    _session_setter: SessionPageSetter = ...
    # setter used for the browser side
    _chromium_setter: ChromiumBaseSetter = ...

    def cookies(self, cookies) -> None: ...

    def headers(self, headers: dict) -> None: ...

    def user_agent(self, ua: str, platform: str = None) -> None: ...
class WebPageTabDownloadSetter(DownloadSetter):
    """Class used to set download parameters."""

    def __init__(self, page: WebPageTab):
        # page object the download settings belong to
        self._page: WebPageTab = ...

    @property
    def _switched_DownloadKit(self) -> DownloadKit: ...
-> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement], List[
Union[ChromiumElement, ChromiumFrame]]]: ...

View File

@ -1,21 +1,24 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from re import search
from pathlib import Path
from re import search, DOTALL
from time import sleep
from urllib.parse import urlparse
from urllib.parse import urlparse, quote
from DownloadKit import DownloadKit
from requests import Session
from requests import Session, Response
from requests.structures import CaseInsensitiveDict
from tldextract import extract
from .base import BasePage
from .commons.web import cookie_to_dict, set_session_cookies
from .configs.session_options import SessionOptions
from .session_element import SessionElement, make_session_ele
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._elements.session_element import SessionElement, make_session_ele
from .._functions.web import cookie_to_dict
from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
@ -24,22 +27,23 @@ class SessionPage(BasePage):
def __init__(self, session_or_options=None, timeout=None):
"""
:param session_or_options: Session对象或SessionOptions对象
:param timeout: 连接超时时间为None时从ini文件读取
:param timeout: 连接超时时间为None时从ini文件读取或默认10
"""
super(SessionPage, SessionPage).__init__(self)
self._headers = None
self._response = None
self._download_set = None
self._session = None
self._set = None
self._set_start_options(session_or_options, None)
self._set_runtime_settings()
self._encoding = None
self._s_set_start_options(session_or_options)
self._s_set_runtime_settings()
self._create_session()
timeout = timeout if timeout is not None else self.timeout
super().__init__(timeout)
if timeout is not None:
self.timeout = timeout
def _set_start_options(self, session_or_options, none):
def _s_set_start_options(self, session_or_options):
"""启动配置
:param session_or_options: SessionSessionOptions
:param none: 用于后代继承
:param session_or_options: SessionSessionOptions对象
:return: None
"""
if not session_or_options or isinstance(session_or_options, SessionOptions):
@ -47,17 +51,22 @@ class SessionPage(BasePage):
elif isinstance(session_or_options, Session):
self._session_options = SessionOptions()
self._headers = session_or_options.headers
session_or_options.headers = None
self._session = session_or_options
def _set_runtime_settings(self):
def _s_set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeout = self._session_options.timeout
self._download_path = self._session_options.download_path
self._download_path = None if self._session_options.download_path is None \
else str(Path(self._session_options.download_path).absolute())
self.retry_times = self._session_options.retry_times
self.retry_interval = self._session_options.retry_interval
def _create_session(self):
"""创建内建Session对象"""
if not self._session:
self._session = self._session_options.make_session()
self._session, self._headers = self._session_options.make_session()
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
@ -85,6 +94,11 @@ class SessionPage(BasePage):
"""返回当前访问url"""
return self._url
@property
def raw_data(self):
    """Return the raw content of the current response, or b'' when the response is falsy."""
    resp = self.response
    # NOTE(review): a requests Response presumably evaluates truthiness via `.ok`,
    # so an error response would also yield b'' here - confirm this is intended.
    if not resp:
        return b''
    return resp.content
@property
def html(self):
"""返回页面的html文本"""
@ -101,53 +115,64 @@ class SessionPage(BasePage):
@property
def user_agent(self):
"""返回user agent"""
return self.session.headers.get('user-agent', '')
@property
def download_path(self):
"""返回下载路径"""
return self._download_path
@property
def download_set(self):
"""返回用于设置下载参数的对象"""
if self._download_set is None:
self._download_set = DownloadSetter(self)
return self._download_set
@property
def download(self):
"""返回下载器对象"""
return self.download_set.DownloadKit
return self._headers.get('user-agent', '')
@property
def session(self):
"""返回session对象"""
"""返回Session对象"""
return self._session
@property
def response(self):
"""返回访问url得到的response对象"""
"""返回访问url得到的Response对象"""
return self._response
@property
def encoding(self):
    """Return the value stored in `_encoding`, i.e. the encoding that was set."""
    configured = self._encoding
    return configured
@property
def set(self):
"""返回用于等待的对象"""
"""返回用于设置的对象"""
if self._set is None:
self._set = SessionPageSetter(self)
return self._set
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""用get方式跳转到url
:param url: 目标url
"""用get方式跳转到url,可输入文件路径
:param url: 目标url可指定本地文件路径
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param timeout: 连接超时时间
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间为None时使用页面对象timeout属性值
:param kwargs: 连接参数
:return: url是否可用
"""
return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)
if isinstance(url, Path):
url = str(url.absolute())
if not url.lower().startswith('http'):
if url.startswith('file:///'):
url = url[8:]
if Path(url).exists():
with open(url, 'rb') as f:
r = Response()
r._content = f.read()
r.status_code = 200
self._response = r
return
return self._s_connect(url, 'get', show_errmsg, retry, interval, **kwargs)
def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs):
    """Go to a url with the POST method.
    :param url: target url
    :param show_errmsg: whether to show and raise errors
    :param retry: number of retries; None uses the page object's retry_times
    :param interval: seconds between retries; None uses the page object's retry_interval
    :param kwargs: connection arguments, passed through unchanged
    :return: whatever `_s_connect()` returns (whether the url is available)
    """
    available = self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs)
    return available
def ele(self, loc_or_ele, timeout=None):
"""返回页面中符合条件的第一个元素、属性或节点文本
@ -155,7 +180,7 @@ class SessionPage(BasePage):
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
return self._ele(loc_or_ele)
return self._ele(loc_or_ele, method='ele()')
def eles(self, loc_or_str, timeout=None):
"""返回页面中所有符合条件的元素、属性或节点文本
@ -170,7 +195,7 @@ class SessionPage(BasePage):
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
:return: SessionElement对象或属性文本
"""
return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele)
return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele, method='s_ele()')
def s_eles(self, loc_or_str):
"""返回页面中符合条件的所有元素、属性或节点文本
@ -218,23 +243,28 @@ class SessionPage(BasePage):
r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']})
return r
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
    """Go to a url with the POST method.
    :param url: target url
    :param data: data to submit
    :param show_errmsg: whether to show and raise errors
    :param retry: number of retries
    :param interval: seconds between retries
    :param kwargs: connection arguments, passed through unchanged
    :return: whatever `_s_connect()` returns (whether the url is available)
    """
    positional = (url, 'post', data, show_errmsg, retry, interval)
    return self._s_connect(*positional, **kwargs)
def close(self):
    """Close the Session object and, when one is held, the last Response."""
    self._session.close()
    held = self._response
    if held is not None:
        held.close()
def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
def _before_connect(self, url, retry, interval):
    """Prepare for a connection: store the quoted url and resolve the retry settings.
    :param url: url to visit; stored in `_url` after percent-quoting unsafe characters
    :param retry: number of retries; None falls back to `retry_times`
    :param interval: seconds between retries; None falls back to `retry_interval`
    :return: tuple of (retry, interval)
    """
    self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%')
    if retry is None:
        retry = self.retry_times
    if interval is None:
        interval = self.retry_interval
    return retry, interval
def _s_connect(self, url, mode, show_errmsg=False, retry=None, interval=None, **kwargs):
"""执行get或post连接
:param url: 目标url
:param mode: 'get' 'post'
:param data: 提交的数据
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
@ -242,7 +272,7 @@ class SessionPage(BasePage):
:return: url是否可用
"""
retry, interval = self._before_connect(url, retry, interval)
self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs)
self._response, info = self._make_response(self._url, mode, retry, interval, show_errmsg, **kwargs)
if self._response is None:
self._url_available = False
@ -258,14 +288,13 @@ class SessionPage(BasePage):
return self._url_available
def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs):
def _make_response(self, url, mode='get', retry=None, interval=None, show_errmsg=False, **kwargs):
"""生成Response对象
:param url: 目标url
:param mode: 'get' 'post'
:param data: post方式要提交的数据
:param show_errmsg: 是否显示和抛出异常
:param kwargs: 其它参数
:return: tuple第一位为Response或None第二位为出错信息或'Success'
:return: tuple第一位为Response或None第二位为出错信息或 'Success'
"""
kwargs = CaseInsensitiveDict(kwargs)
if 'headers' not in kwargs:
@ -277,14 +306,16 @@ class SessionPage(BasePage):
parsed_url = urlparse(url)
hostname = parsed_url.hostname
scheme = parsed_url.scheme
if not check_headers(kwargs, self.session.headers, 'Referer'):
if not check_headers(kwargs, self._headers, 'Referer'):
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
if 'Host' not in kwargs['headers']:
kwargs['headers']['Host'] = hostname
if not check_headers(kwargs, self.session.headers, 'timeout'):
if not check_headers(kwargs, self._headers, 'timeout'):
kwargs['timeout'] = self.timeout
kwargs['headers'] = {**self._headers, **kwargs['headers']}
r = err = None
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval
@ -293,9 +324,12 @@ class SessionPage(BasePage):
if mode == 'get':
r = self.session.get(url, **kwargs)
elif mode == 'post':
r = self.session.post(url, data=data, **kwargs)
r = self.session.post(url, **kwargs)
if r:
if r and r.content:
if self._encoding:
r.encoding = self._encoding
return r, 'Success'
return set_charset(r), 'Success'
except Exception as e:
@ -309,202 +343,22 @@ class SessionPage(BasePage):
if show_errmsg:
print(f'重试 {url}')
if r is None:
if show_errmsg:
if err:
raise err
else:
raise ConnectionError('连接失败')
return None, '连接失败' if err is None else err
if show_errmsg:
if err:
raise err
elif r is not None:
raise ConnectionError(f'状态码:{r.status_code}') if r.content else ConnectionError('返回内容为空。')
else:
raise ConnectionError('连接失败')
if not r.ok:
if show_errmsg:
raise ConnectionError(f'状态码:{r.status_code}')
return r, f'状态码:{r.status_code}'
else:
if r is not None:
return (r, f'状态码:{r.status_code}') if r.content else (None, '返回内容为空')
else:
return None, '连接失败' if err is None else err
class SessionPageSetter(object):
    """Collects the setters that configure a page object and its Session."""

    def __init__(self, page):
        """
        :param page: page object whose attributes and `session` are modified
        """
        self._page = page

    def retry_times(self, times):
        """Set how many times a failed connection is retried.
        :param times: number of retries
        :return: None
        """
        self._page.retry_times = times

    def retry_interval(self, interval):
        """Set the pause between retries of a failed connection.
        :param interval: pause between retries
        :return: None
        """
        self._page.retry_interval = interval

    def timeout(self, second):
        """Set the connection timeout.
        :param second: number of seconds
        :return: None
        """
        self._page.timeout = second

    def cookies(self, cookies):
        """Set cookies on the Session object.
        :param cookies: cookies information
        :return: None
        """
        session = self._page.session
        set_session_cookies(session, cookies)

    def headers(self, headers):
        """Replace the Session's general headers.
        :param headers: headers as a dict
        :return: None
        """
        session = self._page.session
        session.headers = CaseInsensitiveDict(headers)

    def header(self, attr, value):
        """Set a single item of the Session headers; the name is lower-cased first.
        :param attr: header name
        :param value: header value
        :return: None
        """
        key = attr.lower()
        self._page.session.headers[key] = value

    def user_agent(self, ua):
        """Set the user agent header of the Session.
        :param ua: user agent
        :return: None
        """
        self._page.session.headers['user-agent'] = ua

    def proxies(self, http, https=None):
        """Set the Session's proxies.
        :param http: http proxy address
        :param https: https proxy address; when falsy the http address is used for both
        :return: None
        """
        if https is None and http == https:
            # neither address given: clear the proxies
            mapping = None
        else:
            mapping = {'http': http, 'https': https or http}
        self._page.session.proxies = mapping

    def auth(self, auth):
        """Set the authentication tuple or object.
        :param auth: authentication tuple or object
        :return: None
        """
        session = self._page.session
        session.auth = auth

    def hooks(self, hooks):
        """Set the callback hooks.
        :param hooks: callback hooks
        :return: None
        """
        session = self._page.session
        session.hooks = hooks

    def params(self, params):
        """Set the query parameter dict.
        :param params: query parameter dict
        :return: None
        """
        session = self._page.session
        session.params = params

    def verify(self, on_off):
        """Set whether SSL certificates are verified.
        :param on_off: whether to verify SSL certificates
        :return: None
        """
        session = self._page.session
        session.verify = on_off

    def cert(self, cert):
        """Set the SSL client certificate: path to a .pem file, or a (cert, key) tuple.
        :param cert: certificate path or tuple
        :return: None
        """
        session = self._page.session
        session.cert = cert

    def stream(self, on_off):
        """Set whether response content is streamed.
        :param on_off: whether to stream response content
        :return: None
        """
        session = self._page.session
        session.stream = on_off

    def trust_env(self, on_off):
        """Set whether the environment is trusted.
        :param on_off: whether to trust the environment
        :return: None
        """
        session = self._page.session
        session.trust_env = on_off

    def max_redirects(self, times):
        """Set the maximum number of redirects.
        :param times: maximum number of redirects
        :return: None
        """
        session = self._page.session
        session.max_redirects = times

    def add_adapter(self, url, adapter):
        """Mount an adapter on the Session.
        :param url: url the adapter applies to
        :param adapter: adapter object
        :return: None
        """
        session = self._page.session
        session.mount(url, adapter)
class DownloadSetter(object):
    """Class used to set download parameters."""

    def __init__(self, page):
        """
        :param page: page object the download settings belong to
        """
        self._page = page
        self._DownloadKit = None

    @property
    def DownloadKit(self):
        """Return the DownloadKit object, creating it on first access.

        It is built with the page object as `session` and the page's
        download path as `goal_path`.
        """
        kit = self._DownloadKit
        if kit is None:
            kit = DownloadKit(session=self._page, goal_path=self._page.download_path)
            self._DownloadKit = kit
        return kit

    @property
    def if_file_exists(self):
        """Return an object for choosing what happens when a file of the same name exists."""
        return FileExists(self)

    def split(self, on_off):
        """Set whether large files may be split and downloaded with multiple threads.
        :param on_off: whether to enable multi-threaded download of large files
        :return: None
        """
        self.DownloadKit.split = on_off

    def save_path(self, path):
        """Set the path downloads are saved to, on both the page and the DownloadKit.
        :param path: save path; converted with str() unless it is None
        :return: None
        """
        if path is not None:
            path = str(path)
        self._page._download_path = path
        self.DownloadKit.goal_path = path
class FileExists(object):
    """Chooses what happens when a file with the same name already exists.

    Every method writes the mode through the DownloadKit's public
    `file_exists` attribute, so `obj('rename')` and `obj.rename()` take the
    same path. Previously `rename()` and `overwrite()` wrote the private
    `_file_exists` attribute while `__call__()` and `skip()` wrote the
    public one.
    """

    def __init__(self, setter):
        """
        :param setter: DownloadSetter object
        """
        self._setter = setter

    def __call__(self, mode):
        """Set the mode by name.
        :param mode: one of 'skip', 'rename', 'overwrite'
        :return: None
        :raises ValueError: when mode is not one of the three accepted values
        """
        if mode not in ('skip', 'rename', 'overwrite'):
            raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'")
        self._setter.DownloadKit.file_exists = mode

    def skip(self):
        """Skip the download when the file exists."""
        self._setter.DownloadKit.file_exists = 'skip'

    def rename(self):
        """Rename the new file when the file exists (upstream note: a serial number is appended)."""
        self._setter.DownloadKit.file_exists = 'rename'

    def overwrite(self):
        """Overwrite the existing file."""
        self._setter.DownloadKit.file_exists = 'overwrite'
def __repr__(self):
    """Return a debug string showing the current url."""
    current_url = self.url
    return f'<SessionPage url={current_url}>'
def check_headers(kwargs, headers, arg):
@ -525,7 +379,7 @@ def set_charset(response):
# 在headers中获取不到编码且如果是网页
elif content_type.replace(' ', '').startswith('text/html'):
re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content)
re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content, DOTALL)
if re_result:
charset = re_result.group(1).decode()

View File

@ -1,51 +1,48 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Any, Union, Tuple, List
from typing import Any, Union, Tuple, List, Optional
from DownloadKit import DownloadKit
from requests import Session, Response
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
from .commons.constants import NoneElement
from .base import BasePage
from .chromium_page import ChromiumPage
from .configs.session_options import SessionOptions
from .session_element import SessionElement
from .web_page import WebPage
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
def __init__(self,
session_or_options: Union[Session, SessionOptions] = None,
timeout: float = None):
self._headers: Optional[CaseInsensitiveDict] = ...
self._session: Session = ...
self._session_options: SessionOptions = ...
self._url: str = ...
self._response: Response = ...
self._download_path: str = ...
self._download_set: DownloadSetter = ...
self._url_available: bool = ...
self.timeout: float = ...
self.retry_times: int = ...
self.retry_interval: float = ...
self._set: SessionPageSetter = ...
self._encoding: str = ...
def _set_start_options(self, session_or_options, none) -> None: ...
def _s_set_start_options(self, session_or_options: Union[Session, SessionOptions]) -> None: ...
def _s_set_runtime_settings(self) -> None: ...
def _create_session(self) -> None: ...
def _set_runtime_settings(self) -> None: ...
def __call__(self,
loc_or_str: Union[Tuple[str, str], str, SessionElement],
timeout: float = None) -> Union[SessionElement, str, NoneElement]: ...
timeout: float = None) -> Union[SessionElement, NoneElement]: ...
# -----------------共有属性和方法-------------------
@property
@ -57,6 +54,9 @@ class SessionPage(BasePage):
@property
def _session_url(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...
@ -69,11 +69,8 @@ class SessionPage(BasePage):
@property
def download_path(self) -> str: ...
@property
def download_set(self) -> DownloadSetter: ...
def get(self,
url: str,
url: Union[Path, str],
show_errmsg: bool | None = False,
retry: int | None = None,
interval: float | None = None,
@ -94,23 +91,28 @@ class SessionPage(BasePage):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
timeout: float = None) -> Union[SessionElement, str, NoneElement]: ...
timeout: float = None) -> Union[SessionElement, NoneElement]: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[Union[SessionElement, str]]: ...
timeout: float = None) -> List[SessionElement]: ...
def s_ele(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \
-> Union[SessionElement, str, NoneElement]: ...
-> Union[SessionElement, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...
def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement],
timeout: float = None, single: bool = True, raise_err: bool = None) \
-> Union[SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ...
def _find_elements(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
timeout: float = None,
single: bool = True,
raise_err: bool = None) \
-> Union[SessionElement, NoneElement, List[SessionElement]]: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
def get_cookies(self,
as_dict: bool = False,
all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
# ----------------session独有属性和方法-----------------------
@ -121,17 +123,17 @@ class SessionPage(BasePage):
def response(self) -> Response: ...
@property
def set(self) -> SessionPageSetter: ...
def encoding(self) -> str: ...
@property
def download(self) -> DownloadKit: ...
def set(self) -> SessionPageSetter: ...
def post(self,
url: str,
data: Union[dict, str, None] = ...,
show_errmsg: bool = False,
retry: int | None = None,
interval: float | None = None,
data: Union[dict, str, None] = ...,
timeout: float | None = ...,
params: dict | None = ...,
json: Union[dict, str, None] = ...,
@ -146,10 +148,13 @@ class SessionPage(BasePage):
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
def close(self) -> None: ...
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
def _s_connect(self,
url: str,
mode: str,
data: Union[dict, str, None] = None,
show_errmsg: bool = False,
retry: int = None,
interval: float = None,
@ -158,82 +163,14 @@ class SessionPage(BasePage):
def _make_response(self,
url: str,
mode: str = 'get',
data: Union[dict, str] = None,
retry: int = None,
interval: float = None,
show_errmsg: bool = False,
**kwargs) -> tuple: ...
class SessionPageSetter(object):
    def __init__(self, page: SessionPage):
        # page object the settings are applied to
        self._page: SessionPage = ...

    # ---- attributes of the page object ----
    def retry_times(self, times: int) -> None: ...

    def retry_interval(self, interval: float) -> None: ...

    def timeout(self, second: float) -> None: ...

    # ---- cookies and headers of the Session ----
    def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...

    def headers(self, headers: dict) -> None: ...

    def header(self, attr: str, value: str) -> None: ...

    def user_agent(self, ua: str) -> None: ...

    # ---- other Session attributes ----
    def proxies(self, http, https=None) -> None: ...

    def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ...

    def hooks(self, hooks: Union[dict, None]) -> None: ...

    def params(self, params: Union[dict, None]) -> None: ...

    def verify(self, on_off: Union[bool, None]) -> None: ...

    def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ...

    def stream(self, on_off: Union[bool, None]) -> None: ...

    def trust_env(self, on_off: Union[bool, None]) -> None: ...

    def max_redirects(self, times: Union[int, None]) -> None: ...

    def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ...
class DownloadSetter(object):
    def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]):
        # page object the download settings belong to
        self._page: SessionPage = ...
        # backing attribute of the DownloadKit property
        self._DownloadKit: DownloadKit = ...

    @property
    def DownloadKit(self) -> DownloadKit: ...

    @property
    def if_file_exists(self) -> FileExists: ...

    def split(self, on_off: bool) -> None: ...

    def save_path(self, path: Union[str, Path]): ...
class FileExists(object):
    def __init__(self, setter: DownloadSetter):
        # DownloadSetter whose DownloadKit is configured
        self._setter: DownloadSetter = ...

    def __call__(self, mode: str) -> None: ...

    # one shortcut method per accepted mode
    def skip(self) -> None: ...

    def rename(self) -> None: ...

    def overwrite(self) -> None: ...
def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict],
def check_headers(kwargs: Union[dict, CaseInsensitiveDict],
headers: Union[dict, CaseInsensitiveDict],
arg: str) -> bool: ...

View File

@ -1,133 +1,48 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from warnings import warn
from requests import Session
from .base import BasePage
from .chromium_base import ChromiumBase, Timeout
from .chromium_driver import ChromiumDriver
from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter
from .chromium_page import ChromiumPage
from .chromium_tab import WebPageTab
from .commons.web import set_session_cookies, set_browser_cookies
from .configs.chromium_options import ChromiumOptions
from .configs.session_options import SessionOptions
from .errors import CallMethodError
from .session_page import SessionPage, SessionPageSetter
from .session_page import SessionPage
from .._base.base import BasePage
from .._configs.chromium_options import ChromiumOptions
from .._functions.web import set_session_cookies, set_browser_cookies
from .._units.setter import WebPageSetter
class WebPage(SessionPage, ChromiumPage, BasePage):
"""整合浏览器和request的页面类"""
def __init__(self, mode='d', timeout=None, driver_or_options=None, session_or_options=None):
def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None):
"""初始化函数
:param mode: 'd' 's'即driver模式和session模式
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param driver_or_options: ChromiumDriver对象或DriverOptions对象只使用s模式时应传入False
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param chromium_options: Driver对象只使用s模式时应传入False
:param session_or_options: Session对象或SessionOptions对象只使用d模式时应传入False
"""
chromium_options = chromium_options or driver_or_options
self._mode = mode.lower()
if self._mode not in ('s', 'd'):
raise ValueError('mode参数只能是s或d。')
self._has_driver = True
self._has_session = True
self._debug = False
self._debug_recorder = None
self.address = None
self._session = None
self._tab_obj = None
self._driver_options = None
self._session_options = None
self._response = None
self._download_set = None
self._set = None
self._screencast = None
self._DownloadKit = None
self._set_start_options(driver_or_options, session_or_options)
self._set_runtime_settings()
self._connect_browser()
self._create_session()
t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit
super(ChromiumBase, self).__init__(t) # 调用Base的__init__()
def _set_start_options(self, dr_opt, se_opt):
    """Resolve the browser-side and Session-side configuration for both modes.
    :param dr_opt: a ChromiumDriver, an options object (type name ending in ChromiumOptions or
                   DriverOptions), None (uses `ChromiumOptions()`) or False (uses
                   `ChromiumOptions(read_file=False)`)
    :param se_opt: a Session, a SessionOptions, None (uses `SessionOptions()`) or False (uses
                   `SessionOptions(read_file=False)`)
    :return: None
    :raises TypeError: when either argument is of an unsupported type
    """
    # browser configuration
    if isinstance(dr_opt, ChromiumDriver):
        self._tab_obj = dr_opt
        self._driver_options = ChromiumOptions()
        self._driver_options.debugger_address = dr_opt.address
        # treated like False below, so option-derived timeouts/paths are not applied
        dr_opt = False

    else:
        if dr_opt is None:
            self._driver_options = ChromiumOptions()

        elif dr_opt is False:
            self._driver_options = ChromiumOptions(read_file=False)

        elif str(type(dr_opt)).endswith(("ChromiumOptions'>", "DriverOptions'>")):
            self._driver_options = dr_opt

        else:
            raise TypeError('driver_or_options参数只能接收ChromiumDriver, ChromiumOptions、None或False。')

    address = self._driver_options.debugger_address.replace('localhost', '127.0.0.1')
    # Remove a leading scheme as a prefix. str.lstrip() takes a set of characters,
    # so lstrip('http://') would also eat the start of hosts such as 'test-host:9222'.
    for scheme in ('http://', 'https://'):
        if address.startswith(scheme):
            address = address[len(scheme):]
    self.address = address

    # Session configuration
    if isinstance(se_opt, Session):
        self._session = se_opt
        self._session_options = SessionOptions()
        # treated like False below, so option-derived timeouts/paths are not applied
        se_opt = False

    else:
        if se_opt is None:
            self._session_options = SessionOptions()

        elif se_opt is False:
            self._session_options = SessionOptions(read_file=False)

        elif isinstance(se_opt, SessionOptions):
            self._session_options = se_opt

        else:
            raise TypeError('session_or_options参数只能接收Session, SessionOptions、None或False。')

    self._timeouts = Timeout(self)
    self._page_load_strategy = self._driver_options.page_load_strategy
    self._download_path = None

    if se_opt is not False:
        self.set.timeouts(implicit=self._session_options.timeout)
        self._download_path = self._session_options.download_path

    # applied last, so browser-side values win when both sides supply them
    if dr_opt is not False:
        t = self._driver_options.timeouts
        self.set.timeouts(t['implicit'], t['pageLoad'], t['script'])
        self._download_path = self._driver_options.download_path
def _set_runtime_settings(self):
    """Set the attributes used at runtime (intentionally a no-op here)."""
    return None
super().__init__(session_or_options=session_or_options)
if not chromium_options:
chromium_options = ChromiumOptions(read_file=chromium_options)
chromium_options.set_timeouts(base=self._timeout).set_paths(download_path=self.download_path)
super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout)
self.change_mode(self._mode, go=False, copy_cookies=False)
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
ele = page('@id=ele_id')
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 超时时间
:param timeout: 超时时间
:return: 子元素对象
"""
if self._mode == 'd':
@ -135,6 +50,13 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 's':
return super().__call__(loc_or_str)
@property
def set(self):
    """Return the object used to change settings, creating it on first access."""
    setter = self._set
    if setter is None:
        setter = self._set = WebPageSetter(self)
    return setter
@property
def url(self):
"""返回当前url"""
@ -146,7 +68,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property
def _browser_url(self):
"""返回浏览器当前url"""
return super(SessionPage, self).url if self._tab_obj else None
return super(SessionPage, self).url if self._driver else None
@property
def title(self):
@ -156,6 +78,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).title
@property
def raw_data(self):
    """Return the raw data of the page.

    In 's' mode the parent class's ``raw_data`` is returned; in 'd' mode the browser html is
    returned, or an empty string when no driver is connected.
    """
    mode = self._mode
    if mode == 's':
        return super().raw_data
    if mode != 'd':
        return None
    if not self._has_driver:
        return ''
    return super(SessionPage, self).html
@property
def html(self):
"""返回页面html文本"""
@ -210,7 +140,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property
def timeout(self):
"""返回通用timeout设置"""
return self.timeouts.implicit
return self.timeouts.base
@timeout.setter
def timeout(self, second):
@ -218,39 +148,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
:param second: 秒数
:return: None
"""
self.set.timeouts(implicit=second)
@property
def download_path(self):
    """Return the default download path as provided by the class after SessionPage in the MRO."""
    path = super(SessionPage, self).download_path
    return path
@property
def download_set(self):
    """Return the download settings object, creating it on first access."""
    setter = self._download_set
    if setter is None:
        setter = self._download_set = WebPageDownloadSetter(self)
    return setter
@property
def download(self):
    """Return the downloader object exposed by the download settings object."""
    settings = self.download_set
    return settings._switched_DownloadKit
@property
def set(self):
    """Return the WebPageSetter bound to this page, creating it on first access."""
    current = self._set
    if current is not None:
        return current
    self._set = WebPageSetter(self)
    return self._set
self.set.timeouts(base=second)
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param timeout: 连接超时时间
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间为None时使用页面对象timeouts.page_load属性值
:param kwargs: 连接参数s模式专用
:return: url是否可用d模式返回None时表示不确定
"""
@ -261,24 +167,25 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
timeout = self.timeouts.page_load if self._has_driver else self.timeout
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url会切换到s模式
:param url: 目标url
:param data: post方式时提交的数据
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param kwargs: 连接参数
:return: url是否可用
:return: s模式时返回url是否可用d模式时返回获取到的Response对象
"""
if self.mode == 'd':
self.cookies_to_session()
return super().post(url, data, show_errmsg, retry, interval, **kwargs)
super().post(url, show_errmsg, retry, interval, **kwargs)
return self.response
return super().post(url, show_errmsg, retry, interval, **kwargs)
def ele(self, loc_or_ele, timeout=None):
"""返回第一个符合条件的元素、属性或节点文本
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本节点文本
"""
if self._mode == 's':
@ -289,7 +196,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def eles(self, loc_or_str, timeout=None):
"""返回页面中所有符合条件的元素、属性或节点文本
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本组成的列表
"""
if self._mode == 's':
@ -333,8 +240,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
# s模式转d模式
if self._mode == 'd':
if self._tab_obj is None:
self._connect_browser(self._driver_options)
if self._driver is None:
self._connect_browser(self._chromium_options)
self._url = None if not self._has_driver else super(SessionPage, self).url
self._has_driver = True
@ -370,7 +277,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
if copy_user_agent:
user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self.session.headers.update({"User-Agent": user_agent})
self._headers.update({"User-Agent": user_agent})
set_session_cookies(self.session, super(SessionPage, self).get_cookies())
@ -378,9 +285,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
"""把session对象的cookies复制到浏览器"""
if not self._has_driver:
return
# set_browser_cookies(self, super().get_cookies(as_dict=True))
# set_browser_cookies(self, super().get_cookies(all_domains=True))
set_browser_cookies(self, super().get_cookies())
def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
@ -395,24 +299,45 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info)
def get_tab(self, tab_id=None):
def get_tab(self, id_or_num=None):
"""获取一个标签页对象
:param tab_id: 要获取的标签页id为None时获取当前tab
:param id_or_num: 要获取的标签页id或序号为None时获取当前tab序号不是视觉排列顺序而是激活顺序
:return: 标签页对象
"""
tab_id = tab_id or self.tab_id
return WebPageTab(self, tab_id)
if isinstance(id_or_num, str):
return WebPageTab(self, id_or_num)
elif isinstance(id_or_num, int):
return WebPageTab(self, self.tabs[id_or_num])
elif id_or_num is None:
return WebPageTab(self, self.tab_id)
elif isinstance(id_or_num, WebPageTab):
return id_or_num
else:
raise TypeError(f'id_or_num需传入tab id或序号{id_or_num}')
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
    """Create a new tab and return its tab object.

    :param url: address the new tab navigates to; no navigation when empty
    :param new_window: whether to open the tab in a new window
    :param background: whether to leave the new tab inactive (no effect when new_window is True)
    :param new_context: whether to create a new context
    :return: the new tab object
    """
    tab_id = self._new_tab(new_window, background, new_context)
    tab = WebPageTab(self, tab_id=tab_id)
    if not url:
        return tab
    tab.get(url)
    return tab
def close_driver(self):
"""关闭driver及浏览器"""
if self._has_driver:
self.change_mode('s')
try:
self.driver.Browser.close()
self.driver.run('Browser.close')
except Exception:
pass
self._tab_obj.stop()
self._tab_obj = None
self._driver.stop()
self._driver = None
self._has_driver = None
def close_session(self):
@ -420,10 +345,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
if self._has_session:
self.change_mode('d')
self._session.close()
if self._response is not None:
self._response.close()
self._session = None
self._response = None
self._has_session = None
def close(self):
    """Close the current tab and the Session."""
    if self._has_driver:
        self.close_tabs(self.tab_id)

    session = self._session
    if not session:
        return
    session.close()

    response = self._response
    if response is not None:
        response.close()
def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
@ -439,111 +375,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single,
relative=relative)
def quit(self):
"""关闭浏览器关闭session"""
def quit(self, timeout=5, force=True):
"""关闭浏览器和Session
:param timeout: 等待浏览器关闭超时时间
:param force: 关闭超时是否强制终止进程
:return: None
"""
if self._has_session:
self._session.close()
self._session = None
self._response = None
self._has_session = None
if self._has_driver:
super(SessionPage, self).quit()
self._tab_obj = None
super(SessionPage, self).quit(timeout, force)
self._driver = None
self._has_driver = None
class WebPageSetter(ChromiumPageSetter):
    """Setter that forwards each setting to the session-side or browser-side setter by page mode."""

    def __init__(self, page):
        """
        :param page: the page object this setter belongs to
        """
        super().__init__(page)
        owner = self._page
        self._session_setter = SessionPageSetter(owner)
        self._chromium_setter = ChromiumPageSetter(owner)

    def cookies(self, cookies):
        """Add cookies to the browser or to the session object, depending on the current mode.

        Nothing is done when the side matching the current mode is not available.

        :param cookies: cookies as `CookieJar`, `list`, `tuple`, `str` or `dict`
        :return: None
        """
        page = self._page
        if page.mode == 'd' and page._has_driver:
            target = self._chromium_setter
        elif page.mode == 's' and page._has_session:
            target = self._session_setter
        else:
            return
        target.cookies(cookies)

    def headers(self, headers) -> None:
        """Set the headers that are always sent.

        :param headers: headers as a dict
        :return: None
        """
        target = self._session_setter if self._page.mode == 's' else self._chromium_setter
        target.headers(headers)

    def user_agent(self, ua, platform=None):
        """Set the user agent; ``platform`` is only passed on outside 's' mode.

        :param ua: user agent string
        :param platform: platform value forwarded to the browser-side setter
        :return: None
        """
        if self._page.mode != 's':
            self._chromium_setter.user_agent(ua, platform)
            return
        self._session_setter.user_agent(ua)
class WebPageDownloadSetter(ChromiumDownloadSetter):
    """Class used to configure download parameters for a WebPage."""

    def __init__(self, page):
        """
        :param page: the page object whose downloads are configured; its ``session`` is kept
        """
        super().__init__(page)
        self._session = page.session

    @property
    def _switched_DownloadKit(self):
        """Return the DownloadKit object, syncing browser cookies to the session first in 'd' mode."""
        if self._page.mode == 'd':
            self._cookies_to_session()
        return self.DownloadKit

    def save_path(self, path):
        """Set the download path.

        :param path: download path; a falsy value is replaced by '' before being made absolute
        :return: None
        """
        path = path or ''
        path = Path(path).absolute()
        # Create the directory (and any missing parents) so later downloads can be written.
        path.mkdir(parents=True, exist_ok=True)
        path = str(path)
        self._page._download_path = path
        self.DownloadKit.goal_path = path

        if self._page._has_driver:
            try:
                self._page.browser_driver.Browser.setDownloadBehavior(behavior=self._behavior, downloadPath=path,
                                                                      eventsEnabled=True)
            except CallMethodError:
                # Fall back to the page-level command when the browser-level call is rejected.
                warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。')
                self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path)

    def by_browser(self):
        """Let the browser itself download files.

        :return: None
        :raises RuntimeError: if no browser is connected
        """
        if not self._page._has_driver:
            raise RuntimeError('浏览器未连接。')

        try:
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True,
                                                                  downloadPath=self._page.download_path)
            self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser

        except CallMethodError:
            # Fall back to the page-level command and event when the browser-level call is rejected.
            warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。')
            self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
            self._page.driver.Page.downloadWillBegin = self._download_by_browser

        self._behavior = 'allow'

    def by_DownloadKit(self):
        """Let DownloadKit download files; the browser is told to deny its own downloads.

        :return: None
        :raises RuntimeError: if the browser rejects the browser-level download command
        """
        if self._page._has_driver:
            try:
                self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True)
                self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit
            except CallMethodError:
                raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。')

        self._behavior = 'deny'
def __repr__(self):
    """Return a short description containing the browser id and the tab id."""
    browser_id = self.browser.id
    tab_id = self.tab_id
    return f'<WebPage browser_id={browser_id} tab_id={tab_id}>'

View File

@ -1,24 +1,26 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Tuple, List, Any
from DownloadKit import DownloadKit
from requests import Session, Response
from .base import BasePage
from .chromium_driver import ChromiumDriver
from .chromium_element import ChromiumElement
from .chromium_frame import ChromiumFrame
from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter
from .chromium_page import ChromiumPage
from .chromium_tab import WebPageTab
from .configs.chromium_options import ChromiumOptions
from .configs.driver_options import DriverOptions
from .configs.session_options import SessionOptions
from .session_element import SessionElement
from .session_page import SessionPage, SessionPageSetter
from .session_page import SessionPage
from .._base.base import BasePage
from .._base.driver import Driver
from .._configs.chromium_options import ChromiumOptions
from .._configs.session_options import SessionOptions
from .._elements.chromium_element import ChromiumElement
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._units.setter import WebPageSetter
class WebPage(SessionPage, ChromiumPage, BasePage):
@ -26,21 +28,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def __init__(self,
mode: str = 'd',
timeout: float = None,
driver_or_options: Union[ChromiumDriver, ChromiumOptions, DriverOptions, bool] = None,
chromium_options: Union[ChromiumOptions, bool] = None,
session_or_options: Union[Session, SessionOptions, bool] = None) -> None:
self._mode: str = ...
self._has_driver: bool = ...
self._has_session: bool = ...
self.address: str = ...
self._session_options: Union[SessionOptions, None] = ...
self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ...
self._download_set: WebPageDownloadSetter = ...
self._download_path: str = ...
self._tab_obj: ChromiumDriver = ...
self._chromium_options: Union[ChromiumOptions, None] = ...
def __call__(self,
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
timeout: float = None) -> Union[ChromiumElement, SessionElement]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ...
# -----------------共有属性和方法-------------------
@property
@ -52,6 +50,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property
def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...
@ -82,12 +83,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@timeout.setter
def timeout(self, second: float) -> None: ...
@property
def download_path(self) -> str: ...
@property
def download_set(self) -> WebPageDownloadSetter: ...
def get(self,
url: str,
show_errmsg: bool = False,
@ -110,16 +105,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
timeout: float = None) -> Union[ChromiumElement, SessionElement, str]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement, NoneElement]: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[Union[ChromiumElement, SessionElement, str]]: ...
timeout: float = None) -> List[Union[ChromiumElement, SessionElement]]: ...
def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \
-> Union[SessionElement, str, None]: ...
def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) -> Union[SessionElement, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: ...
def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ...
@ -127,15 +121,25 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def cookies_to_browser(self) -> None: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
def get_cookies(self,
as_dict: bool = False,
all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
def get_tab(self, tab_id: str = None) -> WebPageTab: ...
def get_tab(self, id_or_num: Union[str, WebPageTab, int] = None) -> WebPageTab: ...
def new_tab(self,
url: str = None,
new_window: bool = False,
background: bool = False,
new_context: bool = False) -> WebPageTab: ...
def close_driver(self) -> None: ...
def close_session(self) -> None: ...
def close(self) -> None: ...
# ----------------重写SessionPage的函数-----------------------
def post(self,
url: str,
@ -155,52 +159,22 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
@property
def download(self) -> DownloadKit: ...
cert: Any | None = ...) -> Union[bool, Response]: ...
@property
def set(self) -> WebPageSetter: ...
def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \
-> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[
Union[ChromiumElement, str, ChromiumFrame]]]: ...
def _find_elements(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
timeout: float = None,
single: bool = True,
relative: bool = False,
raise_err: bool = None) \
-> Union[ChromiumElement, SessionElement, ChromiumFrame, NoneElement, List[SessionElement],
List[Union[ChromiumElement, ChromiumFrame]]]: ...
def _set_start_options(self, dr_opt: Union[ChromiumDriver, DriverOptions, bool, None],
def _set_start_options(self,
dr_opt: Union[Driver, bool, None],
se_opt: Union[Session, SessionOptions, bool, None]) -> None: ...
def quit(self) -> None: ...
def _on_download_begin(self, **kwargs): ...
class WebPageSetter(ChromiumPageSetter):
_page: WebPage = ...
_session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumPageSetter = ...
def user_agent(self, ua: str, platform: str = None) -> None: ...
def headers(self, headers: dict) -> None: ...
def cookies(self, cookies) -> None: ...
class WebPageDownloadSetter(ChromiumDownloadSetter):
def __init__(self, page: WebPage):
self._page: WebPage = ...
self._behavior: str = ...
self._session: Session = None
@property
def _switched_DownloadKit(self) -> DownloadKit: ...
def save_path(self, path) -> None: ...
def by_browser(self) -> None: ...
def by_DownloadKit(self) -> None: ...
def _download_by_DownloadKit(self, **kwargs) -> None: ...
def quit(self, timeout: float = 5, force: bool = True) -> None: ...

View File

@ -1,20 +1,23 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from time import sleep
from time import sleep, perf_counter
from .commons.keys import modifierBit, keyDescriptionForString
from .commons.web import location_in_viewport
from ..errors import AlertExistsError
from .._functions.keys import modifierBit, keyDescriptionForString, input_text_or_keys, Keys
from .._functions.web import location_in_viewport
class ActionChains:
class Actions:
"""用于实现动作链的类"""
def __init__(self, page):
"""
:param page: ChromiumPage对象
:param page: ChromiumBase对象
"""
self.page = page
self._dr = page.driver
@ -22,12 +25,13 @@ class ActionChains:
self.curr_x = 0 # 视口坐标
self.curr_y = 0
def move_to(self, ele_or_loc, offset_x=0, offset_y=0):
def move_to(self, ele_or_loc, offset_x=0, offset_y=0, duration=.5):
"""鼠标移动到元素中点,或页面上的某个绝对坐标。可设置偏移量
当带偏移量时偏移量相对于元素左上角坐标
:param ele_or_loc: 元素对象绝对坐标或文本定位符坐标为tuple(int, int)形式
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: self
"""
is_loc = False
@ -38,7 +42,7 @@ class ActionChains:
elif isinstance(ele_or_loc, str) or 'ChromiumElement' in str(type(ele_or_loc)):
ele_or_loc = self.page(ele_or_loc)
self.page.scroll.to_see(ele_or_loc)
x, y = ele_or_loc.location if offset_x or offset_y else ele_or_loc.locations.midpoint
x, y = ele_or_loc.rect.location if offset_x or offset_y else ele_or_loc.rect.midpoint
lx = x + offset_x
ly = y + offset_y
else:
@ -50,29 +54,44 @@ class ActionChains:
clientHeight = self.page.run_js('return document.body.clientHeight;')
self.page.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2)
# # 这样设计为了应付那些不随滚动条滚动的元素
# 这样设计为了应付那些不随滚动条滚动的元素
if is_loc:
cx, cy = location_to_client(self.page, lx, ly)
else:
x, y = ele_or_loc.locations.viewport_location if offset_x or offset_y \
else ele_or_loc.locations.viewport_midpoint
x, y = ele_or_loc.rect.viewport_location if offset_x or offset_y \
else ele_or_loc.rect.viewport_midpoint
cx = x + offset_x
cy = y + offset_y
self._dr.Input.dispatchMouseEvent(type='mouseMoved', x=cx, y=cy, modifiers=self.modifier)
self.curr_x = cx
self.curr_y = cy
ox = cx - self.curr_x
oy = cy - self.curr_y
self.move(ox, oy, duration)
return self
def move(self, offset_x=0, offset_y=0):
def move(self, offset_x=0, offset_y=0, duration=.5):
"""鼠标相对当前位置移动若干位置
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: self
"""
self.curr_x += offset_x
self.curr_y += offset_y
self._dr.Input.dispatchMouseEvent(type='mouseMoved', x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
duration = .02 if duration < .02 else duration
num = int(duration * 50)
points = [(self.curr_x + i * (offset_x / num),
self.curr_y + i * (offset_y / num)) for i in range(1, num)]
points.append((self.curr_x + offset_x, self.curr_y + offset_y))
for x, y in points:
t = perf_counter()
self.curr_x = x
self.curr_y = y
self._dr.run('Input.dispatchMouseEvent', type='mouseMoved',
x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
ss = .02 - perf_counter() + t
if ss > 0:
sleep(ss)
return self
def click(self, on_ele=None):
@ -121,7 +140,7 @@ class ActionChains:
:return: self
"""
if on_ele:
self.move_to(on_ele)
self.move_to(on_ele, duration=0)
self._release('left')
return self
@ -139,7 +158,7 @@ class ActionChains:
:return: self
"""
if on_ele:
self.move_to(on_ele)
self.move_to(on_ele, duration=0)
self._release('right')
return self
@ -157,7 +176,7 @@ class ActionChains:
:return: self
"""
if on_ele:
self.move_to(on_ele)
self.move_to(on_ele, duration=0)
self._release('middle')
return self
@ -169,9 +188,9 @@ class ActionChains:
:return: self
"""
if on_ele:
self.move_to(on_ele)
self._dr.Input.dispatchMouseEvent(type='mousePressed', button=button, clickCount=count,
x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
self.move_to(on_ele, duration=0)
self._dr.run('Input.dispatchMouseEvent', type='mousePressed', button=button, clickCount=count,
x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
return self
def _release(self, button):
@ -179,8 +198,8 @@ class ActionChains:
:param button: 要释放的按键
:return: self
"""
self._dr.Input.dispatchMouseEvent(type='mouseReleased', button=button, clickCount=1,
x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
self._dr.run('Input.dispatchMouseEvent', type='mouseReleased', button=button, clickCount=1,
x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
return self
def scroll(self, delta_x=0, delta_y=0, on_ele=None):
@ -191,9 +210,9 @@ class ActionChains:
:return: self
"""
if on_ele:
self.move_to(on_ele)
self._dr.Input.dispatchMouseEvent(type='mouseWheel', x=self.curr_x, y=self.curr_y,
deltaX=delta_x, deltaY=delta_y, modifiers=self.modifier)
self.move_to(on_ele, duration=0)
self._dr.run('Input.dispatchMouseEvent', type='mouseWheel', x=self.curr_x, y=self.curr_y,
deltaX=delta_x, deltaY=delta_y, modifiers=self.modifier)
return self
def up(self, pixel):
@ -225,15 +244,17 @@ class ActionChains:
return self.move(pixel, 0)
def key_down(self, key):
"""按下键盘上的按键
:param key: 按键特殊字符见Keys
"""按下键盘上的按键
:param key: 使用Keys获取的按键'DEL'形式按键名称
:return: self
"""
key = getattr(Keys, key.upper(), key)
if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量
self.modifier |= modifierBit.get(key, 0)
return self
data = self._get_key_data(key, 'keyDown')
data['_ignore'] = AlertExistsError
self.page.run_cdp('Input.dispatchKeyEvent', **data)
return self
@ -242,24 +263,39 @@ class ActionChains:
:param key: 按键特殊字符见Keys
:return: self
"""
key = getattr(Keys, key.upper(), key)
if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量
self.modifier ^= modifierBit.get(key, 0)
return self
data = self._get_key_data(key, 'keyUp')
data['_ignore'] = AlertExistsError
self.page.run_cdp('Input.dispatchKeyEvent', **data)
return self
def type(self, text):
"""输入文本
:param text: 要输入的文本特殊字符和多个文本可用list或tuple传入
def type(self, keys):
"""用模拟键盘按键方式输入文本,可输入字符串,也可输入组合键,只能输入键盘上有的字符
:param keys: 要按下的按键特殊字符和多个文本可用list或tuple传入
:return: self
"""
for i in text:
modifiers = []
for i in keys:
for character in i:
self.key_down(character)
sleep(.05)
self.key_up(character)
if character in ('\ue009', '\ue008', '\ue00a', '\ue03d'):
modifiers.append(character)
else:
self.key_up(character)
for m in modifiers:
self.key_up(m)
return self
def input(self, text):
    """Input text; a key combination can also be entered by passing it as a tuple.

    :param text: text value or key combination
    :return: self
    """
    page = self.page
    input_text_or_keys(page, text)
    return self
def wait(self, second):

View File

@ -0,0 +1,108 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Tuple, Any, Literal
from .._base.driver import Driver
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
# Key names (upper and lower case), their private-use escape codes and the printable characters
# accepted as a key argument. 'command' / 'COMMAND' previously carried a trailing space, and the
# escape codes '\ue009', '\ue017' and '\ue03d' were each listed twice.
KEYS = Literal['NULL', 'CANCEL', 'HELP', 'BACKSPACE', 'BACK_SPACE', 'meta',
               'TAB', 'CLEAR', 'RETURN', 'ENTER', 'SHIFT', 'LEFT_SHIFT', 'CONTROL', 'command',
               'CTRL', 'LEFT_CONTROL', 'ALT', 'LEFT_ALT', 'PAUSE', 'ESCAPE', 'SPACE',
               'PAGE_UP', 'PAGE_DOWN', 'END', 'HOME', 'LEFT', 'ARROW_LEFT', 'UP',
               'ARROW_UP', 'RIGHT', 'ARROW_RIGHT', 'DOWN', 'ARROW_DOWN', 'INSERT',
               'DELETE', 'DEL', 'SEMICOLON', 'EQUALS', 'NUMPAD0', 'NUMPAD1', 'NUMPAD2',
               'NUMPAD3', 'NUMPAD4', 'NUMPAD5', 'NUMPAD6', 'NUMPAD7', 'NUMPAD8', 'NUMPAD9',
               'MULTIPLY', 'ADD', 'SUBTRACT', 'DECIMAL', 'DIVIDE', 'F1', 'F2',
               'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', 'META', 'COMMAND',
               'null', 'cancel', 'help', 'backspace', 'back_space', 'tab', 'clear', 'return', 'enter',
               'shift', 'left_shift', 'control', 'ctrl', 'left_control', 'alt', 'left_alt', 'pause',
               'escape', 'space', 'page_up', 'page_down', 'end', 'home', 'left', 'arrow_left', 'up',
               'arrow_up', 'right', 'arrow_right', 'down', 'arrow_down', 'insert', 'delete', 'del',
               'semicolon', 'equals', 'numpad0', 'numpad1', 'numpad2', 'numpad3', 'numpad4', 'numpad5',
               'numpad6', 'numpad7', 'numpad8', 'numpad9', 'multiply', 'add', 'subtract', 'decimal',
               'divide', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
               '\ue000', '\ue002', '\ue003', '\ue004', '\ue005', '\ue006', '\ue007', '\ue008', '\ue009',
               '\ue00a', '\ue00b', '\ue00c', '\ue00d', '\ue00e', '\ue00f', '\ue010', '\ue011',
               '\ue012', '\ue013', '\ue014', '\ue015', '\ue016', '\ue017', '\ue018', '\ue019',
               '\ue01a', '\ue01b', '\ue01c', '\ue01d', '\ue01e', '\ue01f', '\ue020', '\ue021', '\ue022',
               '\ue023', '\ue024', '\ue025', '\ue027', '\ue028', '\ue029', '\ue031', '\ue032', '\ue033', '\ue034',
               '\ue035', '\ue036', '\ue037', '\ue038', '\ue039', '\ue03a', '\ue03b', '\ue03c', '\ue03d',
               '`', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '=', 'q', 'w',
               'e', 'r', 't', 'y', 'u', 'i', 'o', 'p', '[', ']', '\\', 'a', 's', 'd', 'f',
               'g', 'h', 'j', 'k', 'l', ';', '\'', 'z', 'x', 'c', 'v', 'b', 'n', 'm', ',',
               '.', '/', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', '+',
               'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P', '{', '}', 'A', 'S', 'D',
               'F', 'G', 'H', 'J', 'K', 'L', ':', '"', 'Z', 'X', 'C', 'V', 'B', 'N', 'M', '<', '>', '?'
               ]
class Actions:
    # Type stub for the action-chain class; every method returns the Actions object itself
    # so that calls can be chained, except _get_key_data which returns a dict.

    def __init__(self, page: ChromiumBase):
        self.page: ChromiumBase = ...
        self._dr: Driver = ...
        self.modifier: int = ...
        self.curr_x: int = ...
        self.curr_y: int = ...

    # ----------------- mouse movement -----------------
    def move_to(self,
                ele_or_loc: Union[ChromiumElement, Tuple[int, int], str],
                offset_x: int = 0,
                offset_y: int = 0,
                duration: float = .5) -> Actions: ...

    def move(self,
             offset_x: int = 0,
             offset_y: int = 0,
             duration: float = .5) -> Actions: ...

    # ----------------- mouse buttons -----------------
    def click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def r_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def m_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def db_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def _hold(self,
              on_ele: Union[ChromiumElement, str] = None,
              button: str = 'left',
              count: int = 1) -> Actions: ...

    def _release(self, button: str) -> Actions: ...

    # ----------------- wheel and relative moves -----------------
    def scroll(self,
               delta_x: int = 0,
               delta_y: int = 0,
               on_ele: Union[ChromiumElement, str] = None) -> Actions: ...

    def up(self, pixel: int) -> Actions: ...

    def down(self, pixel: int) -> Actions: ...

    def left(self, pixel: int) -> Actions: ...

    def right(self, pixel: int) -> Actions: ...

    # ----------------- keyboard -----------------
    def key_down(self, key: Union[KEYS, str]) -> Actions: ...

    def key_up(self, key: Union[KEYS, str]) -> Actions: ...

    def type(self, keys: Union[KEYS, str, list, tuple]) -> Actions: ...

    def input(self, text: Any) -> Actions: ...

    def wait(self, second: float) -> Actions: ...

    def _get_key_data(self, key: str, action: str) -> dict: ...
def location_to_client(
        page,
        lx: int,
        ly: int,
) -> tuple:
    # Type stub only; presumably converts a page location to viewport coordinates -- confirm
    # against the implementation module.
    ...

View File

@ -0,0 +1,161 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from time import perf_counter, sleep
from .._functions.settings import Settings
from .._functions.web import offset_scroll
from ..errors import CanNotClickError, CDPError, NoRectError, AlertExistsError
class Clicker(object):
def __init__(self, ele):
"""
:param ele: ChromiumElement
"""
self._ele = ele
def __call__(self, by_js=False, timeout=1.5, wait_stop=True):
"""点击元素
如果遇到遮挡可选择是否用js点击
:param by_js: 是否用js点击为None时先用模拟点击遇到遮挡改用js为True时直接用js点击为False时只用模拟点击
:param timeout: 模拟点击的超时时间等待元素可见可用进入视口
:param wait_stop: 是否等待元素运动结束再执行点击
:return: 是否点击成功
"""
return self.left(by_js, timeout, wait_stop)
def left(self, by_js=False, timeout=1.5, wait_stop=True):
"""点击元素可选择是否用js点击
:param by_js: 是否用js点击为None时先用模拟点击遇到遮挡改用js为True时直接用js点击为False时只用模拟点击
:param timeout: 模拟点击的超时时间等待元素可见可用进入视口
:param wait_stop: 是否等待元素运动结束再执行点击
:return: 是否点击成功
"""
if self._ele.tag == 'option':
if self._ele.states.is_selected:
self._ele.parent('t:select').select.cancel_by_option(self._ele)
else:
self._ele.parent('t:select').select.by_option(self._ele)
return
if not by_js: # 模拟点击
can_click = False
timeout = self._ele.page.timeout if timeout is None else timeout
rect = None
if timeout == 0:
try:
self._ele.scroll.to_see()
if self._ele.states.is_enabled and self._ele.states.is_displayed:
rect = self._ele.rect.viewport_corners
can_click = True
except NoRectError:
if by_js is False:
raise
else:
rect = self._ele.states.has_rect
end_time = perf_counter() + timeout
while not rect and perf_counter() < end_time:
rect = self._ele.states.has_rect
sleep(.001)
if wait_stop and rect:
self._ele.wait.stop_moving(timeout=end_time - perf_counter())
if rect:
self._ele.scroll.to_see()
rect = self._ele.rect.corners
while perf_counter() < end_time:
if self._ele.states.is_enabled and self._ele.states.is_displayed:
can_click = True
break
sleep(.001)
elif by_js is False:
raise NoRectError
if can_click and not self._ele.states.is_in_viewport:
by_js = True
elif can_click and (by_js is False or not self._ele.states.is_covered):
x = rect[1][0] - (rect[1][0] - rect[0][0]) / 2
y = rect[0][0] + 3
try:
r = self._ele.page.run_cdp('DOM.getNodeForLocation', x=x, y=y, includeUserAgentShadowDOM=True,
ignorePointerEventsNone=True)
if r['backendNodeId'] != self._ele._backend_id:
vx, vy = self._ele.rect.viewport_midpoint
else:
vx, vy = self._ele.rect.viewport_click_point
except CDPError:
vx, vy = self._ele.rect.viewport_midpoint
self._click(vx, vy)
return True
if by_js is not False:
self._ele.run_js('this.click();')
return True
if Settings.raise_when_click_failed:
raise CanNotClickError
return False
def right(self):
"""右键单击"""
self._ele.page.scroll.to_see(self._ele)
x, y = self._ele.rect.viewport_click_point
self._click(x, y, 'right')
def middle(self):
"""中键单击"""
self._ele.page.scroll.to_see(self._ele)
x, y = self._ele.rect.viewport_click_point
self._click(x, y, 'middle')
def at(self, offset_x=None, offset_y=None, button='left', count=1):
    """Click at an offset measured from the element's top-left corner.

    When both offsets are omitted, the integer midpoint of the element's size
    is used. If only one offset is None it is forwarded unchanged to
    ``offset_scroll`` (NOTE(review): how that helper treats a single None is
    not visible here - confirm).

    :param offset_x: x offset relative to the element's top-left corner
    :param offset_y: y offset relative to the element's top-left corner
    :param button: which button to click: left, middle, right, back, forward
    :param count: number of clicks
    :return: None
    """
    target = self._ele
    target.page.scroll.to_see(target)

    if offset_x is None and offset_y is None:
        width, height = target.rect.size
        offset_x, offset_y = width // 2, height // 2

    click_x, click_y = offset_scroll(target, offset_x, offset_y)
    self._click(click_x, click_y, button, count)
def multiple(self, times=2):
    """Click the element several times in a row.

    Delegates to ``at`` with only ``count`` set, so the remaining arguments of
    ``at`` keep their defaults.

    :param times: number of clicks; the default performs a double click
    :return: None
    """
    click_count = times
    self.at(count=click_count)
def _click(self, client_x, client_y, button='left', count=1):
    """Perform the click by dispatching a mouse press followed by a release.

    Both events are sent through the page's ``run_cdp`` with
    ``_ignore=AlertExistsError``; only the press event carries ``clickCount``.

    :param client_x: x coordinate inside the viewport
    :param client_y: y coordinate inside the viewport
    :param button: 'left' 'right' 'middle' 'back' 'forward'
    :param count: number of clicks
    :return: None
    """
    send = self._ele.page.run_cdp
    # Arguments shared by the press and the release event.
    shared = dict(x=client_x, y=client_y, button=button, _ignore=AlertExistsError)

    send('Input.dispatchMouseEvent', type='mousePressed', clickCount=count, **shared)
    send('Input.dispatchMouseEvent', type='mouseReleased', **shared)
# -------------即将废弃--------------
def twice(self):
    """Double-click the element.

    Listed under the file's "about to be deprecated" marker; it simply calls
    ``at`` with a click count of 2.

    :return: None
    """
    double = 2
    self.at(count=double)

View File

@ -0,0 +1,29 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Optional
from .._elements.chromium_element import ChromiumElement
class Clicker(object):
    """Type stub for the click helper bound to a single ChromiumElement."""

    def __init__(self, ele: ChromiumElement):
        # The element every click of this helper is sent to.
        self._ele: ChromiumElement = ...

    # Calling the object has the same signature as ``left``.
    def __call__(self, by_js: Optional[bool] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...

    # Left click; returns a bool describing whether the click was performed.
    def left(self, by_js: Optional[bool] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...

    # Right click at the element's click point.
    def right(self) -> None: ...

    # Middle click at the element's click point.
    def middle(self) -> None: ...

    # Click at an offset from the element's top-left corner.
    def at(self, offset_x: float = None, offset_y: float = None, button: str = 'left', count: int = 1) -> None: ...

    # Click ``times`` times in a row.
    def multiple(self, times: int = 2) -> None: ...

    # Low-level press/release dispatch at viewport coordinates.
    def _click(self, client_x: float, client_y: float, button: str = 'left', count: int = 1) -> None: ...

View File

@ -0,0 +1,105 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from http.cookiejar import Cookie
from .._functions.web import set_browser_cookies, set_session_cookies
class CookiesSetter(object):
    """Cookie setter that works on a browser page through CDP commands."""

    def __init__(self, page):
        """
        :param page: page object whose cookies are managed; must offer ``run_cdp``
        """
        self._page = page

    def __call__(self, cookies):
        """Set one or more cookies on the browser page.

        A single dict that has both 'name' and 'value' keys, or a single
        ``http.cookiejar.Cookie``, is wrapped in a list first; any other value
        is handed to ``set_browser_cookies`` unchanged.

        :param cookies: cookie data
        :return: None
        """
        is_single_dict = isinstance(cookies, dict) and 'name' in cookies and 'value' in cookies
        if is_single_dict or isinstance(cookies, Cookie):
            cookies = [cookies]
        set_browser_cookies(self._page, cookies)

    def remove(self, name, url=None, domain=None, path=None):
        """Delete one cookie with the CDP command ``Network.deleteCookies``.

        :param name: the cookie's name field
        :param url: the cookie's url field, optional
        :param domain: the cookie's domain field, optional
        :param path: the cookie's path field, optional
        :return: None
        """
        params = {'name': name}
        optional = (('url', url), ('domain', domain), ('path', path))
        # Only filters that were actually supplied are sent along.
        params.update((key, value) for key, value in optional if value is not None)
        self._page.run_cdp('Network.deleteCookies', **params)

    def clear(self):
        """Clear cookies with the CDP command ``Network.clearBrowserCookies``."""
        self._page.run_cdp('Network.clearBrowserCookies')
class SessionCookiesSetter(object):
    """Cookie setter that works on the ``session`` object of a page."""

    def __init__(self, page):
        """
        :param page: page object exposing a ``session`` attribute
        """
        self._page = page

    def __call__(self, cookies):
        """Set one or more cookies on the page's session.

        A single dict that has both 'name' and 'value' keys, or a single
        ``http.cookiejar.Cookie``, is wrapped in a list first; any other value
        is handed to ``set_session_cookies`` unchanged.

        :param cookies: cookie data
        :return: None
        """
        is_single_dict = isinstance(cookies, dict) and 'name' in cookies and 'value' in cookies
        if is_single_dict or isinstance(cookies, Cookie):
            cookies = [cookies]
        set_session_cookies(self._page.session, cookies)

    def remove(self, name):
        """Delete one cookie by setting its value to None in the session's cookie jar.

        :param name: the cookie's name field
        :return: None
        """
        jar = self._page.session.cookies
        jar.set(name, None)

    def clear(self):
        """Clear every cookie held by the session's cookie jar."""
        jar = self._page.session.cookies
        jar.clear()
class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter):
    """Cookie setter for pages that switch between driver ('d') and session ('s') mode.

    With the base order ``(CookiesSetter, SessionCookiesSetter)``, ``super()``
    resolves to the CDP-based implementation and ``super(CookiesSetter, self)``
    to the session-based one. When neither mode condition holds, the methods
    do nothing.
    """

    def _driver_active(self):
        """Tell whether the page is in 'd' mode and has a driver."""
        return self._page.mode == 'd' and self._page._has_driver

    def _session_active(self):
        """Tell whether the page is in 's' mode and has a session."""
        return self._page.mode == 's' and self._page._has_session

    def __call__(self, cookies):
        """Set one or more cookies on the side that matches the current mode.

        :param cookies: cookie data
        :return: None
        """
        if self._driver_active():
            super().__call__(cookies)
        elif self._session_active():
            super(CookiesSetter, self).__call__(cookies)

    def remove(self, name, url=None, domain=None, path=None):
        """Delete one cookie on the side that matches the current mode.

        :param name: the cookie's name field
        :param url: the cookie's url field, optional, only valid in d mode
        :param domain: the cookie's domain field, optional, only valid in d mode
        :param path: the cookie's path field, optional, only valid in d mode
        :return: None
        :raises AttributeError: in s mode when url, domain or path is given
        """
        if self._driver_active():
            super().remove(name, url, domain, path)
            return

        if self._session_active():
            if url or domain or path:
                raise AttributeError('url、domain、path参数只有d模式下有效。')
            super(CookiesSetter, self).remove(name)

    def clear(self):
        """Clear cookies on the side that matches the current mode."""
        if self._driver_active():
            super().clear()
        elif self._session_active():
            super(CookiesSetter, self).clear()

View File

@ -0,0 +1,52 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from http.cookiejar import Cookie
from typing import Union
from requests.cookies import RequestsCookieJar
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_tab import WebPageTab
from .._pages.session_page import SessionPage
from .._pages.web_page import WebPage
class CookiesSetter(object):
    """Type stub for the CDP-based cookie setter."""

    # Page whose browser cookies are managed.
    _page: ChromiumBase

    def __init__(self, page: ChromiumBase): ...

    # Set one or more cookies.
    def __call__(self, cookies: Union[RequestsCookieJar, Cookie, list, tuple, str, dict]) -> None: ...

    # Delete one cookie; url, domain and path are optional filters.
    def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ...

    # Remove all cookies.
    def clear(self) -> None: ...
class SessionCookiesSetter(object):
    """Type stub for the session-based cookie setter."""

    # Page whose session cookies are managed.
    _page: SessionPage

    def __init__(self, page: SessionPage): ...

    # Set one or more cookies.
    def __call__(self, cookies: Union[RequestsCookieJar, Cookie, list, tuple, str, dict]) -> None: ...

    # Delete one cookie by name.
    def remove(self, name: str) -> None: ...

    # Remove all cookies.
    def clear(self) -> None: ...
class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter):
    """Type stub for the cookie setter of pages that have both a driver and a session mode."""

    # Page (or tab) whose cookies are managed.
    _page: Union[WebPage, WebPageTab]

    # Fixed: the parameter was annotated as SessionPage, which contradicted
    # the declared type of ``_page`` above.
    def __init__(self, page: Union[WebPage, WebPageTab]): ...

    # Set one or more cookies.
    def __call__(self, cookies: Union[RequestsCookieJar, Cookie, list, tuple, str, dict]) -> None: ...

    # Delete one cookie; url, domain and path are optional filters.
    def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ...

    # Remove all cookies.
    def clear(self) -> None: ...

View File

@ -0,0 +1,330 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from os.path import sep
from pathlib import Path
from shutil import move
from time import sleep, perf_counter
from DataRecorder.tools import get_usable_path
class DownloadManager(object):
    """Tracks the download tasks of one browser and applies per-tab settings.

    Downloads are requested with the 'allowAndName' behavior; the progress
    handler then moves the file, stored under the task guid, to its final
    name inside the tab's target directory.
    """

    def __init__(self, browser):
        """
        :param browser: Browser object
        """
        self._browser = browser
        self._page = browser.page
        self._when_download_file_exists = 'rename'

        # Creating the settings object registers it for the main page's tab.
        main_settings = TabDownloadSettings(self._page.tab_id)
        main_settings.path = self._page.download_path

        self._missions = {}  # {guid: DownloadMission}
        self._tab_missions = {}  # {tab_id: [guid, ...]}
        self._flags = {}  # {tab_id: bool or DownloadMission}

        if self._page.download_path:
            self._enable_download_events(self._page.download_path)
        else:
            self._running = False

    @property
    def missions(self):
        """Return the dict of unfinished download tasks, keyed by guid."""
        return self._missions

    def set_path(self, tab_id, path):
        """Set the download path of one tab.

        The browser-level download behavior is only (re)configured when the
        tab is the main page's tab or when download management is not yet
        running.

        :param tab_id: tab id
        :param path: download path, absolute path as str
        :return: None
        """
        TabDownloadSettings(tab_id).path = path
        if tab_id == self._page.tab_id or not self._running:
            self._enable_download_events(path)

    def set_rename(self, tab_id, rename=None, suffix=None):
        """Set the file name to use for the next download of one tab.

        :param tab_id: tab id
        :param rename: file name; may omit the extension, the remote file's one is then used
        :param suffix: explicit extension that replaces the remote file's one
        :return: None
        """
        settings = TabDownloadSettings(tab_id)
        settings.rename = rename
        settings.suffix = suffix

    def set_file_exists(self, tab_id, mode):
        """Set what one tab does when the target file already exists.

        :param tab_id: tab id
        :param mode: strategy name; 'skip' and 'overwrite' are handled explicitly when a
                     download begins, any other value leaves the existing file alone
        :return: None
        """
        TabDownloadSettings(tab_id).when_file_exists = mode

    def set_flag(self, tab_id, flag):
        """Set the download waiting flag of one tab.

        :param tab_id: tab id
        :param flag: waiting flag
        :return: None
        """
        self._flags[tab_id] = flag

    def get_flag(self, tab_id):
        """Get the download waiting flag of one tab.

        :param tab_id: tab id
        :return: the stored flag (a bool or a task object), or None when nothing is stored
        """
        return self._flags.get(tab_id, None)

    def get_tab_missions(self, tab_id):
        """Get the tasks one tab is currently downloading.

        :param tab_id: tab id
        :return: list of the guids of the tab's running tasks, empty when there are none
        """
        return self._tab_missions.get(tab_id, [])

    def set_done(self, mission, state, final_path=None):
        """Mark a task as finished and drop it from the bookkeeping.

        :param mission: task object
        :param state: task state; ignored when the task is already 'canceled' or 'skipped'
        :param final_path: final path of the file
        :return: None
        """
        if mission.state not in ('canceled', 'skipped'):
            mission.state = state
        mission.final_path = final_path

        tab_guids = self._tab_missions.get(mission.tab_id)
        if tab_guids is not None and mission.id in tab_guids:
            tab_guids.remove(mission.id)
        self._missions.pop(mission.id, None)
        mission._is_done = True

    def cancel(self, mission):
        """Cancel a task and delete its final file when one is recorded.

        :param mission: task object
        :return: None
        """
        mission.state = 'canceled'
        self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)
        if mission.final_path:
            Path(mission.final_path).unlink(missing_ok=True)

    def skip(self, mission):
        """Skip a task.

        :param mission: task object
        :return: None
        """
        mission.state = 'skipped'
        self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)

    def clear_tab_info(self, tab_id):
        """Remove everything stored for a tab; meant to be called when the tab closes.

        :param tab_id: tab id
        :return: None
        """
        self._tab_missions.pop(tab_id, None)
        self._flags.pop(tab_id, None)
        TabDownloadSettings.TABS.pop(tab_id, None)

    def _enable_download_events(self, path):
        """Register the download callbacks and point the browser at ``path``.

        :param path: directory the browser should download into
        :return: None
        """
        driver = self._browser.driver
        driver.set_callback('Browser.downloadProgress', self._onDownloadProgress)
        driver.set_callback('Browser.downloadWillBegin', self._onDownloadWillBegin)

        r = self._browser.run_cdp('Browser.setDownloadBehavior', downloadPath=path,
                                  behavior='allowAndName', eventsEnabled=True)
        if 'error' in r:
            print('浏览器版本太低无法使用下载管理功能。')
        self._running = True

    @staticmethod
    def _resolve_name(rename, suffix, suggested):
        """Work out the file name a download is saved under.

        :param rename: user supplied file name, or a falsy value for none
        :param suffix: user supplied extension; None means "not set", '' means "no extension"
        :param suggested: file name suggested by the browser
        :return: file name as str
        """
        if rename:
            if suffix is not None:
                return f'{rename}.{suffix}' if suffix else rename

            parts = suggested.rsplit('.', 1)
            remote_ext = parts[-1] if len(parts) > 1 else ''
            parts = rename.rsplit('.', 1)
            rename_ext = parts[-1] if len(parts) > 1 else ''
            # Without a remote extension there is nothing to append; appending
            # it anyway used to produce names ending in a bare dot.
            if not remote_ext or rename_ext == remote_ext:
                return rename
            return f'{rename}.{remote_ext}'

        if suffix is not None:
            stem = suggested.rsplit('.', 1)[0]
            return f'{stem}.{suffix}' if suffix else stem

        return suggested

    def _onDownloadWillBegin(self, **kwargs):
        """Create the task object for a download that is about to begin."""
        guid = kwargs['guid']
        tab_id = self._browser._frames.get(kwargs['frameId'], self._page.tab_id)
        # Tabs without their own settings fall back to the main page's settings.
        known_tab = tab_id if tab_id in TabDownloadSettings.TABS else self._page.tab_id
        settings = TabDownloadSettings(known_tab)

        name = self._resolve_name(settings.rename, settings.suffix, kwargs['suggestedFilename'])
        # rename and suffix apply to one download only.
        if settings.rename:
            settings.rename = None
            settings.suffix = None
        elif settings.suffix is not None:
            settings.suffix = None

        skip = False
        goal_path = Path(settings.path) / name
        if goal_path.exists():
            if settings.when_file_exists == 'skip':
                skip = True
            elif settings.when_file_exists == 'overwrite':
                goal_path.unlink()

        m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._page.download_path)
        self._missions[guid] = m

        if self.get_flag(tab_id) is False:  # a False flag means: cancel this task
            self.cancel(m)
        elif skip:
            self.skip(m)
        else:
            self._tab_missions.setdefault(tab_id, []).append(guid)

        if self.get_flag(tab_id) is not None:
            self._flags[tab_id] = m

    def _onDownloadProgress(self, **kwargs):
        """Update or finish a task when the browser reports a download state change."""
        mission = self._missions.get(kwargs['guid'])
        if mission is None:
            return

        state = kwargs['state']
        if state == 'inProgress':
            mission.received_bytes = kwargs['receivedBytes']
            mission.total_bytes = kwargs['totalBytes']

        elif state == 'completed':
            # The browser stores the file under its guid in the browser-level
            # download directory, which the task records as save_path.
            downloaded = f'{mission.save_path}{sep}{mission.id}'
            if mission.state == 'skipped':
                Path(downloaded).unlink(missing_ok=True)
                self.set_done(mission, 'skipped')
                return

            mission.received_bytes = kwargs['receivedBytes']
            mission.total_bytes = kwargs['totalBytes']
            # Fixed: the source used to be built from mission.path (the tab's
            # target directory), which is not where the browser saved the file
            # whenever the two directories differ.
            to_path = str(get_usable_path(f'{mission.path}{sep}{mission.name}'))
            move(downloaded, to_path)
            self.set_done(mission, 'completed', final_path=to_path)

        else:  # 'canceled'
            self.set_done(mission, 'canceled')
class TabDownloadSettings(object):
    """Download settings of one tab; at most one instance exists per tab id.

    Instances are registered in the class-level ``TABS`` dict, and constructing
    the class again with a registered tab id hands back the stored instance.
    """
    TABS = {}

    def __new__(cls, tab_id):
        """
        :param tab_id: tab id
        """
        try:
            return cls.TABS[tab_id]
        except KeyError:
            return object.__new__(cls)

    def __init__(self, tab_id):
        """
        :param tab_id: tab id
        """
        # __init__ also runs when __new__ returned a stored instance; the
        # marker keeps that instance's values from being reset.
        if not hasattr(self, '_created'):
            self._created = True
            self.tab_id = tab_id
            self.rename = None
            self.suffix = None
            self.path = ''
            self.when_file_exists = 'rename'
            TabDownloadSettings.TABS[tab_id] = self
class DownloadMission(object):
    """One download task; it is created and updated by the download manager."""

    def __init__(self, mgr, tab_id, _id, path, name, url, save_path):
        """
        :param mgr: the download manager object that owns this task
        :param tab_id: tab id
        :param _id: task id
        :param path: directory the file is finally stored in
        :param name: file name
        :param url: url
        :param save_path: directory the browser downloads into
        """
        self._mgr = mgr
        self.url = url
        self.tab_id = tab_id
        self.id = _id
        self.path = path
        self.name = name
        self.state = 'running'
        self.total_bytes = None
        self.received_bytes = 0
        self.final_path = None
        self.save_path = save_path
        self._is_done = False

    def __repr__(self):
        return f'<DownloadMission {id(self)} {self.rate}>'

    @property
    def rate(self):
        """Return the progress as a percentage, or None while the total size is unknown."""
        if not self.total_bytes:
            return None
        return round((self.received_bytes / self.total_bytes) * 100, 2)

    @property
    def is_done(self):
        """Return whether the task has finished (completed, canceled or skipped)."""
        return self._is_done

    def cancel(self):
        """Cancel the task; if it already completed, the downloaded file is deleted."""
        self._mgr.cancel(self)

    def wait(self, show=True, timeout=None, cancel_if_timeout=True):
        """Wait for the task to finish.

        :param show: whether to print download information
        :param timeout: timeout in seconds, None waits without limit
        :param cancel_if_timeout: whether to cancel the task when the timeout is reached
        :return: the full path of the file when one is recorded, otherwise False
        """
        if show:
            print(f'url{self.url}')
            print(f'文件名:{self.name}')
            print(f'目标路径:{self.path}')

        # None means "no deadline".
        end_time = None if timeout is None else perf_counter() + timeout
        # Fixed: with a timeout the loop used to ignore is_done and always
        # slept until the deadline, even when the download had finished.
        while not self.is_done and (end_time is None or perf_counter() < end_time):
            if show:
                print(f'\r{self.rate}% ', end='')
            sleep(.2)

        if not self.is_done and cancel_if_timeout:
            self.cancel()

        if show:
            if self.state == 'completed':
                print(f'下载完成 {self.final_path}')
            elif self.state == 'canceled':
                print('下载取消')
            elif self.state == 'skipped':
                print('已跳过')
            print()

        return self.final_path if self.final_path else False

View File

@ -0,0 +1,89 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Dict, Optional, Union, Literal
from .._base.browser import Browser
from .._pages.chromium_page import ChromiumPage
class DownloadManager(object):
    """Type stub for the browser-level download manager."""

    _browser: Browser = ...
    _page: ChromiumPage = ...
    _when_download_file_exists: str = ...
    # Unfinished tasks keyed by guid.
    _missions: Dict[str, DownloadMission] = ...
    _tab_missions: dict = ...
    _flags: dict = ...
    _running: bool = ...

    def __init__(self, browser: Browser): ...

    @property
    def missions(self) -> Dict[str, DownloadMission]: ...

    def set_path(self, tab_id: str, path: str) -> None: ...

    def set_rename(self, tab_id: str, rename: str = None, suffix: str = None) -> None: ...

    def set_file_exists(self, tab_id: str, mode: Literal['rename', 'skip', 'overwrite']) -> None: ...

    # Fixed: Optional[...] accepts exactly one type argument, so the former
    # Optional[bool, DownloadMission] was not a valid annotation.
    def set_flag(self, tab_id: str, flag: Optional[Union[bool, DownloadMission]]) -> None: ...

    def get_flag(self, tab_id: str) -> Optional[Union[bool, DownloadMission]]: ...

    def get_tab_missions(self, tab_id: str) -> list: ...

    def set_done(self, mission: DownloadMission, state: str, final_path: str = None) -> None: ...

    def cancel(self, mission: DownloadMission) -> None: ...

    def skip(self, mission: DownloadMission) -> None: ...

    def clear_tab_info(self, tab_id: str) -> None: ...

    # CDP event handlers; they receive the event parameters as keyword arguments.
    def _onDownloadWillBegin(self, **kwargs) -> None: ...

    def _onDownloadProgress(self, **kwargs) -> None: ...
class TabDownloadSettings(object):
    """Type stub for the per-tab download settings object."""

    # Registry of the created settings objects, keyed by tab id.
    TABS: dict = ...
    tab_id: str = ...
    # Fixed: Optional[...] accepts exactly one type argument.
    # NOTE(review): the implementation's __init__ does not assign this
    # attribute - confirm whether it is still needed.
    waiting_flag: Optional[Union[bool, dict]] = ...
    rename: Optional[str] = ...
    suffix: Optional[str] = ...
    path: Optional[str] = ...
    when_file_exists: str = ...

    # The implementation returns the already registered object for a known tab id.
    def __new__(cls, tab_id: str) -> TabDownloadSettings: ...

    def __init__(self, tab_id: str): ...
class DownloadMission(object):
    """Type stub for a single download task."""

    tab_id: str = ...
    _mgr: DownloadManager = ...
    url: str = ...
    id: str = ...
    path: str = ...
    name: str = ...
    state: str = ...
    # None until the browser reports a total size.
    total_bytes: Optional[int] = ...
    received_bytes: int = ...
    final_path: Optional[str] = ...
    save_path: str = ...
    _is_done: bool = ...

    def __init__(self, mgr: DownloadManager, tab_id: str, _id: str, path: str, name: str, url: str,
                 save_path: str): ...

    # Fixed: the implementation returns None while total_bytes is unknown.
    @property
    def rate(self) -> Optional[float]: ...

    @property
    def is_done(self) -> bool: ...

    def cancel(self) -> None: ...

    # Returns the final path as str, or False when no final path is recorded.
    def wait(self, show: bool = True, timeout: Optional[float] = None,
             cancel_if_timeout: bool = True) -> Union[bool, str]: ...

View File

@ -0,0 +1,595 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from base64 import b64decode
from json import JSONDecodeError, loads
from queue import Queue
from re import search
from time import perf_counter, sleep
from requests.structures import CaseInsensitiveDict
from .._base.driver import Driver
from .._functions.settings import Settings
from ..errors import WaitTimeoutError
class Listener(object):
    """Base class of the network listener.

    It opens its own Driver connection to the page target, collects matching
    requests as DataPacket objects and queues each one once its loading has
    finished or failed.
    """

    # Every CDP event _set_callback subscribes to; pause() unsubscribes from all of them.
    _CALLBACK_EVENTS = ('Network.requestWillBeSent',
                        'Network.requestWillBeSentExtraInfo',
                        'Network.responseReceived',
                        'Network.responseReceivedExtraInfo',
                        'Network.loadingFinished',
                        'Network.loadingFailed')

    def __init__(self, page):
        """
        :param page: ChromiumBase object
        """
        self._page = page
        self._address = page.address
        self._target_id = page._target_id
        self._driver = None
        self._running_requests = 0

        self._caught = None
        self._request_ids = None
        self._extra_info_ids = None

        self.listening = False
        self.tab_id = None

        self._targets = True
        self._is_regex = False
        self._method = ('GET', 'POST')
        self._res_type = True

    @property
    def targets(self):
        """Return the listening targets."""
        return self._targets

    def set_targets(self, targets=True, is_regex=False, method=('GET', 'POST'), res_type=True):
        """Specify which data packets to capture. An argument passed as None keeps its current setting.

        :param targets: url features of the packets to match, several can be passed in a list etc.; True captures all
        :param is_regex: whether the targets are regular expressions
        :param method: request methods to listen for, several can be given; True listens to all
        :param res_type: resource types to listen for, several can be given; True listens to all. Possible values:
        Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
        Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
        :return: None
        :raises TypeError: when targets, method or res_type has an unsupported type
        """
        if targets is not None:
            if not isinstance(targets, (str, list, tuple, set)) and targets is not True:
                raise TypeError('targets只能是str、list、tuple、set、True。')
            if targets is True:
                self._targets = True
            else:
                self._targets = {targets} if isinstance(targets, str) else set(targets)

        if is_regex is not None:
            self._is_regex = is_regex

        if method is not None:
            if isinstance(method, str):
                self._method = {method.upper()}
            elif isinstance(method, (list, tuple, set)):
                self._method = {i.upper() for i in method}
            elif method is True:
                self._method = True
            else:
                raise TypeError('method参数只能是str、list、tuple、set、True类型。')

        if res_type is not None:
            if isinstance(res_type, str):
                self._res_type = {res_type.upper()}
            elif isinstance(res_type, (list, tuple, set)):
                self._res_type = {i.upper() for i in res_type}
            elif res_type is True:
                self._res_type = True
            else:
                raise TypeError('res_type参数只能是str、list、tuple、set、True类型。')

    def start(self, targets=None, is_regex=None, method=None, res_type=None):
        """Start capturing target requests; results caught so far are cleared on every call.

        :param targets: url features of the packets to match, several can be passed in a list etc.; True captures all
        :param is_regex: whether the targets are regular expressions; None keeps the current setting
        :param method: request methods to listen for; True listens to all, None keeps the current setting
        :param res_type: resource types to listen for; True listens to all, None keeps the current setting.
        Possible values:
        Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
        Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
        :return: None
        """
        if targets or is_regex is not None or method or res_type:
            self.set_targets(targets, is_regex, method, res_type)
        self.clear()

        if self.listening:
            return

        self._driver = Driver(self._target_id, 'page', self._address)
        self._driver.run('Network.enable')

        self._set_callback()
        self.listening = True

    def wait(self, count=1, timeout=None, fit_count=True, raise_err=None):
        """Wait until the given number of matching data packets has been caught.

        :param count: number of packets to catch
        :param timeout: timeout in seconds; a falsy value (None or 0) waits without limit
        :param fit_count: whether the full count is required; on timeout True gives False (or raises),
                          False gives the packets caught so far
        :param raise_err: whether to raise on timeout; None follows the Settings value
        :return: a packet object when count is 1, a list when it is larger; False on timeout when
                 fit_count is True or nothing was caught
        :raises RuntimeError: when listening has not been started or is paused
        :raises WaitTimeoutError: on timeout when raising is enabled
        """
        if not self.listening:
            raise RuntimeError('监听未启动或已暂停。')

        deadline = perf_counter() + timeout if timeout else None
        fail = False
        while self._caught.qsize() < count:
            if deadline is not None and perf_counter() > deadline:
                fail = True
                break
            # Fixed: the timed branch used to spin without sleeping.
            sleep(.05)

        if fail:
            if fit_count or not self._caught.qsize():
                # Fixed: an explicit raise_err=False is honoured; Settings is
                # only consulted when raise_err is None.
                if raise_err is True or (raise_err is None and Settings.raise_when_wait_failed is True):
                    raise WaitTimeoutError(f'等待数据包失败(等待{timeout}秒)。')
                return False
            return [self._caught.get_nowait() for _ in range(self._caught.qsize())]

        if count == 1:
            return self._caught.get_nowait()

        return [self._caught.get_nowait() for _ in range(count)]

    def steps(self, count=None, timeout=None, gap=1):
        """Yield caught packets step by step, e.g. to act (turn a page) after every few packets.

        :param count: total number of packets to catch, None means unlimited
        :param timeout: waiting time per step, None means unlimited
        :param gap: how many packets are returned per step
        :return: generator yielding one packet (gap == 1) or a list of ``gap`` packets
        """
        caught = 0
        end = perf_counter() + timeout if timeout else None
        while True:
            if timeout and perf_counter() > end:
                return
            if self._caught.qsize() >= gap:
                yield self._caught.get_nowait() if gap == 1 else [self._caught.get_nowait() for _ in range(gap)]
                if timeout:
                    # Every delivered step restarts the timeout window.
                    end = perf_counter() + timeout
                if count:
                    caught += gap
                    if caught >= count:
                        return
            sleep(.05)

    def stop(self):
        """Stop listening and clear the caught packets."""
        if self.listening:
            self.pause()
            self.clear()
        # Fixed: stop() used to raise AttributeError when no driver existed
        # (never started, or stopped twice).
        if self._driver is not None:
            self._driver.stop()
            self._driver = None

    def pause(self, clear=True):
        """Pause listening.

        :param clear: whether to clear the queue of caught packets
        :return: None
        """
        if self.listening:
            # Fixed: the two ExtraInfo callbacks were left subscribed and kept
            # changing the request counter while paused.
            for event in self._CALLBACK_EVENTS:
                self._driver.set_callback(event, None)
            self.listening = False
        if clear:
            self.clear()

    def resume(self):
        """Resume a paused listener."""
        if self.listening:
            return
        self._set_callback()
        self.listening = True

    def clear(self):
        """Clear the results and reset the bookkeeping of running requests."""
        self._request_ids = {}
        self._extra_info_ids = {}
        self._caught = Queue(maxsize=0)
        self._running_requests = 0

    def wait_silent(self, timeout=None):
        """Wait until no request is running any more.

        :param timeout: timeout in seconds, None waits without limit
        :return: whether the wait succeeded
        :raises RuntimeError: when listening has not been started
        """
        if not self.listening:
            raise RuntimeError('监听未启动用listen.start()启动。')
        if timeout is None:
            while self._running_requests > 0:
                sleep(.1)
            return True

        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            if self._running_requests <= 0:
                return True
            sleep(.1)
        return False

    def _to_target(self, target_id, address, page):
        """Switch the listener to another page object.

        :param target_id: _target_id of the new page object
        :param address: address of the new page object
        :param page: the new page object
        :return: None
        """
        self._target_id = target_id
        self._address = address
        self._page = page
        debug = False
        if self._driver:
            debug = self._driver._debug
            self._driver.stop()
        if self.listening:
            self._driver = Driver(self._target_id, 'page', self._address)
            self._driver._debug = debug
            self._driver.run('Network.enable')
            self._set_callback()

    def _set_callback(self):
        """Subscribe the handlers of this object to the network events."""
        self._driver.set_callback('Network.requestWillBeSent', self._requestWillBeSent)
        self._driver.set_callback('Network.requestWillBeSentExtraInfo', self._requestWillBeSentExtraInfo)
        self._driver.set_callback('Network.responseReceived', self._response_received)
        self._driver.set_callback('Network.responseReceivedExtraInfo', self._responseReceivedExtraInfo)
        self._driver.set_callback('Network.loadingFinished', self._loading_finished)
        self._driver.set_callback('Network.loadingFailed', self._loading_failed)

    def _requestWillBeSent(self, **kwargs):
        """Callback for a request that is about to be sent; creates a packet when it matches."""
        self._running_requests += 1
        p = None
        rid = kwargs['requestId']
        request = kwargs['request']
        method_ok = self._method is True or request['method'] in self._method
        type_ok = self._res_type is True or kwargs.get('type', '').upper() in self._res_type

        if self._targets is True:
            if method_ok and type_ok:
                p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, True))
                p._raw_request = kwargs
                if request.get('hasPostData', None) and not request.get('postData', None):
                    p._raw_post_data = self._driver.run('Network.getRequestPostData',
                                                        requestId=rid).get('postData', None)

        elif method_ok and type_ok:
            url = request['url']
            for target in self._targets:
                matched = search(target, url) if self._is_regex else target in url
                if matched:
                    p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, target))
                    p._raw_request = kwargs
                    break

        # False marks a request that is not of interest, so its extra info can be dropped.
        self._extra_info_ids.setdefault(rid, {})['obj'] = p if p else False

    def _requestWillBeSentExtraInfo(self, **kwargs):
        """Callback for the extra information of a request."""
        self._running_requests += 1
        self._extra_info_ids.setdefault(kwargs['requestId'], {})['request'] = kwargs

    def _response_received(self, **kwargs):
        """Callback for a received response; stores it on the matching packet."""
        request = self._request_ids.get(kwargs['requestId'], None)
        if request:
            request._raw_response = kwargs['response']
            request._resource_type = kwargs['type']

    def _responseReceivedExtraInfo(self, **kwargs):
        """Callback for the extra information of a response."""
        self._running_requests -= 1
        r = self._extra_info_ids.get(kwargs['requestId'], None)
        if r:
            obj = r.get('obj', None)
            if obj is False:
                self._extra_info_ids.pop(kwargs['requestId'], None)
            elif isinstance(obj, DataPacket):
                obj._requestExtraInfo = r.get('request', None)
                obj._responseExtraInfo = kwargs
                self._extra_info_ids.pop(kwargs['requestId'], None)
            else:
                # No 'obj' entry has been stored for this request yet; keep
                # the info for later.
                r['response'] = kwargs

    def _loading_finished(self, **kwargs):
        """Callback for a finished request; completes the packet and queues it."""
        self._running_requests -= 1
        rid = kwargs['requestId']
        packet = self._request_ids.get(rid)
        if packet:
            r = self._driver.run('Network.getResponseBody', requestId=rid)
            if 'body' in r:
                packet._raw_body = r['body']
                packet._base64_body = r['base64Encoded']
            else:
                packet._raw_body = ''
                packet._base64_body = False

            if (packet._raw_request['request'].get('hasPostData', None)
                    and not packet._raw_request['request'].get('postData', None)):
                r = self._driver.run('Network.getRequestPostData', requestId=rid, _timeout=1)
                packet._raw_post_data = r.get('postData', None)

        r = self._extra_info_ids.get(rid, None)
        if r:
            obj = r.get('obj', None)
            # Fixed: 'request' / 'response' were looked up in the id-keyed
            # _extra_info_ids dict instead of this request's record, so the
            # record was always dropped and the second branch never ran.
            if obj is False or (isinstance(obj, DataPacket) and not r.get('request')):
                self._extra_info_ids.pop(rid, None)
            elif isinstance(obj, DataPacket) and r.get('response'):
                obj._requestExtraInfo = r['request']
                obj._responseExtraInfo = r['response']
                self._extra_info_ids.pop(rid, None)

        self._request_ids.pop(rid, None)

        if packet:
            self._caught.put(packet)

    def _loading_failed(self, **kwargs):
        """Callback for a failed request; marks the packet as failed and queues it."""
        self._running_requests -= 1
        r_id = kwargs['requestId']
        dp = self._request_ids.get(r_id, None)
        if dp:
            dp._raw_fail_info = kwargs
            dp._resource_type = kwargs['type']
            dp.is_failed = True

        r = self._extra_info_ids.get(r_id, None)
        if r:
            obj = r.get('obj', None)
            if obj is False and r.get('response'):
                self._extra_info_ids.pop(r_id, None)
            elif isinstance(obj, DataPacket):
                response = r.get('response')
                if response:
                    # .get avoids a KeyError when only the response info was stored.
                    obj._requestExtraInfo = r.get('request')
                    obj._responseExtraInfo = response
                    self._extra_info_ids.pop(r_id, None)

        self._request_ids.pop(r_id, None)

        if dp:
            self._caught.put(dp)
class FrameListener(Listener):
    """Listener variant used for frames.

    Unless the page is flagged as a different-domain frame, events whose
    ``frameId`` differs from the page's frame id are ignored.
    """

    def _requestWillBeSent(self, **kwargs):
        """Callback for a request that is about to be sent."""
        page = self._page
        if page._is_diff_domain or kwargs.get('frameId', None) == page._frame_id:
            super()._requestWillBeSent(**kwargs)

    def _response_received(self, **kwargs):
        """Callback for a received response."""
        page = self._page
        if page._is_diff_domain or kwargs.get('frameId', None) == page._frame_id:
            super()._response_received(**kwargs)
class DataPacket(object):
    """Holds everything captured for one request: request, response, body and failure data."""

    def __init__(self, tab_id, target):
        """
        :param tab_id: id of the tab that produced this packet
        :param target: the listening target this packet matched
        """
        self.tab_id = tab_id
        self.target = target
        self.is_failed = False

        # Raw data filled in by the listener callbacks.
        self._raw_request = None
        self._raw_post_data = None
        self._raw_response = None
        self._raw_body = None
        self._raw_fail_info = None
        self._base64_body = False
        self._requestExtraInfo = None
        self._responseExtraInfo = None
        self._resource_type = None

        # Wrapper objects, built on first access.
        self._request = None
        self._response = None
        self._fail_info = None

    def __repr__(self):
        shown_target = True if self.target is True else f'"{self.target}"'
        return f'<DataPacket target={shown_target} url="{self.url}">'

    @property
    def _request_extra_info(self):
        """Raw extra information of the request, or None."""
        return self._requestExtraInfo

    @property
    def _response_extra_info(self):
        """Raw extra information of the response, or None."""
        return self._responseExtraInfo

    @property
    def url(self):
        """The ``url`` attribute of the request object."""
        return self.request.url

    @property
    def method(self):
        """The ``method`` attribute of the request object."""
        return self.request.method

    @property
    def frameId(self):
        """The 'frameId' entry of the raw request data."""
        return self._raw_request.get('frameId')

    @property
    def resourceType(self):
        """The resource type stored by the listener, or None."""
        return self._resource_type

    @property
    def request(self):
        """Request object of this packet, created on first access."""
        if self._request is None:
            self._request = Request(self, self._raw_request['request'], self._raw_post_data)
        return self._request

    @property
    def response(self):
        """Response object of this packet, created on first access."""
        if self._response is None:
            self._response = Response(self, self._raw_response, self._raw_body, self._base64_body)
        return self._response

    @property
    def fail_info(self):
        """FailInfo object of this packet, created on first access."""
        if self._fail_info is None:
            self._fail_info = FailInfo(self, self._raw_fail_info)
        return self._fail_info

    def wait_extra_info(self, timeout=None):
        """Wait until the extra information of the response has been stored.

        :param timeout: timeout in seconds, None waits without limit
        :return: whether the wait succeeded
        """
        if timeout is None:
            while self._responseExtraInfo is None:
                sleep(.1)
            return True

        deadline = perf_counter() + timeout
        while perf_counter() < deadline:
            if self._responseExtraInfo is not None:
                return True
            sleep(.1)
        return False
class Request(object):
    """Read-only view of the request data of a packet.

    Attributes that are not defined on the class are looked up in the raw
    request dict; a missing key gives None.
    """

    def __init__(self, data_packet, raw_request, post_data):
        """
        :param data_packet: the packet this request belongs to
        :param raw_request: raw request dict
        :param post_data: post data fetched separately, or None
        """
        self._data_packet = data_packet
        self._request = raw_request
        self._raw_post_data = post_data
        self._postData = None
        self._headers = None

    def __getattr__(self, item):
        return self._request.get(item, None)

    @property
    def headers(self):
        """Return the headers as a case-insensitive dict."""
        if self._headers is None:
            self._headers = CaseInsensitiveDict(self._request['headers'])
        return self._headers

    @property
    def postData(self):
        """Return the post data, parsed as json when possible.

        The separately fetched post data wins over the 'postData' entry of the
        raw request; without either the value is False.
        """
        if self._postData is None:
            raw = self._raw_post_data or self._request.get('postData', None) or False
            try:
                parsed = loads(raw)
            except (JSONDecodeError, TypeError):
                # Not json (or not a string at all): keep the raw value.
                parsed = raw
            self._postData = parsed
        return self._postData

    @property
    def extra_info(self):
        """Return the extra information of the request wrapped in a RequestExtraInfo object."""
        return RequestExtraInfo(self._data_packet._request_extra_info or {})
class Response(object):
    """Read-only view of the response data of a packet.

    Attributes that are not defined on the class are looked up in the raw
    response dict; a missing key, or a missing response, gives None.
    """

    def __init__(self, data_packet, raw_response, raw_body, base64_body):
        """
        :param data_packet: the packet this response belongs to
        :param raw_response: raw response dict, or None when no response was received
        :param raw_body: raw body text
        :param base64_body: whether the raw body is base64 encoded
        """
        self._data_packet = data_packet
        self._response = raw_response
        self._raw_body = raw_body
        self._is_base64_body = base64_body
        self._body = None
        self._headers = None

    def __getattr__(self, item):
        return self._response.get(item, None) if self._response else None

    @property
    def headers(self):
        """Return the headers as a case-insensitive dict (empty when there is no response)."""
        if self._headers is None:
            # A packet without a response has no raw response; answer with an
            # empty mapping instead of failing on None['headers'].
            raw_headers = self._response['headers'] if self._response else {}
            self._headers = CaseInsensitiveDict(raw_headers)
        return self._headers

    @property
    def raw_body(self):
        """Return the unprocessed body text."""
        return self._raw_body

    @property
    def body(self):
        """Return the body content.

        A base64 encoded body is decoded to bytes; otherwise the body is
        parsed as json when possible and returned as-is when not.
        """
        if self._body is None:
            if self._is_base64_body:
                self._body = b64decode(self._raw_body)
            else:
                try:
                    self._body = loads(self._raw_body)
                except (JSONDecodeError, TypeError):
                    self._body = self._raw_body
        return self._body

    @property
    def extra_info(self):
        """Return the extra information of the response wrapped in a ResponseExtraInfo object."""
        return ResponseExtraInfo(self._data_packet._response_extra_info or {})
class ExtraInfo(object):
    """Attribute-style access to a dict of extra information; missing keys give None."""

    def __init__(self, extra_info):
        """
        :param extra_info: dict holding the extra information
        """
        self._extra_info = extra_info

    @property
    def all_info(self):
        """Return all extra information as a dict."""
        return self._extra_info

    def __getattr__(self, item):
        info = self._extra_info
        return info.get(item, None)
class RequestExtraInfo(ExtraInfo):
    """Extra information of a request; adds nothing to ExtraInfo."""
class ResponseExtraInfo(ExtraInfo):
    """Extra information of a response; adds nothing to ExtraInfo."""
class FailInfo(object):
    """Attribute-style access to the failure data of a packet.

    A missing key, or missing failure data, gives None.
    """

    def __init__(self, data_packet, fail_info):
        """
        :param data_packet: the packet this failure data belongs to
        :param fail_info: raw failure dict, or None
        """
        self._data_packet = data_packet
        self._fail_info = fail_info

    def __getattr__(self, item):
        if not self._fail_info:
            return None
        return self._fail_info.get(item, None)

View File

@ -0,0 +1,263 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from queue import Queue
from typing import Union, Dict, List, Iterable, Optional, Literal
from requests.structures import CaseInsensitiveDict
from .._base.driver import Driver
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
# Literal alias used below to annotate the ``res_type`` parameters of the listener.
__RES_TYPE__ = Literal[
    'Document', 'Stylesheet', 'Image', 'Media', 'Font', 'Script',
    'TextTrack', 'XHR', 'Fetch', 'Prefetch', 'EventSource', 'WebSocket',
    'Manifest', 'SignedExchange', 'Ping', 'CSPViolationReport', 'Preflight', 'Other',
]
class Listener(object):
    """Type stub for the ``Listener`` class."""

    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...
        self._address: str = ...
        self._target_id: str = ...
        self._targets: Union[str, dict] = ...
        self._method: set = ...
        self._res_type: set = ...
        self._caught: Queue = ...
        self._is_regex: bool = ...
        self._driver: Driver = ...
        self._request_ids: dict = ...
        self._extra_info_ids: dict = ...
        self.listening: bool = ...
        self._running_requests: int = ...

    @property
    def targets(self) -> Optional[set]: ...

    # ``Optional`` accepts exactly one type argument, so parameters that take several
    # types or None are written as ``Union[..., None]``.
    def set_targets(self,
                    targets: Union[str, list, tuple, set, bool, None] = True,
                    is_regex: Optional[bool] = False,
                    method: Union[str, list, tuple, set, bool, None] = ('GET', 'POST'),
                    res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = True) -> None: ...

    def start(self,
              targets: Union[str, list, tuple, set, bool, None] = None,
              is_regex: Optional[bool] = None,
              method: Union[str, list, tuple, set, bool, None] = None,
              res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = None) -> None: ...

    def stop(self) -> None: ...

    def pause(self, clear: bool = True) -> None: ...

    def resume(self) -> None: ...

    def wait(self,
             count: int = 1,
             timeout: float = None,
             fit_count: bool = True,
             raise_err: bool = None) -> Union[List[DataPacket], DataPacket, None]: ...

    # ``False`` is a value, not a type; ``Literal[False]`` expresses the same intent.
    @property
    def results(self) -> Union[DataPacket, Dict[str, List[DataPacket]], Literal[False]]: ...

    def clear(self) -> None: ...

    def wait_silent(self, timeout=None) -> bool: ...

    def _to_target(self, target_id: str, address: str, page: ChromiumBase) -> None: ...

    def _requestWillBeSent(self, **kwargs) -> None: ...

    def _requestWillBeSentExtraInfo(self, **kwargs) -> None: ...

    def _response_received(self, **kwargs) -> None: ...

    def _responseReceivedExtraInfo(self, **kwargs) -> None: ...

    def _loading_finished(self, **kwargs) -> None: ...

    def _loading_failed(self, **kwargs) -> None: ...

    def steps(self,
              count: int = None,
              timeout: float = None,
              gap=1) -> Iterable[Union[DataPacket, List[DataPacket]]]: ...

    def _set_callback(self) -> None: ...
class FrameListener(Listener):
    """Type stub for the ``Listener`` variant constructed from a ``ChromiumFrame``."""

    def __init__(self, page: ChromiumFrame):
        self._page: ChromiumFrame = ...
        self._is_diff: bool = ...
class DataPacket(object):
    """Class that manages a returned data packet."""

    # ``[str, bool]`` is a list literal, not a type; the parameter accepts either type.
    def __init__(self, tab_id: str, target: Union[str, bool]):
        self.tab_id: str = ...
        self.target: str = ...
        self.is_failed: bool = ...
        self._raw_request: Optional[dict] = ...
        self._raw_response: Optional[dict] = ...
        self._raw_post_data: str = ...
        self._raw_body: str = ...
        self._raw_fail_info: Optional[dict] = ...
        self._base64_body: bool = ...
        self._request: Request = ...
        self._response: Response = ...
        self._fail_info: Optional[FailInfo] = ...
        self._resource_type: str = ...
        self._requestExtraInfo: Optional[dict] = ...
        self._responseExtraInfo: Optional[dict] = ...

    @property
    def _request_extra_info(self) -> Optional[dict]: ...

    @property
    def _response_extra_info(self) -> Optional[dict]: ...

    @property
    def url(self) -> str: ...

    @property
    def method(self) -> str: ...

    @property
    def frameId(self) -> str: ...

    @property
    def resourceType(self) -> str: ...

    @property
    def request(self) -> Request: ...

    @property
    def response(self) -> Response: ...

    @property
    def fail_info(self) -> Optional[FailInfo]: ...

    def wait_extra_info(self, timeout: float = None) -> bool: ...
class Request(object):
    """Type stub for the ``Request`` class."""

    # Typed attributes.
    url: str = ...
    method: str = ...
    _headers: Optional[CaseInsensitiveDict] = ...

    # Untyped attributes.
    urlFragment = ...
    hasPostData = ...
    postDataEntries = ...
    mixedContentType = ...
    initialPriority = ...
    referrerPolicy = ...
    isLinkPreload = ...
    trustTokenParams = ...
    isSameSite = ...

    def __init__(self, data_packet: DataPacket, raw_request: dict, post_data: str):
        self._data_packet: DataPacket = ...
        self._request: dict = ...
        self._raw_post_data: str = ...
        self._postData: str = ...

    @property
    def headers(self) -> dict: ...

    @property
    def postData(self) -> Union[str, dict]: ...

    @property
    def extra_info(self) -> Optional[RequestExtraInfo]: ...
class Response(object):
    """Type stub for the ``Response`` class."""

    # Untyped attributes.
    url = ...
    status = ...
    statusText = ...
    headersText = ...
    mimeType = ...
    requestHeaders = ...
    requestHeadersText = ...
    connectionReused = ...
    connectionId = ...
    remoteIPAddress = ...
    remotePort = ...
    fromDiskCache = ...
    fromServiceWorker = ...
    fromPrefetchCache = ...
    encodedDataLength = ...
    timing = ...
    serviceWorkerResponseSource = ...
    responseTime = ...
    cacheStorageCacheName = ...
    protocol = ...
    alternateProtocolUsage = ...
    securityState = ...
    securityDetails = ...

    def __init__(self, data_packet: DataPacket, raw_response: dict, raw_body: str, base64_body: bool):
        self._data_packet: DataPacket = ...
        self._response: dict = ...
        self._raw_body: str = ...
        self._is_base64_body: bool = ...
        # The implementation stores decoded base64 bodies as bytes.
        self._body: Union[str, dict, bytes] = ...
        # Holds None until ``headers`` is first read, then a CaseInsensitiveDict.
        self._headers: Optional[CaseInsensitiveDict] = ...

    @property
    def extra_info(self) -> Optional[ResponseExtraInfo]: ...

    @property
    def headers(self) -> CaseInsensitiveDict: ...

    @property
    def raw_body(self) -> str: ...

    # A base64 body is returned decoded, i.e. as bytes.
    @property
    def body(self) -> Union[str, dict, bytes]: ...
class ExtraInfo(object):
    """Type stub for the ``ExtraInfo`` base class."""

    def __init__(self, extra_info: dict):
        self._extra_info: dict = ...

    @property
    def all_info(self) -> dict: ...
class RequestExtraInfo(ExtraInfo):
    """Type stub listing the attributes declared for a request's extra info."""

    requestId: str = ...
    headers: dict = ...
    connectTiming: dict = ...
    clientSecurityState: dict = ...
    associatedCookies: List[dict] = ...
    siteHasCookieInOtherPartition: bool = ...
class ResponseExtraInfo(ExtraInfo):
    """Type stub listing the attributes declared for a response's extra info."""

    requestId: str = ...
    statusCode: int = ...
    headers: dict = ...
    headersText: str = ...
    blockedCookies: List[dict] = ...
    resourceIPAddressSpace: str = ...
    cookiePartitionKey: str = ...
    cookiePartitionKeyOpaque: bool = ...
class FailInfo(object):
    """Type stub for the ``FailInfo`` class."""

    _data_packet: DataPacket
    # Declared once: the second, conflicting ``_fail_info: float`` declaration is removed
    # because ``__init__`` receives the failure info as a dict.
    _fail_info: dict
    errorText: str
    canceled: bool
    blockedReason: Optional[str]
    corsErrorStatus: Optional[str]

    def __init__(self, data_packet: DataPacket, fail_info: dict): ...

227
DrissionPage/_units/rect.py Normal file
View File

@ -0,0 +1,227 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
class ElementRect(object):
    """Provides size and position information of an element."""

    def __init__(self, ele):
        """
        :param ele: ChromiumElement
        """
        self._ele = ele

    @property
    def corners(self):
        """Return the absolute coordinates of the four corners as a list.

        Order: top-left, top-right, bottom-right, bottom-left. According to the
        original note, an element without size raises NoRectError.
        """
        quad = self._get_viewport_rect('border')
        viewport = self._ele.page.run_cdp_loaded('Page.getLayoutMetrics')['visualViewport']
        dx = viewport['pageX']
        dy = viewport['pageY']
        # The quad is a flat sequence of four (x, y) pairs.
        return [(quad[i] + dx, quad[i + 1] + dy) for i in range(0, 8, 2)]

    @property
    def viewport_corners(self):
        """Return the viewport coordinates of the four corners as a tuple.

        Order: top-left, top-right, bottom-right, bottom-left. According to the
        original note, an element without size raises NoRectError.
        """
        quad = self._get_viewport_rect('border')
        return tuple((quad[i], quad[i + 1]) for i in range(0, 8, 2))

    @property
    def size(self):
        """Return the element size as (width, height)."""
        border = self._get_viewport_rect('border')
        width = border[2] - border[0]
        height = border[5] - border[1]
        return width, height

    @property
    def location(self):
        """Return the absolute coordinates of the element's top-left corner."""
        x, y = self.viewport_location
        return self._get_page_coord(x, y)

    @property
    def midpoint(self):
        """Return the absolute coordinates of the element's midpoint."""
        x, y = self.viewport_midpoint
        return self._get_page_coord(x, y)

    @property
    def click_point(self):
        """Return the absolute coordinates of the point that receives clicks."""
        x, y = self.viewport_click_point
        return self._get_page_coord(x, y)

    @property
    def viewport_location(self):
        """Return the viewport coordinates of the element's top-left corner."""
        border = self._get_viewport_rect('border')
        return border[0], border[1]

    @property
    def viewport_midpoint(self):
        """Return the viewport coordinates of the element's midpoint."""
        border = self._get_viewport_rect('border')
        left, right = border[0], border[2]
        top, bottom = border[3], border[5]
        return left + (right - left) // 2, top + (bottom - top) // 2

    @property
    def viewport_click_point(self):
        """Return the viewport coordinates of the point that receives clicks.

        x is the midpoint's x; y lies 3 below the top of the padding box.
        """
        padding = self._get_viewport_rect('padding')
        mid_x = self.viewport_midpoint[0]
        return mid_x, padding[1] + 3

    @property
    def screen_location(self):
        """Return the screen coordinates of the element's top-left corner; screen top-left is (0, 0)."""
        return self._screen_coord('viewport_location')

    @property
    def screen_midpoint(self):
        """Return the screen coordinates of the element's midpoint; screen top-left is (0, 0)."""
        return self._screen_coord('viewport_midpoint')

    @property
    def screen_click_point(self):
        """Return the screen coordinates of the element's click point; screen top-left is (0, 0)."""
        return self._screen_coord('viewport_click_point')

    def _screen_coord(self, viewport_prop):
        """Convert one of this object's viewport coordinates to screen coordinates.

        :param viewport_prop: name of the property that yields the viewport coordinates
        :return: (x, y) scaled by the page's devicePixelRatio
        """
        vx, vy = self._ele.page.rect.viewport_location
        ex, ey = getattr(self, viewport_prop)
        ratio = self._ele.page.run_js('return window.devicePixelRatio;')
        return (vx + ex) * ratio, (ey + vy) * ratio

    def _get_viewport_rect(self, quad):
        """Return the requested box quad of the element from ``DOM.getBoxModel``.

        :param quad: box type: margin, border or padding
        :return: the coordinates of the four corners
        """
        model = self._ele.page.run_cdp('DOM.getBoxModel',
                                       backendNodeId=self._ele._backend_id,
                                       nodeId=self._ele._node_id,
                                       objectId=self._ele._obj_id)['model']
        return model[quad]

    def _get_page_coord(self, x, y):
        """Convert viewport coordinates to absolute coordinates."""
        viewport = self._ele.page.run_cdp_loaded('Page.getLayoutMetrics')['visualViewport']
        dx = viewport['pageX']
        dy = viewport['pageY']
        return x + dx, y + dy
class TabRect(object):
    """Provides size and position information of a tab's window, page and viewport."""

    def __init__(self, page):
        """
        :param page: page object
        """
        self._page = page

    @property
    def window_state(self):
        """Return the window state: normal, fullscreen, maximized or minimized."""
        bounds = self._get_window_rect()
        return bounds['windowState']

    @property
    def window_location(self):
        """Return the window's coordinates on the screen; screen top-left is (0, 0)."""
        bounds = self._get_window_rect()
        if bounds['windowState'] not in ('maximized', 'fullscreen'):
            # NOTE(review): the 7 offset presumably compensates for the window frame - confirm.
            return bounds['left'] + 7, bounds['top']
        return 0, 0

    @property
    def window_size(self):
        """Return the window size."""
        bounds = self._get_window_rect()
        state = bounds['windowState']
        if state == 'fullscreen':
            return bounds['width'], bounds['height']
        # NOTE(review): 16 and 7 presumably compensate for window frame sizes - confirm.
        height_trim = 16 if state == 'maximized' else 7
        return bounds['width'] - 16, bounds['height'] - height_trim

    @property
    def page_location(self):
        """Return the screen coordinates of the page's top-left corner; screen top-left is (0, 0)."""
        view_x, view_y = self.viewport_location
        layout = self._get_page_rect()['layoutViewport']
        return view_x - layout['pageX'], view_y - layout['pageY']

    @property
    def viewport_location(self):
        """Return the screen coordinates of the viewport; screen top-left is (0, 0)."""
        win_x, win_y = self.window_location
        win_w, win_h = self.window_size
        view_w, view_h = self.viewport_size_with_scrollbar
        return win_x + win_w - view_w, win_y + win_h - view_h

    @property
    def size(self):
        """Return the total page size as (width, height)."""
        content = self._get_page_rect()['contentSize']
        return content['width'], content['height']

    @property
    def viewport_size(self):
        """Return the viewport size without scrollbars as (width, height)."""
        visual = self._get_page_rect()['visualViewport']
        return visual['clientWidth'], visual['clientHeight']

    @property
    def viewport_size_with_scrollbar(self):
        """Return the viewport size including scrollbars as (width, height)."""
        text = self._page.run_js('return window.innerWidth.toString() + " " + window.innerHeight.toString();')
        width_text, height_text = text.split(' ')
        return int(width_text), int(height_text)

    def _get_page_rect(self):
        """Return the result of ``Page.getLayoutMetrics`` for the page."""
        return self._page.run_cdp_loaded('Page.getLayoutMetrics')

    def _get_window_rect(self):
        """Return the window bounds of this tab as reported by the browser object."""
        return self._page.browser.get_window_bounds(self._page.tab_id)
class FrameRect(object):
    """Used by cross-origin iframes."""

    def __init__(self, frame):
        """
        :param frame: frame object
        """
        self._frame = frame

    @property
    def location(self):
        """Return the absolute coordinates of the iframe element's top-left corner."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.location

    @property
    def viewport_location(self):
        """Return the element's coordinates in the viewport; top-left is (0, 0)."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.viewport_location

    @property
    def screen_location(self):
        """Return the screen coordinates of the element's top-left corner; top-left is (0, 0)."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.screen_location

    @property
    def size(self):
        """Return the size of the page inside the frame as (width, height)."""
        width, height = (self._frame.doc_ele.run_js(f'return this.body.{prop}')
                         for prop in ('scrollWidth', 'scrollHeight'))
        return width, height

    @property
    def viewport_size(self):
        """Return the viewport size as (width, height)."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.size

    @property
    def corners(self):
        """Return the coordinates of the four corners; order: top-left, top-right, bottom-right, bottom-left."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.corners

    @property
    def viewport_corners(self):
        """Return the viewport coordinates of the four corners; order: top-left, top-right, bottom-right, bottom-left."""
        element_rect = self._frame.frame_ele.rect
        return element_rect.viewport_corners

View File

@ -0,0 +1,120 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Tuple, Union, List
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab, WebPageTab
from .._pages.web_page import WebPage
class ElementRect(object):
    """Type stub for the ``ElementRect`` class."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    @property
    def size(self) -> Tuple[float, float]: ...

    @property
    def location(self) -> Tuple[float, float]: ...

    @property
    def midpoint(self) -> Tuple[float, float]: ...

    @property
    def click_point(self) -> Tuple[float, float]: ...

    @property
    def viewport_location(self) -> Tuple[float, float]: ...

    @property
    def viewport_midpoint(self) -> Tuple[float, float]: ...

    @property
    def viewport_click_point(self) -> Tuple[float, float]: ...

    @property
    def screen_location(self) -> Tuple[float, float]: ...

    @property
    def screen_midpoint(self) -> Tuple[float, float]: ...

    @property
    def screen_click_point(self) -> Tuple[float, float]: ...

    # The implementation builds and returns a list of four (x, y) tuples.
    @property
    def corners(self) -> List[Tuple[float, float]]: ...

    @property
    def viewport_corners(self) -> Tuple[Tuple[float, float], ...]: ...

    def _get_viewport_rect(self, quad: str) -> Union[list, None]: ...

    def _get_page_coord(self, x: float, y: float) -> Tuple[float, float]: ...
class TabRect(object):
    """Type stub for the ``TabRect`` class."""

    def __init__(self, page: ChromiumBase):
        self._page: Union[ChromiumPage, ChromiumTab, WebPage, WebPageTab] = ...

    # Window related properties.
    @property
    def window_state(self) -> str: ...

    @property
    def window_location(self) -> Tuple[int, int]: ...

    @property
    def window_size(self) -> Tuple[int, int]: ...

    # Page and viewport related properties.
    @property
    def page_location(self) -> Tuple[int, int]: ...

    @property
    def viewport_location(self) -> Tuple[int, int]: ...

    @property
    def size(self) -> Tuple[int, int]: ...

    @property
    def viewport_size(self) -> Tuple[int, int]: ...

    @property
    def viewport_size_with_scrollbar(self) -> Tuple[int, int]: ...

    # Private helpers.
    def _get_page_rect(self) -> dict: ...

    def _get_window_rect(self) -> dict: ...
class FrameRect(object):
    """Type stub for the ``FrameRect`` class."""

    def __init__(self, frame: ChromiumFrame):
        self._frame: ChromiumFrame = ...

    # Positions.
    @property
    def location(self) -> Tuple[float, float]: ...

    @property
    def viewport_location(self) -> Tuple[float, float]: ...

    @property
    def screen_location(self) -> Tuple[float, float]: ...

    # Sizes.
    @property
    def size(self) -> Tuple[float, float]: ...

    @property
    def viewport_size(self) -> Tuple[float, float]: ...

    # Corners.
    @property
    def corners(self) -> Tuple[Tuple[float, float], ...]: ...

    @property
    def viewport_corners(self) -> Tuple[Tuple[float, float], ...]: ...

View File

@ -0,0 +1,192 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from base64 import b64decode
from os.path import sep
from pathlib import Path
from random import randint
from shutil import rmtree
from tempfile import gettempdir
from threading import Thread
from time import sleep, time
class Screencast(object):
    """Records a page as a video file or as a series of screenshots."""

    def __init__(self, page):
        """
        :param page: page object to record
        """
        self._page = page
        self._path = None
        self._tmp_path = None
        self._running = False
        self._enable = False
        self._mode = 'video'

    @property
    def set_mode(self):
        """Return the object used to set the recording mode."""
        return ScreencastMode(self)

    def start(self, save_path=None):
        """Start recording.

        :param save_path: folder where the recording is saved
        :return: None
        :raises ValueError: if no save path has been set
        """
        self.set_save_path(save_path)
        if self._path is None:
            raise ValueError('save_path必须设置。')

        # Video modes collect their frames in a temporary folder that stop() removes.
        if self._mode in ('frugal_video', 'video'):
            tmp_root = self._page.browser.page._chromium_options.tmp_path
            tmp_name = f'screencast_tmp_{time()}_{randint(0, 100)}'
            if tmp_root:
                self._tmp_path = Path(tmp_root) / tmp_name
            else:
                self._tmp_path = Path(gettempdir()) / 'DrissionPage' / tmp_name
            self._tmp_path.mkdir(parents=True, exist_ok=True)

        if self._mode.startswith('frugal'):
            self._page.driver.set_callback('Page.screencastFrame', self._onScreencastFrame)
            self._page.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100)

        elif not self._mode.startswith('js'):
            self._running = True
            self._enable = True
            Thread(target=self._run).start()

        else:  # js mode
            js = '''
async function () {
stream = await navigator.mediaDevices.getDisplayMedia({video: true, audio: true})
mime = MediaRecorder.isTypeSupported("video/webm; codecs=vp9")
? "video/webm; codecs=vp9"
: "video/webm"
mediaRecorder = new MediaRecorder(stream, {mimeType: mime})
DrissionPage_Screencast_chunks = []
mediaRecorder.addEventListener('dataavailable', function(e) {
DrissionPage_Screencast_blob_ok = false;
DrissionPage_Screencast_chunks.push(e.data);
DrissionPage_Screencast_blob_ok = true;
})
mediaRecorder.start()
mediaRecorder.addEventListener('stop', function(){
while(DrissionPage_Screencast_blob_ok==false){}
DrissionPage_Screencast_blob = new Blob(DrissionPage_Screencast_chunks,
{type: DrissionPage_Screencast_chunks[0].type});
})
}
'''
            print('请手动选择要录制的目标。')
            self._page.run_js('var DrissionPage_Screencast_blob;var DrissionPage_Screencast_blob_ok=false;')
            self._page.run_js(js)

    def stop(self, video_name=None):
        """Stop recording.

        :param video_name: video file name; when None the current time is used as the name
        :return: path of the video file, or of the image folder in the image modes
        :raises TypeError: if a video must be written and the save path is not ASCII
        :raises ModuleNotFoundError: if cv2 is needed but not installed
        :raises RuntimeError: if a video must be written but no frame was captured
        """
        # Test for the real extension, so a name that merely ends in "mp4" still gets one.
        if video_name and not video_name.endswith('.mp4'):
            video_name = f'{video_name}.mp4'
        name = video_name if video_name else f'{time()}.mp4'
        path = f'{self._path}{sep}{name}'

        if self._mode.startswith('js'):
            self._page.run_js('mediaRecorder.stop();', as_expr=True)
            while not self._page.run_js('return DrissionPage_Screencast_blob_ok;'):
                sleep(.1)
            blob = self._page.run_js('return DrissionPage_Screencast_blob;')
            uuid = self._page.run_cdp('IO.resolveBlob', objectId=blob['result']['objectId'])['uuid']
            data = self._page.run_cdp('IO.read', handle=f'blob:{uuid}')['data']
            with open(path, 'wb') as f:
                f.write(b64decode(data))
            return path

        if self._mode.startswith('frugal'):
            self._page.driver.set_callback('Page.screencastFrame', None)
            self._page.run_cdp('Page.stopScreencast')
        else:
            # Ask the screenshot thread to finish and wait until it has done so.
            self._enable = False
            while self._running:
                sleep(.1)

        if self._mode.endswith('imgs'):
            return str(Path(self._path).absolute())

        if not str(self._path).isascii():
            raise TypeError('转换成视频仅支持英文路径和文件名。')

        try:
            from cv2 import VideoWriter, imread, VideoWriter_fourcc
        except ModuleNotFoundError as e:
            raise ModuleNotFoundError('请先安装cv2pip install opencv-python') from e

        # glob() yields files in arbitrary order; frames must be written chronologically.
        frames = sorted(Path(self._tmp_path or self._path).glob('*.jpg'), key=self._frame_sort_key)
        if not frames:
            raise RuntimeError('没有录制到任何画面，无法生成视频。')

        first = imread(str(frames[0]))
        shape = first.shape
        size = (shape[1], shape[0])
        writer = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size)
        try:
            # The first frame provides the video size and is part of the video as well.
            writer.write(first)
            for frame in frames[1:]:
                writer.write(imread(str(frame)))
        finally:
            # Release explicitly so the file is finalized before the path is returned.
            writer.release()

        if self._tmp_path is not None:
            rmtree(self._tmp_path)
            self._tmp_path = None
        return path

    @staticmethod
    def _frame_sort_key(frame_path):
        """Return a sort key that orders frame files by the timestamp in their name.

        :param frame_path: Path of a frame file named ``<timestamp>.jpg``
        :return: tuple usable as sort key; names that are not numbers sort last, by name
        """
        stem = frame_path.stem
        try:
            return 0, float(stem), stem
        except ValueError:
            return 1, 0.0, stem

    def set_save_path(self, save_path=None):
        """Set the folder where recordings are saved.

        :param save_path: folder path; a falsy value leaves the current setting unchanged
        :return: None
        :raises TypeError: if save_path points to an existing file
        """
        if save_path:
            save_path = Path(save_path)
            if save_path.exists() and save_path.is_file():
                raise TypeError('save_path必须指定文件夹。')
            save_path.mkdir(parents=True, exist_ok=True)
            self._path = save_path

    def _run(self):
        """Thread body of the non-frugal modes: take screenshots until recording is disabled."""
        self._running = True
        path = self._tmp_path or self._path
        while self._enable:
            self._page.get_screenshot(path=path, name=f'{time()}.jpg')
            sleep(.04)
        self._running = False

    def _onScreencastFrame(self, **kwargs):
        """Callback of the frugal modes: save the received frame and acknowledge it."""
        path = self._tmp_path or self._path
        with open(f'{path}{sep}{kwargs["metadata"]["timestamp"]}.jpg', 'wb') as f:
            f.write(b64decode(kwargs['data']))
        self._page.run_cdp('Page.screencastFrameAck', sessionId=kwargs['sessionId'])
class ScreencastMode(object):
    """Sets the recording mode of a ``Screencast`` object."""

    def __init__(self, screencast):
        """
        :param screencast: the Screencast object whose mode is set
        """
        self._screencast = screencast

    def _switch(self, mode):
        """Store the given mode name on the screencast object."""
        self._screencast._mode = mode

    def video_mode(self):
        """Continuous video mode; the generated video has no sound."""
        self._switch('video')

    def frugal_video_mode(self):
        """Frugal video mode: records only when the page changes; the generated video has no sound."""
        self._switch('frugal_video')

    def js_video_mode(self):
        """Record with js; can produce a video with sound, but must be started manually."""
        self._switch('js_video')

    def frugal_imgs_mode(self):
        """Frugal image mode: takes a screenshot only when the page changes."""
        self._switch('frugal_imgs')

    def imgs_mode(self):
        """Image mode: continuously takes screenshots of the page."""
        self._switch('imgs')

View File

@ -0,0 +1,49 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union
from .._pages.chromium_base import ChromiumBase
class Screencast(object):
    """Type stub for the ``Screencast`` class."""

    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...
        self._path: Path = ...
        self._tmp_path: Path = ...
        self._running: bool = ...
        self._enable: bool = ...
        self._mode: str = ...

    @property
    def set_mode(self) -> ScreencastMode: ...

    # Public methods.
    def start(self, save_path: Union[str, Path] = None) -> None: ...

    def stop(self, video_name: str = None) -> str: ...

    def set_save_path(self, save_path: Union[str, Path] = None) -> None: ...

    # Private methods.
    def _run(self) -> None: ...

    def _onScreencastFrame(self, **kwargs) -> None: ...
class ScreencastMode(object):
    """Type stub for the ``ScreencastMode`` class."""

    def __init__(self, screencast: Screencast):
        self._screencast: Screencast = ...

    # Video modes.
    def video_mode(self) -> None: ...

    def frugal_video_mode(self) -> None: ...

    def js_video_mode(self) -> None: ...

    # Image modes.
    def frugal_imgs_mode(self) -> None: ...

    def imgs_mode(self) -> None: ...

View File

@ -0,0 +1,177 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from time import sleep, perf_counter
class Scroller(object):
    """Object used for scrolling."""

    def __init__(self, ele):
        """
        :param ele: element object
        """
        self._driver = ele
        self.t1 = self.t2 = 'this'
        self._wait_complete = False

    def _run_js(self, js):
        """Fill the scroll targets into the js template, run it, then wait for the scroll to end."""
        script = js.format(self.t1, self.t2, self.t2)
        self._driver.run_js(script)
        self._wait_scrolled()

    def to_top(self):
        """Scroll to the top, keeping the horizontal position."""
        self._run_js('{}.scrollTo({}.scrollLeft, 0);')

    def to_bottom(self):
        """Scroll to the bottom, keeping the horizontal position."""
        self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight);')

    def to_half(self):
        """Scroll to the vertical middle, keeping the horizontal position."""
        self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight/2);')

    def to_rightmost(self):
        """Scroll to the rightmost position, keeping the vertical position."""
        self._run_js('{}.scrollTo({}.scrollWidth, {}.scrollTop);')

    def to_leftmost(self):
        """Scroll to the leftmost position, keeping the vertical position."""
        self._run_js('{}.scrollTo(0, {}.scrollTop);')

    def to_location(self, x, y):
        """Scroll to the given position.

        :param x: horizontal distance
        :param y: vertical distance
        :return: None
        """
        # The doubled braces survive this first format() as the placeholder for the target.
        self._run_js('{{}}.scrollTo({}, {});'.format(x, y))

    def up(self, pixel=300):
        """Scroll up by a number of pixels, keeping the horizontal position.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js('{{}}.scrollBy(0, {});'.format(-pixel))

    def down(self, pixel=300):
        """Scroll down by a number of pixels, keeping the horizontal position.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js('{{}}.scrollBy(0, {});'.format(pixel))

    def left(self, pixel=300):
        """Scroll left by a number of pixels, keeping the vertical position.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js('{{}}.scrollBy({}, 0);'.format(-pixel))

    def right(self, pixel=300):
        """Scroll right by a number of pixels, keeping the vertical position.

        :param pixel: number of pixels to scroll
        :return: None
        """
        self._run_js('{{}}.scrollBy({}, 0);'.format(pixel))

    def _wait_scrolled(self):
        """Wait until scrolling has ended (only when ``_wait_complete`` is set)."""
        if not self._wait_complete:
            return

        owner = self._driver
        page = owner.page if 'ChromiumElement' in str(type(owner)) else owner

        def position():
            viewport = page.run_cdp('Page.getLayoutMetrics')['layoutViewport']
            return viewport['pageX'], viewport['pageY']

        # Scrolling is considered finished once two consecutive reads give the same position.
        previous = position()
        deadline = perf_counter() + page.timeout
        while perf_counter() < deadline:
            sleep(.1)
            current = position()
            if current == previous:
                break
            previous = current
class ElementScroller(Scroller):
    """Scroller bound to an element."""

    def to_see(self, center=None):
        """Scroll the page until the element is visible.

        :param center: whether to scroll the element to the page center where possible;
            when None, it is centered if it is covered
        :return: None
        """
        element = self._driver
        element.page.scroll.to_see(element, center=center)

    def to_center(self):
        """Scroll the element to the middle of the viewport where possible."""
        element = self._driver
        element.page.scroll.to_see(element, center=True)
class PageScroller(Scroller):
    """Scroller bound to a page."""

    def __init__(self, page):
        """
        :param page: page object
        """
        super().__init__(page)
        self.t1, self.t2 = 'window', 'document.documentElement'

    def to_see(self, loc_or_ele, center=None):
        """Scroll the page until the element is visible.

        :param loc_or_ele: locator of the element, a loc tuple or a query string
        :param center: whether to scroll the element to the page center where possible;
            when None, it is centered if it is covered
        :return: None
        """
        target = self._driver._ele(loc_or_ele)
        self._to_see(target, center)

    def _to_see(self, ele, center):
        """Perform the scrolling that makes the element visible.

        :param ele: element object
        :param center: whether to scroll the element to the page center where possible;
            when None, it is centered if it is covered
        :return: None
        """
        flag = 'true' if center else 'false'
        ele.run_js('this.scrollIntoViewIfNeeded(' + flag + ');')

        # Center explicitly when requested, or when undecided and the element is covered.
        must_center = True if center else (center is not False and ele.states.is_covered)
        if must_center:
            centering_js = '''function getWindowScrollTop() {var scroll_top = 0;
if (document.documentElement && document.documentElement.scrollTop) {
scroll_top = document.documentElement.scrollTop;
} else if (document.body) {scroll_top = document.body.scrollTop;}
return scroll_top;}
const { top, height } = this.getBoundingClientRect();
const elCenter = top + height / 2;
const center = window.innerHeight / 2;
window.scrollTo({top: getWindowScrollTop() - (center - elCenter),
behavior: 'instant'});'''
            ele.run_js(centering_js)

        self._wait_scrolled()
class FrameScroller(PageScroller):
    """Scroller bound to a frame; it operates on the frame's document element."""

    def __init__(self, frame):
        """
        :param frame: ChromiumFrame object
        """
        super().__init__(frame.doc_ele)
        document_target = 'this.documentElement'
        self.t1 = document_target
        self.t2 = document_target

    def to_see(self, loc_or_ele, center=None):
        """Scroll the page until the element is visible.

        :param loc_or_ele: locator of the element, a loc tuple or a query string
        :param center: whether to scroll the element to the page center where possible;
            when None, it is centered if it is covered
        :return: None
        """
        # Element objects are used directly; anything else is looked up as a locator.
        if 'ChromiumElement' in str(type(loc_or_ele)):
            target = loc_or_ele
        else:
            target = self._driver._ele(loc_or_ele)
        self._to_see(target, center)

View File

@ -0,0 +1,77 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
class Scroller(object):
    """Type stub for the ``Scroller`` class."""

    # The parameter is named ``ele`` to match the implementation's signature.
    def __init__(self, ele: Union[ChromiumBase, ChromiumElement]):
        self.t1: str = ...
        self.t2: str = ...
        self._driver: Union[ChromiumBase, ChromiumElement] = ...
        self._wait_complete: bool = ...

    def _run_js(self, js: str) -> None: ...

    def to_top(self) -> None: ...

    def to_bottom(self) -> None: ...

    def to_half(self) -> None: ...

    def to_rightmost(self) -> None: ...

    def to_leftmost(self) -> None: ...

    def to_location(self, x: int, y: int) -> None: ...

    def up(self, pixel: int = 300) -> None: ...

    def down(self, pixel: int = 300) -> None: ...

    def left(self, pixel: int = 300) -> None: ...

    def right(self, pixel: int = 300) -> None: ...

    def _wait_scrolled(self) -> None: ...
class ElementScroller(Scroller):
    """Type stub for the ``ElementScroller`` class."""

    def to_see(self, center: Union[bool, None] = None) -> None: ...

    def to_center(self) -> None: ...
class PageScroller(Scroller):
    """Type stub for the ``PageScroller`` class."""

    def __init__(self, page: ChromiumBase): ...

    def to_see(self,
               loc_or_ele: Union[str, tuple, ChromiumElement],
               center: Union[bool, None] = None) -> None: ...

    def _to_see(self,
                ele: ChromiumElement,
                center: Union[bool, None]) -> None: ...
class FrameScroller(PageScroller):
    """Type stub for the ``FrameScroller`` class.

    Only signatures are declared here, like for the other classes of this stub.
    """

    def __init__(self, frame) -> None:
        """
        :param frame: ChromiumFrame object
        """
        ...

    def to_see(self,
               loc_or_ele: Union[str, tuple, ChromiumElement],
               center: Union[bool, None] = None) -> None:
        """Scroll the page until the element is visible.

        :param loc_or_ele: locator of the element, a loc tuple or a query string
        :param center: whether to scroll the element to the page center where possible;
            when None, it is centered if it is covered
        :return: None
        """
        ...

View File

@ -0,0 +1,267 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from time import perf_counter
class SelectElement(object):
    """Handles <select> elements."""

    def __init__(self, ele):
        """
        :param ele: select element object
        :raises TypeError: if the element's tag is not 'select'
        """
        if ele.tag != 'select':
            raise TypeError("select方法只能在<select>元素使用。")
        self._ele = ele

    def __call__(self, text_or_index, timeout=None):
        """Select options of the list.

        :param text_or_index: an int selects by index, anything else selects by text;
            a list or tuple (selected by text) may be passed for multi-select lists
        :param timeout: timeout; the page timeout is used when None
        :return: whether the selection succeeded
        """
        para_type = 'index' if isinstance(text_or_index, int) else 'text'
        timeout = timeout if timeout is not None else self._ele.page.timeout
        return self._select(text_or_index, para_type, timeout=timeout)

    @property
    def is_multi(self):
        """Return whether this is a multi-select list."""
        return self._ele.attr('multiple') is not None

    @property
    def options(self):
        """Return the list of all option elements."""
        return [i for i in self._ele.eles('xpath://option') if not isinstance(i, int)]

    @property
    def selected_option(self):
        """Return the first selected option element.

        :return: ChromiumElement object or None
        """
        return self._ele.run_js('return this.options[this.selectedIndex];')

    @property
    def selected_options(self):
        """Return all selected option elements.

        :return: list of ChromiumElement objects
        """
        return [x for x in self.options if x.states.is_selected]

    def all(self):
        """Select all options.

        :raises TypeError: if the list is not a multi-select list
        """
        if not self.is_multi:
            raise TypeError("只能在多选菜单执行此操作。")
        return self._by_loc('tag:option', 1, False)

    def invert(self):
        """Invert the selection.

        :raises TypeError: if the list is not a multi-select list
        """
        if not self.is_multi:
            raise TypeError("只能对多项选框执行反选。")
        change = False
        for i in self.options:
            change = True
            mode = 'false' if i.states.is_selected else 'true'
            i.run_js(f'this.selected={mode};')
        if change:
            self._dispatch_change()

    def clear(self):
        """Deselect all selected options.

        :raises TypeError: if the list is not a multi-select list
        """
        if not self.is_multi:
            raise TypeError("只能在多选菜单执行此操作。")
        return self._by_loc('tag:option', 1, True)

    def by_text(self, text, timeout=None):
        """Select options by their text; multi-select lists accept a list or tuple.

        :param text: option text; pass a list or tuple to select several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the selection succeeded
        """
        return self._select(text, 'text', False, timeout)

    def by_value(self, value, timeout=None):
        """Select options by their value attribute; multi-select lists accept a list or tuple.

        :param value: value attribute; pass a list or tuple to select several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the selection succeeded
        """
        return self._select(value, 'value', False, timeout)

    def by_index(self, index, timeout=None):
        """Select options by index; multi-select lists accept a list or tuple.

        :param index: index of the option, counted from 1; pass a list or tuple to select several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the selection succeeded
        """
        return self._select(index, 'index', False, timeout)

    def by_loc(self, loc, timeout=None):
        """Select the options found by a locator.

        :param loc: locator
        :param timeout: timeout
        :return: whether the selection succeeded
        """
        return self._by_loc(loc, timeout)

    def by_option(self, option):
        """Select one or several option elements.

        :param option: option element or a list of them
        :return: None
        """
        self._select_options(option, 'true')

    def cancel_by_text(self, text, timeout=None):
        """Deselect options by their text; multi-select lists accept a list or tuple.

        :param text: option text; pass a list or tuple to deselect several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the deselection succeeded
        """
        return self._select(text, 'text', True, timeout)

    def cancel_by_value(self, value, timeout=None):
        """Deselect options by their value attribute; multi-select lists accept a list or tuple.

        :param value: value attribute; pass a list or tuple to deselect several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the deselection succeeded
        """
        return self._select(value, 'value', True, timeout)

    def cancel_by_index(self, index, timeout=None):
        """Deselect options by index; multi-select lists accept a list or tuple.

        :param index: index of the option, counted from 1; pass a list or tuple to deselect several
        :param timeout: timeout; the page timeout is used when None
        :return: whether the deselection succeeded
        """
        return self._select(index, 'index', True, timeout)

    def cancel_by_loc(self, loc, timeout=None):
        """Deselect the options found by a locator.

        :param loc: locator
        :param timeout: timeout
        :return: whether the deselection succeeded
        """
        return self._by_loc(loc, timeout, True)

    def cancel_by_option(self, option):
        """Deselect one or several option elements.

        :param option: option element or a list of them
        :return: None
        """
        self._select_options(option, 'false')

    def _by_loc(self, loc, timeout=None, cancel=False):
        """Select or deselect the options found by a locator.

        :param loc: locator
        :param timeout: timeout
        :param cancel: whether to deselect instead of select
        :return: whether any option was found and processed
        """
        eles = self._ele.eles(loc, timeout)
        if not eles:
            return False

        mode = 'false' if cancel else 'true'
        if self.is_multi:
            self._select_options(eles, mode)
        else:
            # A single-select list can only take one option: use the first match.
            self._select_options(eles[0], mode)
        return True

    def _select(self, condition, para_type='text', cancel=False, timeout=None):
        """Select or deselect options of the list.

        :param condition: text, value or index; a list or tuple for multi-select lists
        :param para_type: type of the condition: 'text', 'value' or 'index'
        :param cancel: whether to deselect instead of select
        :param timeout: timeout; the page timeout is used when None
        :return: whether the operation succeeded (None for an unknown para_type)
        :raises TypeError: if a list or tuple is passed for a single-select list
        """
        if not self.is_multi and isinstance(condition, (list, tuple)):
            raise TypeError('单选列表只能传入str格式。')

        mode = 'false' if cancel else 'true'
        timeout = timeout if timeout is not None else self._ele.page.timeout
        condition = set(condition) if isinstance(condition, (list, tuple)) else {condition}

        if para_type in ('text', 'value'):
            return self._text_value([str(i) for i in condition], para_type, mode, timeout)
        elif para_type == 'index':
            return self._index(condition, mode, timeout)

    def _text_value(self, condition, para_type, mode, timeout):
        """Search options by text or value and select/deselect them.

        The options are checked at least once, so a timeout of 0 means "do not wait".
        The search succeeds only when every requested text/value matches an option.

        :param condition: the texts or values to look for
        :param para_type: type of the condition: 'text' or 'value'
        :param mode: 'true' or 'false'
        :param timeout: timeout
        :return: whether the operation succeeded
        """
        wanted = set(condition)
        end_time = perf_counter() + timeout
        while True:
            if para_type == 'text':
                pairs = [(o, o.text) for o in self.options]
            elif para_type == 'value':
                pairs = [(o, o.attr('value')) for o in self.options]
            else:
                pairs = []

            eles = [o for o, key in pairs if key in wanted]
            # Count distinct matches: several options sharing one text must not
            # hide the fact that another requested text is missing.
            matched = {key for _, key in pairs if key in wanted}
            if len(matched) == len(wanted):
                self._select_options(eles, mode)
                return True

            if perf_counter() >= end_time:
                return False

    def _index(self, condition, mode, timeout):
        """Search options by index and select/deselect them.

        The options are checked at least once, so a timeout of 0 means "do not wait".

        :param condition: the indexes to look for, counted from 1
        :param mode: 'true' or 'false'
        :param timeout: timeout
        :return: whether the operation succeeded
        """
        # NOTE(review): indexes below 1 are not validated; they end up as negative
        # list indexes (0 addresses the last option) - confirm the intended behavior.
        indexes = [int(i) for i in condition]
        needed = max(indexes, default=0)
        end_time = perf_counter() + timeout
        while True:
            options = self.options
            if len(options) >= needed:
                self._select_options([options[i - 1] for i in indexes], mode)
                return True

            if perf_counter() >= end_time:
                return False

    def _select_options(self, option, mode):
        """Select or deselect options and trigger the change event.

        :param option: option element, or a list, tuple or set of them
        :param mode: 'true' to select, 'false' to deselect
        :return: None
        """
        if isinstance(option, (list, tuple, set)):
            # Sets cannot be sliced, so work on a list.
            option = list(option)
            if not self.is_multi and len(option) > 1:
                option = option[:1]
            for o in option:
                o.run_js(f'this.selected={mode};')
            self._dispatch_change()
        else:
            option.run_js(f'this.selected={mode};')
            self._dispatch_change()

    def _dispatch_change(self):
        """Dispatch a bubbling "change" event on the select element."""
        self._ele.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));')

View File

@ -0,0 +1,73 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Tuple, List
from .._elements.chromium_element import ChromiumElement
class SelectElement(object):
    """Type stub for the helper that selects and deselects options of a select element."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ...

    @property
    def is_multi(self) -> bool: ...

    @property
    def options(self) -> List[ChromiumElement]: ...

    @property
    def selected_option(self) -> Union[ChromiumElement, None]: ...

    @property
    def selected_options(self) -> List[ChromiumElement]: ...

    def clear(self) -> None: ...

    def all(self) -> None: ...

    # ---- select by text / value / position / locator / element ----
    def by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...

    def by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ...

    def by_option(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ...

    # ---- deselect counterparts ----
    def cancel_by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ...

    def cancel_by_option(self,
                         option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ...

    def invert(self) -> None: ...

    # ---- private helpers ----
    def _by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None, cancel: bool = False) -> bool: ...

    # NOTE(review): the implementation declares ``condition`` without a default - confirm.
    def _select(self,
                condition: Union[str, int, list, tuple] = None,
                para_type: str = 'text',
                cancel: bool = False,
                timeout: float = None) -> bool: ...

    def _text_value(self, condition: Union[list, set], para_type: str, mode: str, timeout: float) -> bool: ...

    def _index(self, condition: set, mode: str, timeout: float) -> bool: ...

    # ``mode`` is the JavaScript literal 'true' or 'false'.
    def _select_options(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]],
                        mode: str) -> None: ...

    def _dispatch_change(self) -> None: ...

View File

@ -0,0 +1,642 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from requests.structures import CaseInsensitiveDict
from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter
from .._functions.tools import show_or_hide_browser
class BasePageSetter(object):
    """Base of the setter helpers; keeps a reference to the page being configured."""

    def __init__(self, page):
        self._page = page

    def NoneElement_value(self, value=None, on_off=True):
        """Configure whether an empty element returns a preset value.

        :param value: the value to return
        :param on_off: whether the feature is enabled
        :return: None
        """
        page = self._page
        page._none_ele_return_value = on_off
        page._none_ele_value = value
class ChromiumBaseSetter(BasePageSetter):
    """Settings shared by the browser-driven page objects."""

    def __init__(self, page):
        super().__init__(page)
        # Built lazily by the ``cookies`` property.
        self._cookies_setter = None

    @property
    def load_mode(self):
        """Return a new object for choosing the page load strategy."""
        return LoadMode(self._page)

    @property
    def scroll(self):
        """Return a new object for configuring page scrolling."""
        return PageScrollSetter(self._page.scroll)

    @property
    def cookies(self):
        """Return the cookies setter, creating it on first access."""
        if self._cookies_setter is not None:
            return self._cookies_setter
        self._cookies_setter = CookiesSetter(self._page)
        return self._cookies_setter

    def retry_times(self, times):
        """Set how many times a failed connection is retried."""
        self._page.retry_times = times

    def retry_interval(self, interval):
        """Set the pause between connection retries."""
        self._page.retry_interval = interval

    def timeouts(self, base=None, page_load=None, script=None, implicit=None):
        """Set timeouts; arguments left as ``None`` are not changed.

        :param base: general wait time, also stored as the page's ``_timeout``
        :param page_load: page load timeout
        :param script: script execution timeout
        :param implicit: used in place of ``base`` when ``base`` is ``None``
        :return: None
        """
        if base is None:
            base = implicit
        page = self._page
        if base is not None:
            page.timeouts.base = base
            page._timeout = base
        if page_load is not None:
            page.timeouts.page_load = page_load
        if script is not None:
            page.timeouts.script = script

    def user_agent(self, ua, platform=None):
        """Override the user agent through ``Emulation.setUserAgentOverride``.

        :param ua: user agent string
        :param platform: platform string; only sent when truthy
        :return: None
        """
        params = {'userAgent': ua}
        if platform:
            params['platform'] = platform
        self._page.run_cdp('Emulation.setUserAgentOverride', **params)

    def session_storage(self, item, value):
        """Set or remove one sessionStorage entry.

        :param item: key of the entry
        :param value: value to store; ``False`` removes the entry
        :return: None
        """
        self._write_dom_storage(item, value, False)

    def local_storage(self, item, value):
        """Set or remove one localStorage entry.

        :param item: key of the entry
        :param value: value to store; ``False`` removes the entry
        :return: None
        """
        self._write_dom_storage(item, value, True)

    def _write_dom_storage(self, item, value, is_local):
        """Set or remove a DOM storage entry for the page's frame.

        :param item: key of the entry
        :param value: value to store; ``False`` removes the entry
        :param is_local: ``True`` for localStorage, ``False`` for sessionStorage
        :return: None
        """
        page = self._page
        page.run_cdp_loaded('DOMStorage.enable')
        key = page.run_cdp('Storage.getStorageKeyForFrame', frameId=page._frame_id)['storageKey']
        storage_id = {'storageKey': key, 'isLocalStorage': is_local}
        if value is False:
            page.run_cdp('DOMStorage.removeDOMStorageItem', storageId=storage_id, key=item)
        else:
            page.run_cdp('DOMStorage.setDOMStorageItem', storageId=storage_id, key=item, value=value)
        page.run_cdp_loaded('DOMStorage.disable')

    def upload_files(self, files):
        """Queue file paths for the next file chooser.

        :param files: list of paths, or one string with paths separated by newlines
        :return: None
        """
        page = self._page
        if not page._upload_list:
            # Nothing queued yet: hook the file chooser event and enable interception.
            page.driver.set_callback('Page.fileChooserOpened', page._onFileChooserOpened)
            page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True)

        paths = files.split('\n') if isinstance(files, str) else files
        page._upload_list = [str(Path(p).absolute()) for p in paths]

    def headers(self, headers: dict) -> None:
        """Set extra HTTP headers through ``Network.setExtraHTTPHeaders``.

        :param headers: headers as a dict
        :return: None
        """
        page = self._page
        page.run_cdp('Network.enable')
        page.run_cdp('Network.setExtraHTTPHeaders', headers=headers)

    def auto_handle_alert(self, on_off=True, accept=True):
        """Turn automatic alert handling on or off.

        :param on_off: whether automatic handling is enabled
        :param accept: stored as the handling choice while enabled
        :return: None
        """
        if on_off:
            self._page._alert.auto = accept
        else:
            self._page._alert.auto = None

    def blocked_urls(self, urls):
        """Set the URL patterns passed to ``Network.setBlockedURLs``.

        :param urls: one pattern or a list/tuple of patterns; a falsy value clears the list
        :return: None
        :raises TypeError: ``urls`` is not a str, list or tuple
        """
        if not urls:
            urls = []
        elif isinstance(urls, str):
            urls = (urls,)

        if not isinstance(urls, (list, tuple)):
            raise TypeError('urls需传入str、list或tuple类型。')

        page = self._page
        page.run_cdp('Network.enable')
        page.run_cdp('Network.setBlockedURLs', urls=urls)

    # -------------- to be deprecated --------------
    @property
    def load_strategy(self):
        """Same object as ``load_mode``; kept under the older name."""
        return LoadMode(self._page)
class TabSetter(ChromiumBaseSetter):
    """Settings for a tab: window control, downloads and activation."""

    def __init__(self, page):
        super().__init__(page)

    @property
    def window(self):
        """Return a new object for controlling the browser window."""
        return WindowSetter(self._page)

    def download_path(self, path):
        """Set the download directory.

        :param path: download directory; stored as an absolute path string
        :return: None
        """
        page = self._page
        abs_path = str(Path(path).absolute())
        page._download_path = abs_path
        page.browser._dl_mgr.set_path(page.tab_id, abs_path)
        if page._DownloadKit:
            page._DownloadKit.set.goal_path(abs_path)

    def download_file_name(self, name=None, suffix=None):
        """Set the name of the next downloaded file.

        :param name: file name
        :param suffix: file suffix
        :return: None
        """
        page = self._page
        page.browser._dl_mgr.set_rename(page.tab_id, name, suffix)

    def when_download_file_exists(self, mode):
        """Choose what happens when a file with the same name already exists.

        :param mode: one of 'rename', 'overwrite', 'skip', 'r', 'o', 's'
        :return: None
        :raises ValueError: ``mode`` is not one of the accepted values
        """
        full_names = ('rename', 'overwrite', 'skip')
        # Full names map to themselves, initials map to the full name.
        types = {name: name for name in full_names}
        types.update({name[0]: name for name in full_names})

        mode = types.get(mode, mode)
        if mode not in types:
            raise ValueError(f'''mode参数只能是 '{"', '".join(types.keys())}' 之一,现在是:{mode}''')

        page = self._page
        page.browser._dl_mgr.set_file_exists(page.tab_id, mode)

    def activate(self):
        """Bring this tab to the front."""
        page = self._page
        page.browser.activate_tab(page.tab_id)
class ChromiumPageSetter(TabSetter):
    """Settings for the page object."""

    def tab_to_front(self, tab_or_id=None):
        """Bring a tab to the front.

        :param tab_or_id: tab object or tab id; a falsy value means the current tab
        :return: None
        """
        if not tab_or_id:
            target_id = self._page.tab_id
        elif isinstance(tab_or_id, str):
            target_id = tab_or_id
        else:
            # Anything that is not a str is treated as a tab object.
            target_id = tab_or_id.tab_id
        self._page.browser.activate_tab(target_id)

    @property
    def window(self):
        """Return a new object for controlling the browser window."""
        return PageWindowSetter(self._page)
class SessionPageSetter(BasePageSetter):
    """Settings for the session-based page: headers, cookies and session options."""

    def __init__(self, page):
        """
        :param page: SessionPage object
        """
        super().__init__(page)
        # Built lazily by the ``cookies`` property.
        self._cookies_setter = None

    @property
    def cookies(self):
        """Return the cookies setter, creating it on first access."""
        if self._cookies_setter is not None:
            return self._cookies_setter
        self._cookies_setter = SessionCookiesSetter(self._page)
        return self._cookies_setter

    def retry_times(self, times):
        """Set how many times a failed connection is retried."""
        self._page.retry_times = times

    def retry_interval(self, interval):
        """Set the pause between connection retries."""
        self._page.retry_interval = interval

    def download_path(self, path):
        """Set the download directory.

        :param path: download directory; stored as an absolute path string
        :return: None
        """
        page = self._page
        abs_path = str(Path(path).absolute())
        page._download_path = abs_path
        if page._DownloadKit:
            page._DownloadKit.set.goal_path(abs_path)

    def timeout(self, second):
        """Set the connection timeout.

        :param second: seconds
        :return: None
        """
        self._page.timeout = second

    def encoding(self, encoding, set_all=True):
        """Set the encoding.

        :param encoding: encoding name; a falsy value clears the stored page encoding
        :param set_all: also store it on the page object; when ``False`` only the
                        current response is changed
        :return: None
        """
        page = self._page
        if set_all:
            page._encoding = encoding or None
        if page.response:
            page.response.encoding = encoding

    def headers(self, headers):
        """Replace the headers used for requests.

        :param headers: headers as a dict
        :return: None
        """
        self._page._headers = CaseInsensitiveDict(headers)

    def header(self, attr, value):
        """Set a single header.

        :param attr: header name
        :param value: header value
        :return: None
        """
        self._page._headers[attr] = value

    def user_agent(self, ua):
        """Set the 'user-agent' header.

        :param ua: user agent string
        :return: None
        """
        self._page._headers['user-agent'] = ua

    def proxies(self, http=None, https=None):
        """Set the session proxies.

        :param http: http proxy address
        :param https: https proxy address
        :return: None
        """
        self._page.session.proxies = dict(http=http, https=https)

    def auth(self, auth):
        """Set the session's auth tuple or object.

        :param auth: auth tuple or object
        :return: None
        """
        session = self._page.session
        session.auth = auth

    def hooks(self, hooks):
        """Set the session's hooks.

        :param hooks: hooks
        :return: None
        """
        session = self._page.session
        session.hooks = hooks

    def params(self, params):
        """Set the session's query parameters.

        :param params: dict of query parameters
        :return: None
        """
        session = self._page.session
        session.params = params

    def verify(self, on_off):
        """Set whether SSL certificates are verified.

        :param on_off: whether to verify
        :return: None
        """
        session = self._page.session
        session.verify = on_off

    def cert(self, cert):
        """Set the SSL client certificate.

        :param cert: path to a .pem file, or a (cert, key) tuple
        :return: None
        """
        session = self._page.session
        session.cert = cert

    def stream(self, on_off):
        """Set whether response content is streamed.

        :param on_off: whether to stream
        :return: None
        """
        session = self._page.session
        session.stream = on_off

    def trust_env(self, on_off):
        """Set the session's ``trust_env`` flag.

        :param on_off: whether the environment is trusted
        :return: None
        """
        session = self._page.session
        session.trust_env = on_off

    def max_redirects(self, times):
        """Set the maximum number of redirects.

        :param times: maximum number of redirects
        :return: None
        """
        session = self._page.session
        session.max_redirects = times

    def add_adapter(self, url, adapter):
        """Mount an adapter on the session.

        :param url: URL prefix the adapter applies to
        :param adapter: adapter object
        :return: None
        """
        session = self._page.session
        session.mount(url, adapter)
class WebPageSetter(ChromiumPageSetter):
    """Settings for the dual-mode page; routes calls by the page's current mode."""

    def __init__(self, page):
        super().__init__(page)
        self._session_setter = SessionPageSetter(self._page)
        self._chromium_setter = ChromiumPageSetter(self._page)

    @property
    def cookies(self):
        """Return the cookies setter, creating it on first access."""
        if self._cookies_setter is not None:
            return self._cookies_setter
        self._cookies_setter = WebPageCookiesSetter(self._page)
        return self._cookies_setter

    def headers(self, headers) -> None:
        """Set the headers for the current mode.

        :param headers: headers as a dict
        :return: None
        """
        in_session_mode = self._page.mode == 's'
        target = self._session_setter if in_session_mode else self._chromium_setter
        target.headers(headers)

    def user_agent(self, ua, platform=None):
        """Set the user agent for the current mode.

        :param ua: user agent string
        :param platform: platform string; not passed on in 's' mode
        :return: None
        """
        if self._page.mode != 's':
            self._chromium_setter.user_agent(ua, platform)
        else:
            self._session_setter.user_agent(ua)
class WebPageTabSetter(TabSetter):
    """Settings for a dual-mode tab; applies a setting to every backend the tab has."""

    def __init__(self, page):
        super().__init__(page)
        self._session_setter = SessionPageSetter(self._page)
        self._chromium_setter = ChromiumBaseSetter(self._page)

    @property
    def cookies(self):
        """Return the cookies setter, creating it on first access."""
        if self._cookies_setter is not None:
            return self._cookies_setter
        self._cookies_setter = WebPageCookiesSetter(self._page)
        return self._cookies_setter

    def headers(self, headers) -> None:
        """Set the headers on the session and/or the browser side.

        :param headers: headers as a dict
        :return: None
        """
        page = self._page
        # The two checks are independent: both backends may be updated.
        if page._has_session:
            self._session_setter.headers(headers)
        if page._has_driver:
            self._chromium_setter.headers(headers)

    def user_agent(self, ua, platform=None):
        """Set the user agent on the session and/or the browser side.

        :param ua: user agent string
        :param platform: platform string; only passed to the browser side
        :return: None
        """
        page = self._page
        if page._has_session:
            self._session_setter.user_agent(ua)
        if page._has_driver:
            self._chromium_setter.user_agent(ua, platform)
class ChromiumElementSetter(object):
    """Sets attributes and properties of an element."""

    def __init__(self, ele):
        """
        :param ele: ChromiumElement
        """
        self._ele = ele

    def attr(self, attr, value):
        """Set an attribute of the element through ``DOM.setAttributeValue``.

        :param attr: attribute name
        :param value: attribute value; converted with ``str()``
        :return: None
        """
        self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele._node_id, name=attr, value=str(value))

    def prop(self, prop, value):
        """Set a JavaScript property of the element.

        The value is serialized as a JSON literal, so quotes, backslashes, newlines and
        other control characters arrive intact. Previously only ``"`` was escaped, which
        produced broken or wrong scripts for such text, and non-str values raised.

        :param prop: property name
        :param value: property value; any JSON-serializable value (str, number, bool, None...)
        :return: None
        """
        from json import dumps  # local import keeps the module's import block untouched

        # ensure_ascii=False leaves plain non-ASCII text unescaped in the script.
        literal = dumps(value, ensure_ascii=False)
        self._ele.run_js(f'this.{prop}={literal};')

    def innerHTML(self, html):
        """Set the element's innerHTML.

        :param html: html text
        :return: None
        """
        self.prop('innerHTML', html)
class ChromiumFrameSetter(ChromiumBaseSetter):
    """Settings for a frame object."""

    def attr(self, attr, value):
        """Set an attribute on the frame's own element.

        :param attr: attribute name
        :param value: attribute value
        :return: None
        """
        element_setter = self._page.frame_ele.set
        element_setter.attr(attr, value)
class LoadMode(object):
    """Chooses the page load strategy."""

    def __init__(self, page):
        """
        :param page: ChromiumBase object
        """
        self._page = page

    def __call__(self, value):
        """Set the load strategy by name.

        The name is matched case-insensitively and stored in lower case, so the stored
        value is always one of the three canonical names (previously 'EAGER' passed the
        check but was stored unchanged).

        :param value: 'normal', 'eager' or 'none'
        :return: None
        :raises ValueError: ``value`` is not one of the accepted names
        """
        mode = value.lower()
        if mode not in ('normal', 'eager', 'none'):
            raise ValueError("只能选择 'normal', 'eager', 'none'")
        self._page._load_mode = mode

    def normal(self):
        """Set the load strategy to 'normal'."""
        self._page._load_mode = 'normal'

    def eager(self):
        """Set the load strategy to 'eager'."""
        self._page._load_mode = 'eager'

    def none(self):
        """Set the load strategy to 'none'."""
        self._page._load_mode = 'none'
class PageScrollSetter(object):
    """Configures how the page scroller behaves."""

    def __init__(self, scroll):
        self._scroll = scroll

    def wait_complete(self, on_off=True):
        """Set whether scroll commands wait until scrolling has finished.

        :param on_off: on or off
        :return: None
        :raises TypeError: ``on_off`` is not a bool
        """
        if isinstance(on_off, bool):
            self._scroll._wait_complete = on_off
            return
        raise TypeError('on_off必须为bool。')

    def smooth(self, on_off=True):
        """Set whether the page scrolls smoothly.

        Also stores ``on_off`` as the scroller's wait-for-completion flag.

        :param on_off: on or off
        :return: None
        :raises TypeError: ``on_off`` is not a bool
        """
        if not isinstance(on_off, bool):
            raise TypeError('on_off必须为bool。')

        scroll = self._scroll
        behavior = 'smooth' if on_off else 'auto'
        scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{behavior}");')
        scroll._wait_complete = on_off
class WindowSetter(object):
    """Controls the state, size and position of the browser window hosting a page."""

    def __init__(self, page):
        """
        :param page: page object
        """
        self._page = page
        self._window_id = self._get_info()['windowId']

    def max(self):
        """Maximize the window."""
        s = self._get_info()['bounds']['windowState']
        # Leave fullscreen/minimized first, then maximize.
        if s in ('fullscreen', 'minimized'):
            self._perform({'windowState': 'normal'})
        self._perform({'windowState': 'maximized'})

    def mini(self):
        """Minimize the window."""
        s = self._get_info()['bounds']['windowState']
        if s == 'fullscreen':
            self._perform({'windowState': 'normal'})
        self._perform({'windowState': 'minimized'})

    def full(self):
        """Switch the window to fullscreen."""
        s = self._get_info()['bounds']['windowState']
        if s == 'minimized':
            self._perform({'windowState': 'normal'})
        self._perform({'windowState': 'fullscreen'})

    def normal(self):
        """Switch the window to the normal state."""
        s = self._get_info()['bounds']['windowState']
        # From fullscreen the 'normal' request is issued twice, as before.
        if s == 'fullscreen':
            self._perform({'windowState': 'normal'})
        self._perform({'windowState': 'normal'})

    def size(self, width=None, height=None):
        """Set the window size; does nothing when both arguments are falsy.

        :param width: window width; a falsy value keeps the current width
        :param height: window height; a falsy value keeps the current height
        :return: None
        """
        if width or height:
            s = self._get_info()['bounds']['windowState']
            if s != 'normal':
                self._perform({'windowState': 'normal'})
            info = self._get_info()['bounds']
            # NOTE(review): the -16 / +7 offsets presumably compensate for the window
            # frame - confirm.
            width = width - 16 if width else info['width']
            height = height + 7 if height else info['height']
            self._perform({'width': width, 'height': height})

    def location(self, x=None, y=None):
        """Move the window; does nothing when both arguments are ``None``.

        A coordinate left as ``None`` keeps the window's current value unchanged.
        Previously the ``-8`` adjustment was also applied to the current ``left`` when
        ``x`` was omitted, shifting the window on every call.

        :param x: horizontal position (sent as ``left`` after the ``-8`` adjustment)
        :param y: vertical position (sent as ``top``)
        :return: None
        """
        if x is None and y is None:
            return

        self.normal()
        info = self._get_info()['bounds']
        # Only a caller-supplied x gets the offset; the reported bounds are already raw.
        left = x - 8 if x is not None else info['left']
        top = y if y is not None else info['top']
        self._perform({'left': left, 'top': top})

    def _get_info(self):
        """Return the result of ``Browser.getWindowForTarget`` for the page."""
        return self._page.run_cdp('Browser.getWindowForTarget')

    def _perform(self, bounds):
        """Apply new bounds through ``Browser.setWindowBounds``.

        :param bounds: dict of bounds fields to change
        :return: None
        """
        self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds)

    # ------------ to be removed ------------
    def maximized(self):
        """Maximize the window (older name of ``max``)."""
        self.max()

    def minimized(self):
        """Minimize the window (older name of ``mini``)."""
        self.mini()

    def fullscreen(self):
        """Switch to fullscreen (older name of ``full``)."""
        self.full()
class PageWindowSetter(WindowSetter):
    """Window control for the page object, adding hide and show."""

    def hide(self):
        """Hide the browser window (per the original note, Windows only)."""
        show_or_hide_browser(self._page, hide=True)

    def show(self):
        """Show the browser window (per the original note, Windows only)."""
        show_or_hide_browser(self._page, hide=False)

View File

@ -0,0 +1,233 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from typing import Union, Tuple, Literal, Any, Optional
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter
from .scroller import PageScroller
from .._base.base import BasePage
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab
from .._pages.session_page import SessionPage
from .._pages.web_page import WebPage
# Accepted values for ``TabSetter.when_download_file_exists``: full names and their initials.
FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']
class BasePageSetter(object):
    """Type stub for the base setter that holds the page being configured."""

    def __init__(self, page: BasePage):
        self._page: BasePage = ...

    def NoneElement_value(self, value: Any = None, on_off: bool = True) -> None: ...
class ChromiumBaseSetter(BasePageSetter):
    """Type stub for the settings shared by the browser-driven page objects."""

    def __init__(self, page):
        self._page: ChromiumBase = ...
        self._cookies_setter: CookiesSetter = ...

    @property
    def load_mode(self) -> LoadMode: ...

    @property
    def scroll(self) -> PageScrollSetter: ...

    @property
    def cookies(self) -> CookiesSetter: ...

    def retry_times(self, times: int) -> None: ...

    def retry_interval(self, interval: float) -> None: ...

    # ``implicit`` is accepted by the implementation as a fallback for ``base``.
    def timeouts(self, base: float = None, page_load: float = None, script: float = None,
                 implicit: float = None) -> None: ...

    def user_agent(self, ua: str, platform: str = None) -> None: ...

    # Passing ``False`` as the value removes the entry.
    def session_storage(self, item: str, value: Union[str, bool]) -> None: ...

    def local_storage(self, item: str, value: Union[str, bool]) -> None: ...

    def headers(self, headers: dict) -> None: ...

    def auto_handle_alert(self, on_off: bool = True, accept: bool = True) -> None: ...

    def upload_files(self, files: Union[str, list, tuple]) -> None: ...

    # ``Optional`` takes exactly one type argument, so the alternatives are spelled as a Union.
    def blocked_urls(self, urls: Union[list, tuple, str, None]) -> None: ...
class TabSetter(ChromiumBaseSetter):
    """Type stub for tab settings: window control, downloads and activation."""

    def __init__(self, page): ...

    @property
    def window(self) -> WindowSetter: ...

    def download_path(self, path: Union[str, Path]) -> None: ...

    def download_file_name(self, name: str = None, suffix: str = None) -> None: ...

    def when_download_file_exists(self, mode: FILE_EXISTS) -> None: ...

    def activate(self) -> None: ...
class ChromiumPageSetter(TabSetter):
    """Type stub for the page object's settings."""

    _page: ChromiumPage = ...

    @property
    def window(self) -> PageWindowSetter: ...

    # NOTE(review): no ``main_tab`` method is visible in the setter implementation
    # shipped alongside this stub - confirm it still exists.
    def main_tab(self, tab_id: str = None) -> None: ...

    def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ...
class SessionPageSetter(BasePageSetter):
    """Type stub for the session-based page's settings."""

    def __init__(self, page: SessionPage):
        self._page: SessionPage = ...
        self._cookies_setter: SessionCookiesSetter = ...

    @property
    def cookies(self) -> SessionCookiesSetter: ...

    def retry_times(self, times: int) -> None: ...

    def retry_interval(self, interval: float) -> None: ...

    def download_path(self, path: Union[str, Path]) -> None: ...

    def timeout(self, second: float) -> None: ...

    # ``Optional`` takes exactly one type argument; ``Optional[str]`` already includes None.
    def encoding(self, encoding: Optional[str], set_all: bool = True) -> None: ...

    def headers(self, headers: dict) -> None: ...

    def header(self, attr: str, value: str) -> None: ...

    def user_agent(self, ua: str) -> None: ...

    def proxies(self, http: str = None, https: str = None) -> None: ...

    # ---- values assigned directly onto the underlying session ----
    def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ...

    def hooks(self, hooks: Union[dict, None]) -> None: ...

    def params(self, params: Union[dict, None]) -> None: ...

    def verify(self, on_off: Union[bool, None]) -> None: ...

    def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ...

    def stream(self, on_off: Union[bool, None]) -> None: ...

    def trust_env(self, on_off: Union[bool, None]) -> None: ...

    def max_redirects(self, times: Union[int, None]) -> None: ...

    def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ...
class WebPageSetter(ChromiumPageSetter):
    """Type stub for the dual-mode page's settings."""

    _page: WebPage = ...
    # Delegates used for session-side and browser-side settings.
    _session_setter: SessionPageSetter = ...
    _chromium_setter: ChromiumPageSetter = ...

    def user_agent(self, ua: str, platform: str = None) -> None: ...

    def headers(self, headers: dict) -> None: ...

    @property
    def cookies(self) -> WebPageCookiesSetter: ...
class WebPageTabSetter(TabSetter):
    """Type stub for a dual-mode tab's settings."""

    _page: WebPage = ...
    # Delegates used for session-side and browser-side settings.
    _session_setter: SessionPageSetter = ...
    _chromium_setter: ChromiumBaseSetter = ...

    def user_agent(self, ua: str, platform: str = None) -> None: ...

    def headers(self, headers: dict) -> None: ...

    @property
    def cookies(self) -> WebPageCookiesSetter: ...
class ChromiumElementSetter(object):
    """Type stub for the helper that sets attributes and properties of an element."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    def attr(self, attr: str, value: str) -> None: ...

    def prop(self, prop: str, value: str) -> None: ...

    def innerHTML(self, html: str) -> None: ...
class ChromiumFrameSetter(ChromiumBaseSetter):
    """Type stub for a frame object's settings."""

    _page: ChromiumFrame = ...

    def attr(self, attr: str, value: str) -> None: ...
class LoadMode(object):
    """Type stub for the object that chooses the page load strategy."""

    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...

    # ``value`` is one of 'normal', 'eager', 'none'.
    def __call__(self, value: str) -> None: ...

    def normal(self) -> None: ...

    def eager(self) -> None: ...

    def none(self) -> None: ...
class PageScrollSetter(object):
    """Type stub for the object that configures the page scroller."""

    def __init__(self, scroll: PageScroller):
        self._scroll: PageScroller = ...

    def wait_complete(self, on_off: bool = True) -> None: ...

    def smooth(self, on_off: bool = True) -> None: ...
class WindowSetter(object):
    """Type stub for the browser window controller."""

    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...
        # Taken from the 'windowId' field of Browser.getWindowForTarget.
        # NOTE(review): annotated as str here - confirm the actual type returned.
        self._window_id: str = ...

    def max(self) -> None: ...

    def mini(self) -> None: ...

    def full(self) -> None: ...

    def normal(self) -> None: ...

    def size(self, width: int = None, height: int = None) -> None: ...

    def location(self, x: int = None, y: int = None) -> None: ...

    def _get_info(self) -> dict: ...

    def _perform(self, bounds: dict) -> None: ...
class PageWindowSetter(WindowSetter):
    """Type stub for the page object's window controller, adding hide and show."""

    _page: ChromiumPage = ...

    def hide(self) -> None: ...

    def show(self) -> None: ...

View File

@ -0,0 +1,175 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from .._functions.web import location_in_viewport
from ..errors import CDPError, NoRectError, PageDisconnectedError, ElementLostError
class ElementStates(object):
    """Read-only state queries for an element."""

    def __init__(self, ele):
        """
        :param ele: ChromiumElement
        """
        self._ele = ele

    @property
    def is_selected(self):
        """Return the element's JavaScript ``selected`` property."""
        script = 'return this.selected;'
        return self._ele.run_js(script)

    @property
    def is_checked(self):
        """Return the element's JavaScript ``checked`` property."""
        script = 'return this.checked;'
        return self._ele.run_js(script)

    @property
    def is_displayed(self):
        """Return whether the element is displayed.

        It is not displayed when its visibility style is 'hidden', its ``offsetParent``
        is null, its display style is 'none', or its ``hidden`` property is truthy.
        """
        ele = self._ele
        if ele.style('visibility') == 'hidden':
            return False
        if ele.run_js('return this.offsetParent === null;'):
            return False
        if ele.style('display') == 'none':
            return False
        return not ele.prop('hidden')

    @property
    def is_enabled(self):
        """Return whether the element's ``disabled`` property is falsy."""
        disabled = self._ele.run_js('return this.disabled;')
        return not disabled

    @property
    def is_alive(self):
        """Return whether the element's attributes can still be read."""
        try:
            self._ele.attrs
        except Exception:
            return False
        else:
            return True

    @property
    def is_in_viewport(self):
        """Return whether the element's click point lies inside the viewport.

        Returns ``False`` without checking when the click point's x value is falsy.
        """
        x, y = self._ele.rect.click_point
        if not x:
            return False
        return location_in_viewport(self._ele.page, x, y)

    @property
    def is_whole_in_viewport(self):
        """Return whether both the location corner and the opposite corner are in the viewport."""
        left, top = self._ele.rect.location
        width, height = self._ele.rect.size
        right, bottom = left + width, top + height
        page = self._ele.page
        return location_in_viewport(page, left, top) and location_in_viewport(page, right, bottom)

    @property
    def is_covered(self):
        """Return the backend id of the node at the click point when it is another node.

        Returns ``False`` when the node found there is this element, or when the
        lookup raises ``CDPError``.
        """
        lx, ly = self._ele.rect.click_point
        try:
            found = self._ele.page.run_cdp('DOM.getNodeForLocation', x=int(lx), y=int(ly))
            bid = found.get('backendNodeId')
            if bid != self._ele._backend_id:
                return bid
            return False
        except CDPError:
            return False

    @property
    def has_rect(self):
        """Return the element's corner coordinates, or ``False`` when it has no rect."""
        try:
            corners = self._ele.rect.corners
        except NoRectError:
            return False
        return corners
class ShadowRootStates(object):
    """Read-only state queries for a shadow root."""

    def __init__(self, ele):
        """
        :param ele: the object whose state is queried
        """
        self._ele = ele

    @property
    def is_enabled(self):
        """Return whether the object's ``disabled`` property is falsy."""
        disabled = self._ele.run_js('return this.disabled;')
        return not disabled

    @property
    def is_alive(self):
        """Return whether ``DOM.describeNode`` still succeeds for the object's backend id."""
        try:
            self._ele.page.run_cdp('DOM.describeNode', backendNodeId=self._ele._backend_id)
        except Exception:
            return False
        else:
            return True
class PageStates(object):
    """Read-only state queries used by page and tab objects."""

    def __init__(self, page):
        """
        :param page: ChromiumBase object
        """
        self._page = page

    @property
    def is_loading(self):
        """Return the page's loading flag."""
        return self._page._is_loading

    @property
    def is_alive(self):
        """Return whether the page still answers a CDP call.

        ``False`` when ``Page.getLayoutMetrics`` raises ``PageDisconnectedError``.
        """
        try:
            self._page.run_cdp('Page.getLayoutMetrics')
        except PageDisconnectedError:
            return False
        else:
            return True

    @property
    def ready_state(self):
        """Return the page's load state: 'connecting', 'loading', 'interactive' or 'complete'."""
        return self._page._ready_state

    @property
    def has_alert(self):
        """Return the page's alert-present flag."""
        return self._page._has_alert
class FrameStates(object):
    """Read-only state queries for a frame."""

    def __init__(self, frame):
        """
        :param frame: ChromiumFrame object
        """
        self._frame = frame

    @property
    def is_loading(self):
        """Return the frame's loading flag."""
        return self._frame._is_loading

    @property
    def is_alive(self):
        """Return whether the frame element is reachable and still hosts a frame.

        ``False`` when describing the node raises ``ElementLostError`` or
        ``PageDisconnectedError``, or when the node carries no 'frameId'.
        """
        frame = self._frame
        try:
            described = frame._target_page.run_cdp('DOM.describeNode',
                                                   backendNodeId=frame._frame_ele._backend_id)
            node = described['node']
        except (ElementLostError, PageDisconnectedError):
            return False
        return 'frameId' in node

    @property
    def ready_state(self):
        """Return the frame's load state."""
        return self._frame._ready_state

    @property
    def is_displayed(self):
        """Return whether the frame element is displayed.

        It is not displayed when its visibility style is 'hidden', its ``offsetParent``
        is null, or its display style is 'none'.
        """
        if self._frame.frame_ele.style('visibility') == 'hidden':
            return False
        if self._frame.frame_ele.run_js('return this.offsetParent === null;'):
            return False
        return not self._frame.frame_ele.style('display') == 'none'

    @property
    def has_alert(self):
        """Return the frame's alert-present flag."""
        return self._frame._has_alert

View File

@ -0,0 +1,95 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union, Tuple, List, Optional, Literal
from .._elements.chromium_element import ShadowRoot, ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
class ElementStates(object):
    """Type stub for an element's read-only state queries."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    @property
    def is_selected(self) -> bool: ...

    @property
    def is_checked(self) -> bool: ...

    @property
    def is_displayed(self) -> bool: ...

    @property
    def is_enabled(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...

    @property
    def is_in_viewport(self) -> bool: ...

    @property
    def is_whole_in_viewport(self) -> bool: ...

    # False when not covered, otherwise the backend id of the covering node.
    @property
    def is_covered(self) -> Union[Literal[False], int]: ...

    # False when the element has no rect, otherwise its corner coordinates.
    @property
    def has_rect(self) -> Union[bool, List[Tuple[float, float]]]: ...
class ShadowRootStates(object):
    """Type stub for a shadow root's read-only state queries."""

    def __init__(self, ele: ShadowRoot):
        """
        :param ele: ShadowRoot
        """
        self._ele: ShadowRoot = ...

    @property
    def is_enabled(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...
class PageStates(object):
    """Type stub for the read-only state queries of page and tab objects."""

    def __init__(self, page: ChromiumBase):
        self._page: ChromiumBase = ...

    @property
    def is_loading(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...

    @property
    def ready_state(self) -> Optional[str]: ...

    @property
    def has_alert(self) -> bool: ...
class FrameStates(object):
    """Type stub for a frame's read-only state queries."""

    def __init__(self, frame: ChromiumFrame):
        self._frame: ChromiumFrame = ...

    @property
    def is_loading(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...

    @property
    def ready_state(self) -> str: ...

    @property
    def is_displayed(self) -> bool: ...

    @property
    def has_alert(self) -> bool: ...

View File

@ -0,0 +1,469 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from time import sleep, perf_counter
from .._functions.settings import Settings
from ..errors import WaitTimeoutError, NoRectError
class BaseWaiter(object):
def __init__(self, page_or_ele):
"""
:param page_or_ele: 页面对象或元素对象
"""
self._driver = page_or_ele
def __call__(self, second):
"""等待若干秒
:param second: 秒数
:return: None
"""
sleep(second)
def ele_deleted(self, loc_or_ele, timeout=None, raise_err=None):
    """Wait for an element to be removed from the DOM.

    :param loc_or_ele: the element to wait for, or a locator
    :param timeout: seconds to wait, passed on to the element's own waiter
    :param raise_err: passed on to the element's own waiter
    :return: ``True`` when the element is not found at all, otherwise the result of
             the element's ``wait.deleted``
    """
    # Look the element up once, without waiting.
    ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0)
    if not ele:
        return True
    return ele.wait.deleted(timeout, raise_err=raise_err)
def ele_displayed(self, loc_or_ele, timeout=None, raise_err=None):
    """Wait for an element to become displayed.

    The element is looked up first; whatever time is left afterwards is handed to the
    element's own waiter. The timeout error now reports the configured timeout rather
    than the (zero or negative) time left.

    :param loc_or_ele: the element to wait for, or a locator
    :param timeout: seconds to wait; ``None`` uses the driver's timeout
    :param raise_err: ``True`` raises when the time runs out during the lookup
    :return: ``False`` when the time runs out during the lookup, otherwise the result
             of the element's ``wait.displayed``
    :raises WaitTimeoutError: time ran out and ``raise_err`` is ``True`` or
                              ``Settings.raise_when_wait_failed`` is ``True``
    """
    if timeout is None:
        timeout = self._driver.timeout
    end_time = perf_counter() + timeout
    ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout)
    # Keep the configured timeout intact for the error message.
    remaining = end_time - perf_counter()
    if remaining <= 0:
        if raise_err is True or Settings.raise_when_wait_failed is True:
            raise WaitTimeoutError(f'等待元素显示失败(等待{timeout}秒)。')
        return False
    return ele.wait.displayed(remaining, raise_err=raise_err)
def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None):
    """Wait for an element to become hidden.

    The element is looked up first; whatever time is left afterwards is handed to the
    element's own waiter. The timeout error now says the wait for *hidden* failed (it
    used to repeat the "displayed" message) and reports the configured timeout rather
    than the time left.

    :param loc_or_ele: the element to wait for, or a locator
    :param timeout: seconds to wait; ``None`` uses the driver's timeout
    :param raise_err: ``True`` raises when the time runs out during the lookup
    :return: ``False`` when the time runs out during the lookup, otherwise the result
             of the element's ``wait.hidden``
    :raises WaitTimeoutError: time ran out and ``raise_err`` is ``True`` or
                              ``Settings.raise_when_wait_failed`` is ``True``
    """
    if timeout is None:
        timeout = self._driver.timeout
    end_time = perf_counter() + timeout
    ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout)
    # Keep the configured timeout intact for the error message.
    remaining = end_time - perf_counter()
    if remaining <= 0:
        if raise_err is True or Settings.raise_when_wait_failed is True:
            raise WaitTimeoutError(f'等待元素隐藏失败(等待{timeout}秒)。')
        return False
    return ele.wait.hidden(remaining, raise_err=raise_err)
def ele_loaded(self, loc, timeout=None, raise_err=None):
"""等待元素加载到DOM
:param loc: 要等待的元素输入定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
ele = self._driver._ele(loc, raise_err=False, timeout=timeout)
if ele:
return ele
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待元素加载失败(等待{timeout}秒)。')
else:
return False
def load_start(self, timeout=None, raise_err=None):
"""等待页面开始加载
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._loading(timeout=timeout, gap=.002, raise_err=raise_err)
def load_complete(self, timeout=None, raise_err=None):
"""等待页面加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._loading(timeout=timeout, start=False, raise_err=raise_err)
def upload_paths_inputted(self):
"""等待自动填写上传文件路径"""
end_time = perf_counter() + self._driver.timeout
while perf_counter() < end_time:
if not self._driver._upload_list:
return True
sleep(.01)
return False
def download_begin(self, timeout=None, cancel_it=False):
"""等待浏览器下载开始,可将其拦截
:param timeout: 超时时间None使用页面对象超时时间
:param cancel_it: 是否取消该任务
:return: 成功返回任务对象失败返回False
"""
self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, False if cancel_it else True)
if timeout is None:
timeout = self._driver.timeout
r = False
end_time = perf_counter() + timeout
while perf_counter() < end_time:
v = self._driver.browser._dl_mgr.get_flag(self._driver.tab_id)
if not isinstance(v, bool):
r = v
break
self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, None)
return r
def url_change(self, text, exclude=False, timeout=None, raise_err=None):
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._change('url', text, exclude, timeout, raise_err)
def title_change(self, text, exclude=False, timeout=None, raise_err=None):
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._change('title', text, exclude, timeout, raise_err)
def _change(self, arg, text, exclude=False, timeout=None, raise_err=None):
"""等待指定属性变成包含或不包含指定文本
:param arg: 要被匹配的属性
:param text: 用于识别的文本
:param exclude: 是否排除为True时当属性不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if timeout is None:
timeout = self._driver.timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if arg == 'url':
val = self._driver.url
elif arg == 'title':
val = self._driver.title
else:
raise ValueError
if (not exclude and text in val) or (exclude and text not in val):
return True
sleep(.05)
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待{arg}改变失败(等待{timeout}秒)。')
else:
return False
def _loading(self, timeout=None, start=True, gap=.01, raise_err=None):
"""等待页面开始加载或加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param start: 等待开始还是结束
:param gap: 间隔秒数
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if timeout != 0:
if timeout is None or timeout is True:
timeout = self._driver.timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if self._driver._is_loading == start:
return True
sleep(gap)
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待页面加载失败(等待{timeout}秒)。')
else:
return False
# -----------即将废弃-----------
def data_packets(self, count=1, timeout=None, fix_count: bool = True):
"""等待符合要求的数据包到达指定数量
:param count: 需要捕捉的数据包数量
:param timeout: 超时时间为None无限等待
:param fix_count: 是否必须满足总数要求发生超时为True返回False为False返回已捕捉到的数据包
:return: count为1时返回数据包对象大于1时返回列表超时且fix_count为True时返回False"""
return self._driver.listen.wait(count, timeout, fix_count)
class TabWaiter(BaseWaiter):
    """Waiter for a tab: adds download-completion and alert waits."""

    def downloads_done(self, timeout=None, cancel_if_timeout=True):
        """Wait until every download mission of this tab has finished.
        :param timeout: timeout in seconds; a falsy value waits without limit
        :param cancel_if_timeout: whether to cancel the remaining missions on timeout
        :return: whether the wait succeeded
        """
        def unfinished():
            # Looked up afresh on every poll, exactly like the inline expression it replaces.
            return self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id)

        if timeout:
            deadline = perf_counter() + timeout
            while perf_counter() < deadline:
                if not unfinished():
                    return True
                sleep(.5)

            # One last look after the deadline before declaring failure.
            if not unfinished():
                return True
            if cancel_if_timeout:
                for mission in unfinished():
                    mission.cancel()
            return False

        while unfinished():
            sleep(.5)
        return True

    def alert_closed(self):
        """Block until an alert has appeared and has then gone away (no timeout)."""
        # Phase 1: wait for an alert to show up.
        while True:
            if self._driver.states.has_alert:
                break
            sleep(.2)
        # Phase 2: wait for that alert to be dismissed.
        while True:
            if not self._driver.states.has_alert:
                return
            sleep(.2)
class PageWaiter(TabWaiter):
    """Waiter for a page object: adds new-tab and browser-wide download waits."""

    def __init__(self, page):
        """
        :param page: page object the waits act on
        """
        super().__init__(page)

    def new_tab(self, timeout=None, raise_err=None):
        """Wait for a new tab to appear.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: raise WaitTimeoutError on failure when True; otherwise
                          ``Settings.raise_when_wait_failed`` decides
        :return: the value of the page's ``latest_tab`` once it differs from the page's
                 own ``tab_id``, otherwise False
        """
        if timeout is None:
            timeout = self._driver.timeout

        deadline = perf_counter() + timeout
        while perf_counter() < deadline:
            newest = self._driver.latest_tab
            if newest != self._driver.tab_id:
                return newest
            sleep(.01)

        if raise_err is not True and Settings.raise_when_wait_failed is not True:
            return False
        raise WaitTimeoutError(f'等待新标签页失败(等待{timeout}秒)。')

    def all_downloads_done(self, timeout=None, cancel_if_timeout=True):
        """Wait until every download mission known to the browser has finished.
        :param timeout: timeout in seconds; a falsy value waits without limit
        :param cancel_if_timeout: whether to cancel the remaining missions on timeout
        :return: whether the wait succeeded
        """
        def missions():
            # Re-read on every poll; the manager's mapping changes while we wait.
            return self._driver.browser._dl_mgr._missions

        if timeout:
            deadline = perf_counter() + timeout
            while perf_counter() < deadline:
                if not missions():
                    return True
                sleep(.5)

            if not missions():
                return True
            if cancel_if_timeout:
                # Snapshot the values before cancelling.
                for mission in list(missions().values()):
                    mission.cancel()
            return False

        while missions():
            sleep(.5)
        return True
class ElementWaiter(object):
    """Wait for an element to reach some state in the DOM (deleted, displayed, hidden, ...).

    For every ``raise_err`` parameter below: a failed wait raises
    WaitTimeoutError when ``raise_err`` is True or when
    ``Settings.raise_when_wait_failed`` is True; otherwise it returns False.
    """

    def __init__(self, page, ele):
        """
        :param page: page the element belongs to
        :param ele: element to wait on
        """
        self._page = page
        self._ele = ele

    def __call__(self, second):
        """Sleep for a number of seconds.
        :param second: number of seconds
        :return: None
        """
        sleep(second)

    def deleted(self, timeout=None, raise_err=None):
        """Wait until the element is removed from the DOM.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_alive', False, timeout, raise_err, err_text='等待元素被删除失败。')

    def displayed(self, timeout=None, raise_err=None):
        """Wait until the element is displayed.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_displayed', True, timeout, raise_err, err_text='等待元素显示失败。')

    def hidden(self, timeout=None, raise_err=None):
        """Wait until the element is hidden.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_displayed', False, timeout, raise_err, err_text='等待元素隐藏失败。')

    def covered(self, timeout=None, raise_err=None):
        """Wait until the element is covered.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_covered', True, timeout, raise_err, err_text='等待元素被覆盖失败。')

    def not_covered(self, timeout=None, raise_err=None):
        """Wait until the element is no longer covered.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_covered', False, timeout, raise_err, err_text='等待元素不被覆盖失败。')

    def enabled(self, timeout=None, raise_err=None):
        """Wait until the element becomes enabled.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_enabled', True, timeout, raise_err, err_text='等待元素变成可用失败。')

    def disabled(self, timeout=None, raise_err=None):
        """Wait until the element becomes disabled.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        return self._wait_state('is_enabled', False, timeout, raise_err, err_text='等待元素变成不可用失败。')

    def disabled_or_deleted(self, timeout=None, raise_err=None):
        """Wait until the element is either disabled or removed from the DOM.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        if timeout is None:
            timeout = self._page.timeout
        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            if not self._ele.states.is_enabled or not self._ele.states.is_alive:
                return True
            sleep(.05)

        if raise_err is True or Settings.raise_when_wait_failed is True:
            # The message now names the states actually waited for (disabled / deleted).
            raise WaitTimeoutError(f'等待元素不可用或被删除失败(等待{timeout}秒)。')
        return False

    def stop_moving(self, gap=.1, timeout=None, raise_err=None):
        """Wait until the element's size and location stop changing.
        :param gap: interval in seconds between two samples
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        :raises NoRectError: if no size/location could be read before the timeout
        """
        if timeout is None:
            timeout = self._page.timeout
        end_time = perf_counter() + timeout

        # Phase 1: obtain a first sample; the rect lookups raise NoRectError until one exists.
        while perf_counter() < end_time:
            try:
                # Seed from rect.size so the first comparison below is like-for-like.
                size = self._ele.rect.size
                location = self._ele.rect.location
                break
            except NoRectError:
                # Back off briefly instead of hammering the lookup in a tight loop.
                sleep(.005)
        else:
            raise NoRectError

        # Phase 2: two consecutive samples `gap` apart must be identical.
        while perf_counter() < end_time:
            sleep(gap)
            cur_size = self._ele.rect.size
            cur_location = self._ele.rect.location
            if cur_size == size and cur_location == location:
                return True
            size, location = cur_size, cur_location

        if raise_err is True or Settings.raise_when_wait_failed is True:
            raise WaitTimeoutError(f'等待元素停止运动失败(等待{timeout}秒)。')
        return False

    def has_rect(self, timeout=None, raise_err=None):
        """Wait until the element has size and position.
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :return: whether the wait succeeded
        """
        # NOTE(review): _wait_state compares with `== True`; if states.has_rect yields a
        # truthy non-bool value this can never match — confirm what the property returns.
        return self._wait_state('has_rect', True, timeout, raise_err, err_text='等待元素拥有大小及位置属性失败(等待{}秒)。')

    def _wait_state(self, attr, mode=False, timeout=None, raise_err=None, err_text=None):
        """Poll an attribute of ``self._ele.states`` until it equals ``mode``.
        :param attr: name of the state attribute
        :param mode: True or False, the value to wait for
        :param timeout: timeout in seconds; None uses the page's ``timeout``
        :param raise_err: whether to raise on failure (see class docstring)
        :param err_text: message for the raised error; may contain one ``{}`` slot for the timeout
        :return: whether the wait succeeded
        """
        err_text = err_text or '等待元素状态改变失败(等待{}秒)。'
        if timeout is None:
            timeout = self._page.timeout
        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            if getattr(self._ele.states, attr) == mode:
                return True
            sleep(.05)

        if raise_err is True or Settings.raise_when_wait_failed is True:
            raise WaitTimeoutError(err_text.format(timeout))
        return False
class FrameWaiter(BaseWaiter, ElementWaiter):
    """Waiter for a frame: combines the page-style waits with the element-style waits."""

    def __init__(self, frame):
        """
        :param frame: ChromiumFrame object
        """
        # Initialise each base explicitly: the frame is the driver for the page-style waits,
        # and its `frame_ele` is the element for the element-style waits.
        BaseWaiter.__init__(self, frame)
        ElementWaiter.__init__(self, frame, frame.frame_ele)

View File

@ -0,0 +1,112 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import Union
from .downloader import DownloadMission
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
# Type stub for BaseWaiter: signatures only, every body is elided with `...`.
class BaseWaiter(object):
def __init__(self, page: ChromiumBase):
self._driver: ChromiumBase = ...
def __call__(self, second: float) -> None: ...
# The ele_* waits accept either a locator (str / tuple) or an existing element.
def ele_deleted(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
timeout: float = None,
raise_err: bool = None) -> bool: ...
def ele_displayed(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
timeout: float = None,
raise_err: bool = None) -> bool: ...
def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None,
raise_err: bool = None) -> bool: ...
# ele_loaded takes a locator only and yields the element on success.
def ele_loaded(self,
loc: Union[str, tuple],
timeout: float = None,
raise_err: bool = None) -> Union[bool, ChromiumElement]: ...
def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ...
def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def upload_paths_inputted(self) -> bool: ...
def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ...
# NOTE(review): in the waiter implementation shown with this stub, downloads_done is
# defined on TabWaiter rather than BaseWaiter — confirm this declaration belongs here.
def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
def url_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ...
def title_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ...
def _change(self, arg: str, text: str, exclude: bool = False, timeout: float = None,
raise_err: bool = None) -> bool: ...
# Type stub for TabWaiter: adds download-completion and alert waits to BaseWaiter.
class TabWaiter(BaseWaiter):
def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
def alert_closed(self) -> None: ...
# Type stub for PageWaiter: narrows `_driver` to ChromiumPage and adds page-level waits.
class PageWaiter(TabWaiter):
_driver: ChromiumPage = ...
# Declared to return a str on success or a bool otherwise.
def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ...
def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
# Type stub for ElementWaiter: waits on the state of a single element.
class ElementWaiter(object):
def __init__(self, page: ChromiumBase, ele: ChromiumElement):
self._ele: ChromiumElement = ...
self._page: ChromiumBase = ...
def __call__(self, second: float) -> None: ...
# Every state wait shares the (timeout, raise_err) -> bool shape.
def deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def displayed(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def has_rect(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def stop_moving(self, gap: float = .1, timeout: float = None, raise_err: bool = None) -> bool: ...
# Shared polling helper behind the state waits above.
def _wait_state(self,
attr: str,
mode: bool = False,
timeout: float = None,
raise_err: bool = None,
err_text: str = None) -> bool: ...
# Type stub for FrameWaiter: inherits from both BaseWaiter and ElementWaiter.
class FrameWaiter(BaseWaiter, ElementWaiter):
def __init__(self, frame: ChromiumFrame): ...

View File

@ -1,75 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from typing import Union, Tuple
from .chromium_base import ChromiumBase
from .chromium_driver import ChromiumDriver
from .chromium_element import ChromiumElement
from .chromium_page import ChromiumPage
# Type stub for ActionChains. Every public action is declared to return ActionChains,
# so calls can be chained.
class ActionChains:
# NOTE(review): the constructor parameter is typed ChromiumBase while the `page`
# attribute is typed ChromiumPage — confirm which is intended.
def __init__(self, page: ChromiumBase):
self.page: ChromiumPage = ...
self._dr: ChromiumDriver = ...
self.modifier: int = ...
self.curr_x: int = ...
self.curr_y: int = ...
# Pointer movement.
def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[int, int], str],
offset_x: int = 0, offset_y: int = 0) -> ActionChains: ...
def move(self, offset_x: int = 0, offset_y: int = 0) -> ActionChains: ...
# Click / hold / release families; `on_ele` is optional for all of them.
def click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def r_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def m_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def db_click(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
# Private helpers parameterised by button name.
def _hold(self, on_ele: Union[ChromiumElement, str] = None, button: str = 'left',
count: int = 1) -> ActionChains: ...
def _release(self, button: str) -> ActionChains: ...
# Scrolling and pixel-based moves.
def scroll(self, delta_x: int = 0, delta_y: int = 0,
on_ele: Union[ChromiumElement, str] = None) -> ActionChains: ...
def up(self, pixel: int) -> ActionChains: ...
def down(self, pixel: int) -> ActionChains: ...
def left(self, pixel: int) -> ActionChains: ...
def right(self, pixel: int) -> ActionChains: ...
# Keyboard input.
def key_down(self, key: str) -> ActionChains: ...
def key_up(self, key: str) -> ActionChains: ...
def type(self, text: Union[str, list, tuple]) -> ActionChains: ...
def wait(self, second: float) -> ActionChains: ...
def _get_key_data(self, key: str, action: str) -> dict: ...
# Stub of a module-level helper taking a page and two int coordinates and returning a tuple.
# NOTE(review): presumably converts a page location to client (viewport) coordinates,
# judging by the name only — confirm against the implementation.
def location_to_client(page, lx: int, ly: int) -> tuple: ...

File diff suppressed because it is too large Load Diff

View File

@ -1,372 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, Tuple, List, Any, Dict
from DataRecorder import Recorder
from requests import Session
from requests.cookies import RequestsCookieJar
from .base import BasePage
from .chromium_driver import ChromiumDriver
from .chromium_element import ChromiumElement, ChromiumScroll
from .chromium_frame import ChromiumFrame
from .commons.constants import NoneElement
from .commons.web import ResponseData
from .session_element import SessionElement
# Type stub for ChromiumBase. Several annotations refer to classes declared later in
# this same stub (Timeout, ChromiumBaseWaiter, ChromiumBaseSetter, Screencast, ...).
class ChromiumBase(BasePage):
def __init__(self,
address: Union[str, int],
tab_id: str = None,
timeout: float = None):
self._control_session: Session = ...
self.address: str = ...
self._tab_obj: ChromiumDriver = ...
self._is_reading: bool = ...
self._timeouts: Timeout = ...
self._first_run: bool = ...
self._is_loading: bool = ...
self._page_load_strategy: str = ...
self._scroll: ChromiumScroll = ...
self._url: str = ...
self._root_id: str = ...
self._debug: bool = ...
self._debug_recorder: Recorder = ...
self._upload_list: list = ...
self._wait: ChromiumBaseWaiter = ...
self._set: ChromiumBaseSetter = ...
self._screencast: Screencast = ...
# Connection and initialisation helpers.
def _connect_browser(self, tab_id: str = None) -> None: ...
def _chromium_init(self): ...
def _driver_init(self, tab_id: str) -> None: ...
def _get_document(self) -> None: ...
def _wait_loaded(self, timeout: float = None) -> bool: ...
# Callbacks named after browser events; each accepts arbitrary keyword arguments.
def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs): ...
def _onLoadEventFired(self, **kwargs): ...
def _onDocumentUpdated(self, **kwargs): ...
def _onFrameNavigated(self, **kwargs): ...
def _onFileChooserOpened(self, **kwargs): ...
# NOTE(review): the second parameter is literally named `none` and is unannotated — confirm.
def _set_start_options(self, address, none) -> None: ...
def _set_runtime_settings(self) -> None: ...
def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement],
timeout: float = None) -> ChromiumElement: ...
# Read-only properties.
@property
def title(self) -> str: ...
@property
def driver(self) -> ChromiumDriver: ...
@property
def is_loading(self) -> bool: ...
@property
def is_alive(self) -> bool: ...
@property
def url(self) -> str: ...
@property
def _browser_url(self) -> str: ...
@property
def html(self) -> str: ...
@property
def json(self) -> Union[dict, None]: ...
@property
def tab_id(self) -> str: ...
@property
def ready_state(self) -> Union[str, None]: ...
@property
def size(self) -> Tuple[int, int]: ...
@property
def active_ele(self) -> ChromiumElement: ...
@property
def page_load_strategy(self) -> str: ...
@property
def user_agent(self) -> str: ...
# NOTE(review): `_scroll` above is annotated ChromiumScroll while this property is
# annotated ChromiumPageScroll (declared as a subclass later in this stub) — confirm.
@property
def scroll(self) -> ChromiumPageScroll: ...
@property
def timeouts(self) -> Timeout: ...
@property
def upload_list(self) -> list: ...
@property
def wait(self) -> ChromiumBaseWaiter: ...
@property
def set(self) -> ChromiumBaseSetter: ...
@property
def screencast(self) -> Screencast: ...
# Script execution.
def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ...
def run_js_loaded(self, script: str, *args: Any, as_expr: bool = False) -> Any: ...
def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ...
# Navigation and cookies.
def get(self,
url: str,
show_errmsg: bool = False,
retry: int = None,
interval: float = None,
timeout: float = None) -> Union[None, bool]: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[
list, dict]: ...
# Element lookup; the `s_` variants are declared to return SessionElement objects.
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
timeout: float = None) -> ChromiumElement: ...
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[ChromiumElement]: ...
def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \
-> Union[SessionElement, str, NoneElement]: ...
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ...
def _find_elements(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \
-> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ...
# History and loading control.
def refresh(self, ignore_cache: bool = False) -> None: ...
def forward(self, steps: int = 1) -> None: ...
def back(self, steps: int = 1) -> None: ...
def _forward_or_back(self, steps: int) -> None: ...
def stop_loading(self) -> None: ...
def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None: ...
def get_frame(self, loc_ind_ele: Union[str, int, tuple, ChromiumFrame], timeout: float = None) -> ChromiumFrame: ...
def get_frames(self, loc: Union[str, tuple] = None, timeout: float = None) -> List[ChromiumFrame]: ...
def run_cdp(self, cmd: str, **cmd_args) -> dict: ...
def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict: ...
def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ...
def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ...
# NOTE(review): `[str, Path]` and `[bool, str]` below are list literals, not types;
# `Union[str, Path]` / `Union[bool, str]` were presumably intended — confirm.
def get_screenshot(self, path: [str, Path] = None,
as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
full_page: bool = False,
left_top: Tuple[int, int] = None,
right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ...
def _get_screenshot(self, path: [str, Path] = None,
as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
full_page: bool = False,
left_top: Tuple[int, int] = None,
right_bottom: Tuple[int, int] = None,
ele: ChromiumElement = None) -> Union[str, bytes]: ...
def clear_cache(self,
session_storage: bool = True,
local_storage: bool = True,
cache: bool = True,
cookies: bool = True) -> None: ...
def _d_connect(self,
to_url: str,
times: int = 0,
interval: float = 1,
show_errmsg: bool = False,
timeout: float = None) -> Union[bool, None]: ...
# Type stub for ChromiumBaseWaiter: wait helpers bound to a ChromiumBase page.
class ChromiumBaseWaiter(object):
def __init__(self, page: ChromiumBase):
self._driver: ChromiumBase = ...
self._listener: NetworkListener = ...
def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ...
def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ...
def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ...
def _loading(self, timeout: float = None, start: bool = True, gap: float = .01) -> bool: ...
def load_start(self, timeout: float = None) -> bool: ...
def load_complete(self, timeout: float = None) -> bool: ...
# These three carry the same names and parameters as NetworkListener.set_targets /
# .stop / .listen declared later in this stub.
def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ...
def stop_listening(self) -> None: ...
# NOTE(review): `False` is a value, not a type; `Literal[False]` or `bool` was
# presumably intended inside this Union — confirm.
def data_packets(self, timeout: float = None,
any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ...
def upload_paths_inputted(self) -> None: ...
# Type stub for NetworkListener: holds targets, a regex flag, pending requests and results.
class NetworkListener(object):
def __init__(self, page):
self._page: ChromiumBase = ...
self._targets: Union[str, dict] = ...
self._single: bool = ...
# NOTE(review): `False` is a value, not a type; `Literal[False]` or `bool` was
# presumably intended in the Unions below — confirm.
self._results: Union[ResponseData, Dict[str, ResponseData], False] = ...
self._is_regex: bool = ...
self._requests: dict = ...
def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ...
def stop(self) -> None: ...
def listen(self, timeout: float = None,
any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ...
# Callbacks accepting arbitrary keyword arguments.
def _response_received(self, **kwargs) -> None: ...
def _loading_finished(self, **kwargs) -> None: ...
def _requestWillBeSent(self, **kwargs) -> None: ...
# Type stub for ChromiumPageScroll: a ChromiumScroll bound to a page, with to_see helpers.
class ChromiumPageScroll(ChromiumScroll):
def __init__(self, page: ChromiumBase): ...
# `center` accepts True, False or None.
def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ...
def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ...
# Type stub for ChromiumBaseSetter: setters bound to a page; each is declared to return None.
class ChromiumBaseSetter(object):
def __init__(self, page):
self._page: ChromiumBase = ...
# Sub-setters exposed as properties.
@property
def load_strategy(self) -> PageLoadStrategy: ...
@property
def scroll(self) -> PageScrollSetter: ...
def retry_times(self, times: int) -> None: ...
def retry_interval(self, interval: float) -> None: ...
def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ...
def user_agent(self, ua: str, platform: str = None) -> None: ...
# `value` may be a str or a bool for both storage setters.
def session_storage(self, item: str, value: Union[str, bool]) -> None: ...
def local_storage(self, item: str, value: Union[str, bool]) -> None: ...
def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...
def headers(self, headers: dict) -> None: ...
def upload_files(self, files: Union[str, list, tuple]) -> None: ...
# Type stub for Timeout: holds three float timeouts (implicit, page_load, script) for a page.
class Timeout(object):
def __init__(self, page: ChromiumBase, implicit=None, page_load=None, script=None):
self._page: ChromiumBase = ...
self.implicit: float = ...
self.page_load: float = ...
self.script: float = ...
# Type stub for PageLoadStrategy: callable with a strategy name, plus one method per
# named strategy (normal / eager / none).
class PageLoadStrategy(object):
def __init__(self, page: ChromiumBase):
self._page: ChromiumBase = ...
def __call__(self, value: str) -> None: ...
def normal(self) -> None: ...
def eager(self) -> None: ...
def none(self) -> None: ...
# Type stub for PageScrollSetter: on/off switches applied to a ChromiumPageScroll.
class PageScrollSetter(object):
def __init__(self, scroll: ChromiumPageScroll):
self._scroll: ChromiumPageScroll = ...
# Both switches default to on; no return annotation is declared.
def wait_complete(self, on_off: bool = True): ...
def smooth(self, on_off: bool = True): ...
# Type stub for Screencast: recording state (path, running/enable flags, mode) for a page.
class Screencast(object):
def __init__(self, page: ChromiumBase):
self._page: ChromiumBase = ...
self._path: Path = ...
self._running: bool = ...
self._enable: bool = ...
self._mode: str = ...
# Returns the ScreencastMode helper declared later in this stub.
@property
def set_mode(self) -> ScreencastMode: ...
def start(self, save_path: Union[str, Path] = None) -> None: ...
# stop is declared to return a str.
def stop(self, video_name: str = None) -> str: ...
def set_save_path(self, save_path: Union[str, Path] = None) -> None: ...
def _run(self) -> None: ...
def _onScreencastFrame(self, **kwargs) -> None: ...
# Type stub for ScreencastMode: one no-argument method per recording mode of a Screencast.
class ScreencastMode(object):
def __init__(self, screencast: Screencast):
self._screencast: Screencast = ...
def video_mode(self) -> None: ...
def frugal_video_mode(self) -> None: ...
def js_video_mode(self) -> None: ...
def frugal_imgs_mode(self) -> None: ...
def imgs_mode(self) -> None: ...

View File

@ -1,250 +0,0 @@
# -*- coding: utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from functools import partial
from json import dumps, loads
from queue import Queue, Empty
from threading import Thread, Event
from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \
create_connection
from .errors import CallMethodError
class GenericAttr(object):
    """Proxy for one CDP domain of a tab.

    Reading ``proxy.item`` yields the listener registered for ``"<name>.<item>"`` if
    there is one, otherwise a callable that invokes that method on the tab.
    Assigning ``proxy.item = callback`` registers ``callback`` as that listener.
    """

    def __init__(self, name, tab):
        """
        :param name: domain name used as the prefix of every method/event name
        :param tab: object providing get_listener, set_listener and call_method
        """
        # Write through the instance dict: __setattr__ is overridden to register listeners.
        vars(self).update(name=name, tab=tab)

    def __getattr__(self, item):
        """Return the bound listener for ``name.item``, or a caller for that method."""
        full_name = f"{self.name}.{item}"
        return self.tab.get_listener(full_name) or partial(self.tab.call_method, full_name)

    def __setattr__(self, key, value):
        """Register ``value`` as the listener for the event ``name.key``."""
        self.tab.set_listener(f"{self.name}.{key}", value)
class ChromiumDriver(object):
_INITIAL_ = 'initial'
_STARTED_ = 'started'
_STOPPED_ = 'stopped'
def __init__(self, tab_id, tab_type, address):
"""
:param tab_id: 标签页id
:param tab_type: 标签页类型
:param address: 浏览器连接地址
"""
self.id = tab_id
self.address = address
self.type = tab_type
self.debug = False
self.has_alert = False
self._websocket_url = f'ws://{address}/devtools/{tab_type}/{tab_id}'
self._cur_id = 0
self._ws = None
self._recv_th = Thread(target=self._recv_loop)
self._handle_event_th = Thread(target=self._handle_event_loop)
self._recv_th.daemon = True
self._handle_event_th.daemon = True
self._stopped = Event()
self._started = False
self.status = self._INITIAL_
self.event_handlers = {}
self.method_results = {}
self.event_queue = Queue()
def _send(self, message, timeout=None):
"""发送信息到浏览器,并返回浏览器返回的信息
:param message: 发送给浏览器的数据
:param timeout: 超时时间
:return: 浏览器返回的数据
"""
if 'id' not in message:
self._cur_id += 1
message['id'] = self._cur_id
message_json = dumps(message)
if self.debug:
print(f"发> {message_json}")
if not isinstance(timeout, (int, float)) or timeout > 1:
q_timeout = 1
else:
q_timeout = timeout / 2.0
try:
self.method_results[message['id']] = Queue()
self._ws.send(message_json)
while not self._stopped.is_set():
try:
if isinstance(timeout, (int, float)):
if timeout < q_timeout:
q_timeout = timeout
timeout -= q_timeout
return self.method_results[message['id']].get(timeout=q_timeout)
except Empty:
if self.has_alert:
return {'error': {'message': 'alert exists'}, 'type': 'alert_exists'}
if isinstance(timeout, (int, float)) and timeout <= 0:
raise TimeoutError(f"调用{message['method']}超时。")
continue
except Exception:
return None
finally:
self.method_results.pop(message['id'], None)
def _recv_loop(self):
"""接收浏览器信息的守护线程方法"""
while not self._stopped.is_set():
try:
self._ws.settimeout(1)
message_json = self._ws.recv()
message = loads(message_json)
except WebSocketTimeoutException:
continue
except (WebSocketException, OSError, WebSocketConnectionClosedException):
self.stop()
return
if self.debug:
print(f'<收 {message_json}')
if "method" in message:
self.event_queue.put(message)
elif "id" in message:
if message["id"] in self.method_results:
self.method_results[message['id']].put(message)
elif self.debug:
print(f'未知信息:{message}')
def _handle_event_loop(self):
"""当接收到浏览器信息,执行已绑定的方法"""
while not self._stopped.is_set():
try:
event = self.event_queue.get(timeout=1)
except Empty:
continue
if event['method'] in self.event_handlers:
try:
self.event_handlers[event['method']](**event['params'])
except Exception as e:
raise
# raise RuntimeError(f"\n回调函数错误\n{e}")
self.event_queue.task_done()
def __getattr__(self, item):
attr = GenericAttr(item, self)
setattr(self, item, attr)
return attr
def call_method(self, _method, *args, **kwargs):
"""执行cdp方法
:param _method: cdp方法名
:param args: cdp参数
:param kwargs: cdp参数
:return: 执行结果
"""
if not self._started:
self.start()
# raise RuntimeError("不能在启动前调用方法。")
if args:
raise CallMethodError("参数必须是key=value形式。")
if self._stopped.is_set():
return {'error': 'tab closed', 'type': 'tab_closed'}
timeout = kwargs.pop("_timeout", None)
result = self._send({"method": _method, "params": kwargs}, timeout=timeout)
if result is None:
return {'error': 'tab closed', 'type': 'tab_closed'}
if 'result' not in result and 'error' in result:
return {'error': result['error']['message'],
'type': result.get('type', 'call_method_error'),
'method': _method,
'args': kwargs}
return result['result']
def start(self):
"""启动连接"""
if self._started:
return False
if not self._websocket_url:
raise RuntimeError("已存在另一个连接。")
self._started = True
self.status = self._STARTED_
self._stopped.clear()
self._ws = create_connection(self._websocket_url, enable_multithread=True,
suppress_origin=True)
self._recv_th.start()
self._handle_event_th.start()
return True
def stop(self):
"""中断连接"""
if self._stopped.is_set():
return False
if not self._started:
return True
self.status = self._STOPPED_
self._stopped.set()
if self._ws:
self._ws.close()
self._ws = None
self.event_handlers.clear()
self.method_results.clear()
self.event_queue.queue.clear()
return True
def set_listener(self, event, callback):
"""绑定cdp event和回调方法
:param event: cdp event
:param callback: 绑定到cdp event的回调方法
:return: 回调方法
"""
if not callback:
return self.event_handlers.pop(event, None)
if not callable(callback):
raise RuntimeError("方法不能调用。")
self.event_handlers[event] = callback
return True
def get_listener(self, event):
"""获取cdp event对应的回调方法
:param event: cdp event
:return: 回调方法
"""
return self.event_handlers.get(event, None)
def __str__(self):
    """Return ``<ChromiumDriver ID>`` where ID is ``self.id``; ``__repr__`` is an alias."""
    return "<ChromiumDriver {}>".format(self.id)


__repr__ = __str__

View File

@ -1,60 +0,0 @@
# -*- coding: utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from queue import Queue
from threading import Thread, Event
from typing import Union, Callable
class GenericAttr(object):
    """Signature-only stub for ``GenericAttr``; every body is ``...``."""

    def __init__(self, name: str, tab: ChromiumDriver): ...

    def __getattr__(self, item: str) -> Callable: ...

    def __setattr__(self, key: str, value: Callable) -> None: ...
class ChromiumDriver(object):
    """Signature-only stub for ``ChromiumDriver``: attribute types and method signatures."""

    # markers assigned to ``status``
    _INITIAL_: str
    _STARTED_: str
    _STOPPED_: str

    # identity of the target this driver talks to
    id: str
    address: str
    type: str
    debug: bool
    has_alert: bool

    # connection state
    _websocket_url: str
    _cur_id: int
    _ws = None
    _recv_th: Thread
    _handle_event_th: Thread
    _stopped: Event
    _started: bool
    status: str

    # bookkeeping containers
    event_handlers: dict
    method_results: dict
    event_queue: Queue

    def __init__(self, tab_id: str, tab_type: str, address: str): ...

    def _send(self, message: dict, timeout: float = None) -> dict: ...

    def _recv_loop(self) -> None: ...

    def _handle_event_loop(self) -> None: ...

    def __getattr__(self, item: str) -> Callable: ...

    def call_method(self, _method: str, *args, **kwargs) -> dict: ...

    def start(self) -> bool: ...

    def stop(self) -> bool: ...

    def set_listener(self,
                     event: str,
                     callback: Union[Callable, None]) -> Union[Callable, None, bool]: ...

    def get_listener(self, event: str) -> Union[Callable, None]: ...

    def __str__(self) -> str: ...

File diff suppressed because it is too large Load Diff

View File

@ -1,591 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, Tuple, List, Any
from .base import DrissionElement, BaseElement
from .chromium_base import ChromiumBase
from .chromium_frame import ChromiumFrame
from .chromium_page import ChromiumPage
from .commons.constants import NoneElement
from .session_element import SessionElement
from .web_page import WebPage
class ChromiumElement(DrissionElement):
    """Signature-only stub for ``ChromiumElement``; every method body is ``...``.

    Parameters that the original declared with list literals (for example
    ``[str, Path]``) are annotated with ``Union[...]`` of the same members,
    because a list literal is not a valid type annotation.
    """

    def __init__(self,
                 page: ChromiumBase,
                 node_id: str = None,
                 obj_id: str = None,
                 backend_id: str = None):
        self._tag: str = ...
        self.page: Union[ChromiumPage, WebPage] = ...
        self._node_id: str = ...
        self._obj_id: str = ...
        self._backend_id: str = ...
        self._doc_id: str = ...
        self._ids: ChromiumElementIds = ...
        self._scroll: ChromiumElementScroll = ...
        self._click: Click = ...
        self._select: ChromiumSelect = ...
        self._wait: ChromiumElementWaiter = ...
        self._locations: Locations = ...
        self._set: ChromiumElementSetter = ...
        self._states: ChromiumElementStates = ...
        self._pseudo: Pseudo = ...

    def __repr__(self) -> str: ...

    def __call__(self,
                 loc_or_str: Union[Tuple[str, str], str],
                 timeout: float = None) -> Union[ChromiumElement, str, None]: ...

    @property
    def tag(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def inner_html(self) -> str: ...

    @property
    def attrs(self) -> dict: ...

    @property
    def text(self) -> str: ...

    @property
    def raw_text(self) -> str: ...

    # ----------------- attributes specific to d mode -------------------
    @property
    def ids(self) -> ChromiumElementIds: ...

    @property
    def size(self) -> Tuple[int, int]: ...

    @property
    def set(self) -> ChromiumElementSetter: ...

    @property
    def states(self) -> ChromiumElementStates: ...

    @property
    def location(self) -> Tuple[int, int]: ...

    @property
    def locations(self) -> Locations: ...

    @property
    def pseudo(self) -> Pseudo: ...

    @property
    def shadow_root(self) -> Union[None, ChromiumShadowRoot]: ...

    @property
    def sr(self) -> Union[None, ChromiumShadowRoot]: ...

    @property
    def scroll(self) -> ChromiumElementScroll: ...

    @property
    def click(self) -> Click: ...

    def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ...

    def child(self,
              filter_loc: Union[tuple, str] = '',
              index: int = 1,
              timeout: float = 0,
              ele_only: bool = True) -> Union[ChromiumElement, str, None]: ...

    def prev(self,
             filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, str, None]: ...

    def next(self,
             filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, str, None]: ...

    def before(self,
               filter_loc: Union[tuple, str] = '',
               index: int = 1,
               timeout: float = None,
               ele_only: bool = True) -> Union[ChromiumElement, str, None]: ...

    def after(self,
              filter_loc: Union[tuple, str] = '',
              index: int = 1,
              timeout: float = None,
              ele_only: bool = True) -> Union[ChromiumElement, str, None]: ...

    def children(self,
                 filter_loc: Union[tuple, str] = '',
                 timeout: float = 0,
                 ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def prevs(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def nexts(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def befores(self,
                filter_loc: Union[tuple, str] = '',
                timeout: float = None,
                ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def afters(self,
               filter_loc: Union[tuple, str] = '',
               timeout: float = None,
               ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    @property
    def wait(self) -> ChromiumElementWaiter: ...

    @property
    def select(self) -> ChromiumSelect: ...

    def attr(self, attr: str) -> Union[str, None]: ...

    def remove_attr(self, attr: str) -> None: ...

    def prop(self, prop: str) -> Union[str, int, None]: ...

    def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ...

    def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ...

    def ele(self,
            loc_or_str: Union[Tuple[str, str], str],
            timeout: float = None) -> Union[ChromiumElement, str]: ...

    def eles(self,
             loc_or_str: Union[Tuple[str, str], str],
             timeout: float = None) -> List[Union[ChromiumElement, str]]: ...

    def s_ele(self,
              loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, NoneElement]: ...

    def s_eles(self,
               loc_or_str: Union[Tuple[str, str], str] = None) -> List[Union[SessionElement, str]]: ...

    def _find_elements(self,
                       loc_or_str: Union[Tuple[str, str], str],
                       timeout: float = None,
                       single: bool = True,
                       relative: bool = False,
                       raise_err: bool = False) \
            -> Union[ChromiumElement, ChromiumFrame, str, NoneElement,
                     List[Union[ChromiumElement, ChromiumFrame, str]]]: ...

    def style(self, style: str, pseudo_ele: str = '') -> str: ...

    def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ...

    # NOTE(review): the original listed ``[str, bool]`` for ``path``; the members are kept as-is,
    # but ``Union[str, Path]`` may have been intended - confirm against the implementation.
    def save(self, path: Union[str, bool] = None, rename: str = None, timeout: float = None) -> None: ...

    def get_screenshot(self,
                       path: Union[str, Path] = None,
                       as_bytes: Union[bool, str] = None,
                       as_base64: Union[bool, str] = None) -> Union[str, bytes]: ...

    def input(self, vals: Any, clear: bool = True) -> None: ...

    def _set_file_input(self, files: Union[str, list, tuple]) -> None: ...

    def clear(self, by_js: bool = False) -> None: ...

    def _input_focus(self) -> None: ...

    def focus(self) -> None: ...

    def hover(self, offset_x: int = None, offset_y: int = None) -> None: ...

    def drag(self, offset_x: int = 0, offset_y: int = 0, duration: float = 0.5) -> None: ...

    def drag_to(self, ele_or_loc: Union[tuple, ChromiumElement], duration: float = 0.5) -> None: ...

    def _get_obj_id(self, node_id: str = None, backend_id: str = None) -> str: ...

    def _get_node_id(self, obj_id: str = None, backend_id: str = None) -> str: ...

    def _get_backend_id(self, node_id: str) -> str: ...

    def _get_ele_path(self, mode: str) -> str: ...
class ChromiumElementStates(object):
    """Signature-only stub: boolean state properties of a ``ChromiumElement``."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    @property
    def is_selected(self) -> bool: ...

    @property
    def is_checked(self) -> bool: ...

    @property
    def is_displayed(self) -> bool: ...

    @property
    def is_enabled(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...

    @property
    def is_in_viewport(self) -> bool: ...

    @property
    def is_covered(self) -> bool: ...
class ChromiumShadowRoot(BaseElement):
    """Signature-only stub for ``ChromiumShadowRoot``; every method body is ``...``."""

    def __init__(self,
                 parent_ele: ChromiumElement,
                 obj_id: str = None,
                 backend_id: str = None):
        self._obj_id: str = ...
        self._ids: Ids = ...
        self._node_id: str = ...
        self._backend_id: str = ...
        self.page: ChromiumPage = ...
        self.parent_ele: ChromiumElement = ...
        self._states: ShadowRootStates = ...

    def __repr__(self) -> str: ...

    def __call__(self,
                 loc_or_str: Union[Tuple[str, str], str],
                 timeout: float = None) -> ChromiumElement: ...

    @property
    def ids(self) -> Ids: ...

    @property
    def states(self) -> ShadowRootStates: ...

    @property
    def tag(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def inner_html(self) -> str: ...

    def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ...

    def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ...

    def parent(self, level_or_loc: Union[str, int] = 1) -> ChromiumElement: ...

    def child(self,
              filter_loc: Union[tuple, str] = '',
              index: int = 1) -> Union[ChromiumElement, str, None]: ...

    def next(self,
             filter_loc: Union[tuple, str] = '',
             index: int = 1) -> Union[ChromiumElement, str, None]: ...

    def before(self,
               filter_loc: Union[tuple, str] = '',
               index: int = 1) -> Union[ChromiumElement, str, None]: ...

    def after(self,
              filter_loc: Union[tuple, str] = '',
              index: int = 1) -> Union[ChromiumElement, str, None]: ...

    def children(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ...

    def nexts(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ...

    def befores(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ...

    def afters(self, filter_loc: Union[tuple, str] = '') -> List[Union[ChromiumElement, str]]: ...

    def ele(self,
            loc_or_str: Union[Tuple[str, str], str],
            timeout: float = None) -> Union[ChromiumElement]: ...

    def eles(self,
             loc_or_str: Union[Tuple[str, str], str],
             timeout: float = None) -> List[ChromiumElement]: ...

    def s_ele(self,
              loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, NoneElement]: ...

    def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ...

    def _find_elements(self,
                       loc_or_str: Union[Tuple[str, str], str],
                       timeout: float = None,
                       single: bool = True,
                       relative: bool = False,
                       raise_err: bool = None) \
            -> Union[ChromiumElement, ChromiumFrame, NoneElement, str,
                     List[Union[ChromiumElement, ChromiumFrame, str]]]: ...

    def _get_node_id(self, obj_id: str) -> str: ...

    def _get_obj_id(self, back_id: str) -> str: ...

    def _get_backend_id(self, node_id: str) -> str: ...
class Ids(object):
    """Signature-only stub: id accessors of an element or shadow root."""

    def __init__(self, ele: Union[ChromiumElement, ChromiumShadowRoot]):
        self._ele: Union[ChromiumElement, ChromiumShadowRoot] = ...

    @property
    def node_id(self) -> str: ...

    @property
    def obj_id(self) -> str: ...

    @property
    def backend_id(self) -> str: ...
class ChromiumElementIds(Ids):
    """Signature-only stub: ``Ids`` extended with a ``doc_id`` property."""

    @property
    def doc_id(self) -> str: ...
def find_in_chromium_ele(
        ele: ChromiumElement,
        loc: Union[str, Tuple[str, str]],
        single: bool = True,
        timeout: float = None,
        relative: bool = True,
) -> Union[ChromiumElement, str, NoneElement, List[Union[ChromiumElement, str]]]:
    """Signature-only declaration; presumably looks up ``loc`` under ``ele`` - confirm against the implementation."""
    ...
def find_by_xpath(
        ele: ChromiumElement,
        xpath: str,
        single: bool,
        timeout: float,
        relative: bool = True,
) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]:
    """Signature-only declaration; presumably an xpath lookup under ``ele`` - confirm against the implementation."""
    ...
def find_by_css(
        ele: ChromiumElement,
        selector: str,
        single: bool,
        timeout: float,
) -> Union[ChromiumElement, List[ChromiumElement], NoneElement]:
    """Signature-only declaration; presumably a css-selector lookup under ``ele`` - confirm against the implementation."""
    ...
def make_chromium_ele(
        page: ChromiumBase,
        node_id: str = ...,
        obj_id: str = ...,
) -> Union[ChromiumElement, ChromiumFrame, str]:
    """Signature-only declaration: takes a page plus optional node/object ids."""
    ...
def make_js_for_find_ele_by_xpath(xpath: str,
                                  type_txt: str,
                                  node_txt: str) -> str:
    """Signature-only declaration: three strings in, a string declared as the result."""
    ...
def run_js(
        page_or_ele: Union[ChromiumBase, ChromiumElement, ChromiumShadowRoot],
        script: str,
        as_expr: bool = False,
        timeout: float = None,
        args: tuple = ...,
) -> Any:
    """Signature-only declaration of the module-level ``run_js`` helper."""
    ...
def parse_js_result(page: ChromiumBase,
                    ele: ChromiumElement,
                    result: dict):
    """Signature-only declaration; no return type is declared."""
    ...
def convert_argument(arg: Any) -> dict:
    """Signature-only declaration: accepts any value, declared to return a dict."""
    ...
def send_enter(ele: ChromiumElement) -> None:
    """Signature-only declaration taking the target element."""
    ...
def send_key(ele: ChromiumElement,
             modifier: int,
             key: str) -> None:
    """Signature-only declaration taking the target element, an int modifier and a key string."""
    ...
class ChromiumElementSetter(object):
    """Signature-only stub: setters bound to one ``ChromiumElement``."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    def attr(self, attr: str, value: str) -> None: ...

    def prop(self, prop: str, value: str) -> None: ...

    def innerHTML(self, html: str) -> None: ...
class ShadowRootStates(object):
    """Signature-only stub: state properties of a ``ChromiumShadowRoot``."""

    def __init__(self, ele: ChromiumShadowRoot):
        """
        :param ele: the shadow root these states belong to
        """
        self._ele: ChromiumShadowRoot = ...

    @property
    def is_enabled(self) -> bool: ...

    @property
    def is_alive(self) -> bool: ...
class Locations(object):
    """Signature-only stub: coordinate properties of a ``ChromiumElement``, each an ``(int, int)`` tuple."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    @property
    def location(self) -> Tuple[int, int]: ...

    @property
    def midpoint(self) -> Tuple[int, int]: ...

    @property
    def click_point(self) -> Tuple[int, int]: ...

    @property
    def viewport_location(self) -> Tuple[int, int]: ...

    @property
    def viewport_midpoint(self) -> Tuple[int, int]: ...

    @property
    def viewport_click_point(self) -> Tuple[int, int]: ...

    @property
    def screen_location(self) -> Tuple[int, int]: ...

    @property
    def screen_midpoint(self) -> Tuple[int, int]: ...

    @property
    def screen_click_point(self) -> Tuple[int, int]: ...

    def _get_viewport_rect(self, quad: str) -> Union[list, None]: ...

    def _get_page_coord(self, x: int, y: int) -> Tuple[int, int]: ...
class Click(object):
    """Signature-only stub: click operations bound to one ``ChromiumElement``."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    def __call__(self, by_js: Union[None, bool] = False, timeout: float = 1) -> bool: ...

    def left(self, by_js: Union[None, bool] = False, timeout: float = 1) -> bool: ...

    def right(self) -> None: ...

    def middle(self) -> None: ...

    def at(self,
           offset_x: int = None,
           offset_y: int = None,
           button: str = 'left',
           count: int = 1) -> None: ...

    def twice(self, by_js: bool = False) -> None: ...

    def _click(self,
               client_x: int,
               client_y: int,
               button: str = 'left',
               count: int = 1) -> None: ...
class ChromiumScroll(object):
    """Signature-only stub: scrolling operations for a page, element or frame."""

    def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement, ChromiumFrame]):
        self.t1: str = ...
        self.t2: str = ...
        self._driver: Union[ChromiumPage, ChromiumElement, ChromiumFrame] = ...
        self._wait_complete: bool = ...

    def _run_js(self, js: str): ...

    def to_top(self) -> None: ...

    def to_bottom(self) -> None: ...

    def to_half(self) -> None: ...

    def to_rightmost(self) -> None: ...

    def to_leftmost(self) -> None: ...

    def to_location(self, x: int, y: int) -> None: ...

    def up(self, pixel: int = 300) -> None: ...

    def down(self, pixel: int = 300) -> None: ...

    def left(self, pixel: int = 300) -> None: ...

    def right(self, pixel: int = 300) -> None: ...

    def _wait_scrolled(self) -> None: ...
class ChromiumElementScroll(ChromiumScroll):
    """Signature-only stub: ``ChromiumScroll`` extended with ``to_see``."""

    def to_see(self, center: Union[bool, None] = None) -> None: ...
class ChromiumSelect(object):
    """Signature-only stub: option selection operations bound to one ``ChromiumElement``."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    def __call__(self,
                 text_or_index: Union[str, int, list, tuple],
                 timeout: float = None) -> bool: ...

    @property
    def is_multi(self) -> bool: ...

    @property
    def options(self) -> List[ChromiumElement]: ...

    @property
    def selected_option(self) -> Union[ChromiumElement, None]: ...

    @property
    def selected_options(self) -> List[ChromiumElement]: ...

    def clear(self) -> None: ...

    def all(self) -> None: ...

    # select by ...
    def by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...

    def by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ...

    # cancel by ...
    def cancel_by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...

    def cancel_by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None) -> bool: ...

    def invert(self) -> None: ...

    def _by_loc(self,
                loc: Union[str, Tuple[str, str]],
                timeout: float = None,
                cancel: bool = False) -> bool: ...

    def _select(self,
                condition: Union[str, int, list, tuple] = None,
                para_type: str = 'text',
                cancel: bool = False,
                timeout: float = None) -> bool: ...

    def _text_value(self, condition: set, para_type: str, mode: str, timeout: float) -> bool: ...

    def _index(self, condition: set, mode: str, timeout: float) -> bool: ...

    def _dispatch_change(self) -> None: ...
class ChromiumElementWaiter(object):
    """Signature-only stub: wait operations for one element; each public wait is declared to return ``bool``."""

    def __init__(self, page: ChromiumBase, ele: ChromiumElement):
        self._ele: ChromiumElement = ...
        self._page: ChromiumBase = ...

    def delete(self, timeout: float = None) -> bool: ...

    def display(self, timeout: float = None) -> bool: ...

    def hidden(self, timeout: float = None) -> bool: ...

    def covered(self, timeout: float = None) -> bool: ...

    def not_covered(self, timeout: float = None) -> bool: ...

    def enabled(self, timeout: float = None) -> bool: ...

    def disabled(self, timeout: float = None) -> bool: ...

    def disabled_or_delete(self, timeout: float = None) -> bool: ...

    def _wait_state(self,
                    attr: str,
                    mode: bool = False,
                    timeout: float = None) -> bool: ...
class Pseudo(object):
    """Signature-only stub: ``before`` and ``after`` string properties of a ``ChromiumElement``."""

    def __init__(self, ele: ChromiumElement):
        self._ele: ChromiumElement = ...

    @property
    def before(self) -> str: ...

    @property
    def after(self) -> str: ...

View File

@ -1,215 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, Tuple, List, Any
from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter
from .chromium_element import ChromiumElement, Locations, ChromiumElementStates, ChromiumElementWaiter
class ChromiumFrame(ChromiumBase):
    """Signature-only stub for ``ChromiumFrame``; every method body is ``...``.

    Screenshot parameters that the original declared with list literals
    (``[str, Path]``, ``[bool, str]``) are annotated with ``Union[...]`` of
    the same members, because a list literal is not a valid type annotation.
    """

    def __init__(self, page: ChromiumBase, ele: ChromiumElement):
        self.page: ChromiumBase = ...
        self.frame_id: str = ...
        self._frame_ele: ChromiumElement = ...
        self._backend_id: str = ...
        self._doc_ele: ChromiumElement = ...
        self._is_diff_domain: bool = ...
        self.doc_ele: ChromiumElement = ...
        self._states: ChromiumElementStates = ...
        self._ids: ChromiumFrameIds = ...

    def __call__(self,
                 loc_or_str: Union[Tuple[str, str], str],
                 timeout: float = None) -> Union[ChromiumElement, str]: ...

    def _check_alive(self) -> None: ...

    def __repr__(self) -> str: ...

    def _runtime_settings(self) -> None: ...

    def _driver_init(self, tab_id: str) -> None: ...

    def _reload(self) -> None: ...

    def _check_ok(self) -> None: ...

    def _get_new_document(self) -> None: ...

    def _onFrameAttached(self, **kwargs): ...

    def _onFrameDetached(self, **kwargs): ...

    @property
    def ids(self) -> ChromiumFrameIds: ...

    @property
    def frame_ele(self) -> ChromiumElement: ...

    @property
    def tag(self) -> str: ...

    @property
    def url(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def inner_html(self) -> str: ...

    @property
    def title(self) -> str: ...

    @property
    def cookies(self) -> dict: ...

    @property
    def attrs(self) -> dict: ...

    @property
    def frame_size(self) -> Tuple[int, int]: ...

    @property
    def size(self) -> Tuple[int, int]: ...

    @property
    def active_ele(self) -> ChromiumElement: ...

    @property
    def location(self) -> Tuple[int, int]: ...

    @property
    def locations(self) -> Locations: ...

    @property
    def xpath(self) -> str: ...

    @property
    def css_path(self) -> str: ...

    @property
    def ready_state(self) -> str: ...

    @property
    def is_alive(self) -> bool: ...

    @property
    def scroll(self) -> ChromiumFrameScroll: ...

    @property
    def set(self) -> ChromiumFrameSetter: ...

    @property
    def states(self) -> ChromiumElementStates: ...

    @property
    def wait(self) -> FrameWaiter: ...

    def refresh(self) -> None: ...

    def attr(self, attr: str) -> Union[str, None]: ...

    def remove_attr(self, attr: str) -> None: ...

    def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ...

    def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ...

    def prev(self,
             filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, str]: ...

    def next(self,
             filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = 0,
             ele_only: bool = True) -> Union[ChromiumElement, str]: ...

    def before(self,
               filter_loc: Union[tuple, str] = '',
               index: int = 1,
               timeout: float = None,
               ele_only: bool = True) -> Union[ChromiumElement, str]: ...

    def after(self,
              filter_loc: Union[tuple, str] = '',
              index: int = 1,
              timeout: float = None,
              ele_only: bool = True) -> Union[ChromiumElement, str]: ...

    def prevs(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def nexts(self,
              filter_loc: Union[tuple, str] = '',
              timeout: float = 0,
              ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def befores(self,
                filter_loc: Union[tuple, str] = '',
                timeout: float = None,
                ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def afters(self,
               filter_loc: Union[tuple, str] = '',
               timeout: float = None,
               ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...

    def get_screenshot(self,
                       path: Union[str, Path] = None,
                       as_bytes: Union[bool, str] = None,
                       as_base64: Union[bool, str] = None) -> Union[str, bytes]: ...

    def _get_screenshot(self,
                        path: Union[str, Path] = None,
                        as_bytes: Union[bool, str] = None,
                        as_base64: Union[bool, str] = None,
                        full_page: bool = False,
                        left_top: Tuple[int, int] = None,
                        right_bottom: Tuple[int, int] = None,
                        ele: ChromiumElement = None) -> Union[str, bytes]: ...

    def _find_elements(self,
                       loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
                       timeout: float = None,
                       single: bool = True,
                       relative: bool = False,
                       raise_err: bool = None) \
            -> Union[ChromiumElement, ChromiumFrame, None, List[Union[ChromiumElement, ChromiumFrame]]]: ...

    def _d_connect(self,
                   to_url: str,
                   times: int = 0,
                   interval: float = 1,
                   show_errmsg: bool = False,
                   timeout: float = None) -> Union[bool, None]: ...

    def _is_inner_frame(self) -> bool: ...
class ChromiumFrameIds(object):
    """Signature-only stub: id accessors of a ``ChromiumFrame``."""

    def __init__(self, frame: ChromiumFrame):
        self._frame: ChromiumFrame = ...

    @property
    def tab_id(self) -> str: ...

    @property
    def backend_id(self) -> str: ...

    @property
    def obj_id(self) -> str: ...

    @property
    def node_id(self) -> str: ...
class ChromiumFrameScroll(ChromiumPageScroll):
    """Signature-only stub: ``ChromiumPageScroll`` specialised for a ``ChromiumFrame``."""

    def __init__(self, frame: ChromiumFrame) -> None: ...

    def to_see(self,
               loc_or_ele: Union[str, tuple, ChromiumElement],
               center: Union[None, bool] = None) -> None: ...
class ChromiumFrameSetter(ChromiumBaseSetter):
    """Signature-only stub: ``ChromiumBaseSetter`` whose ``_page`` is a ``ChromiumFrame``."""

    _page: ChromiumFrame = ...

    def attr(self, attr: str, value: str) -> None: ...
class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter):
    """Signature-only stub: waiter combining ``ChromiumBaseWaiter`` and ``ChromiumElementWaiter``."""

    def __init__(self, frame: ChromiumFrame): ...

View File

@ -1,800 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from platform import system
from threading import Thread
from time import perf_counter, sleep
from warnings import warn
from requests import Session
from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBaseWaiter
from .chromium_driver import ChromiumDriver
from .chromium_tab import ChromiumTab
from .commons.browser import connect_browser
from .commons.web import set_session_cookies
from .configs.chromium_options import ChromiumOptions
from .errors import CallMethodError, BrowserConnectError
from .session_page import DownloadSetter
class ChromiumPage(ChromiumBase):
"""用于管理浏览器的类"""
def __init__(self, addr_driver_opts=None, tab_id=None, timeout=None):
"""
:param addr_driver_opts: 浏览器地址:端口ChromiumDriver对象或ChromiumOptions对象
:param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间
"""
self._download_set = None
self._download_path = None
super().__init__(addr_driver_opts, tab_id, timeout)
def _set_start_options(self, addr_driver_opts, none):
"""设置浏览器启动属性
:param addr_driver_opts: 'ip:port'ChromiumDriverChromiumOptions
:param none: 用于后代继承
:return: None
"""
if not addr_driver_opts or str(type(addr_driver_opts)).endswith(("ChromiumOptions'>", "DriverOptions'>")):
self._driver_options = addr_driver_opts or ChromiumOptions(addr_driver_opts)
# 接收浏览器地址和端口
elif isinstance(addr_driver_opts, str):
self._driver_options = ChromiumOptions()
self._driver_options.debugger_address = addr_driver_opts
# 接收传递过来的ChromiumDriver浏览器
elif isinstance(addr_driver_opts, ChromiumDriver):
self._driver_options = ChromiumOptions(read_file=False)
self._driver_options.debugger_address = addr_driver_opts.address
self._tab_obj = addr_driver_opts
else:
raise TypeError('只能接收ChromiumDriver或ChromiumOptions类型参数。')
self.address = self._driver_options.debugger_address.replace('localhost',
'127.0.0.1').lstrip('http://').lstrip('https://')
def _set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeouts = Timeout(self,
page_load=self._driver_options.timeouts['pageLoad'],
script=self._driver_options.timeouts['script'],
implicit=self._driver_options.timeouts['implicit'])
self._page_load_strategy = self._driver_options.page_load_strategy
self._download_path = self._driver_options.download_path
def _connect_browser(self, tab_id=None):
"""连接浏览器,在第一次时运行
:param tab_id: 要控制的标签页id不指定默认为激活的
:return: None
"""
self._chromium_init()
if not self._tab_obj: # 不是传入driver的情况
connect_browser(self._driver_options)
if not tab_id:
json = self._control_session.get(f'http://{self.address}/json').json()
tab_id = [i['id'] for i in json if i['type'] == 'page']
if not tab_id:
raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。')
tab_id = tab_id[0]
self._driver_init(tab_id)
self._page_init()
self._get_document()
self._first_run = False
def _page_init(self):
"""页面相关设置"""
ws = self._control_session.get(f'http://{self.address}/json/version').json()['webSocketDebuggerUrl']
self._browser_driver = ChromiumDriver(ws.split('/')[-1], 'browser', self.address)
self._browser_driver.start()
self._alert = Alert()
self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open
self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close
self._rect = None
self._main_tab = self.tab_id
try:
self.download_set.by_browser()
except CallMethodError:
pass
self._process_id = None
r = self.browser_driver.SystemInfo.getProcessInfo()
if 'processInfo' not in r:
return None
for i in r['processInfo']:
if i['type'] == 'browser':
self._process_id = i['id']
break
@property
def browser_driver(self):
"""返回用于控制浏览器cdp的driver"""
return self._browser_driver
@property
def tabs_count(self):
"""返回标签页数量"""
return len(self.tabs)
@property
def tabs(self):
"""返回所有标签页id组成的列表"""
j = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp
return [i['id'] for i in j if i['type'] == 'page' and not i['url'].startswith('devtools://') and i[
'url'] != 'chrome://privacy-sandbox-dialog/notice']
@property
def main_tab(self):
return self._main_tab
@property
def latest_tab(self):
"""返回最新的标签页id最新标签页指最后创建或最后被激活的"""
return self.tabs[0]
@property
def process_id(self):
"""返回浏览器进程id"""
return self._process_id
@property
def set(self):
"""返回用于等待的对象"""
if self._set is None:
self._set = ChromiumPageSetter(self)
return self._set
@property
def download_path(self):
"""返回默认下载路径"""
p = self._download_path or ''
return str(Path(p).absolute())
@property
def download_set(self):
"""返回用于设置下载参数的对象"""
if self._download_set is None:
self._download_set = ChromiumDownloadSetter(self)
return self._download_set
@property
def download(self):
"""返回下载器对象"""
return self.download_set._switched_DownloadKit
@property
def rect(self):
if self._rect is None:
self._rect = ChromiumTabRect(self)
return self._rect
@property
def wait(self):
"""返回用于等待的对象"""
if self._wait is None:
self._wait = ChromiumPageWaiter(self)
return self._wait
def get_tab(self, tab_id=None):
"""获取一个标签页对象
:param tab_id: 要获取的标签页id为None时获取当前tab
:return: 标签页对象
"""
tab_id = tab_id or self.tab_id
return ChromiumTab(self, tab_id)
def find_tabs(self, title=None, url=None, tab_type=None, single=True):
"""查找符合条件的tab返回它们的id组成的列表
:param title: 要匹配title的文本
:param url: 要匹配url的文本
:param tab_type: tab类型可用列表输入多个
:param single: 是否返回首个结果的id为False返回所有信息
:return: tab id或tab dict
"""
tabs = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp
if isinstance(tab_type, str):
tab_type = {tab_type}
elif isinstance(tab_type, (list, tuple, set)):
tab_type = set(tab_type)
elif tab_type is not None:
raise TypeError('tab_type只能是set、list、tuple、str、None。')
r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url'])
and (tab_type is None or i['type'] in tab_type))]
return r[0]['id'] if r and single else r
def new_tab(self, url=None, switch_to=False):
"""新建一个标签页,该标签页在最后面
:param url: 新标签页跳转到的网址
:param switch_to: 新建标签页后是否把焦点移过去
:return: 新标签页的id
"""
if switch_to:
begin_tabs = set(self.tabs)
len_tabs = len(begin_tabs)
tid = self.run_cdp('Target.createTarget', url='')['targetId']
tabs = self.tabs
while len(tabs) == len_tabs:
tabs = self.tabs
sleep(.005)
new_tab = set(tabs) - begin_tabs
self._to_tab(new_tab.pop(), read_doc=False)
if url:
self.get(url)
elif url:
tid = self.run_cdp('Target.createTarget', url=url)['targetId']
else:
tid = self.run_cdp('Target.createTarget', url='')['targetId']
return tid
def to_main_tab(self):
"""跳转到主标签页"""
self.to_tab(self._main_tab)
def to_tab(self, tab_or_id=None, activate=True):
"""跳转到标签页
:param tab_or_id: 标签页对象或id默认跳转到main_tab
:param activate: 切换后是否变为活动状态
:return: None
"""
self._to_tab(tab_or_id, activate)
def _to_tab(self, tab_or_id=None, activate=True, read_doc=True):
"""跳转到标签页
:param tab_or_id: 标签页对象或id默认跳转到main_tab
:param activate: 切换后是否变为活动状态
:param read_doc: 切换后是否读取文档
:return: None
"""
tabs = self.tabs
if not tab_or_id:
tab_id = self._main_tab
elif isinstance(tab_or_id, ChromiumTab):
tab_id = tab_or_id.tab_id
else:
tab_id = tab_or_id
if tab_id not in tabs:
tab_id = self.latest_tab
if activate:
self._control_session.get(f'http://{self.address}/json/activate/{tab_id}')
if tab_id == self.tab_id:
return
self.driver.stop()
self._driver_init(tab_id)
if read_doc and self.ready_state in ('complete', None):
self._get_document()
def close_tabs(self, tabs_or_ids=None, others=False):
"""关闭传入的标签页,默认关闭当前页。可传入多个
:param tabs_or_ids: 要关闭的标签页对象或id可传入列表或元组为None时关闭当前页
:param others: 是否关闭指定标签页之外的
:return: None
"""
all_tabs = set(self.tabs)
if isinstance(tabs_or_ids, str):
tabs = {tabs_or_ids}
elif isinstance(tabs_or_ids, ChromiumTab):
tabs = {tabs_or_ids.tab_id}
elif tabs_or_ids is None:
tabs = {self.tab_id}
elif isinstance(tabs_or_ids, (list, tuple)):
tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids)
else:
raise TypeError('tabs_or_ids参数只能传入标签页对象或id。')
if others:
tabs = all_tabs - tabs
end_len = len(all_tabs) - len(tabs)
if end_len <= 0:
self.quit()
return
if self.tab_id in tabs:
self.driver.stop()
for tab in tabs:
self._control_session.get(f'http://{self.address}/json/close/{tab}')
while len(self.tabs) != end_len:
sleep(.1)
if self._main_tab in tabs:
self._main_tab = self.tabs[0]
self.to_tab()
def close_other_tabs(self, tabs_or_ids=None):
"""关闭传入的标签页以外标签页,默认保留当前页。可传入多个
:param tabs_or_ids: 要保留的标签页对象或id可传入列表或元组为None时保存当前页
:return: None
"""
self.close_tabs(tabs_or_ids, True)
def handle_alert(self, accept=True, send=None, timeout=None):
"""处理提示框,可以自动等待提示框出现
:param accept: True表示确认False表示取消其它值不会按按钮但依然返回文本值
:param send: 处理prompt提示框时可输入文本
:param timeout: 等待提示框出现的超时时间为None则使用self.timeout属性的值
:return: 提示框内容文本未等到提示框则返回False
"""
timeout = self.timeout if timeout is None else timeout
timeout = .1 if timeout <= 0 else timeout
end_time = perf_counter() + timeout
while not self._alert.activated and perf_counter() < end_time:
sleep(.1)
if not self._alert.activated:
return False
res_text = self._alert.text
if self._alert.type == 'prompt':
self.driver.Page.handleJavaScriptDialog(accept=accept, promptText=send)
else:
self.driver.Page.handleJavaScriptDialog(accept=accept)
return res_text
def quit(self):
"""关闭浏览器"""
self._tab_obj.Browser.close()
self._tab_obj.stop()
if self.process_id:
from os import popen
from platform import system
txt = f'tasklist | findstr {self.process_id}' if system().lower() == 'windows' \
else f'ps -ef | grep {self.process_id}'
while True:
p = popen(txt)
if f' {self.process_id} ' not in p.read():
break
def _on_alert_close(self, **kwargs):
    """Callback run when a JavaScript dialog is closed: clears the dialog state and stores the response."""
    alert = self._alert
    alert.activated = False
    alert.text = alert.type = alert.defaultPrompt = None
    alert.response_accept = kwargs.get('result')
    alert.response_text = kwargs['userInput']  # required key: KeyError if the event lacks it
    self._tab_obj.has_alert = False
def _on_alert_open(self, **kwargs):
    """Callback run when a JavaScript dialog opens: records its message, type and default prompt.

    :param kwargs: event parameters; 'message' is required, 'type' and 'defaultPrompt' are read if present
    :return: None
    """
    alert = self._alert
    alert.activated = True
    alert.text = kwargs['message']
    # The dialog type (e.g. 'prompt') comes from the event's 'type' field, not from the message;
    # handle_alert() relies on it to decide whether to send prompt text.
    alert.type = kwargs.get('type')
    alert.defaultPrompt = kwargs.get('defaultPrompt', None)
    alert.response_accept = None
    alert.response_text = None
    self._tab_obj.has_alert = True
class ChromiumPageWaiter(ChromiumBaseWaiter):
    """Waiter for page objects, adding waits for browser downloads and new tabs."""

    def __init__(self, page: ChromiumBase):
        """
        :param page: page object this waiter works on
        """
        super().__init__(page)
        self._listener = None

    def download_begin(self, timeout=None):
        """Wait for a browser download to begin.

        :param timeout: seconds to wait; None uses the page object's timeout
        :return: whether a download began in time
        """
        return self._driver.download_set.wait_download_begin(timeout)

    def new_tab(self, timeout=None):
        """Wait until the latest tab differs from the current one, i.e. a new tab appeared.

        :param timeout: seconds to wait; None uses the page object's timeout
        :return: True if a new tab appeared in time, False on timeout
        """
        timeout = timeout if timeout is not None else self._driver.timeout
        end_time = perf_counter() + timeout
        while self._driver.tab_id == self._driver.latest_tab:
            if perf_counter() >= end_time:
                return False
            sleep(.01)
        return True
class ChromiumTabRect(object):
    """Read-only geometry (window, viewport and page) for the tab held by a page object."""

    def __init__(self, page):
        """
        :param page: page object whose geometry is queried
        """
        self._page = page

    @property
    def window_state(self):
        """Window state string reported by the browser (normal, fullscreen, maximized, minimized)."""
        return self._get_browser_rect()['windowState']

    @property
    def browser_location(self):
        """Browser window position on screen as (x, y); (0, 0) when maximized or fullscreen."""
        bounds = self._get_browser_rect()
        if bounds['windowState'] in ('maximized', 'fullscreen'):
            return 0, 0
        # Fixed 7px horizontal correction applied to the reported left edge.
        return bounds['left'] + 7, bounds['top']

    @property
    def page_location(self):
        """Screen position of the page's top-left corner as (x, y)."""
        view_x, view_y = self.viewport_location
        layout = self._get_page_rect()['layoutViewport']
        return view_x - layout['pageX'], view_y - layout['pageY']

    @property
    def viewport_location(self):
        """Screen position of the viewport's top-left corner as (x, y)."""
        left, top = self.browser_location
        win_w, win_h = self.browser_size
        view_w, view_h = self.viewport_size_with_scrollbar
        # The viewport is taken to sit at the bottom-right of the window.
        return left + win_w - view_w, top + win_h - view_h

    @property
    def browser_size(self):
        """Browser window size as (width, height), with fixed border corrections per window state."""
        bounds = self._get_browser_rect()
        trims = {'fullscreen': (0, 0), 'maximized': (16, 16)}
        trim_w, trim_h = trims.get(bounds['windowState'], (16, 7))
        return bounds['width'] - trim_w, bounds['height'] - trim_h

    @property
    def page_size(self):
        """Total page size as (width, height)."""
        content = self._get_page_rect()['contentSize']
        return content['width'], content['height']

    @property
    def viewport_size(self):
        """Viewport size without scrollbars as (width, height)."""
        visual = self._get_page_rect()['visualViewport']
        return visual['clientWidth'], visual['clientHeight']

    @property
    def viewport_size_with_scrollbar(self):
        """Viewport size including scrollbars as (width, height), read from window.innerWidth/innerHeight."""
        text = self._page.run_js('return window.innerWidth.toString() + " " + window.innerHeight.toString();')
        width, height = text.split(' ')
        return int(width), int(height)

    def _get_page_rect(self):
        """Fetch the page layout metrics via the 'Page.getLayoutMetrics' command."""
        return self._page.run_cdp_loaded('Page.getLayoutMetrics')

    def _get_browser_rect(self):
        """Fetch the 'bounds' of the window that hosts this tab."""
        window = self._page.browser_driver.Browser.getWindowForTarget(targetId=self._page.tab_id)
        return window['bounds']
class ChromiumDownloadSetter(DownloadSetter):
    """Class used to configure how browser-triggered downloads are handled."""

    def __init__(self, page):
        """
        :param page: ChromiumPage object
        """
        super().__init__(page)
        # 'allow' after by_browser(), 'deny' after by_DownloadKit().
        self._behavior = 'allow'
        # Thread that waits for DownloadKit tasks; created on demand in _download_by_DownloadKit().
        self._download_th = None
        # Session created lazily by the `session` property.
        self._session = None
        # Flags used by wait_download_begin() and the download callbacks to signal each other.
        self._waiting_download = False
        self._download_begin = False

    @property
    def session(self):
        """Return the Session object used for DownloadKit, creating it on first access."""
        if self._session is None:
            self._session = Session()
        return self._session

    @property
    def _switched_DownloadKit(self):
        """Return the DownloadKit object after copying the browser's cookies into the session."""
        self._cookies_to_session()
        return self.DownloadKit

    def save_path(self, path):
        """Set the download directory (created if missing) for both the browser and DownloadKit.

        :param path: download path; a falsy value resolves to the current working directory
        :return: None
        """
        path = path or ''
        path = Path(path).absolute()
        path.mkdir(parents=True, exist_ok=True)
        path = str(path)
        self._page._download_path = path
        try:
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=path,
                                                                  eventsEnabled=True)
        except CallMethodError:
            # Fallback when the browser-level command is rejected: warn and use the page-level one.
            warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。')
            self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path)
        self.DownloadKit.goal_path = path

    def by_browser(self):
        """Let the browser itself download files."""
        try:
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True,
                                                                  downloadPath=self._page.download_path)
            self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser
        except CallMethodError:
            # Fallback to the page-level command and event.
            self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
            self._page.driver.Page.downloadWillBegin = self._download_by_browser
        self._behavior = 'allow'

    def by_DownloadKit(self):
        """Deny browser downloads and hand them to DownloadKit instead.

        :raises RuntimeError: when the browser rejects the browser-level download command
        """
        try:
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True)
            self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit
        except CallMethodError:
            raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。')
        self._behavior = 'deny'

    def wait_download_begin(self, timeout=None):
        """Wait for a browser download to begin.

        :param timeout: seconds to wait; None uses the page object's timeout
        :return: whether a download began in time
        """
        self._waiting_download = True
        result = False
        timeout = timeout if timeout is not None else self._page.timeout
        end_time = perf_counter() + timeout
        while perf_counter() < end_time:
            # The flag is set by the downloadWillBegin callbacks below.
            if self._download_begin:
                result = True
                break
            sleep(.05)
        # Reset both flags so the next wait starts clean.
        self._download_begin = False
        self._waiting_download = False
        return result

    def _cookies_to_session(self):
        """Copy the browser's user agent and cookies to the session object."""
        ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
        self.session.headers.update({"User-Agent": ua})
        set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False))

    def _download_by_DownloadKit(self, **kwargs):
        """Callback that intercepts a browser download and hands it to DownloadKit.

        NOTE(review): indentation was lost in the reviewed text; the nesting below (thread start
        inside the non-blob branch, flag update at function level) is the presumed layout — confirm.
        """
        url = kwargs['url']
        if url.startswith('blob:'):
            # blob: URLs are left to the browser: allow downloads for 2 seconds, then deny again.
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True,
                                                                  downloadPath=self._page.download_path)
            sleep(2)
            self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True)
        else:
            self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid'])
            self._page.download.add(file_url=url, goal_path=self._page.download_path,
                                    rename=kwargs['suggestedFilename'])
            # Start a waiter thread only if none is currently running.
            if self._download_th is None or not self._download_th.is_alive():
                self._download_th = Thread(target=self._wait_download_complete, daemon=False)
                self._download_th.start()
        if self._waiting_download:
            self._download_begin = True

    def _download_by_browser(self, **kwargs):
        """Callback invoked when the browser itself starts a download."""
        if self._waiting_download:
            self._download_begin = True

    def _wait_download_complete(self):
        """Block until the page's download object reports that its tasks are finished."""
        self._page.download.wait()
class Alert(object):
    """Plain holder for the state of the most recent JavaScript dialog."""

    def __init__(self):
        # No dialog is open initially.
        self.activated = False
        # Details of the dialog currently shown.
        self.text = self.type = self.defaultPrompt = None
        # How the last dialog was answered.
        self.response_accept = self.response_text = None
class WindowSetter(object):
    """Changes the state, size and position of the browser window."""

    def __init__(self, page):
        """
        :param page: page object
        """
        self._page = page
        self._window_id = self._get_info()['windowId']

    def maximized(self):
        """Maximize the window."""
        self._switch_state('maximized', ('fullscreen', 'minimized'))

    def minimized(self):
        """Minimize the window."""
        self._switch_state('minimized', ('fullscreen',))

    def fullscreen(self):
        """Switch the window to fullscreen."""
        self._switch_state('fullscreen', ('minimized',))

    def normal(self):
        """Put the window into the normal state."""
        self._switch_state('normal', ('fullscreen',))

    def size(self, width=None, height=None):
        """Set the window size; does nothing when neither value is given.

        :param width: window width
        :param height: window height
        :return: None
        """
        if not (width or height):
            return
        if self._get_info()['bounds']['windowState'] != 'normal':
            self._perform({'windowState': 'normal'})
        bounds = self._get_info()['bounds']
        # Fixed offsets are applied to given values; omitted ones keep the current size.
        new_width = width - 16 if width else bounds['width']
        new_height = height + 7 if height else bounds['height']
        self._perform({'width': new_width, 'height': new_height})

    def location(self, x=None, y=None):
        """Move the window on screen; does nothing when neither coordinate is given.

        :param x: horizontal position (sent as the window's 'left' bound after a fixed -8 offset)
        :param y: vertical position (sent as the window's 'top' bound)
        :return: None
        """
        if x is None and y is None:
            return
        self.normal()
        bounds = self._get_info()['bounds']
        left = bounds['left'] if x is None else x
        top = bounds['top'] if y is None else y
        self._perform({'left': left - 8, 'top': top})

    def hide(self):
        """Hide the browser window (Windows only)."""
        show_or_hide_browser(self._page, hide=True)

    def show(self):
        """Show the browser window (Windows only)."""
        show_or_hide_browser(self._page, hide=False)

    def _switch_state(self, target, reset_from):
        """Set the window state to target, first going back to 'normal' if the current state is in reset_from."""
        if self._get_info()['bounds']['windowState'] in reset_from:
            self._perform({'windowState': 'normal'})
        self._perform({'windowState': target})

    def _get_info(self):
        """Fetch the window id and bounds via 'Browser.getWindowForTarget'."""
        return self._page.run_cdp('Browser.getWindowForTarget')

    def _perform(self, bounds):
        """Send a 'Browser.setWindowBounds' command for this window.

        :param bounds: bounds data to apply
        :return: None
        """
        self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds)
class ChromiumPageSetter(ChromiumBaseSetter):
    """Setter for page-level options: main tab, window handling and tab activation."""

    def main_tab(self, tab_id=None):
        """Choose the main tab.

        :param tab_id: tab id; when omitted the current tab becomes the main tab
        :return: None
        """
        page = self._page
        page._main_tab = tab_id if tab_id else page.tab_id

    @property
    def window(self):
        """Return a new object for controlling the browser window."""
        return WindowSetter(self._page)

    def tab_to_front(self, tab_or_id=None):
        """Activate a tab so that it comes to the front.

        :param tab_or_id: tab object or id; None means the current tab
        :return: None
        """
        target = tab_or_id
        if not target:
            target = self._page.tab_id
        elif isinstance(target, ChromiumTab):
            target = target.tab_id
        activate_url = f'http://{self._page.address}/json/activate/{target}'
        self._page._control_session.get(activate_url)
def show_or_hide_browser(page, hide=True):
    """Show or hide the browser window of a locally running browser (Windows only).

    :param page: ChromePage object
    :param hide: True hides the window, False shows it
    :return: None
    """
    is_local = page.address.startswith(('127.0.0.1', 'localhost'))
    if not is_local:
        return

    if system().lower() != 'windows':
        raise OSError('该方法只能在Windows系统使用。')

    try:
        from win32gui import ShowWindow
        from win32con import SW_HIDE, SW_SHOW
    except ImportError:
        raise ImportError('请先安装pip install pypiwin32')

    pid = page.process_id
    if not pid:
        return None

    handles = get_chrome_hwnds_from_pid(pid, page.title)
    command = SW_HIDE if hide else SW_SHOW
    for handle in handles:
        ShowWindow(handle, command)
def get_browser_progress_id(progress, address):
    """Return the browser's process id.

    :param progress: known process object, or None when there is none
    :param address: browser control address including the port
    :return: the process id, or None if no listening entry is found for the port
    """
    if progress:
        return progress.pid

    from os import popen
    port = address.split(':')[-1]
    lines = popen(f'netstat -nao | findstr :{port}').read().split('\n')
    # The first LISTENING entry carries the pid as its last space-separated field.
    listening = next((line for line in lines if 'LISTENING' in line), '')
    if not listening:
        return None
    return listening.split(' ')[-1]
def get_chrome_hwnds_from_pid(pid, title):
    """Find the window handles that belong to a process and whose caption contains a title.

    :param pid: process id
    :param title: window title
    :return: list of matching window handles
    """
    try:
        from win32gui import IsWindow, GetWindowText, EnumWindows
        from win32process import GetWindowThreadProcessId
    except ImportError:
        raise ImportError('请先安装win32guipip install pypiwin32')

    def collect(hwnd, found):
        # Always return True so the enumeration keeps going.
        if not IsWindow(hwnd) or title not in GetWindowText(hwnd):
            return True
        _, owner_pid = GetWindowThreadProcessId(hwnd)
        if str(owner_pid) == str(pid):
            found.append(hwnd)
        return True

    handles = []
    EnumWindows(collect, handles)
    return handles

View File

@ -1,241 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from os import popen
from pathlib import Path
from threading import Thread
from typing import Union, Tuple, List
from DownloadKit import DownloadKit
from requests import Session
from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter, NetworkListener
from .chromium_driver import ChromiumDriver
from .chromium_tab import ChromiumTab
from .configs.chromium_options import ChromiumOptions
from .configs.driver_options import DriverOptions
from .session_page import DownloadSetter
class ChromiumPage(ChromiumBase):
    """Type stub for the page object that manages the browser, its tabs, alerts and downloads."""

    def __init__(self,
                 addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver, DriverOptions] = None,
                 tab_id: str = None,
                 timeout: float = None):
        # A bare list is not a valid annotation; Union expresses "either of these types".
        self._driver_options: Union[ChromiumDriver, DriverOptions] = ...
        self._process_id: str = ...
        self._window_setter: WindowSetter = ...
        self._main_tab: str = ...
        self._alert: Alert = ...
        self._download_path: str = ...
        self._download_set: ChromiumDownloadSetter = ...
        self._browser_driver: ChromiumDriver = ...
        self._rect: ChromiumTabRect = ...

    def _connect_browser(self,
                         addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None,
                         tab_id: str = None) -> None: ...

    def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions], none) -> None: ...

    def _page_init(self) -> None: ...

    @property
    def browser_driver(self) -> ChromiumDriver: ...

    @property
    def tabs_count(self) -> int: ...

    @property
    def tabs(self) -> List[str]: ...

    @property
    def rect(self) -> ChromiumTabRect: ...

    @property
    def wait(self) -> ChromiumPageWaiter: ...

    @property
    def main_tab(self) -> str: ...

    @property
    def latest_tab(self) -> str: ...

    @property
    def process_id(self) -> Union[None, int]: ...

    @property
    def set(self) -> ChromiumPageSetter: ...

    @property
    def download_set(self) -> ChromiumDownloadSetter: ...

    @property
    def download(self) -> DownloadKit: ...

    @property
    def download_path(self) -> str: ...

    def get_tab(self, tab_id: str = None) -> ChromiumTab: ...

    def find_tabs(self, title: str = None, url: str = None,
                  tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ...

    def new_tab(self, url: str = None, switch_to: bool = False) -> str: ...

    def to_main_tab(self) -> None: ...

    def to_tab(self, tab_or_id: Union[str, ChromiumTab] = None, activate: bool = True) -> None: ...

    def _to_tab(self, tab_or_id: Union[str, ChromiumTab] = None, activate: bool = True,
                read_doc: bool = True) -> None: ...

    # Tuple[X, ...] is a tuple of any length; Tuple[X] would only describe a 1-tuple.
    def close_tabs(self, tabs_or_ids: Union[
        str, ChromiumTab, List[Union[str, ChromiumTab]], Tuple[Union[str, ChromiumTab], ...]] = None,
                   others: bool = False) -> None: ...

    def close_other_tabs(self, tabs_or_ids: Union[
        str, ChromiumTab, List[Union[str, ChromiumTab]], Tuple[Union[str, ChromiumTab], ...]] = None) -> None: ...

    # Returns the dialog text, or False when no dialog appeared; False is a value, not a type,
    # so the union is spelled with bool.
    def handle_alert(self, accept: bool = True, send: str = None, timeout: float = None) -> Union[str, bool]: ...

    def quit(self) -> None: ...

    def _on_alert_close(self, **kwargs): ...

    def _on_alert_open(self, **kwargs): ...
class ChromiumPageWaiter(ChromiumBaseWaiter):
    """Type stub for the page-level waiter (download start and new-tab waits)."""
    _driver: ChromiumPage = ...
    _listener: Union[NetworkListener, None] = ...

    # Whether a browser download began before the timeout.
    def download_begin(self, timeout: float = None) -> bool: ...

    # Declared as bool: whether a new tab appeared before the timeout.
    def new_tab(self, timeout: float = None) -> bool: ...
class ChromiumTabRect(object):
    """Type stub for the read-only window/viewport/page geometry object."""

    def __init__(self, page: ChromiumPage):
        self._page: ChromiumPage = ...

    # Window state string as reported by the browser.
    @property
    def window_state(self) -> str: ...

    # All location/size properties below are (x, y) or (width, height) pairs.
    @property
    def browser_location(self) -> Tuple[int, int]: ...

    @property
    def page_location(self) -> Tuple[int, int]: ...

    @property
    def viewport_location(self) -> Tuple[int, int]: ...

    @property
    def browser_size(self) -> Tuple[int, int]: ...

    @property
    def page_size(self) -> Tuple[int, int]: ...

    @property
    def viewport_size(self) -> Tuple[int, int]: ...

    @property
    def viewport_size_with_scrollbar(self) -> Tuple[int, int]: ...

    # Raw dictionaries returned by the browser commands.
    def _get_page_rect(self) -> dict: ...

    def _get_browser_rect(self) -> dict: ...
class ChromiumDownloadSetter(DownloadSetter):
    """Type stub for the object that configures browser download handling."""

    def __init__(self, page: ChromiumPage):
        self._page: ChromiumPage = ...
        # 'allow' or 'deny', depending on the last by_browser()/by_DownloadKit() call.
        self._behavior: str = ...
        self._download_th: Thread = ...
        # None until the `session` property is first read.
        self._session: Session = None
        self._waiting_download: bool = ...
        self._download_begin: bool = ...

    @property
    def session(self) -> Session: ...

    @property
    def _switched_DownloadKit(self) -> DownloadKit: ...

    def save_path(self, path: Union[str, Path]) -> None: ...

    def by_browser(self) -> None: ...

    def by_DownloadKit(self) -> None: ...

    def wait_download_begin(self, timeout: float = None) -> bool: ...

    def _cookies_to_session(self) -> None: ...

    # Callbacks registered for the browser's downloadWillBegin event.
    def _download_by_DownloadKit(self, **kwargs) -> None: ...

    def _download_by_browser(self, **kwargs) -> None: ...

    def _wait_download_complete(self) -> None: ...
class Alert(object):
    """Type stub for the holder of JavaScript dialog state."""

    def __init__(self):
        # State of the dialog currently shown.
        self.activated: bool = ...
        self.text: str = ...
        self.type: str = ...
        self.defaultPrompt: str = ...
        # How the most recent dialog was answered.
        self.response_accept: str = ...
        self.response_text: str = ...
class WindowSetter(object):
    """Type stub for the object that changes the browser window's state, size and position."""

    def __init__(self, page: ChromiumPage):
        self._page: ChromiumPage = ...
        self._window_id: str = ...

    # Window state changes.
    def maximized(self) -> None: ...

    def minimized(self) -> None: ...

    def fullscreen(self) -> None: ...

    def normal(self) -> None: ...

    # Omitted arguments leave the corresponding dimension/coordinate unchanged.
    def size(self, width: int = None, height: int = None) -> None: ...

    def location(self, x: int = None, y: int = None) -> None: ...

    # Window visibility (Windows only).
    def hide(self) -> None: ...

    def show(self) -> None: ...

    def _get_info(self) -> dict: ...

    def _perform(self, bounds: dict) -> None: ...
# Show or hide the window of a local browser (Windows only).
def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ...


# NOTE(review): `popen` here is the os.popen function, not a type; the argument is presumably
# a process object exposing `.pid` — confirm and adjust the annotation.
def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ...


# Window handles owned by `pid` whose caption contains `title`.
def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ...
class ChromiumPageSetter(ChromiumBaseSetter):
    """Type stub for the page-level setter (main tab, window object, tab activation)."""
    _page: ChromiumPage = ...

    # With no tab_id the current tab becomes the main tab.
    def main_tab(self, tab_id: str = None) -> None: ...

    @property
    def window(self) -> WindowSetter: ...

    # None means the current tab.
    def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ...

View File

@ -1,14 +1,15 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
实用工具
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from FlowViewer import Listener, RequestMan
from ._elements.session_element import make_session_ele
from ._functions.by import By
from ._functions.keys import Keys
from ._functions.settings import Settings
from ._functions.tools import wait_until, configs_to_here
from ._units.actions import Actions
from .session_element import make_session_ele
from .action_chains import ActionChains
from .commons.keys import Keys
from .commons.by import By
from .commons.constants import Settings
__all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here']

View File

@ -1,235 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from json import load, dump, JSONDecodeError
from pathlib import Path
from platform import system
from subprocess import Popen, DEVNULL
from tempfile import gettempdir
from time import perf_counter, sleep
from requests import get as requests_get
from .tools import port_is_using
from ..configs.chromium_options import ChromiumOptions
from ..errors import BrowserConnectError
def connect_browser(option):
    """Connect to a running browser or launch a new one.

    :param option: DriverOptions (or ChromiumOptions) object
    :return: tuple of (browser path, process object); (None, None) when an existing browser is used
    :raises FileNotFoundError: when no browser executable can be located
    """
    debugger_address = option.debugger_address.replace('localhost', '127.0.0.1')
    # str.lstrip() removes a *set of characters*, not a prefix, so it would also eat the leading
    # letters of host names such as 'test.host'. Strip the scheme explicitly instead.
    for scheme in ('http://', 'https://'):
        if debugger_address.startswith(scheme):
            debugger_address = debugger_address[len(scheme):]
            break

    chrome_path = option.browser_path
    ip, port = debugger_address.split(':')

    # A remote browser, or a local port that is already in use, is only checked for
    # reachability; nothing is launched.
    if ip != '127.0.0.1' or port_is_using(ip, port):
        test_connect(ip, port)
        return None, None

    args = get_launch_args(option)
    set_prefs(option)

    # ---------- create the browser process ----------
    try:
        debugger = _run_browser(port, chrome_path, args)

    # The given path was not found: look it up via the project's own path discovery.
    except FileNotFoundError:
        from ..easy_set import get_chrome_path
        chrome_path = get_chrome_path(show_msg=False)

        if not chrome_path:
            raise FileNotFoundError('无法找到chrome路径请手动配置。')

        debugger = _run_browser(port, chrome_path, args)

    test_connect(ip, port)
    return chrome_path, debugger
def get_launch_args(opt):
    """Build the command-line launch arguments from an options object.

    :param opt: DriverOptions or ChromiumOptions
    :return: list of launch arguments
    """
    # ---------- arguments ----------
    launch_args = set()
    has_user_path = False
    headless = False
    for arg in opt.arguments:
        # Extension and debugging-port arguments are rebuilt elsewhere, so drop them here.
        if arg.startswith(('--load-extension=', '--remote-debugging-port=')):
            continue

        if arg.startswith('--user-data-dir') and not opt.system_user_path:
            # arg[16:] is the text after '--user-data-dir='.
            launch_args.add(f'--user-data-dir={Path(arg[16:]).absolute()}')
            has_user_path = True
            continue

        if arg.startswith('--headless'):
            headless = True
        launch_args.add(arg)

    if not (has_user_path or opt.system_user_path):
        # No user data folder given: use a per-port folder in the temp directory.
        port = opt.debugger_address.split(':')[-1] if opt.debugger_address else '0'
        path = Path(gettempdir()) / 'DrissionPage' / f'userData_{port}'
        path.mkdir(parents=True, exist_ok=True)
        launch_args.add(f'--user-data-dir={path}')

    if not headless and system().lower() == 'linux':
        # On Linux without a graphical target, fall back to headless mode.
        from os import popen
        units = popen('systemctl list-units | grep graphical.target')
        if 'graphical.target' not in units.read():
            launch_args.add('--headless=new')

    launch_args = list(launch_args)

    # ---------- extensions ----------
    ext = opt.extensions if isinstance(opt, ChromiumOptions) else opt._extension_files
    if ext:
        joined = ','.join(set(ext))
        launch_args.append(f'--load-extension={joined}')

    return launch_args
def set_prefs(opt):
    """Apply the configured prefs to the profile's Preferences file inside the user data folder.

    :param opt: DriverOptions or ChromiumOptions
    :return: None
    """
    if isinstance(opt, ChromiumOptions):
        prefs, del_list = opt.preferences, opt._prefs_to_del
    else:
        prefs, del_list = opt.experimental_options.get('prefs', []), []

    if not opt.user_data_path:
        return

    # The profile folder defaults to 'Default' unless --profile-directory says otherwise.
    user = 'Default'
    for arg in opt.arguments:
        if arg.startswith('--profile-directory'):
            user = arg.split('=')[-1].strip()
            break

    prefs_file = Path(opt.user_data_path) / user / 'Preferences'

    if not prefs_file.exists():
        # Start from an empty JSON object when the file does not exist yet.
        prefs_file.parent.mkdir(parents=True, exist_ok=True)
        with open(prefs_file, 'w') as f:
            f.write('{}')

    with open(prefs_file, "r", encoding='utf-8') as f:
        try:
            prefs_dict = load(f)
        except JSONDecodeError:
            prefs_dict = {}

    # Write each dotted key 'a.b.c' as prefs_dict['a']['b']['c'].
    for pref in prefs:
        value = prefs[pref]
        levels = pref.split('.')
        _make_leave_in_dict(prefs_dict, levels, 0, len(levels))
        _set_value_to_dict(prefs_dict, levels, value)

    for pref in del_list:
        _remove_arg_from_dict(prefs_dict, pref)

    with open(prefs_file, 'w', encoding='utf-8') as f:
        dump(prefs_dict, f)
def test_connect(ip, port):
    """Check that the browser can be reached, retrying for up to 30 seconds.

    :param ip: browser ip
    :param port: browser port
    :return: None
    :raises BrowserConnectError: when no tab of type 'page' is reported in time
    """
    deadline = perf_counter() + 30
    url = f'http://{ip}:{port}/json'
    while perf_counter() < deadline:
        try:
            tabs = requests_get(url, timeout=10).json()
            if any(tab['type'] == 'page' for tab in tabs):
                return
        except Exception:
            # Request or parsing failed: pause briefly before retrying.
            sleep(.2)

    if ip in ('127.0.0.1', 'localhost'):
        raise BrowserConnectError(f'\n连接浏览器失败,可能原因:\n1、浏览器未启动\n2、{port}端口不是Chromium内核浏览器\n'
                                  f'3、该浏览器未允许控制\n4、和已打开的浏览器冲突\n'
                                  f'请尝试用ChromiumOptions指定别的端口和指定浏览器路径')
    raise BrowserConnectError(f'{ip}:{port}浏览器无法链接。')
def _run_browser(port, path: str, args) -> Popen:
    """Start the browser process.

    :param port: debugging port number
    :param path: browser path; a directory is treated as containing an executable named 'chrome'
    :param args: launch arguments
    :return: process object
    :raises FileNotFoundError: when the executable cannot be started because it does not exist
    """
    location = Path(path)
    executable = str(location / 'chrome') if location.is_dir() else str(path)
    command = [executable, f'--remote-debugging-port={port}', *args]
    try:
        return Popen(command, shell=False, stdout=DEVNULL, stderr=DEVNULL)
    except FileNotFoundError:
        raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。')
def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None:
    """Ensure the nested keys for a dotted pref exist, i.e. a.b.c becomes target_dict['a']['b']['c'].

    :param target_dict: dictionary to work on
    :param src: list of key levels, e.g. [a, b, c]
    :param num: index of the level to start from
    :param end: length of src
    :return: None
    """
    node = target_dict
    while num != end:
        key = src[num]
        if key not in node:
            node[key] = {}  # missing levels (including the last one) start as empty dicts
        node = node[key]
        num += 1
def _set_value_to_dict(target_dict: dict, src: list, value) -> None:
    """Assign a value at the nested position given by the key levels, i.e. target_dict['a']['b']['c'] = value.

    The intermediate levels must already exist.

    :param target_dict: dictionary to work on
    :param src: list of key levels, e.g. [a, b, c]
    :param value: value to store
    :return: None
    :raises KeyError: when an intermediate level is missing
    """
    # Walk the dictionary directly instead of exec()-ing generated source, which broke
    # (or executed arbitrary code) when a key contained a quote character.
    keys = list(src) or ['']  # an empty list used to address the '' key; keep that
    node = target_dict
    for key in keys[:-1]:
        node = node[key]
    node[keys[-1]] = value
def _remove_arg_from_dict(target_dict: dict, arg: str) -> None:
    """Delete the entry addressed by a dotted key such as 'a.b.c' from a nested dictionary.

    Best effort: nothing happens when the path does not exist.

    :param target_dict: dictionary to work on
    :param arg: dotted key, e.g. 'a.b.c'
    :return: None
    """
    keys = arg.split('.')
    node = target_dict
    # Walk the dictionary directly instead of exec()-ing generated source (which failed silently
    # for keys containing quotes), and only swallow the errors a missing or non-dict path causes.
    try:
        for key in keys[:-1]:
            node = node[key]
        node.pop(keys[-1])
    except (KeyError, IndexError, TypeError, AttributeError):
        pass

View File

@ -1,18 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from typing import Union
from DrissionPage.configs.chromium_options import ChromiumOptions
from DrissionPage.configs.driver_options import DriverOptions
# Connect to or launch a browser; returns a 2-tuple (browser path, process object).
def connect_browser(option: Union[ChromiumOptions, DriverOptions]) -> tuple: ...


# Build the list of command-line launch arguments from the options object.
def get_launch_args(opt: Union[ChromiumOptions, DriverOptions]) -> list: ...


# Apply the configured prefs to the profile's Preferences file.
def set_prefs(opt: Union[ChromiumOptions, DriverOptions]) -> None: ...

View File

@ -1,40 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from ..errors import ElementNotFoundError
# Name of the browser command used to answer JavaScript dialogs.
HANDLE_ALERT_METHOD = 'Page.handleJavaScriptDialog'
# Tag names treated as frame elements.
FRAME_ELEMENT = ('iframe', 'frame')
# Marker string — presumably used to flag failed results; verify against callers.
ERROR = 'error'
class Settings(object):
    """Global switches, read as class attributes.

    NOTE(review): judging by the names, these presumably control whether a missing element or a
    failed click raises an exception — confirm against the code that reads them.
    """
    raise_ele_not_found = False
    raise_click_failed = False
class NoneElement(object):
    """Falsy singleton placeholder that stands in for a missing element.

    It compares equal to None, prints as 'None', and raises ElementNotFoundError when it is
    called or when any attribute that is not defined on it is accessed.
    """
    _instance = None

    def __new__(cls):
        # Compare with `is None`: the instance itself is falsy (see __bool__), so a truthiness
        # test would never recognise the cached object and would build a new one on every call.
        if cls._instance is None:
            cls._instance = super(NoneElement, cls).__new__(cls)
        return cls._instance

    def __call__(self, *args, **kwargs):
        raise ElementNotFoundError

    def __getattr__(self, item):
        raise ElementNotFoundError

    def __eq__(self, other):
        # Always return a real bool: equal to None and to itself, unequal to anything else.
        return other is None or other is self

    def __bool__(self):
        return False

    def __repr__(self):
        return 'None'

View File

@ -1,15 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from typing import Union
# Accepts a locator given as a tuple or a string and returns a locator tuple.
def get_loc(loc: Union[tuple, str], translate_css: bool = False) -> tuple: ...


# Converts a locator string to a locator tuple.
def str_to_loc(loc: str) -> tuple: ...


# Converts a locator tuple to another locator tuple (exact target format not visible here).
def translate_loc(loc: tuple) -> tuple: ...

View File

@ -1,152 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from re import search, sub
from shutil import rmtree
from zipfile import ZipFile
def get_exe_from_port(port):
    """Return the executable path of the first process found for a port.

    :param port: port number
    :return: executable path, or None when no process or path is found
    """
    from os import popen

    pid = get_pid_from_port(port)
    if not pid:
        return

    rows = popen(f'wmic process where processid={pid} get executablepath').read().split('\n')
    # The path is taken from the third output line.
    if len(rows) > 2:
        return rows[2].strip()
    return None
def get_pid_from_port(port):
    """Return the pid of the first process listed for a port (Windows only).

    :param port: port number
    :return: process id as text, or None when unavailable
    """
    from platform import system
    if port is None or system().lower() != 'windows':
        return None

    from os import popen
    from time import perf_counter

    command = f'netstat -ano |findstr {port}'

    def first_line():
        return popen(command).read().split('\n')[0]

    try:  # guards against errors that may occur under Anaconda
        process = first_line()
        started = perf_counter()
        # Retry for up to 5 seconds while nothing is listed.
        while not process and perf_counter() - started < 5:
            process = first_line()
        return process.split(' ')[-1] or None
    except Exception:
        return None
def get_usable_path(path):
    """Return a path that does not clash with an existing file or folder, adding _1, _2, ... as needed.

    :param path: file or folder path
    :return: usable path as a Path object
    """
    path = Path(path)
    parent = path.parent
    path = parent / make_valid_name(path.name)
    is_file = path.is_file()
    name = path.stem if is_file else path.name
    ext = path.suffix if is_file else ''

    first_time = True
    while path.exists():
        numbered = search(r'(.*)_(\d+)$', name)
        if numbered and not first_time:
            # Already carries a counter that was added here: increase it.
            src_name, num = numbered.group(1), int(numbered.group(2)) + 1
        else:
            # On the first pass a trailing _N belongs to the original name, so append _1.
            src_name, num = name, '1'

        name = f'{src_name}_{num}'
        path = parent / f'{name}{ext}'
        first_time = None

    return path
def make_valid_name(full_name):
    """Turn a string into a usable file name.

    :param full_name: file name
    :return: usable file name
    """
    # ---------- strip surrounding whitespace ----------
    full_name = full_name.strip()

    # ---------- keep the total length within 255 (as measured by get_long) ----------
    parts = search(r'(.*)(\.[^.]+$)', full_name)  # split name and extension
    if parts:
        name, ext = parts.group(1), parts.group(2)
    else:
        name, ext = full_name, ''
    budget = 255 - len(ext)

    while get_long(name) > budget:
        name = name[:-1]

    # ---------- drop characters that are not allowed ----------
    return sub(r'[<>/\\|:*?\n]', '', f'{name}{ext}')
def get_long(txt):
    """Return the display length of a string, counting a 3-byte UTF-8 character (e.g. a Chinese character) as 2.

    :param txt: string
    :return: number of characters
    """
    char_count = len(txt)
    extra_bytes = len(txt.encode('utf-8')) - char_count
    # Every two extra UTF-8 bytes count as one additional character.
    return extra_bytes // 2 + char_count
def port_is_using(ip, port):
    """Check whether a port is in use by trying to connect to it.

    :param ip: browser address
    :param port: browser port (number or numeric string)
    :return: True when the connection succeeds, otherwise False
    """
    from socket import socket, AF_INET, SOCK_STREAM
    # The context manager closes the socket even when int(port) or connect_ex() raises.
    with socket(AF_INET, SOCK_STREAM) as s:
        s.settimeout(.1)
        return s.connect_ex((ip, int(port))) == 0
def clean_folder(folder_path, ignore=None):
    """Empty a folder, keeping the files and folders whose names are listed in ignore.

    :param folder_path: path of the folder to empty
    :param ignore: names to keep
    :return: None
    """
    kept = ignore if ignore else []
    for entry in Path(folder_path).iterdir():
        if entry.name in kept:
            continue
        if entry.is_file():
            entry.unlink()
        elif entry.is_dir():
            rmtree(entry, True)  # second argument: ignore errors
def unzip(zip_path, to_path):
    """Extract the first member of a zip archive (used for the downloaded chromedriver.zip).

    :param zip_path: path of the zip file; a falsy value does nothing
    :param to_path: folder to extract into
    :return: one-element list with the extracted path, or None when zip_path is falsy
    """
    if not zip_path:
        return

    with ZipFile(zip_path, 'r') as archive:
        first_member = archive.namelist()[0]
        return [archive.extract(first_member, path=to_path)]

View File

@ -1,31 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union
# Executable path of the first process found for a port, or None.
def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ...


# Pid (as text) of the first process listed for a port, or None.
def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ...


# A path that does not clash with an existing file or folder.
def get_usable_path(path: Union[str, Path]) -> Path: ...


# File name with surrounding whitespace and disallowed characters removed, limited in length.
def make_valid_name(full_name: str) -> str: ...


# Display length of a string where multi-byte characters count for more than one.
def get_long(txt) -> int: ...


# Whether a TCP connection to ip:port succeeds.
def port_is_using(ip: str, port: Union[str, int]) -> bool: ...


# Empty a folder except for the names listed in ignore.
def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ...


# Extract the first member of a zip file; returns a one-element list, or None for a falsy path.
def unzip(zip_path: str, to_path: str) -> Union[list, None]: ...

View File

@ -1,108 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from http.cookiejar import Cookie
from typing import Union
from requests import Session
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
from DrissionPage.base import DrissionElement, BasePage
from DrissionPage.chromium_element import ChromiumElement
from DrissionPage.chromium_base import ChromiumBase
class ResponseData(object):
    """Type stub for an object holding the data of one captured response.

    NOTE(review): the implementation is not part of this stub; the attribute names suggest they
    mirror the fields of a browser network response — confirm against the implementation.
    """

    def __init__(self, request_id: str, response: dict, body: str, tab: str, target: str):
        # Values taken from the constructor arguments.
        self.requestId: str = ...
        self.response: CaseInsensitiveDict = ...
        self.rawBody: str = ...
        self._body: Union[str, dict, bytes] = ...
        self._base64_body: bool = ...
        self.tab: str = ...
        self.target: str = ...
        # Request-related fields.
        self.method: str = ...
        self._postData: dict = ...
        self._rawPostData: str = ...
        # Response-related fields.
        self.url: str = ...
        self.status: str = ...
        self.statusText: str = ...
        self.headersText: str = ...
        self.mimeType: str = ...
        self.requestHeadersText: str = ...
        self.connectionReused: str = ...
        self.connectionId: str = ...
        self.remoteIPAddress: str = ...
        self.remotePort: str = ...
        self.fromDiskCache: str = ...
        self.fromServiceWorker: str = ...
        self.fromPrefetchCache: str = ...
        self.encodedDataLength: str = ...
        self.timing: str = ...
        self.serviceWorkerResponseSource: str = ...
        self.responseTime: str = ...
        self.cacheStorageCacheName: str = ...
        self.protocol: str = ...
        self.securityState: str = ...
        self.securityDetails: str = ...

    # Attribute and item access both return a string or None.
    def __getattr__(self, item: str) -> Union[str, None]: ...

    def __getitem__(self, item: str) -> Union[str, None]: ...

    def __repr__(self) -> str: ...

    @property
    def headers(self) -> Union[CaseInsensitiveDict, None]: ...

    @property
    def requestHeaders(self) -> Union[CaseInsensitiveDict, None]: ...

    @requestHeaders.setter
    def requestHeaders(self, val: dict) -> None: ...

    @property
    def postData(self) -> Union[dict, str, None]: ...

    @postData.setter
    def postData(self, val: Union[str, dict]) -> None: ...

    # Body as text, parsed data or raw bytes.
    @property
    def body(self) -> Union[str, dict, bytes]: ...
# ---- text / html helpers ----
def get_ele_txt(e: DrissionElement) -> str: ...


def format_html(text: str) -> str: ...


# ---- viewport / scrolling helpers ----
def location_in_viewport(page: ChromiumBase, loc_x: int, loc_y: int) -> bool: ...


def offset_scroll(ele: ChromiumElement, offset_x: int, offset_y: int) -> tuple: ...


# ---- link / script helpers ----
def make_absolute_link(link, page: BasePage = None) -> str: ...


def is_js_func(func: str) -> bool: ...


# ---- cookie helpers: accept a cookie jar, list, tuple, string or dict ----
def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ...


def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ...


def set_session_cookies(session: Session, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...


def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...


def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ...

View File

@ -1,118 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, Tuple, Any
class ChromiumOptions(object):
    """Type stub for the browser start-up options object.

    Only signatures are declared here. Every ``set_*`` / ``add_*`` /
    ``remove_*`` method, as well as ``use_system_user_path`` and
    ``auto_port``, is annotated as returning ``ChromiumOptions`` so that
    calls can be chained.
    """

    def __init__(self, read_file: Union[bool, None] = True, ini_path: Union[str, Path] = None):
        # Attribute declarations for type checkers only.
        self.ini_path: str = ...
        self._driver_path: str = ...
        self._user_data_path: str = ...
        self._download_path: str = ...
        self._arguments: list = ...
        self._binary_location: str = ...
        self._user: str = ...
        self._page_load_strategy: str = ...
        self._timeouts: dict = ...
        self._proxy: str = ...
        self._debugger_address: str = ...
        self._extensions: list = ...
        self._prefs: dict = ...
        self._prefs_to_del: list = ...
        self._auto_port: bool = ...
        self._system_user_path: bool = ...

    # ---- read-only views of the stored settings ----
    @property
    def download_path(self) -> str: ...

    @property
    def browser_path(self) -> str: ...

    @property
    def user_data_path(self) -> str: ...

    @property
    def user(self) -> str: ...

    @property
    def page_load_strategy(self) -> str: ...

    @property
    def timeouts(self) -> dict: ...

    @property
    def proxy(self) -> str: ...

    @property
    def debugger_address(self) -> str: ...

    @property
    def arguments(self) -> list: ...

    # debugger_address is the only property declared with a setter.
    @debugger_address.setter
    def debugger_address(self, address: str): ...

    @property
    def extensions(self) -> list: ...

    @property
    def preferences(self) -> dict: ...

    @property
    def system_user_path(self) -> bool: ...

    # ---- chainable modifiers ----
    def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: ...

    def remove_argument(self, value: str) -> ChromiumOptions: ...

    def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: ...

    def remove_extensions(self) -> ChromiumOptions: ...

    def set_pref(self, arg: str, value: Any) -> ChromiumOptions: ...

    def remove_pref(self, arg: str) -> ChromiumOptions: ...

    def remove_pref_from_file(self, arg: str) -> ChromiumOptions: ...

    def set_timeouts(self, implicit: float = None, pageLoad: float = None,
                     script: float = None) -> ChromiumOptions: ...

    def set_user(self, user: str = 'Default') -> ChromiumOptions: ...

    def set_headless(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_no_imgs(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_no_js(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_mute(self, on_off: bool = True) -> ChromiumOptions: ...

    def set_user_agent(self, user_agent: str) -> ChromiumOptions: ...

    def set_proxy(self, proxy: str) -> ChromiumOptions: ...

    def set_page_load_strategy(self, value: str) -> ChromiumOptions: ...

    def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None,
                  debugger_address: str = None, download_path: Union[str, Path] = None,
                  user_data_path: Union[str, Path] = None, cache_path: Union[str, Path] = None) -> ChromiumOptions: ...

    def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: ...

    def auto_port(self, on_off: bool = True) -> ChromiumOptions: ...

    # ---- persistence; both return a str (presumably the written path — confirm in implementation) ----
    def save(self, path: Union[str, Path] = None) -> str: ...

    def save_to_default(self) -> str: ...
class PortFinder(object):
    """Type stub for the port helper; signatures only."""
    # Class-level list; the name suggests ports already handed out — TODO confirm in implementation.
    used_port: list = ...

    # Returns an (int, str) pair; presumably (port, temp folder path) — TODO confirm in implementation.
    @staticmethod
    def get_port() -> Tuple[int, str]: ...

View File

@ -1,364 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from selenium.webdriver.chrome.options import Options
from .options_manage import OptionsManager
class DriverOptions(Options):
    """Chrome options class built on selenium's ``Options``.

    Adds removal of individual settings, chainable setters and the ability
    to persist the configuration to an ini file.
    """

    def __init__(self, read_file=True, ini_path=None):
        """Initialise the options, by default from an ini file.

        :param read_file: whether to load the settings from an ini file
        :param ini_path: ini file path; when empty the ``configs.ini`` next to this module is used
        """
        super().__init__()
        self._user_data_path = None

        if read_file:
            self.ini_path = str(ini_path) if ini_path else str(Path(__file__).parent / 'configs.ini')
            om = OptionsManager(self.ini_path)
            options_dict = om.chrome_options

            self._driver_path = om.paths.get('chromedriver_path', None)
            self._download_path = om.paths.get('download_path', None)
            self._binary_location = options_dict.get('binary_location', '')
            self._arguments = options_dict.get('arguments', [])
            self._extensions = options_dict.get('extensions', [])
            self._experimental_options = options_dict.get('experimental_options', {})
            self._debugger_address = options_dict.get('debugger_address', None)
            self.page_load_strategy = options_dict.get('page_load_strategy', 'normal')
            self.system_user_path = options_dict.get('system_user_path', False)

            # Mirror an existing --user-data-dir argument into _user_data_path.
            # set_paths() rewrites self._arguments, so stop iterating right after the call.
            for arg in self._arguments:
                if arg.startswith('--user-data-dir='):
                    self.set_paths(user_data_path=arg[16:])
                    break

            self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30})
            return

        # No ini file: fall back to built-in defaults.
        self._driver_path = None
        self._download_path = None
        self.ini_path = None
        self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
        self._debugger_address = '127.0.0.1:9222'
        self.system_user_path = False

    @property
    def driver_path(self):
        """Path of the chromedriver executable."""
        return self._driver_path

    @property
    def download_path(self):
        """Default download directory."""
        return self._download_path

    @property
    def chrome_path(self):
        """Path of the browser executable (alias of ``browser_path``)."""
        return self.browser_path

    @property
    def browser_path(self):
        """Path of the browser executable; ``'chrome'`` when no binary location is set."""
        return self.binary_location or 'chrome'

    @property
    def user_data_path(self):
        """User data directory."""
        return self._user_data_path

    # ------------- parent-class overrides, made chainable -------------
    def add_argument(self, argument):
        """Add one command-line argument.

        :param argument: the argument text
        :return: this object
        """
        super().add_argument(argument)
        return self

    def set_capability(self, name, value):
        """Set one capability.

        :param name: capability name
        :param value: capability value
        :return: this object
        """
        super().set_capability(name, value)
        return self

    def add_extension(self, extension):
        """Add an extension.

        :param extension: path of the crx file
        :return: this object
        """
        super().add_extension(extension)
        return self

    def add_encoded_extension(self, extension):
        """Add an extension given as a Base64-encoded string.

        :param extension: Base64-encoded extension data
        :return: this object
        """
        super().add_encoded_extension(extension)
        return self

    def add_experimental_option(self, name, value):
        """Add one experimental option.

        :param name: option name
        :param value: option value
        :return: this object
        """
        super().add_experimental_option(name, value)
        return self

    # ------------- end of parent-class overrides -------------

    def save(self, path=None):
        """Write the current settings to an ini file.

        :param path: target ini path. ``None`` saves to the ini file this object was read
                     from (or the bundled one when there is none); ``'default'`` saves to
                     the bundled ini file
        :return: absolute path of the written file, as str
        """
        default_ini = Path(__file__).parent / 'configs.ini'

        if path == 'default':
            path = default_ini.absolute()
        elif path is None:
            path = Path(self.ini_path).absolute() if self.ini_path else default_ini.absolute()
        else:
            path = Path(path).absolute()

        # NOTE(review): a directory target gets 'config.ini' while the bundled file is
        # 'configs.ini' — kept as-is, confirm whether the difference is intended.
        path = path / 'config.ini' if path.is_dir() else path

        # Start from the target file when it exists so unrelated sections are preserved.
        if path.exists():
            om = OptionsManager(str(path))
        else:
            om = OptionsManager(self.ini_path or str(default_ini))

        for key, value in self.as_dict().items():
            if key == 'driver_path':
                om.set_item('paths', 'chromedriver_path', value)
            elif key == 'download_path':
                om.set_item('paths', 'download_path', value)
            else:
                om.set_item('chrome_options', key, value)

        path = str(path)
        om.save(path)
        return path

    def save_to_default(self):
        """Save the current settings to the bundled default ini file."""
        return self.save('default')

    def remove_argument(self, value):
        """Remove every argument whose text starts with ``value``.

        :param value: argument name; for ``name=value`` arguments the name alone is enough
        :return: this object
        """
        # Slice assignment keeps the same list object that selenium holds.
        self._arguments[:] = [arg for arg in self._arguments if not arg.startswith(value)]
        return self

    def remove_experimental_option(self, key):
        """Remove one experimental option by key.

        :param key: name of the experimental option
        :return: this object
        """
        self._experimental_options.pop(key, None)
        return self

    def remove_all_extensions(self):
        """Remove all extensions.

        Extensions are stored as whole files, which makes removing a single
        one impractical; clear them all and re-add the ones that are needed.

        :return: this object
        """
        self._extensions = []
        return self

    def set_argument(self, arg, value):
        """Set (or clear) one command-line argument.

        :param arg: argument name
        :param value: the value for ``name=value`` arguments, ``True`` for a bare flag;
                      any falsy value only removes the argument
        :return: this object
        """
        self.remove_argument(arg)

        if value:
            arg_str = arg if isinstance(value, bool) else f'{arg}={value}'
            self.add_argument(arg_str)

        return self

    def set_timeouts(self, implicit=None, pageLoad=None, script=None):
        """Set timeouts, in seconds (effective with selenium 4+).

        :param implicit: element lookup timeout
        :param pageLoad: page load timeout
        :param script: script execution timeout
        :return: this object
        """
        if implicit is not None:
            self.timeouts['implicit'] = implicit
        if pageLoad is not None:
            self.timeouts['pageLoad'] = pageLoad
        if script is not None:
            self.timeouts['script'] = script

        return self

    def set_headless(self, on_off=True):
        """Switch headless mode on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--headless', bool(on_off))

    def set_no_imgs(self, on_off=True):
        """Switch image blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--blink-settings=imagesEnabled=false', bool(on_off))

    def set_no_js(self, on_off=True):
        """Switch JavaScript blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--disable-javascript', bool(on_off))

    def set_mute(self, on_off=True):
        """Switch audio muting on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--mute-audio', bool(on_off))

    def set_user_agent(self, user_agent):
        """Set the user agent.

        :param user_agent: user agent text
        :return: this object
        """
        return self.set_argument('--user-agent', user_agent)

    def set_proxy(self, proxy):
        """Set the proxy.

        :param proxy: proxy url and port
        :return: this object
        """
        return self.set_argument('--proxy-server', proxy)

    def set_page_load_strategy(self, value):
        """Set the page load strategy (selenium 4+ only).

        ``normal``: wait until all resources are loaded (default).
        ``eager``: DOM is ready, other resources such as images may still be loading.
        ``none``: do not block WebDriver at all.

        :param value: one of 'normal', 'eager', 'none' (case-insensitive)
        :return: this object
        :raises ValueError: if ``value`` is not one of the three strategies
        """
        # Normalise before validating; otherwise 'Eager' is rejected and lower() is dead code.
        strategy = value.lower() if isinstance(value, str) else value
        if strategy not in ('normal', 'eager', 'none'):
            raise ValueError("只能选择'normal', 'eager', 'none'")
        self.page_load_strategy = strategy
        return self

    def set_paths(self, driver_path=None, chrome_path=None, browser_path=None, local_port=None,
                  debugger_address=None, download_path=None, user_data_path=None, cache_path=None):
        """Convenience setter for paths and addresses; ``None`` leaves a setting untouched.

        :param driver_path: chromedriver path
        :param chrome_path: browser executable path
        :param browser_path: browser executable path (applied after ``chrome_path``)
        :param local_port: local port; ``''`` clears the debugger address
        :param debugger_address: debugger address, e.g. 127.0.0.1:9222 (applied after ``local_port``)
        :param download_path: download directory
        :param user_data_path: user data directory
        :param cache_path: cache directory
        :return: this object
        """
        if driver_path is not None:
            self._driver_path = str(driver_path)

        if chrome_path is not None:
            self.binary_location = str(chrome_path)

        if browser_path is not None:
            self.binary_location = str(browser_path)

        if local_port is not None:
            self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}'

        if debugger_address is not None:
            self.debugger_address = debugger_address

        if download_path is not None:
            self._download_path = str(download_path)

        if user_data_path is not None:
            self.set_argument('--user-data-dir', str(user_data_path))
            # Stored as str like every other path handled here.
            self._user_data_path = str(user_data_path)

        if cache_path is not None:
            self.set_argument('--disk-cache-dir', str(cache_path))

        return self

    def as_dict(self):
        """Return all settings as a dict."""
        return chrome_options_to_dict(self)
def chrome_options_to_dict(options):
    """Convert a chrome options object into a plain dict.

    :param options: options object, dict, or False/None for the defaults
    :return: dict of settings; a dict argument is returned unchanged
    """
    if options in (False, None):
        return DriverOptions(read_file=False).as_dict()

    if isinstance(options, dict):
        return options

    wanted = ('debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options',
              'driver_path', 'page_load_strategy', 'download_path')
    available = options.__dir__()
    result = {}

    for name in wanted:
        if name not in available:
            # Attributes the object does not expose are reported as None.
            result[name] = None
            continue
        try:
            result[name] = options.__getattribute__(name)
        except Exception:
            # An attribute that fails to read is left out of the result.
            pass

    # Timeouts are only included when they were actually set as a capability.
    if 'timeouts' in available and 'timeouts' in options._caps:
        result['timeouts'] = options.__getattribute__('timeouts')

    return result

View File

@ -1,89 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, List
from selenium.webdriver.chrome.options import Options
class DriverOptions(Options):
    """Type stub for the selenium-based Chrome options class; signatures only.

    All setters are annotated as returning ``DriverOptions`` so calls can be chained.
    """

    def __init__(self, read_file: bool = True, ini_path: Union[str, Path] = None):
        # Attribute declarations for type checkers only.
        self.ini_path: str = ...
        self._driver_path: str = ...
        self._user_data_path: str = ...
        self._download_path: str = ...

    @property
    def driver_path(self) -> str: ...

    @property
    def download_path(self) -> str: ...

    @property
    def chrome_path(self) -> str: ...

    @property
    def browser_path(self) -> str: ...

    @property
    def user_data_path(self) -> str: ...

    # ------------- parent-class overrides, made chainable -------------
    def add_argument(self, argument: str) -> DriverOptions: ...

    def set_capability(self, name: str, value: str) -> DriverOptions: ...

    def add_extension(self, extension: str) -> DriverOptions: ...

    def add_encoded_extension(self, extension: str) -> DriverOptions: ...

    def add_experimental_option(self, name: str, value: Union[str, int, dict, List[str]]) -> DriverOptions: ...

    # ------------- end of parent-class overrides -------------

    def save(self, path: str = None) -> str: ...

    def save_to_default(self) -> str: ...

    def remove_argument(self, value: str) -> DriverOptions: ...

    def remove_experimental_option(self, key: str) -> DriverOptions: ...

    def remove_all_extensions(self) -> DriverOptions: ...

    def set_argument(self, arg: str, value: Union[bool, str]) -> DriverOptions: ...

    def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> DriverOptions: ...

    def set_headless(self, on_off: bool = True) -> DriverOptions: ...

    def set_no_imgs(self, on_off: bool = True) -> DriverOptions: ...

    def set_no_js(self, on_off: bool = True) -> DriverOptions: ...

    def set_mute(self, on_off: bool = True) -> DriverOptions: ...

    def set_user_agent(self, user_agent: str) -> DriverOptions: ...

    def set_proxy(self, proxy: str) -> DriverOptions: ...

    def set_page_load_strategy(self, value: str) -> DriverOptions: ...

    # The implementation passes the path arguments through str(), so Path objects are accepted too.
    def set_paths(self,
                  driver_path: Union[str, Path] = None,
                  chrome_path: Union[str, Path] = None,
                  browser_path: Union[str, Path] = None,
                  local_port: Union[int, str] = None,
                  debugger_address: str = None,
                  download_path: Union[str, Path] = None,
                  user_data_path: Union[str, Path] = None,
                  cache_path: Union[str, Path] = None) -> DriverOptions: ...

    def as_dict(self) -> dict: ...
# Converts an options object (or dict / None / bool) into a dict of settings.
def chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]: ...

View File

@ -1,430 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from os import popen
from pathlib import Path
from re import search
from typing import Union
from .commons.constants import Settings
from .commons.tools import unzip
from .configs.chromium_options import ChromiumOptions
from .configs.options_manage import OptionsManager
from .session_page import SessionPage
try:
from selenium import webdriver
from .mixpage.drission import Drission
from .configs.driver_options import DriverOptions
except ModuleNotFoundError:
pass
def raise_when_ele_not_found(on_off=True):
    """Set the global flag that controls whether a missing element raises an exception.

    :param on_off: True or False
    :return: None
    """
    Settings.raise_ele_not_found = on_off
def configs_to_here(save_name=None):
    """Copy the default ini file into the current directory.

    :param save_name: file name without the ``.ini`` suffix; None means 'dp_configs.ini'
    :return: None
    """
    if save_name is None:
        target = 'dp_configs.ini'
    else:
        target = f'{save_name}.ini'

    OptionsManager('default').save(target)
def show_settings(ini_path=None):
    """Print the content of an ini file.

    :param ini_path: ini file path
    :return: None
    """
    OptionsManager(ini_path).show()
def set_paths(driver_path=None,
              chrome_path=None,
              browser_path=None,
              local_port=None,
              debugger_address=None,
              download_path=None,
              user_data_path=None,
              cache_path=None,
              ini_path=None,
              check_version=False):
    """Convenience setter that writes paths and addresses into an ini file.

    Arguments left as None are not touched.

    :param driver_path: chromedriver path
    :param chrome_path: browser executable path
    :param browser_path: browser executable path (applied after ``chrome_path``)
    :param local_port: local port number
    :param debugger_address: debugger address, e.g. 127.0.0.1:9222 (applied after ``local_port``)
    :param download_path: download directory
    :param user_data_path: user data directory
    :param cache_path: cache directory
    :param ini_path: ini file to modify
    :param check_version: whether to check that chromedriver and the browser match
    :return: None
    """
    om = OptionsManager(ini_path)

    def format_path(path: str) -> str:
        return str(path) if path else ''

    def strip_scheme(address: str) -> str:
        # str.lstrip() strips a *set of characters*, not a prefix, and would eat the
        # leading letters of host names such as 'test-host:9222'; remove the prefix explicitly.
        for scheme in ('http://', 'https://'):
            if address.startswith(scheme):
                return address[len(scheme):]
        return address

    if driver_path is not None:
        om.set_item('paths', 'chromedriver_path', format_path(driver_path))

    if chrome_path is not None:
        om.set_item('chrome_options', 'binary_location', format_path(chrome_path))

    if browser_path is not None:
        om.set_item('chrome_options', 'binary_location', format_path(browser_path))

    if local_port is not None:
        om.set_item('chrome_options', 'debugger_address', f'127.0.0.1:{local_port}')

    if debugger_address is not None:
        address = strip_scheme(debugger_address.replace('localhost', '127.0.0.1'))
        om.set_item('chrome_options', 'debugger_address', address)

    if download_path is not None:
        om.set_item('paths', 'download_path', format_path(download_path))

    om.save()

    # These go through set_argument(), which reads and saves the ini file on its own,
    # so they have to run after om.save().
    if user_data_path is not None:
        set_argument('--user-data-dir', format_path(user_data_path), ini_path)

    if cache_path is not None:
        set_argument('--disk-cache-dir', format_path(cache_path), ini_path)

    if check_version:
        # Check the browser that was actually configured: browser_path wins over chrome_path.
        effective_browser = browser_path if browser_path is not None else chrome_path
        check_driver_version(format_path(driver_path), format_path(effective_browser))
def use_auto_port(on_off=True, ini_path=None):
    """Store whether the browser is started with an auto-assigned port and temp folder.

    :param on_off: whether auto port is enabled; must be a bool
    :param ini_path: ini file to modify
    :return: None
    :raises TypeError: if ``on_off`` is not a bool
    """
    if isinstance(on_off, bool):
        manager = OptionsManager(ini_path)
        manager.set_item('chrome_options', 'auto_port', on_off)
        manager.save()
        return

    raise TypeError('on_off参数只能输入bool值。')
def use_system_user_path(on_off=True, ini_path=None):
    """Store whether the system-installed browser's default user folder is used.

    :param on_off: on or off; must be a bool
    :param ini_path: ini file to modify
    :return: None
    :raises TypeError: if ``on_off`` is not a bool
    """
    if isinstance(on_off, bool):
        manager = OptionsManager(ini_path)
        manager.set_item('chrome_options', 'system_user_path', on_off)
        manager.save()
        return

    raise TypeError('on_off参数只能输入bool值。')
def set_argument(arg, value=None, ini_path=None):
    """Set one browser argument in an ini file.

    :param arg: argument name
    :param value: argument value; None for arguments that take no value
    :param ini_path: ini file to modify
    :return: None
    """
    options = ChromiumOptions(ini_path=ini_path)
    # set_argument() is declared to return the options object, so the call can be chained.
    options.set_argument(arg, value).save()
def set_headless(on_off=True, ini_path=None):
    """Store whether the browser window is hidden.

    :param on_off: on or off
    :param ini_path: ini file to modify
    :return: None
    """
    # 'new' becomes --headless=new; False is passed when the mode is switched off.
    flag_value = False
    if on_off:
        flag_value = 'new'
    set_argument('--headless', flag_value, ini_path)
def set_no_imgs(on_off=True, ini_path=None):
    """Store whether image loading is blocked.

    :param on_off: on or off
    :param ini_path: ini file to modify
    :return: None
    """
    # None is passed when switching on (value-less argument), False when switching off.
    flag_value = False
    if on_off:
        flag_value = None
    set_argument('--blink-settings=imagesEnabled=false', flag_value, ini_path)
def set_no_js(on_off=True, ini_path=None):
    """Store whether JavaScript is disabled.

    :param on_off: on or off
    :param ini_path: ini file to modify
    :return: None
    """
    # None is passed when switching on (value-less argument), False when switching off.
    flag_value = False
    if on_off:
        flag_value = None
    set_argument('--disable-javascript', flag_value, ini_path)
def set_mute(on_off=True, ini_path=None):
    """Store whether audio is muted.

    :param on_off: on or off
    :param ini_path: ini file to modify
    :return: None
    """
    # None is passed when switching on (value-less argument), False when switching off.
    flag_value = False
    if on_off:
        flag_value = None
    set_argument('--mute-audio', flag_value, ini_path)
def set_user_agent(user_agent, ini_path=None):
    """Store the user agent in an ini file.

    :param user_agent: user agent text
    :param ini_path: ini file to modify
    :return: None
    """
    set_argument('--user-agent', user_agent, ini_path)
def set_proxy(proxy, ini_path=None):
    """Store the proxy in an ini file.

    :param proxy: proxy url and port
    :param ini_path: ini file to modify
    :return: None
    """
    set_argument('--proxy-server', proxy, ini_path)
def check_driver_version(driver_path=None, chrome_path=None):
    """Check that the given chrome and chromedriver work together.

    A headless browser is started through the driver and closed again; any
    exception counts as a mismatch.

    :param driver_path: chromedriver path; falls back to the ini value, then 'chromedriver'
    :param chrome_path: browser executable path; falls back to the ini value
    :return: True if the browser could be started, False otherwise
    """
    print('正在检测可用性...')
    om = OptionsManager()
    driver_path = driver_path or om.get_value('paths', 'chromedriver_path') or 'chromedriver'
    # Resolve first and convert afterwards: str(None) is the truthy string 'None' and
    # would otherwise be set as the browser binary.
    chrome_path = chrome_path or om.get_value('chrome_options', 'binary_location')
    do = DriverOptions(read_file=False)
    do.add_argument('--headless')

    if chrome_path:
        do.binary_location = str(chrome_path)

    try:
        driver = webdriver.Chrome(driver_path, options=do)
        driver.quit()
        print('版本匹配,可正常使用。')

        return True

    except Exception as e:
        # Deliberately broad: any start-up failure is reported to the user as a mismatch.
        print(f'出现异常:\n{e}\n可执行easy_set.get_match_driver()自动下载匹配的版本。\n'
              f'或自行从以下网址下载http://npm.taobao.org/mirrors/chromedriver/')

        return False
# -------------------------自动识别chrome版本号并下载对应driver------------------------
def get_match_driver(ini_path='default',
                     save_path=None,
                     chrome_path=None,
                     show_msg=True,
                     check_version=True):
    """Detect the local chrome version and download a matching chromedriver.

    :param ini_path: ini file to read from and write to; a falsy value skips the ini update
    :param save_path: directory the driver is saved to; defaults to this module's directory
    :param chrome_path: location of chrome.exe; auto-detected when empty
    :param show_msg: whether to print progress messages
    :param check_version: whether to verify the downloaded driver against the browser
    :return: path of the extracted driver, or None on failure
    """
    save_path = save_path or str(Path(__file__).parent)

    chrome_path = chrome_path or get_chrome_path(ini_path, show_msg)
    chrome_path = Path(chrome_path).absolute() if chrome_path else None
    if show_msg:
        print('chrome.exe路径', chrome_path)

    # Do not pass str(None) == 'None' down as if it were a real path.
    ver = _get_chrome_version(str(chrome_path)) if chrome_path else None
    if show_msg:
        print('version', ver)

    zip_path = _download_driver(ver, save_path, show_msg=show_msg)

    driver_path = None
    if zip_path:
        try:
            driver_path = unzip(zip_path, save_path)[0]
        except (TypeError, IndexError):
            # unzip() produced nothing usable.
            driver_path = None
    elif show_msg:
        print('没有找到对应版本的driver。')

    if show_msg:
        print('解压路径', driver_path)

    if driver_path:
        Path(zip_path).unlink()
        if ini_path:
            set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False)

        if check_version:
            if not check_driver_version(driver_path, chrome_path) and show_msg:
                print('获取失败,请手动配置。')
    else:
        if show_msg:
            print('获取失败,请手动配置。')

    return driver_path
def get_chrome_path(ini_path=None,
                    show_msg=True,
                    from_ini=True,
                    from_regedit=True,
                    from_system_path=True):
    """Locate the chrome executable from the ini file or from the system.

    Lookup order: ini file, then per platform — a fixed path on macOS, a list
    of well-known paths on Linux, and on Windows the registry followed by the
    PATH variable.

    :param ini_path: ini file path
    :param show_msg: whether to print where the path was found
    :param from_ini: whether to look in the ini file
    :param from_regedit: whether to look in the Windows registry
    :param from_system_path: whether to look in the Windows PATH variable
    :return: path of the executable, or None if it was not found
    """
    # ----------- from the ini file --------------
    if ini_path and from_ini:
        try:
            path = OptionsManager(ini_path).chrome_options['binary_location']
        except KeyError:
            path = None
    else:
        path = None

    if path and Path(path).is_file():
        if show_msg:
            print('ini文件中', end='')
        return str(path)

    from platform import system
    os_name = system().lower()

    if os_name in ('macos', 'darwin'):
        return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

    elif os_name == 'linux':
        paths = ('/usr/bin/google-chrome', '/opt/google/chrome/google-chrome',
                 '/usr/lib/chromium-browser/chromium-browser')
        for p in paths:
            if Path(p).exists():
                return p
        return None

    elif os_name != 'windows':
        return None

    # ----------- from the registry --------------
    if from_regedit:
        import winreg
        try:
            # The with-block closes the key even if EnumValue raises.
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe',
                                reserved=0, access=winreg.KEY_READ) as key:
                k = winreg.EnumValue(key, 0)

            if show_msg:
                print('注册表中', end='')

            return k[1]

        except FileNotFoundError:
            pass

    # ----------- from the PATH variable --------------
    if from_system_path:
        try:
            with popen('set path') as pipe:
                paths = pipe.read().lower()
        except Exception:
            return None

        # First try the PATH entry that mentions chrome.
        r = search(r'[^;]*chrome[^;]*', paths)
        if r:
            path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe'

            if path.exists():
                if show_msg:
                    print('系统变量中', end='')
                return str(path)

        # Otherwise probe every PATH entry for chrome.exe.
        paths = paths.split(';')

        for path in paths:
            path = Path(path) / 'chrome.exe'

            try:
                if path.exists():
                    if show_msg:
                        print('系统变量中', end='')
                    return str(path)
            except OSError:
                # Malformed PATH entries can make exists() raise; skip them.
                pass

    return None
def _get_chrome_version(path: str) -> Union[str, None]:
"""根据文件路径获取版本号
:param path: chrome.exe文件路径
:return: 版本号
"""
if not path:
return
path = str(path).replace('\\', '\\\\')
try:
return (popen(f'wmic datafile where "name=\'{path}\'" get version').read()
.lower().split('\n')[2].replace(' ', ''))
except Exception:
return None
def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]:
    """Find the closest chromedriver for a chrome version on the mirror site and download it.

    :param version: local chrome version
    :param save_path: directory to save the zip to; defaults to this module's directory
    :param show_msg: whether the download prints progress
    :return: path of the downloaded zip, or None when nothing was downloaded
    """
    if not version:
        return None

    main_ver = version.split('.')[0]
    remote_ver = None

    page = SessionPage(Drission().session)
    page.get('https://registry.npmmirror.com/-/binary/chromedriver/')

    # Walk all published versions, skipping other major versions. Stop at an exact match;
    # otherwise the last entry with the same major version is used.
    # The loop variable must not be called `version`: shadowing the parameter made the
    # exact-match comparison below impossible to satisfy.
    for entry in page.json:
        if not entry['name'].startswith(f'{main_ver}.'):
            continue

        remote_ver = entry['name']
        if entry['name'] == f'{version}/':
            break

    if remote_ver:
        # remote_ver is used as the URL directory prefix as-is (the match above expects a trailing '/').
        url = f'https://cdn.npmmirror.com/binaries/chromedriver/{remote_ver}chromedriver_win32.zip'
        save_path = save_path or str(Path(__file__).parent)
        result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg)

        if result[0]:
            return result[1]

    return None

View File

@ -1,73 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union
def raise_when_ele_not_found(on_off: bool = True) -> None: ...
# The parameter is named save_name to match the implementation's signature, so keyword calls type-check.
def configs_to_here(save_name: Union[Path, str] = None) -> None: ...
def show_settings(ini_path: Union[str, Path] = None) -> None: ...
def set_paths(driver_path: Union[str, Path] = None,
chrome_path: Union[str, Path] = None,
browser_path: Union[str, Path] = None,
local_port: Union[int, str] = None,
debugger_address: str = None,
download_path: Union[str, Path] = None,
user_data_path: Union[str, Path] = None,
cache_path: Union[str, Path] = None,
ini_path: Union[str, Path] = None,
check_version: bool = False) -> None: ...
def use_auto_port(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def use_system_user_path(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def set_argument(arg: str, value: Union[bool, str] = None, ini_path: Union[str, Path] = None) -> None: ...
def set_headless(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def set_no_imgs(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def set_no_js(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def set_mute(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ...
def set_user_agent(user_agent: str, ini_path: Union[str, Path] = None) -> None: ...
def set_proxy(proxy: str, ini_path: Union[str, Path] = None) -> None: ...
def check_driver_version(driver_path: Union[str, Path] = None, chrome_path: str = None) -> bool: ...
# -------------------------自动识别chrome版本号并下载对应driver------------------------
def get_match_driver(ini_path: Union[str, None] = 'default',
save_path: str = None,
chrome_path: str = None,
show_msg: bool = True,
check_version: bool = True) -> Union[str, None]: ...
def get_chrome_path(ini_path: str = None,
show_msg: bool = True,
from_ini: bool = True,
from_regedit: bool = True,
from_system_path: bool = True, ) -> Union[str, None]: ...

View File

@ -1,39 +1,54 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
class BaseError(Exception):
    """Base class of the package's exceptions.

    Subclasses set ``_info`` to their default message; a message passed to
    the constructor overrides it for that instance.
    """
    _info = None

    def __init__(self, ErrorInfo=None):
        """
        :param ErrorInfo: message for this instance; the class default ``_info`` is used when empty
        """
        self._info = ErrorInfo or self._info
        # Hand the message (not the exception itself) to Exception, so that args
        # and repr() are meaningful and do not refer back to this object.
        super().__init__(self._info)

    def __str__(self):
        # __str__ must return a str even when no message is available.
        return '' if self._info is None else str(self._info)
class ElementNotFoundError(BaseError):
    """Raised when an element lookup finds nothing.

    Optionally carries the name of the lookup method and its arguments,
    which are appended to the message.
    """
    _info = '\n没有找到元素。'

    def __init__(self, ErrorInfo=None, method=None, arguments=None):
        """
        :param ErrorInfo: message overriding the default
        :param method: name of the method that performed the lookup
        :param arguments: arguments the lookup was called with
        """
        super().__init__(ErrorInfo=ErrorInfo)
        self.method = method
        self.arguments = arguments

    def __str__(self):
        # Message first, then the optional method / args lines.
        pieces = [f'{self._info}']
        if self.method:
            pieces.append(f'\nmethod: {self.method}')
        if self.arguments:
            pieces.append(f'\nargs: {self.arguments}')
        return ''.join(pieces)
class AlertExistsError(BaseError):
_info = '存在未处理的提示框。'
class ContextLossError(BaseError):
class ContextLostError(BaseError):
_info = '页面被刷新,请操作前尝试等待页面刷新或加载完成。'
class ElementLossError(BaseError):
_info = '元素对象因刷新已失效。'
class ElementLostError(BaseError):
_info = '元素对象已失效。可能是页面整体刷新或js局部刷新把元素替换或去除了'
class CallMethodError(BaseError):
class CDPError(BaseError):
_info = '方法调用错误。'
class TabClosedError(BaseError):
_info = '标签页已关闭。'
class ElementNotFoundError(BaseError):
_info = '没有找到元素。'
class PageDisconnectedError(BaseError):
_info = '与页面的连接已断开。'
class JavaScriptError(BaseError):
@ -54,3 +69,23 @@ class NoResourceError(BaseError):
class CanNotClickError(BaseError):
_info = '该元素无法滚动到视口或被遮挡,无法点击。'
class GetDocumentError(BaseError):
_info = '获取文档失败。'
class WaitTimeoutError(BaseError):
_info = '等待失败。'
class WrongURLError(BaseError):
_info = '无效的url。'
class StorageError(BaseError):
_info = '无法操作当前存储数据。'
class CookieFormatError(BaseError):
_info = 'cookie格式不正确。'

View File

@ -1,131 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from typing import Union, List, Tuple
from lxml.html import HtmlElement
from .base import DrissionElement, BaseElement
from .chromium_base import ChromiumBase
from .chromium_element import ChromiumElement
from .chromium_frame import ChromiumFrame
from .commons.constants import NoneElement
from mixpage.driver_element import DriverElement
from mixpage.driver_page import DriverPage
from .session_page import SessionPage
class SessionElement(DrissionElement):
    """Type stub for the element class that wraps an lxml ``HtmlElement``; signatures only."""

    def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None):
        # Attribute declarations for type checkers only.
        self._inner_ele: HtmlElement = ...
        self.page: SessionPage = ...

    @property
    def inner_ele(self) -> HtmlElement: ...

    def __repr__(self) -> str: ...

    def __call__(self,
                 loc_or_str: Union[Tuple[str, str], str],
                 timeout: float = None) -> Union['SessionElement', str, None]: ...

    # ---- element content ----
    @property
    def tag(self) -> str: ...

    @property
    def html(self) -> str: ...

    @property
    def inner_html(self) -> str: ...

    @property
    def attrs(self) -> dict: ...

    @property
    def text(self) -> str: ...

    @property
    def raw_text(self) -> str: ...

    # ---- relative lookups returning a single result ----
    def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ...

    def child(self, filter_loc: Union[tuple, str] = '',
              index: int = 1,
              timeout: float = None,
              ele_only: bool = True) -> Union['SessionElement', str, None]: ...

    def prev(self, filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = None,
             ele_only: bool = True) -> Union['SessionElement', str, None]: ...

    def next(self, filter_loc: Union[tuple, str] = '',
             index: int = 1,
             timeout: float = None,
             ele_only: bool = True) -> Union['SessionElement', str, None]: ...

    def before(self, filter_loc: Union[tuple, str] = '',
               index: int = 1,
               timeout: float = None,
               ele_only: bool = True) -> Union['SessionElement', str, None]: ...

    def after(self, filter_loc: Union[tuple, str] = '',
              index: int = 1,
              timeout: float = None,
              ele_only: bool = True) -> Union['SessionElement', str, None]: ...

    # ---- relative lookups returning lists ----
    def children(self, filter_loc: Union[tuple, str] = '',
                 timeout: float = None,
                 ele_only: bool = True) -> List[Union['SessionElement', str]]: ...

    def prevs(self, filter_loc: Union[tuple, str] = '',
              timeout: float = None,
              ele_only: bool = True) -> List[Union['SessionElement', str]]: ...

    def nexts(self, filter_loc: Union[tuple, str] = '',
              timeout: float = None,
              ele_only: bool = True) -> List[Union['SessionElement', str]]: ...

    def befores(self, filter_loc: Union[tuple, str] = '',
                timeout: float = None,
                ele_only: bool = True) -> List[Union['SessionElement', str]]: ...

    def afters(self, filter_loc: Union[tuple, str] = '',
               timeout: float = None,
               ele_only: bool = True) -> List[Union['SessionElement', str]]: ...

    def attr(self, attr: str) -> Union[str, None]: ...

    # ---- lookups by locator ----
    def ele(self,
            loc_or_str: Union[Tuple[str, str], str],
            timeout: float = None) -> Union['SessionElement', str, NoneElement]: ...

    def eles(self,
             loc_or_str: Union[Tuple[str, str], str],
             timeout: float = None) -> List[Union['SessionElement', str]]: ...

    def s_ele(self,
              loc_or_str: Union[Tuple[str, str], str] = None) -> Union['SessionElement', str, NoneElement]: ...

    def s_eles(self,
               loc_or_str: Union[Tuple[str, str], str]) -> List[Union['SessionElement', str]]: ...

    def _find_elements(self,
                       loc_or_str: Union[Tuple[str, str], str],
                       timeout: float = None,
                       single: bool = True,
                       relative: bool = False,
                       raise_err: bool = None) \
            -> Union['SessionElement', str, NoneElement, List[Union['SessionElement', str]]]: ...

    def _get_ele_path(self, mode: str) -> str: ...
# Accepts html text or one of several element/page objects plus an optional locator;
# the annotation allows a single result or, with single=False presumably, a list — confirm in implementation.
def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, DriverElement, BaseElement,
                                        ChromiumFrame, ChromiumBase, DriverPage],
                     loc: Union[str, Tuple[str, str]] = None,
                     single: bool = True) -> Union[
    SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ...

View File

@ -1,4 +1,4 @@
include DrissionPage/configs/configs.ini
include DrissionPage/_configs/configs.ini
include DrissionPage/*.pyi
include DrissionPage/*/*.py
include DrissionPage/*/*.pyi

View File

@ -22,34 +22,20 @@ DrissionPage 是一个基于 python 的网页自动化工具。
支持系统:Windows、Linux、Mac
python 版本3.6 及以上
python 版本3.8 及以上
支持浏览器:Chromium 内核浏览器(如 Chrome 和 Edge),electron 应用
---
# 🛠 如何使用
**📖 使用文档:** [点击查看](https://g1879.gitee.io/drissionpagedocs)
**交流 QQ 群:** 636361957
---
# 🔥 新版尝鲜
4.0 在 3.x 的基础上对底层进行了大幅重构,新增大量功能,改善运行效率和稳定性,优化项目结构,解决很多存在的问题。对比旧版本有质的提高。
现已发布 beta 版,欢迎尝鲜。
[4.0功能介绍](https://g1879.gitee.io/drissionpagedocs/whatsnew/4_0/)
安装目前是b14关注文档可能会有更新版本
```console
pip install DrissionPage==4.0.0b14
```
---
# 📕 背景
用 requests 做数据采集面对要登录的网站时要分析数据包、JS 源码构造复杂的请求往往还要应付验证码、JS 混淆、签名参数等反爬手段,门槛较高,开发效率不高。
@ -64,7 +50,7 @@ pip install DrissionPage==4.0.0b14
# 💡 理念
简洁!易用 !方便
简洁而强大
---
@ -118,7 +104,7 @@ pip install DrissionPage==4.0.0b14
- 还有很多细节,这里不一一列举,欢迎实际使用中体验:)
---
---
# 🔖 版本历史

View File

@ -1,8 +1,8 @@
requests
lxml
cssselect
DownloadKit>=1.0.0
FlowViewer>=0.3.0
websocket-client
DownloadKit>=2.0.0
websocket-client>=1.7.0
click
tldextract
tldextract
psutil

View File

@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup(
name="DrissionPage",
version="3.2.35",
version="4.0.1",
author="g1879",
author_email="g1879@qq.com",
description="Python based web automation tool. It can control the browser and send and receive data packets.",
@ -22,19 +22,19 @@ setup(
'lxml',
'requests',
'cssselect',
'DownloadKit>=1.0.0',
'FlowViewer>=0.3.0',
'websocket-client',
'DownloadKit>=2.0.0',
'websocket-client>=1.7.0',
'click',
'tldextract'
'tldextract',
'psutil'
],
classifiers=[
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.8",
"Development Status :: 4 - Beta",
"Topic :: Utilities",
"License :: OSI Approved :: BSD License",
],
python_requires='>=3.6',
python_requires='>=3.8',
entry_points={
'console_scripts': [
'dp = DrissionPage.commons.cli:main',