Merge pull request !53 from g1879/dev
This commit is contained in:
g1879 2024-12-06 11:08:20 +00:00 committed by Gitee
commit 1de7c66f42
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
92 changed files with 23056 additions and 7688 deletions

View File

@ -1,11 +1,17 @@
在提交issue前，请确认已经给本库点了星星，这对我来说很重要。
使用方法请查看[使用文档](http://drissionpage.cn),文档里都有。
也可在QQ群里提问：636361957
使用问题作者可能不能及时处理,可在知识星球提问,会尽快回复。
![](https://drissionpage.cn/zsxq.png)
请围绕以下内容陈述您的问题:
1. 遇到了什么问题?什么场景下出现的?如何重现?
2. 请附上代码和报错信息(如有)
2. 请附上代码和报错信息
3. DrissionPage、浏览器、python版本号是多少？
4. 有什么意见建议?
请在下方写正文,不要把内容插入到上面的问题中。
---

View File

@ -2,16 +2,27 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
允许任何人以个人身份使用或分发本项目源代码但仅限于学习和合法非盈利目的
个人或组织如未获得版权持有人授权不得将本项目以源代码或二进制形式用于商业行为
使用本项目需满足以下条款如使用过程中出现违反任意一项条款的情形授权自动失效
* 禁止将DrissionPage应用到任何可能违反当地法律规定和道德约束的项目中
* 禁止将DrissionPage用于任何可能有损他人利益的项目中
* 禁止将DrissionPage用于攻击与骚扰行为
* 遵守Robots协议禁止将DrissionPage用于采集法律或系统Robots协议不允许的数据
使用DrissionPage发生的一切行为均由使用人自行负责
因使用DrissionPage进行任何行为所产生的一切纠纷及后果均与版权持有人无关
版权持有人不承担任何使用DrissionPage带来的风险和损失
版权持有人不对DrissionPage可能存在的缺陷导致的任何损失负任何责任
"""
from ._base.chromium import Chromium
from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
from ._pages.chromium_page import ChromiumPage
from ._pages.session_page import SessionPage
from ._pages.web_page import WebPage
# 启动配置类
from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
__all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__']
__version__ = '4.0.5.4'
__version__ = '4.1.0.13'

15
DrissionPage/__init__.pyi Normal file
View File

@ -0,0 +1,15 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from ._base.chromium import Chromium
from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
from ._pages.chromium_page import ChromiumPage
from ._pages.session_page import SessionPage
from ._pages.web_page import WebPage
__all__ = ['WebPage', 'ChromiumPage', 'Chromium', 'ChromiumOptions', 'SessionOptions', 'SessionPage', '__version__']
__version__: str = ...

View File

@ -2,20 +2,23 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from abc import abstractmethod
from copy import copy
from pathlib import Path
from re import sub
from urllib.parse import quote
from DownloadKit import DownloadKit
from requests import Session
from .._functions.settings import Settings
from .._functions.locator import get_loc
from .._functions.web import format_html
from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement
from .._functions.elements import get_frame, get_eles
from .._functions.locator import get_loc, is_selenium_loc
from .._functions.settings import Settings
from .._functions.web import format_html
from ..errors import ElementNotFoundError
@ -31,6 +34,23 @@ class BaseParser(object):
def eles(self, locator, timeout=None):
return self._ele(locator, timeout, index=None)
def find(self, locators, any_one=True, first_ele=True, timeout=None):
    """Look up several locators at once.

    :param locators: a single locator or a collection of locators
    :param any_one: whether to return as soon as any locator yields a result
    :param first_ele: whether each locator only fetches its first element
    :param timeout: timeout; forced to 0 for Session-type objects, defaults to self.timeout
    :return: when any_one is true, a (locator, result) pair, or (None, None) if nothing
             matched; otherwise whatever get_eles() returned
    """
    if 'Session' in self._type:
        timeout = 0
    elif timeout is None:
        timeout = self.timeout

    # A tuple is only accepted when it is one selenium-style locator.
    if isinstance(locators, tuple) and not is_selenium_loc(locators):
        raise ValueError(f"locators参数为tuple时必须是单独的定位符即长度为2且第一位是'id', 'xpath', 'link text', "
                         f"'partial link text','name', 'tag name', 'class name', 'css selector' 之一。\n"
                         f"现在是:{locators}")

    results = get_eles(locators, self, any_one, first_ele, timeout)
    if not any_one:
        return results

    # Hand back the first locator whose result is truthy.
    for loc in results:
        if results[loc]:
            return loc, results[loc]
    return None, None
# ----------------以下属性或方法待后代实现----------------
@property
def html(self):
@ -45,7 +65,7 @@ class BaseParser(object):
def _ele(self, locator, timeout=None, index=1, raise_err=None, method=None):
pass
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
pass
@ -54,9 +74,32 @@ class BaseElement(BaseParser):
def __init__(self, owner=None):
self.owner = owner
self.page = owner._page if owner else None
self._type = 'BaseElement'
def get_frame(self, loc_or_ind, timeout=None):
    """Return a frame object found inside this element.

    :param loc_or_ind: a locator (str or tuple) or an integer index
    :param timeout: timeout passed on to the module-level get_frame() helper
    :return: whatever the get_frame() helper returns
    :raises TypeError: if loc_or_ind is not an int, str or tuple
    """
    if isinstance(loc_or_ind, (int, str, tuple)):
        return get_frame(self, loc_ind_ele=loc_or_ind, timeout=timeout)
    raise TypeError('loc_or_ind参数是定位符或序号。')
def _ele(self, locator, timeout=None, index=1, relative=False, raise_err=None, method=None):
    """Look up an element (or a list of elements) through ``_find_elements``.

    :param locator: locator; an object that already has a ``_type`` attribute is returned as-is
    :param timeout: timeout; falls back to self.timeout when None
    :param index: which match to return, counted from 1; negative values count from the end
    :param relative: whether the lookup is relative to this element
    :param raise_err: whether to raise when nothing is found; None defers to Settings
    :param method: name of the calling method, recorded on the error / empty result
    :return: the element object, or a list of them
    """
    if hasattr(locator, '_type'):
        return locator

    wait = self.timeout if timeout is None else timeout
    found = self._find_elements(locator, timeout=wait, index=index, relative=relative, raise_err=raise_err)
    # A list result is returned as-is even when it is empty.
    if found or isinstance(found, list):
        return found

    call_args = {'locator': locator, 'index': index, 'timeout': wait}
    # An explicit raise_err wins; the global setting only applies when raise_err is None.
    if raise_err is True or (Settings.raise_when_ele_not_found and raise_err is None):
        raise ElementNotFoundError(None, method, call_args)

    # Record how the falsy result was requested before handing it back.
    found.method = method
    found.args = call_args
    return found
@property
def timeout(self):
    """Return the owner's timeout, or 10 when this element has no (truthy) owner."""
    if self.owner:
        return self.owner.timeout
    return 10
# ----------------以下属性或方法由后代实现----------------
@property
def tag(self):
@ -71,92 +114,46 @@ class BaseElement(BaseParser):
def nexts(self):
pass
def _ele(self, locator, timeout=None, index=1, relative=False, raise_err=None, method=None):
"""Look up an element (or a list of elements) through ``_find_elements``.
:param locator: locator used for the search
:param timeout: timeout passed on to ``_find_elements``
:param index: which match to return, counted from 1; negative values count from the end
:param relative: whether the lookup is relative to this element
:param raise_err: whether to raise when nothing is found
:param method: name of the calling method, recorded on the error / empty result
:return: the element object, or a list of them
"""
r = self._find_elements(locator, timeout=timeout, index=index, relative=relative, raise_err=raise_err)
# A list result is returned as-is even when it is empty.
if r or isinstance(r, list):
return r
# NOTE(review): this raises whenever the global setting is on, even if raise_err is False.
if Settings.raise_when_ele_not_found or raise_err is True:
raise ElementNotFoundError(None, method, {'locator': locator, 'index': index})
# Record how the falsy result was requested before handing it back.
r.method = method
r.args = {'locator': locator, 'index': index}
return r
class DrissionElement(BaseElement):
"""ChromiumElement 和 SessionElement的基类但不是ShadowRoot的基类"""
@property
def link(self):
"""Return the 'href' attribute, or the 'src' attribute when 'href' is empty.
NOTE(review): the original docstring says the url is absolute; that depends on attr() - confirm.
"""
return self.attr('href') or self.attr('src')
@property
def css_path(self):
"""返回css path路径"""
return self._get_ele_path('css')
return self._get_ele_path(xpath=False)
@property
def xpath(self):
"""返回xpath路径"""
return self._get_ele_path('xpath')
return self._get_ele_path()
@property
def comments(self):
"""Return the comment nodes below this element (xpath ``.//comment()``) as a list."""
return self.eles('xpath:.//comment()')
def texts(self, text_node_only=False):
"""返回元素内所有直接子节点的文本,包括元素和文本节点
:param text_node_only: 是否只返回文本节点
:return: 文本列表
"""
if text_node_only:
texts = self.eles('xpath:/text()')
else:
texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')]
texts = self.eles('xpath:/text()') if text_node_only else [x if isinstance(x, str) else x.text
for x in self.eles('xpath:./text() | *')]
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
def parent(self, level_or_loc=1, index=1):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:return: 上级元素对象
"""
def parent(self, level_or_loc=1, index=1, timeout=None):
if isinstance(level_or_loc, int):
loc = f'xpath:./ancestor::*[{level_or_loc}]'
elif isinstance(level_or_loc, (tuple, str)):
loc = get_loc(level_or_loc, True)
if loc[0] == 'css selector':
raise ValueError('此css selector语法不受支持请换成xpath。')
loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}[{index}]'
else:
raise TypeError('level_or_loc参数只能是tuple、int或str。')
return self._ele(loc, timeout=0, relative=True, raise_err=False, method='parent()')
return self._ele(loc, timeout=timeout, relative=True, raise_err=False, method='parent()')
def child(self, locator='', index=1, timeout=None, ele_only=True):
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param index: 第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
if isinstance(locator, int):
index = locator
locator = ''
@ -173,52 +170,18 @@ class DrissionElement(BaseElement):
{'locator': locator, 'index': index, 'ele_only': ele_only})
def prev(self, locator='', index=1, timeout=None, ele_only=True):
"""Return one sibling located before this element, optionally filtered by a locator.
:param locator: query used to filter the candidates
:param index: which match (counting from 1) to return
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: the sibling element
"""
return self._get_relative('prev()', 'preceding', True, locator, index, timeout, ele_only)
def next(self, locator='', index=1, timeout=None, ele_only=True):
"""Return one sibling located after this element, optionally filtered by a locator.
:param locator: query used to filter the candidates
:param index: which match (counting from 1) to return
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: the sibling element
"""
return self._get_relative('next()', 'following', True, locator, index, timeout, ele_only)
def before(self, locator='', index=1, timeout=None, ele_only=True):
"""Return one element or node located before this element, optionally filtered by a locator.
Unlike prev(), this passes brother=False to _get_relative(), so the search is not limited to siblings.
:param locator: query used to filter the candidates
:param index: which match (counting from 1) to return
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: an element or node located before this element
"""
return self._get_relative('before()', 'preceding', False, locator, index, timeout, ele_only)
def after(self, locator='', index=1, timeout=None, ele_only=True):
"""Return one element or node located after this element, optionally filtered by a locator.
Unlike next(), this passes brother=False to _get_relative(), so the search is not limited to siblings.
:param locator: query used to filter the candidates
:param index: which match (counting from 1) to return
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: an element or node located after this element
"""
return self._get_relative('after()', 'following', False, locator, index, timeout, ele_only)
def children(self, locator='', timeout=None, ele_only=True):
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
if not locator:
loc = '*' if ele_only else 'node()'
else:
@ -232,53 +195,20 @@ class DrissionElement(BaseElement):
return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
def prevs(self, locator='', timeout=None, ele_only=True):
"""Return all siblings located before this element as a list, optionally filtered by a locator.
:param locator: query used to filter the candidates
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: list of sibling elements or node texts
"""
return self._get_relatives(locator=locator, direction='preceding', timeout=timeout, ele_only=ele_only)
def nexts(self, locator='', timeout=None, ele_only=True):
"""Return all siblings located after this element as a list, optionally filtered by a locator.
:param locator: query used to filter the candidates
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: list of sibling elements or node texts
"""
return self._get_relatives(locator=locator, direction='following', timeout=timeout, ele_only=ele_only)
def befores(self, locator='', timeout=None, ele_only=True):
"""Return all elements or nodes located before this element as a list, optionally filtered by a locator.
Searches in the 'preceding' direction with brother=False, i.e. not limited to siblings.
:param locator: query used to filter the candidates
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: list of the elements or nodes located before this element
"""
return self._get_relatives(locator=locator, direction='preceding',
brother=False, timeout=timeout, ele_only=ele_only)
def afters(self, locator='', timeout=None, ele_only=True):
"""Return all elements or nodes located after this element as a list, optionally filtered by a locator.
Searches in the 'following' direction with brother=False, i.e. not limited to siblings.
:param locator: query used to filter the candidates
:param timeout: timeout for the lookup
:param ele_only: whether to return elements only; when False, text and comment nodes are included
:return: list of the elements or nodes located after this element
"""
return self._get_relatives(locator=locator, direction='following',
brother=False, timeout=timeout, ele_only=ele_only)
def _get_relative(self, func, direction, brother, locator='', index=1, timeout=None, ele_only=True):
"""获取一个亲戚元素或节点,可用查询语法筛选,可指定返回筛选结果的第几个
:param func: 方法名称
:param direction: 方向'following' 'preceding'
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
if isinstance(locator, int):
index = locator
locator = ''
@ -287,14 +217,6 @@ class DrissionElement(BaseElement):
{'locator': locator, 'index': index, 'ele_only': ele_only})
def _get_relatives(self, index=None, locator='', direction='following', brother=True, timeout=.5, ele_only=True):
"""按要求返回兄弟元素或节点组成的列表
:param index: 获取第几个该参数不为None时只获取该编号的元素
:param locator: 用于筛选的查询语法
:param direction: 'following' 'preceding'查找的方向
:param brother: 查找范围在同级查找还是整个dom前后查找
:param timeout: 查找等待时间
:return: 元素对象或字符串
"""
brother = '-sibling' if brother else ''
if not locator:
@ -332,20 +254,17 @@ class DrissionElement(BaseElement):
def attr(self, name: str):
return ''
def _get_ele_path(self, mode):
def _get_ele_path(self, xpath=True):
return ''
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
pass
class BasePage(BaseParser):
"""页面类的基类"""
def __init__(self):
"""初始化函数"""
self._url = None
self._timeout = 10
self._url_available = None
self.retry_times = 3
self.retry_interval = 2
@ -353,48 +272,33 @@ class BasePage(BaseParser):
self._download_path = None
self._none_ele_return_value = False
self._none_ele_value = None
self._session = None
self._headers = None
self._session_options = None
self._type = 'BasePage'
@property
def title(self):
    """Return the text of the page's <title> element, or None when it is not found."""
    title_ele = self._ele('xpath://title', raise_err=False, method='title')
    if not title_ele:
        return None
    return title_ele.text
@property
def timeout(self):
"""Return the number of seconds to wait when looking up elements."""
return self._timeout
@timeout.setter
def timeout(self, second):
"""Set the number of seconds to wait when looking up elements."""
self._timeout = second
@property
def url_available(self):
"""Return the stored availability flag (``_url_available``) of the currently visited url."""
return self._url_available
@property
def download_path(self):
"""Return the default download path."""
return self._download_path
@property
def download(self):
"""返回下载器对象"""
if self._DownloadKit is None:
self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
if not self._session:
self._create_session()
self._DownloadKit = DownloadKit(driver=self, save_path=self.download_path)
return self._DownloadKit
def _before_connect(self, url, retry, interval):
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数间隔是否文件组成的tuple
"""
is_file = False
if isinstance(url, Path) or ('://' not in url and ':\\\\' not in url):
p = Path(url)
@ -407,6 +311,24 @@ class BasePage(BaseParser):
interval = interval if interval is not None else self.retry_interval
return retry, interval, is_file
def _set_session_options(self, session_or_options=None):
    """Store the session configuration, or adopt an existing Session.

    :param session_or_options: a Session object, a SessionOptions object, or nothing
    :return: None
    """
    if not session_or_options:
        self._session_options = SessionOptions(session_or_options)
        return

    if isinstance(session_or_options, SessionOptions):
        self._session_options = session_or_options
        return

    if isinstance(session_or_options, Session):
        self._session_options = SessionOptions()
        self._session = copy(session_or_options)
        # The headers are moved onto this object and cleared on the copied session.
        self._headers = self._session.headers
        self._session.headers = None
def _create_session(self):
    """Create the built-in session and its headers from the session options."""
    # Fall back to default options when none have been configured yet.
    if not self._session_options:
        self._set_session_options()
    session, headers = self._session_options.make_session()
    self._session = session
    self._headers = headers
# ----------------以下属性或方法由后代实现----------------
@property
def url(self):
@ -420,33 +342,22 @@ class BasePage(BaseParser):
def user_agent(self):
return
@abstractmethod
def cookies(self, as_dict=False, all_info=False):
return {}
@abstractmethod
def get(self, url, show_errmsg=False, retry=None, interval=None):
pass
def _ele(self, locator, timeout=None, index=1, raise_err=None, method=None):
"""调用获取元素的方法
:param locator: 定位符
:param timeout: 超时时间
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param raise_err: 找不到时是否抛出异常
:param method: 调用的方法名
:return: 元素对象或它们组成的列表
"""
if not locator:
raise ElementNotFoundError(None, method, {'locator': locator})
if timeout is None:
timeout = self.timeout
r = self._find_elements(locator, timeout=timeout, index=index, raise_err=raise_err)
if r or isinstance(r, list):
return r
if Settings.raise_when_ele_not_found or raise_err is True:
raise ElementNotFoundError(None, method, {'locator': locator, 'index': index})
if raise_err is True or (Settings.raise_when_ele_not_found and raise_err is None):
raise ElementNotFoundError(None, method, {'locator': locator, 'index': index, 'timeout': timeout})
r.method = method
r.args = {'locator': locator, 'index': index}
r.args = {'locator': locator, 'index': index, 'timeout': timeout}
return r

View File

@ -2,17 +2,21 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from abc import abstractmethod
from typing import Union, Tuple, List, Any, Optional
from typing import Union, Tuple, List, Any, Optional, Dict
from DownloadKit import DownloadKit
from requests import Session
from requests.structures import CaseInsensitiveDict
from .._configs.session_options import SessionOptions
from .._elements.chromium_element import ChromiumElement
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._functions.elements import SessionElementsList
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.session_page import SessionPage
from .._pages.web_page import WebPage
@ -20,6 +24,7 @@ from .._pages.web_page import WebPage
class BaseParser(object):
_type: str
timeout: float
def __call__(self, locator: Union[Tuple[str, str], str], index: int = 1): ...
@ -30,6 +35,23 @@ class BaseParser(object):
def eles(self, locator: Union[Tuple[str, str], str], timeout=None): ...
def find(self,
locators: Union[str, List[str], tuple],
any_one: bool = True,
first_ele: bool = True,
timeout: float = None) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]], Tuple[str, SessionElement],
Tuple[str, ChromiumElement]]:
"""传入多个定位符获取多个ele
:param locators: 定位符组成的列表
:param any_one: 是否任何一个定位符找到结果即返回
:param first_ele: 每个定位符是否只获取第一个元素
:param timeout: 超时时间
:return: any_one为True时返回一个找到的元素定位符和对象组成的元组格式(loc, ele)全都没找到返回(None, None)
any_one为False时返回dict格式key为定位符value为找到的元素或列表
"""
...
# ----------------以下属性或方法待后代实现----------------
@property
def html(self) -> str: ...
@ -49,30 +71,26 @@ class BaseParser(object):
def _find_elements(self,
locator: Union[Tuple[str, str], str],
timeout: float = None,
timeout: float,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None): ...
class BaseElement(BaseParser):
owner: BasePage = ...
def __init__(self, owner: BasePage = None):
self.owner: BasePage = ...
self.page: Union[ChromiumPage, SessionPage, WebPage] = ...
def __init__(self, owner: BasePage = None): ...
@property
def timeout(self) -> float:
"""返回其查找元素时超时时间"""
...
# ----------------以下属性或方法由后代实现----------------
@property
def tag(self) -> str: ...
def _ele(self,
locator: Union[Tuple[str, str], str],
timeout: float = None,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None,
method: str = None): ...
def parent(self, level_or_loc: Union[tuple, str, int] = 1): ...
def prev(self, index: int = 1) -> None: ...
@ -83,83 +101,206 @@ class BaseElement(BaseParser):
def nexts(self): ...
def get_frame(self, loc_or_ind, timeout=None) -> ChromiumFrame:
"""获取元素中一个frame对象
:param loc_or_ind: 定位符iframe序号序号从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间
:return: ChromiumFrame对象
"""
...
def _ele(self,
locator: Union[Tuple[str, str], str],
timeout: float = None,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None,
method: str = None):
"""调用获取元素的方法
:param locator: 定位符
:param timeout: 超时时间
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param relative: 是否相对定位
:param raise_err: 找不到时是否抛出异常
:param method: 调用的方法名
:return: 元素对象或它们组成的列表
"""
...
class DrissionElement(BaseElement):
"""ChromiumElement 和 SessionElement的基类但不是ShadowRoot的基类"""
def __init__(self, owner: BasePage = None): ...
@property
def link(self) -> str: ...
def link(self) -> str:
"""返回href或src绝对url"""
...
@property
def css_path(self) -> str: ...
def css_path(self) -> str:
"""返回css path路径"""
...
@property
def xpath(self) -> str: ...
def xpath(self) -> str:
"""返回xpath路径"""
...
@property
def comments(self) -> list: ...
def comments(self) -> list:
"""返回元素注释文本组成的列表"""
...
def texts(self, text_node_only: bool = False) -> list: ...
def texts(self, text_node_only: bool = False) -> list:
"""返回元素内所有直接子节点的文本,包括元素和文本节点
:param text_node_only: 是否只返回文本节点
:return: 文本列表
"""
...
def parent(self,
level_or_loc: Union[tuple, str, int] = 1,
index: int = 1) -> Union[DrissionElement, None]: ...
index: int = 1,
timeout: float = None) -> Union[DrissionElement, None]:
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:param timeout: 时间
:return: 上级元素对象
"""
...
def child(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]:
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param index: 第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
...
def prev(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]:
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素
"""
...
def next(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]:
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素
"""
...
def before(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]:
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
...
def after(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ...
ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]:
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
...
def children(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""返回直接子元素元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
...
def prevs(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素或节点文本组成的列表
"""
...
def nexts(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 兄弟元素或节点文本组成的列表
"""
...
def befores(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
...
def afters(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的元素或节点组成的列表
"""
...
def _get_relative(self,
func: str,
@ -168,7 +309,17 @@ class DrissionElement(BaseElement):
locator: Union[Tuple[str, str], str] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> DrissionElement: ...
ele_only: bool = True) -> DrissionElement:
"""获取一个亲戚元素或节点,可用查询语法筛选,可指定返回筛选结果的第几个
:param func: 方法名称
:param direction: 方向'following' 'preceding'
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
...
def _get_relatives(self,
index: int = None,
@ -176,7 +327,16 @@ class DrissionElement(BaseElement):
direction: str = 'following',
brother: bool = True,
timeout: float = 0.5,
ele_only: bool = True) -> List[Union[DrissionElement, str]]: ...
ele_only: bool = True) -> List[Union[DrissionElement, str]]:
"""按要求返回兄弟元素或节点组成的列表
:param index: 获取第几个该参数不为None时只获取该编号的元素
:param locator: 用于筛选的查询语法
:param direction: 'following' 'preceding'查找的方向
:param brother: 查找范围在同级查找还是整个dom前后查找
:param timeout: 查找等待时间
:return: 元素对象或字符串
"""
...
# ----------------以下属性或方法由后代实现----------------
@property
@ -191,41 +351,65 @@ class DrissionElement(BaseElement):
@abstractmethod
def attr(self, name: str) -> str: ...
def _get_ele_path(self, mode) -> str: ...
def _get_ele_path(self, xpath: bool = True) -> str: ...
class BasePage(BaseParser):
"""页面类的基类"""
def __init__(self):
self._url_available: bool = ...
self.retry_times: int = ...
self.retry_interval: float = ...
self._timeout: float = ...
self._download_path: str = ...
self._DownloadKit: DownloadKit = ...
self._none_ele_return_value: bool = ...
self._none_ele_value: Any = ...
self._page: Union[ChromiumPage, SessionPage, WebPage]=...
_url_available: Optional[bool] = ...
retry_times: int = ...
retry_interval: float = ...
_download_path: Optional[str] = ...
_DownloadKit: Optional[DownloadKit] = ...
_none_ele_return_value: bool = ...
_none_ele_value: Any = ...
_page: Union[ChromiumPage, SessionPage, WebPage] = ...
_session: Optional[Session] = ...
_headers: Optional[CaseInsensitiveDict] = ...
_session_options: Optional[SessionOptions] = ...
def __init__(self): ...
@property
def title(self) -> Union[str, None]: ...
def title(self) -> Union[str, None]:
"""返回网页title"""
...
@property
def timeout(self) -> float: ...
@timeout.setter
def timeout(self, second: float) -> None: ...
def url_available(self) -> bool:
"""返回当前访问的url有效性"""
...
@property
def url_available(self) -> bool: ...
def download_path(self) -> str:
"""返回默认下载路径"""
...
@property
def download_path(self) -> str: ...
def download(self) -> DownloadKit:
"""返回下载器对象"""
...
@property
def download(self) -> DownloadKit: ...
def _before_connect(self, url: str, retry: int, interval: float) -> tuple:
"""连接前的准备
:param url: 要访问的url
:param retry: 重试次数
:param interval: 重试间隔
:return: 重试次数间隔是否文件组成的tuple
"""
...
def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ...
def _set_session_options(self, session_or_options: Union[Session, SessionOptions] = None) -> None:
"""启动配置
:param session_or_options: SessionSessionOptions对象
:return: None
"""
...
def _create_session(self) -> None:
"""创建内建Session对象"""
...
# ----------------以下属性或方法由后代实现----------------
@property
@ -237,9 +421,6 @@ class BasePage(BaseParser):
@property
def user_agent(self) -> str: ...
@abstractmethod
def cookies(self, as_dict: bool = False, all_info: bool = False) -> Union[list, dict]: ...
@abstractmethod
def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ...
@ -248,4 +429,13 @@ class BasePage(BaseParser):
timeout: float = None,
index: Optional[int] = 1,
raise_err: bool = None,
method: str = None): ...
method: str = None):
"""调用获取元素的方法
:param locator: 定位符
:param timeout: 超时时间
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param raise_err: 找不到时是否抛出异常
:param method: 调用的方法名
:return: 元素对象或它们组成的列表
"""
...

View File

@ -1,292 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from shutil import rmtree
from time import perf_counter, sleep
from websocket import WebSocketBadStatusException
from .driver import BrowserDriver, Driver
from .._functions.tools import raise_error
from .._units.downloader import DownloadManager
from ..errors import PageDisconnectedError
__ERROR__ = 'error'
class Browser(object):
BROWSERS = {}
def __new__(cls, address, browser_id, page):
    """Return the instance already registered for browser_id, or allocate a new one.

    :param address: browser address
    :param browser_id: browser id
    :param page: ChromiumPage object
    """
    if browser_id not in cls.BROWSERS:
        return object.__new__(cls)
    return cls.BROWSERS[browser_id]
def __init__(self, address, browser_id, page):
    """Set up the browser-level driver and the target-tracking state.

    :param address: browser address
    :param browser_id: browser id
    :param page: ChromiumPage object
    """
    # __new__ can hand back an instance that is already set up; do not initialise it twice.
    if hasattr(self, '_created'):
        return
    self._created = True
    Browser.BROWSERS[browser_id] = self

    self.page = page
    self.address = address
    self._driver = BrowserDriver(browser_id, 'browser', address, self)
    self.id = browser_id
    self._frames = {}
    self._drivers = {}
    self._all_drivers = {}
    self._connected = False

    # Best effort: the process id stays None when the query fails. Catch Exception
    # instead of using a bare except so KeyboardInterrupt/SystemExit still propagate.
    self._process_id = None
    try:
        r = self.run_cdp('SystemInfo.getProcessInfo')
        for i in r.get('processInfo', []):
            if i['type'] == 'browser':
                self._process_id = i['id']
                break
    except Exception:
        pass

    # Subscribe to target lifecycle events.
    self.run_cdp('Target.setDiscoverTargets', discover=True)
    self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed)
    self._driver.set_callback('Target.targetCreated', self._onTargetCreated)
def _get_driver(self, tab_id, owner=None):
    """Return a Driver for the given tab id, creating one if none is waiting.

    :param tab_id: tab id
    :param owner: the object that will use the driver
    :return: Driver object
    """
    # Reuse the driver stored in _drivers when there is one; otherwise build a fresh one.
    driver = self._drivers.pop(tab_id, None) or Driver(tab_id, 'page', self.address)
    driver.owner = owner
    self._all_drivers.setdefault(tab_id, set()).add(driver)
    return driver
def _onTargetCreated(self, **kwargs):
    """Run when a target is created; prepares a Driver for new page/webview targets."""
    if kwargs['targetInfo']['type'] not in ('page', 'webview'):
        return
    if kwargs['targetInfo']['targetId'] in self._all_drivers:
        return
    if kwargs['targetInfo']['url'].startswith('devtools://'):
        return

    try:
        tab_id = kwargs['targetInfo']['targetId']
        new_driver = Driver(tab_id, 'page', self.address)
        self._drivers[tab_id] = new_driver
        self._all_drivers.setdefault(tab_id, set()).add(new_driver)
    except WebSocketBadStatusException:
        # The connection could not be established; leave the target untracked.
        pass
def _onTargetDestroyed(self, **kwargs):
    """Run when a tab is closed; drops every record kept for that tab."""
    tab_id = kwargs['targetId']

    # _dl_mgr only exists after connect_to_page() has run.
    if hasattr(self, '_dl_mgr'):
        self._dl_mgr.clear_tab_info(tab_id)

    # Collect the keys first so the dict is not modified while it is iterated.
    stale_frames = [frame for frame, owner_tab in self._frames.items() if owner_tab == tab_id]
    for frame in stale_frames:
        self._frames.pop(frame, None)

    for tab_driver in self._all_drivers.get(tab_id, tuple()):
        tab_driver.stop()

    self._drivers.pop(tab_id, None)
    self._all_drivers.pop(tab_id, None)
def connect_to_page(self):
    """Run the page-related setup once: create the download manager and mark as connected."""
    if self._connected:
        return
    self._dl_mgr = DownloadManager(self)
    self._connected = True
def run_cdp(self, cmd, **cmd_args):
    """Run a Chrome DevTools Protocol command through the browser driver.

    :param cmd: protocol command
    :param cmd_args: command arguments; an optional '_ignore' entry is removed and passed to raise_error()
    :return: the result of the command
    """
    ignore = cmd_args.pop('_ignore', None)
    result = self._driver.run(cmd, **cmd_args)
    if __ERROR__ in result:
        return raise_error(result, ignore)
    return result
@property
def driver(self):
"""Return the browser-level driver object stored in ``_driver``."""
return self._driver
@property
def tabs_count(self):
    """Return the number of tabs (page/webview targets, excluding devtools:// urls)."""
    # Original note: do not switch this to get(), to avoid hanging.
    targets = self.run_cdp('Target.getTargets')['targetInfos']
    return sum(1 for target in targets
               if target['type'] in ('page', 'webview') and not target['url'].startswith('devtools://'))
@property
def tab_ids(self):
    """Return the ids of all tabs (page/webview entries, excluding devtools:// urls) as a list."""
    # Original note: do not switch this to CDP, because the order would be wrong.
    listing = self._driver.get(f'http://{self.address}/json').json()
    ids = []
    for entry in listing:
        if entry['type'] not in ('page', 'webview'):
            continue
        if entry['url'].startswith('devtools://'):
            continue
        ids.append(entry['id'])
    return ids
@property
def process_id(self):
"""Return the browser process id stored in ``_process_id``."""
return self._process_id
def find_tabs(self, title=None, url=None, tab_type=None):
    """Find the tabs that satisfy all given conditions.

    :param title: text that must be contained in the tab title, None matches all
    :param url: text that must be contained in the tab url, None matches all
    :param tab_type: tab type, or a list / tuple / set of types, None matches all
    :return: list of tab info dicts
    :raises TypeError: when tab_type is of an unsupported type
    """
    all_tabs = self._driver.get(f'http://{self.address}/json').json()  # do not switch to cdp

    if isinstance(tab_type, str):
        wanted = {tab_type}
    elif isinstance(tab_type, (list, tuple, set)):
        wanted = set(tab_type)
    elif tab_type is None:
        wanted = None
    else:
        raise TypeError('tab_type只能是set、list、tuple、str、None。')

    def _matches(tab):
        if title is not None and title not in tab['title']:
            return False
        if url is not None and url not in tab['url']:
            return False
        return wanted is None or tab['type'] in wanted

    return [tab for tab in all_tabs if _matches(tab)]
def close_tab(self, tab_id):
    """Close a tab.

    The local records of the tab are removed first, then the browser is
    told to close the target.

    :param tab_id: tab id
    :return: None
    """
    target = {'targetId': tab_id}
    self._onTargetDestroyed(**target)
    self.driver.run('Target.closeTarget', **target)
def stop_driver(self, driver):
    """Stop one Driver and remove it from the set kept for its tab.

    :param driver: Driver object
    :return: None
    """
    driver.stop()
    siblings = self._all_drivers.get(driver.id)
    if siblings is not None:
        siblings.discard(driver)
def activate_tab(self, tab_id):
    """Make a tab the active one.

    :param tab_id: tab id
    :return: None
    """
    target = {'targetId': tab_id}
    self.run_cdp('Target.activateTarget', **target)
def get_window_bounds(self, tab_id=None):
    """Return position and size information of a browser window.

    :param tab_id: tab id; when empty the id of this object is used
    :return: the 'bounds' dict reported by the browser
    """
    target = tab_id or self.id
    window = self.run_cdp('Browser.getWindowForTarget', targetId=target)
    return window['bounds']
def new_tab(self, new_window=False, background=False, new_context=False):
    """Create a new tab.

    :param new_window: whether to open the tab in a new window
    :param background: whether to leave the new tab inactive
    :param new_context: whether to create a new browser context for the tab
    :return: id of the new tab
    """
    context_id = self.run_cdp('Target.createBrowserContext')['browserContextId'] if new_context else None

    create_args = {'url': ''}
    if new_window:
        create_args['newWindow'] = True
    if background:
        create_args['background'] = True
    if context_id:
        create_args['browserContextId'] = context_id

    new_id = self.run_cdp('Target.createTarget', **create_args)['targetId']
    # block until a driver for the new tab shows up in _drivers
    while new_id not in self._drivers:
        sleep(.1)
    return new_id
def reconnect(self):
    """Disconnect from the browser and connect again.

    The current browser driver is stopped and removed from the BrowserDriver
    registry, a new one is created, target discovery is switched on again
    and the tab created / destroyed callbacks are re-attached.

    :return: None
    """
    self._driver.stop()
    BrowserDriver.BROWSERS.pop(self.id)
    self._driver = BrowserDriver(self.id, 'browser', self.address, self)
    self.run_cdp('Target.setDiscoverTargets', discover=True)
    callbacks = (('Target.targetDestroyed', self._onTargetDestroyed),
                 ('Target.targetCreated', self._onTargetCreated))
    for event, handler in callbacks:
        self._driver.set_callback(event, handler)
def quit(self, timeout=5, force=False):
    """Close the browser.

    :param timeout: seconds to wait for killed processes to disappear, only used when force is True
    :param force: whether to kill the browser processes right away
    :return: None
    """
    pids = None
    if force:
        # The process list has to be read while the connection is still up:
        # after the driver is stopped every cdp call returns an error, so
        # asking afterwards can never yield any pid.
        try:
            pids = [proc['id'] for proc in self.run_cdp('SystemInfo.getProcessInfo')['processInfo']]
        except Exception:
            pids = None

    try:
        self.run_cdp('Browser.close')
    except PageDisconnectedError:
        pass
    self.driver.stop()

    # copy before iterating: stopping a driver may alter the registries
    for tab_drivers in list(self._all_drivers.values()):
        for tab_driver in list(tab_drivers):
            tab_driver.stop()

    if not pids:
        return

    from os import popen
    from platform import system
    from psutil import Process

    for pid in pids:
        try:
            Process(pid).kill()
        except Exception:
            pass  # best effort: the process may already be gone

    list_cmd = 'tasklist | findstr {}' if system().lower() == 'windows' else 'ps -ef | grep {}'
    end_time = perf_counter() + timeout
    while perf_counter() < end_time:
        still_alive = False
        for pid in pids:
            # the pipe is closed on leaving the with block
            with popen(list_cmd.format(pid)) as pipe:
                sleep(.05)
                try:
                    still_alive = f' {pid} ' in pipe.read()
                except TypeError:
                    pass
            if still_alive:
                break
        if not still_alive:
            break
def _on_disconnect(self):
    """Clean up after the connection to the browser is lost.

    Notifies the page object, removes this browser from the Browser registry
    and, when the browser ran on an automatically chosen port with a user
    data folder, tries for up to 7 seconds to delete that folder.

    :return: None
    """
    self.page._on_disconnect()
    Browser.BROWSERS.pop(self.id, None)

    options = self.page._chromium_options
    if not (options.is_auto_port and options.user_data_path):
        return

    data_dir = Path(options.user_data_path)
    deadline = perf_counter() + 7
    while perf_counter() < deadline and data_dir.exists():
        try:
            rmtree(data_dir)
        except OSError:
            # PermissionError and FileNotFoundError are OSError subclasses; retry shortly
            sleep(.03)
        else:
            break

View File

@ -1,71 +0,0 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from typing import List, Optional, Union, Set, Dict
from .driver import BrowserDriver, Driver
from .._pages.chromium_page import ChromiumPage
from .._units.downloader import DownloadManager
class Browser(object):
    """Type stub of the Browser class."""
    BROWSERS: dict = ...
    page: ChromiumPage = ...
    _driver: BrowserDriver = ...
    id: str = ...
    address: str = ...
    _frames: dict = ...
    # tab id -> Driver created for a tab and not handed out yet
    _drivers: Dict[str, Driver] = ...
    # tab id -> every Driver attached to that tab
    _all_drivers: Dict[str, Set[Driver]] = ...
    _process_id: Optional[int] = ...
    _dl_mgr: DownloadManager = ...
    _connected: bool = ...

    def __new__(cls, address: str, browser_id: str, page: ChromiumPage): ...

    def __init__(self, address: str, browser_id: str, page: ChromiumPage): ...

    def _get_driver(self, tab_id: str, owner=None) -> Driver:
        """Create and return the Driver of the given tab id
        :param tab_id: tab id
        :param owner: the object that uses the driver
        :return: Driver object
        """
        ...

    def run_cdp(self, cmd, **cmd_args) -> dict:
        """Execute a Chrome DevTools Protocol command
        :param cmd: protocol method
        :param cmd_args: arguments
        :return: result of the command
        """
        ...

    @property
    def driver(self) -> BrowserDriver: ...

    @property
    def tabs_count(self) -> int:
        """Return the number of tabs"""
        ...

    @property
    def tab_ids(self) -> List[str]:
        """Return the ids of all tabs as a list"""
        ...

    @property
    def process_id(self) -> Optional[int]:
        """Return the id of the browser process"""
        ...

    def find_tabs(self, title: Optional[str] = None, url: Optional[str] = None,
                  tab_type: Union[str, list, tuple, set, None] = None) -> List[dict]:
        """Find the tabs matching the conditions; title and url are combined with AND
        :param title: text to match against the title
        :param url: text to match against the url
        :param tab_type: tab type, several may be given as list, tuple or set
        :return: list of tab info dicts
        """
        ...

    def close_tab(self, tab_id: str) -> None:
        """Close a tab
        :param tab_id: tab id
        :return: None
        """
        ...

    def stop_driver(self, driver: Driver) -> None:
        """Stop one Driver
        :param driver: Driver object
        :return: None
        """
        ...

    def activate_tab(self, tab_id: str) -> None:
        """Make a tab the active one
        :param tab_id: tab id
        :return: None
        """
        ...

    def get_window_bounds(self, tab_id: Optional[str] = None) -> dict:
        """Return position and size information of the browser window
        :param tab_id: tab id
        :return: dict with the window bounds
        """
        ...

    def new_tab(self, new_window: bool = False, background: bool = False, new_context: bool = False) -> str:
        """Create a new tab
        :param new_window: whether to open the tab in a new window
        :param background: whether to leave the new tab inactive, ignored when new_window is True
        :param new_context: whether to create a new context
        :return: id of the new tab
        """
        ...

    def reconnect(self) -> None:
        """Disconnect and connect again"""
        ...

    def connect_to_page(self) -> None:
        """Run the logic related to the page"""
        ...

    def _onTargetCreated(self, **kwargs) -> None:
        """Run when a tab is created"""
        ...

    def _onTargetDestroyed(self, **kwargs) -> None:
        """Run when a tab is closed"""
        ...

    def quit(self, timeout: float = 5, force: bool = False) -> None:
        """Close the browser
        :param timeout: time to wait for the browser to close
        :param force: whether to kill the processes immediately
        :return: None
        """
        ...

    def _on_disconnect(self) -> None: ...

View File

@ -0,0 +1,496 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from re import match
from shutil import rmtree
from threading import Lock
from time import sleep, perf_counter
from requests import Session
from websocket import WebSocketBadStatusException
from .driver import BrowserDriver, Driver
from .._configs.chromium_options import ChromiumOptions
from .._functions.browser import connect_browser
from .._functions.cookies import CookiesList
from .._functions.settings import Settings
from .._functions.tools import PortFinder
from .._functions.tools import raise_error
from .._pages.chromium_base import Timeout
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._units.downloader import DownloadManager
from .._units.setter import BrowserSetter
from .._units.states import BrowserStates
from .._units.waiter import BrowserWaiter
from ..errors import BrowserConnectError, CDPError
from ..errors import PageDisconnectedError
__ERROR__ = 'error'
class Chromium(object):
_BROWSERS = {}
_lock = Lock()
def __new__(cls, addr_or_opts=None, session_options=None):
opt = handle_options(addr_or_opts)
is_headless, browser_id, is_exists = run_browser(opt)
with cls._lock:
if browser_id in cls._BROWSERS:
r = cls._BROWSERS[browser_id]
while not hasattr(r, '_driver'):
sleep(.05)
return r
r = object.__new__(cls)
r._chromium_options = opt
r._is_headless = is_headless
r._is_exists = is_exists
r.id = browser_id
cls._BROWSERS[browser_id] = r
return r
def __init__(self, addr_or_opts=None, session_options=None):
if hasattr(self, '_created'):
return
self._created = True
self._type = 'Chromium'
self._frames = {}
self._drivers = {}
self._all_drivers = {}
self._relation = {}
self._newest_tab_id = None
self._set = None
self._wait = None
self._states = None
self._timeouts = Timeout(**self._chromium_options.timeouts)
self._load_mode = self._chromium_options.load_mode
self._download_path = str(Path(self._chromium_options.download_path).absolute())
self._auto_handle_alert = None
self._none_ele_return_value = False
self._none_ele_value = None
self.retry_times = self._chromium_options.retry_times
self.retry_interval = self._chromium_options.retry_interval
self.address = self._chromium_options.address
self._disconnect_flag = False
self._driver = BrowserDriver(self.id, 'browser', self.address, self)
if ((not self._chromium_options._ua_set and self._is_headless != self._chromium_options.is_headless)
or (self._is_exists and self._chromium_options._new_env)):
self.quit(3, True)
connect_browser(self._chromium_options)
s = Session()
s.trust_env = False
s.keep_alive = False
ws = s.get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'})
self.id = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
self._driver = BrowserDriver(self.id, 'browser', self.address, self)
ws.close()
s.close()
self._is_exists = False
self._frames = {}
self._drivers = {}
self._all_drivers = {}
self.version = self._run_cdp('Browser.getVersion')['product']
self._process_id = None
try:
r = self._run_cdp('SystemInfo.getProcessInfo')
for i in r.get('processInfo', []):
if i['type'] == 'browser':
self._process_id = i['id']
break
except:
pass
self._run_cdp('Target.setDiscoverTargets', discover=True)
self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed)
self._driver.set_callback('Target.targetCreated', self._onTargetCreated)
self._dl_mgr = DownloadManager(self)
self._session_options = session_options
@property
def user_data_path(self):
return self._chromium_options.user_data_path
@property
def process_id(self):
return self._process_id
@property
def timeout(self):
return self._timeouts.base
@property
def timeouts(self):
return self._timeouts
@property
def load_mode(self):
return self._load_mode
@property
def download_path(self):
return self._download_path
@property
def set(self):
if self._set is None:
self._set = BrowserSetter(self)
return self._set
@property
def states(self):
if self._states is None:
self._states = BrowserStates(self)
return self._states
@property
def wait(self):
if self._wait is None:
self._wait = BrowserWaiter(self)
return self._wait
@property
def tabs_count(self):
j = self._run_cdp('Target.getTargets')['targetInfos'] # 不要改用get避免卡死
return len([i for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')])
@property
def tab_ids(self):
j = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp因为顺序不对
return [i['id'] for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')]
@property
def latest_tab(self):
return self._get_tab(id_or_num=self.tab_ids[0], as_id=not Settings.singleton_tab_obj)
def cookies(self, all_info=False):
cks = self._run_cdp(f'Storage.getCookies')['cookies']
r = cks if all_info else [{'name': c['name'], 'value': c['value'], 'domain': c['domain']} for c in cks]
return CookiesList(r)
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
return self._new_tab(True, url=url, new_window=new_window, background=background, new_context=new_context)
def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page'):
t = self._get_tab(id_or_num=id_or_num, title=title, url=url, tab_type=tab_type, mix=True, as_id=False)
if t._type != 'MixTab':
raise RuntimeError('该标签页已有非MixTab版本如需多对象公用请用Settings设置singleton_tab_obj为False。')
return t
def get_tabs(self, title=None, url=None, tab_type='page'):
return self._get_tabs(title=title, url=url, tab_type=tab_type, mix=True, as_id=False)
def close_tabs(self, tabs_or_ids, others=False):
if isinstance(tabs_or_ids, str):
tabs = {tabs_or_ids}
elif isinstance(tabs_or_ids, ChromiumTab):
tabs = {tabs_or_ids.tab_id}
elif isinstance(tabs_or_ids, (list, tuple)):
tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids)
else:
raise TypeError('tabs_or_ids参数只能传入标签页对象或id。')
all_tabs = set(self.tab_ids)
if others:
tabs = all_tabs - tabs
if len(all_tabs - tabs) > 0:
for tab in tabs:
self._close_tab(tab=tab)
else:
self.quit()
def _close_tab(self, tab):
if isinstance(tab, str):
tab = self.get_tab(tab)
tab._run_cdp('Target.closeTarget', targetId=tab.tab_id)
while tab.driver.is_running and tab.tab_id in self._all_drivers:
sleep(.01)
def activate_tab(self, id_ind_tab):
if isinstance(id_ind_tab, int):
id_ind_tab += -1 if id_ind_tab else 1
id_ind_tab = self.tab_ids[id_ind_tab]
elif isinstance(id_ind_tab, ChromiumTab):
id_ind_tab = id_ind_tab.tab_id
self._run_cdp('Target.activateTarget', targetId=id_ind_tab)
def reconnect(self):
self._disconnect_flag = True
self._driver.stop()
BrowserDriver.BROWSERS.pop(self.id)
self._driver = BrowserDriver(self.id, 'browser', self.address, self)
self._run_cdp('Target.setDiscoverTargets', discover=True)
self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed)
self._driver.set_callback('Target.targetCreated', self._onTargetCreated)
self._disconnect_flag = False
def clear_cache(self, cache=True, cookies=True):
if cache:
self.latest_tab.run_cdp('Network.clearBrowserCache')
if cookies:
self._run_cdp('Storage.clearCookies')
def quit(self, timeout=5, force=False, del_data=False):
try:
self._run_cdp('Browser.close')
except PageDisconnectedError:
pass
self._driver.stop()
drivers = list(self._all_drivers.values())
for tab in drivers:
for driver in tab:
driver.stop()
if force:
pids = None
try:
pids = [pid['id'] for pid in self._run_cdp('SystemInfo.getProcessInfo')['processInfo']]
except:
pass
if pids:
from psutil import Process
for pid in pids:
try:
Process(pid).kill()
except:
pass
from os import popen
from platform import system
end_time = perf_counter() + timeout
while perf_counter() < end_time:
ok = True
for pid in pids:
txt = f'tasklist | findstr {pid}' if system().lower() == 'windows' else f'ps -ef | grep {pid}'
p = popen(txt)
sleep(.05)
try:
if f' {pid} ' in p.read():
ok = False
break
except TypeError:
pass
if ok:
break
if del_data and not self._chromium_options.is_auto_port and self._chromium_options.user_data_path:
path = Path(self._chromium_options.user_data_path)
rmtree(path, True)
def _new_tab(self, mix=True, url=None, new_window=False, background=False, new_context=False):
tab_type = MixTab if mix else ChromiumTab
tab = None
if new_context:
tab = self._run_cdp('Target.createBrowserContext')['browserContextId']
kwargs = {'url': ''}
if new_window:
kwargs['newWindow'] = True
if background:
kwargs['background'] = True
if tab:
kwargs['browserContextId'] = tab
if self.states.is_incognito:
return _new_tab_by_js(self, url, tab_type, new_window)
else:
try:
tab = self._run_cdp('Target.createTarget', **kwargs)['targetId']
except CDPError:
return _new_tab_by_js(self, url, tab_type, new_window)
while self.states.is_alive:
if tab in self._drivers:
break
sleep(.01)
else:
raise BrowserConnectError('浏览器已关闭')
tab = tab_type(self, tab)
if url:
tab.get(url)
return tab
def _get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', mix=True, as_id=False):
if id_or_num is not None:
if isinstance(id_or_num, int):
id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num]
elif isinstance(id_or_num, ChromiumTab):
return id_or_num.tab_id if as_id else ChromiumTab(self, id_or_num.tab_id)
elif id_or_num not in [i['id'] for i in self._driver.get(f'http://{self.address}/json').json()]:
raise ValueError(f'没有找到标签页{id_or_num},所有标签页:{self.tab_ids}')
elif title == url is None and tab_type == 'page':
id_or_num = self.tab_ids[0]
else:
tabs = self._get_tabs(title=title, url=url, tab_type=tab_type, as_id=True)
if tabs:
id_or_num = tabs[0]
else:
raise RuntimeError('没有找到指定标签页。')
if as_id:
return id_or_num
with self._lock:
return MixTab(self, id_or_num) if mix else ChromiumTab(self, id_or_num)
def _get_tabs(self, title=None, url=None, tab_type='page', mix=True, as_id=False):
tabs = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp
if isinstance(tab_type, str):
tab_type = {tab_type}
elif isinstance(tab_type, (list, tuple, set)):
tab_type = set(tab_type)
elif tab_type is not None:
raise TypeError('tab_type只能是set、list、tuple、str、None。')
tabs = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url'])
and (tab_type is None or i['type'] in tab_type)
and i['title'] != 'chrome-extension://neajdppkdcdipfabeoofebfddakdcjhd/audio.html')]
if as_id:
return [tab['id'] for tab in tabs]
with self._lock:
if mix:
return [MixTab(self, tab['id']) for tab in tabs]
else:
return [ChromiumTab(self, tab['id']) for tab in tabs]
def _run_cdp(self, cmd, **cmd_args):
ignore = cmd_args.pop('_ignore', None)
r = self._driver.run(cmd, **cmd_args)
return r if __ERROR__ not in r else raise_error(r, self, ignore)
def _get_driver(self, tab_id, owner=None):
d = self._drivers.pop(tab_id, None)
if not d:
d = Driver(tab_id, 'page', self.address)
d.owner = owner
self._all_drivers.setdefault(tab_id, set()).add(d)
return d
def _onTargetCreated(self, **kwargs):
if (kwargs['targetInfo']['type'] in ('page', 'webview')
and kwargs['targetInfo']['targetId'] not in self._all_drivers
and not kwargs['targetInfo']['url'].startswith('devtools://')):
try:
tab_id = kwargs['targetInfo']['targetId']
self._frames[tab_id] = tab_id
d = Driver(tab_id, 'page', self.address)
self._relation[tab_id] = kwargs['targetInfo'].get('openerId', None)
self._drivers[tab_id] = d
self._all_drivers.setdefault(tab_id, set()).add(d)
self._newest_tab_id = tab_id
except WebSocketBadStatusException:
pass
def _onTargetDestroyed(self, **kwargs):
tab_id = kwargs['targetId']
self._dl_mgr.clear_tab_info(tab_id)
for key in [k for k, i in self._frames.items() if i == tab_id]:
self._frames.pop(key, None)
for d in self._all_drivers.get(tab_id, tuple()):
d.stop()
self._drivers.pop(tab_id, None)
self._all_drivers.pop(tab_id, None)
self._relation.pop(tab_id, None)
def _on_disconnect(self):
if not self._disconnect_flag:
Chromium._BROWSERS.pop(self.id, None)
if self._chromium_options.is_auto_port and self._chromium_options.user_data_path:
path = Path(self._chromium_options.user_data_path)
end_time = perf_counter() + 7
while perf_counter() < end_time:
if not path.exists():
break
try:
rmtree(path)
break
except (PermissionError, FileNotFoundError, OSError):
pass
sleep(.03)
def handle_options(addr_or_opts):
    """Build the ChromiumOptions object used to start or connect to the browser.

    :param addr_or_opts: 'ip:port' string, port number, ChromiumOptions object or a false value
    :return: ChromiumOptions object
    :raises TypeError: when addr_or_opts is of an unsupported type
    """
    if not addr_or_opts:
        options = ChromiumOptions(addr_or_opts)
    elif isinstance(addr_or_opts, ChromiumOptions):
        options = addr_or_opts
    elif isinstance(addr_or_opts, str):
        options = ChromiumOptions()
        options.set_address(addr_or_opts)
        return options
    elif isinstance(addr_or_opts, int):
        options = ChromiumOptions()
        options.set_local_port(addr_or_opts)
        return options
    else:
        raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。')

    # only options that were loaded or passed in can carry the auto-port setting
    if options.is_auto_port:
        port, path = PortFinder(options.tmp_path).get_port(options.is_auto_port)
        options.set_address(f'127.0.0.1:{port}')
        options.set_user_data_path(path)
        options.auto_port(scope=options.is_auto_port)
    return options
def run_browser(chromium_options):
    """Connect to the browser and read its basic information.

    :param chromium_options: ChromiumOptions object
    :return: tuple (is_headless, browser_id, is_exists); is_exists is the value returned by connect_browser
    :raises BrowserConnectError: when the browser cannot be reached or does not report the expected fields
    """
    is_exists = connect_browser(chromium_options)
    session = Session()
    session.trust_env = False
    session.keep_alive = False
    try:
        response = session.get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'})
        try:
            if not response:
                raise BrowserConnectError('\n浏览器连接失败,请确认浏览器是否启动。')
            info = response.json()
        finally:
            response.close()
        browser_id = info['webSocketDebuggerUrl'].split('/')[-1]
        is_headless = 'headless' in info['User-Agent'].lower()
    except KeyError as e:
        # the version info lacks an expected field
        raise BrowserConnectError('浏览器版本太旧或此浏览器不支持接管。') from e
    except BrowserConnectError:
        raise
    except Exception as e:
        raise BrowserConnectError('\n浏览器连接失败,请确认浏览器是否启动。') from e
    finally:
        # release the session on the failure paths as well
        session.close()
    return is_headless, browser_id, is_exists
def _new_tab_by_js(browser: Chromium, url, tab_type, new_window):
    """Open a new tab by running ``window.open`` in an existing tab.

    :param browser: Chromium object
    :param url: url to open, empty for a blank tab; must contain a scheme
    :param tab_type: class of the tab object to return (MixTab or another tab class)
    :param new_window: whether to use the '_new' target instead of '_blank'
    :return: object of the new tab
    :raises ValueError: when url has no scheme
    """
    from json import dumps

    mix = tab_type == MixTab
    tab = browser._get_tab(mix=mix)
    if url and not match(r'^.*?://.*', url):
        raise ValueError('url也许需要加上http://')
    # dumps() yields a properly escaped string literal, so quotes in the url cannot break the script
    url = dumps(url) if url else '""'
    new = 'target="_new"' if new_window else 'target="_blank"'
    # id of the latest tab; latest_tab itself may be a plain id without a tab_id attribute
    tid = browser.tab_ids[0]
    tab.run_js(f'window.open({url}, {new})')
    tid = browser.wait.new_tab(curr_tab=tid)
    return browser._get_tab(tid, mix=mix)

View File

@ -0,0 +1,300 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from threading import Lock
from typing import List, Optional, Set, Dict, Union, Tuple, Literal, Any
from .driver import BrowserDriver, Driver
from .._configs.chromium_options import ChromiumOptions
from .._configs.session_options import SessionOptions
from .._functions.cookies import CookiesList
from .._pages.chromium_base import Timeout, ChromiumBase
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._units.downloader import DownloadManager
from .._units.setter import BrowserSetter
from .._units.states import BrowserStates
from .._units.waiter import BrowserWaiter
class Chromium(object):
    """Type stub of the Chromium class."""
    _BROWSERS: dict = ...
    _lock: Lock = ...

    id: str = ...
    address: str = ...
    version: str = ...
    retry_times: int = ...
    retry_interval: float = ...

    _set: Optional[BrowserSetter] = ...
    _wait: Optional[BrowserWaiter] = ...
    _states: Optional[BrowserStates] = ...
    _chromium_options: ChromiumOptions = ...
    _session_options: SessionOptions = ...
    _driver: BrowserDriver = ...
    _frames: dict = ...
    _drivers: Dict[str, Driver] = ...
    _all_drivers: Dict[str, Set[Driver]] = ...
    _relation: Dict[str, Optional[str]] = ...
    _process_id: Optional[int] = ...
    _dl_mgr: DownloadManager = ...
    _timeouts: Timeout = ...
    _load_mode: str = ...
    _download_path: str = ...
    _auto_handle_alert: Optional[bool] = ...
    _is_exists: bool = ...
    _is_headless: bool = ...
    _disconnect_flag: bool = ...
    _none_ele_return_value: bool = ...
    _none_ele_value: Any = ...
    _newest_tab_id: Optional[str] = ...

    def __new__(cls,
                addr_or_opts: Union[str, int, ChromiumOptions, None] = None,
                session_options: Union[SessionOptions, None, Literal[False]] = None):
        """
        :param addr_or_opts: browser 'address:port', ChromiumOptions object, or port number (int)
        :param session_options: default Session configuration used by dual-mode tabs;
                                None uses the ini file configuration, False does not read the ini file
        """
        ...

    def __init__(self, addr_or_opts: Union[str, int, ChromiumOptions, None] = None,
                 session_options: Union[SessionOptions, None, Literal[False]] = None):
        """
        :param addr_or_opts: browser 'address:port', ChromiumOptions object, or port number (int)
        :param session_options: default Session configuration used by dual-mode tabs;
                                None uses the ini file configuration, False does not read the ini file
        """
        ...

    @property
    def user_data_path(self) -> str:
        """Return the user data folder path"""
        ...

    @property
    def process_id(self) -> Optional[int]:
        """Return the id of the browser process"""
        ...

    @property
    def timeout(self) -> float:
        """Return the base timeout setting"""
        ...

    @property
    def timeouts(self) -> Timeout:
        """Return all timeout settings"""
        ...

    @property
    def load_mode(self) -> Literal['none', 'normal', 'eager']:
        """Return the page load mode, one of 'none', 'normal', 'eager'"""
        ...

    @property
    def download_path(self) -> str:
        """Return the default download path"""
        ...

    @property
    def set(self) -> BrowserSetter:
        """Return the object used for settings"""
        ...

    @property
    def states(self) -> BrowserStates:
        """Return the object used for reading states"""
        ...

    @property
    def wait(self) -> BrowserWaiter:
        """Return the object used for waiting"""
        ...

    @property
    def tabs_count(self) -> int:
        """Return the number of tabs, only page and webview types are counted"""
        ...

    @property
    def tab_ids(self) -> List[str]:
        """Return the ids of all tabs as a list, only page and webview types are included"""
        ...

    @property
    def latest_tab(self) -> Union[MixTab, str]:
        """Return the latest tab, i.e. the one created or activated last.
        A Tab object is returned when Settings.singleton_tab_obj==True, otherwise the tab id"""
        ...

    def cookies(self, all_info: bool = False) -> CookiesList:
        """Return the cookies of all domains as a list
        :param all_info: whether to return all fields, if False only name, value and domain are returned
        :return: list of cookies
        """
        ...

    def new_tab(self,
                url: Optional[str] = None,
                new_window: bool = False,
                background: bool = False,
                new_context: bool = False) -> MixTab:
        """Create a new tab
        :param url: url the new tab navigates to, None creates an empty tab
        :param new_window: whether to open the tab in a new window, has no effect in incognito mode
        :param background: whether to leave the new tab inactive, has no effect in incognito mode,
                           guest mode, or when new_window is True
        :param new_context: whether to create a separate environment, has no effect in incognito and guest mode
        :return: the new tab object
        """
        ...

    def get_tab(self,
                id_or_num: Union[str, int, None] = None,
                title: Optional[str] = None,
                url: Optional[str] = None,
                tab_type: Union[str, list, tuple, set, None] = 'page',
                as_id: bool = False) -> Union[MixTab, str]:
        """Get one tab object; when id_or_num is not None the following parameters are ignored
        :param id_or_num: id or index of the tab; indexes start at 1, negative numbers count from the end;
                          the order is the activation order, not the visual order
        :param title: text to match against the title (containment), None matches all
        :param url: text to match against the url (containment), None matches all
        :param tab_type: tab type, several may be given in a list, e.g. 'page', 'iframe'; None matches all
        :param as_id: whether to return the tab id instead of the tab object
        :return: Tab object
        """
        ...

    def get_tabs(self,
                 title: Optional[str] = None,
                 url: Optional[str] = None,
                 tab_type: Union[str, list, tuple, set, None] = 'page',
                 as_id: bool = False) -> Union[List[MixTab], List[str]]:
        """Find the tabs matching the conditions and return them as a list; title and url are combined with AND
        :param title: text to match against the title
        :param url: text to match against the url
        :param tab_type: tab type, several may be given in a list
        :param as_id: whether to return tab ids instead of tab objects
        :return: list of Tab objects
        """
        ...

    def close_tabs(self,
                   tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]],
                   Tuple[Union[str, ChromiumTab], ...]],
                   others: bool = False) -> None:
        """Close the given tabs, several may be passed
        :param tabs_or_ids: tab object or id, several may be given in a list or tuple
        :param others: whether to close the tabs other than the given ones
        :return: None
        """
        ...

    def _close_tab(self, tab: Union[ChromiumBase, str]) -> None:
        """Close one tab
        :param tab: tab object or id
        :return: None
        """
        ...

    def activate_tab(self, id_ind_tab: Union[int, str, ChromiumTab]) -> None:
        """Bring a tab to the front
        :param id_ind_tab: tab id (str), Tab object, or tab index (int); indexes start at 1
        :return: None
        """
        ...

    def reconnect(self) -> None:
        """Disconnect and connect again"""
        ...

    def clear_cache(self, cache: bool = True, cookies: bool = True) -> None:
        """Clear cached data, the items to clear can be chosen
        :param cache: whether to clear the cache
        :param cookies: whether to clear the cookies
        :return: None
        """
        ...

    def quit(self, timeout: float = 5, force: bool = False, del_data: bool = False) -> None:
        """Close the browser
        :param timeout: time to wait for the browser to close
        :param force: whether to kill the processes immediately
        :param del_data: whether to delete the user data folder
        :return: None
        """
        ...

    def _new_tab(self,
                 mix: bool = True,
                 url: Optional[str] = None,
                 new_window: bool = False,
                 background: bool = False,
                 new_context: bool = False) -> Union[ChromiumTab, MixTab]:
        """Create a new tab
        :param mix: whether to create a MixTab
        :param url: url the new tab navigates to
        :param new_window: whether to open the tab in a new window
        :param background: whether to leave the new tab inactive, ignored when new_window is True
        :param new_context: whether to create a new context
        :return: the new tab object
        """
        ...

    def _get_tab(self,
                 id_or_num: Union[str, int, None] = None,
                 title: Optional[str] = None,
                 url: Optional[str] = None,
                 tab_type: Union[str, list, tuple, set, None] = 'page',
                 mix: bool = True,
                 as_id: bool = False) -> Union[ChromiumTab, MixTab, str]:
        """Get one tab object; when id_or_num is not None the following parameters are ignored
        :param id_or_num: id or index of the tab; indexes start at 1, negative numbers count from the end;
                          the order is the activation order, not the visual order
        :param title: text to match against the title (containment), None matches all
        :param url: text to match against the url (containment), None matches all
        :param tab_type: tab type, several may be given in a list, e.g. 'page', 'iframe'; None matches all
        :param mix: whether to return a Tab object that can switch modes
        :param as_id: whether to return the tab id instead of the tab object
        :return: Tab object
        """
        ...

    def _get_tabs(self,
                  title: Optional[str] = None,
                  url: Optional[str] = None,
                  tab_type: Union[str, list, tuple, set, None] = 'page',
                  mix: bool = True,
                  as_id: bool = False) -> Union[List[ChromiumTab], List[MixTab], List[str]]:
        """Find the tabs matching the conditions and return them as a list; title and url are combined with AND
        :param title: text to match against the title
        :param url: text to match against the url
        :param tab_type: tab type, several may be given in a list
        :param mix: whether to return Tab objects that can switch modes
        :param as_id: whether to return tab ids instead of tab objects
        :return: list of Tab objects
        """
        ...

    def _run_cdp(self, cmd, **cmd_args) -> dict:
        """Execute a Chrome DevTools Protocol command
        :param cmd: protocol method
        :param cmd_args: arguments
        :return: result of the command
        """
        ...

    def _get_driver(self, tab_id: str, owner=None) -> Driver:
        """Create and return the Driver of the given tab id
        :param tab_id: tab id
        :param owner: the object that uses the driver
        :return: Driver object
        """
        ...

    def _onTargetCreated(self, **kwargs) -> None: ...

    def _onTargetDestroyed(self, **kwargs) -> None: ...

    def _on_disconnect(self) -> None: ...

View File

@ -2,34 +2,31 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from json import dumps, loads, JSONDecodeError
from queue import Queue, Empty
from threading import Thread, Event
from threading import Thread
from time import perf_counter, sleep
from requests import adapters
from requests import Session
from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection,
WebSocketException, WebSocketBadStatusException)
from .._functions.settings import Settings
from ..errors import PageDisconnectedError
from ..errors import PageDisconnectedError, BrowserConnectError
adapters.DEFAULT_RETRIES = 5
class Driver(object):
def __init__(self, tab_id, tab_type, address, owner=None):
"""
:param tab_id: 标签页id
:param tab_type: 标签页类型
:param address: 浏览器连接地址
:param owner: 创建这个驱动的对象
"""
self.id = tab_id
self.address = address
self.type = tab_type
self.owner = owner
# self._debug = True
# self._debug = False
self.alert_flag = False # 标记alert出现跳过一条请求后复原
@ -43,7 +40,7 @@ class Driver(object):
self._handle_event_th.daemon = True
self._handle_immediate_event_th = None
self._stopped = Event()
self.is_running = False
self.event_handlers = {}
self.immediate_event_handlers = {}
@ -54,11 +51,6 @@ class Driver(object):
self.start()
def _send(self, message, timeout=None):
"""发送信息到浏览器,并返回浏览器返回的信息
:param message: 发送给浏览器的数据
:param timeout: 超时时间为None表示无限
:return: 浏览器返回的数据
"""
self._cur_id += 1
ws_id = self._cur_id
message['id'] = ws_id
@ -86,7 +78,7 @@ class Driver(object):
self.method_results.pop(ws_id, None)
return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'}
while not self._stopped.is_set():
while self.is_running:
try:
result = self.method_results[ws_id].get(timeout=.2)
self.method_results.pop(ws_id, None)
@ -106,8 +98,7 @@ class Driver(object):
return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'}
def _recv_loop(self):
"""接收浏览器信息的守护线程方法"""
while not self._stopped.is_set():
while self.is_running:
try:
# self._ws.settimeout(1)
msg_json = self._ws.recv()
@ -144,8 +135,7 @@ class Driver(object):
# print(f'未知信息:{msg}')
def _handle_event_loop(self):
"""当接收到浏览器信息,执行已绑定的方法"""
while not self._stopped.is_set():
while self.is_running:
try:
event = self.event_queue.get(timeout=1)
except Empty:
@ -158,7 +148,7 @@ class Driver(object):
self.event_queue.task_done()
def _handle_immediate_event_loop(self):
while not self._stopped.is_set() and not self.immediate_event_queue.empty():
while not self.immediate_event_queue.empty():
function, kwargs = self.immediate_event_queue.get(timeout=1)
try:
function(**kwargs)
@ -166,11 +156,6 @@ class Driver(object):
pass
def _handle_immediate_event(self, function, kwargs):
"""处理立即执行的动作
:param function: 要运行下方法
:param kwargs: 方法参数
:return: None
"""
self.immediate_event_queue.put((function, kwargs))
if self._handle_immediate_event_th is None or not self._handle_immediate_event_th.is_alive():
self._handle_immediate_event_th = Thread(target=self._handle_immediate_event_loop)
@ -183,7 +168,7 @@ class Driver(object):
:param kwargs: cdp参数
:return: 执行结果
"""
if self._stopped.is_set():
if not self.is_running:
return {'error': 'connection disconnected', 'type': 'connection_error'}
timeout = kwargs.pop('_timeout', Settings.cdp_timeout)
@ -191,13 +176,12 @@ class Driver(object):
if 'result' not in result and 'error' in result:
kwargs['_timeout'] = timeout
return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'),
'method': _method, 'args': kwargs}
'method': _method, 'args': kwargs, 'data': result['error'].get('data')}
else:
return result['result']
def start(self):
"""启动连接"""
self._stopped.clear()
self.is_running = True
try:
self._ws = create_connection(self._websocket_url, enable_multithread=True, suppress_origin=True)
except WebSocketBadStatusException as e:
@ -205,28 +189,37 @@ class Driver(object):
raise RuntimeError('请升级websocket-client库。')
else:
return
except ConnectionRefusedError:
raise BrowserConnectError('浏览器未开启或已关闭。')
self._recv_th.start()
self._handle_event_th.start()
return True
def stop(self):
"""中断连接"""
self._stop()
while self._handle_event_th.is_alive() or self._recv_th.is_alive():
sleep(.1)
sleep(.01)
return True
def _stop(self):
"""中断连接"""
if self._stopped.is_set():
if not self.is_running:
return False
self._stopped.set()
self.is_running = False
if self._ws:
self._ws.close()
self._ws = None
# try:
# while not self.immediate_event_queue.empty():
# function, kwargs = self.immediate_event_queue.get_nowait()
# try:
# function(**kwargs)
# except PageDisconnectedError:
# raise
# pass
# sleep(.1)
#
# while not self.event_queue.empty():
# event = self.event_queue.get_nowait()
# function = self.event_handlers.get(event['method'])
@ -244,12 +237,6 @@ class Driver(object):
self.owner._on_disconnect()
def set_callback(self, event, callback, immediate=False):
"""绑定cdp event和回调方法
:param event: cdp event
:param callback: 绑定到cdp event的回调方法
:param immediate: 是否要立即处理的动作
:return: None
"""
handler = self.immediate_event_handlers if immediate else self.event_handlers
if callback:
handler[event] = callback
@ -271,13 +258,15 @@ class BrowserDriver(Driver):
self._created = True
BrowserDriver.BROWSERS[tab_id] = self
super().__init__(tab_id, tab_type, address, owner)
self._control_session = Session()
self._control_session.trust_env = False
def __repr__(self):
return f'<BrowserDriver {self.id}>'
def get(self, url):
    """Send a GET request to ``url`` using a throw-away session.

    A new ``Session`` is created for every call and is always closed before
    this method returns, including when the request itself raises.

    :param url: URL to request
    :return: the (already closed) ``Response`` object
    """
    s = Session()
    s.trust_env = False  # do not pick up proxy settings from the environment
    s.keep_alive = False
    try:
        r = s.get(url, headers={'Connection': 'close'})
        r.close()
        return r
    finally:
        # Runs on success and on failure so the session is never leaked.
        s.close()

View File

@ -2,25 +2,16 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from queue import Queue
from threading import Thread, Event
from threading import Thread
from typing import Union, Callable, Dict, Optional
from requests import Response, Session
from requests import Response
from websocket import WebSocket
from .browser import Browser
class GenericAttr(object):
def __init__(self, name: str, tab: Driver): ...
def __getattr__(self, item: str) -> Callable: ...
def __setattr__(self, key: str, value: Callable) -> None: ...
from .._base.chromium import Chromium
class Driver(object):
@ -35,43 +26,89 @@ class Driver(object):
_recv_th: Thread
_handle_event_th: Thread
_handle_immediate_event_th: Optional[Thread]
_stopped: Event
is_running: bool
event_handlers: dict
immediate_event_handlers: dict
method_results: dict
event_queue: Queue
immediate_event_queue: Queue
def __init__(self, tab_id: str, tab_type: str, address: str, owner=None): ...
def __init__(self, tab_id: str, tab_type: str, address: str, owner=None):
"""
:param tab_id: 标签页id
:param tab_type: 标签页类型
:param address: 浏览器连接地址
:param owner: 创建这个驱动的对象
"""
...
def _send(self, message: dict, timeout: float = None) -> dict: ...
def _send(self, message: dict, timeout: float = None) -> dict:
"""发送信息到浏览器,并返回浏览器返回的信息
:param message: 发送给浏览器的数据
:param timeout: 超时时间为None表示无限
:return: 浏览器返回的数据
"""
...
def _recv_loop(self) -> None: ...
def _recv_loop(self) -> None:
"""接收浏览器信息的守护线程方法"""
...
def _handle_event_loop(self) -> None: ...
def _handle_event_loop(self) -> None:
"""当接收到浏览器信息,执行已绑定的方法"""
...
def _handle_immediate_event_loop(self): ...
def _handle_immediate_event(self, function: Callable, kwargs: dict): ...
def _handle_immediate_event(self, function: Callable, kwargs: dict):
"""处理立即执行的动作
:param function: 要运行下方法
:param kwargs: 方法参数
:return: None
"""
...
def run(self, _method: str, **kwargs) -> dict: ...
def run(self, _method: str, **kwargs) -> dict:
"""执行cdp方法
:param _method: cdp方法名
:param kwargs: cdp参数
:return: 执行结果
"""
...
def start(self) -> bool: ...
def start(self) -> bool:
"""启动连接"""
...
def stop(self) -> bool: ...
def stop(self) -> bool:
"""中断连接"""
...
def _stop(self) -> None: ...
def _stop(self) -> None:
"""中断连接"""
...
def set_callback(self, event: str, callback: Union[Callable, None], immediate: bool = False) -> None: ...
def set_callback(self, event: str, callback: Union[Callable, None], immediate: bool = False) -> None:
"""绑定cdp event和回调方法
:param event: cdp event
:param callback: 绑定到cdp event的回调方法
:param immediate: 是否要立即处理的动作
:return: None
"""
...
class BrowserDriver(Driver):
BROWSERS: Dict[str, Driver] = ...
owner: Browser = ...
_control_session: Session = ...
owner: Chromium = ...
def __new__(cls, tab_id: str, tab_type: str, address: str, owner: Browser): ...
def __new__(cls, tab_id: str, tab_type: str, address: str, owner: Chromium): ...
def __init__(self, tab_id: str, tab_type: str, address: str, owner: Browser): ...
def __init__(self, tab_id: str, tab_type: str, address: str, owner: Chromium): ...
def get(self, url) -> Response: ...
def get(self, url) -> Response:
"""
:param url: 要访问的链接
:return: Response对象
"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from re import search
@ -13,15 +12,12 @@ from .options_manage import OptionsManager
class ChromiumOptions(object):
def __init__(self, read_file=True, ini_path=None):
"""
:param read_file: 是否从默认ini文件中读取配置信息
:param ini_path: ini文件路径为None则读取默认ini文件
"""
self._user_data_path = None
self._user = 'Default'
self._prefs_to_del = []
self.clear_file_flags = False
self._headless = None
self._is_headless = False
self._ua_set = False
if read_file is False:
ini_path = False
@ -33,10 +29,10 @@ class ChromiumOptions(object):
self.ini_path = str(ini_path)
else:
self.ini_path = str(Path(__file__).parent / 'configs.ini')
om = OptionsManager(ini_path)
om = OptionsManager(ini_path)
options = om.chromium_options
self._download_path = om.paths.get('download_path', None) or None
self._download_path = om.paths.get('download_path', '.') or '.'
self._tmp_path = om.paths.get('tmp_path', None) or None
self._arguments = options.get('arguments', [])
self._browser_path = options.get('browser_path', '')
@ -47,6 +43,11 @@ class ChromiumOptions(object):
self._load_mode = options.get('load_mode', 'normal')
self._system_user_path = options.get('system_user_path', False)
self._existing_only = options.get('existing_only', False)
self._new_env = options.get('new_env', False)
for i in self._arguments:
if i.startswith('--headless'):
self._is_headless = True
break
self._proxy = om.proxies.get('http', None) or om.proxies.get('https', None)
@ -74,102 +75,86 @@ class ChromiumOptions(object):
return
def __repr__(self):
return f'<ChromiumOptions at {id(self)}>'
@property
def download_path(self):
"""默认下载路径文件路径"""
return self._download_path
@property
def browser_path(self):
"""浏览器启动文件路径"""
return self._browser_path
@property
def user_data_path(self):
"""返回用户数据文件夹路径"""
return self._user_data_path
@property
def tmp_path(self):
"""返回临时文件夹路径"""
return self._tmp_path
@property
def user(self):
"""返回用户配置文件夹名称"""
return self._user
@property
def load_mode(self):
"""返回页面加载策略,'normal', 'eager', 'none'"""
return self._load_mode
@property
def timeouts(self):
"""返回timeouts设置"""
return self._timeouts
@property
def proxy(self):
"""返回代理设置"""
return self._proxy
@property
def address(self):
"""返回浏览器地址ip:port"""
return self._address
@property
def arguments(self):
"""返回浏览器命令行设置列表"""
return self._arguments
@property
def extensions(self):
"""以list形式返回要加载的插件路径"""
return self._extensions
@property
def preferences(self):
"""返回用户首选项配置"""
return self._prefs
@property
def flags(self):
"""返回实验项配置"""
return self._flags
@property
def system_user_path(self):
"""返回是否使用系统安装的浏览器所使用的用户数据文件夹"""
return self._system_user_path
@property
def is_existing_only(self):
"""返回是否只接管现有浏览器方式"""
return self._existing_only
@property
def is_auto_port(self):
"""返回是否使用自动端口和用户文件如指定范围则返回范围tuple"""
return self._auto_port
@property
def retry_times(self):
"""返回连接失败时的重试次数"""
return self._retry_times
@property
def retry_interval(self):
"""返回连接失败时的重试间隔(秒)"""
return self._retry_interval
@property
def is_headless(self):
return self._is_headless
def set_retry(self, times=None, interval=None):
"""设置连接失败时的重试操作
:param times: 重试次数
:param interval: 重试间隔
:return: 当前对象
"""
if times is not None:
self._retry_times = times
if interval is not None:
@ -177,41 +162,36 @@ class ChromiumOptions(object):
return self
def set_argument(self, arg, value=None):
    """Set a browser command-line argument.

    Any existing entry for ``arg`` is removed first, so the argument never
    appears twice. ``--headless`` is handled specially so that
    ``_is_headless`` stays in step with the argument list.

    :param arg: argument name, e.g. ``'--headless'``
    :param value: argument value; ``None`` for an argument without a value,
        ``False`` to delete the argument
    :return: the current object
    """
    self.remove_argument(arg)

    if value is False:
        # Deleting the argument; deleting ``--headless`` switches headless off.
        if arg == '--headless':
            self._is_headless = False
        return self

    if arg != '--headless':
        self._arguments.append(arg if value is None else f'{arg}={value}')
        return self

    if value == 'false':
        # Explicit "off": nothing is added back to the argument list.
        self._is_headless = False
    else:
        # A bare ``--headless`` is written as ``--headless=new``.
        headless_value = 'new' if value is None else value
        self._arguments.append(f'--headless={headless_value}')
        self._is_headless = True
    return self
def remove_argument(self, value):
    """Remove an argument from the argument list.

    Both the bare form (``value``) and the valued form (``value=...``) are
    removed, including duplicates. The list is updated in place so that
    references previously obtained to it stay valid.

    :param value: argument name; for arguments with a value pass only the name
    :return: the current object
    """
    prefix = f'{value}='
    kept = [arg for arg in self._arguments
            if arg != value and not arg.startswith(prefix)]
    if len(kept) != len(self._arguments):
        self._arguments[:] = kept
        if value == '--headless':
            # No ``--headless`` entry is left, so the cached flag must follow.
            self._is_headless = False
    return self
def add_extension(self, path):
"""添加插件
:param path: 插件路径可指向文件夹
:return: 当前对象
"""
path = Path(path)
if not path.exists():
raise OSError('插件路径不存在。')
@ -219,43 +199,22 @@ class ChromiumOptions(object):
return self
def remove_extensions(self):
"""移除所有插件
:return: 当前对象
"""
self._extensions = []
return self
def set_pref(self, arg, value):
"""设置Preferences文件中的用户设置项
:param arg: 设置项名称
:param value: 设置项值
:return: 当前对象
"""
self._prefs[arg] = value
return self
def remove_pref(self, arg):
"""删除用户首选项设置,不能删除已设置到文件中的项
:param arg: 设置项名称
:return: 当前对象
"""
self._prefs.pop(arg, None)
return self
def remove_pref_from_file(self, arg):
"""删除用户配置文件中已设置的项
:param arg: 设置项名称
:return: 当前对象
"""
self._prefs_to_del.append(arg)
return self
def set_flag(self, flag, value=None):
"""设置实验项
:param flag: 设置项名称
:param value: 设置项的值为False则删除该项
:return: 当前对象
"""
if value is False:
self._flags.pop(flag, None)
else:
@ -263,33 +222,22 @@ class ChromiumOptions(object):
return self
def clear_flags_in_file(self):
"""删除浏览器配置文件中已设置的实验项"""
self.clear_file_flags = True
return self
def clear_flags(self):
"""清空本对象已设置的flag参数"""
self._flags = {}
return self
def clear_arguments(self):
    """Clear every argument set on this object.

    :return: the current object
    """
    self._arguments = []
    # With no arguments left there is no ``--headless`` entry either.
    self._is_headless = False
    return self
def clear_prefs(self):
"""清空本对象已设置的pref参数"""
self._prefs = {}
return self
def set_timeouts(self, base=None, page_load=None, script=None, implicit=None):
"""设置超时时间,单位为秒
:param base: 默认超时时间
:param page_load: 页面加载超时时间
:param script: 脚本运行超时时间
:return: 当前对象
"""
base = base if base is not None else implicit
def set_timeouts(self, base=None, page_load=None, script=None):
if base is not None:
self._timeouts['base'] = base
if page_load is not None:
@ -300,74 +248,43 @@ class ChromiumOptions(object):
return self
def set_user(self, user='Default'):
"""设置使用哪个用户配置文件夹
:param user: 用户文件夹名称
:return: 当前对象
"""
self.set_argument('--profile-directory', user)
self._user = user
return self
def headless(self, on_off=True):
    """Set whether the browser window is hidden (headless mode).

    :param on_off: truthy to enable headless mode, falsy to disable it
    :return: the current object
    """
    # ``False`` makes set_argument() delete the argument. Any other falsy
    # value (``None``, ``0``) must not be passed through, because
    # set_argument() would treat it as a value and enable headless mode.
    return self.set_argument('--headless', 'new' if on_off else False)
def no_imgs(self, on_off=True):
"""设置是否加载图片
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--blink-settings=imagesEnabled=false', on_off)
def no_js(self, on_off=True):
"""设置是否禁用js
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--disable-javascript', on_off)
def mute(self, on_off=True):
"""设置是否静音
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--mute-audio', on_off)
def incognito(self, on_off=True):
    """Set whether the browser starts in incognito / private mode.

    Both the Chrome flag and the Edge flag are set, or removed, together.

    :param on_off: on or off
    :return: the current object
    """
    on_off = None if on_off else False
    self.set_argument('--incognito', on_off)
    return self.set_argument('--inprivate', on_off)  # flag used by Edge
def new_env(self, on_off=True):
self._new_env = on_off
return self
def ignore_certificate_errors(self, on_off=True):
"""设置是否忽略证书错误
:param on_off: 开或关
:return: 当前对象
"""
on_off = None if on_off else False
return self.set_argument('--ignore-certificate-errors', on_off)
def set_user_agent(self, user_agent):
"""设置user agent
:param user_agent: user agent文本
:return: 当前对象
"""
return self.set_argument('--user-agent', user_agent)
def set_proxy(self, proxy):
"""设置代理
:param proxy: 代理url和端口
:return: 当前对象
"""
if search(r'.*?:.*?@.*?\..*', proxy):
print('你似乎在设置使用账号密码的代理,暂时不支持这种代理,可自行用插件实现需求。')
if proxy.lower().startswith('socks'):
@ -376,13 +293,6 @@ class ChromiumOptions(object):
return self.set_argument('--proxy-server', proxy)
def set_load_mode(self, value):
"""设置load_mode可接收 'normal', 'eager', 'none'
normal默认情况下使用, 等待所有资源下载完成
eagerDOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中
none完全不阻塞
:param value: 可接收 'normal', 'eager', 'none'
:return: 当前对象
"""
if value not in ('normal', 'eager', 'none'):
raise ValueError("只能选择 'normal', 'eager', 'none'")
self._load_mode = value.lower()
@ -420,52 +330,28 @@ class ChromiumOptions(object):
return self
def set_local_port(self, port):
"""设置本地启动端口
:param port: 端口号
:return: 当前对象
"""
self._address = f'127.0.0.1:{port}'
self._auto_port = False
return self
def set_address(self, address):
    """Set the browser address, in the form ``'ip:port'``.

    ``localhost`` is rewritten to ``127.0.0.1`` and a leading ``http://`` or
    ``https://`` scheme is dropped.

    :param address: browser address
    :return: the current object
    """
    address = address.replace('localhost', '127.0.0.1')
    # Remove the scheme as a prefix. str.lstrip() takes a set of characters,
    # so it would also eat leading 'h', 't', 'p', 's' of a host name.
    for scheme in ('http://', 'https://'):
        if address.startswith(scheme):
            address = address[len(scheme):]
            break
    self._address = address
    return self
def set_browser_path(self, path):
"""设置浏览器可执行文件路径
:param path: 浏览器路径
:return: 当前对象
"""
self._browser_path = str(path)
return self
def set_download_path(self, path):
    """Set the folder downloaded files are saved to.

    :param path: download path; ``None`` selects the current directory ``'.'``
    :return: the current object
    """
    self._download_path = '.' if path is None else str(path)
    return self
def set_tmp_path(self, path):
    """Set the folder temporary files are saved to.

    :param path: temporary folder path; ``None`` clears the setting
    :return: the current object
    """
    # Keep ``None`` as ``None``: str(None) would store the literal text 'None'.
    self._tmp_path = None if path is None else str(path)
    return self
def set_user_data_path(self, path):
"""设置用户文件夹路径
:param path: 用户文件夹路径
:return: 当前对象
"""
u = str(path)
self.set_argument('--user-data-dir', u)
self._user_data_path = u
@ -473,49 +359,25 @@ class ChromiumOptions(object):
return self
def set_cache_path(self, path):
"""设置缓存路径
:param path: 缓存路径
:return: 当前对象
"""
self.set_argument('--disk-cache-dir', str(path))
return self
def use_system_user_path(self, on_off=True):
"""设置是否使用系统安装的浏览器默认用户文件夹
:param on_off: 开或关
:return: 当前对象
"""
self._system_user_path = on_off
return self
def auto_port(self, on_off=True, tmp_path=None, scope=None):
"""自动获取可用端口
:param on_off: 是否开启自动获取端口号
:param tmp_path: 临时文件保存路径为None时保存到系统临时文件夹on_off为False时此参数无效
:param scope: 指定端口范围不含最后的数字为None则使用[9600-19600)
:return: 当前对象
"""
def auto_port(self, on_off=True, scope=None):
if on_off:
self._auto_port = scope if scope else True
if tmp_path:
self._tmp_path = str(tmp_path)
self._auto_port = scope if scope else (9600, 59600)
else:
self._auto_port = False
return self
def existing_only(self, on_off=True):
"""设置只接管已有浏览器,不自动启动新的
:param on_off: 是否开启自动获取端口号
:return: 当前对象
"""
self._existing_only = on_off
return self
def save(self, path=None):
"""保存设置到文件
:param path: ini文件的路径 None 保存到当前读取的配置文件传入 'default' 保存到默认ini文件
:return: 保存文件的绝对路径
"""
if path == 'default':
path = (Path(__file__).parent / 'configs.ini').absolute()
@ -537,7 +399,7 @@ class ChromiumOptions(object):
# 设置chromium_options
attrs = ('address', 'browser_path', 'arguments', 'extensions', 'user', 'load_mode',
'auto_port', 'system_user_path', 'existing_only', 'flags')
'auto_port', 'system_user_path', 'existing_only', 'flags', 'new_env')
for i in attrs:
om.set_item('chromium_options', i, self.__getattribute__(f'_{i}'))
# 设置代理
@ -562,8 +424,4 @@ class ChromiumOptions(object):
return path
def save_to_default(self):
"""保存当前配置到默认ini文件"""
return self.save('default')
def __repr__(self):
return f'<ChromiumOptions at {id(self)}>'

View File

@ -2,171 +2,401 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union, Any, Literal, Optional, Tuple
class ChromiumOptions(object):
def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None):
self.ini_path: str = ...
self._driver_path: str = ...
self._user_data_path: str = ...
self._download_path: str = ...
self._tmp_path: str = ...
self._arguments: list = ...
self._browser_path: str = ...
self._user: str = ...
self._load_mode: str = ...
self._timeouts: dict = ...
self._proxy: str = ...
self._address: str = ...
self._extensions: list = ...
self._prefs: dict = ...
self._flags: dict = ...
self._prefs_to_del: list = ...
self.clear_file_flags: bool = ...
self._auto_port: bool = ...
self._system_user_path: bool = ...
self._existing_only: bool = ...
self._headless: bool = ...
self._retry_times: int = ...
self._retry_interval: float = ...
ini_path: Optional[str] = ...
_driver_path: str = ...
_user_data_path: Optional[str] = ...
_download_path: str = ...
_tmp_path: str = ...
_arguments: list = ...
_browser_path: str = ...
_user: str = ...
_load_mode: str = ...
_timeouts: dict = ...
_proxy: str = ...
_address: str = ...
_extensions: list = ...
_prefs: dict = ...
_flags: dict = ...
_prefs_to_del: list = ...
_new_env: bool = ...
clear_file_flags: bool = ...
_auto_port: Union[Tuple[int, int], False] = ...
_system_user_path: bool = ...
_existing_only: bool = ...
_retry_times: int = ...
_retry_interval: float = ...
_is_headless: bool = ...
_ua_set: bool = ...
def __init__(self,
read_file: [bool, None] = True,
ini_path: Union[str, Path] = None):
"""
:param read_file: 是否从默认ini文件中读取配置信息
:param ini_path: ini文件路径为None则读取默认ini文件
"""
...
@property
def download_path(self) -> str: ...
def download_path(self) -> str:
"""默认下载路径文件路径"""
...
@property
def browser_path(self) -> str: ...
def browser_path(self) -> str:
"""浏览器启动文件路径"""
...
@property
def user_data_path(self) -> str: ...
def user_data_path(self) -> str:
"""返回用户数据文件夹路径"""
...
@property
def tmp_path(self) -> Optional[str]: ...
def tmp_path(self) -> Optional[str]:
"""返回临时文件夹路径"""
...
@property
def user(self) -> str: ...
def user(self) -> str:
"""返回用户配置文件夹名称"""
...
@property
def load_mode(self) -> str: ...
def load_mode(self) -> str:
"""返回页面加载策略,'normal', 'eager', 'none'"""
...
@property
def timeouts(self) -> dict: ...
def timeouts(self) -> dict:
"""返回timeouts设置"""
...
@property
def proxy(self) -> str: ...
def proxy(self) -> str:
"""返回代理设置"""
...
@property
def address(self) -> str: ...
def address(self) -> str:
"""返回浏览器地址ip:port"""
...
@property
def arguments(self) -> list: ...
def arguments(self) -> list:
"""返回浏览器命令行设置列表"""
...
@property
def extensions(self) -> list: ...
def extensions(self) -> list:
"""以list形式返回要加载的插件路径"""
...
@property
def preferences(self) -> dict: ...
def preferences(self) -> dict:
"""返回用户首选项配置"""
...
@property
def flags(self) -> dict: ...
def flags(self) -> dict:
"""返回实验项配置"""
...
@property
def system_user_path(self) -> bool: ...
def system_user_path(self) -> bool:
"""返回是否使用系统安装的浏览器所使用的用户数据文件夹"""
...
@property
def is_existing_only(self) -> bool: ...
def is_existing_only(self) -> bool:
"""返回是否只接管现有浏览器方式"""
...
@property
def is_auto_port(self) -> Union[bool, Tuple[int, int]]: ...
def is_auto_port(self) -> Union[bool, Tuple[int, int]]:
"""返回是否使用自动端口和用户文件如指定范围则返回范围tuple"""
...
@property
def retry_times(self) -> int: ...
def retry_times(self) -> int:
"""返回连接失败时的重试次数"""
...
@property
def retry_interval(self) -> float: ...
def retry_interval(self) -> float:
"""返回连接失败时的重试间隔(秒)"""
...
def set_retry(self, times: int = None, interval: float = None) -> ChromiumOptions: ...
@property
def is_headless(self) -> bool:
"""返回是否无头模式"""
...
def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: ...
def set_retry(self, times: int = None, interval: float = None) -> ChromiumOptions:
"""设置连接失败时的重试操作
:param times: 重试次数
:param interval: 重试间隔
:return: 当前对象
"""
...
def remove_argument(self, value: str) -> ChromiumOptions: ...
def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions:
"""设置浏览器配置的argument属性
:param arg: 属性名
:param value: 属性值有值的属性传入值没有的传入None如传入False删除该项
:return: 当前对象
"""
...
def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: ...
def remove_argument(self, value: str) -> ChromiumOptions:
"""移除一个argument项
:param value: 设置项名有值的设置项传入设置名称即可
:return: 当前对象
"""
...
def remove_extensions(self) -> ChromiumOptions: ...
def add_extension(self, path: Union[str, Path]) -> ChromiumOptions:
"""添加插件
:param path: 插件路径可指向文件夹
:return: 当前对象
"""
...
def set_pref(self, arg: str, value: Any) -> ChromiumOptions: ...
def remove_extensions(self) -> ChromiumOptions:
"""移除所有插件
:return: 当前对象
"""
...
def remove_pref(self, arg: str) -> ChromiumOptions: ...
def set_pref(self, arg: str, value: Any) -> ChromiumOptions:
"""设置Preferences文件中的用户设置项
:param arg: 设置项名称
:param value: 设置项值
:return: 当前对象
"""
...
def remove_pref_from_file(self, arg: str) -> ChromiumOptions: ...
def remove_pref(self, arg: str) -> ChromiumOptions:
"""删除用户首选项设置,不能删除已设置到文件中的项
:param arg: 设置项名称
:return: 当前对象
"""
...
def set_flag(self, flag: str, value: Union[int, str, bool] = None) -> ChromiumOptions: ...
def remove_pref_from_file(self, arg: str) -> ChromiumOptions:
"""删除用户配置文件中已设置的项
:param arg: 设置项名称
:return: 当前对象
"""
...
def clear_flags_in_file(self) -> ChromiumOptions: ...
def set_flag(self, flag: str, value: Union[int, str, bool] = None) -> ChromiumOptions:
"""设置实验项
:param flag: 设置项名称
:param value: 设置项的值为False则删除该项
:return: 当前对象
"""
...
def clear_flags(self) -> ChromiumOptions: ...
def clear_flags_in_file(self) -> ChromiumOptions:
"""删除浏览器配置文件中已设置的实验项"""
...
def clear_arguments(self) -> ChromiumOptions: ...
def clear_flags(self) -> ChromiumOptions:
"""清空本对象已设置的flag参数"""
...
def clear_prefs(self) -> ChromiumOptions: ...
def clear_arguments(self) -> ChromiumOptions:
"""清空本对象已设置的argument参数"""
...
def clear_prefs(self) -> ChromiumOptions:
"""清空本对象已设置的pref参数"""
...
def set_timeouts(self,
base: float = None,
page_load: float = None,
script: float = None) -> ChromiumOptions: ...
script: float = None) -> ChromiumOptions:
"""设置超时时间,单位为秒
:param base: 默认超时时间
:param page_load: 页面加载超时时间
:param script: 脚本运行超时时间
:return: 当前对象
"""
...
def set_user(self, user: str = 'Default') -> ChromiumOptions: ...
def set_user(self, user: str = 'Default') -> ChromiumOptions:
"""设置使用哪个用户配置文件夹
:param user: 用户文件夹名称
:return: 当前对象
"""
...
def headless(self, on_off: bool = True) -> ChromiumOptions: ...
def headless(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否隐藏浏览器界面
:param on_off: 开或关
:return: 当前对象
"""
...
def no_imgs(self, on_off: bool = True) -> ChromiumOptions: ...
def no_imgs(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否加载图片
:param on_off: 开或关
:return: 当前对象
"""
...
def no_js(self, on_off: bool = True) -> ChromiumOptions: ...
def no_js(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否禁用js
:param on_off: 开或关
:return: 当前对象
"""
...
def mute(self, on_off: bool = True) -> ChromiumOptions: ...
def mute(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否静音
:param on_off: 开或关
:return: 当前对象
"""
...
def incognito(self, on_off: bool = True) -> ChromiumOptions: ...
def incognito(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否使用无痕模式启动
:param on_off: 开或关
:return: 当前对象
"""
...
def set_user_agent(self, user_agent: str) -> ChromiumOptions: ...
def new_env(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否使用全新浏览器环境
:param on_off: 开或关
:return: 当前对象
"""
...
def set_proxy(self, proxy: str) -> ChromiumOptions: ...
def ignore_certificate_errors(self, on_off=True) -> ChromiumOptions:
"""设置是否忽略证书错误
:param on_off: 开或关
:return: 当前对象
"""
...
def ignore_certificate_errors(self, on_off=True) -> ChromiumOptions: ...
def set_user_agent(self, user_agent: str) -> ChromiumOptions:
"""设置user agent
:param user_agent: user agent文本
:return: 当前对象
"""
...
def set_load_mode(self, value: Literal['normal', 'eager', 'none']) -> ChromiumOptions: ...
def set_proxy(self, proxy: str) -> ChromiumOptions:
"""设置代理
:param proxy: 代理url和端口
:return: 当前对象
"""
...
def set_browser_path(self, path: Union[str, Path]) -> ChromiumOptions: ...
def set_load_mode(self, value: Literal['normal', 'eager', 'none']) -> ChromiumOptions:
"""设置load_mode可接收 'normal', 'eager', 'none'
normal默认情况下使用, 等待所有资源下载完成
eagerDOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中
none完全不阻塞
:param value: 可接收 'normal', 'eager', 'none'
:return: 当前对象
"""
...
def set_local_port(self, port: Union[str, int]) -> ChromiumOptions: ...
def set_local_port(self, port: Union[str, int]) -> ChromiumOptions:
"""设置本地启动端口
:param port: 端口号
:return: 当前对象
"""
...
def set_address(self, address: str) -> ChromiumOptions: ...
def set_address(self, address: str) -> ChromiumOptions:
"""设置浏览器地址,格式'ip:port'
:param address: 浏览器地址
:return: 当前对象
"""
...
def set_download_path(self, path: Union[str, Path]) -> ChromiumOptions: ...
def set_browser_path(self, path: Union[str, Path]) -> ChromiumOptions:
"""设置浏览器可执行文件路径
:param path: 浏览器路径
:return: 当前对象
"""
...
def set_tmp_path(self, path: Union[str, Path]) -> ChromiumOptions: ...
def set_download_path(self, path: Union[str, Path]) -> ChromiumOptions:
"""设置下载文件保存路径
:param path: 下载路径
:return: 当前对象
"""
...
def set_user_data_path(self, path: Union[str, Path]) -> ChromiumOptions: ...
def set_tmp_path(self, path: Union[str, Path]) -> ChromiumOptions:
"""设置临时文件文件保存路径
:param path: 下载路径
:return: 当前对象
"""
...
def set_cache_path(self, path: Union[str, Path]) -> ChromiumOptions: ...
def set_user_data_path(self, path: Union[str, Path]) -> ChromiumOptions:
"""设置用户文件夹路径
:param path: 用户文件夹路径
:return: 当前对象
"""
...
def set_cache_path(self, path: Union[str, Path]) -> ChromiumOptions:
"""设置缓存路径
:param path: 缓存路径
:return: 当前对象
"""
...
def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None,
address: str = None, download_path: Union[str, Path] = None, user_data_path: Union[str, Path] = None,
cache_path: Union[str, Path] = None) -> ChromiumOptions: ...
def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: ...
def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions:
"""设置是否使用系统安装的浏览器默认用户文件夹
:param on_off: 开或关
:return: 当前对象
"""
...
def auto_port(self,
on_off: bool = True,
tmp_path: Union[str, Path] = None,
scope: Tuple[int, int] = None) -> ChromiumOptions: ...
scope: Tuple[int, int] = None) -> ChromiumOptions:
"""自动获取可用端口
:param on_off: 是否开启自动获取端口号
:param scope: 指定端口范围不含最后的数字为None则使用[9600-59600)
:return: 当前对象
"""
...
def existing_only(self, on_off: bool = True) -> ChromiumOptions: ...
def existing_only(self, on_off: bool = True) -> ChromiumOptions:
"""设置只接管已有浏览器,不自动启动新的
:param on_off: 是否开启自动获取端口号
:return: 当前对象
"""
...
def save(self, path: Union[str, Path] = None) -> str: ...
def save(self, path: Union[str, Path] = None) -> str:
"""保存设置到文件
:param path: ini文件的路径 None 保存到当前读取的配置文件传入 'default' 保存到默认ini文件
:return: 保存文件的绝对路径
"""
...
def save_to_default(self) -> str: ...
def save_to_default(self) -> str:
"""保存当前配置到默认ini文件"""
...

View File

@ -14,6 +14,7 @@ user = Default
auto_port = False
system_user_path = False
existing_only = False
new_env = False
[session_options]
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': 'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7'}

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from configparser import RawConfigParser, NoSectionError, NoOptionError
from pathlib import Path
@ -14,9 +13,6 @@ class OptionsManager(object):
"""管理配置文件内容的类"""
def __init__(self, path=None):
"""初始化,读取配置文件,如没有设置临时文件夹,则设置并新建
:param path: ini文件的路径为None则找项目文件夹下的找不到则读取模块文件夹下的
"""
if path is False:
self.ini_path = None
else:
@ -64,6 +60,7 @@ class OptionsManager(object):
self.set_item('chromium_options', 'auto_port', 'False')
self.set_item('chromium_options', 'system_user_path', 'False')
self.set_item('chromium_options', 'existing_only', 'False')
self.set_item('chromium_options', 'new_env', 'False')
self.set_item('session_options', 'headers', "{'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X "
"10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10."
"1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml"
@ -78,18 +75,9 @@ class OptionsManager(object):
self.set_item('others', 'retry_interval', '2')
def __getattr__(self, item):
"""以dict形似返回获取大项信息
:param item: 项名
:return: None
"""
return self.get_option(item)
def get_value(self, section, item):
"""获取配置的值
:param section: 段名
:param item: 项名
:return: 项值
"""
try:
return eval(self._conf.get(section, item))
except (SyntaxError, NameError):
@ -98,10 +86,6 @@ class OptionsManager(object):
return None
def get_option(self, section):
"""把section内容以字典方式返回
:param section: 段名
:return: 段内容生成的字典
"""
items = self._conf.items(section)
option = dict()
@ -114,30 +98,15 @@ class OptionsManager(object):
return option
def set_item(self, section, item, value):
"""设置配置值
:param section: 段名
:param item: 项名
:param value: 项值
:return: None
"""
self._conf.set(section, item, str(value))
self.__setattr__(f'_{section}', None)
return self
def remove_item(self, section, item):
"""删除配置值
:param section: 段名
:param item: 项名
:return: None
"""
self._conf.remove_option(section, item)
return self
def save(self, path=None):
"""保存配置文件
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 保存路径
"""
default_path = (Path(__file__).parent / 'configs.ini').absolute()
if path == 'default':
path = default_path
@ -162,11 +131,9 @@ class OptionsManager(object):
return path
def save_to_default(self):
"""保存当前配置到默认ini文件"""
return self.save('default')
def show(self):
"""打印所有设置信息"""
for i in self._conf.sections():
print(f'[{i}]')
pprint(self.get_option(i))

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from configparser import RawConfigParser
from pathlib import Path
@ -15,20 +14,62 @@ class OptionsManager(object):
file_exists: bool = ...
_conf: RawConfigParser = ...
def __init__(self, path: Union[Path, str] = None): ...
def __init__(self, path: Union[Path, str] = None):
"""初始化,读取配置文件,如没有设置临时文件夹,则设置并新建
:param path: ini文件的路径为None则找项目文件夹下的找不到则读取模块文件夹下的
"""
...
def __getattr__(self, item) -> dict: ...
def __getattr__(self, item) -> dict:
"""以dict形似返回获取大项信息
:param item: 项名
:return: None
"""
...
def get_value(self, section: str, item: str) -> Any: ...
def get_value(self, section: str, item: str) -> Any:
"""获取配置的值
:param section: 段名
:param item: 项名
:return: 项值
"""
...
def get_option(self, section: str) -> dict: ...
def get_option(self, section: str) -> dict:
"""把section内容以字典方式返回
:param section: 段名
:return: 段内容生成的字典
"""
...
def set_item(self, section: str, item: str, value: Any) -> 'OptionsManager':
    """Set a configuration value.

    :param section: section name
    :param item: option name
    :param value: option value
    :return: the current object
    """
    ...
def remove_item(self, section: str, item: str) -> 'OptionsManager':
    """Delete a configuration value.

    :param section: section name
    :param item: option name
    :return: the current object
    """
    ...
def save(self, path: str = None) -> str: ...
def save(self, path: str = None) -> str:
"""保存配置文件
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 保存路径
"""
...
def save_to_default(self) -> str: ...
def save_to_default(self) -> str:
"""保存当前配置到默认ini文件"""
...
def show(self) -> None: ...
def show(self) -> None:
"""打印所有设置信息"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from copy import copy
from pathlib import Path
@ -12,19 +11,15 @@ from requests import Session
from requests.structures import CaseInsensitiveDict
from .options_manage import OptionsManager
from .._functions.web import cookies_to_tuple, set_session_cookies, format_headers
from .._functions.cookies import cookies_to_tuple, set_session_cookies
from .._functions.web import format_headers
class SessionOptions(object):
"""requests的Session对象配置类"""
def __init__(self, read_file=True, ini_path=None):
"""
:param read_file: 是否从文件读取配置
:param ini_path: ini文件路径
"""
self.ini_path = None
self._download_path = None
self._download_path = '.'
self._timeout = 10
self._del_set = set() # 记录要从ini文件删除的参数
@ -83,71 +78,51 @@ class SessionOptions(object):
self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None))
self._timeout = om.timeouts.get('base', 10)
self._download_path = om.paths.get('download_path', None) or None
self._download_path = om.paths.get('download_path', '.') or '.'
others = om.others
self._retry_times = others.get('retry_times', 3)
self._retry_interval = others.get('retry_interval', 2)
def __repr__(self):
    """Return a short tag made of the class name and this object's ``id()``."""
    return f'<SessionOptions at {id(self)}>'
# ===========须独立处理的项开始============
@property
def download_path(self):
    """Return the default download path held in ``_download_path``."""
    return self._download_path
def set_download_path(self, path):
"""设置默认下载路径
:param path: 下载路径
:return: 返回当前对象
"""
self._download_path = str(path)
self._download_path = '.' if path is None else str(path)
return self
@property
def timeout(self):
    """Return the configured timeout value."""
    return self._timeout
def set_timeout(self, second):
    """Set the timeout.

    :param second: timeout in seconds
    :return: this object, so calls can be chained
    """
    self._timeout = second
    return self
@property
def proxies(self):
    """Return the proxies setting; an unset (None) value is replaced by an empty dict."""
    if self._proxies is None:
        # create the container on first access so callers always receive a dict
        self._proxies = {}
    return self._proxies
def set_proxies(self, http=None, https=None):
    """Set the proxies setting.

    :param http: proxy address used for http
    :param https: proxy address used for https
    :return: this object, so calls can be chained
    """
    # both addresses are stored together as one dict through the shared _sets() helper
    self._sets('proxies', {'http': http, 'https': https})
    return self
@property
def retry_times(self):
    """Return the number of retries performed when a connection fails."""
    return self._retry_times
@property
def retry_interval(self):
    """Return the interval, in seconds, between retries after a failed connection."""
    return self._retry_interval
def set_retry(self, times=None, interval=None):
"""设置连接失败时的重试操作
:param times: 重试次数
:param interval: 重试间隔
:return: 当前对象
"""
if times is not None:
self._retry_times = times
if interval is not None:
@ -158,16 +133,11 @@ class SessionOptions(object):
@property
def headers(self):
    """Return the headers setting; an unset (None) value is replaced by an empty dict."""
    if self._headers is None:
        # create the container on first access so callers always receive a mapping
        self._headers = {}
    return self._headers
def set_headers(self, headers):
"""设置headers参数
:param headers: 参数值传入None可在ini文件标记删除
:return: 返回当前对象
"""
if headers is None:
self._headers = None
self._del_set.add('headers')
@ -177,11 +147,6 @@ class SessionOptions(object):
return self
def set_a_header(self, name, value):
"""设置headers中一个项
:param name: 设置名称
:param value: 设置值
:return: 返回当前对象
"""
if self._headers is None:
self._headers = {}
@ -189,10 +154,6 @@ class SessionOptions(object):
return self
def remove_a_header(self, name):
"""从headers中删除一个设置
:param name: 要删除的设置
:return: 返回当前对象
"""
if self._headers is None:
return self
@ -201,156 +162,99 @@ class SessionOptions(object):
return self
def clear_headers(self):
    """Clear every configured header.

    Resets ``_headers`` to None and records ``'headers'`` in ``_del_set``,
    the set of options to be deleted from the ini file.

    :return: this object, so calls can be chained like the other setters
    """
    self._headers = None
    self._del_set.add('headers')
    # the type stub declares ``-> SessionOptions``; return self to honour it
    return self
@property
def cookies(self):
    """Return the stored cookies; an unset (None) value is replaced by an empty list."""
    if self._cookies is None:
        self._cookies = []
    return self._cookies
def set_cookies(self, cookies):
    """Store one or more cookies.

    :param cookies: Cookie, CookieJar, list, tuple, str or dict; pass None to mark
                    the entry for deletion from the ini file
    :return: this object, so calls can be chained
    """
    if cookies is not None:
        # normalise every accepted input form into a plain list
        cookies = list(cookies_to_tuple(cookies))
    self._sets('cookies', cookies)
    return self
@property
def auth(self):
    """Return the authentication setting."""
    return self._auth
def set_auth(self, auth):
    """Set the authentication tuple or object.

    :param auth: authentication tuple or object
    :return: this object, so calls can be chained
    """
    self._sets('auth', auth)
    return self
@property
def hooks(self):
    """Return the hook callbacks; an unset (None) value is replaced by an empty dict."""
    if self._hooks is None:
        self._hooks = {}
    return self._hooks
def set_hooks(self, hooks):
    """Set the hook callbacks.

    :param hooks: hook callbacks
    :return: this object, so calls can be chained
    """
    # assigned directly rather than through _sets(), so None is not recorded in _del_set
    self._hooks = hooks
    return self
@property
def params(self):
    """Return the query-parameter setting; an unset (None) value is replaced by an empty dict."""
    if self._params is None:
        self._params = {}
    return self._params
def set_params(self, params):
    """Set the query-parameter dict.

    :param params: dict of query parameters
    :return: this object, so calls can be chained
    """
    self._sets('params', params)
    return self
@property
def verify(self):
    """Return the setting that controls SSL certificate verification."""
    return self._verify
def set_verify(self, on_off):
    """Set whether SSL certificates are verified.

    :param on_off: whether to verify SSL certificates
    :return: this object, so calls can be chained
    """
    self._sets('verify', on_off)
    return self
@property
def cert(self):
    """Return the SSL client certificate setting."""
    return self._cert
def set_cert(self, cert):
    """Set the SSL client certificate: a path to a .pem file, or a (cert, key) tuple.

    :param cert: certificate path or tuple
    :return: this object, so calls can be chained
    """
    self._sets('cert', cert)
    return self
@property
def adapters(self):
    """Return the adapter setting; an unset (None) value is replaced by an empty list."""
    if self._adapters is None:
        self._adapters = []
    return self._adapters
def add_adapter(self, url, adapter):
    """Register an adapter for a url.

    :param url: url the adapter applies to
    :param adapter: adapter object
    :return: this object, so calls can be chained
    """
    # _adapters may still be None (the ``adapters`` property guards against that);
    # create the list here as well so the first call does not fail on None.append
    if self._adapters is None:
        self._adapters = []
    self._adapters.append((url, adapter))
    return self
@property
def stream(self):
    """Return the setting that controls streamed response content."""
    return self._stream
def set_stream(self, on_off):
    """Set whether response content is streamed.

    :param on_off: whether to use streamed response content
    :return: this object, so calls can be chained
    """
    self._sets('stream', on_off)
    return self
@property
def trust_env(self):
    """Return the trust-environment setting."""
    return self._trust_env
def set_trust_env(self, on_off):
    """Set whether the environment is trusted.

    :param on_off: whether to trust the environment
    :return: this object, so calls can be chained
    """
    self._sets('trust_env', on_off)
    return self
@property
def max_redirects(self):
    """Return the maximum number of redirects."""
    return self._max_redirects
def set_max_redirects(self, times):
    """Set the maximum number of redirects.

    :param times: maximum number of redirects
    :return: this object, so calls can be chained
    """
    self._sets('max_redirects', times)
    return self
def _sets(self, arg, val):
"""给属性赋值或标记删除
:param arg: 属性名称
:param val: 参数值
:return: None
"""
if val is None:
self.__setattr__(f'_{arg}', None)
self._del_set.add(arg)
@ -360,10 +264,6 @@ class SessionOptions(object):
self._del_set.remove(arg)
def save(self, path=None):
"""保存设置到文件
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 保存文件的绝对路径
"""
if path == 'default':
path = (Path(__file__).parent / 'configs.ini').absolute()
@ -411,15 +311,12 @@ class SessionOptions(object):
return path
def save_to_default(self):
    """Save the current configuration to the default ini file and return ``save('default')``'s result."""
    return self.save('default')
def as_dict(self):
    """Return this object as a dict, as produced by ``session_options_to_dict``."""
    return session_options_to_dict(self)
def make_session(self):
"""根据内在的配置生成Session对象ua从对象中分离"""
s = Session()
h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict()
@ -437,11 +334,6 @@ class SessionOptions(object):
return s, h
def from_session(self, session, headers=None):
"""从Session对象中读取配置
:param session: Session对象
:param headers: headers
:return: 当前对象
"""
self._headers = CaseInsensitiveDict(copy(session.headers).update(headers)) if headers else session.headers
self._cookies = session.cookies
self._auth = session.auth
@ -457,15 +349,8 @@ class SessionOptions(object):
self._adapters = [(k, i) for k, i in session.adapters.items()]
return self
def __repr__(self):
return f'<SessionOptions at {id(self)}>'
def session_options_to_dict(options):
"""把session配置对象转换为字典
:param options: session配置对象或字典
:return: 配置字典
"""
if options in (False, None):
return SessionOptions(read_file=False).as_dict()

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from http.cookiejar import CookieJar, Cookie
from pathlib import Path
@ -12,125 +11,287 @@ from typing import Any, Union, Tuple, Optional
from requests import Session
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
class SessionOptions(object):
def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None):
self.ini_path: str = ...
self._download_path: str = ...
self._headers: dict = ...
self._cookies: list = ...
self._auth: tuple = ...
self._proxies: dict = ...
self._hooks: dict = ...
self._params: dict = ...
self._verify: bool = ...
self._cert: Union[str, tuple] = ...
self._adapters: list = ...
self._stream: bool = ...
self._trust_env: bool = ...
self._max_redirects: int = ...
self._timeout: float = ...
self._del_set: set = ...
self._retry_times: int = ...
self._retry_interval: float = ...
"""requests的Session对象配置类"""
ini_path: Optional[str] = ...
_download_path: str = ...
_headers: Union[dict, CaseInsensitiveDict, None] = ...
_cookies: Union[list, RequestsCookieJar, None] = ...
_auth: Optional[tuple] = ...
_proxies: Optional[dict] = ...
_hooks: Optional[dict] = ...
_params: Union[dict, None] = ...
_verify: Optional[bool] = ...
_cert: Union[str, tuple, None] = ...
_adapters: Optional[list] = ...
_stream: Optional[bool] = ...
_trust_env: Optional[bool] = ...
_max_redirects: Optional[int] = ...
_timeout: float = ...
_del_set: set = ...
_retry_times: int = ...
_retry_interval: float = ...
def __init__(self,
             read_file: Optional[bool] = True,
             ini_path: Union[str, Path, None] = None):
    """
    :param read_file: whether to read the configuration from a file
    :param ini_path: path of the ini file
    """
    ...
@property
def download_path(self) -> str: ...
def download_path(self) -> str:
"""返回默认下载路径属性信息"""
...
def set_download_path(self, path: Union[str, Path]) -> SessionOptions: ...
def set_download_path(self, path: Union[str, Path]) -> SessionOptions:
"""设置默认下载路径
:param path: 下载路径
:return: 返回当前对象
"""
...
@property
def timeout(self) -> float: ...
def timeout(self) -> float:
"""返回timeout属性信息"""
...
def set_timeout(self, second: float) -> SessionOptions: ...
def set_timeout(self, second: float) -> SessionOptions:
"""设置超时信息
:param second: 秒数
:return: 返回当前对象
"""
...
@property
def headers(self) -> dict: ...
def proxies(self) -> dict:
"""返回proxies设置信息"""
...
def set_headers(self, headers: Union[dict, str, None]) -> SessionOptions: ...
def set_a_header(self, name: str, value: str) -> SessionOptions: ...
def remove_a_header(self, name: str) -> SessionOptions: ...
def clear_headers(self) -> SessionOptions: ...
def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions:
"""设置proxies参数
:param http: http代理地址
:param https: https代理地址
:return: 返回当前对象
"""
...
@property
def cookies(self) -> list: ...
def set_cookies(self, cookies: Union[Cookie, CookieJar, list, tuple, str, dict, None]) -> SessionOptions: ...
def retry_times(self) -> int:
"""返回连接失败时的重试次数"""
...
@property
def auth(self) -> Union[Tuple[str, str], HTTPBasicAuth]: ...
def retry_interval(self) -> float:
"""返回连接失败时的重试间隔(秒)"""
...
def set_auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> SessionOptions: ...
def set_retry(self, times: int = None, interval: float = None) -> SessionOptions:
"""设置连接失败时的重试操作
:param times: 重试次数
:param interval: 重试间隔
:return: 当前对象
"""
...
@property
def proxies(self) -> dict: ...
def headers(self) -> dict:
"""返回headers设置信息"""
...
def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions: ...
def set_headers(self, headers: Union[dict, str, None]) -> SessionOptions:
"""设置headers参数
:param headers: 参数值传入None可在ini文件标记删除
:return: 返回当前对象
"""
...
def set_a_header(self, name: str, value: str) -> SessionOptions:
"""设置headers中一个项
:param name: 设置名称
:param value: 设置值
:return: 返回当前对象
"""
...
def remove_a_header(self, name: str) -> SessionOptions:
"""从headers中删除一个设置
:param name: 要删除的设置
:return: 返回当前对象
"""
...
def clear_headers(self) -> SessionOptions:
"""清空已设置的header参数"""
...
@property
def retry_times(self) -> int: ...
def cookies(self) -> list:
"""以list形式返回cookies"""
...
def set_cookies(self, cookies: Union[Cookie, CookieJar, list, tuple, str, dict, None]) -> SessionOptions:
"""设置一个或多个cookies信息
:param cookies: cookies可为Cookie, CookieJar, list, tuple, str, dict传入None可在ini文件标记删除
:return: 返回当前对象
"""
...
@property
def retry_interval(self) -> float: ...
def auth(self) -> Union[Tuple[str, str], HTTPBasicAuth]:
"""返回认证设置信息"""
...
def set_retry(self, times: int = None, interval: float = None) -> SessionOptions: ...
def set_auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> SessionOptions:
"""设置认证元组或对象
:param auth: 认证元组或对象
:return: 返回当前对象
"""
...
@property
def hooks(self) -> dict: ...
def hooks(self) -> dict:
"""返回回调方法"""
...
def set_hooks(self, hooks: Union[dict, None]) -> SessionOptions: ...
def set_hooks(self, hooks: Union[dict, None]) -> SessionOptions:
"""设置回调方法
:param hooks: 回调方法
:return: 返回当前对象
"""
...
@property
def params(self) -> dict: ...
def params(self) -> dict:
"""返回连接参数设置信息"""
...
def set_params(self, params: Union[dict, None]) -> SessionOptions: ...
def set_params(self, params: Union[dict, None]) -> SessionOptions:
"""设置查询参数字典
:param params: 查询参数字典
:return: 返回当前对象
"""
...
@property
def verify(self) -> bool: ...
def verify(self) -> bool:
"""返回是否验证SSL证书设置"""
...
def set_verify(self, on_off: Union[bool, None]) -> SessionOptions: ...
def set_verify(self, on_off: Union[bool, None]) -> SessionOptions:
"""设置是否验证SSL证书
:param on_off: 是否验证 SSL 证书
:return: 返回当前对象
"""
...
@property
def cert(self) -> Union[str, tuple]: ...
def cert(self) -> Union[str, tuple]:
"""返回SSL证书设置信息"""
...
def set_cert(self, cert: Union[str, Tuple[str, str], None]) -> SessionOptions: ...
def set_cert(self, cert: Union[str, Tuple[str, str], None]) -> SessionOptions:
"""SSL客户端证书文件的路径(.pem格式),或('cert', 'key')元组
:param cert: 证书路径或元组
:return: 返回当前对象
"""
...
@property
def adapters(self): list: ...
def adapters(self) -> list:
"""返回适配器设置信息"""
...
def add_adapter(self, url: str, adapter: HTTPAdapter) -> SessionOptions: ...
def add_adapter(self, url: str, adapter: HTTPAdapter) -> SessionOptions:
"""添加适配器
:param url: 适配器对应url
:param adapter: 适配器对象
:return: 返回当前对象
"""
...
@property
def stream(self) -> bool: ...
def stream(self) -> bool:
"""返回是否使用流式响应内容设置信息"""
...
def set_stream(self, on_off: Union[bool, None]) -> SessionOptions: ...
def set_stream(self, on_off: Union[bool, None]) -> SessionOptions:
"""设置是否使用流式响应内容
:param on_off: 是否使用流式响应内容
:return: 返回当前对象
"""
...
@property
def trust_env(self) -> bool: ...
def trust_env(self) -> bool:
"""返回是否信任环境设置信息"""
...
def set_trust_env(self, on_off: Union[bool, None]) -> SessionOptions: ...
def set_trust_env(self, on_off: Union[bool, None]) -> SessionOptions:
"""设置是否信任环境
:param on_off: 是否信任环境
:return: 返回当前对象
"""
...
@property
def max_redirects(self) -> int: ...
def max_redirects(self) -> int:
"""返回最大重定向次数"""
...
def set_max_redirects(self, times: Union[int, None]) -> SessionOptions: ...
def set_max_redirects(self, times: Union[int, None]) -> SessionOptions:
"""设置最大重定向次数
:param times: 最大重定向次数
:return: 返回当前对象
"""
...
def _sets(self, arg: str, val: Any) -> None: ...
def _sets(self, arg: str, val: Any) -> None:
"""给属性赋值或标记删除
:param arg: 属性名称
:param val: 参数值
:return: None
"""
...
def save(self, path: str = None) -> str: ...
def save(self, path: str = None) -> str:
"""保存设置到文件
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 保存文件的绝对路径
"""
...
def save_to_default(self) -> str: ...
def save_to_default(self) -> str:
"""保存当前配置到默认ini文件"""
...
def as_dict(self) -> dict: ...
def as_dict(self) -> dict:
"""以字典形式返回本对象"""
...
def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: ...
def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]:
"""根据内在的配置生成Session对象headers从对象中分离"""
...
def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: ...
def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions:
"""从Session对象中读取配置
:param session: Session对象
:param headers: headers
:return: 当前对象
"""
...
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ...
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]:
"""把session配置对象转换为字典
:param options: session配置对象或字典
:return: 配置字典
"""
...

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from .._functions.settings import Settings
from ..errors import ElementNotFoundError
@ -11,11 +10,6 @@ from ..errors import ElementNotFoundError
class NoneElement(object):
def __init__(self, page=None, method=None, args=None):
"""
:param page: 元素所在页面
:param method: 查找元素的方法
:param args: 查找元素的参数
"""
if method and Settings.raise_when_ele_not_found: # 无传入method时不自动抛出由调用者处理
raise ElementNotFoundError(None, method=method, arguments=args)
@ -26,7 +20,7 @@ class NoneElement(object):
self._none_ele_value = None
self._none_ele_return_value = False
self.method = method
self.args = args
self.args = {} if args is None else args
self._get = None
def __call__(self, *args, **kwargs):
@ -35,11 +29,14 @@ class NoneElement(object):
else:
return self
def __repr__(self):
    """Return a tag showing the lookup method and each ``key=value`` pair from ``args``."""
    pairs = ', '.join(f'{key}={val}' for key, val in self.args.items())
    return f'<NoneElement method={self.method}, {pairs}>'
def __getattr__(self, item):
if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args)
elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before',
'after', 'get_frame', 'shadow_root', 'sr'):
elif item in ('ele', 's_ele', 'parent', 'child', 'next', 'prev', 'before', 'east', 'north', 'south', 'west',
'offset', 'over', 'after', 'get_frame', 'shadow_root', 'sr'):
return self
else:
if item in ('size', 'link', 'css_path', 'xpath', 'comments', 'texts', 'tag', 'html', 'inner_html',
@ -49,11 +46,7 @@ class NoneElement(object):
raise ElementNotFoundError(None, self.method, self.args)
def __eq__(self, other):
if other is None:
return True
return other is None
def __bool__(self):
    """Always evaluate as False in a boolean context."""
    return False
def __repr__(self):
return 'None'

View File

@ -0,0 +1,32 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Any
from .._base.base import BasePage
class NoneElement(object):
    """Type stub for NoneElement, an object that is falsy and compares equal to None."""

    def __init__(self,
                 page: BasePage = None,
                 method: str = None,
                 args: dict = None):
        """
        :param page: page the element would belong to
        :param method: name of the method used to look up the element
        :param args: arguments used to look up the element
        """
        ...

    def __call__(self, *args, **kwargs) -> NoneElement: ...

    def __repr__(self) -> str: ...

    # The implementation returns the NoneElement itself for navigation names
    # (ele, parent, next, ...), so the result is not always a str.
    def __getattr__(self, item: str) -> Any: ...

    def __eq__(self, other: Any) -> bool: ...

    def __bool__(self) -> bool: ...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from html import unescape
from re import match, sub, DOTALL, search
@ -19,7 +18,6 @@ from .._functions.web import get_ele_txt, make_absolute_link
class SessionElement(DrissionElement):
"""session模式的元素对象包装了一个lxml的Element对象并封装了常用功能"""
def __init__(self, ele, owner=None):
"""初始化对象
@ -30,173 +28,81 @@ class SessionElement(DrissionElement):
self._inner_ele = ele
self._type = 'SessionElement'
@property
def inner_ele(self):
return self._inner_ele
def __repr__(self):
    """Return a tag showing the element's tag name followed by its ``name='value'`` attributes."""
    rendered = ' '.join(f"{name}='{value}'" for name, value in self.attrs.items())
    return f'<SessionElement {self.tag} {rendered}>'
def __call__(self, locator, index=1, timeout=None):
    """Find an element inside this one; shorthand for ``ele()``.

    Example: ``ele2 = ele1('@id=ele_id')``

    :param locator: locator of the element, a loc tuple or a query string
    :param index: which match to return, starting at 1; negative values count from the end
    :param timeout: has no effect; it is not forwarded to ``ele()``
    :return: SessionElement object or attribute text
    """
    return self.ele(locator, index=index)
def __eq__(self, other):
    """Compare by xpath; an ``other`` without an ``xpath`` attribute is compared as None."""
    return self.xpath == getattr(other, 'xpath', None)
@property
def inner_ele(self):
    """Return the wrapped element object held in ``_inner_ele``."""
    return self._inner_ele
@property
def tag(self):
    """Return the element's tag name."""
    return self._inner_ele.tag
@property
def html(self):
    """Return the element's outerHTML text."""
    html = tostring(self._inner_ele, method="html").decode()
    # tostring() also emits the text node that directly follows the element,
    # so keep only what precedes (and includes) the last '>'
    return unescape(html[:html.rfind('>') + 1])
@property
def inner_html(self):
    """Return the element's innerHTML text, or an empty string when no tag pair wraps the content."""
    found = match(r'<.*?>(.*)</.*?>', self.html, flags=DOTALL)
    if found:
        # group 1 is everything between the opening tag and the final closing tag
        return found.group(1)
    return ''
@property
def attrs(self):
    """Return a dict of every attribute name mapped to the value reported by ``attr()``."""
    result = {}
    for name, _ in self.inner_ele.items():
        # values are re-read through attr() rather than taken raw from the element
        result[name] = self.attr(name)
    return result
@property
def text(self):
    """Return all text inside the element, as produced by ``get_ele_txt``."""
    return get_ele_txt(self)
@property
def raw_text(self):
    """Return the element's text without formatting, taken from ``text_content()``."""
    return str(self._inner_ele.text_content())
def parent(self, level_or_loc=1, index=1):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果
:return: 上级元素对象
"""
def parent(self, level_or_loc=1, index=1, timeout: float = None):
return super().parent(level_or_loc, index)
def child(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one direct child that matches; query syntax can filter, and ``index`` picks the match.

    :param locator: query syntax used for filtering
    :param index: which match to return, starting at 1
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: direct child element or node text
    """
    return super().child(locator, index, timeout, ele_only=ele_only)
def prev(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching sibling before this element; query syntax can filter, ``index`` picks the match.

    :param locator: query syntax used for filtering
    :param index: which preceding match to return, starting at 1
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: sibling element
    """
    return super().prev(locator, index, timeout, ele_only=ele_only)
def next(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching sibling after this element; query syntax can filter, ``index`` picks the match.

    :param locator: query syntax used for filtering
    :param index: which match to return, starting at 1
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: sibling element
    """
    return super().next(locator, index, timeout, ele_only=ele_only)
def before(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching element that precedes this one in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used for filtering
    :param index: which preceding match to return, starting at 1
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: an element or node located before this element
    """
    return super().before(locator, index, timeout, ele_only=ele_only)
def after(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching element that follows this one in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used for filtering
    :param index: which match to return, starting at 1
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: an element or node located after this element
    """
    return super().after(locator, index, timeout, ele_only=ele_only)
def children(self, locator='', timeout=0, ele_only=True):
    """Return the matching direct children or nodes as a list; query syntax can filter.

    :param locator: query syntax used for filtering
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: list of direct child elements or node texts
    """
    # wrap the parent-class result so callers receive a SessionElementsList
    return SessionElementsList(self.owner, super().children(locator, timeout, ele_only=ele_only))
def prevs(self, locator='', timeout=None, ele_only=True):
    """Return the matching siblings or nodes before this element as a list; query syntax can filter.

    :param locator: query syntax used for filtering
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: list of sibling elements or node texts
    """
    return SessionElementsList(self.owner, super().prevs(locator, timeout, ele_only=ele_only))
def nexts(self, locator='', timeout=None, ele_only=True):
    """Return the matching siblings or nodes after this element as a list; query syntax can filter.

    :param locator: query syntax used for filtering
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: list of sibling elements or node texts
    """
    return SessionElementsList(self.owner, super().nexts(locator, timeout, ele_only=ele_only))
def befores(self, locator='', timeout=None, ele_only=True):
    """Return the matching elements or nodes that precede this one in the document as a list.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used for filtering
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: list of elements or nodes located before this element
    """
    return SessionElementsList(self.owner, super().befores(locator, timeout, ele_only=ele_only))
def afters(self, locator='', timeout=None, ele_only=True):
    """Return the matching elements or nodes that follow this one in the document as a list.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used for filtering
    :param timeout: has no practical effect
    :param ele_only: whether to return elements only; when False, text and comment nodes are included
    :return: list of elements or nodes located after this element
    """
    return SessionElementsList(self.owner, super().afters(locator, timeout, ele_only=ele_only))
def attr(self, name):
"""返回attribute属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
# 获取href属性时返回绝对url
if name == 'href':
if name == 'href': # 获取href属性时返回绝对url
link = self.inner_ele.get('href')
# 若为链接为None、js或邮件直接返回
if not link or link.lower().startswith(('javascript:', 'mailto:')):
@ -221,81 +127,46 @@ class SessionElement(DrissionElement):
return self.inner_html
else:
return self.inner_ele.get(name)
return self.inner_ele.get(name.lower())
def ele(self, locator, index=1, timeout=None):
    """Return one matching descendant element, attribute or node text.

    :param locator: locator of the element, a loc tuple or a query string
    :param index: which match to return, starting at 1; negative values count from the end
    :param timeout: has no effect; it is not forwarded
    :return: SessionElement object or attribute text
    """
    return self._ele(locator, index=index, method='ele()')
def eles(self, locator, timeout=None):
    """Return every matching descendant element, attribute or node text.

    :param locator: locator of the elements, a loc tuple or a query string
    :param timeout: has no effect; it is not forwarded
    :return: list of SessionElement objects or attribute texts
    """
    # index=None asks _ele() for all matches instead of a single one
    return self._ele(locator, index=None)
def s_ele(self, locator=None, index=1):
    """Return one matching descendant element, attribute or node text.

    :param locator: locator of the element, a loc tuple or a query string
    :param index: which match to return, starting at 1; negative values count from the end
    :return: SessionElement object or attribute text
    """
    return self._ele(locator, index=index, method='s_ele()')
def s_eles(self, locator):
    """Return every matching descendant element, attribute or node text.

    :param locator: locator of the elements, a loc tuple or a query string
    :return: list of SessionElement objects or attribute texts
    """
    # index=None asks _ele() for all matches instead of a single one
    return self._ele(locator, index=None)
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
"""返回当前元素下级符合条件的子元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用用于和父类对应
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: WebPage用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: SessionElement对象
"""
def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
return make_session_ele(self, locator, index=index)
def _get_ele_path(self, mode):
"""获取css路径或xpath路径
:param mode: 'css' 'xpath'
:return: css路径或xpath路径
"""
def _get_ele_path(self, xpath=True):
path_str = ''
ele = self
while ele:
if mode == 'css':
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
path_str = f'>{ele.tag}:nth-child({brothers + 1}){path_str}'
else:
if xpath:
while ele:
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}'
ele = ele.parent()
ele = ele.parent()
else:
while ele:
id_ = ele.attr('id')
if id_:
path_str = f'>{ele.tag}#{id_}{path_str}'
else:
path_str = f'>{ele.tag}:nth-child({len(ele.eles("xpath:./preceding-sibling::*")) + 1}){path_str}'
ele = ele.parent()
return f'{path_str[1:]}' if mode == 'css' else path_str
return path_str if xpath else f'{path_str[1:]}'
def make_session_ele(html_or_ele, loc=None, index=1, method=None):
"""从接收到的对象或html文本中查找元素返回SessionElement对象
如要直接从html生成SessionElement而不在下级查找loc输入None即可
:param html_or_ele: html文本BaseParser对象
:param loc: 定位元组或字符串为None时不在下级查找返回根元素
:param index: 获取第几个元素从1开始可传入负数获取倒数第几个None获取所有
:param method: 调用此方法的方法
:return: 返回SessionElement元素或列表或属性文本
"""
# ---------------处理定位符---------------
if not loc:
if isinstance(html_or_ele, SessionElement):
@ -309,13 +180,14 @@ def make_session_ele(html_or_ele, loc=None, index=1, method=None):
raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------根据传入对象类型获取页面对象和lxml元素对象---------------
the_type = getattr(html_or_ele, '_type', None)
# 直接传入html文本
if isinstance(html_or_ele, str):
page = None
html_or_ele = fromstring(html_or_ele)
# SessionElement
elif html_or_ele._type == 'SessionElement':
elif the_type == 'SessionElement':
page = html_or_ele.owner
loc_str = loc[1]
@ -336,7 +208,7 @@ def make_session_ele(html_or_ele, loc=None, index=1, method=None):
loc = loc[0], loc_str
elif html_or_ele._type == 'ChromiumElement':
elif the_type == 'ChromiumElement':
loc_str = loc[1]
if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'):
loc_str = f'.{loc[1]}'
@ -349,17 +221,17 @@ def make_session_ele(html_or_ele, loc=None, index=1, method=None):
xpath = html_or_ele.xpath
# ChromiumElement兼容传入的元素在iframe内的情况
if html_or_ele._doc_id is None:
doc = html_or_ele.run_js('return this.ownerDocument;')
doc = html_or_ele._run_js('return this.ownerDocument;')
html_or_ele._doc_id = doc['objectId'] if doc else False
if html_or_ele._doc_id:
html = html_or_ele.owner.run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML']
html = html_or_ele.owner._run_cdp('DOM.getOuterHTML', objectId=html_or_ele._doc_id)['outerHTML']
else:
html = html_or_ele.owner.html
html_or_ele = fromstring(html)
html_or_ele = html_or_ele.xpath(xpath)[0]
elif html_or_ele._type == 'ChromiumFrame':
elif the_type == 'ChromiumFrame':
page = html_or_ele
html_or_ele = fromstring(html_or_ele.inner_html)
@ -395,7 +267,7 @@ def make_session_ele(html_or_ele, loc=None, index=1, method=None):
# 把lxml元素对象包装成SessionElement对象并按需要返回一个或全部
if index is None:
r = SessionElementsList(page=page)
r = SessionElementsList(owner=page)
for e in eles:
if e != '\n':
r.append(SessionElement(e, page) if isinstance(e, HtmlElement) else e)

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, List, Tuple, Optional
@ -18,130 +17,290 @@ from .._pages.session_page import SessionPage
class SessionElement(DrissionElement):
"""静态元素对象"""
def __init__(self, ele: HtmlElement, owner: Union[SessionPage, None] = None):
self._inner_ele: HtmlElement = ...
self.owner: SessionPage = ...
self.page: SessionPage = ...
@property
def inner_ele(self) -> HtmlElement: ...
def __repr__(self) -> str: ...
def __call__(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
timeout: float = None) -> SessionElement: ...
timeout: float = None) -> SessionElement:
"""在内部查找元素
ele2 = ele1('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 第几个元素从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用
:return: SessionElement对象或属性文本
"""
...
def __repr__(self) -> str: ...
def __eq__(self, other: SessionElement) -> bool: ...
@property
def tag(self) -> str: ...
def inner_ele(self) -> HtmlElement: ...
@property
def html(self) -> str: ...
def tag(self) -> str:
"""返回元素类型"""
...
@property
def inner_html(self) -> str: ...
def html(self) -> str:
"""返回outerHTML文本"""
...
@property
def attrs(self) -> dict: ...
def inner_html(self) -> str:
"""返回元素innerHTML文本"""
...
@property
def text(self) -> str: ...
def attrs(self) -> dict:
"""返回元素所有属性及值"""
...
@property
def raw_text(self) -> str: ...
def text(self) -> str:
"""返回元素内文本"""
...
@property
def raw_text(self) -> str:
"""返回未格式化处理的元素内文本"""
...
def parent(self,
level_or_loc: Union[tuple, str, int] = 1,
index: int = 1) -> SessionElement: ...
index: int = 1,
timeout: float = None) -> SessionElement:
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果
:param timeout: 此参数不起实际作用
:return: 上级元素对象
"""
...
def child(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str]: ...
ele_only: bool = True) -> Union[SessionElement, str]:
"""返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 第几个查询结果1开始
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本
"""
...
def prev(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str]: ...
ele_only: bool = True) -> Union[SessionElement, str]:
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素
"""
...
def next(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str]: ...
ele_only: bool = True) -> Union[SessionElement, str]:
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 第几个查询结果1开始
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素
"""
...
def before(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str]: ...
ele_only: bool = True) -> Union[SessionElement, str]:
"""返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
...
def after(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[SessionElement, str]: ...
ele_only: bool = True) -> Union[SessionElement, str]:
"""返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 第几个查询结果1开始
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
...
def children(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ...
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]:
"""返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 直接子元素或节点文本组成的列表
"""
...
def prevs(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ...
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]:
"""返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def nexts(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ...
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]:
"""返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def befores(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ...
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]:
"""返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
...
def afters(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]: ...
ele_only: bool = True) -> Union[SessionElementsList, List[Union[SessionElement, str]]]:
"""返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 此参数不起实际作用
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的元素或节点组成的列表
"""
...
def attr(self, name: str) -> Optional[str]: ...
def attr(self, name: str) -> Optional[str]:
    """Return the value of an attribute of this element.

    :param name: attribute name
    :return: the attribute value as text, or None if the element has no such attribute
    """
    ...
def ele(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
timeout: float = None) -> SessionElement: ...
timeout: float = None) -> SessionElement:
"""返回当前元素下级符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 第几个元素从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用
:return: SessionElement对象或属性文本
"""
...
def eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> SessionElementsList: ...
timeout: float = None) -> SessionElementsList:
"""返回当前元素下级所有符合条件的子元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用
:return: SessionElement对象或属性文本组成的列表
"""
...
def s_ele(self,
locator: Union[Tuple[str, str], str] = None,
index: int = 1) -> SessionElement: ...
index: int = 1) -> SessionElement:
"""返回当前元素下级符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:return: SessionElement对象或属性文本
"""
...
def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ...
def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList:
    """Return all matching child elements, attributes or node texts below this element.

    :param locator: how to locate the elements; a loc tuple or a query string
    :return: list of SessionElement objects or attribute texts
    """
    ...
def _find_elements(self,
locator: Union[Tuple[str, str], str],
timeout: float = None,
timeout: float,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None) -> Union[SessionElement, SessionElementsList]: ...
raise_err: bool = None) -> Union[SessionElement, SessionElementsList]:
"""返回当前元素下级符合条件的子元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用用于和父类对应
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: SessionElement对象
"""
...
def _get_ele_path(self, mode: str) -> str: ...
def _get_ele_path(self, xpath: bool=True) -> str:
    """Return the css path or the xpath of this element.

    :param xpath: whether to produce an xpath (True) or a css path (False)
    :return: the css path or xpath
    """
    ...
def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame,
ChromiumBase],
loc: Union[str, Tuple[str, str]] = None,
index: Optional[int] = 1,
method: Optional[str] = None) -> Union[SessionElement, SessionElementsList]: ...
method: Optional[str] = None) -> Union[SessionElement, SessionElementsList]:
"""从接收到的对象或html文本中查找元素返回SessionElement对象
如要直接从html生成SessionElement而不在下级查找loc输入None即可
:param html_or_ele: html文本BaseParser对象
:param loc: 定位元组或字符串为None时不在下级查找返回根元素
:param index: 获取第几个元素从1开始可传入负数获取倒数第几个None获取所有
:param method: 调用此方法的方法
:return: 返回SessionElement元素或列表或属性文本
"""
...

View File

@ -2,43 +2,45 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from json import load, dump, JSONDecodeError
from os import environ
from pathlib import Path
from shutil import rmtree
from subprocess import Popen, DEVNULL
from tempfile import gettempdir
from time import perf_counter, sleep
from requests import Session
from .settings import Settings
from .tools import port_is_using
from .._configs.options_manage import OptionsManager
from ..errors import BrowserConnectError
def connect_browser(option):
"""连接或启动浏览器
:param option: ChromiumOptions对象
:return: 返回是否接管的浏览器
"""
address = option.address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://')
address = option.address.replace('localhost', '127.0.0.1').lstrip('htps:/')
browser_path = option.browser_path
ip, port = address.split(':')
if ip != '127.0.0.1' or port_is_using(ip, port) or option.is_existing_only:
test_connect(ip, port)
option._headless = False
for i in option.arguments:
if i.startswith('--headless') and not i.endswith('=false'):
option._headless = True
break
return True
using = port_is_using(ip, port)
if ip != '127.0.0.1' or using or option.is_existing_only:
if test_connect(ip, port):
return True
elif ip != '127.0.0.1':
raise BrowserConnectError(f'\n{address}浏览器连接失败。')
elif using:
raise BrowserConnectError(f'\n{address}浏览器连接失败,请检查{port}端口是否浏览器,'
f'且已添加\'--remote-debugging-port={port}\'启动项。')
else: # option.is_existing_only
raise BrowserConnectError(f'\n{address}浏览器连接失败,请确认浏览器已启动。')
# ----------创建浏览器进程----------
args = get_launch_args(option)
args, user_path = get_launch_args(option)
if option._new_env:
rmtree(user_path, ignore_errors=True)
set_prefs(option)
set_flags(option)
try:
@ -47,61 +49,44 @@ def connect_browser(option):
# 传入的路径找不到主动在ini文件、注册表、系统变量中找
except FileNotFoundError:
browser_path = get_chrome_path(option.ini_path)
if not browser_path:
raise FileNotFoundError('无法找到浏览器可执行文件路径,请手动配置。')
_run_browser(port, browser_path, args)
test_connect(ip, port)
if not test_connect(ip, port):
raise BrowserConnectError(f'\n{address}浏览器连接失败。\n请确认:\n'
f'1、用户文件夹没有和已打开的浏览器冲突\n'
f'2、如为无界面系统请添加\'--headless=new\'启动参数\n'
f'3、如果是Linux系统尝试添加\'--no-sandbox\'启动参数\n'
f'可使用ChromiumOptions设置端口和用户文件夹路径。')
return False
def get_launch_args(opt):
"""从ChromiumOptions获取命令行启动参数
:param opt: ChromiumOptions
:return: 启动参数列表
"""
# ----------处理arguments-----------
result = set()
has_user_path = False
headless = None
user_path = False
for i in opt.arguments:
if i.startswith(('--load-extension=', '--remote-debugging-port=')):
continue
elif i.startswith('--user-data-dir') and not opt.system_user_path:
result.add(f'--user-data-dir={Path(i[16:]).absolute()}')
has_user_path = True
user_path = f'--user-data-dir={Path(i[16:]).absolute()}'
result.add(user_path)
continue
elif i.startswith('--headless'):
if i == '--headless=false':
headless = False
continue
elif i == '--headless':
i = '--headless=new'
headless = True
else:
headless = True
elif i.startswith('--user-agent='):
opt._ua_set = True
result.add(i)
if not has_user_path and not opt.system_user_path:
if not user_path and not opt.system_user_path:
port = opt.address.split(':')[-1] if opt.address else '0'
p = Path(opt.tmp_path) if opt.tmp_path else Path(gettempdir()) / 'DrissionPage'
path = p / f'userData_{port}'
path = p / 'userData' / port
path.mkdir(parents=True, exist_ok=True)
opt.set_user_data_path(path)
result.add(f'--user-data-dir={path}')
# if headless is None and system().lower() == 'linux': # 无界面Linux自动加入无头
# from os import popen
# r = popen('systemctl list-units | grep graphical.target')
# if 'graphical.target' not in r.read():
# headless = True
# result.add('--headless=new')
user_path = path.absolute()
opt.set_user_data_path(user_path)
result.add(f'--user-data-dir={user_path}')
result = list(result)
opt._headless = headless
# ----------处理插件extensions-------------
ext = [str(Path(e).absolute()) for e in opt.extensions]
@ -110,14 +95,10 @@ def get_launch_args(opt):
ext = f'--load-extension={ext}'
result.append(ext)
return result
return result, user_path
def set_prefs(opt):
"""处理启动配置中的prefs项目前只能对已存在文件夹配置
:param opt: ChromiumOptions
:return: None
"""
if not opt.user_data_path or (not opt.preferences and not opt._prefs_to_del):
return
prefs = opt.preferences
@ -156,10 +137,6 @@ def set_prefs(opt):
def set_flags(opt):
"""处理启动配置中的flags项
:param opt: ChromiumOptions
:return: None
"""
if not opt.user_data_path or (not opt.clear_file_flags and not opt.flags):
return
@ -191,16 +168,11 @@ def set_flags(opt):
dump(states_dict, f)
def test_connect(ip, port, timeout=30):
"""测试浏览器是否可用
:param ip: 浏览器ip
:param port: 浏览器端口
:param timeout: 超时时间
:return: None
"""
end_time = perf_counter() + timeout
def test_connect(ip, port):
end_time = perf_counter() + Settings.browser_connect_timeout
s = Session()
s.trust_env = False
s.keep_alive = False
while perf_counter() < end_time:
try:
r = s.get(f'http://{ip}:{port}/json', timeout=10, headers={'Connection': 'close'})
@ -208,18 +180,13 @@ def test_connect(ip, port, timeout=30):
if tab['type'] in ('page', 'webview'):
r.close()
s.close()
return
return True
r.close()
except Exception:
sleep(.2)
s.close()
raise BrowserConnectError(f'\n{ip}:{port}浏览器无法链接。\n请确认:\n1、该端口为浏览器\n'
f'2、已添加\'--remote-debugging-port={port}\'启动项\n'
f'3、用户文件夹没有和已打开的浏览器冲突\n'
f'4、如为无界面系统请添加\'--headless=new\'参数\n'
f'5、如果是Linux系统可能还要添加\'--no-sandbox\'启动参数\n'
f'可使用ChromiumOptions设置端口和用户文件夹路径。')
return False
def _run_browser(port, path: str, args) -> Popen:
@ -287,7 +254,6 @@ def _remove_arg_from_dict(target_dict: dict, arg: str) -> None:
def get_chrome_path(ini_path):
"""从ini文件或系统变量中获取chrome可执行文件的路径"""
# -----------从ini文件中获取--------------
if ini_path and Path(ini_path).exists():
path = OptionsManager(ini_path).chromium_options.get('browser_path', None)

View File

@ -2,27 +2,58 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union
from .._configs.chromium_options import ChromiumOptions
def connect_browser(option: ChromiumOptions) -> bool: ...
def connect_browser(option: ChromiumOptions) -> bool:
    """Connect to a browser, launching one if necessary.

    :param option: ChromiumOptions object
    :return: whether an already running browser was taken over
    """
    ...
def get_launch_args(opt: ChromiumOptions) -> list: ...
def get_launch_args(opt: ChromiumOptions) -> list:
    """Build the command line launch arguments from a ChromiumOptions object.

    NOTE(review): the implementation in this change ends with ``return result, user_path``,
    so the ``list`` return annotation looks stale -- confirm and update.

    :param opt: ChromiumOptions
    :return: list of launch arguments
    """
    ...
def set_prefs(opt: ChromiumOptions) -> None: ...
def set_prefs(opt: ChromiumOptions) -> None:
    """Apply the prefs items of the launch configuration.

    Currently only works for a user data folder that already exists.

    :param opt: ChromiumOptions
    :return: None
    """
    ...
def set_flags(opt: ChromiumOptions) -> None: ...
def set_flags(opt: ChromiumOptions) -> None:
    """Apply the flags items of the launch configuration.

    :param opt: ChromiumOptions
    :return: None
    """
    ...
def test_connect(ip: str, port: Union[int, str], timeout: float = 30) -> None: ...
def test_connect(ip: str, port: Union[int, str], timeout: float = 30) -> bool:
    """Test whether the browser at the given address is usable.

    NOTE(review): the implementation in this change is ``test_connect(ip, port)`` and takes
    its deadline from ``Settings.browser_connect_timeout`` -- confirm whether ``timeout``
    is still accepted and update this stub accordingly.

    :param ip: browser ip
    :param port: browser port
    :param timeout: timeout
    :return: True if a connection could be made, False otherwise
    """
    ...
def get_chrome_path(ini_path: str) -> Union[str, None]: ...
def get_chrome_path(ini_path: str) -> Union[str, None]:
    """Get the path of the chrome executable from the ini file or from system variables.

    :param ini_path: path of the ini file
    :return: path of the executable
    """
    ...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from click import command, option

View File

@ -0,0 +1,225 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from datetime import datetime
from http.cookiejar import Cookie, CookieJar
from tldextract import TLDExtract
from .settings import Settings
def cookie_to_dict(cookie):
    """Convert a single cookie into a plain dict.

    :param cookie: a ``Cookie`` object, a dict, or a cookie string
    :return: the cookie as a dict (a dict argument is returned as-is, not copied)
    :raises TypeError: if ``cookie`` is none of the supported types
    """
    if isinstance(cookie, Cookie):
        # Copy the object's attributes and drop the two internal fields.
        as_dict = dict(cookie.__dict__)
        for internal in ('rfc2109', '_rest'):
            as_dict.pop(internal, None)
        return as_dict

    if isinstance(cookie, dict):
        return cookie

    if not isinstance(cookie, str):
        raise TypeError('cookie参数必须为Cookie、str或dict类型。')

    known = ('domain', 'path', 'expires', 'max-age', 'HttpOnly', 'secure', 'expiry', 'name', 'value')
    # Fields are separated by ',' when the text contains one, otherwise by ';'.
    separator = ',' if ',' in cookie else ';'
    parsed = {}
    for part in cookie.strip().rstrip(';,').split(separator):
        key, _, val = part.strip().partition('=')
        if key in known:
            parsed[key] = val
        else:
            # Anything that is not a known field is the cookie's own name=value pair.
            parsed['name'] = key
            parsed['value'] = val
    return parsed
def cookies_to_tuple(cookies):
if isinstance(cookies, (list, tuple, CookieJar)):
cookies = tuple(cookie_to_dict(cookie) for cookie in cookies)
elif isinstance(cookies, str):
c_dict = {}
cookies = cookies.rstrip('; ')
cookies = cookies.split(';')
for attr in cookies:
attr_val = attr.strip().split('=', 1)
c_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else True
cookies = _dict_cookies_to_tuple(c_dict)
elif isinstance(cookies, dict):
cookies = _dict_cookies_to_tuple(cookies)
elif isinstance(cookies, Cookie):
cookies = (cookie_to_dict(cookies),)
else:
raise TypeError('cookies参数必须为Cookie、CookieJar、list、tuple、str或dict类型。')
return cookies
def set_session_cookies(session, cookies):
    """Set cookies on a Session object.

    :param session: Session object
    :param cookies: cookies in any form accepted by cookies_to_tuple()
    :return: None
    """
    accepted = ('version', 'port', 'domain', 'path', 'secure',
                'expires', 'discard', 'comment', 'comment_url', 'rest')
    for item in cookies_to_tuple(cookies):
        if item['value'] is None:
            item['value'] = ''

        # Keys are matched case-insensitively but passed on with their original spelling.
        extras = {}
        for key in item:
            if key.lower() in accepted:
                extras[key] = item[key]
        if 'expiry' in item:
            extras['expires'] = item['expiry']

        session.cookies.set(item['name'], item['value'], **extras)
def set_browser_cookies(browser, cookies):
    """Set cookies browser-wide with a single ``Storage.setCookies`` CDP call.

    :param browser: object providing ``_run_cdp()``
    :param cookies: cookies in any form accepted by cookies_to_tuple()
    :return: None
    :raises ValueError: if a cookie has neither a 'domain' nor a 'url' field
    """
    formatted = []
    for cookie in cookies_to_tuple(cookies):
        if not ('domain' in cookie or 'url' in cookie):
            raise ValueError(f"cookie必须带有'domain''url'字段:{cookie}")
        formatted.append(format_cookie(cookie))
    browser._run_cdp('Storage.setCookies', cookies=formatted)
def set_tab_cookies(page, cookies):
    """Set cookies on a tab, one ``Network.setCookie`` CDP call per attempt.

    Each cookie is first normalised with format_cookie(). ``__Host-`` cookies are bound to the
    page url. Cookies that carry a domain are set directly. Otherwise the domain is derived
    from ``page._browser_url``, trying progressively shorter forms until the cookie is found
    in the browser.

    :param page: page object providing ``_run_cdp_loaded()``, ``url``, ``_browser_url``,
        ``cookies()`` and ``browser``
    :param cookies: cookies in any form accepted by cookies_to_tuple()
    :return: None
    :raises RuntimeError: if a cookie has no usable domain and the page url is not http(s)
    """
    for cookie in cookies_to_tuple(cookies):
        cookie = format_cookie(cookie)
        if cookie['name'].startswith('__Host-'):
            if not page.url.startswith('http'):
                # No http(s) page to bind to: rename the prefix to '__Secure-' instead.
                cookie['name'] = cookie['name'].replace('__Host-', '__Secure-', 1)
            else:
                cookie['url'] = page.url
            page._run_cdp_loaded('Network.setCookie', **cookie)
            continue  # no domain needs to be set, move on to the next cookie
        if cookie.get('domain', None):
            try:
                page._run_cdp_loaded('Network.setCookie', **cookie)
                # If the cookie did not show up in the tab, set it through the browser object.
                if not is_cookie_in_driver(page, cookie):
                    page.browser.set.cookies(cookie)
                continue
            except Exception:
                # Any failure falls through to the url-derived domain attempts below.
                pass
        url = page._browser_url
        if not url.startswith('http'):
            raise RuntimeError(f'未设置域名请设置cookie的domain参数或先访问一个网站。{cookie}')
        ex_url = TLDExtract(suffix_list_urls=["https://publicsuffix.org/list/public_suffix_list.dat",
                                              f"file:///{Settings.suffixes_list_path}"]).extract_str(url)
        # Subdomain labels followed by 'domain.suffix' (or just the domain when there is no suffix).
        d_list = ex_url.subdomain.split('.')
        d_list.append(f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain)
        # Interleave the parts with '.' so that joining tmp[i:] yields, in turn,
        # 'a.b.example.com', '.b.example.com', 'b.example.com', '.example.com', 'example.com'.
        tmp = [d_list[0]]
        if len(d_list) > 1:
            for i in d_list[1:]:
                tmp.append('.')
                tmp.append(i)
        # Try each candidate domain and stop at the first one the browser reports back.
        for i in range(len(tmp)):
            cookie['domain'] = ''.join(tmp[i:])
            page._run_cdp_loaded('Network.setCookie', **cookie)
            if is_cookie_in_driver(page, cookie):
                break
def is_cookie_in_driver(page, cookie):
    """Check whether a cookie is present among the cookies reported by the page.

    Name and value must match. The domain must match too, but only when the
    given cookie has a 'domain' key.

    :param page: object providing ``cookies(all_domains=True)``
    :param cookie: cookie dict
    :return: True if a matching cookie was found, otherwise False
    """
    check_domain = 'domain' in cookie
    for existing in page.cookies(all_domains=True):
        if cookie['name'] != existing['name'] or cookie['value'] != existing['value']:
            continue
        if not check_domain or cookie['domain'] == existing.get('domain', None):
            return True
    return False
def format_cookie(cookie):
    """Normalise a cookie dict in place into the form sent to the browser.

    * 'expiry' is renamed to 'expires' (converted with int()).
    * A falsy 'expires' is removed; a string 'expires' becomes an int, a float, or an epoch
      timestamp parsed from '%a, %d %b %Y %H:%M:%S GMT' (two-digit year also accepted).
    * 'value' is made a str (None becomes '').
    * '__Host-' names force path '/' and secure; '__Secure-' names force secure.
    * An unsupported 'sameSite' is dropped; None/False 'priority' and 'sourceScheme' are dropped.

    :param cookie: cookie dict; must contain 'name' and 'value'
    :return: the same dict, modified
    :raises ValueError: if 'priority' or 'sourceScheme' has an unsupported value, or the
        'expires' text cannot be parsed
    """
    # Local import: the module only imports ``datetime`` from the datetime package.
    from datetime import timezone

    if 'expiry' in cookie:
        cookie['expires'] = int(cookie['expiry'])
        cookie.pop('expiry')

    if 'expires' in cookie:
        expires = cookie['expires']
        if not expires:
            cookie.pop('expires')
        elif isinstance(expires, str):
            if expires.isdigit():
                cookie['expires'] = int(expires)
            elif expires.replace('.', '').isdigit():
                cookie['expires'] = float(expires)
            else:
                try:
                    parsed = datetime.strptime(expires, '%a, %d %b %Y %H:%M:%S GMT')
                except ValueError:
                    parsed = datetime.strptime(expires, '%a, %d %b %y %H:%M:%S GMT')
                # The text is a GMT time. A naive datetime's timestamp() would be computed
                # in local time and shift the result by the host's UTC offset.
                cookie['expires'] = parsed.replace(tzinfo=timezone.utc).timestamp()

    if cookie['value'] is None:
        cookie['value'] = ''
    elif not isinstance(cookie['value'], str):
        cookie['value'] = str(cookie['value'])

    if cookie['name'].startswith('__Host-'):
        cookie['path'] = '/'
        cookie['secure'] = True
    elif cookie['name'].startswith('__Secure-'):
        cookie['secure'] = True

    # None and False are not in the accepted tuple, so one membership test covers them too.
    if 'sameSite' in cookie and cookie['sameSite'] not in ('None', 'Lax', 'Strict', 'no_restriction'):
        cookie.pop('sameSite')

    if 'priority' in cookie:
        priority = cookie['priority']
        if priority in (None, False):
            cookie.pop('priority')
        elif priority not in ('Low', 'Medium', 'High'):
            raise ValueError(f'{cookie}\npriority字段必须为"Low""Medium""High"之一。')

    if 'sourceScheme' in cookie:
        sourceScheme = cookie['sourceScheme']
        if sourceScheme in (None, False):
            cookie.pop('sourceScheme')
        elif sourceScheme not in ('Unset', 'NonSecure', 'Secure'):
            raise ValueError(f'{cookie}\nsourceScheme字段必须为"Unset""NonSecure""Secure"之一。')

    return cookie
class CookiesList(list):
    """A list of cookie dicts with helpers to export it in other formats."""

    def as_dict(self):
        """Return a ``{name: value}`` dict built from the cookies."""
        mapping = {}
        for item in self:
            mapping[item['name']] = item['value']
        return mapping

    def as_str(self):
        """Return the cookies as one ``name=value; name=value`` string."""
        return '; '.join(f'{c["name"]}={c["value"]}' for c in self)

    def as_json(self):
        """Return the whole list serialised as JSON text."""
        import json
        return json.dumps(self)
def _dict_cookies_to_tuple(cookies: dict):
"""把dict形式的cookies转换为tuple形式
:param cookies: 单个或多个cookies单个时包含 'name' 'value'
:return: 多个dict格式cookies组成的列表
"""
if 'name' in cookies and 'value' in cookies: # 单个cookie
return (cookies,)
keys = ('domain', 'path', 'expires', 'max-age', 'HttpOnly', 'secure', 'expiry')
template = {k: v for k, v in cookies.items() if k in keys}
return tuple(dict(**{'name': k, 'value': v}, **template) for k, v in cookies.items() if k not in keys)

View File

@ -0,0 +1,93 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from http.cookiejar import Cookie
from typing import Union
from requests import Session
from requests.cookies import RequestsCookieJar
from .._base.chromium import Chromium
from .._pages.chromium_base import ChromiumBase
def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
    """Convert a cookie to dict form.

    :param cookie: a Cookie object, a string or a dict
    :return: the cookie as a dict
    """
    ...
def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict, Cookie]) -> tuple:
    """Convert cookies to tuple form.

    :param cookies: cookies; may be a CookieJar, list, tuple, str, dict or Cookie
    :return: the cookies as a tuple
    """
    ...
def set_session_cookies(session: Session,
                        cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
    """Set the cookies of a Session object.

    :param session: Session object
    :param cookies: the cookies to set
    :return: None
    """
    ...
def set_browser_cookies(browser: Chromium,
                        cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
    """Set cookies on the browser.

    :param browser: Chromium browser object
    :param cookies: the cookies to set
    :return: None
    """
    ...
def set_tab_cookies(page: ChromiumBase,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
    """Set cookies on a page.

    :param page: page object
    :param cookies: the cookies to set
    :return: None
    """
    ...
def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool:
    """Check whether a cookie is present in the browser.

    :param page: page object
    :param cookie: cookie in dict form
    :return: bool
    """
    ...
def format_cookie(cookie: dict) -> dict:
    """Bring a cookie into a usable format.

    :param cookie: cookie in dict form
    :return: the formatted cookie dict
    """
    ...
class CookiesList(list):
    def as_dict(self) -> dict:
        """Return the cookies as a dict; only the name and value fields are included."""
        ...
    def as_str(self) -> str:
        """Return the cookies as a str; only the name and value fields are included."""
        ...
    def as_json(self) -> str:
        """Return the cookies in json format."""
        ...
    def __next__(self) -> dict: ...

View File

@ -2,18 +2,27 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from time import perf_counter
from time import perf_counter, sleep
from .locator import is_str_loc
from .._elements.none_element import NoneElement
class SessionElementsList(list):
def __init__(self, page=None, *args):
def __init__(self, owner=None, *args):
super().__init__(*args)
self._page = page
self._owner = owner
def __getitem__(self, item):
cls = type(self)
if isinstance(item, slice):
return cls(self._owner, super().__getitem__(item))
elif isinstance(item, int):
return super().__getitem__(item)
else:
raise TypeError('序号必须是数字或切片。')
@property
def get(self):
@ -39,35 +48,14 @@ class ChromiumElementsList(SessionElementsList):
return ChromiumFilterOne(self)
def search(self, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
"""或关系筛选元素
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:return: 筛选结果
"""
have_rect=None, have_text=None, tag=None):
return _search(self, displayed=displayed, checked=checked, selected=selected, enabled=enabled,
clickable=clickable, have_rect=have_rect, have_text=have_text)
clickable=clickable, have_rect=have_rect, have_text=have_text, tag=tag)
def search_one(self, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
"""或关系筛选元素,获取一个结果
:param index: 元素序号从1开始
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:return: 筛选结果
"""
have_rect=None, have_text=None, tag=None):
return _search_one(self, index=index, displayed=displayed, checked=checked, selected=selected,
enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text)
enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text, tag=tag)
class SessionFilterOne(object):
@ -76,29 +64,30 @@ class SessionFilterOne(object):
self._index = 1
def __call__(self, index=1):
"""返回结果中第几个元素
:param index: 元素序号从1开始
:return: 对象自身
"""
self._index = index
return self
def tag(self, name, equal=True):
num = 0
name = name.lower()
if equal:
for i in self._list:
if not isinstance(i, str) and i.tag == name:
num += 1
if self._index == num:
return i
else:
for i in self._list:
if not isinstance(i, str) and i.tag != name:
num += 1
if self._index == num:
return i
return NoneElement(self._list._owner, 'tag()', args={'name': name, 'equal': equal, 'index': self._index})
def attr(self, name, value, equal=True):
"""以是否拥有某个attribute值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
return self._get_attr(name, value, 'attr', equal=equal)
def text(self, text, fuzzy=True, contain=True):
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
num = 0
if contain:
for i in self._list:
@ -114,16 +103,10 @@ class SessionFilterOne(object):
num += 1
if self._index == num:
return i
return NoneElement(self._list._page, 'text()',
return NoneElement(self._list._owner, 'text()',
args={'text': text, 'fuzzy': fuzzy, 'contain': contain, 'index': self._index})
def _get_attr(self, name, value, method, equal=True):
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
num = 0
if equal:
for i in self._list:
@ -137,7 +120,7 @@ class SessionFilterOne(object):
num += 1
if self._index == num:
return i
return NoneElement(self._list._page, f'{method}()',
return NoneElement(self._list._owner, f'{method}()',
args={'name': name, 'value': value, 'equal': equal, 'index': self._index})
@ -157,99 +140,50 @@ class SessionFilter(SessionFilterOne):
@property
def get(self):
"""返回用于获取元素属性的对象"""
return self._list.get
def tag(self, name, equal=True):
self._list = _tag_all(self._list, SessionElementsList(owner=self._list._owner), name=name, equal=equal)
return self
def text(self, text, fuzzy=True, contain=True):
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
self._list = _text_all(self._list, SessionElementsList(page=self._list._page),
self._list = _text_all(self._list, SessionElementsList(owner=self._list._owner),
text=text, fuzzy=fuzzy, contain=contain)
return self
def _get_attr(self, name, value, method, equal=True):
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
self._list = _get_attr_all(self._list, SessionElementsList(page=self._list._page),
name=name, value=value, method=method, equal=equal)
self._list = _attr_all(self._list, SessionElementsList(owner=self._list._owner),
name=name, value=value, method=method, equal=equal)
return self
class ChromiumFilterOne(SessionFilterOne):
def displayed(self, equal=True):
"""以是否显示为条件筛选元素
:param equal: 是否匹配显示的元素False匹配不显示的
:return: 筛选结果
"""
return self._any_state('is_displayed', equal=equal)
def checked(self, equal=True):
"""以是否被选中为条件筛选元素
:param equal: 是否匹配被选中的元素False匹配不被选中的
:return: 筛选结果
"""
return self._any_state('is_checked', equal=equal)
def selected(self, equal=True):
"""以是否被选择为条件筛选元素,用于<select>元素项目
:param equal: 是否匹配被选择的元素False匹配不被选择的
:return: 筛选结果
"""
return self._any_state('is_selected', equal=equal)
def enabled(self, equal=True):
"""以是否可用为条件筛选元素
:param equal: 是否匹配可用的元素False表示匹配disabled状态的
:return: 筛选结果
"""
return self._any_state('is_enabled', equal=equal)
def clickable(self, equal=True):
"""以是否可点击为条件筛选元素
:param equal: 是否匹配可点击的元素False表示匹配不是可点击的
:return: 筛选结果
"""
return self._any_state('is_clickable', equal=equal)
def have_rect(self, equal=True):
"""以是否有大小为条件筛选元素
:param equal: 是否匹配有大小的元素False表示匹配没有大小的
:return: 筛选结果
"""
return self._any_state('has_rect', equal=equal)
def style(self, name, value, equal=True):
"""以是否拥有某个style值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
return self._get_attr(name, value, 'style', equal=equal)
def property(self, name, value, equal=True):
"""以是否拥有某个property值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
return self._get_attr(name, value, 'property', equal=equal)
def _any_state(self, name, equal=True):
"""
:param name: 状态名称
:param equal: 是否是指定状态False表示否定状态
:return: 选中的元素
"""
num = 0
if equal:
for i in self._list:
@ -263,7 +197,7 @@ class ChromiumFilterOne(SessionFilterOne):
num += 1
if self._index == num:
return i
return NoneElement(self._list._page, f'{name}()', args={'equal': equal, 'index': self._index})
return NoneElement(self._list._owner, f'{name}()', args={'equal': equal, 'index': self._index})
class ChromiumFilter(ChromiumFilterOne):
@ -282,69 +216,34 @@ class ChromiumFilter(ChromiumFilterOne):
@property
def get(self):
"""返回用于获取元素属性的对象"""
return self._list.get
def search_one(self, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
"""或关系筛选元素,获取一个结果
:param index: 元素序号从1开始
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:return: 筛选结果
"""
have_rect=None, have_text=None, tag=None):
return _search_one(self._list, index=index, displayed=displayed, checked=checked, selected=selected,
enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text)
enabled=enabled, clickable=clickable, have_rect=have_rect, have_text=have_text, tag=tag)
def search(self, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
"""或关系筛选元素
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:return: 筛选结果
"""
have_rect=None, have_text=None, tag=None):
return _search(self._list, displayed=displayed, checked=checked, selected=selected, enabled=enabled,
clickable=clickable, have_rect=have_rect, have_text=have_text)
clickable=clickable, have_rect=have_rect, have_text=have_text, tag=tag)
def tag(self, name, equal=True):
self._list = _tag_all(self._list, ChromiumElementsList(owner=self._list._owner), name=name, equal=equal)
return self
def text(self, text, fuzzy=True, contain=True):
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
self._list = _text_all(self._list, ChromiumElementsList(page=self._list._page),
self._list = _text_all(self._list, ChromiumElementsList(owner=self._list._owner),
text=text, fuzzy=fuzzy, contain=contain)
return self
def _get_attr(self, name, value, method, equal=True):
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
self._list = _get_attr_all(self._list, ChromiumElementsList(page=self._list._page),
name=name, value=value, method=method, equal=equal)
self._list = _attr_all(self._list, ChromiumElementsList(owner=self._list._owner),
name=name, value=value, method=method, equal=equal)
return self
def _any_state(self, name, equal=True):
"""
:param name: 状态名称
:param equal: 是否是指定状态False表示否定状态
:return: 选中的列表
"""
r = ChromiumElementsList(page=self._list._page)
r = ChromiumElementsList(owner=self._list._owner)
if equal:
for i in self._list:
if not isinstance(i, str) and getattr(i.states, name):
@ -362,47 +261,81 @@ class Getter(object):
self._list = _list
def links(self):
"""返回所有元素的link属性组成的列表"""
return [e.link for e in self._list if not isinstance(e, str)]
def texts(self):
"""返回所有元素的text属性组成的列表"""
return [e if isinstance(e, str) else e.text for e in self._list]
def attrs(self, name):
"""返回所有元素指定的attr属性组成的列表
:param name: 属性名称
:return: 属性文本组成的列表
"""
return [e.attr(name) for e in self._list if not isinstance(e, str)]
def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10):
"""传入多个定位符获取多个ele
:param locators: 定位符组成的列表
:param owner: 页面或元素对象
:param any_one: 是否找到任何一个即返回
:param first_ele: 每个定位符是否只获取第一个元素
:param timeout: 超时时间
:return: 多个定位符组成的dict
"""
res = {loc: False for loc in locators}
if isinstance(locators, (tuple, str)):
locators = (locators,)
res = {loc: None for loc in locators}
if timeout == 0:
for loc in locators:
ele = owner._ele(loc, timeout=0, raise_err=False, index=1 if first_ele else None, method='find()')
res[loc] = ele
if ele and any_one:
return res
return res
end_time = perf_counter() + timeout
while perf_counter() <= end_time:
for loc in locators:
if res[loc] is not False:
if res[loc]:
continue
ele = owner.ele(loc, timeout=0) if first_ele else owner.eles(loc, timeout=0)
if ele:
res[loc] = ele
if any_one:
return res
if False not in res.values():
break
ele = owner._ele(loc, timeout=0, raise_err=False, index=1 if first_ele else None, method='find()')
res[loc] = ele
if ele and any_one:
return res
if all(res.values()):
return res
sleep(.05)
return res
def _get_attr_all(src_list, aim_list, name, value, method, equal=True):
def get_frame(owner, loc_ind_ele, timeout=None):
    """Resolve ``loc_ind_ele`` to a frame object found through ``owner``.

    :param owner: object whose ``_ele()`` is used for the lookup
    :param loc_ind_ele: locator (str or tuple), frame id/name string, integer
                        index, or an object whose ``_type`` is 'ChromiumFrame'
    :param timeout: timeout forwarded to ``owner._ele()``
    :return: the frame object, or the NoneElement produced by the lookup
    :raises TypeError: if the lookup hits a non-frame element, or the argument
                       is of an unsupported kind
    """
    if isinstance(loc_ind_ele, (str, tuple, int)):
        if isinstance(loc_ind_ele, int):
            # Integer: pick the n-th iframe/frame element.
            found = owner._ele('@|tag():iframe@|tag():frame', timeout=timeout, index=loc_ind_ele)
        else:
            locator = loc_ind_ele
            if isinstance(locator, str) and not is_str_loc(locator):
                # A bare string that is not a locator is treated as a frame name or id.
                locator = (f'xpath://*[(name()="iframe" or name()="frame") and '
                           f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]')
            found = owner._ele(locator, timeout=timeout)

        if found and found._type != 'ChromiumFrame':
            raise TypeError('该定位符不是指向frame元素。')
        result = found

    elif getattr(loc_ind_ele, '_type', None) == 'ChromiumFrame':
        result = loc_ind_ele

    else:
        raise TypeError('必须传入定位符、iframe序号、id、name、ChromiumFrame对象其中之一。')

    # Label a failed lookup with this function's name and argument.
    if isinstance(result, NoneElement):
        result.method = 'get_frame()'
        result.args = {'loc_ind_ele': loc_ind_ele}
    return result
def _attr_all(src_list, aim_list, name, value, method, equal=True):
if equal:
for i in src_list:
if not isinstance(i, str) and getattr(i, method)(name) == value:
@ -414,6 +347,19 @@ def _get_attr_all(src_list, aim_list, name, value, method, equal=True):
return aim_list
def _tag_all(src_list, aim_list, name, equal=True):
    """Append to ``aim_list`` the elements of ``src_list`` selected by tag name.

    String entries are ignored. ``name`` is lower-cased before comparison
    with each element's ``tag``.
    :param src_list: source list of elements (may contain plain strings)
    :param aim_list: list that receives the matching elements
    :param name: tag name to compare against
    :param equal: True keeps elements whose tag equals ``name``, False keeps the others
    :return: ``aim_list``
    """
    wanted = name.lower()
    for item in src_list:
        if isinstance(item, str):
            continue
        keep = (item.tag == wanted) if equal else (item.tag != wanted)
        if keep:
            aim_list.append(item)
    return aim_list
def _text_all(src_list, aim_list, text, fuzzy=True, contain=True):
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
@ -435,7 +381,7 @@ def _text_all(src_list, aim_list, text, fuzzy=True, contain=True):
def _search(_list, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
have_rect=None, have_text=None, tag=None):
"""或关系筛选元素
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
@ -444,9 +390,10 @@ def _search(_list, displayed=None, checked=None, selected=None, enabled=None, cl
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 元素类型
:return: 筛选结果
"""
r = ChromiumElementsList(page=_list._page)
r = ChromiumElementsList(owner=_list._owner)
for i in _list:
if not isinstance(i, str) and (
(displayed is not None and (displayed is True and i.states.is_displayed)
@ -462,13 +409,14 @@ def _search(_list, displayed=None, checked=None, selected=None, enabled=None, cl
or (have_rect is not None and (have_rect is True and i.states.has_rect)
or (have_rect is False and not i.states.has_rect))
or (have_text is not None and (have_text is True and i.raw_text)
or (have_text is False and not i.raw_text))):
or (have_text is False and not i.raw_text))
or (tag is not None and i.tag == tag.lower())):
r.append(i)
return ChromiumFilter(r)
def _search_one(_list, index=1, displayed=None, checked=None, selected=None, enabled=None, clickable=None,
have_rect=None, have_text=None):
have_rect=None, have_text=None, tag=None):
"""或关系筛选元素,获取一个结果
:param index: 元素序号从1开始
:param displayed: 是否显示boolNone为忽略该项
@ -478,6 +426,7 @@ def _search_one(_list, index=1, displayed=None, checked=None, selected=None, ena
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 元素类型
:return: 筛选结果
"""
num = 0
@ -496,12 +445,13 @@ def _search_one(_list, index=1, displayed=None, checked=None, selected=None, ena
or (have_rect is not None and (have_rect is True and i.states.has_rect)
or (have_rect is False and not i.states.has_rect))
or (have_text is not None and (have_text is True and i.raw_text)
or (have_text is False and not i.raw_text))):
or (have_text is False and not i.raw_text))
or (tag is not None and i.tag == tag.lower())):
num += 1
if num == index:
return i
return NoneElement(_list._page, method='filter()', args={'displayed': displayed,
'checked': checked, 'selected': selected,
'enabled': enabled, 'clickable': clickable,
'have_rect': have_rect, 'have_text': have_text})
return NoneElement(_list._owner, method='filter()', args={'displayed': displayed, 'checked': checked,
'selected': selected, 'enabled': enabled,
'clickable': clickable, 'have_rect': have_rect,
'have_text': have_text, 'tag': tag})

View File

@ -2,47 +2,79 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, List, Optional, Iterable
from typing import Union, List, Optional, Iterable, Dict
from .._base.base import BaseParser
from .._elements.chromium_element import ChromiumElement
from .._elements.session_element import SessionElement
def get_eles(locators: Union[List[str], tuple],
owner: BaseParser,
any_one: bool = False,
first_ele: bool = True,
timeout: float = 10) -> dict: ...
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.session_page import SessionPage
class SessionElementsList(list):
_page = ...
_owner: SessionPage = ...
def __init__(self, page=None, *args): ...
@property
def get(self) -> Getter: ...
@property
def filter(self) -> SessionFilter: ...
@property
def filter_one(self) -> SessionFilterOne: ...
def __init__(self,
owner: SessionPage = None,
*args):
"""
:param owner: 产生元素列表的页面
:param args:
"""
...
def __next__(self) -> SessionElement: ...
def __getitem__(self, _i) -> Union[SessionElement, SessionElementsList]: ...
def __iter__(self) -> List[SessionElement]: ...
@property
def get(self) -> Getter:
"""返回用于属性的对象"""
...
@property
def filter(self) -> SessionFilter:
"""返回用于筛选多个元素的对象"""
...
@property
def filter_one(self) -> SessionFilterOne:
"""用于筛选单个元素的对象"""
...
class ChromiumElementsList(SessionElementsList):
_owner: ChromiumBase = ...
def __init__(self,
owner: ChromiumBase = None,
*args):
"""
:param owner: 产生元素列表的页面
:param args:
"""
...
def __next__(self) -> ChromiumElement: ...
def __getitem__(self, _i) -> Union[ChromiumElement, ChromiumElementsList]: ...
def __iter__(self) -> List[ChromiumElement]: ...
@property
def filter(self) -> ChromiumFilter: ...
def filter(self) -> ChromiumFilter:
"""返回用于筛选多个元素的对象"""
...
@property
def filter_one(self) -> ChromiumFilterOne: ...
def filter_one(self) -> ChromiumFilterOne:
"""用于筛选单个元素的对象"""
...
def search(self,
displayed: Optional[bool] = None,
@ -51,7 +83,20 @@ class ChromiumElementsList(SessionElementsList):
enabled: Optional[bool] = None,
clickable: Optional[bool] = None,
have_rect: Optional[bool] = None,
have_text: Optional[bool] = None) -> ChromiumFilter: ...
have_text: Optional[bool] = None,
tag: str = None) -> ChromiumFilter:
"""或关系筛选元素
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 指定的元素类型
:return: 筛选结果
"""
...
def search_one(self,
index: int = 1,
@ -61,28 +106,78 @@ class ChromiumElementsList(SessionElementsList):
enabled: Optional[bool] = None,
clickable: Optional[bool] = None,
have_rect: Optional[bool] = None,
have_text: Optional[bool] = None) -> ChromiumElement: ...
def __next__(self) -> ChromiumElement: ...
have_text: Optional[bool] = None,
tag: str = None) -> ChromiumElement:
"""或关系筛选元素,获取一个结果
:param index: 元素序号从1开始
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 指定的元素类型
:return: 筛选结果
"""
...
class SessionFilterOne(object):
_list: SessionElementsList = ...
_index: int = ...
def __init__(self, _list: SessionElementsList, index: int = 1): ...
def __init__(self, _list: SessionElementsList):
"""
:param _list: 元素列表对象
"""
...
def __call__(self, index: int = 1) -> SessionFilterOne: ...
def __call__(self, index: int = 1) -> SessionFilterOne:
"""返回结果中第几个元素
:param index: 元素序号从1开始
:return: 对象自身
"""
...
def attr(self, name: str, value: str, equal: bool = True) -> SessionElement: ...
def tag(self, name: str, equal: bool = True) -> SessionElement:
    """Filter for one kind of element by tag name.
    :param name: tag name of the element
    :param equal: True matches elements with this tag, False matches elements without it
    :return: filter result
    """
    ...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionElement: ...
def attr(self, name: str, value: str, equal: bool = True) -> SessionElement:
"""以是否拥有某个attribute值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionElement:
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
...
def _get_attr(self,
name: str,
value: str,
method: str,
equal: bool = True) -> SessionElement: ...
equal: bool = True) -> SessionElement:
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
...
class SessionFilter(SessionFilterOne):
@ -96,54 +191,176 @@ class SessionFilter(SessionFilterOne):
def __getitem__(self, item: int) -> SessionElement: ...
@property
def get(self) -> Getter: ...
def get(self) -> Getter:
"""返回用于获取元素属性的对象"""
...
def attr(self, name: str, value: str, equal: bool = True) -> SessionFilter: ...
def tag(self, name: str, equal: bool = True) -> SessionFilter:
    """Filter for one kind of element by tag name.
    :param name: tag name of the element
    :param equal: True matches elements with this tag, False matches elements without it
    :return: filter result
    """
    ...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionFilter: ...
def attr(self, name: str, value: str, equal: bool = True) -> SessionFilter:
"""以是否拥有某个attribute值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> SessionFilter:
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
...
def _get_attr(self,
name: str,
value: str,
method: str,
equal: bool = True) -> SessionFilter: ...
equal: bool = True) -> SessionFilter:
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
...
class ChromiumFilterOne(SessionFilterOne):
_list: ChromiumElementsList = ...
def __init__(self, _list: ChromiumElementsList): ...
def __init__(self, _list: ChromiumElementsList):
"""
:param _list: 元素列表对象
"""
...
def __call__(self, index: int = 1) -> ChromiumFilterOne: ...
def __call__(self, index: int = 1) -> ChromiumFilterOne:
"""返回结果中第几个元素
:param index: 元素序号从1开始
:return: 对象自身
"""
...
def displayed(self, equal: bool = True) -> ChromiumElement: ...
def tag(self, name: str, equal: bool = True) -> ChromiumElement:
    """Filter for one kind of element by tag name.
    :param name: tag name of the element
    :param equal: True matches elements with this tag, False matches elements without it
    :return: filter result
    """
    ...
def checked(self, equal: bool = True) -> ChromiumElement: ...
def attr(self, name: str, value: str, equal: bool = True) -> ChromiumElement:
"""以是否拥有某个attribute值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def selected(self, equal: bool = True) -> ChromiumElement: ...
def text(self,
text: str,
fuzzy: bool = True,
contain: bool = True) -> ChromiumElement:
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
...
def enabled(self, equal: bool = True) -> ChromiumElement: ...
def displayed(self, equal: bool = True) -> ChromiumElement:
"""以是否显示为条件筛选元素
:param equal: 是否匹配显示的元素False匹配不显示的
:return: 筛选结果
"""
...
def clickable(self, equal: bool = True) -> ChromiumElement: ...
def checked(self, equal: bool = True) -> ChromiumElement:
"""以是否被选中为条件筛选元素
:param equal: 是否匹配被选中的元素False匹配不被选中的
:return: 筛选结果
"""
...
def have_rect(self, equal: bool = True) -> ChromiumElement: ...
def selected(self, equal: bool = True) -> ChromiumElement:
"""以是否被选择为条件筛选元素,用于<select>元素项目
:param equal: 是否匹配被选择的元素False匹配不被选择的
:return: 筛选结果
"""
...
def style(self, name: str, value: str, equal: bool = True) -> ChromiumElement: ...
def enabled(self, equal: bool = True) -> ChromiumElement:
"""以是否可用为条件筛选元素
:param equal: 是否匹配可用的元素False表示匹配disabled状态的
:return: 筛选结果
"""
...
def clickable(self, equal: bool = True) -> ChromiumElement:
"""以是否可点击为条件筛选元素
:param equal: 是否匹配可点击的元素False表示匹配不是可点击的
:return: 筛选结果
"""
...
def have_rect(self, equal: bool = True) -> ChromiumElement:
"""以是否有大小为条件筛选元素
:param equal: 是否匹配有大小的元素False表示匹配没有大小的
:return: 筛选结果
"""
...
def style(self, name: str, value: str, equal: bool = True) -> ChromiumElement:
"""以是否拥有某个style值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def property(self,
name: str,
value: str, equal: bool = True) -> ChromiumElement: ...
def attr(self, name: str, value: str, equal: bool = True) -> ChromiumElement: ...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumElement: ...
value: str, equal: bool = True) -> ChromiumElement:
"""以是否拥有某个property值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def _get_attr(self,
name: str,
value: str,
method: str, equal: bool = True) -> ChromiumElement: ...
method: str, equal: bool = True) -> ChromiumElement:
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
...
def _any_state(self, name: str, equal: bool = True) -> ChromiumElement: ...
def _any_state(self, name: str, equal: bool = True) -> ChromiumElement:
"""
:param name: 状态名称
:param equal: 是否是指定状态False表示否定状态
:return: 选中的列表
"""
...
class ChromiumFilter(ChromiumFilterOne):
@ -157,38 +374,97 @@ class ChromiumFilter(ChromiumFilterOne):
def __getitem__(self, item: int) -> ChromiumElement: ...
@property
def get(self) -> Getter: ...
def get(self) -> Getter:
"""返回用于获取元素属性的对象"""
...
def displayed(self, equal: bool = True) -> ChromiumFilter: ...
def tag(self, name: str, equal: bool = True) -> ChromiumFilter:
    """Filter for one kind of element by tag name.
    :param name: tag name of the element
    :param equal: True matches elements with this tag, False matches elements without it
    :return: filter result
    """
    ...
def checked(self, equal: bool = True) -> ChromiumFilter: ...
def attr(self, name: str, value: str, equal: bool = True) -> ChromiumFilter:
"""以是否拥有某个attribute值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def selected(self, equal: bool = True) -> ChromiumFilter: ...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumFilter:
"""以是否含有指定文本为条件筛选元素
:param text: 用于匹配的文本
:param fuzzy: 是否模糊匹配
:param contain: 是否包含该字符串False表示不包含
:return: 筛选结果
"""
...
def enabled(self, equal: bool = True) -> ChromiumFilter: ...
def displayed(self, equal: bool = True) -> ChromiumFilter:
"""以是否显示为条件筛选元素
:param equal: 是否匹配显示的元素False匹配不显示的
:return: 筛选结果
"""
...
def clickable(self, equal: bool = True) -> ChromiumFilter: ...
def checked(self, equal: bool = True) -> ChromiumFilter:
"""以是否被选中为条件筛选元素
:param equal: 是否匹配被选中的元素False匹配不被选中的
:return: 筛选结果
"""
...
def have_rect(self, equal: bool = True) -> ChromiumFilter: ...
def selected(self, equal: bool = True) -> ChromiumFilter:
"""以是否被选择为条件筛选元素,用于<select>元素项目
:param equal: 是否匹配被选择的元素False匹配不被选择的
:return: 筛选结果
"""
...
def style(self, name: str, value: str, equal: bool = True) -> ChromiumFilter: ...
def enabled(self, equal: bool = True) -> ChromiumFilter:
"""以是否可用为条件筛选元素
:param equal: 是否匹配可用的元素False表示匹配disabled状态的
:return: 筛选结果
"""
...
def clickable(self, equal: bool = True) -> ChromiumFilter:
"""以是否可点击为条件筛选元素
:param equal: 是否匹配可点击的元素False表示匹配不是可点击的
:return: 筛选结果
"""
...
def have_rect(self, equal: bool = True) -> ChromiumFilter:
"""以是否有大小为条件筛选元素
:param equal: 是否匹配有大小的元素False表示匹配没有大小的
:return: 筛选结果
"""
...
def style(self, name: str, value: str, equal: bool = True) -> ChromiumFilter:
"""以是否拥有某个style值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def property(self,
name: str,
value: str, equal: bool = True) -> ChromiumFilter: ...
def attr(self, name: str, value: str, equal: bool = True) -> ChromiumFilter: ...
def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumFilter: ...
def search(self,
displayed: Optional[bool] = None,
checked: Optional[bool] = None,
selected: Optional[bool] = None,
enabled: Optional[bool] = None,
clickable: Optional[bool] = None,
have_rect: Optional[bool] = None,
have_text: Optional[bool] = None) -> ChromiumFilter: ...
value: str, equal: bool = True) -> ChromiumFilter:
"""以是否拥有某个property值为条件筛选元素
:param name: 属性名称
:param value: 属性值
:param equal: True表示匹配name值为value值的元素False表示匹配name值不为value值的
:return: 筛选结果
"""
...
def search_one(self,
index: int = 1,
@ -198,23 +474,114 @@ class ChromiumFilter(ChromiumFilterOne):
enabled: Optional[bool] = None,
clickable: Optional[bool] = None,
have_rect: Optional[bool] = None,
have_text: Optional[bool] = None) -> ChromiumElement: ...
have_text: Optional[bool] = None,
tag: str = None) -> ChromiumElement:
"""或关系筛选元素,获取一个结果
:param index: 元素序号从1开始
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 指定的元素类型
:return: 筛选结果
"""
...
def search(self,
displayed: Optional[bool] = None,
checked: Optional[bool] = None,
selected: Optional[bool] = None,
enabled: Optional[bool] = None,
clickable: Optional[bool] = None,
have_rect: Optional[bool] = None,
have_text: Optional[bool] = None,
tag: str = None) -> ChromiumFilter:
"""或关系筛选元素
:param displayed: 是否显示boolNone为忽略该项
:param checked: 是否被选中boolNone为忽略该项
:param selected: 是否被选择boolNone为忽略该项
:param enabled: 是否可用boolNone为忽略该项
:param clickable: 是否可点击boolNone为忽略该项
:param have_rect: 是否拥有大小和位置boolNone为忽略该项
:param have_text: 是否含有文本boolNone为忽略该项
:param tag: 指定的元素类型
:return: 筛选结果
"""
...
def _get_attr(self,
name: str,
value: str,
method: str, equal: bool = True) -> ChromiumFilter: ...
method: str, equal: bool = True) -> ChromiumFilter:
"""返回通过某个方法可获得某个值的元素
:param name: 属性名称
:param value: 属性值
:param method: 方法名称
:return: 筛选结果
"""
...
def _any_state(self, name: str, equal: bool = True) -> ChromiumFilter: ...
def _any_state(self, name: str, equal: bool = True) -> ChromiumFilter:
"""
:param name: 状态名称
:param equal: 是否是指定状态False表示否定状态
:return: 选中的列表
"""
...
class Getter(object):
_list: SessionElementsList = ...
def __init__(self, _list: SessionElementsList): ...
def __init__(self, _list: SessionElementsList):
"""
:param _list: 元素列表对象
"""
...
def links(self) -> List[str]: ...
def links(self) -> List[str]:
"""返回所有元素的link属性组成的列表"""
...
def texts(self) -> List[str]: ...
def texts(self) -> List[str]:
"""返回所有元素的text属性组成的列表"""
...
def attrs(self, name: str) -> List[str]: ...
def attrs(self, name: str) -> List[str]:
"""返回所有元素指定的attr属性组成的列表
:param name: 属性名称
:return: 属性文本组成的列表
"""
...
def get_eles(locators: Union[str, tuple, List[Union[str, tuple]]],
             owner: BaseParser,
             any_one: bool = False,
             first_ele: bool = True,
             timeout: float = 10) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
                                           Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]:
    """Look up several locators at once and collect the results.
    :param locators: a single locator, or a list of locators
    :param owner: page or element object to search in
    :param any_one: whether to return as soon as any one locator is found
    :param first_ele: whether to fetch only the first element for each locator
    :param timeout: timeout in seconds
    :return: dict keyed by locator; each value is a list when first_ele is False, otherwise a
             single element; entries with no result are falsy
             (NOTE(review): earlier wording said False - confirm the exact value for a miss)
    """
    ...
def get_frame(owner: BaseParser,
              loc_ind_ele: Union[str, int, tuple, ChromiumFrame, ChromiumElement],
              timeout: float = None) -> ChromiumFrame:
    """Get one frame object from the page.
    :param owner: object in which the element is searched
    :param loc_ind_ele: locator, iframe index, or ChromiumFrame object; indexes start at 1 and
                        a negative number counts from the end
    :param timeout: timeout for the element lookup
    :return: ChromiumFrame object
    """
    ...

View File

@ -2,9 +2,10 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from platform import system
from ..errors import AlertExistsError
@ -21,18 +22,14 @@ class Keys:
CANCEL = '\ue001' # ^break
HELP = '\ue002'
BACKSPACE = '\ue003'
BACK_SPACE = BACKSPACE
TAB = '\ue004'
CLEAR = '\ue005'
RETURN = '\ue006'
ENTER = '\ue007'
SHIFT = '\ue008'
LEFT_SHIFT = SHIFT
CONTROL = '\ue009'
CTRL = '\ue009'
LEFT_CONTROL = CONTROL
ALT = '\ue00a'
LEFT_ALT = ALT
PAUSE = '\ue00b'
ESCAPE = '\ue00c'
SPACE = '\ue00d'
@ -41,13 +38,9 @@ class Keys:
END = '\ue010'
HOME = '\ue011'
LEFT = '\ue012'
ARROW_LEFT = LEFT
UP = '\ue013'
ARROW_UP = UP
RIGHT = '\ue014'
ARROW_RIGHT = RIGHT
DOWN = '\ue015'
ARROW_DOWN = DOWN
INSERT = '\ue016'
DELETE = '\ue017'
DEL = '\ue017'
@ -219,15 +212,15 @@ keyDefinitions = {
'\ue005': {'keyCode': 12, 'shiftKeyCode': 101, 'key': 'Clear', 'code': 'Numpad5', 'shiftKey': '5', 'location': 3},
'\ue006': {'keyCode': 13, 'code': 'NumpadEnter', 'key': 'Enter', 'text': '\r', 'location': 3},
'\ue00b': {'keyCode': 19, 'code': 'Pause', 'key': 'Pause'},
'CapsLock': {'keyCode': 20, 'code': 'CapsLock', 'key': 'CapsLock'},
# 'CapsLock': {'keyCode': 20, 'code': 'CapsLock', 'key': 'CapsLock'},
'\ue00c': {'keyCode': 27, 'code': 'Escape', 'key': 'Escape'},
'Convert': {'keyCode': 28, 'code': 'Convert', 'key': 'Convert'},
'NonConvert': {'keyCode': 29, 'code': 'NonConvert', 'key': 'NonConvert'},
# 'Convert': {'keyCode': 28, 'code': 'Convert', 'key': 'Convert'},
# 'NonConvert': {'keyCode': 29, 'code': 'NonConvert', 'key': 'NonConvert'},
'\ue010': {'keyCode': 35, 'code': 'End', 'key': 'End'},
# 'Numpad1': {'keyCode': 35, 'shiftKeyCode': 97, 'key': 'End', 'code': 'Numpad1', 'shiftKey': '1', 'location': 3},
'Select': {'keyCode': 41, 'code': 'Select', 'key': 'Select'},
'Open': {'keyCode': 43, 'code': 'Open', 'key': 'Execute'},
'PrintScreen': {'keyCode': 44, 'code': 'PrintScreen', 'key': 'PrintScreen'},
# 'Select': {'keyCode': 41, 'code': 'Select', 'key': 'Select'},
# 'Open': {'keyCode': 43, 'code': 'Open', 'key': 'Execute'},
# 'PrintScreen': {'keyCode': 44, 'code': 'PrintScreen', 'key': 'PrintScreen'},
'\ue016': {'keyCode': 45, 'code': 'Insert', 'key': 'Insert'},
# 'Numpad0': {'keyCode': 45, 'shiftKeyCode': 96, 'key': 'Insert', 'code': 'Numpad0', 'shiftKey': '0', 'location': 3},
'\ue017': {'keyCode': 46, 'code': 'Delete', 'key': 'Delete'},
@ -242,36 +235,7 @@ keyDefinitions = {
'\ue020': {'keyCode': 54, 'code': 'Digit6', 'shiftKey': '^', 'key': '6'},
'\ue021': {'keyCode': 55, 'code': 'Digit7', 'shiftKey': '&', 'key': '7'},
'\ue022': {'keyCode': 56, 'code': 'Digit8', 'shiftKey': '*', 'key': '8'},
'\ue023': {'keyCode': 57, 'code': 'Digit9', 'shiftKey': '\(', 'key': '9'},
'KeyA': {'keyCode': 65, 'code': 'KeyA', 'shiftKey': 'A', 'key': 'a'},
'KeyB': {'keyCode': 66, 'code': 'KeyB', 'shiftKey': 'B', 'key': 'b'},
'KeyC': {'keyCode': 67, 'code': 'KeyC', 'shiftKey': 'C', 'key': 'c'},
'KeyD': {'keyCode': 68, 'code': 'KeyD', 'shiftKey': 'D', 'key': 'd'},
'KeyE': {'keyCode': 69, 'code': 'KeyE', 'shiftKey': 'E', 'key': 'e'},
'KeyF': {'keyCode': 70, 'code': 'KeyF', 'shiftKey': 'F', 'key': 'f'},
'KeyG': {'keyCode': 71, 'code': 'KeyG', 'shiftKey': 'G', 'key': 'g'},
'KeyH': {'keyCode': 72, 'code': 'KeyH', 'shiftKey': 'H', 'key': 'h'},
'KeyI': {'keyCode': 73, 'code': 'KeyI', 'shiftKey': 'I', 'key': 'i'},
'KeyJ': {'keyCode': 74, 'code': 'KeyJ', 'shiftKey': 'J', 'key': 'j'},
'KeyK': {'keyCode': 75, 'code': 'KeyK', 'shiftKey': 'K', 'key': 'k'},
'KeyL': {'keyCode': 76, 'code': 'KeyL', 'shiftKey': 'L', 'key': 'l'},
'KeyM': {'keyCode': 77, 'code': 'KeyM', 'shiftKey': 'M', 'key': 'm'},
'KeyN': {'keyCode': 78, 'code': 'KeyN', 'shiftKey': 'N', 'key': 'n'},
'KeyO': {'keyCode': 79, 'code': 'KeyO', 'shiftKey': 'O', 'key': 'o'},
'KeyP': {'keyCode': 80, 'code': 'KeyP', 'shiftKey': 'P', 'key': 'p'},
'KeyQ': {'keyCode': 81, 'code': 'KeyQ', 'shiftKey': 'Q', 'key': 'q'},
'KeyR': {'keyCode': 82, 'code': 'KeyR', 'shiftKey': 'R', 'key': 'r'},
'KeyS': {'keyCode': 83, 'code': 'KeyS', 'shiftKey': 'S', 'key': 's'},
'KeyT': {'keyCode': 84, 'code': 'KeyT', 'shiftKey': 'T', 'key': 't'},
'KeyU': {'keyCode': 85, 'code': 'KeyU', 'shiftKey': 'U', 'key': 'u'},
'KeyV': {'keyCode': 86, 'code': 'KeyV', 'shiftKey': 'V', 'key': 'v'},
'KeyW': {'keyCode': 87, 'code': 'KeyW', 'shiftKey': 'W', 'key': 'w'},
'KeyX': {'keyCode': 88, 'code': 'KeyX', 'shiftKey': 'X', 'key': 'x'},
'KeyY': {'keyCode': 89, 'code': 'KeyY', 'shiftKey': 'Y', 'key': 'y'},
'KeyZ': {'keyCode': 90, 'code': 'KeyZ', 'shiftKey': 'Z', 'key': 'z'},
'MetaLeft': {'keyCode': 91, 'code': 'MetaLeft', 'key': 'Meta'},
'MetaRight': {'keyCode': 92, 'code': 'MetaRight', 'key': 'Meta'},
'ContextMenu': {'keyCode': 93, 'code': 'ContextMenu', 'key': 'ContextMenu'},
'\ue023': {'keyCode': 57, 'code': 'Digit9', 'shiftKey': r'\(', 'key': '9'},
'\ue024': {'keyCode': 106, 'code': 'NumpadMultiply', 'key': '*', 'location': 3},
'\ue025': {'keyCode': 107, 'code': 'NumpadAdd', 'key': '+', 'location': 3},
'\ue027': {'keyCode': 109, 'code': 'NumpadSubtract', 'key': '-', 'location': 3},
@ -288,69 +252,94 @@ keyDefinitions = {
'\ue03a': {'keyCode': 121, 'code': 'F10', 'key': 'F10'},
'\ue03b': {'keyCode': 122, 'code': 'F11', 'key': 'F11'},
'\ue03c': {'keyCode': 123, 'code': 'F12', 'key': 'F12'},
'F13': {'keyCode': 124, 'code': 'F13', 'key': 'F13'},
'F14': {'keyCode': 125, 'code': 'F14', 'key': 'F14'},
'F15': {'keyCode': 126, 'code': 'F15', 'key': 'F15'},
'F16': {'keyCode': 127, 'code': 'F16', 'key': 'F16'},
'F17': {'keyCode': 128, 'code': 'F17', 'key': 'F17'},
'F18': {'keyCode': 129, 'code': 'F18', 'key': 'F18'},
'F19': {'keyCode': 130, 'code': 'F19', 'key': 'F19'},
'F20': {'keyCode': 131, 'code': 'F20', 'key': 'F20'},
'F21': {'keyCode': 132, 'code': 'F21', 'key': 'F21'},
'F22': {'keyCode': 133, 'code': 'F22', 'key': 'F22'},
'F23': {'keyCode': 134, 'code': 'F23', 'key': 'F23'},
'F24': {'keyCode': 135, 'code': 'F24', 'key': 'F24'},
'NumLock': {'keyCode': 144, 'code': 'NumLock', 'key': 'NumLock'},
'ScrollLock': {'keyCode': 145, 'code': 'ScrollLock', 'key': 'ScrollLock'},
'AudioVolumeMute': {'keyCode': 173, 'code': 'AudioVolumeMute', 'key': 'AudioVolumeMute'},
'AudioVolumeDown': {'keyCode': 174, 'code': 'AudioVolumeDown', 'key': 'AudioVolumeDown'},
'AudioVolumeUp': {'keyCode': 175, 'code': 'AudioVolumeUp', 'key': 'AudioVolumeUp'},
'MediaTrackNext': {'keyCode': 176, 'code': 'MediaTrackNext', 'key': 'MediaTrackNext'},
'MediaTrackPrevious': {'keyCode': 177, 'code': 'MediaTrackPrevious', 'key': 'MediaTrackPrevious'},
'MediaStop': {'keyCode': 178, 'code': 'MediaStop', 'key': 'MediaStop'},
'MediaPlayPause': {'keyCode': 179, 'code': 'MediaPlayPause', 'key': 'MediaPlayPause'},
'\ue018': {'keyCode': 186, 'code': 'Semicolon', 'shiftKey': ':', 'key': ';'},
'Equal': {'keyCode': 187, 'code': 'Equal', 'shiftKey': '+', 'key': '='},
'\ue019': {'keyCode': 187, 'code': 'NumpadEqual', 'key': '=', 'location': 3},
'Comma': {'keyCode': 188, 'code': 'Comma', 'shiftKey': '<', 'key': ','},
'Minus': {'keyCode': 189, 'code': 'Minus', 'shiftKey': '_', 'key': '-'},
'Period': {'keyCode': 190, 'code': 'Period', 'shiftKey': '>', 'key': '.'},
'Slash': {'keyCode': 191, 'code': 'Slash', 'shiftKey': '?', 'key': '/'},
'Backquote': {'keyCode': 192, 'code': 'Backquote', 'shiftKey': '~', 'key': '`'},
'BracketLeft': {'keyCode': 219, 'code': 'BracketLeft', 'shiftKey': '{', 'key': '['},
'Backslash': {'keyCode': 220, 'code': 'Backslash', 'shiftKey': '|', 'key': '\\'},
'BracketRight': {'keyCode': 221, 'code': 'BracketRight', 'shiftKey': '}', 'key': ']'},
'Quote': {'keyCode': 222, 'code': 'Quote', 'shiftKey': '"', 'key': '\''},
'AltGraph': {'keyCode': 225, 'code': 'AltGraph', 'key': 'AltGraph'},
'Props': {'keyCode': 247, 'code': 'Props', 'key': 'CrSel'},
'Cancel': {'keyCode': 3, 'key': 'Cancel', 'code': 'Abort'},
'Clear': {'keyCode': 12, 'key': 'Clear', 'code': 'Numpad5', 'location': 3},
'Shift': {'keyCode': 16, 'key': 'Shift', 'code': 'ShiftLeft'},
'Control': {'keyCode': 17, 'key': 'Control', 'code': 'ControlLeft'},
'Alt': {'keyCode': 18, 'key': 'Alt', 'code': 'AltLeft'},
'Accept': {'keyCode': 30, 'key': 'Accept'},
'ModeChange': {'keyCode': 31, 'key': 'ModeChange'},
'Print': {'keyCode': 42, 'key': 'Print'},
'Execute': {'keyCode': 43, 'key': 'Execute', 'code': 'Open'},
'\u0000': {'keyCode': 46, 'key': '\u0000', 'code': 'NumpadDecimal', 'location': 3},
'Attn': {'keyCode': 246, 'key': 'Attn'},
'CrSel': {'keyCode': 247, 'key': 'CrSel', 'code': 'Props'},
'ExSel': {'keyCode': 248, 'key': 'ExSel'},
'EraseEof': {'keyCode': 249, 'key': 'EraseEof'},
'Play': {'keyCode': 250, 'key': 'Play'},
'ZoomOut': {'keyCode': 251, 'key': 'ZoomOut'},
'Power': {'key': 'Power', 'code': 'Power'},
'Eject': {'key': 'Eject', 'code': 'Eject'},
# 'KeyA': {'keyCode': 65, 'code': 'KeyA', 'shiftKey': 'A', 'key': 'a'},
# 'KeyB': {'keyCode': 66, 'code': 'KeyB', 'shiftKey': 'B', 'key': 'b'},
# 'KeyC': {'keyCode': 67, 'code': 'KeyC', 'shiftKey': 'C', 'key': 'c'},
# 'KeyD': {'keyCode': 68, 'code': 'KeyD', 'shiftKey': 'D', 'key': 'd'},
# 'KeyE': {'keyCode': 69, 'code': 'KeyE', 'shiftKey': 'E', 'key': 'e'},
# 'KeyF': {'keyCode': 70, 'code': 'KeyF', 'shiftKey': 'F', 'key': 'f'},
# 'KeyG': {'keyCode': 71, 'code': 'KeyG', 'shiftKey': 'G', 'key': 'g'},
# 'KeyH': {'keyCode': 72, 'code': 'KeyH', 'shiftKey': 'H', 'key': 'h'},
# 'KeyI': {'keyCode': 73, 'code': 'KeyI', 'shiftKey': 'I', 'key': 'i'},
# 'KeyJ': {'keyCode': 74, 'code': 'KeyJ', 'shiftKey': 'J', 'key': 'j'},
# 'KeyK': {'keyCode': 75, 'code': 'KeyK', 'shiftKey': 'K', 'key': 'k'},
# 'KeyL': {'keyCode': 76, 'code': 'KeyL', 'shiftKey': 'L', 'key': 'l'},
# 'KeyM': {'keyCode': 77, 'code': 'KeyM', 'shiftKey': 'M', 'key': 'm'},
# 'KeyN': {'keyCode': 78, 'code': 'KeyN', 'shiftKey': 'N', 'key': 'n'},
# 'KeyO': {'keyCode': 79, 'code': 'KeyO', 'shiftKey': 'O', 'key': 'o'},
# 'KeyP': {'keyCode': 80, 'code': 'KeyP', 'shiftKey': 'P', 'key': 'p'},
# 'KeyQ': {'keyCode': 81, 'code': 'KeyQ', 'shiftKey': 'Q', 'key': 'q'},
# 'KeyR': {'keyCode': 82, 'code': 'KeyR', 'shiftKey': 'R', 'key': 'r'},
# 'KeyS': {'keyCode': 83, 'code': 'KeyS', 'shiftKey': 'S', 'key': 's'},
# 'KeyT': {'keyCode': 84, 'code': 'KeyT', 'shiftKey': 'T', 'key': 't'},
# 'KeyU': {'keyCode': 85, 'code': 'KeyU', 'shiftKey': 'U', 'key': 'u'},
# 'KeyV': {'keyCode': 86, 'code': 'KeyV', 'shiftKey': 'V', 'key': 'v'},
# 'KeyW': {'keyCode': 87, 'code': 'KeyW', 'shiftKey': 'W', 'key': 'w'},
# 'KeyX': {'keyCode': 88, 'code': 'KeyX', 'shiftKey': 'X', 'key': 'x'},
# 'KeyY': {'keyCode': 89, 'code': 'KeyY', 'shiftKey': 'Y', 'key': 'y'},
# 'KeyZ': {'keyCode': 90, 'code': 'KeyZ', 'shiftKey': 'Z', 'key': 'z'},
# 'MetaLeft': {'keyCode': 91, 'code': 'MetaLeft', 'key': 'Meta'},
# 'MetaRight': {'keyCode': 92, 'code': 'MetaRight', 'key': 'Meta'},
# 'ContextMenu': {'keyCode': 93, 'code': 'ContextMenu', 'key': 'ContextMenu'},
# 'F13': {'keyCode': 124, 'code': 'F13', 'key': 'F13'},
# 'F14': {'keyCode': 125, 'code': 'F14', 'key': 'F14'},
# 'F15': {'keyCode': 126, 'code': 'F15', 'key': 'F15'},
# 'F16': {'keyCode': 127, 'code': 'F16', 'key': 'F16'},
# 'F17': {'keyCode': 128, 'code': 'F17', 'key': 'F17'},
# 'F18': {'keyCode': 129, 'code': 'F18', 'key': 'F18'},
# 'F19': {'keyCode': 130, 'code': 'F19', 'key': 'F19'},
# 'F20': {'keyCode': 131, 'code': 'F20', 'key': 'F20'},
# 'F21': {'keyCode': 132, 'code': 'F21', 'key': 'F21'},
# 'F22': {'keyCode': 133, 'code': 'F22', 'key': 'F22'},
# 'F23': {'keyCode': 134, 'code': 'F23', 'key': 'F23'},
# 'F24': {'keyCode': 135, 'code': 'F24', 'key': 'F24'},
# 'NumLock': {'keyCode': 144, 'code': 'NumLock', 'key': 'NumLock'},
# 'ScrollLock': {'keyCode': 145, 'code': 'ScrollLock', 'key': 'ScrollLock'},
# 'AudioVolumeMute': {'keyCode': 173, 'code': 'AudioVolumeMute', 'key': 'AudioVolumeMute'},
# 'AudioVolumeDown': {'keyCode': 174, 'code': 'AudioVolumeDown', 'key': 'AudioVolumeDown'},
# 'AudioVolumeUp': {'keyCode': 175, 'code': 'AudioVolumeUp', 'key': 'AudioVolumeUp'},
# 'MediaTrackNext': {'keyCode': 176, 'code': 'MediaTrackNext', 'key': 'MediaTrackNext'},
# 'MediaTrackPrevious': {'keyCode': 177, 'code': 'MediaTrackPrevious', 'key': 'MediaTrackPrevious'},
# 'MediaStop': {'keyCode': 178, 'code': 'MediaStop', 'key': 'MediaStop'},
# 'MediaPlayPause': {'keyCode': 179, 'code': 'MediaPlayPause', 'key': 'MediaPlayPause'},
# 'Equal': {'keyCode': 187, 'code': 'Equal', 'shiftKey': '+', 'key': '='},
# 'Comma': {'keyCode': 188, 'code': 'Comma', 'shiftKey': '<', 'key': ','},
# 'Minus': {'keyCode': 189, 'code': 'Minus', 'shiftKey': '_', 'key': '-'},
# 'Period': {'keyCode': 190, 'code': 'Period', 'shiftKey': '>', 'key': '.'},
# 'Slash': {'keyCode': 191, 'code': 'Slash', 'shiftKey': '?', 'key': '/'},
# 'Backquote': {'keyCode': 192, 'code': 'Backquote', 'shiftKey': '~', 'key': '`'},
# 'BracketLeft': {'keyCode': 219, 'code': 'BracketLeft', 'shiftKey': '{', 'key': '['},
# 'Backslash': {'keyCode': 220, 'code': 'Backslash', 'shiftKey': '|', 'key': '\\'},
# 'BracketRight': {'keyCode': 221, 'code': 'BracketRight', 'shiftKey': '}', 'key': ']'},
# 'Quote': {'keyCode': 222, 'code': 'Quote', 'shiftKey': '"', 'key': '\''},
# 'AltGraph': {'keyCode': 225, 'code': 'AltGraph', 'key': 'AltGraph'},
# 'Props': {'keyCode': 247, 'code': 'Props', 'key': 'CrSel'},
# 'Cancel': {'keyCode': 3, 'key': 'Cancel', 'code': 'Abort'},
# 'Clear': {'keyCode': 12, 'key': 'Clear', 'code': 'Numpad5', 'location': 3},
# 'Shift': {'keyCode': 16, 'key': 'Shift', 'code': 'ShiftLeft'},
# 'Control': {'keyCode': 17, 'key': 'Control', 'code': 'ControlLeft'},
# 'Alt': {'keyCode': 18, 'key': 'Alt', 'code': 'AltLeft'},
# 'Accept': {'keyCode': 30, 'key': 'Accept'},
# 'ModeChange': {'keyCode': 31, 'key': 'ModeChange'},
# 'Print': {'keyCode': 42, 'key': 'Print'},
# 'Execute': {'keyCode': 43, 'key': 'Execute', 'code': 'Open'},
# 'Attn': {'keyCode': 246, 'key': 'Attn'},
# 'CrSel': {'keyCode': 247, 'key': 'CrSel', 'code': 'Props'},
# 'ExSel': {'keyCode': 248, 'key': 'ExSel'},
# 'EraseEof': {'keyCode': 249, 'key': 'EraseEof'},
# 'Play': {'keyCode': 250, 'key': 'Play'},
# 'ZoomOut': {'keyCode': 251, 'key': 'ZoomOut'},
# 'Power': {'key': 'Power', 'code': 'Power'},
# 'Eject': {'key': 'Eject', 'code': 'Eject'},
}
modifierBit = {'\ue00a': 1,
'\ue009': 2,
'\ue03d': 4,
'\ue008': 8}
modifierBit = {'\ue00a': 1, '\ue009': 2, '\ue03d': 4, '\ue008': 8}
sys = system().lower()
def keys_to_typing(value):
"""把要输入的内容连成字符串,去掉其中 ctrl 等键。
返回的modifier表示是否有按下组合键"""
typing = []
modifier = 0
for val in value:
@ -368,79 +357,65 @@ def keys_to_typing(value):
return modifier, ''.join(typing)
def keyDescriptionForString(_modifiers, keyString): # noqa: C901
shift = _modifiers & 8
description = {'key': '',
'keyCode': 0,
'code': '',
'text': '',
'location': 0}
def make_input_data(modifiers, key, key_up=False):
data = keyDefinitions.get(key)
if not data:
return None
definition = keyDefinitions.get(keyString) # type: ignore
if not definition:
raise ValueError(f'未知按键:{keyString}')
result = {'modifiers': modifiers, 'autoRepeat': False, '_ignore': AlertExistsError}
shift = modifiers & 8
if 'key' in definition:
description['key'] = definition['key']
if shift and definition.get('shiftKey'):
description['key'] = definition['shiftKey']
if shift and data.get('shiftKey'):
result['key'] = data['shiftKey']
result['text'] = data['shiftKey']
elif 'key' in data:
result['key'] = data['key']
if 'keyCode' in definition:
description['keyCode'] = definition['keyCode']
if shift and definition.get('shiftKeyCode'):
description['keyCode'] = definition['shiftKeyCode']
if len(result.get('key', '')) == 1: # type: ignore
result['text'] = data['key']
if 'code' in definition:
description['code'] = definition['code']
sys_text = 'windowsVirtualKeyCode' if sys == 'windows' else 'nativeVirtualKeyCode'
if shift and data.get('shiftKeyCode'):
result[sys_text] = data['shiftKeyCode']
elif 'keyCode' in data:
result[sys_text] = data['keyCode']
if 'location' in definition:
description['location'] = definition['location']
if 'code' in data:
result['code'] = data['code']
if len(description['key']) == 1: # type: ignore
description['text'] = description['key']
if 'location' in data:
result['location'] = data['location']
result['isKeypad'] = data['location'] == 3
else:
result['location'] = 0
result['isKeypad'] = False
if 'text' in definition:
description['text'] = definition['text']
if shift and definition.get('shiftText'):
description['text'] = definition['shiftText']
if shift and data.get('shiftText'):
result['text'] = data['shiftText']
result['unmodifiedText'] = data['shiftText']
elif 'text' in data:
result['text'] = data['text']
result['unmodifiedText'] = data['text']
if _modifiers & ~8:
description['text'] = ''
if modifiers & ~8:
result['text'] = ''
return description
result['type'] = 'keyUp' if key_up else ('keyDown' if result.get('text') else 'rawKeyDown')
return result
def send_key(page, modifier, key):
"""发送一个字,在键盘中的字符触发按键,其它直接发送文本"""
if key in keyDefinitions:
description = keyDescriptionForString(modifier, key)
text = description['text']
data = {'type': 'keyDown' if text else 'rawKeyDown',
'modifiers': modifier,
'windowsVirtualKeyCode': description['keyCode'],
'code': description['code'],
'key': description['key'],
'text': text,
'autoRepeat': False,
'unmodifiedText': text,
'location': description['location'],
'isKeypad': description['location'] == 3,
'_ignore': AlertExistsError}
page.run_cdp('Input.dispatchKeyEvent', **data)
data = make_input_data(modifier, key)
if data:
page._run_cdp('Input.dispatchKeyEvent', **data)
data['type'] = 'keyUp'
page.run_cdp('Input.dispatchKeyEvent', **data)
page._run_cdp('Input.dispatchKeyEvent', **data)
else:
page.run_cdp('Input.insertText', text=key, _ignore=AlertExistsError)
page._run_cdp('Input.insertText', text=key, _ignore=AlertExistsError)
def input_text_or_keys(page, text_or_keys):
"""输入文本也可输入组合键组合键用tuple形式输入
:param page: ChromiumBase对象
:param text_or_keys: 文本值或按键组合
:return: self
"""
if not isinstance(text_or_keys, (tuple, list)):
text_or_keys = (str(text_or_keys),)
modifier, text_or_keys = keys_to_typing(text_or_keys)
@ -451,7 +426,7 @@ def input_text_or_keys(page, text_or_keys):
return
if text_or_keys.endswith(('\n', '\ue007')):
page.run_cdp('Input.insertText', text=text_or_keys[:-1], _ignore=AlertExistsError)
page._run_cdp('Input.insertText', text=text_or_keys[:-1], _ignore=AlertExistsError)
send_key(page, modifier, '\n')
else:
page.run_cdp('Input.insertText', text=text_or_keys, _ignore=AlertExistsError)
page._run_cdp('Input.insertText', text=text_or_keys, _ignore=AlertExistsError)

View File

@ -2,10 +2,9 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Tuple, Dict, Union, Any
from typing import Tuple, Union, Any
from .._pages.chromium_base import ChromiumBase
@ -23,18 +22,14 @@ class Keys:
CANCEL: str
HELP: str
BACKSPACE: str
BACK_SPACE: str
TAB: str
CLEAR: str
RETURN: str
ENTER: str
SHIFT: str
LEFT_SHIFT: str
CONTROL: str
CTRL: str
LEFT_CONTROL: str
ALT: str
LEFT_ALT: str
PAUSE: str
ESCAPE: str
SPACE: str
@ -43,13 +38,9 @@ class Keys:
END: str
HOME: str
LEFT: str
ARROW_LEFT: str
UP: str
ARROW_UP: str
RIGHT: str
ARROW_RIGHT: str
DOWN: str
ARROW_DOWN: str
INSERT: str
DELETE: str
DEL: str
@ -93,13 +84,38 @@ keyDefinitions: dict = ...
modifierBit: dict = ...
def keys_to_typing(value: Union[str, int, list, tuple]) -> Tuple[int, str]: ...
def keys_to_typing(value: Union[str, int, list, tuple]) -> Tuple[int, str]:
    """Join the content to be typed into one string, dropping keys such as ctrl.
    The returned modifier tells whether a key combination is being held down.
    """
    ...
def keyDescriptionForString(_modifiers: int, keyString: str) -> Dict: ...
def make_input_data(modifiers: int,
                    key: str,
                    key_up: bool = False) -> Union[dict, None]:
    """Build the data for one key event.
    :param modifiers: modifier-key setting
    :param key: the key character
    :param key_up: whether the key is being released
    :return: dict of event data, or None when the key has no entry in keyDefinitions
    """
    ...
def send_key(page: ChromiumBase, modifier: int, key: str) -> None: ...
def send_key(page: ChromiumBase, modifier: int, key: str) -> None:
    """Send one character: characters found on the keyboard trigger key events, others are sent as text
    :param page: page on which the action takes place
    :param modifier: modifier-key information
    :param key: the key to input
    :return: None
    """
    ...
def input_text_or_keys(page: ChromiumBase, text_or_keys: Any) -> None: ...
def input_text_or_keys(page: ChromiumBase, text_or_keys: Any) -> None:
    """Input text or a key combination; a key combination is passed as a tuple
    :param page: ChromiumBase object
    :param text_or_keys: text value or key combination
    :return: None
    """
    ...

View File

@ -2,18 +2,13 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from re import split
from .by import By
def locator_to_tuple(loc):
"""解析定位字符串生成dict格式数据
:param loc: 待处理的字符串
:return: 格式 {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]}
"""
loc = _preprocess(loc)
# 多属性查找
@ -83,19 +78,18 @@ def _get_arg(text) -> list:
return [name, None, None] if len(r) != 3 else [name, r[1], r[2]]
def is_loc(text):
"""返回text是否定位符"""
def is_str_loc(text):
    """Return whether text starts with one of the locator prefixes."""
    locator_prefixes = ('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=',
                        'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c=')
    return text.startswith(locator_prefixes)
def is_selenium_loc(loc):
    """Return whether loc is a 2-item tuple of (selenium strategy name, str value)."""
    if not isinstance(loc, tuple) or len(loc) != 2:
        return False
    by, value = loc
    if not isinstance(value, str):
        return False
    return by in ('id', 'xpath', 'link text', 'partial link text', 'name', 'tag name', 'class name',
                  'css selector')
def get_loc(loc, translate_css=False, css_mode=False):
"""接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath
:param loc: 本库定位语法或selenium定位元组
:param translate_css: 是否翻译css selector为xpath用于相对定位
:param css_mode: 是否尽量用css selector方式
:return: DrissionPage定位元组
"""
if isinstance(loc, tuple):
loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
@ -118,10 +112,6 @@ def get_loc(loc, translate_css=False, css_mode=False):
def str_to_xpath_loc(loc):
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
loc_by = 'xpath'
loc = _preprocess(loc)
@ -145,14 +135,14 @@ def str_to_xpath_loc(loc):
# 根据文本查找
elif loc.startswith('text='):
loc_str = f'//*[text()={_make_search_str(loc[5:])}]'
loc_str = f'//*[text()={_quotes_escape(loc[5:])}]'
elif loc.startswith('text:') and loc != 'text:':
loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..'
loc_str = f'//*/text()[contains(., {_quotes_escape(loc[5:])})]/..'
elif loc.startswith('text^') and loc != 'text^':
loc_str = f'//*/text()[starts-with(., {_make_search_str(loc[5:])})]/..'
loc_str = f'//*/text()[starts-with(., {_quotes_escape(loc[5:])})]/..'
elif loc.startswith('text$') and loc != 'text$':
loc_str = f'//*/text()[substring(., string-length(.) - string-length({_make_search_str(loc[5:])}) +1) = ' \
f'{_make_search_str(loc[5:])}]/..'
loc_str = (f'//*/text()[substring(., string-length(.) - string-length({_quotes_escape(loc[5:])}) +1) = '
f'{_quotes_escape(loc[5:])}]/..')
# 用xpath查找
elif loc.startswith(('xpath:', 'xpath=')) and loc not in ('xpath:', 'xpath='):
@ -165,7 +155,7 @@ def str_to_xpath_loc(loc):
# 根据文本模糊查找
elif loc:
loc_str = f'//*/text()[contains(., {_make_search_str(loc)})]/..'
loc_str = f'//*/text()[contains(., {_quotes_escape(loc)})]/..'
else:
loc_str = '//*'
@ -173,10 +163,6 @@ def str_to_xpath_loc(loc):
def str_to_css_loc(loc):
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
loc_by = 'css selector'
loc = _preprocess(loc)
@ -238,31 +224,31 @@ def _make_single_xpath_str(tag: str, text: str) -> tuple:
else:
symbol = r[1]
if symbol == '=': # 精确查找
arg = '.' if r[0] in ('@text()', '@tx()') else r[0]
arg_str = f'{arg}={_make_search_str(r[2])}'
arg = 'text()' if r[0] in ('@text()', '@tx()') else r[0]
arg_str = f'{arg}={_quotes_escape(r[2])}'
elif symbol == '^': # 匹配开头
if r[0] in ('@text()', '@tx()'):
txt_str = f'/text()[starts-with(., {_make_search_str(r[2])})]/..'
txt_str = f'/text()[starts-with(., {_quotes_escape(r[2])})]/..'
arg_str = ''
else:
arg_str = f"starts-with({r[0]},{_make_search_str(r[2])})"
arg_str = f"starts-with({r[0]},{_quotes_escape(r[2])})"
elif symbol == '$': # 匹配结尾
if r[0] in ('@text()', '@tx()'):
txt_str = (f'/text()[substring(., string-length(.) - string-length({_make_search_str(r[2])}) '
f'+1) = {_make_search_str(r[2])}]/..')
txt_str = (f'/text()[substring(., string-length(.) - string-length('
f'{_quotes_escape(r[2])}) +1) = {_quotes_escape(r[2])}]/..')
arg_str = ''
else:
arg_str = (f'substring({r[0]}, string-length({r[0]}) - string-length({_make_search_str(r[2])}) '
f'+1) = {_make_search_str(r[2])}')
arg_str = (f'substring({r[0]}, string-length({r[0]}) - string-length('
f'{_quotes_escape(r[2])}) +1) = {_quotes_escape(r[2])}')
elif symbol == ':': # 模糊查找
if r[0] in ('@text()', '@tx()'):
txt_str = f'/text()[contains(., {_make_search_str(r[2])})]/..'
txt_str = f'/text()[contains(., {_quotes_escape(r[2])})]/..'
arg_str = ''
else:
arg_str = f"contains({r[0]},{_make_search_str(r[2])})"
arg_str = f"contains({r[0]},{_quotes_escape(r[2])})"
else:
raise ValueError(f'符号不正确:{symbol}')
@ -326,17 +312,17 @@ def _make_multi_xpath_str(tag: str, text: str) -> tuple:
txt = r[2]
if symbol == '=':
arg_str = f'{arg}={_make_search_str(txt)}'
arg_str = f'{arg}={_quotes_escape(txt)}'
elif symbol == ':':
arg_str = f'contains({arg},{_make_search_str(txt)})'
arg_str = f'contains({arg},{_quotes_escape(txt)})'
elif symbol == '^':
arg_str = f'starts-with({arg},{_make_search_str(txt)})'
arg_str = f'starts-with({arg},{_quotes_escape(txt)})'
elif symbol == '$':
arg_str = f'substring({arg}, string-length({arg}) - string-length({_make_search_str(txt)}) +1) ' \
f'= {_make_search_str(txt)}'
arg_str = (f'substring({arg}, string-length({arg}) - string-length('
f'{_quotes_escape(txt)}) +1) = {_quotes_escape(txt)}')
else:
raise ValueError(f'符号不正确:{symbol}')
@ -355,11 +341,14 @@ def _make_multi_xpath_str(tag: str, text: str) -> tuple:
return 'xpath', f'//*[{arg_str}]' if arg_str else f'//*'
def _make_search_str(search_str: str) -> str:
""""转义,不知何故不能直接用 \ 来转义
def _quotes_escape(search_str: str) -> str:
""""转义,不知何故不能直接用 斜杠 来转义
:param search_str: 查询字符串
:return: "转义后的字符串
"""
if '"' not in search_str:
return f'"{search_str}"'
parts = search_str.split('"')
parts_num = len(parts)
search_str = 'concat('
@ -444,10 +433,6 @@ def _make_single_css_str(tag: str, text: str) -> tuple:
def translate_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2:
raise ValueError('定位符长度必须为2。')
@ -480,16 +465,12 @@ def translate_loc(loc):
loc_str = f'//a[contains(text(),"{loc[1]}")]'
else:
raise ValueError('无法识别的定位符。')
raise ValueError(f'无法识别的定位符:{loc}')
return loc_by, loc_str
def translate_css_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2:
raise ValueError('定位符长度必须为2。')

View File

@ -2,31 +2,76 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union
def locator_to_tuple(loc: str) -> dict: ...
def locator_to_tuple(loc: str) -> dict:
    """Parse a locator string into dict-format data
    :param loc: the string to process
    :return: format: {'and': bool, 'args': ['attribute name', 'match mode', 'attribute value', negated or not]}
    """
    ...
def is_loc(text: str) -> bool: ...
def is_str_loc(text: str) -> bool:
    """Return whether text is a locator"""
    ...
def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool = False) -> tuple: ...
def is_selenium_loc(loc: tuple) -> bool:
    """Return whether the tuple is a selenium locator"""
    ...
def str_to_xpath_loc(loc: str) -> tuple: ...
def get_loc(loc: Union[tuple, str],
            translate_css: bool = False,
            css_mode: bool = False) -> tuple:
    """Accept this library's locator syntax or a selenium locator tuple and convert it to a standard
    locator tuple; can translate a css selector into xpath
    :param loc: this library's locator syntax or a selenium locator tuple
    :param translate_css: whether to translate css selector into xpath, used for relative locating
    :param css_mode: whether to use the css selector form where possible
    :return: DrissionPage locator tuple
    """
    ...
def str_to_css_loc(loc: str) -> tuple: ...
def str_to_xpath_loc(loc: str) -> tuple:
    """Process an element lookup statement
    :param loc: lookup-syntax string
    :return: locator tuple
    """
    ...
def translate_loc(loc: tuple) -> tuple: ...
def str_to_css_loc(loc: str) -> tuple:
    """Process an element lookup statement
    :param loc: lookup-syntax string
    :return: locator tuple
    """
    ...
def translate_css_loc(loc: tuple) -> tuple: ...
def translate_loc(loc: tuple) -> tuple:
    """Convert a By-type loc tuple into a css selector or xpath type one
    :param loc: By-type loc tuple
    :return: loc tuple of css selector or xpath type
    """
    ...
def css_trans(txt: str) -> str: ...
def translate_css_loc(loc: tuple) -> tuple:
    """Convert a By-type loc tuple into a css selector or xpath type one
    :param loc: By-type loc tuple
    :return: loc tuple of css selector or xpath type
    """
    ...
def css_trans(txt: str) -> str:
    """Escape special characters in a css string
    :param txt: the text to process
    :return: the processed text
    """
    ...

View File

@ -2,9 +2,9 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
class Settings(object):
@ -13,4 +13,14 @@ class Settings(object):
raise_when_wait_failed = False
singleton_tab_obj = True
cdp_timeout = 30
browser_connect_timeout = 30
auto_handle_alert = None
_suffixes_list = str(Path(__file__).parent.absolute() / 'suffixes.dat').replace('\\', '/')
@property
def suffixes_list_path(self):
return Settings._suffixes_list
@suffixes_list_path.setter
def suffixes_list_path(self, path):
Settings._suffixes_list = str(Path(path).absolute()).replace('\\', '/')

View File

@ -0,0 +1,24 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Optional, Union
class Settings(object):
    """Type declarations for the Settings class."""
    raise_when_ele_not_found: bool = ...
    raise_when_click_failed: bool = ...
    raise_when_wait_failed: bool = ...
    singleton_tab_obj: bool = ...
    cdp_timeout: float = ...
    browser_connect_timeout: float = ...
    auto_handle_alert: Optional[bool] = ...
    _suffixes_list: str = ...

    @property
    def suffixes_list_path(self) -> str:
        """Path of the file used to recognise domain-name suffixes"""
        ...

    # The implementation defines a setter for this property; declaring it here
    # lets type checkers accept assignments to suffixes_list_path.
    @suffixes_list_path.setter
    def suffixes_list_path(self, path: Union[str, Path]) -> None:
        """Set the path of the file used to recognise domain-name suffixes
        :param path: file path
        :return: None
        """
        ...

File diff suppressed because it is too large Load Diff

View File

@ -2,13 +2,12 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from platform import system
from shutil import rmtree
from tempfile import gettempdir, TemporaryDirectory
from tempfile import gettempdir
from threading import Lock
from time import perf_counter, sleep
@ -18,52 +17,48 @@ from ..errors import (ContextLostError, ElementLostError, CDPError, PageDisconne
class PortFinder(object):
used_port = {}
used_port = set()
prev_time = 0
lock = Lock()
checked_paths = set()
def __init__(self, path=None):
"""
:param path: 临时文件保存路径为None时使用系统临时文件夹
"""
tmp = Path(path) if path else Path(gettempdir()) / 'DrissionPage'
self.tmp_dir = tmp / 'UserTempFolder'
self.tmp_dir = tmp / 'autoPortData'
self.tmp_dir.mkdir(parents=True, exist_ok=True)
if not PortFinder.used_port:
clean_folder(self.tmp_dir)
if str(self.tmp_dir.absolute()) not in PortFinder.checked_paths:
for i in self.tmp_dir.iterdir():
if i.is_dir() and not port_is_using('127.0.0.1', i.name):
rmtree(i, ignore_errors=True)
PortFinder.checked_paths.add(str(self.tmp_dir.absolute()))
def get_port(self, scope=None):
"""查找一个可用端口
:param scope: 指定端口范围不含最后的数字为None则使用[9600-19600)
:return: 可以使用的端口和用户文件夹路径组成的元组
"""
from random import randint
with PortFinder.lock:
if PortFinder.prev_time and perf_counter() - PortFinder.prev_time > 60:
PortFinder.used_port.clear()
if scope in (True, None):
scope = (9600, 19600)
for i in range(scope[0], scope[1]):
if i in PortFinder.used_port:
scope = (9600, 59600)
max_times = scope[1] - scope[0]
times = 0
while times < max_times:
times += 1
port = randint(*scope)
if port in PortFinder.used_port or port_is_using('127.0.0.1', port):
continue
elif port_is_using('127.0.0.1', i):
PortFinder.used_port[i] = None
continue
path = TemporaryDirectory(dir=self.tmp_dir).name
PortFinder.used_port[i] = path
return i, path
for i in range(scope[0], scope[1]):
if port_is_using('127.0.0.1', i):
continue
rmtree(PortFinder.used_port[i], ignore_errors=True)
return i, TemporaryDirectory(dir=self.tmp_dir).name
raise OSError('未找到可用端口。')
path = self.tmp_dir / str(port)
if path.exists():
try:
rmtree(path)
except:
continue
PortFinder.used_port.add(port)
PortFinder.prev_time = perf_counter()
return port, str(path)
raise OSError('未找到可用端口。')
def port_is_using(ip, port):
"""检查端口是否被占用
:param ip: 浏览器地址
:param port: 浏览器端口
:return: bool
"""
from socket import socket, AF_INET, SOCK_STREAM
s = socket(AF_INET, SOCK_STREAM)
s.settimeout(.1)
@ -73,11 +68,6 @@ def port_is_using(ip, port):
def clean_folder(folder_path, ignore=None):
"""清空一个文件夹除了ignore里的文件和文件夹
:param folder_path: 要清空的文件夹路径
:param ignore: 忽略列表
:return: None
"""
ignore = [] if not ignore else ignore
p = Path(folder_path)
@ -89,13 +79,8 @@ def clean_folder(folder_path, ignore=None):
rmtree(f, True)
def show_or_hide_browser(page, hide=True):
"""执行显示或隐藏浏览器窗口
:param page: ChromePage对象
:param hide: 是否隐藏
:return: None
"""
if not page.address.startswith(('127.0.0.1', 'localhost')):
def show_or_hide_browser(tab, hide=True):
if not tab.browser.address.startswith(('127.0.0.1', 'localhost')):
return
if system().lower() != 'windows':
@ -107,21 +92,16 @@ def show_or_hide_browser(page, hide=True):
except ImportError:
raise ImportError('请先安装pip install pypiwin32')
pid = page._page.process_id
pid = tab.browser.process_id
if not pid:
return None
hds = get_hwnds_from_pid(pid, page.title)
hds = get_hwnds_from_pid(pid, tab.title)
sw = SW_HIDE if hide else SW_SHOW
for hd in hds:
ShowWindow(hd, sw)
def get_browser_progress_id(progress, address):
"""获取浏览器进程id
:param progress: 已知的进程对象没有时传入None
:param address: 浏览器管理地址含端口
:return: 进程id或None
"""
if progress:
return progress.pid
@ -140,11 +120,6 @@ def get_browser_progress_id(progress, address):
def get_hwnds_from_pid(pid, title):
"""通过PID查询句柄ID
:param pid: 进程id
:param title: 窗口标题
:return: 进程句柄组成的列表
"""
try:
from win32gui import IsWindow, GetWindowText, EnumWindows
from win32process import GetWindowThreadProcessId
@ -164,12 +139,6 @@ def get_hwnds_from_pid(pid, title):
def wait_until(function, kwargs=None, timeout=10):
"""等待传入的方法返回值不为假
:param function: 要执行的方法
:param kwargs: 方法参数
:param timeout: 超时时间
:return: 执行结果超时抛出TimeoutError
"""
if kwargs is None:
kwargs = {}
end_time = perf_counter() + timeout
@ -182,21 +151,12 @@ def wait_until(function, kwargs=None, timeout=10):
def configs_to_here(save_name=None):
    """Copy the default ini file to the current directory
    :param save_name: file name to use ('.ini' is appended); None means 'dp_configs.ini'
    :return: None
    """
    file_name = 'dp_configs.ini' if save_name is None else f'{save_name}.ini'
    OptionsManager('default').save(file_name)
def raise_error(result, ignore=None):
"""抛出error对应报错
:param result: 包含error的dict
:param ignore: 要忽略的错误
:return: None
"""
def raise_error(result, browser, ignore=None, user=False):
error = result['error']
if error in ('Cannot find context with specified id', 'Inspected target navigated or closed',
'No frame with given id found'):
@ -217,16 +177,23 @@ def raise_error(result, ignore=None):
r = StorageError()
elif error == 'Sanitizing cookie failed':
r = CookieFormatError(f'cookie格式不正确{result["args"]}')
elif error == 'Invalid header name':
r = ValueError(f'header名不正确。\n参数:{result["args"]["headers"]}')
elif error == 'Given expression does not evaluate to a function':
r = JavaScriptError(f'传入的js无法解析成函数\n{result["args"]["functionDeclaration"]}')
elif error.endswith("' wasn't found"):
r = RuntimeError(f'你的浏览器可能太旧。\n方法:{result["method"]}\n参数:{result["args"]}')
elif result['type'] in ('call_method_error', 'timeout'):
r = RuntimeError(f'没有找到对应功能,方法错误或你的浏览器太旧。\n浏览器版本:{browser.version}\n方法:{result["method"]}')
elif result['type'] == 'timeout':
from DrissionPage import __version__
txt = f'\n错误:{result["error"]}\n方法:{result["method"]}\n参数:{result["args"]}\n' \
f'版本:{__version__}\n超时,可能是浏览器卡了。'
r = TimeoutError(txt)
elif result['type'] == 'call_method_error' and not user:
from DrissionPage import __version__
txt = f'\n错误:{result["error"]}\n方法:{result["method"]}\n参数:{result["args"]}\n' \
f'版本:{__version__}\n出现这个错误可能意味着程序有bug请把错误信息和重现方法' \
'告知作者,谢谢。\n报告网站https://gitee.com/g1879/DrissionPage/issues'
r = TimeoutError(txt) if result['type'] == 'timeout' else CDPError(txt)
r = CDPError(txt)
else:
r = RuntimeError(result)

View File

@ -2,47 +2,108 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from os import popen
from pathlib import Path
from threading import Lock
from typing import Union, Tuple
from .._base.chromium import Chromium
from .._pages.chromium_base import ChromiumBase
class PortFinder(object):
    """Type declarations for PortFinder, which returns a usable port together with a folder path."""
    used_port: set = ...
    prev_time: float = ...
    lock: Lock = ...
    tmp_dir: Path = ...
    checked_paths: set = ...

    def __init__(self, path: Union[str, Path] = None):
        """
        :param path: where temporary files are saved; None uses the system temp folder
        """
        ...

    # Declared as an instance method: the implementation takes self and builds
    # the returned folder path from self.tmp_dir.
    def get_port(self, scope: Tuple[int, int] = None) -> Tuple[int, str]:
        """Find an available port
        :param scope: port range; None uses the default range (9600, 59600)
        :return: tuple of the usable port and the user folder path
        NOTE(review): the range was documented as excluding its last number, but the
        implementation draws with randint(*scope), which can return it - confirm intent.
        """
        ...
def port_is_using(ip: str, port: Union[str, int]) -> bool: ...
def port_is_using(ip: str, port: Union[str, int]) -> bool:
    """Check whether a port is in use
    :param ip: browser address
    :param port: browser port
    :return: bool
    """
    ...
def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ...
def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None:
    """Empty a folder, except for the files and folders listed in ignore
    :param folder_path: path of the folder to empty
    :param ignore: ignore list
    :return: None
    """
    ...
def show_or_hide_browser(page: ChromiumBase, hide: bool = True) -> None: ...
def show_or_hide_browser(tab: ChromiumBase, hide: bool = True) -> None:
    """Show or hide the browser window
    :param tab: ChromiumTab object
    :param hide: whether to hide
    :return: None
    """
    ...
def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ...
def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]:
    """Get the browser process id
    :param progress: known process object; pass None when there is none
    :param address: browser management address, including the port
    :return: process id or None
    NOTE(review): the implementation returns progress.pid when progress is given;
    confirm that the annotated parameter and return types match that value.
    """
    ...
def get_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ...
def get_hwnds_from_pid(pid: Union[str, int], title: str) -> list:
    """Look up window handle ids by PID
    :param pid: process id
    :param title: window title
    :return: list of handles belonging to the process
    """
    ...
def wait_until(function: callable, kwargs: dict = None, timeout: float = 10): ...
def wait_until(function: callable, kwargs: dict = None, timeout: float = 10):
    """Wait until the given callable returns a value that is not falsy
    :param function: the callable to run
    :param kwargs: arguments for the callable
    :param timeout: timeout
    :return: the result of the call; raises TimeoutError on timeout
    """
    ...
def configs_to_here(file_name: Union[Path, str] = None) -> None: ...
def configs_to_here(save_name: Union[Path, str] = None) -> None:
    """Copy the default ini file to the current directory
    :param save_name: file name to use; None names it 'dp_configs.ini'
    :return: None
    """
    ...
def raise_error(result: dict, ignore=None) -> None: ...
def raise_error(result: dict, browser: Chromium, ignore=None, user: bool = False) -> None:
    """Raise the exception that corresponds to an error
    :param result: dict containing the error
    :param browser: browser object
    :param ignore: errors to ignore
    :param user: whether the call was made by the user
    :return: None
    """
    ...

View File

@ -2,26 +2,19 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from datetime import datetime
from html import unescape
from http.cookiejar import Cookie, CookieJar
from os.path import sep
from pathlib import Path
from re import sub, match
from re import sub
from urllib.parse import urlparse, urljoin, urlunparse
from DataRecorder.tools import make_valid_name
from tldextract import extract
from requests.structures import CaseInsensitiveDict
def get_ele_txt(e):
"""获取元素内所有文本
:param e: 元素对象
:return: 元素内所有文本
"""
# 前面无须换行的元素
nowrap_list = ('br', 'sub', 'sup', 'em', 'strong', 'a', 'font', 'b', 'span', 's', 'i', 'del', 'ins', 'img', 'td',
'th', 'abbr', 'bdi', 'bdo', 'cite', 'code', 'data', 'dfn', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby',
@ -37,7 +30,7 @@ def get_ele_txt(e):
if e.tag in noText_list:
return e.raw_text
def get_node_txt(ele, pre: bool = False):
def get_node_txt(ele, pre=False) -> list:
tag = ele.tag
if tag == 'br':
return [True]
@ -59,7 +52,7 @@ def get_ele_txt(e):
if sub('[ \n\t\r]', '', el) != '': # 字符除了回车和空格还有其它内容
txt = el
if not pre:
txt = txt.replace('\r\n', ' ').replace('\n', ' ').strip(' ')
txt = txt.replace('\r\n', ' ').replace('\n', ' ')
txt = sub(r' {2,}', ' ', txt)
str_list.append(txt)
@ -80,25 +73,38 @@ def get_ele_txt(e):
re_str = get_node_txt(e)
if re_str and re_str[-1] == '\n':
re_str.pop()
re_str = ''.join([i if i is not True else '\n' for i in re_str])
return format_html(re_str)
l = len(re_str)
if l > 1:
r = []
for i in range(l - 1):
i1 = re_str[i]
i2 = re_str[i + 1]
if i1 is True:
r.append('\n')
continue
elif i2 is True:
r.append(i1)
continue
elif i1.endswith(' ') and i2.startswith(' '):
i1 = i1[:-1]
r.append(i1)
r.append('\n' if re_str[-1] is True else re_str[-1])
re_str = ''.join(r)
elif not l:
re_str = ''
else:
re_str = re_str[0] if re_str[0] is not True else '\n'
return format_html(re_str.strip())
def format_html(text):
    """Decode html-encoded characters
    :param text: html text
    :return: text with entities unescaped and non-breaking spaces turned into plain spaces;
             falsy input is returned unchanged
    """
    if not text:
        return text
    decoded = unescape(text)
    return decoded.replace('\xa0', ' ')
def location_in_viewport(page, loc_x, loc_y):
"""判断给定的坐标是否在视口中 |n
:param page: ChromePage对象
:param loc_x: 页面绝对坐标x
:param loc_y: 页面绝对坐标y
:return: bool
"""
js = f'''function(){{let x = {loc_x}; let y = {loc_y};
const scrollLeft = document.documentElement.scrollLeft;
const scrollTop = document.documentElement.scrollTop;
@ -106,24 +112,17 @@ def location_in_viewport(page, loc_x, loc_y):
const vHeight = document.documentElement.clientHeight;
if (x< scrollLeft || y < scrollTop || x > vWidth + scrollLeft || y > vHeight + scrollTop){{return false;}}
return true;}}'''
return page.run_js(js)
return page._run_js(js)
def offset_scroll(ele, offset_x, offset_y):
"""接收元素及偏移坐标,把坐标滚动到页面中间,返回该点在视口中的坐标
有偏移量时以元素左上角坐标为基准没有时以click_point为基准
:param ele: 元素对象
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:return: 视口中的坐标
"""
loc_x, loc_y = ele.rect.location
cp_x, cp_y = ele.rect.click_point
lx = loc_x + offset_x if offset_x else cp_x
ly = loc_y + offset_y if offset_y else cp_y
if not location_in_viewport(ele.owner, lx, ly):
clientWidth = ele.owner.run_js('return document.body.clientWidth;')
clientHeight = ele.owner.run_js('return document.body.clientHeight;')
clientWidth = ele.owner._run_js('return document.body.clientWidth;')
clientHeight = ele.owner._run_js('return document.body.clientHeight;')
ele.owner.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2)
cl_x, cl_y = ele.rect.viewport_location
ccp_x, ccp_y = ele.rect.viewport_click_point
@ -133,19 +132,13 @@ def offset_scroll(ele, offset_x, offset_y):
def make_absolute_link(link, baseURI=None):
"""获取绝对url
:param link: 超链接
:param baseURI: 页面或iframe的url
:return: 绝对链接
"""
if not link:
return link
link = link.strip().replace('\\', '/')
parsed = urlparse(link)._asdict()
if baseURI:
p = urlparse(baseURI)._asdict()
baseURI = f'{p["scheme"]}://{p["netloc"]}'
baseURI = baseURI.rstrip('/\\')
# 是相对路径与页面url拼接并返回
if not parsed['netloc']:
@ -162,7 +155,6 @@ def make_absolute_link(link, baseURI=None):
def is_js_func(func):
"""检查文本是否js函数"""
func = func.strip()
if (func.startswith('function') or func.startswith('async ')) and func.endswith('}'):
return True
@ -171,195 +163,7 @@ def is_js_func(func):
return False
def cookie_to_dict(cookie):
    """Convert a cookie into dict form
    :param cookie: Cookie object, string or dict
    :return: cookie dict (a dict argument is returned as is)
    """
    if isinstance(cookie, Cookie):
        result = dict(cookie.__dict__)
        for internal in ('rfc2109', '_rest'):
            result.pop(internal, None)
        return result

    if isinstance(cookie, dict):
        return cookie

    if not isinstance(cookie, str):
        raise TypeError('cookie参数必须为Cookie、str或dict类型。')

    known_attrs = ('domain', 'path', 'expires', 'max-age', 'HttpOnly', 'secure', 'expiry', 'name', 'value')
    # a comma anywhere in the text selects ',' as the separator, otherwise ';'
    separator = ',' if ',' in cookie else ';'
    result = {}
    for piece in cookie.strip().rstrip(';,').split(separator):
        key, _, val = piece.strip().partition('=')
        if key in known_attrs:
            result[key] = val
        else:
            # anything that is not a known attribute is taken as the name=value pair
            result['name'] = key
            result['value'] = val
    return result
def cookies_to_tuple(cookies):
    """Convert cookies into tuple form
    :param cookies: cookies information, can be CookieJar, list, tuple, str or dict
    :return: the cookies as a tuple
    """
    if isinstance(cookies, (list, tuple, CookieJar)):
        return tuple(cookie_to_dict(item) for item in cookies)

    if isinstance(cookies, str):
        pairs = {}
        for piece in cookies.rstrip('; ').split(';'):
            key, eq, val = piece.strip().partition('=')
            # an item without '=' is stored with the value True
            pairs[key] = val if eq else True
        return _dict_cookies_to_tuple(pairs)

    if isinstance(cookies, dict):
        return _dict_cookies_to_tuple(cookies)

    if isinstance(cookies, Cookie):
        return (cookie_to_dict(cookies),)

    raise TypeError('cookies参数必须为Cookie、CookieJar、list、tuple、str或dict类型。')
def set_session_cookies(session, cookies):
    """Set the cookies of a Session object
    :param session: Session object
    :param cookies: cookies information
    :return: None
    """
    accepted = ('version', 'port', 'domain', 'path', 'secure',
                'expires', 'discard', 'comment', 'comment_url', 'rest')
    for item in cookies_to_tuple(cookies):
        if item['value'] is None:
            item['value'] = ''

        extras = {}
        for field in item:
            if field.lower() in accepted:
                extras[field] = item[field]
        # 'expiry' is passed on under the name 'expires'
        if 'expiry' in item:
            extras['expires'] = item['expiry']

        session.cookies.set(item['name'], item['value'], **extras)
def set_browser_cookies(page, cookies):
    """Set cookies in the browser through the CDP ``Network.setCookie`` command.

    Each cookie is normalized first: ``expiry`` is renamed to ``expires``,
    a textual ``expires`` is converted to a number, and the value is made a
    string. Cookies named ``__Host-...`` are bound to the page url instead of
    a domain. For every other cookie the given ``domain`` is tried first; if
    that fails or the cookie cannot be found afterwards, candidate domains
    derived from the page url are tried from most to least specific.

    :param page: page object used to send the CDP commands
    :param cookies: cookies in any form accepted by ``cookies_to_tuple``
    :return: None
    :raises RuntimeError: if a domain has to be derived but the page url does
        not start with ``http``
    """
    # Imported locally because only ``datetime`` itself is known to be in module scope.
    from datetime import timezone

    for cookie in cookies_to_tuple(cookies):
        if 'expiry' in cookie:
            cookie['expires'] = int(cookie['expiry'])
            cookie.pop('expiry')

        if 'expires' in cookie:
            expires = cookie['expires']
            if not expires:
                cookie.pop('expires')

            elif isinstance(expires, str):
                if expires.isdigit():
                    cookie['expires'] = int(expires)

                elif expires.replace('.', '').isdigit():
                    cookie['expires'] = float(expires)

                else:
                    # HTTP-date text, with a four-digit or a two-digit year.
                    try:
                        moment = datetime.strptime(expires, '%a, %d %b %Y %H:%M:%S GMT')
                    except ValueError:
                        moment = datetime.strptime(expires, '%a, %d %b %y %H:%M:%S GMT')
                    # The text is in GMT, but strptime returns a naive datetime
                    # and timestamp() reads a naive value as local time, so the
                    # UTC zone is attached before converting.
                    cookie['expires'] = moment.replace(tzinfo=timezone.utc).timestamp()

        if cookie['value'] is None:
            cookie['value'] = ''
        elif not isinstance(cookie['value'], str):
            cookie['value'] = str(cookie['value'])

        if cookie['name'].startswith('__Host-'):
            cookie['path'] = '/'
            cookie['secure'] = True
            if not page.url.startswith('http'):
                cookie['name'] = cookie['name'].replace('__Host-', '__Secure-', 1)
            else:
                cookie['url'] = page.url
            page.run_cdp_loaded('Network.setCookie', **cookie)
            continue  # no domain needed for this cookie, move on to the next one

        if cookie['name'].startswith('__Secure-'):
            cookie['secure'] = True

        if cookie.get('domain', None):
            # Best effort: any failure here falls through to the url-derived domains below.
            try:
                page.run_cdp_loaded('Network.setCookie', **cookie)
                if is_cookie_in_driver(page, cookie):
                    continue
            except Exception:
                pass

        url = page._browser_url
        if not url.startswith('http'):
            raise RuntimeError(f'未设置域名请设置cookie的domain参数或先访问一个网站。{cookie}')

        # NOTE(review): ``extract`` is presumably tldextract's url splitter - confirm at the import site.
        ex_url = extract(url)
        d_list = ex_url.subdomain.split('.')
        d_list.append(f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain)

        # Interleave '.' between the labels so that joining successive tails
        # yields 'a.b.site.com', '.b.site.com', 'b.site.com', ... 'site.com'.
        tmp = [d_list[0]]
        for label in d_list[1:]:
            tmp.append('.')
            tmp.append(label)

        # Try each candidate until the browser reports that the cookie is stored.
        for i in range(len(tmp)):
            cookie['domain'] = ''.join(tmp[i:])
            page.run_cdp_loaded('Network.setCookie', **cookie)
            if is_cookie_in_driver(page, cookie):
                break
def is_cookie_in_driver(page, cookie):
    """Report whether the browser already holds the given cookie.

    A stored cookie matches when its name and value equal those of ``cookie``;
    if ``cookie`` has a ``domain`` key, the domain has to be equal as well.

    :param page: page object providing ``cookies(all_domains=True)``
    :param cookie: dict-format cookie to look for
    :return: True if a matching cookie is found, otherwise False
    """
    must_match_domain = 'domain' in cookie

    for stored in page.cookies(all_domains=True):
        if cookie['name'] != stored['name'] or cookie['value'] != stored['value']:
            continue
        if not must_match_domain:
            return True
        if cookie['domain'] == stored.get('domain', None):
            return True

    return False
def get_blob(page, url, as_bytes=True):
"""获取知道blob资源
:param page: 资源所在页面对象
:param url: 资源url
:param as_bytes: 是否以字节形式返回
:return: 资源内容
"""
if not url.startswith('blob'):
raise TypeError('该链接非blob类型。')
js = """
@ -378,7 +182,7 @@ def get_blob(page, url, as_bytes=True):
}
"""
try:
result = page.run_js(js, url)
result = page._run_js(js, url)
except:
raise RuntimeError('无法获取该资源。')
if as_bytes:
@ -389,14 +193,6 @@ def get_blob(page, url, as_bytes=True):
def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None):
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param tab: Tab或Page对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param kwargs: pdf生成参数
:return: as_pdf为True时返回bytes否则返回文件文本
"""
if name:
if name.endswith('.pdf'):
name = name[:-4]
@ -420,13 +216,7 @@ def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None):
def get_mhtml(page, path=None, name=None):
"""把当前页面保存为mhtml文件如果path和name参数都为None只返回mhtml文本
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:return: mhtml文本
"""
r = page.run_cdp('Page.captureSnapshot')['data']
r = page._run_cdp('Page.captureSnapshot')['data']
if path is None and name is None:
return r
@ -439,20 +229,13 @@ def get_mhtml(page, path=None, name=None):
def get_pdf(page, path=None, name=None, kwargs=None):
"""把当前页面保存为pdf文件如果path和name参数都为None只返回字节
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param kwargs: pdf生成参数
:return: pdf文本
"""
if not kwargs:
kwargs = {}
kwargs['transferMode'] = 'ReturnAsBase64'
if 'printBackground' not in kwargs:
kwargs['printBackground'] = True
try:
r = page.run_cdp('Page.printToPDF', **kwargs)['data']
r = page._run_cdp('Page.printToPDF', **kwargs)['data']
except:
raise RuntimeError('保存失败,可能浏览器版本不支持。')
from base64 import b64decode
@ -469,14 +252,6 @@ def get_pdf(page, path=None, name=None, kwargs=None):
def tree(ele_or_page, text=False, show_js=False, show_css=False):
"""把页面或元素对象DOM结构打印出来
:param ele_or_page: 页面或元素对象
:param text: 是否打印文本输入数字可指定打印文本长度上线
:param show_js: 打印文本时是否包含<script>内文本text参数为False时无效
:param show_css: 打印文本时是否包含<style>内文本text参数为False时无效
:return: None
"""
def _tree(obj, last_one=True, body=''):
list_ele = obj.children()
length = len(list_ele)
@ -524,27 +299,17 @@ def tree(ele_or_page, text=False, show_js=False, show_css=False):
def format_headers(txt):
    """Build a headers dict from raw header text copied from the browser.

    :param txt: header text with one ``Name: value`` pair per line; a dict-like
        object is normalized in place and returned, and any other non-str
        value is returned unchanged
    :return: dict-format headers without the ``:method``, ``:scheme``,
        ``:authority`` and ``:path`` entries
    """
    pseudo_headers = (':method', ':scheme', ':authority', ':path')

    if not isinstance(txt, str):
        if isinstance(txt, (dict, CaseInsensitiveDict)):
            for k, v in txt.items():
                # None and booleans are left as they are; everything else becomes text.
                if v not in (None, False, True):
                    txt[k] = str(v)
            for i in pseudo_headers:
                txt.pop(i, None)
        return txt

    headers = {}
    for header in txt.split('\n'):
        if header:
            name, value = header.split(': ', maxsplit=1)
            # Only real headers are kept; an unconditional assignment here
            # would let the pseudo-headers through.
            if name not in pseudo_headers:
                headers[name] = value
    return headers
def _dict_cookies_to_tuple(cookies: dict):
"""把dict形式的cookies转换为tuple形式
:param cookies: 单个或多个cookies单个时包含'name''value'
:return: 多个dict格式cookies组成的列表
"""
if 'name' in cookies and 'value' in cookies: # 单个cookie
return (cookies,)
keys = ('domain', 'path', 'expires', 'max-age', 'HttpOnly', 'secure', 'expiry')
template = {k: v for k, v in cookies.items() if k in keys}
return tuple(dict(**{'name': k, 'value': v}, **template) for k, v in cookies.items() if k not in keys)

View File

@ -2,15 +2,10 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from http.cookiejar import Cookie
from pathlib import Path
from typing import Union, Optional
from requests import Session
from requests.cookies import RequestsCookieJar
from typing import Union, Optional, Tuple
from .._base.base import DrissionElement, BaseParser
from .._elements.chromium_element import ChromiumElement
@ -19,64 +14,126 @@ from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab
def get_ele_txt(e: DrissionElement) -> str: ...
def get_ele_txt(e: DrissionElement) -> str:
"""获取元素内所有文本
:param e: 元素对象
:return: 元素内所有文本
"""
...
def format_html(text: str) -> str: ...
def format_html(text: str) -> str:
"""处理html编码字符
:param text: html文本
:return: 格式化后的html文本
"""
...
def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool: ...
def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool:
"""判断给定的坐标是否在视口中 |n
:param page: ChromePage对象
:param loc_x: 页面绝对坐标x
:param loc_y: 页面绝对坐标y
:return: bool
"""
...
def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> tuple: ...
def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> Tuple[int, int]:
"""接收元素及偏移坐标,把坐标滚动到页面中间,返回该点坐标
有偏移量时以元素左上角坐标为基准没有时以click_point为基准
:param ele: 元素对象
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:return: 相对坐标
"""
...
def make_absolute_link(link: str, baseURI: str = None) -> str: ...
def make_absolute_link(link: str, baseURI: str = None) -> str:
"""获取绝对url
:param link: 超链接
:param baseURI: 页面或iframe的url
:return: 绝对链接
"""
...
def is_js_func(func: str) -> bool: ...
def is_js_func(func: str) -> bool:
"""检查文本是否js函数"""
...
def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ...
def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict, Cookie]) -> tuple: ...
def set_session_cookies(session: Session, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...
def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ...
def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ...
def get_blob(page: ChromiumBase, url: str, as_bytes: bool = True) -> bytes: ...
def get_blob(page: ChromiumBase, url: str, as_bytes: bool = True) -> bytes:
"""获取知道blob资源
:param page: 资源所在页面对象
:param url: 资源url
:param as_bytes: 是否以字节形式返回
:return: 资源内容
"""
...
def save_page(tab: Union[ChromiumPage, ChromiumTab],
path: Union[Path, str, None] = None,
name: Optional[str] = None,
as_pdf: bool = False,
kwargs: dict = None) -> Union[bytes, str]: ...
kwargs: dict = None) -> Union[bytes, str]:
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param tab: Tab或Page对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param kwargs: pdf生成参数
:return: as_pdf为True时返回bytes否则返回文件文本
"""
...
def get_mhtml(page: Union[ChromiumPage, ChromiumTab],
path: Optional[Path] = None,
name: Optional[str] = None) -> Union[bytes, str]: ...
name: Optional[str] = None) -> Union[bytes, str]:
"""把当前页面保存为mhtml文件如果path和name参数都为None只返回mhtml文本
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:return: mhtml文本
"""
...
def get_pdf(page: Union[ChromiumPage, ChromiumTab],
path: Optional[Path] = None,
name: Optional[str] = None,
kwargs: dict = None) -> Union[bytes, str]: ...
kwargs: dict = None) -> Union[bytes, str]:
"""把当前页面保存为pdf文件如果path和name参数都为None只返回字节
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param kwargs: pdf生成参数
:return: pdf文本
"""
...
def tree(ele_or_page: BaseParser,
text: Union[int, bool] = False,
show_js: bool = False,
show_css: bool = False) -> None: ...
show_css: bool = False) -> None:
"""把页面或元素对象DOM结构打印出来
:param ele_or_page: 页面或元素对象
:param text: 是否打印文本输入数字可指定打印文本长度上线
:param show_js: 打印文本时是否包含<script>内文本text参数为False时无效
:param show_css: 打印文本时是否包含<style>内文本text参数为False时无效
:return: None
"""
...
def format_headers(txt: str) -> dict: ...
def format_headers(txt: str) -> dict:
"""从浏览器复制的文本生成dict格式headers文本用换行分隔
:param txt: 从浏览器复制的原始文本格式headers
:return: dict格式headers
"""
...

File diff suppressed because it is too large Load Diff

View File

@ -2,22 +2,27 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union, Tuple, List, Any, Optional, Literal
from typing import Union, Tuple, Any, Optional, Literal
from requests import Session
from .chromium_page import ChromiumPage
from .chromium_tab import ChromiumTab
from .mix_tab import MixTab
from .web_page import WebPage
from .._base.base import BasePage
from .._base.browser import Browser
from .._base.chromium import Chromium
from .._base.driver import Driver
from .._elements.chromium_element import ChromiumElement
from .._elements.session_element import SessionElement
from .._functions.cookies import CookiesList
from .._functions.elements import SessionElementsList, ChromiumElementsList
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._units.actions import Actions
from .._units.console import Console
from .._units.listener import Listener
from .._units.rect import TabRect
from .._units.screencast import Screencast
@ -30,269 +35,646 @@ PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True]
class ChromiumBase(BasePage):
"""标签页、Frame、Page基类"""
_tab: Union[ChromiumTab, MixTab, ChromiumFrame, ChromiumPage, WebPage] = ...
_browser: Chromium = ...
_driver: Optional[Driver] = ...
_frame_id: str = ...
_is_reading: bool = ...
_is_timeout: bool = ...
_timeouts: Timeout = ...
_first_run: bool = ...
_is_loading: Optional[bool] = ...
_load_mode: str = ...
_scroll: Optional[Scroller] = ...
_url: str = ...
_root_id: Optional[str] = ...
_upload_list: Optional[list] = ...
_wait: Optional[BaseWaiter] = ...
_set: Optional[ChromiumBaseSetter] = ...
_screencast: Optional[Screencast] = ...
_actions: Optional[Actions] = ...
_listener: Optional[Listener] = ...
_states: Optional[PageStates] = ...
_alert: Alert = ...
_has_alert: bool = ...
_auto_handle_alert: Optional[bool] = ...
_doc_got: bool = ...
_load_end_time: float = ...
_init_jss: list = ...
_ready_state: Optional[str] = ...
_rect: Optional[TabRect] = ...
_console: Optional[Console] = ...
_disconnect_flag: bool = ...
_type: str = ...
def __init__(self,
address: Union[str, int],
tab_id: str = None,
timeout: float = None):
self._browser: Browser = ...
self._page: ChromiumPage = ...
self.tab: Union[ChromiumPage, ChromiumTab] = ...
self.address: str = ...
self._driver: Driver = ...
self._frame_id: str = ...
self._is_reading: bool = ...
self._is_timeout: bool = ...
self._timeouts: Timeout = ...
self._first_run: bool = ...
self._is_loading: bool = ...
self._load_mode: str = ...
self._scroll: Scroller = ...
self._url: str = ...
self._root_id: str = ...
self._upload_list: list = ...
self._wait: BaseWaiter = ...
self._set: ChromiumBaseSetter = ...
self._screencast: Screencast = ...
self._actions: Actions = ...
self._listener: Listener = ...
self._states: PageStates = ...
self._alert: Alert = ...
self._has_alert: bool = ...
self._doc_got: bool = ...
self._load_end_time: float = ...
self._init_jss: list = ...
self._ready_state: Optional[str] = ...
self._rect: TabRect = ...
self._type: str = ...
browser: Chromium,
tab_id: str = None):
"""
:param browser: Chromium
:param tab_id: 要控制的tab id不指定默认为激活的标签页
"""
...
def _connect_browser(self, tab_id: str = None) -> None: ...
def __call__(self,
locator: Union[Tuple[str, str], str, ChromiumElement],
index: int = 1,
timeout: float = None) -> ChromiumElement:
"""在内部查找元素
ele = page('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个元素从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: ChromiumElement对象
"""
...
def _driver_init(self, tab_id: str) -> None: ...
def _d_set_runtime_settings(self) -> None: ...
def _get_document(self, timeout: float = 10) -> bool: ...
def _connect_browser(self, target_id: str = None) -> None:
"""连接浏览器,在第一次时运行
:param target_id: 要控制的target id不指定默认为激活的标签页
:return: None
"""
...
def _wait_loaded(self, timeout: float = None) -> bool: ...
def _driver_init(self, target_id: str) -> None:
"""新建页面、页面刷新后要进行的cdp参数初始化
:param target_id: 要跳转到的target id
:return: None
"""
...
def _get_document(self, timeout: float = 10) -> bool:
"""获取页面文档
:param timeout: 超时时间
:return: 是否获取成功
"""
...
def _onFrameDetached(self, **kwargs) -> None: ...
def _onFrameAttached(self, **kwargs) -> None: ...
def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时执行"""
...
def _onFrameNavigated(self, **kwargs): ...
def _onFrameNavigated(self, **kwargs):
"""页面跳转时执行"""
...
def _onDomContentEventFired(self, **kwargs): ...
def _onDomContentEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
...
def _onLoadEventFired(self, **kwargs): ...
def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
...
def _onFrameStoppedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后执行"""
...
def _onFileChooserOpened(self, **kwargs): ...
def _onFileChooserOpened(self, **kwargs):
"""文件选择框打开时执行"""
...
def _wait_to_stop(self): ...
def _d_set_start_options(self, address) -> None: ...
def _d_set_runtime_settings(self) -> None: ...
def __call__(self,
locator: Union[Tuple[str, str], str, ChromiumElement],
index: int = 1,
timeout: float = None) -> ChromiumElement: ...
def _wait_to_stop(self):
"""eager策略超时时使页面停止加载"""
...
@property
def _js_ready_state(self) -> str: ...
def wait(self) -> BaseWaiter:
"""返回用于等待的对象"""
...
@property
def browser(self) -> Browser: ...
def set(self) -> ChromiumBaseSetter:
"""返回用于设置的对象"""
...
@property
def title(self) -> str: ...
def screencast(self) -> Screencast:
"""返回用于录屏的对象"""
...
@property
def driver(self) -> Driver: ...
def actions(self) -> Actions:
"""返回用于执行动作链的对象"""
...
@property
def url(self) -> str: ...
def listen(self) -> Listener:
"""返回用于聆听数据包的对象"""
...
@property
def _browser_url(self) -> str: ...
def states(self) -> PageStates:
"""返回用于获取状态信息的对象"""
...
@property
def html(self) -> str: ...
def scroll(self) -> PageScroller:
"""返回用于滚动滚动条的对象"""
...
@property
def json(self) -> Union[dict, None]: ...
def rect(self) -> TabRect:
"""返回获取窗口坐标和大小的对象"""
...
@property
def _target_id(self) -> str: ...
def console(self) -> Console:
"""返回获取控制台信息的对象"""
...
@property
def tab_id(self) -> str: ...
def timeout(self) -> float:
"""返回timeout设置"""
...
@property
def active_ele(self) -> ChromiumElement: ...
def timeouts(self) -> Timeout:
"""返回timeouts设置"""
...
@property
def load_mode(self) -> str: ...
def browser(self) -> Chromium:
"""返回浏览器对象"""
...
@property
def user_agent(self) -> str: ...
def driver(self) -> Driver:
"""返回用于控制浏览器的Driver对象"""
...
@property
def scroll(self) -> PageScroller: ...
def title(self) -> str:
"""返回当前页面title"""
...
@property
def rect(self) -> TabRect: ...
def url(self) -> str:
"""返回当前页面url"""
...
@property
def timeouts(self) -> Timeout: ...
def _browser_url(self) -> str:
"""用于被MixTab覆盖"""
...
@property
def upload_list(self) -> list: ...
def html(self) -> str:
"""返回当前页面html文本"""
...
@property
def wait(self) -> BaseWaiter: ...
def json(self) -> Union[dict, None]:
"""当返回内容是json格式时返回对应的字典非json格式时返回None"""
...
@property
def set(self) -> ChromiumBaseSetter: ...
def tab_id(self) -> str:
"""返回当前标签页id"""
...
@property
def screencast(self) -> Screencast: ...
def _target_id(self) -> str:
"""返回当前标签页id"""
...
@property
def actions(self) -> Actions: ...
def active_ele(self) -> ChromiumElement:
"""返回当前焦点所在元素"""
...
@property
def listen(self) -> Listener: ...
def load_mode(self) -> Literal['none', 'normal', 'eager']:
"""返回页面加载策略有3种'none''normal''eager'"""
...
@property
def states(self) -> PageStates: ...
def user_agent(self) -> str:
"""返回user agent"""
...
def run_js(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any: ...
@property
def upload_list(self) -> list:
"""返回等待上传文件列表"""
...
def run_js_loaded(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any: ...
@property
def session(self) -> Session:
"""返回用于转换模式或download的Session对象"""
...
def run_async_js(self, script: Union[str, Path], *args, as_expr: bool = False) -> None: ...
@property
def _js_ready_state(self) -> str:
"""返回js获取的ready state信息"""
...
def run_cdp(self, cmd: str, **cmd_args) -> dict:
"""执行Chrome DevTools Protocol语句
:param cmd: 协议项目
:param cmd_args: 参数
:return: 执行的结果
"""
...
def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict:
"""执行Chrome DevTools Protocol语句执行前等待页面加载完毕
:param cmd: 协议项目
:param cmd_args: 参数
:return: 执行的结果
"""
...
def _run_cdp(self, cmd: str, **cmd_args) -> dict:
"""执行Chrome DevTools Protocol语句
:param cmd: 协议项目
:param cmd_args: 参数
:return: 执行的结果
"""
...
def _run_cdp_loaded(self, cmd: str, **cmd_args) -> dict:
"""执行Chrome DevTools Protocol语句执行前等待页面加载完毕
:param cmd: 协议项目
:param cmd_args: 参数
:return: 执行的结果
"""
...
def run_js(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本或js文件路径
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def run_js_loaded(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any:
"""运行javascript代码执行前等待页面加载完毕
:param script: js文本或js文件路径
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script属性值
:return: 运行的结果
"""
...
def _run_js(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本或js文件路径
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def _run_js_loaded(self, script: Union[str, Path], *args, as_expr: bool = False, timeout: float = None) -> Any:
"""运行javascript代码执行前等待页面加载完毕
:param script: js文本或js文件路径
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script属性值
:return: 运行的结果
"""
...
def run_async_js(self, script: Union[str, Path], *args, as_expr: bool = False) -> None:
"""以异步方式执行js代码或js文件路径
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:return: None
"""
...
def get(self, url: str, show_errmsg: bool = False, retry: int = None,
interval: float = None, timeout: float = None) -> Union[None, bool]: ...
interval: float = None, timeout: float = None) -> Union[None, bool]:
"""访问url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间为None时使用页面对象timeouts.page_load属性值
:return: 目标url是否可用
"""
...
def cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[
list, dict]: ...
def cookies(self, all_domains: bool = False, all_info: bool = False) -> CookiesList:
"""返回cookies信息
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息为False时只返回namevaluedomain
:return: cookies信息
"""
...
def ele(self,
locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
index: int = 1,
timeout: float = None) -> ChromiumElement: ...
timeout: float = None) -> ChromiumElement:
"""获取一个符合条件的元素对象
:param locator: 定位符或元素对象
:param index: 获取第几个元素从1开始可传入负数获取倒数第几个
:param timeout: 查找超时时间默认与页面等待时间一致
:return: ChromiumElement对象
"""
...
def eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> ChromiumElementsList: ...
timeout: float = None) -> ChromiumElementsList:
"""获取所有符合条件的元素对象
:param locator: 定位符或元素对象
:param timeout: 查找超时时间默认与页面等待时间一致
:return: ChromiumElement对象组成的列表
"""
...
def s_ele(self,
locator: Union[Tuple[str, str], str] = None,
index: int = 1) -> SessionElement: ...
index: int = 1,
timeout: float = None) -> SessionElement:
"""查找一个符合条件的元素以SessionElement形式返回处理复杂页面时效率很高
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: SessionElement对象或属性文本
"""
...
def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ...
def s_eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> SessionElementsList:
"""查找所有符合条件的元素以SessionElement列表形式返回
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: SessionElement对象组成的列表
"""
...
def _find_elements(self,
locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
timeout: float = None,
timeout: float,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, ChromiumElementsList]: ...
raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, ChromiumElementsList]:
"""执行元素查找
:param locator: 定位符或元素对象
:param timeout: 查找超时时间
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象或元素对象组成的列表
"""
...
def refresh(self, ignore_cache: bool = False) -> None: ...
def refresh(self, ignore_cache: bool = False) -> None:
"""刷新当前页面
:param ignore_cache: 是否忽略缓存
:return: None
"""
...
def forward(self, steps: int = 1) -> None: ...
def forward(self, steps: int = 1) -> None:
"""在浏览历史中前进若干步
:param steps: 前进步数
:return: None
"""
...
def back(self, steps: int = 1) -> None: ...
def back(self, steps: int = 1) -> None:
"""在浏览历史中后退若干步
:param steps: 后退步数
:return: None
"""
...
def _forward_or_back(self, steps: int) -> None: ...
def _forward_or_back(self, steps: int) -> None:
"""执行浏览器前进或后退会跳过url相同的历史记录
:param steps: 步数
:return: None
"""
...
def stop_loading(self) -> None: ...
def stop_loading(self) -> None:
"""页面停止加载"""
...
def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None: ...
def remove_ele(self, loc_or_ele: Union[ChromiumElement, ChromiumFrame, str, Tuple[str, str]]) -> None:
"""从页面上删除一个元素
:param loc_or_ele: 元素对象或定位符
:return: None
"""
...
def add_ele(self,
html_or_info: Union[str, Tuple[str, dict]],
insert_to: Union[ChromiumElement, str, Tuple[str, str], None] = None,
before: Union[ChromiumElement, str, Tuple[str, str], None] = None) -> ChromiumElement: ...
before: Union[ChromiumElement, str, Tuple[str, str], None] = None) -> Union[
ChromiumElement, ChromiumFrame]:
"""新建一个元素
:param html_or_info: 新元素的html文本或信息信息格式为(tag, {attr1: value, ...})
:param insert_to: 插入到哪个元素中可接收元素对象和定位符为None且为html添加到body不为html不插入
:param before: 在哪个子节点前面插入可接收对象和定位符为None插入到父元素末尾
:return: 元素对象
"""
...
def get_frame(self, loc_ind_ele: Union[str, int, tuple, ChromiumFrame], timeout: float = None) -> ChromiumFrame: ...
def get_frame(self,
loc_ind_ele: Union[str, int, tuple, ChromiumFrame, ChromiumElement],
timeout: float = None) -> ChromiumFrame:
"""获取页面中一个frame对象
:param loc_ind_ele: 定位符iframe序号ChromiumFrame对象序号从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间
:return: ChromiumFrame对象
"""
...
def get_frames(self, locator: Union[str, tuple] = None, timeout: float = None) -> List[ChromiumFrame]: ...
def get_frames(self,
locator: Union[str, tuple] = None,
timeout: float = None) -> ChromiumElementsList:
"""获取所有符合条件的frame对象
:param locator: 定位符为None时返回所有
:param timeout: 查找超时时间
:return: ChromiumFrame对象组成的列表
"""
...
def run_cdp(self, cmd: str, **cmd_args) -> dict: ...
def session_storage(self, item: str = None) -> Union[str, dict, None]:
"""返回sessionStorage信息不设置item则获取全部
:param item: 要获取的项不设置则返回全部
:return: sessionStorage一个或所有项内容
"""
...
def run_cdp_loaded(self, cmd: str, **cmd_args) -> dict: ...
def session_storage(self, item: str = None) -> Union[str, dict, None]: ...
def local_storage(self, item: str = None) -> Union[str, dict, None]: ...
def add_init_js(self, script: str) -> str: ...
def remove_init_js(self, script_id: str = None) -> None: ...
def local_storage(self, item: str = None) -> Union[str, dict, None]:
"""返回localStorage信息不设置item则获取全部
:param item: 要获取的项目不设置则返回全部
:return: localStorage一个或所有项内容
"""
...
def get_screenshot(self, path: [str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None,
as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[int, int] = None,
right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ...
right_bottom: Tuple[int, int] = None) -> Union[str, bytes]:
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角角坐标
:return: 图片完整路径或字节文本
"""
...
def add_init_js(self, script: str) -> str:
"""添加初始化脚本,在页面加载任何脚本前执行
:param script: js文本
:return: 添加的脚本的id
"""
...
def remove_init_js(self, script_id: str = None) -> None:
"""删除初始化脚本js_id传入None时删除所有
:param script_id: 脚本的id
:return: None
"""
...
def clear_cache(self, session_storage: bool = True, local_storage: bool = True, cache: bool = True,
cookies: bool = True) -> None:
"""清除缓存,可选要清除的项
:param session_storage: 是否清除sessionStorage
:param local_storage: 是否清除localStorage
:param cache: 是否清除cache
:param cookies: 是否清除cookies
:return: None
"""
...
def disconnect(self) -> None:
"""断开与页面的连接,不关闭页面"""
...
def reconnect(self, wait: float = 0) -> None:
"""断开与页面原来的页面,重新建立连接
:param wait: 断开后等待若干秒再连接
:return: None
"""
...
def handle_alert(self,
accept: Optional[bool] = True,
send: str = None,
timeout: float = None,
next_one: bool = False) -> Union[str, False]:
"""处理提示框,可以自动等待提示框出现
:param accept: True表示确认False表示取消为None不会按按钮但依然返回文本值
:param send: 处理prompt提示框时可输入文本
:param timeout: 等待提示框出现的超时时间为None则使用self.timeout属性的值
:param next_one: 是否处理下一个出现的提示框为True时timeout参数无效
:return: 提示框内容文本未等到提示框则返回False
"""
...
def _handle_alert(self,
accept: Optional[bool] = True,
send: str = None,
timeout: float = None,
next_one: bool = False) -> Union[str, False]:
"""处理提示框,可以自动等待提示框出现
:param accept: True表示确认False表示取消其它值不会按按钮但依然返回文本值
:param send: 处理prompt提示框时可输入文本
:param timeout: 等待提示框出现的超时时间为None则使用self.timeout属性的值
:param next_one: 是否处理下一个出现的提示框为True时timeout参数无效
:return: 提示框内容文本未等到提示框则返回False
"""
...
def _on_alert_open(self, **kwargs):
"""alert出现时触发的方法"""
...
def _on_alert_close(self, **kwargs):
"""alert关闭时触发的方法"""
...
def _wait_loaded(self, timeout: float = None) -> bool:
"""等待页面加载完成,超时触发停止加载
:param timeout: 超时时间
:return: 是否成功超时返回False
"""
...
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
timeout: float = None) -> Union[bool, None]:
"""尝试连接,重试若干次
:param to_url: 要访问的url
:param times: 重试次数
:param interval: 重试间隔
:param show_errmsg: 是否抛出异常
:param timeout: 连接超时时间
:return: 是否成功返回None表示不确定
"""
...
def _get_screenshot(self, path: [str, Path] = None, name: str = None, as_bytes: PIC_TYPE = None,
as_base64: PIC_TYPE = None, full_page: bool = False, left_top: Tuple[float, float] = None,
right_bottom: Tuple[float, float] = None, ele: ChromiumElement = None) -> Union[str, bytes]: ...
def clear_cache(self, session_storage: bool = True, local_storage: bool = True, cache: bool = True,
cookies: bool = True) -> None: ...
def disconnect(self) -> None: ...
def reconnect(self, wait: float = 0) -> None: ...
def handle_alert(self, accept: Optional[bool] = True, send: str = None, timeout: float = None,
next_one: bool = False) -> Union[str, False]: ...
def _handle_alert(self, accept: bool = True, send: str = None, timeout: float = None,
next_one: bool = False) -> Union[str, False]: ...
def _on_alert_close(self, **kwargs): ...
def _on_alert_open(self, **kwargs): ...
def _d_connect(self, to_url: str, times: int = 0, interval: float = 1, show_errmsg: bool = False,
timeout: float = None) -> Union[bool, None]: ...
right_bottom: Tuple[float, float] = None, ele: ChromiumElement = None) -> Union[str, bytes]:
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角角坐标
:param ele: 为异域iframe内元素截图设置
:return: 图片完整路径或字节文本
"""
...
class Timeout(object):
"""用于保存d模式timeout信息的类"""
base: float = ...
page_load: float = ...
script: float = ...
def __init__(self, page: ChromiumBase, base=None, page_load=None, script=None):
self._page: ChromiumBase = ...
self.base: float = ...
self.page_load: float = ...
self.script: float = ...
def __init__(self, base=None, page_load=None, script=None):
"""
:param base: 默认超时时间
:param page_load: 页面加载超时时间
:param script: js超时时间
"""
...
@property
def as_dict(self) -> dict:
"""以dict格式返回timeout设置"""
...
class Alert(object):
"""用于保存alert信息的类"""
activated: Optional[bool] = ...
text: Optional[str] = ...
type: Optional[str] = ...
defaultPrompt: Optional[str] = ...
response_accept: Optional[str] = ...
response_text: Optional[str] = ...
handle_next: Optional[bool] = ...
next_text: Optional[str] = ...
auto: Optional[bool] = ...
def __init__(self):
self.activated: bool = ...
self.text: str = ...
self.type: str = ...
self.defaultPrompt: str = ...
self.response_accept: str = ...
self.response_text: str = ...
self.handle_next: Optional[bool] = ...
self.next_text: str = ...
self.auto: Optional[bool] = ...
def get_mhtml(page: Union[ChromiumPage, ChromiumTab],
path: Union[str, Path] = None,
name: str = None) -> str: ...
def get_pdf(page: Union[ChromiumPage, ChromiumTab],
path: Union[str, Path] = None,
name: str = None, kwargs: dict = None) -> bytes: ...
def __init__(self, auto: bool = None): ...

View File

@ -2,14 +2,14 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from copy import copy
from re import search, findall, DOTALL
from time import sleep, perf_counter
from .._elements.chromium_element import ChromiumElement
from .._functions.settings import Settings
from .._pages.chromium_base import ChromiumBase
from .._units.listener import FrameListener
from .._units.rect import FrameRect
@ -21,91 +21,81 @@ from ..errors import ContextLostError, ElementLostError, PageDisconnectedError,
class ChromiumFrame(ChromiumBase):
def __init__(self, owner, ele, info=None):
_Frames = {}
def __new__(cls, owner, ele, info=None):
"""
:param owner: frame所在的页面对象
:param ele: frame所在元素
:param info: frame所在元素信息
"""
if owner._type in ('ChromiumPage', 'WebPage'):
self._page = self._target_page = self.tab = owner
self._browser = owner.browser
else: # Tab、Frame
self._page = owner.page
self._browser = self._page.browser
self._target_page = owner
self.tab = owner.tab if owner._type == 'ChromiumFrame' else owner
fid = info['node']['frameId'] if info else owner._run_cdp('DOM.describeNode',
backendNodeId=ele._backend_id)['node']['frameId']
if Settings.singleton_tab_obj and fid in cls._Frames:
r = cls._Frames[fid]
while not hasattr(r, '_type') or r._type != 'ChromiumFrame':
sleep(.01)
return r
r = object.__new__(cls)
cls._Frames[fid] = r
return r
self.address = owner.address
self._tab_id = owner.tab_id
def __init__(self, owner, ele, info=None):
if Settings.singleton_tab_obj and hasattr(self, '_created'):
return
self._created = True
self._tab = owner._tab
self._target_page = owner
self._backend_id = ele._backend_id
self._frame_ele = ele
self._states = None
self._reloading = False
node = info['node'] if not info else owner.run_cdp('DOM.describeNode', backendNodeId=ele._backend_id)['node']
node = info['node'] if info else owner._run_cdp('DOM.describeNode', backendNodeId=ele._backend_id)['node']
self._frame_id = node['frameId']
if self._is_inner_frame():
self._is_diff_domain = False
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
super().__init__(owner.address, owner.tab_id, owner.timeout)
super().__init__(owner.browser, owner.driver.id)
else:
self._is_diff_domain = True
delattr(self, '_frame_id')
super().__init__(owner.address, node['frameId'], owner.timeout)
obj_id = super().run_js('document;', as_expr=True)['objectId']
super().__init__(owner.browser, node['frameId'])
obj_id = super()._run_js('document;', as_expr=True)['objectId']
self.doc_ele = ChromiumElement(self, obj_id=obj_id)
self._rect = None
self._type = 'ChromiumFrame'
# end_time = perf_counter() + 2
# while perf_counter() < end_time:
# if self.url not in (None, 'about:blank'):
# break
# sleep(.1)
def __call__(self, locator, index=1, timeout=None):
    """Find an element inside this frame; shorthand for calling ``ele()``.

    Example: ``ele2 = ele1('@id=ele_id')``

    :param locator: locator of the element, either a loc tuple or a query string
    :param index: which match to return, counted from 1; negative values count from the end
    :param timeout: timeout for the lookup
    :return: whatever ``self.ele`` returns for these arguments
    """
    lookup_options = {'index': index, 'timeout': timeout}
    return self.ele(locator, **lookup_options)
def __eq__(self, other):
return self._frame_id == getattr(other, '_frame_id', None)
def __repr__(self):
    """Return a debug string with the frame element's tag and its attributes."""
    rendered = []
    for key, value in self._frame_ele.attrs.items():
        rendered.append(f"{key}='{value}'")
    joined = " ".join(rendered)
    return f'<ChromiumFrame {self.frame_ele.tag} {joined}>'
def __eq__(self, other):
    """Compare by frame id; objects without a ``_frame_id`` compare against ``None``."""
    own_id = self._frame_id
    other_id = getattr(other, '_frame_id', None)
    return own_id == other_id
def _d_set_runtime_settings(self):
"""Override of the hook that sets browser runtime parameters: values are taken from the target page."""
# NOTE(review): indentation is lost in this view, so it is unclear how many of the
# assignments below are guarded by this `if` -- confirm against the real file.
if not hasattr(self, '_timeouts'):
# copy() gives the frame its own timeouts object instead of sharing the target page's
self._timeouts = copy(self._target_page.timeouts)
self.retry_times = self._target_page.retry_times
self.retry_interval = self._target_page.retry_interval
self._download_path = self._target_page.download_path
self._auto_handle_alert = self._target_page._auto_handle_alert
# a frame flagged as different-domain always uses 'normal'; otherwise the target page's mode is reused
self._load_mode = self._target_page._load_mode if not self._is_diff_domain else 'normal'
def _driver_init(self, tab_id, is_init=True):
"""避免出现服务器500错误
:param tab_id: 要跳转到的标签页id
:return: None
"""
def _driver_init(self, target_id, is_init=True):
try:
super()._driver_init(tab_id)
super()._driver_init(target_id)
except:
self.browser.driver.get(f'http://{self.address}/json')
super()._driver_init(tab_id)
self.browser._driver.get(f'http://{self._browser.address}/json')
super()._driver_init(target_id)
self._driver.set_callback('Inspector.detached', self._onInspectorDetached, immediate=True)
self._driver.set_callback('Page.frameDetached', None)
self._driver.set_callback('Page.frameDetached', self._onFrameDetached, immediate=True)
def _reload(self):
"""重新获取document"""
self._is_loading = True
# d_debug = self.driver._debug
self._reloading = True
@ -116,7 +106,7 @@ class ChromiumFrame(ChromiumBase):
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
end_time = perf_counter() + 2
while perf_counter() < end_time:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
node = self._target_page._run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
if 'frameId' in node:
break
sleep(.05)
@ -132,16 +122,16 @@ class ChromiumFrame(ChromiumBase):
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
self._frame_id = node['frameId']
if self._listener:
self._listener._to_target(self._target_page.tab_id, self.address, self)
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
self._listener._to_target(self._target_page.tab_id, self._browser.address, self)
super().__init__(self._browser, self._target_page.tab_id)
# self.driver._debug = d_debug
else:
self._is_diff_domain = True
if self._listener:
self._listener._to_target(node['frameId'], self.address, self)
self._listener._to_target(node['frameId'], self._browser.address, self)
end_time = perf_counter() + self.timeouts.page_load
super().__init__(self.address, node['frameId'], self._target_page.timeout)
super().__init__(self._browser, node['frameId'])
timeout = end_time - perf_counter()
if timeout <= 0:
timeout = .5
@ -151,27 +141,23 @@ class ChromiumFrame(ChromiumBase):
self._reloading = False
def _get_document(self, timeout=10):
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._is_reading:
return
self._is_reading = True
try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
node = self._target_page._run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
else:
timeout = max(timeout, 2)
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
b_id = self._run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
self._root_id = self.doc_ele._obj_id
r = self.run_cdp('Page.getFrameTree')
r = self._run_cdp('Page.getFrameTree', _ignore=PageDisconnectedError)
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
return True
@ -185,19 +171,19 @@ class ChromiumFrame(ChromiumBase):
self._is_reading = False
def _onInspectorDetached(self, **kwargs):
    """Callback for ``Inspector.detached``: reload the frame.

    Per the original note this fires when a cross-domain frame becomes
    same-domain, or on exit. The event payload is not used.
    """
    self._reload()
def _onFrameDetached(self, **kwargs):
    """Callback for ``Page.frameDetached``.

    Drops the detached frame id from both the browser's frame map and the
    class-level ``_Frames`` registry, then reloads this frame if the detached
    id is its own (per the original note: same-domain became cross-domain).

    :param kwargs: event payload; ``frameId`` is read from it
    """
    detached_id = kwargs['frameId']
    self.browser._frames.pop(detached_id, None)
    ChromiumFrame._Frames.pop(detached_id, None)
    if detached_id == self._frame_id:
        self._reload()
# ----------挂件----------
@property
def scroll(self):
"""返回用于滚动的对象"""
self.wait.doc_loaded()
if self._scroll is None:
self._scroll = FrameScroller(self)
@ -205,35 +191,30 @@ class ChromiumFrame(ChromiumBase):
@property
def set(self):
    """Return the object used for settings, creating it on first access."""
    setter = self._set
    if setter is None:
        setter = self._set = ChromiumFrameSetter(self)
    return setter
@property
def states(self):
    """Return the object used to read state information, creating it on first access."""
    if self._states is not None:
        return self._states
    self._states = FrameStates(self)
    return self._states
@property
def wait(self):
    """Return the object used for waiting, creating it on first access."""
    waiter = self._wait
    if waiter is None:
        waiter = FrameWaiter(self)
        self._wait = waiter
    return waiter
@property
def rect(self):
    """Return the object used to get position and size, creating it on first access."""
    cached = self._rect
    if cached is not None:
        return cached
    cached = self._rect = FrameRect(self)
    return cached
@property
def listen(self):
    """Return the object used to listen for data packets, creating it on first access."""
    if self._listener is None:
        listener = FrameListener(self)
        self._listener = listener
        return listener
    return self._listener
@ -242,256 +223,159 @@ class ChromiumFrame(ChromiumBase):
@property
def _obj_id(self):
"""返回frame元素的object id"""
return self.frame_ele._obj_id
@property
def _node_id(self):
"""返回cdp中的node id"""
return self.frame_ele._node_id
@property
def page(self):
"""返回所属Page对象"""
return self._page
@property
def owner(self):
"""返回所属页面对象"""
return self.frame_ele.owner
@property
def frame_ele(self):
"""返回总页面上的frame元素"""
return self._frame_ele
@property
def tag(self):
"""返回元素tag"""
return self.frame_ele.tag
@property
def url(self):
"""返回frame当前访问的url"""
try:
return self.doc_ele.run_js('return this.location.href;')
return self.doc_ele._run_js('return this.location.href;')
except JavaScriptError:
return None
@property
def html(self):
"""返回元素outerHTML文本"""
tag = self.tag
out_html = self._target_page.run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML']
out_html = self._target_page._run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML']
sign = search(rf'<{tag}.*?>', out_html, DOTALL).group(0)
return f'{sign}{self.inner_html}</{tag}>'
@property
def inner_html(self):
"""返回元素innerHTML文本"""
return self.doc_ele.run_js('return this.documentElement.outerHTML;')
return self.doc_ele._run_js('return this.documentElement.outerHTML;')
@property
def link(self):
return self.frame_ele.link
@property
def title(self):
    """Return the text of the ``title`` element, or None when the lookup result is falsy."""
    title_ele = self._ele('t:title', raise_err=False)
    if not title_ele:
        return None
    return title_ele.text
@property
def attrs(self):
"""返回frame元素所有attribute属性"""
return self.frame_ele.attrs
@property
def active_ele(self):
"""返回当前焦点所在元素"""
return self.doc_ele.run_js('return this.activeElement;')
return self.doc_ele._run_js('return this.activeElement;')
@property
def xpath(self):
"""返回frame的xpath绝对路径"""
return self.frame_ele.xpath
@property
def css_path(self):
"""返回frame的css selector绝对路径"""
return self.frame_ele.css_path
@property
def tab(self):
return self._tab
@property
def tab_id(self):
"""返回frame所在tab的id"""
return self._tab_id
return self.tab.tab_id
@property
def download_path(self):
return self._download_path
@property
def sr(self):
return self.frame_ele.sr
@property
def shadow_root(self):
return self.frame_ele.sr
@property
def _js_ready_state(self):
"""返回当前页面加载状态,'loading' 'interactive' 'complete'"""
if self._is_diff_domain:
return super()._js_ready_state
else:
try:
return self.doc_ele.run_js('return this.readyState;')
return self.doc_ele._run_js('return this.readyState;')
except ContextLostError:
try:
node = self.run_cdp('DOM.describeNode', backendNodeId=self.frame_ele._backend_id)['node']
node = self._run_cdp('DOM.describeNode', backendNodeId=self.frame_ele._backend_id)['node']
doc = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
return doc.run_js('return this.readyState;')
return doc._run_js('return this.readyState;')
except:
return None
def refresh(self):
"""刷新frame页面"""
self.doc_ele.run_js('this.location.reload();')
self.doc_ele._run_js('this.location.reload();')
def property(self, name):
    """Return one property value of the frame element.

    :param name: property name
    :return: result of ``frame_ele.property(name)``; per the original note, None when the property is absent
    """
    element = self.frame_ele
    return element.property(name)
def attr(self, name):
    """Return one attribute value of the frame element.

    :param name: attribute name
    :return: result of ``frame_ele.attr(name)``; per the original note, None when the attribute is absent
    """
    element = self.frame_ele
    return element.attr(name)
def remove_attr(self, name):
    """Remove an attribute from the frame element.

    :param name: attribute name
    :return: None
    """
    element = self.frame_ele
    element.remove_attr(name)
def run_js(self, script, *args, as_expr=False, timeout=None):
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
if script.startswith('this.scrollIntoView'):
return self.frame_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout)
else:
return self.doc_ele.run_js(script, *args, as_expr=as_expr, timeout=timeout)
def style(self, style, pseudo_ele=''):
    """Return a style property value of the frame element.

    :param style: name of the style property
    :param pseudo_ele: pseudo-element name, forwarded unchanged
    :return: result of ``frame_ele.style``
    """
    element = self.frame_ele
    return element.style(style=style, pseudo_ele=pseudo_ele)
def parent(self, level_or_loc=1, index=1):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:return: 上级元素对象
"""
return self.frame_ele.parent(level_or_loc, index)
def run_js(self, script, *args, as_expr=False, timeout=None):
    """Run JavaScript; public entry point that forwards everything to ``_run_js``.

    :param script: JavaScript text
    :param args: positional arguments forwarded to the script
    :param as_expr: forwarded unchanged
    :param timeout: forwarded unchanged
    :return: result of ``_run_js``
    """
    options = {'as_expr': as_expr, 'timeout': timeout}
    return self._run_js(script, *args, **options)
def _run_js(self, script, *args, as_expr=False, timeout=None):
    """Run JavaScript on either the frame element or the frame's document element.

    :param script: JavaScript text
    :param args: positional arguments forwarded to the script
    :param as_expr: forwarded unchanged
    :param timeout: forwarded unchanged
    :return: result of the chosen element's ``_run_js``
    """
    # Scripts starting with 'this.scrollIntoView' go to the frame element
    # itself; every other script goes to the frame's document element.
    if script.startswith('this.scrollIntoView'):
        runner = self.frame_ele
    else:
        runner = self.doc_ele
    return runner._run_js(script, *args, as_expr=as_expr, timeout=timeout)
def parent(self, level_or_loc=1, index=1, timeout=0):
    """Return an ancestor of the frame element, by level or by locator.

    :param level_or_loc: ancestor level (counted from 1) or a locator
    :param index: which match to use when a locator is given, counted from 1
    :param timeout: lookup timeout, forwarded as a keyword argument
    :return: result of ``frame_ele.parent``
    """
    element = self.frame_ele
    return element.parent(level_or_loc, index, timeout=timeout)
def prev(self, locator='', index=1, timeout=0, ele_only=True):
    """Return one matching sibling that precedes the frame element.

    :param locator: query syntax used to filter candidates
    :param index: which preceding match to return, counted from 1
    :param timeout: timeout for locating the node
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.prev`` (a sibling element or node)
    """
    element = self.frame_ele
    return element.prev(locator, index, timeout, ele_only=ele_only)
def next(self, locator='', index=1, timeout=0, ele_only=True):
    """Return one matching sibling that follows the frame element.

    :param locator: query syntax used to filter candidates
    :param index: which following match to return, counted from 1
    :param timeout: timeout for locating the node
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.next`` (a sibling element or node)
    """
    element = self.frame_ele
    return element.next(locator, index, timeout, ele_only=ele_only)
def before(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching node located before the frame element in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used to filter candidates
    :param index: which preceding match to return, counted from 1
    :param timeout: timeout for locating the node
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.before``
    """
    element = self.frame_ele
    return element.before(locator, index, timeout, ele_only=ele_only)
def after(self, locator='', index=1, timeout=None, ele_only=True):
    """Return one matching node located after the frame element in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used to filter candidates
    :param index: which following match to return, counted from 1
    :param timeout: timeout for locating the node
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.after``
    """
    element = self.frame_ele
    return element.after(locator, index, timeout, ele_only=ele_only)
def prevs(self, locator='', timeout=0, ele_only=True):
    """Return all matching siblings that precede the frame element.

    :param locator: query syntax used to filter candidates
    :param timeout: timeout for locating the nodes
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.prevs`` (a list of sibling elements or node texts)
    """
    element = self.frame_ele
    return element.prevs(locator, timeout, ele_only=ele_only)
def nexts(self, locator='', timeout=0, ele_only=True):
    """Return all matching siblings that follow the frame element.

    :param locator: query syntax used to filter candidates
    :param timeout: timeout for locating the nodes
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.nexts`` (a list of sibling elements or node texts)
    """
    element = self.frame_ele
    return element.nexts(locator, timeout, ele_only=ele_only)
def befores(self, locator='', timeout=None, ele_only=True):
    """Return all matching nodes located before the frame element in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used to filter candidates
    :param timeout: timeout for locating the nodes
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.befores`` (a list of elements or nodes before this one)
    """
    element = self.frame_ele
    return element.befores(locator, timeout, ele_only=ele_only)
def afters(self, locator='', timeout=None, ele_only=True):
    """Return all matching nodes located after the frame element in the document.

    The search is not limited to siblings; it covers the whole DOM document.

    :param locator: query syntax used to filter candidates
    :param timeout: timeout for locating the nodes
    :param ele_only: whether to consider elements only; False also includes text and comment nodes
    :return: result of ``frame_ele.afters`` (a list of elements or nodes after this one)
    """
    element = self.frame_ele
    return element.afters(locator, timeout, ele_only=ele_only)
def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None):
    """Take a screenshot by delegating to the frame element's ``get_screenshot``.

    :param path: where to save the file
    :param name: full file name; the suffix may be 'jpg', 'jpeg', 'png' or 'webp'
    :param as_bytes: whether to return the image as bytes (may also name one of the formats above);
                     per the original note, path and as_base64 are ignored when this takes effect
    :param as_base64: whether to return the image as a base64 string (may also name a format);
                      per the original note, path is ignored when this takes effect
    :return: result of ``frame_ele.get_screenshot`` (full image path, or the image data)
    """
    shot_options = {
        'path': path,
        'name': name,
        'as_bytes': as_bytes,
        'as_base64': as_base64,
    }
    return self.frame_ele.get_screenshot(**shot_options)
def _get_screenshot(self, path=None, name=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
full_page=False, left_top=None, right_bottom=None, ele=None):
"""实现截图
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角角坐标
:param ele: 为异域iframe内元素截图设置
:return: 图片完整路径或字节文本
"""
if not self._is_diff_domain:
return super().get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64,
full_page=full_page, left_top=left_top, right_bottom=right_bottom)
@ -542,12 +426,12 @@ class ChromiumFrame(ChromiumBase):
img.style.setProperty("position","fixed");
arguments[0].insertBefore(img, this);
return img;'''
new_ele = first_child.run_js(js, body)
new_ele = first_child._run_js(js, body)
new_ele.scroll.to_see(center=True)
top = int(self.frame_ele.style('border-top').split('px')[0])
left = int(self.frame_ele.style('border-left').split('px')[0])
r = self.tab.run_cdp('Page.getLayoutMetrics')['visualViewport']
r = self.tab._run_cdp('Page.getLayoutMetrics')['visualViewport']
sx = r['pageX']
sy = r['pageY']
r = self.tab.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64,
@ -556,15 +440,7 @@ class ChromiumFrame(ChromiumBase):
self.tab.remove_ele(new_ele)
return r
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
"""在frame内查找单个元素
:param locator: 定位符或元素对象
:param timeout: 查找超时时间
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: WebPage用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象
"""
def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
if isinstance(locator, ChromiumElement):
return locator
self.wait.doc_loaded()
@ -572,5 +448,4 @@ class ChromiumFrame(ChromiumBase):
raise_err=raise_err) if index is not None else self.doc_ele.eles(locator, timeout)
def _is_inner_frame(self):
"""返回当前frame是否同域"""
return self._frame_id in str(self._target_page.run_cdp('Page.getFrameTree')['frameTree'])
return self._frame_id in str(self._target_page._run_cdp('Page.getFrameTree')['frameTree'])

View File

@ -2,17 +2,15 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union, Tuple, List, Any, Optional
from typing import Union, Tuple, List, Any, Optional, Literal
from .chromium_base import ChromiumBase
from .chromium_page import ChromiumPage
from .chromium_tab import ChromiumTab
from .web_page import WebPage
from .._elements.chromium_element import ChromiumElement
from .mix_tab import MixTab
from .._elements.chromium_element import ChromiumElement, ShadowRoot
from .._functions.elements import ChromiumElementsList
from .._units.listener import FrameListener
from .._units.rect import FrameRect
@ -23,81 +21,102 @@ from .._units.waiter import FrameWaiter
class ChromiumFrame(ChromiumBase):
_Frames: dict = ...
_target_page: Union[ChromiumTab, ChromiumFrame] = ...
_tab: Union[MixTab, ChromiumTab] = ...
_set: ChromiumFrameSetter = ...
_frame_ele: ChromiumElement = ...
_backend_id: int = ...
_doc_ele: ChromiumElement = ...
_is_diff_domain: bool = ...
doc_ele: ChromiumElement = ...
_states: FrameStates = ...
_reloading: bool = ...
_rect: Optional[FrameRect] = ...
_listener: FrameListener = ...
def __init__(self,
owner: Union[ChromiumPage, WebPage, ChromiumTab, ChromiumFrame],
owner: Union[ChromiumTab, ChromiumFrame],
ele: ChromiumElement,
info: dict = None):
self._target_page: ChromiumBase = ...
self._page: ChromiumPage = ...
self.tab: Union[ChromiumPage, ChromiumTab] = ...
self._tab_id: str = ...
self._set: ChromiumFrameSetter = ...
self._frame_ele: ChromiumElement = ...
self._backend_id: int = ...
self._doc_ele: ChromiumElement = ...
self._is_diff_domain: bool = ...
self.doc_ele: ChromiumElement = ...
self._states: FrameStates = ...
self._reloading: bool = ...
self._rect: FrameRect = ...
self._listener: FrameListener = ...
"""
:param owner: frame所在的页面对象
:param ele: frame所在元素
:param info: frame所在元素信息
"""
...
def __call__(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
timeout: float = None) -> ChromiumElement: ...
def __eq__(self, other: ChromiumFrame) -> bool: ...
def _check_alive(self) -> None: ...
timeout: float = None) -> ChromiumElement:
"""在内部查找元素
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: ChromiumElement对象或属性文本
"""
...
def __repr__(self) -> str: ...
def _d_set_runtime_settings(self) -> None: ...
def __eq__(self, other: ChromiumFrame) -> bool: ...
def _driver_init(self, tab_id: str) -> None: ...
def _d_set_runtime_settings(self) -> None:
"""重写设置浏览器运行参数方法"""
...
def _reload(self) -> None: ...
def _driver_init(self, target_id: str, is_init: bool = True) -> None:
"""避免出现服务器500错误
:param target_id: 要跳转到的target id
:return: None
"""
...
def _get_document(self, timeout: float = 10) -> bool: ...
def _reload(self) -> None:
"""重新获取document"""
...
def _get_document(self, timeout: float = 10) -> bool:
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
...
def _onFrameStoppedLoading(self, **kwargs): ...
def _onInspectorDetached(self, **kwargs): ...
@property
def page(self) -> Union[ChromiumPage, WebPage]: ...
def scroll(self) -> FrameScroller:
"""返回用于滚动的对象"""
...
@property
def owner(self) -> ChromiumBase: ...
def set(self) -> ChromiumFrameSetter:
"""返回用于设置的对象"""
...
@property
def frame_ele(self) -> ChromiumElement: ...
def states(self) -> FrameStates:
"""返回用于获取状态信息的对象"""
...
@property
def tag(self) -> str: ...
def wait(self) -> FrameWaiter:
"""返回用于等待的对象"""
...
@property
def url(self) -> str: ...
def rect(self) -> FrameRect:
"""返回获取坐标和大小的对象"""
...
@property
def html(self) -> str: ...
@property
def inner_html(self) -> str: ...
@property
def title(self) -> str: ...
@property
def attrs(self) -> dict: ...
@property
def rect(self) -> FrameRect: ...
@property
def listen(self) -> FrameListener: ...
def listen(self) -> FrameListener:
"""返回用于聆听数据包的对象"""
...
@property
def _obj_id(self) -> str: ...
@ -106,99 +125,289 @@ class ChromiumFrame(ChromiumBase):
def _node_id(self) -> int: ...
@property
def active_ele(self) -> ChromiumElement: ...
def owner(self) -> ChromiumBase:
"""返回所属页面对象"""
...
@property
def xpath(self) -> str: ...
def frame_ele(self) -> ChromiumElement:
"""返回总页面上的frame元素"""
...
@property
def css_path(self) -> str: ...
def tag(self) -> str:
"""返回元素tag"""
...
@property
def scroll(self) -> FrameScroller: ...
def url(self) -> str:
"""返回frame当前访问的url"""
...
@property
def set(self) -> ChromiumFrameSetter: ...
def html(self) -> str:
"""返回元素outerHTML文本"""
...
@property
def states(self) -> FrameStates: ...
def inner_html(self) -> str:
"""返回元素innerHTML文本"""
...
@property
def wait(self) -> FrameWaiter: ...
def link(self) -> str:
"""返回href或src绝对url"""
...
@property
def tab_id(self) -> str: ...
def title(self) -> str:
"""返回页面title"""
...
@property
def download_path(self) -> str: ...
def attrs(self) -> dict:
"""返回frame元素所有attribute属性"""
...
def refresh(self) -> None: ...
@property
def active_ele(self) -> ChromiumElement:
"""返回当前焦点所在元素"""
...
def property(self, name: str) -> Union[str, None]: ...
@property
def xpath(self) -> str:
"""返回frame的xpath绝对路径"""
...
def attr(self, name: str) -> Union[str, None]: ...
@property
def css_path(self) -> str:
"""返回frame的css selector绝对路径"""
...
def remove_attr(self, name: str) -> None: ...
@property
def tab(self) -> Union[ChromiumTab, MixTab]:
"""返回frame所在的tab对象"""
...
@property
def tab_id(self) -> str:
"""返回frame所在tab的id"""
...
@property
def download_path(self) -> str:
"""返回下载文件保存路径"""
...
@property
def sr(self) -> Union[None, ShadowRoot]:
"""返回iframe的shadow-root元素对象"""
...
@property
def shadow_root(self) -> Union[None, ShadowRoot]:
"""返回iframe的shadow-root元素对象"""
...
@property
def _js_ready_state(self) -> Literal['loading', 'interactive', 'complete']:
"""返回当前页面加载状态"""
...
def refresh(self) -> None:
"""刷新frame页面"""
...
def property(self, name: str) -> Union[str, None]:
"""返回frame元素一个property属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
...
def attr(self, name: str) -> Union[str, None]:
"""返回frame元素一个attribute属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
...
def remove_attr(self, name: str) -> None:
"""删除frame元素attribute属性
:param name: 属性名
:return: None
"""
...
def style(self, style: str, pseudo_ele: str = '') -> str:
"""返回frame元素样式属性值可获取伪元素属性值
:param style: 样式属性名称
:param pseudo_ele: 伪元素名称如有
:return: 样式属性的值
"""
...
def run_js(self,
script: str,
*args,
as_expr: bool = False,
timeout: float = None) -> Any: ...
timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def _run_js(self,
script: str,
*args,
as_expr: bool = False,
timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def parent(self,
level_or_loc: Union[Tuple[str, str], str, int] = 1,
index: int = 1) -> ChromiumElement: ...
index: int = 1,
timeout: float = 0) -> ChromiumElement:
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:param timeout: 查找超时时间
:return: 上级元素对象
"""
...
def prev(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = 0,
ele_only: bool = True) -> Union[ChromiumElement, str]: ...
ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
...
def next(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = 0,
ele_only: bool = True) -> Union[ChromiumElement, str]: ...
ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
...
def before(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str]: ...
ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
...
def after(self,
locator: Union[Tuple[str, str], str, int] = '',
index: int = 1,
timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str]: ...
ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
...
def prevs(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = 0,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def nexts(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = 0,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def befores(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
...
def afters(self,
locator: Union[Tuple[str, str], str] = '',
timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ...
ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
...
def get_screenshot(self,
path: [str, Path] = None,
name: str = None,
as_bytes: [bool, str] = None,
as_base64: [bool, str] = None) -> Union[str, bytes]: ...
as_base64: [bool, str] = None) -> Union[str, bytes]:
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:return: 图片完整路径或字节文本
"""
...
def _get_screenshot(self,
path: [str, Path] = None,
@ -208,13 +417,36 @@ class ChromiumFrame(ChromiumBase):
full_page: bool = False,
left_top: Tuple[int, int] = None,
right_bottom: Tuple[int, int] = None,
ele: ChromiumElement = None) -> Union[str, bytes]: ...
ele: ChromiumElement = None) -> Union[str, bytes]:
"""实现截图
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角角坐标
:param ele: 为异域iframe内元素截图设置
:return: 图片完整路径或字节文本
"""
...
def _find_elements(self,
locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
timeout: float = None,
timeout: float,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, None, ChromiumElementsList]: ...
raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, None, ChromiumElementsList]:
"""在frame内查找单个元素
:param locator: 定位符或元素对象
:param timeout: 查找超时时间
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象
"""
...
def _is_inner_frame(self) -> bool: ...
def _is_inner_frame(self) -> bool:
"""返回当前frame是否同域"""
...

View File

@ -2,338 +2,129 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from threading import Lock
from time import sleep, perf_counter
from time import sleep
from requests import Session
from .._base.browser import Browser
from .._configs.chromium_options import ChromiumOptions
from .._functions.browser import connect_browser
from .._base.chromium import Chromium
from .._functions.settings import Settings
from .._functions.tools import PortFinder
from .._functions.web import save_page
from .._pages.chromium_base import ChromiumBase, Timeout
from .._pages.chromium_tab import ChromiumTab
from .._pages.chromium_base import ChromiumBase
from .._units.setter import ChromiumPageSetter
from .._units.waiter import PageWaiter
from ..errors import BrowserConnectError
from .._units.waiter import ChromiumPageWaiter
class ChromiumPage(ChromiumBase):
"""用于管理浏览器的类"""
_PAGES = {}
def __new__(cls, addr_or_opts=None, tab_id=None, timeout=None):
"""
:param addr_or_opts: 浏览器地址:端口ChromiumOptions对象或端口数字int
:param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间
"""
opt = handle_options(addr_or_opts)
is_exist, browser_id = run_browser(opt)
if browser_id in cls._PAGES:
r = cls._PAGES[browser_id]
# 即将废弃timeout
browser = Chromium(addr_or_opts=addr_or_opts)
if browser.id in cls._PAGES:
r = cls._PAGES[browser.id]
while not hasattr(r, '_frame_id'):
sleep(.1)
sleep(.05)
return r
r = object.__new__(cls)
r._chromium_options = opt
r._is_exist = is_exist
r._browser_id = browser_id
r.address = opt.address
cls._PAGES[browser_id] = r
r._browser = browser
cls._PAGES[browser.id] = r
return r
def __init__(self, addr_or_opts=None, tab_id=None, timeout=None):
"""
:param addr_or_opts: 浏览器地址:端口ChromiumOptions对象或端口数字int
:param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间
"""
# 即将废弃timeout
if hasattr(self, '_created'):
return
self._created = True
self._page = self
self.tab = self
self._run_browser()
super().__init__(self.address, tab_id)
super().__init__(self.browser, tab_id)
self._type = 'ChromiumPage'
self._lock = Lock()
self.set.timeouts(base=timeout)
self._page_init()
self.set.timeouts(base=timeout) # 即将废弃
self._tab = self
self._browser._dl_mgr._page_id = self.tab_id
def _run_browser(self):
"""连接浏览器"""
self._browser = Browser(self._chromium_options.address, self._browser_id, self)
r = self._browser.run_cdp('Browser.getVersion')
self._browser_version = r['product']
if self._is_exist and self._chromium_options._headless is False and 'headless' in r['userAgent'].lower():
self._browser.quit(3)
connect_browser(self._chromium_options)
s = Session()
s.trust_env = False
ws = s.get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'})
bid = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
self._browser = Browser(self._chromium_options.address, bid, self)
ws.close()
s.close()
def __repr__(self):
return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def _d_set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeouts = Timeout(self, page_load=self._chromium_options.timeouts['page_load'],
script=self._chromium_options.timeouts['script'],
base=self._chromium_options.timeouts['base'])
if self._chromium_options.timeouts['base'] is not None:
self._timeout = self._chromium_options.timeouts['base']
self._load_mode = self._chromium_options.load_mode
self._download_path = None if self._chromium_options.download_path is None \
else str(Path(self._chromium_options.download_path).absolute())
self.retry_times = self._chromium_options.retry_times
self.retry_interval = self._chromium_options.retry_interval
def _page_init(self):
"""浏览器相关设置"""
self._browser.connect_to_page()
# ----------挂件----------
self._timeouts = self.browser.timeouts
self._load_mode = self.browser._load_mode
self._download_path = self.browser.download_path
self.retry_times = self.browser.retry_times
self.retry_interval = self.browser.retry_interval
self._auto_handle_alert = self.browser._auto_handle_alert
@property
def set(self):
    """Return the object used for settings.

    A ChromiumPageSetter bound to this page is created while ``self._set``
    is still None; afterwards the stored object is returned.
    """
    if self._set is not None:
        return self._set
    self._set = ChromiumPageSetter(self)
    return self._set
@property
def wait(self):
"""返回用于等待的对象"""
if self._wait is None:
self._wait = PageWaiter(self)
self._wait = ChromiumPageWaiter(self)
return self._wait
# ----------挂件----------
@property
def browser(self):
    """Return the browser object stored in ``self._browser``."""
    return self._browser
@property
def tabs_count(self):
    """Return the number of tabs, as reported by the browser object."""
    return self.browser.tabs_count
@property
def tab_ids(self):
    """Return the list of all tab ids, as reported by the browser object."""
    return self.browser.tab_ids
@property
def latest_tab(self):
"""返回最新的标签页,最新标签页指最后创建或最后被激活的
当Settings.singleton_tab_obj==True时返回Tab对象否则返回tab id"""
return self.get_tab(self.tab_ids[0], as_id=not Settings.singleton_tab_obj)
return self.browser._get_tab(id_or_num=self.tab_ids[0], as_id=not Settings.singleton_tab_obj)
@property
def process_id(self):
    """Return the browser process id, as reported by the browser object."""
    return self.browser.process_id
@property
def browser_version(self):
"""返回所控制的浏览器版本号"""
return self._browser_version
return self._browser.version
@property
def address(self):
return self.browser.address
@property
def download_path(self):
return self.browser.download_path
def save(self, path=None, name=None, as_pdf=False, **kwargs):
    """Save the current page to a file; if path and name are both None, only return the content.

    The work is delegated to ``save_page``; ``kwargs`` is passed on as a single dict.

    :param path: where to save; when None and name is not None, the current path is used
    :param name: file name; when None and path is not None, the title attribute is used
    :param as_pdf: True saves as pdf, otherwise as mhtml and kwargs are ignored
    :param kwargs: pdf generation parameters
    :return: bytes when as_pdf is True, otherwise the file text
    """
    return save_page(self, path, name, as_pdf, kwargs)
def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', as_id=False):
"""获取一个标签页对象id_or_num不为None时后面几个参数无效
:param id_or_num: 要获取的标签页id或序号序号从1开始可传入负数获取倒数第几个不是视觉排列顺序而是激活顺序
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象
"""
if id_or_num is not None:
if isinstance(id_or_num, str):
id_or_num = id_or_num
elif isinstance(id_or_num, int):
id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num]
elif isinstance(id_or_num, ChromiumTab):
if as_id:
return id_or_num.tab_id
elif Settings.singleton_tab_obj:
return id_or_num
else:
return self.get_tab(id_or_num.tab_id)
elif title == url == tab_type is None:
id_or_num = self.tab_id
else:
id_or_num = self._browser.find_tabs(title, url, tab_type)
if id_or_num:
id_or_num = id_or_num[0]['id']
else:
return None
if as_id:
return id_or_num
with self._lock:
return ChromiumTab(self, id_or_num)
return self.browser._get_tab(id_or_num=id_or_num, title=title, url=url,
tab_type=tab_type, mix=False, as_id=as_id)
def get_tabs(self, title=None, url=None, tab_type='page', as_id=False):
"""查找符合条件的tab返回它们组成的列表
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象组成的列表
"""
if as_id:
return [tab['id'] for tab in self._browser.find_tabs(title, url, tab_type)]
with self._lock:
return [ChromiumTab(self, tab['id']) for tab in self._browser.find_tabs(title, url, tab_type)]
return self.browser._get_tabs(title=title, url=url, tab_type=tab_type, mix=False, as_id=as_id)
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
"""新建一个标签页
:param url: 新标签页跳转到的网址
:param new_window: 是否在新窗口打开标签页
:param background: 是否不激活新标签页如new_window为True则无效
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
tab = ChromiumTab(self, tab_id=self.browser.new_tab(new_window, background, new_context))
if url:
tab.get(url)
return tab
return self.browser._new_tab(False, url=url, new_window=new_window,
background=background, new_context=new_context)
def activate_tab(self, id_ind_tab):
self.browser.activate_tab(id_ind_tab)
def close(self):
"""关闭Page管理的标签页"""
self.close_tabs(self.tab_id)
self.browser._close_tab(self)
def close_tabs(self, tabs_or_ids=None, others=False):
"""关闭传入的标签页,默认关闭当前页。可传入多个
:param tabs_or_ids: 要关闭的标签页对象或id可传入列表或元组为None时关闭当前页
:param others: 是否关闭指定标签页之外的
:return: None
"""
all_tabs = set(self.tab_ids)
if isinstance(tabs_or_ids, str):
tabs = {tabs_or_ids}
elif isinstance(tabs_or_ids, ChromiumTab):
tabs = {tabs_or_ids.tab_id}
elif tabs_or_ids is None:
tabs = {self.tab_id}
elif isinstance(tabs_or_ids, (list, tuple)):
tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids)
else:
raise TypeError('tabs_or_ids参数只能传入标签页对象或id。')
def close_tabs(self, tabs_or_ids, others=False):
self.browser.close_tabs(tabs_or_ids=tabs_or_ids, others=others)
if others:
tabs = all_tabs - tabs
end_len = len(set(all_tabs) - set(tabs))
if end_len <= 0:
self.quit()
return
for tab in tabs:
self.browser.close_tab(tab)
sleep(.2)
end_time = perf_counter() + 3
while self.tabs_count != end_len and perf_counter() < end_time:
sleep(.1)
def quit(self, timeout=5, force=True):
"""关闭浏览器
:param timeout: 等待浏览器关闭超时时间
:param force: 关闭超时是否强制终止进程
:return: None
"""
self.browser.quit(timeout, force)
def quit(self, timeout=5, force=True, del_data=False):
self.browser.quit(timeout, force, del_data=del_data)
def _on_disconnect(self):
"""浏览器退出时执行"""
ChromiumPage._PAGES.pop(self._browser_id, None)
def __repr__(self):
return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def _apply_auto_port(options):
    """Assign an automatically found port and data path to *options* when its auto-port flag is set."""
    if not options.is_auto_port:
        return
    port, path = PortFinder(options.tmp_path).get_port(options.is_auto_port)
    options.set_address(f'127.0.0.1:{port}')
    options.set_user_data_path(path)
    # is_auto_port is read again on purpose: the setters above are opaque from
    # here and may modify it, so the value must not be cached in a local.
    options.auto_port(scope=options.is_auto_port)


def handle_options(addr_or_opts):
    """Build the browser start-up options from the caller's argument.

    :param addr_or_opts: a falsy value, a ChromiumOptions object, an address string, or an int port
    :return: ChromiumOptions object
    :raises TypeError: when the argument is none of the accepted kinds
    """
    # Falsy argument: create the options object (the argument itself is passed through).
    if not addr_or_opts:
        options = ChromiumOptions(addr_or_opts)
        _apply_auto_port(options)
        return options

    # An options object is used as-is, after resolving its auto port.
    if isinstance(addr_or_opts, ChromiumOptions):
        _apply_auto_port(addr_or_opts)
        return addr_or_opts

    if isinstance(addr_or_opts, str):
        options = ChromiumOptions()
        options.set_address(addr_or_opts)
        return options

    if isinstance(addr_or_opts, int):
        options = ChromiumOptions()
        options.set_local_port(addr_or_opts)
        return options

    raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。')
def run_browser(chromium_options):
    """Connect to the browser and read its id from the ``/json/version`` endpoint.

    :param chromium_options: options object; ``connect_browser`` receives it and its
        ``address`` attribute is used to build the request url
    :return: tuple ``(is_exist, browser_id)``; ``is_exist`` is the value returned by
        ``connect_browser`` and ``browser_id`` is the last path segment of the
        ``webSocketDebuggerUrl`` field
    :raises BrowserConnectError: when the request fails, the response is falsy, or the
        ``webSocketDebuggerUrl`` field is missing
    """
    is_exist = connect_browser(chromium_options)
    try:
        # Context managers close the response and the session on every path,
        # including when get() or json() raises.
        with Session() as s:
            s.trust_env = False
            with s.get(f'http://{chromium_options.address}/json/version',
                       headers={'Connection': 'close'}) as ws:
                if not ws:
                    raise BrowserConnectError('\n浏览器连接失败如使用全局代理须设置不代理127.0.0.1地址。')
                browser_id = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
    except KeyError:
        raise BrowserConnectError('浏览器版本太旧或此浏览器不支持接管。')
    except Exception:
        # Exception instead of a bare except, so KeyboardInterrupt and SystemExit
        # are no longer converted into a connection error.
        raise BrowserConnectError('\n浏览器连接失败如使用全局代理须设置不代理127.0.0.1地址。')
    return is_exist, browser_id
def get_rename(original, rename):
    """Return the new name, borrowing the suffix of *original* when *rename* has none.

    :param original: original name; everything from its last '.' onward is the suffix
    :param rename: requested new name; returned unchanged if it contains a '.'
    :return: the resulting name
    """
    if '.' in rename:
        return rename

    dot_at = original.rfind('.')
    suffix = '' if dot_at == -1 else original[dot_at:]
    return f'{rename}{suffix}'
ChromiumPage._PAGES.pop(self._browser.id, None)

View File

@ -2,72 +2,91 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from threading import Lock
from typing import Union, Tuple, List, Optional
from .._base.browser import Browser
from .._base.chromium import Chromium
from .._configs.chromium_options import ChromiumOptions
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_tab import ChromiumTab
from .._units.rect import TabRect
from .._units.setter import ChromiumPageSetter
from .._units.waiter import PageWaiter
from .._units.waiter import ChromiumPageWaiter
class ChromiumPage(ChromiumBase):
"""用于管理浏览器和一个标签页的类"""
_PAGES: dict = ...
tab: ChromiumPage = ...
_browser: Chromium = ...
_rect: Optional[TabRect] = ...
_is_exist: bool = ...
def __new__(cls,
addr_or_opts: Union[str, int, ChromiumOptions] = None,
tab_id: str = None,
timeout: float = None): ...
tab_id: str = None):
"""
:param addr_or_opts: 浏览器地址:端口ChromiumOptions对象或端口数字int
:param tab_id: 要控制的标签页id不指定默认为激活的
"""
...
def __init__(self,
addr_or_opts: Union[str, int, ChromiumOptions] = None,
tab_id: str = None,
timeout: float = None):
self.tab: ChromiumPage = ...
self._chromium_options: ChromiumOptions = ...
self._browser: Browser = ...
self._browser_id: str = ...
self._rect: Optional[TabRect] = ...
self._is_exist: bool = ...
self._lock: Lock = ...
self._browser_version: str = ...
def _handle_options(self, addr_or_opts: Union[str, ChromiumOptions]) -> str: ...
def _run_browser(self) -> None: ...
def _page_init(self) -> None: ...
tab_id: str = None):
"""
:param addr_or_opts: 浏览器地址:端口ChromiumOptions对象或端口数字int
:param tab_id: 要控制的标签页id不指定默认为激活的
"""
...
@property
def browser(self) -> Browser: ...
def set(self) -> ChromiumPageSetter:
"""返回用于设置的对象"""
...
@property
def tabs_count(self) -> int: ...
def wait(self) -> ChromiumPageWaiter:
"""返回用于等待的对象"""
...
@property
def tab_ids(self) -> List[str]: ...
def browser(self) -> Chromium:
"""返回浏览器对象"""
...
@property
def wait(self) -> PageWaiter: ...
def tabs_count(self) -> int:
"""返回标签页数量"""
...
@property
def latest_tab(self) -> Union[ChromiumTab, ChromiumPage, str]: ...
def tab_ids(self) -> List[str]:
"""返回所有标签页id组成的列表"""
...
@property
def process_id(self) -> Optional[int]: ...
def latest_tab(self) -> Union[ChromiumTab, ChromiumPage, str]:
"""返回最新的标签页,最新标签页指最后创建或最后被激活的
当Settings.singleton_tab_obj==True时返回Tab对象否则返回tab id"""
...
@property
def browser_version(self) -> str: ...
def process_id(self) -> Optional[int]:
"""返回浏览器进程id"""
...
@property
def set(self) -> ChromiumPageSetter: ...
def browser_version(self) -> str:
"""返回所控制的浏览器版本号"""
...
@property
def address(self) -> str:
"""返回浏览器地址ip:port"""
...
def save(self,
path: Union[str, Path] = None,
@ -88,38 +107,117 @@ class ChromiumPage(ChromiumBase):
footerTemplate: str = ...,
preferCSSPageSize: bool = ...,
generateTaggedPDF: bool = ...,
generateDocumentOutline: bool = ...) -> Union[bytes, str]: ...
generateDocumentOutline: bool = ...) -> Union[bytes, str]:
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param landscape: 纸张方向as_pdf为True时才生效
:param displayHeaderFooter: 是否显示页头页脚as_pdf为True时才生效
:param printBackground: 是否打印背景图片as_pdf为True时才生效
:param scale: 缩放比例as_pdf为True时才生效
:param paperWidth: 页面宽度英寸as_pdf为True时才生效
:param paperHeight: 页面高度英寸as_pdf为True时才生效
:param marginTop: 上边距英寸as_pdf为True时才生效
:param marginBottom: 下边距英寸as_pdf为True时才生效
:param marginLeft: 左边距英寸as_pdf为True时才生效
:param marginRight: 右边距英寸as_pdf为True时才生效
:param pageRanges: 页面范围格式'1-5, 8, 11-13'as_pdf为True时才生效
:param headerTemplate: 页头HTML模板as_pdf为True时才生效
模板可包含以下class
- date日期
- title文档标题
- url文档url
- pageNumber当前页码
- totalPages总页数
示例<span class=title></span>
:param footerTemplate: 页脚HTML模板格式与页头的一样as_pdf为True时才生效
:param preferCSSPageSize: 是否使用css定义的页面大小as_pdf为True时才生效
:param generateTaggedPDF: 是否生成带标签的(可访问的)PDF默认为嵌入器选择as_pdf为True时才生效
:param generateDocumentOutline: 是否将文档大纲嵌入到PDF中as_pdf为True时才生效
:return: as_pdf为True时返回bytes否则返回文件文本
"""
...
def get_tab(self,
id_or_num: Union[str, ChromiumTab, int] = None,
title: str = None,
url: str = None,
tab_type: Union[str, list, tuple] = 'page',
as_id: bool = False) -> Union[ChromiumTab, str, None]: ...
as_id: bool = False) -> Union[ChromiumTab, str, None]:
"""获取一个标签页对象id_or_num不为None时后面几个参数无效
:param id_or_num: 要获取的标签页id或序号序号从1开始可传入负数获取倒数第几个不是视觉排列顺序而是激活顺序
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象
"""
...
def get_tabs(self,
title: str = None,
url: str = None,
tab_type: Union[str, list, tuple] = 'page',
as_id: bool = False) -> Union[List[ChromiumTab], List[str]]: ...
as_id: bool = False) -> Union[List[ChromiumTab], List[str]]:
"""查找符合条件的tab返回它们组成的列表
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象组成的列表
"""
...
def new_tab(self, url: str = None, new_window: bool = False, background: bool = False,
new_context: bool = False) -> ChromiumTab: ...
def new_tab(self,
url: str = None,
new_window: bool = False,
background: bool = False,
new_context: bool = False) -> ChromiumTab:
"""新建一个标签页
:param url: 新标签页跳转到的网址
:param new_window: 是否在新窗口打开标签页
:param background: 是否不激活新标签页如new_window为True则无效
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
...
def close(self) -> None: ...
def activate_tab(self,
id_ind_tab: Union[int, str, ChromiumTab]) -> None:
"""使标签页变为活动状态
:param id_ind_tab: 标签页idstrTab对象或标签页序号int序号从1开始
:return: None
"""
...
def close_tabs(self, tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]],
Tuple[Union[str, ChromiumTab]]] = None, others: bool = False) -> None: ...
def close(self) -> None:
"""关闭Page管理的标签页"""
...
def quit(self, timeout: float = 5, force: bool = True) -> None: ...
def close_tabs(self,
tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]],
Tuple[Union[str, ChromiumTab]]],
others: bool = False) -> None:
"""关闭传入的标签页,可传入多个
:param tabs_or_ids: 要关闭的标签页对象或id可传入列表或元组
:param others: 是否关闭指定标签页之外的
:return: None
"""
...
def _on_disconnect(self) -> None: ...
def quit(self,
timeout: float = 5,
force: bool = True,
del_data: bool = False) -> None:
"""关闭浏览器
:param timeout: 等待浏览器关闭超时时间
:param force: 关闭超时是否强制终止进程
:param del_data: 是否删除用户文件夹
:return: None
"""
...
def handle_options(addr_or_opts): ...
def run_browser(chromium_options): ...
def get_rename(original: str, rename: str) -> str: ...
def _on_disconnect(self) -> None:
"""浏览器退出时执行"""
...

View File

@ -2,400 +2,74 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from copy import copy
from time import sleep
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._functions.settings import Settings
from .._functions.web import set_session_cookies, set_browser_cookies, save_page
from .._functions.web import save_page
from .._pages.chromium_base import ChromiumBase
from .._pages.session_page import SessionPage
from .._units.setter import TabSetter, WebPageTabSetter
from .._units.setter import TabSetter
from .._units.waiter import TabWaiter
class ChromiumTab(ChromiumBase):
"""实现浏览器标签页的类"""
_TABS = {}
def __new__(cls, page, tab_id):
"""
:param page: ChromiumPage对象
:param tab_id: 要控制的标签页id
"""
def __new__(cls, browser, tab_id):
if Settings.singleton_tab_obj and tab_id in cls._TABS:
r = cls._TABS[tab_id]
while not hasattr(r, '_frame_id'):
sleep(.1)
sleep(.05)
return r
r = object.__new__(cls)
cls._TABS[tab_id] = r
return r
def __init__(self, page, tab_id):
"""
:param page: ChromiumPage对象
:param tab_id: 要控制的标签页id
"""
def __init__(self, browser, tab_id):
if Settings.singleton_tab_obj and hasattr(self, '_created'):
return
self._created = True
self._page = page
self.tab = self
self._browser = page.browser
super().__init__(page.address, tab_id, page.timeout)
self._rect = None
super().__init__(browser, tab_id)
self._tab = self
self._type = 'ChromiumTab'
def __repr__(self):
return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>'
def _d_set_runtime_settings(self):
"""重写设置浏览器运行参数方法"""
self._timeouts = copy(self.page.timeouts)
self.retry_times = self.page.retry_times
self.retry_interval = self.page.retry_interval
self._load_mode = self.page._load_mode
self._download_path = self.page.download_path
self._timeouts = copy(self.browser.timeouts)
self.retry_times = self.browser.retry_times
self.retry_interval = self.browser.retry_interval
self._load_mode = self.browser._load_mode
self._download_path = self.browser.download_path
self._auto_handle_alert = self.browser._auto_handle_alert
self._none_ele_return_value = self.browser._none_ele_return_value
self._none_ele_value = self.browser._none_ele_value
def close(self):
"""关闭当前标签页"""
self.page.close_tabs(self.tab_id)
@property
def page(self):
"""返回总体page对象"""
return self._page
def close(self, others=False):
if others:
self.browser.close_tabs(self.tab_id, others=True)
else:
self.browser._close_tab(self)
@property
def set(self):
    """Return the object used for settings.

    A TabSetter bound to this tab is created while ``self._set`` is still
    None; afterwards the stored object is returned.
    """
    if self._set is not None:
        return self._set
    self._set = TabSetter(self)
    return self._set
@property
def wait(self):
    """Return the object used for waiting.

    A TabWaiter bound to this tab is created while ``self._wait`` is still
    None; afterwards the stored object is returned.
    """
    if self._wait is not None:
        return self._wait
    self._wait = TabWaiter(self)
    return self._wait
def save(self, path=None, name=None, as_pdf=False, **kwargs):
    """Save the current page to a file; if path and name are both None, only return the content.

    The work is delegated to ``save_page``; ``kwargs`` is passed on as a single dict.

    :param path: where to save; when None and name is not None, the current path is used
    :param name: file name; when None and path is not None, the title attribute is used
    :param as_pdf: True saves as pdf, otherwise as mhtml and kwargs are ignored
    :param kwargs: pdf generation parameters
    :return: bytes when as_pdf is True, otherwise the file text
    """
    return save_page(self, path, name, as_pdf, kwargs)
def __repr__(self):
return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>'
def _on_disconnect(self):
ChromiumTab._TABS.pop(self.tab_id, None)
class WebPageTab(SessionPage, ChromiumTab, BasePage):
    """Tab object that works in one of two modes, stored in ``self._mode``.

    In 's' mode members use the SessionPage implementation (plain ``super()``);
    in 'd' mode they use the implementation that follows SessionPage in the
    MRO (``super(SessionPage, self)``).
    """

    def __init__(self, page, tab_id):
        """
        :param page: WebPage object
        :param tab_id: id of the tab to control
        """
        # With singleton tab objects enabled, an already created instance is left untouched.
        if Settings.singleton_tab_obj and hasattr(self, '_created'):
            return

        self._mode = 'd'
        self._has_driver = True
        self._has_session = True
        session_options = SessionOptions(read_file=False).from_session(copy(page.session), page._headers)
        super().__init__(session_or_options=session_options)
        super(SessionPage, self).__init__(page=page, tab_id=tab_id)
        self._type = 'WebPageTab'

    def __call__(self, locator, index=1, timeout=None):
        """Find an element inside the page.

        Example: ele = page('@id=ele_id')

        :param locator: locator of the element, a loc tuple or a query string
        :param index: which result to get, starting at 1; negative numbers count from the end
        :param timeout: timeout, only passed on in d mode
        :return: the element object
        """
        if self._mode == 's':
            return super().__call__(locator, index=index)
        if self._mode == 'd':
            return super(SessionPage, self).__call__(locator, index=index, timeout=timeout)

    @property
    def set(self):
        """Return the object used for settings (a WebPageTabSetter, created on first use)."""
        if self._set is not None:
            return self._set
        self._set = WebPageTabSetter(self)
        return self._set

    @property
    def url(self):
        """Return the current url of the active mode."""
        if self._mode == 's':
            return self._session_url
        if self._mode == 'd':
            return self._browser_url

    @property
    def _browser_url(self):
        """Return the browser's current url, or None when ``self._driver`` is falsy."""
        if self._driver:
            return super(SessionPage, self).url
        return None

    @property
    def title(self):
        """Return the title of the current page."""
        if self._mode == 'd':
            return super(SessionPage, self).title
        if self._mode == 's':
            return super().title

    @property
    def raw_data(self):
        """Return the raw page data; in d mode this is the html, or '' without a driver."""
        if self._mode == 'd':
            return super(SessionPage, self).html if self._has_driver else ''
        if self._mode == 's':
            return super().raw_data

    @property
    def html(self):
        """Return the html text of the page; in d mode '' is returned without a driver."""
        if self._mode == 'd':
            return super(SessionPage, self).html if self._has_driver else ''
        if self._mode == 's':
            return super().html

    @property
    def json(self):
        """Return the page content parsed as json."""
        if self._mode == 'd':
            return super(SessionPage, self).json
        if self._mode == 's':
            return super().json

    @property
    def response(self):
        """Return the stored Response object (``self._response``)."""
        return self._response

    @property
    def mode(self):
        """Return the current mode, 's' or 'd'."""
        return self._mode

    @property
    def user_agent(self):
        """Return the user agent of the active mode."""
        if self._mode == 'd':
            return super(SessionPage, self).user_agent
        if self._mode == 's':
            return super().user_agent

    @property
    def session(self):
        """Return the Session object, creating it first when ``self._session`` is None."""
        if self._session is None:
            self._create_session()
        return self._session

    @property
    def _session_url(self):
        """Return the url of the stored response, or None when the response is falsy."""
        if self._response:
            return self._response.url
        return None

    @property
    def timeout(self):
        """Return the general timeout (``self.timeouts.base``)."""
        return self.timeouts.base

    @timeout.setter
    def timeout(self, second):
        """Set the general timeout.

        :param second: number of seconds
        :return: None
        """
        self.set.timeouts(base=second)

    def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
        """Navigate to a url.

        :param url: target url
        :param show_errmsg: whether to show and raise errors
        :param retry: retry count; None uses the page object's retry_times
        :param interval: retry interval; None uses the page object's retry_interval
        :param timeout: connection timeout; in s mode None falls back to timeouts.page_load
            when a driver exists, otherwise to the general timeout
        :param kwargs: connection parameters, only used in s mode
        :return: whether the url is usable; in d mode None means undetermined
        """
        if self._mode == 's':
            if timeout is None:
                timeout = self.timeouts.page_load if self._has_driver else self.timeout
            return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
        if self._mode == 'd':
            return super(SessionPage, self).get(url, show_errmsg, retry, interval, timeout)

    def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs):
        """Send a post request to the url.

        :param url: target url
        :param show_errmsg: whether to show and raise errors
        :param retry: retry count; None uses the page object's retry_times
        :param interval: retry interval; None uses the page object's retry_interval
        :param kwargs: connection parameters
        :return: in d mode the stored Response object, otherwise the result of the session post
        """
        if self.mode != 'd':
            return super().post(url, show_errmsg, retry, interval, **kwargs)

        # In d mode the browser cookies are copied to the session before posting.
        self.cookies_to_session()
        super().post(url, show_errmsg, retry, interval, **kwargs)
        return self.response

    def ele(self, locator, index=1, timeout=None):
        """Return the first element, attribute or node text matching the locator.

        :param locator: locator of the element, an element object, a loc tuple or a query string
        :param index: which result to get, starting at 1; negative numbers count from the end
        :param timeout: timeout for the search, only passed on in d mode
        :return: element object, or attribute / node text
        """
        if self._mode == 'd':
            return super(SessionPage, self).ele(locator, index=index, timeout=timeout)
        if self._mode == 's':
            return super().ele(locator, index=index)

    def eles(self, locator, timeout=None):
        """Return all elements, attributes or node texts matching the locator.

        :param locator: locator of the elements, a loc tuple or a query string
        :param timeout: timeout for the search, only passed on in d mode
        :return: list of element objects or attribute / text values
        """
        if self._mode == 'd':
            return super(SessionPage, self).eles(locator, timeout=timeout)
        if self._mode == 's':
            return super().eles(locator)

    def s_ele(self, locator=None, index=1):
        """Find the first matching element and return it as a SessionElement.

        :param locator: locator of the element, a loc tuple or a query string
        :param index: which result to get, starting at 1; negative numbers count from the end
        :return: SessionElement object or attribute / text
        """
        if self._mode == 'd':
            return super(SessionPage, self).s_ele(locator, index=index)
        if self._mode == 's':
            return super().s_ele(locator, index=index)

    def s_eles(self, locator):
        """Find all matching elements and return them as SessionElement objects.

        :param locator: locator of the elements, a loc tuple or a query string
        :return: list of SessionElement objects or attribute / text values
        """
        if self._mode == 'd':
            return super(SessionPage, self).s_eles(locator)
        if self._mode == 's':
            return super().s_eles(locator)

    def change_mode(self, mode=None, go=True, copy_cookies=True):
        """Toggle between 's' and 'd' mode.

        Nothing happens when *mode* (lower-cased) already equals the current mode;
        in every other case the current mode is flipped.

        :param mode: mode string
        :param go: whether to navigate to the url of the previous mode
        :param copy_cookies: whether to copy the cookies to the target mode
        :return: None
        """
        if mode is not None and mode.lower() == self._mode:
            return

        if self._mode == 'd':
            self._mode = 's'
        else:
            self._mode = 'd'

        # s mode -> d mode
        if self._mode == 'd':
            if self._driver is None:
                self._connect_browser(self.page._chromium_options)

            self._url = None if not self._has_driver else super(SessionPage, self).url
            self._has_driver = True

            if self._session_url:
                if copy_cookies:
                    self.cookies_to_browser()
                if go:
                    self.get(self._session_url)

        # d mode -> s mode
        elif self._mode == 's':
            self._has_session = True
            self._url = self._session_url

            if self._has_driver:
                if copy_cookies:
                    self.cookies_to_session()
                if go:
                    browser_url = super(SessionPage, self).url
                    # only urls starting with 'http' are followed in s mode
                    if browser_url.startswith('http'):
                        self.get(browser_url)

    def cookies_to_session(self, copy_user_agent=True):
        """Copy the browser's cookies to the session object.

        :param copy_user_agent: whether to copy the user agent as well
        :return: None
        """
        if not self._has_session:
            return

        if copy_user_agent:
            evaluated = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')
            user_agent = evaluated['result']['value']
            self._headers.update({"User-Agent": user_agent})

        set_session_cookies(self.session, super(SessionPage, self).cookies())

    def cookies_to_browser(self):
        """Copy the session object's cookies to the browser."""
        if self._has_driver:
            set_browser_cookies(self, super().cookies())

    def cookies(self, as_dict=False, all_domains=False, all_info=False):
        """Return the cookies of the active mode.

        :param as_dict: True returns a dict, False returns a list (all_info is then ignored)
        :param all_domains: whether to return the cookies of all domains
        :param all_info: whether to return all fields; False returns only name, value and domain
        :return: cookies information
        """
        if self._mode == 'd':
            return super(SessionPage, self).cookies(as_dict, all_domains, all_info)
        if self._mode == 's':
            return super().cookies(as_dict, all_domains, all_info)

    def close(self):
        """Close this tab, then close the session and the stored response (if any)."""
        self.page.close_tabs(self.tab_id)
        self._session.close()
        if self._response is not None:
            self._response.close()

    def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
        """Return elements, attributes or node texts matching the locator.

        :param locator: locator of the element, an element object, a loc tuple or a query string
        :param timeout: timeout for the search, only passed on in d mode
        :param index: which result to get, starting at 1; negative numbers count from the end;
            None returns all
        :param relative: relative-locating flag, only passed on in d mode
        :param raise_err: accepted for interface compatibility; not forwarded by this method
        :return: element object, or attribute / node text
        """
        if self._mode == 'd':
            return super(SessionPage, self)._find_elements(locator, timeout=timeout, index=index, relative=relative)
        if self._mode == 's':
            return super()._find_elements(locator, index=index)

    def __repr__(self):
        return f'<WebPageTab browser_id={self.browser.id} tab_id={self.tab_id}>'
if not self._disconnect_flag:
ChromiumTab._TABS.pop(self.tab_id, None)

View File

@ -2,202 +2,108 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union, Tuple, Any, Optional
from requests import Session, Response
from typing import Union, Optional
from .chromium_base import ChromiumBase
from .chromium_frame import ChromiumFrame
from .chromium_page import ChromiumPage
from .session_page import SessionPage
from .web_page import WebPage
from .._base.browser import Browser
from .._elements.chromium_element import ChromiumElement
from .._elements.session_element import SessionElement
from .._functions.elements import SessionElementsList, ChromiumElementsList
from .._base.chromium import Chromium
from .._units.rect import TabRect
from .._units.setter import TabSetter, WebPageTabSetter
from .._units.setter import TabSetter
from .._units.waiter import TabWaiter
class ChromiumTab(ChromiumBase):
"""实现浏览器标签页的类"""
_TABS: dict = ...
_tab: ChromiumTab = ...
_rect: Optional[TabRect] = ...
def __new__(cls, page: ChromiumPage, tab_id: str): ...
def __new__(cls, browser: Chromium, tab_id: str):
"""
:param browser: Browser对象
:param tab_id: 标签页id
"""
...
def __init__(self, page: ChromiumPage, tab_id: str):
self._page: ChromiumPage = ...
self._browser: Browser = ...
self._rect: Optional[TabRect] = ...
def __init__(self, browser: Chromium, tab_id: str):
"""
:param browser: Browser对象
:param tab_id: 标签页id
"""
...
def _d_set_runtime_settings(self) -> None: ...
def _d_set_runtime_settings(self) -> None:
"""重写设置浏览器运行参数方法"""
...
def close(self) -> None: ...
def close(self, others: bool = False) -> None:
"""关闭标签页
:param others: 是否关闭其它保留自己
:return: None
"""
...
@property
def page(self) -> ChromiumPage: ...
def set(self) -> TabSetter:
"""返回用于设置的对象"""
...
@property
def set(self) -> TabSetter: ...
@property
def wait(self) -> TabWaiter: ...
def wait(self) -> TabWaiter:
"""返回用于等待的对象"""
...
def save(self,
path: Union[str, Path] = None,
name: str = None,
as_pdf: bool = False,
landscape: bool = ...,
displayHeaderFooter: bool = ...,
printBackground: bool = ...,
scale: float = ...,
paperWidth: float = ...,
paperHeight: float = ...,
marginTop: float = ...,
marginBottom: float = ...,
marginLeft: float = ...,
marginRight: float = ...,
pageRanges: str = ...,
headerTemplate: str = ...,
footerTemplate: str = ...,
preferCSSPageSize: bool = ...,
landscape: bool = False,
displayHeaderFooter: bool = False,
printBackground: bool = False,
scale: float = 1,
paperWidth: float = 8.5,
paperHeight: float = 11,
marginTop: float = 11,
marginBottom: float = 1,
marginLeft: float = 1,
marginRight: float = 1,
pageRanges: str = '',
headerTemplate: str = '',
footerTemplate: str = '',
preferCSSPageSize: bool = False,
generateTaggedPDF: bool = ...,
generateDocumentOutline: bool = ...) -> Union[bytes, str]: ...
generateDocumentOutline: bool = ...) -> Union[bytes, str]:
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param landscape: 纸张方向as_pdf为True时才生效
:param displayHeaderFooter: 是否显示页头页脚as_pdf为True时才生效
:param printBackground: 是否打印背景图片as_pdf为True时才生效
:param scale: 缩放比例as_pdf为True时才生效
:param paperWidth: 页面宽度英寸as_pdf为True时才生效
:param paperHeight: 页面高度英寸as_pdf为True时才生效
:param marginTop: 上边距英寸as_pdf为True时才生效
:param marginBottom: 下边距英寸as_pdf为True时才生效
:param marginLeft: 左边距英寸as_pdf为True时才生效
:param marginRight: 右边距英寸as_pdf为True时才生效
:param pageRanges: 页面范围格式'1-5, 8, 11-13'as_pdf为True时才生效
:param headerTemplate: 页头HTML模板as_pdf为True时才生效
模板可包含以下class
- date日期
- title文档标题
- url文档url
- pageNumber当前页码
- totalPages总页数
示例<span class=title></span>
:param footerTemplate: 页脚HTML模板格式与页头的一样as_pdf为True时才生效
:param preferCSSPageSize: 是否使用css定义的页面大小as_pdf为True时才生效
:param generateTaggedPDF: 是否生成带标签的(可访问的)PDF默认为嵌入器选择as_pdf为True时才生效
:param generateDocumentOutline: 是否将文档大纲嵌入到PDF中as_pdf为True时才生效
:return: as_pdf为True时返回bytes否则返回文件文本
"""
...
class WebPageTab(SessionPage, ChromiumTab):
    """Type stub for a tab class that inherits from both SessionPage and ChromiumTab.

    Only signatures are declared here; the bodies live in the implementation module.
    """

    def __init__(self, page: WebPage, tab_id: str):
        self._page: WebPage = ...
        self._browser: Browser = ...
        self._mode: str = ...
        self._has_driver = ...
        self._has_session = ...

    def __call__(
            self,
            locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
            index: int = 1,
            timeout: float = None,
    ) -> Union[ChromiumElement, SessionElement]: ...

    @property
    def page(self) -> WebPage: ...

    @property
    def url(self) -> Union[str, None]: ...

    @property
    def _browser_url(self) -> Union[str, None]: ...

    @property
    def title(self) -> str: ...

    @property
    def raw_data(self) -> Union[str, bytes]: ...

    @property
    def html(self) -> str: ...

    @property
    def json(self) -> dict: ...

    @property
    def response(self) -> Response: ...

    @property
    def mode(self) -> str: ...

    @property
    def user_agent(self) -> str: ...

    @property
    def session(self) -> Session: ...

    @property
    def _session_url(self) -> str: ...

    @property
    def timeout(self) -> float: ...

    @timeout.setter
    def timeout(self, second: float) -> None: ...

    def get(
            self,
            url: str,
            show_errmsg: bool = False,
            retry: Optional[int] = None,
            interval: Optional[float] = None,
            timeout: Optional[float] = None,
            params: Optional[dict] = ...,
            data: Union[dict, str, None] = ...,
            json: Union[dict, str, None] = ...,
            headers: Optional[dict] = ...,
            cookies: Optional[Any] = ...,
            files: Optional[Any] = ...,
            auth: Optional[Any] = ...,
            allow_redirects: bool = ...,
            proxies: Optional[dict] = ...,
            hooks: Optional[Any] = ...,
            stream: Optional[Any] = ...,
            verify: Optional[Any] = ...,
            cert: Optional[Any] = ...,
    ) -> Union[bool, None]: ...

    def ele(
            self,
            locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
            index: int = 1,
            timeout: float = None,
    ) -> Union[ChromiumElement, SessionElement]: ...

    def eles(
            self,
            locator: Union[Tuple[str, str], str],
            timeout: float = None,
    ) -> Union[SessionElementsList, ChromiumElementsList]: ...

    def s_ele(
            self,
            locator: Union[Tuple[str, str], str] = None,
            index: int = 1,
    ) -> SessionElement: ...

    def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ...

    def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ...

    def cookies_to_session(self, copy_user_agent: bool = True) -> None: ...

    def cookies_to_browser(self) -> None: ...

    def cookies(
            self,
            as_dict: bool = False,
            all_domains: bool = False,
            all_info: bool = False,
    ) -> Union[dict, list]: ...

    def close(self) -> None: ...

    # ---------------- overrides of SessionPage methods ----------------

    def post(
            self,
            url: str,
            data: Union[dict, str, None] = None,
            show_errmsg: bool = False,
            retry: Optional[int] = None,
            interval: Optional[float] = None,
            timeout: Optional[float] = ...,
            params: Optional[dict] = ...,
            json: Union[dict, str, None] = ...,
            headers: Optional[dict] = ...,
            cookies: Optional[Any] = ...,
            files: Optional[Any] = ...,
            auth: Optional[Any] = ...,
            allow_redirects: bool = ...,
            proxies: Optional[dict] = ...,
            hooks: Optional[Any] = ...,
            stream: Optional[Any] = ...,
            verify: Optional[Any] = ...,
            cert: Optional[Any] = ...,
    ) -> Union[bool, Response]: ...

    @property
    def set(self) -> WebPageTabSetter: ...

    def _find_elements(
            self,
            locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
            timeout: float = None,
            index: Optional[int] = 1,
            relative: bool = False,
            raise_err: bool = None,
    ) -> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]: ...

    def _on_disconnect(self): ...

View File

@ -0,0 +1,198 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from .chromium_tab import ChromiumTab
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._functions.cookies import set_session_cookies, set_tab_cookies
from .._functions.settings import Settings
from .._pages.session_page import SessionPage
from .._units.setter import MixTabSetter
class MixTab(SessionPage, ChromiumTab, BasePage):
    """Tab object that can operate in two modes.

    ``self._d_mode`` selects the active mode. In d mode calls are dispatched
    through ``super(SessionPage, self)``, i.e. to the classes that follow
    SessionPage in the MRO (the browser side). In s mode they are dispatched
    through ``super()``, i.e. to SessionPage.
    """

    def __init__(self, browser, tab_id):
        """
        :param browser: browser object this tab belongs to
        :param tab_id: id of the tab
        """
        # When singleton tab objects are enabled and this instance already carries
        # the `_created` marker, it was initialised before: do not reset its state.
        if Settings.singleton_tab_obj and hasattr(self, '_created'):
            return

        self._d_mode = True
        self._session_options = None
        self._headers = None
        self._response = None
        self._session = None
        self._encoding = None
        self._timeout = 10
        # Deliberately skip SessionPage.__init__ and initialise the browser side only.
        super(SessionPage, self).__init__(browser=browser, tab_id=tab_id)
        self._type = 'MixTab'

    def __call__(self, locator, index=1, timeout=None):
        """Find an element; shortcut for the mode specific ``__call__``.

        :param locator: locator of the element
        :param index: which match to return
        :param timeout: search timeout, only forwarded in d mode
        :return: result of the underlying ``__call__``
        """
        if self._d_mode:
            return super(SessionPage, self).__call__(locator, index=index, timeout=timeout)
        return super().__call__(locator, index=index)

    def __repr__(self):
        return f'<MixTab browser_id={self.browser.id} tab_id={self.tab_id}>'

    @property
    def set(self):
        """Return the setter object, creating it on first access."""
        if self._set is None:
            self._set = MixTabSetter(self)
        return self._set

    @property
    def url(self):
        """Return the browser url in d mode, the session url in s mode."""
        return self._browser_url if self._d_mode else self._session_url

    @property
    def _browser_url(self):
        """Return the url reported by the browser side, or None when there is no driver."""
        return super(SessionPage, self).url if self._driver else None

    @property
    def title(self):
        """Return the page title of the active mode."""
        return super(SessionPage, self).title if self._d_mode else super().title

    @property
    def raw_data(self):
        """Return the browser html in d mode, SessionPage's raw data in s mode."""
        return super(SessionPage, self).html if self._d_mode else super().raw_data

    @property
    def html(self):
        """Return the html text of the active mode."""
        return super(SessionPage, self).html if self._d_mode else super().html

    @property
    def json(self):
        """Return the ``json`` value of the active mode."""
        return super(SessionPage, self).json if self._d_mode else super().json

    @property
    def response(self):
        """Return the stored Response object (None until a session request was made)."""
        return self._response

    @property
    def mode(self):
        """Return ``'d'`` or ``'s'`` depending on the active mode."""
        return 'd' if self._d_mode else 's'

    @property
    def user_agent(self):
        """Return the user agent of the active mode."""
        return super(SessionPage, self).user_agent if self._d_mode else super().user_agent

    @property
    def _session_url(self):
        """Return the url of the stored Response, or None when there is none."""
        return self._response.url if self._response else None

    @property
    def timeout(self):
        """Return ``timeouts.base`` in d mode, the session timeout in s mode."""
        return self.timeouts.base if self._d_mode else self._timeout

    def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
        """Navigate to ``url`` with the active mode.

        :param url: target url
        :param show_errmsg: forwarded unchanged to the underlying ``get``
        :param retry: forwarded unchanged to the underlying ``get``
        :param interval: forwarded unchanged to the underlying ``get``
        :param timeout: timeout; in s mode defaults to ``timeouts.page_load``
        :param kwargs: extra connection arguments, only allowed in s mode
        :return: result of the underlying ``get``
        :raises ValueError: if ``kwargs`` are given in d mode
        """
        if self._d_mode:
            if kwargs:
                raise ValueError(f'以下参数在s模式下才会生效{" ".join(kwargs.keys())}')
            return super(SessionPage, self).get(url, show_errmsg, retry, interval, timeout)

        if timeout is None:
            timeout = self.timeouts.page_load
        return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)

    def post(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
        """Send a post request through the session, whatever the active mode is.

        :param url: target url
        :param show_errmsg: forwarded unchanged to ``SessionPage.post``
        :param retry: forwarded unchanged to ``SessionPage.post``
        :param interval: forwarded unchanged to ``SessionPage.post``
        :param timeout: connection timeout; defaults to ``timeouts.page_load``
        :param kwargs: extra connection arguments
        :return: the stored Response object
        """
        # The session must exist before cookies are copied: cookies_to_session()
        # silently returns when there is no session yet.
        if self._session is None:
            self._create_session()
        if self.mode == 'd':
            self.cookies_to_session()

        # Forward the timeout in both cases; previously an explicit value was dropped.
        kwargs['timeout'] = self.timeouts.page_load if timeout is None else timeout
        super().post(url, show_errmsg, retry, interval, **kwargs)
        return self.response

    def ele(self, locator, index=1, timeout=None):
        """Return one matching element using the active mode.

        :param locator: locator of the element
        :param index: which match to return
        :param timeout: search timeout, only forwarded in d mode
        :return: result of the underlying ``ele``
        """
        if self._d_mode:
            return super(SessionPage, self).ele(locator, index=index, timeout=timeout)
        return super().ele(locator, index=index)

    def eles(self, locator, timeout=None):
        """Return all matching elements using the active mode.

        :param locator: locator of the elements
        :param timeout: search timeout, only forwarded in d mode
        :return: result of the underlying ``eles``
        """
        if self._d_mode:
            return super(SessionPage, self).eles(locator, timeout=timeout)
        return super().eles(locator)

    def s_ele(self, locator=None, index=1, timeout=None):
        """Return one matching element via the ``s_ele`` of the active mode.

        :param locator: locator of the element
        :param index: which match to return
        :param timeout: search timeout, only forwarded in d mode
        :return: result of the underlying ``s_ele``
        """
        if self._d_mode:
            return super(SessionPage, self).s_ele(locator, index=index, timeout=timeout)
        # SessionPage.s_ele() takes no timeout argument; passing one raises TypeError.
        return super().s_ele(locator, index=index)

    def s_eles(self, locator, timeout=None):
        """Return all matching elements via the ``s_eles`` of the active mode.

        :param locator: locator of the elements
        :param timeout: search timeout, only forwarded in d mode
        :return: result of the underlying ``s_eles``
        """
        if self._d_mode:
            return super(SessionPage, self).s_eles(locator, timeout=timeout)
        # SessionPage.s_eles() takes no timeout argument; passing one raises TypeError.
        return super().s_eles(locator)

    def change_mode(self, mode=None, go=True, copy_cookies=True):
        """Switch between d mode and s mode.

        ``'d'`` / ``'s'`` (case-insensitive) select that mode and do nothing when
        it is already active; any other value, including None, toggles the mode.

        :param mode: target mode, or None to toggle
        :param go: whether to open the url of the previous mode in the new mode
        :param copy_cookies: whether to copy cookies from the previous mode
        :return: None
        """
        if mode:
            mode = mode.lower()
        if mode is not None and ((mode == 'd' and self._d_mode) or (mode == 's' and not self._d_mode)):
            return

        self._d_mode = not self._d_mode

        # s mode -> d mode
        if self._d_mode:
            if self._driver is None or not self._driver.is_running:
                self._driver_init(self.tab_id)
                self._get_document()

            self._url = super(SessionPage, self).url
            if self._session_url:
                if copy_cookies:
                    self.cookies_to_browser()
                if go:
                    self.get(self._session_url)
            return

        # d mode -> s mode
        if self._session is None:
            self._set_session_options(
                self.browser._session_options or SessionOptions(read_file=self.browser._session_options is None))
            self._create_session()

        self._url = self._session_url
        if self._driver:
            if copy_cookies:
                self.cookies_to_session()
            if go:
                url = super(SessionPage, self).url
                # only http(s) pages are re-opened with the session
                if url.startswith('http'):
                    self.get(url)

    def cookies_to_session(self, copy_user_agent=True):
        """Copy the browser cookies into the session; no-op when there is no session.

        :param copy_user_agent: whether to also copy the browser's user agent into the session headers
        :return: None
        """
        if not self._session:
            return

        if copy_user_agent:
            user_agent = self._run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
            self._headers.update({"User-Agent": user_agent})

        set_session_cookies(self.session, super(SessionPage, self).cookies())

    def cookies_to_browser(self):
        """Copy the session cookies into the browser tab; no-op when the driver is not running."""
        if self._driver is None or not self._driver.is_running:
            return
        set_tab_cookies(self, super().cookies())

    def cookies(self, all_domains=False, all_info=False):
        """Return the cookies of the active mode.

        :param all_domains: forwarded unchanged to the underlying ``cookies``
        :param all_info: forwarded unchanged to the underlying ``cookies``
        :return: result of the underlying ``cookies``
        """
        if self._d_mode:
            return super(SessionPage, self).cookies(all_domains, all_info)
        return super().cookies(all_domains, all_info)

    def close(self, others=False, session=False):
        """Close this tab, or the other tabs when ``others`` is True.

        :param others: close the other tabs and keep this one
        :param session: when closing this tab, also close the session and the stored response
        :return: None
        """
        # NOTE(review): block nesting was reconstructed from a flattened source; confirm
        # that the session is meant to be closed only together with this tab.
        if others:
            self.browser.close_tabs(self.tab_id, others=True)
        else:
            self.browser._close_tab(self)
            if session and self._session:
                self._session.close()
                if self._response is not None:
                    self._response.close()

    def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
        """Dispatch the element search to the active mode.

        ``relative`` is only forwarded in d mode; ``raise_err`` is accepted for
        signature compatibility and is not forwarded.

        :param locator: locator of the element(s)
        :param timeout: search timeout
        :param index: which match to return
        :param relative: forwarded to the d mode implementation
        :param raise_err: unused here
        :return: result of the underlying ``_find_elements``
        """
        if self._d_mode:
            return super(SessionPage, self)._find_elements(locator, timeout=timeout, index=index, relative=relative)
        return super()._find_elements(locator, index=index, timeout=timeout)

    def _set_session_options(self, session_or_options=None):
        """Set the session options, falling back to the browser's options.

        :param session_or_options: options to use; when None the browser's
            ``_session_options`` are used, or a new SessionOptions if those are falsy
        :return: None
        """
        if session_or_options is None:
            session_or_options = self.browser._session_options or SessionOptions(
                read_file=self.browser._session_options is None)
        super()._set_session_options(session_or_options)

View File

@ -0,0 +1,297 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from http.cookiejar import CookieJar
from typing import Union, Tuple, Any, Optional, Literal
from requests import Session, Response
from .chromium_frame import ChromiumFrame
from .chromium_tab import ChromiumTab
from .session_page import SessionPage
from .._base.chromium import Chromium
from .._elements.chromium_element import ChromiumElement
from .._elements.session_element import SessionElement
from .._functions.cookies import CookiesList
from .._functions.elements import SessionElementsList, ChromiumElementsList
from .._units.setter import MixTabSetter
from .._units.waiter import MixTabWaiter
class MixTab(SessionPage, ChromiumTab):
    """Type stub for MixTab, a tab that can switch between d (browser) and s (session) mode."""
    _tab: MixTab = ...
    _d_mode: bool = ...
    _set: MixTabSetter = ...

    def __init__(self, browser: Chromium, tab_id: str):
        """
        :param browser: Chromium object
        :param tab_id: id of the tab
        """
        ...

    def __call__(self,
                 locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
                 index: int = 1,
                 timeout: Optional[float] = None) -> Union[ChromiumElement, SessionElement]:
        """Find an element inside the page.
        Example: ele = page('@id=ele_id')
        :param locator: locator of the element, a loc tuple or a query string
        :param index: which match to return, starting at 1; negative values count from the end
        :param timeout: timeout in seconds
        :return: the matching element object
        """
        ...

    @property
    def set(self) -> MixTabSetter:
        """Return the object used for settings."""
        ...

    @property
    def wait(self) -> MixTabWaiter:
        """Return the object used for waiting."""
        ...

    @property
    def url(self) -> Union[str, None]:
        """Return the current url of the active mode."""
        ...

    @property
    def _browser_url(self) -> Union[str, None]:
        """Return the current url of the browser."""
        ...

    @property
    def title(self) -> str:
        """Return the title of the current page."""
        ...

    @property
    def raw_data(self) -> Union[str, bytes]:
        """Return the raw data of the page."""
        ...

    @property
    def html(self) -> str:
        """Return the html text of the page."""
        ...

    @property
    def json(self) -> dict:
        """Return the corresponding dict when the content is in json format."""
        ...

    @property
    def response(self) -> Optional[Response]:
        """Return the Response object obtained by the session, if any."""
        ...

    @property
    def mode(self) -> Literal['s', 'd']:
        """Return the current mode, 's' or 'd'."""
        ...

    @property
    def user_agent(self) -> str:
        """Return the user agent."""
        ...

    @property
    def session(self) -> Session:
        """Return the Session object."""
        ...

    @property
    def _session_url(self) -> Optional[str]:
        """Return the url held by the session side, or None when there is no response."""
        ...

    @property
    def timeout(self) -> float:
        """Return the general timeout setting."""
        ...

    def get(self,
            url: str,
            show_errmsg: bool = False,
            retry: Optional[int] = None,
            interval: Optional[float] = None,
            timeout: Optional[float] = None,
            params: Optional[dict] = None,
            data: Union[dict, str, None] = None,
            json: Union[dict, str, None] = None,
            headers: Optional[dict] = None,
            cookies: Union[CookieJar, dict, None] = None,
            files: Optional[Any] = None,
            auth: Optional[Any] = None,
            allow_redirects: bool = True,
            proxies: Optional[dict] = None,
            hooks: Optional[Any] = None,
            stream: Optional[bool] = None,
            verify: Union[bool, str, None] = None,
            cert: Union[str, Tuple[str, str], None] = None) -> Union[bool, None]:
        """Navigate to a url. Arguments from ``params`` onwards are only accepted in s mode.
        :param url: target url
        :param show_errmsg: whether to show and raise errors
        :param retry: number of retries; None uses the page object's retry_times
        :param interval: retry interval; None uses the page object's retry_interval
        :param timeout: connection timeout
        :param params: parameters of the url
        :param data: data to send
        :param json: JSON data to send; Content-Type is set to application/json automatically
        :param headers: request headers
        :param cookies: cookies
        :param files: files to upload, e.g. a dict mapping file names to file objects or paths
        :param auth: authentication information
        :param allow_redirects: whether redirects are allowed
        :param proxies: proxy settings
        :param hooks: callback hooks
        :param stream: whether to use streaming transfer
        :param verify: whether to verify the SSL certificate
        :param cert: path of an SSL client certificate file (.pem), or a ('cert', 'key') tuple
        :return: whether the url is available
        """
        ...

    def post(self,
             url: str,
             show_errmsg: bool = False,
             retry: Optional[int] = None,
             interval: Optional[float] = None,
             timeout: Optional[float] = None,
             params: Optional[dict] = None,
             data: Union[dict, str, None] = None,
             json: Union[dict, str, None] = None,
             headers: Optional[dict] = None,
             cookies: Union[CookieJar, dict, None] = None,
             files: Optional[Any] = None,
             auth: Optional[Any] = None,
             allow_redirects: bool = True,
             proxies: Optional[dict] = None,
             hooks: Optional[Any] = None,
             stream: Optional[bool] = None,
             verify: Union[bool, str, None] = None,
             cert: Union[str, Tuple[str, str], None] = None) -> Optional[Response]:
        """Send a post request to a url.
        :param url: target url
        :param show_errmsg: whether to show and raise errors
        :param retry: number of retries; None uses the page object's retry_times
        :param interval: retry interval; None uses the page object's retry_interval
        :param timeout: connection timeout
        :param params: parameters of the url
        :param data: data to send
        :param json: JSON data to send; Content-Type is set to application/json automatically
        :param headers: request headers
        :param cookies: cookies
        :param files: files to upload, e.g. a dict mapping file names to file objects or paths
        :param auth: authentication information
        :param allow_redirects: whether redirects are allowed
        :param proxies: proxy settings
        :param hooks: callback hooks
        :param stream: whether to use streaming transfer
        :param verify: whether to verify the SSL certificate
        :param cert: path of an SSL client certificate file (.pem), or a ('cert', 'key') tuple
        :return: the Response object that was obtained
        """
        ...

    def ele(self,
            locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
            index: int = 1,
            timeout: Optional[float] = None) -> Union[ChromiumElement, SessionElement]:
        """Return the first element, attribute or node text that matches.
        :param locator: locator of the element: an element object, a loc tuple or a query string
        :param index: which match to return, starting at 1; negative values count from the end
        :param timeout: element search timeout; defaults to the page's waiting time
        :return: element object, or attribute / node text
        """
        ...

    def eles(self,
             locator: Union[Tuple[str, str], str],
             timeout: Optional[float] = None) -> Union[ChromiumElementsList, SessionElementsList]:
        """Return all elements, attributes or node texts in the page that match.
        :param locator: locator of the elements, a loc tuple or a query string
        :param timeout: element search timeout; defaults to the page's waiting time
        :return: list of element objects or attribute / node texts
        """
        ...

    def s_ele(self,
              locator: Union[Tuple[str, str], str] = None,
              index: int = 1,
              timeout: Optional[float] = None) -> SessionElement:
        """Find the first matching element and return it as a SessionElement.
        :param locator: locator of the element, a loc tuple or a query string
        :param index: which match to return, starting at 1; negative values count from the end
        :param timeout: element search timeout; defaults to the page's waiting time
        :return: SessionElement object or attribute / node text
        """
        ...

    def s_eles(self,
               locator: Union[Tuple[str, str], str],
               timeout: Optional[float] = None) -> SessionElementsList:
        """Find all matching elements and return them as SessionElement objects.
        :param locator: locator of the elements, a loc tuple or a query string
        :param timeout: element search timeout; defaults to the page's waiting time
        :return: list of SessionElement objects or attribute / node texts
        """
        ...

    def change_mode(self, mode: Optional[str] = None, go: bool = True, copy_cookies: bool = True) -> None:
        """Switch mode. 's' or 'd' selects that mode; None or any other string toggles the mode.
        If copy_cookies is True the cookies of the current mode are copied to the target mode.
        If go is True the url of the previous mode is opened in the new mode.
        :param mode: mode string
        :param go: whether to navigate to the url of the previous mode
        :param copy_cookies: whether to copy cookies to the target mode
        :return: None
        """
        ...

    def cookies_to_session(self, copy_user_agent: bool = True) -> None:
        """Copy the browser's cookies to the session object.
        :param copy_user_agent: whether to copy the user agent as well
        :return: None
        """
        ...

    def cookies_to_browser(self) -> None:
        """Copy the session object's cookies to the browser."""
        ...

    def cookies(self, all_domains: bool = False, all_info: bool = False) -> CookiesList:
        """Return cookies.
        :param all_domains: whether to return the cookies of all domains
        :param all_info: whether to return all fields; False returns only name, value and domain
        :return: cookies information
        """
        ...

    def close(self, others: bool = False, session: bool = False) -> None:
        """Close the tab.
        :param others: whether to close the other tabs and keep this one
        :param session: whether to also close the session object
        :return: None
        """
        ...

    def _find_elements(self,
                       locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
                       timeout: float,
                       index: Optional[int] = 1,
                       relative: bool = False,
                       raise_err: Optional[bool] = None) \
            -> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]:
        """Return elements, attributes or node texts in the page that match; the first one by default.
        :param locator: locator of the element: an element object, a loc tuple or a query string
        :param timeout: element search timeout, used in d mode
        :param index: which result to return, starting at 1; negative values count from the end; None returns all
        :param relative: whether the search is a relative one
        :param raise_err: whether to raise when nothing is found; None follows the global setting
        :return: element object, or attribute / node text
        """
        ...

View File

@ -2,118 +2,76 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from copy import copy
from pathlib import Path
from re import search, DOTALL
from time import sleep
from urllib.parse import urlparse
from requests import Session, Response
from requests import Response
from requests.structures import CaseInsensitiveDict
from tldextract import extract
from tldextract import TLDExtract
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._elements.session_element import SessionElement, make_session_ele
from .._functions.web import cookie_to_dict, format_headers
from .._functions.cookies import cookie_to_dict, CookiesList
from .._functions.settings import Settings
from .._functions.web import format_headers
from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
"""SessionPage封装了页面操作的常用功能使用requests来获取、解析网页"""
def __init__(self, session_or_options=None, timeout=None):
"""
:param session_or_options: Session对象或SessionOptions对象
:param timeout: 连接超时时间为None时从ini文件读取或默认10
"""
super(SessionPage, SessionPage).__init__(self)
self._headers = None
super().__init__()
self._response = None
self._session = None
self._set = None
self._encoding = None
self._type = 'SessionPage'
self._page = self
self._s_set_start_options(session_or_options)
self._set_session_options(session_or_options)
self._s_set_runtime_settings()
self._create_session()
if timeout is not None:
self.timeout = timeout
if timeout is not None: # 即将废弃
self._timeout = timeout
if not self._session:
self._create_session()
def _s_set_start_options(self, session_or_options):
"""启动配置
:param session_or_options: SessionSessionOptions对象
:return: None
"""
if not session_or_options:
self._session_options = SessionOptions(session_or_options)
elif isinstance(session_or_options, SessionOptions):
self._session_options = session_or_options
elif isinstance(session_or_options, Session):
self._session_options = SessionOptions()
self._session = copy(session_or_options)
self._headers = self._session.headers
self._session.headers = None
def __repr__(self):
return f'<SessionPage url={self.url}>'
def _s_set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeout = self._session_options.timeout
self._download_path = None if self._session_options.download_path is None \
else str(Path(self._session_options.download_path).absolute())
self._download_path = str(Path(self._session_options.download_path or '.').absolute())
self.retry_times = self._session_options.retry_times
self.retry_interval = self._session_options.retry_interval
def _create_session(self):
"""创建内建Session对象"""
if not self._session:
self._session, self._headers = self._session_options.make_session()
def __call__(self, locator, index=1, timeout=None):
"""在内部查找元素
ele2 = ele1('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
return self.ele(locator, index=index)
# -----------------共有属性和方法-------------------
@property
def title(self):
"""返回网页title"""
ele = self._ele('xpath://title', raise_err=False)
return ele.text if ele else None
@property
def url(self):
"""返回当前访问url"""
return self._url
@property
def _session_url(self):
"""返回当前访问url"""
return self._url
@property
def raw_data(self):
"""返回页面原始数据"""
return self.response.content if self.response else b''
@property
def html(self):
"""返回页面的html文本"""
return self.response.text if self.response else ''
@property
def json(self):
"""当返回内容是json格式时返回对应的字典非json格式时返回None"""
try:
return self.response.json()
except Exception:
@ -121,31 +79,30 @@ class SessionPage(BasePage):
@property
def user_agent(self):
"""返回user agent"""
return self._headers.get('user-agent', '')
@property
def session(self):
"""返回Session对象"""
return self._session
@property
def response(self):
"""返回访问url得到的Response对象"""
return self._response
@property
def encoding(self):
"""返回设置的编码"""
return self._encoding
@property
def set(self):
"""返回用于设置的对象"""
if self._set is None:
self._set = SessionPageSetter(self)
return self._set
@property
def timeout(self):
return self._timeout
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""用get方式跳转到url可输入文件路径
:param url: 目标url可指定本地文件路径
@ -179,92 +136,50 @@ class SessionPage(BasePage):
return self._s_connect(url, 'post', show_errmsg, retry, interval, **kwargs)
def ele(self, locator, index=1, timeout=None):
"""返回页面中符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
return self._ele(locator, index=index, method='ele()')
def eles(self, locator, timeout=None):
"""返回页面中所有符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本组成的列表
"""
return self._ele(locator, index=None)
def s_ele(self, locator=None, index=1):
"""返回页面中符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:return: SessionElement对象或属性文本
"""
return make_session_ele(self) if locator is None else self._ele(locator, index=index, method='s_ele()')
def s_eles(self, locator):
"""返回页面中符合条件的所有元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:return: SessionElement对象或属性文本
"""
return self._ele(locator, index=None)
def _find_elements(self, locator, timeout=None, index=1, relative=True, raise_err=None):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 不起实际作用用于和父类对应
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: SessionElement对象
"""
def _find_elements(self, locator, timeout, index=1, relative=True, raise_err=None):
return locator if isinstance(locator, SessionElement) else make_session_ele(self, locator, index=index)
def cookies(self, as_dict=False, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 为True时以dict格式返回为False时返回list且all_info无效
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息False则只返回namevaluedomain
:return: cookies信息
"""
def cookies(self, all_domains=False, all_info=False):
if all_domains:
cookies = self.session.cookies
else:
if self.url:
ex_url = extract(self._session_url)
ex_url = TLDExtract(
suffix_list_urls=["https://publicsuffix.org/list/public_suffix_list.dat",
f"file:///{Settings.suffixes_list_path}"]).extract_str(self._session_url)
domain = f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain
cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
cookies = tuple(c for c in self.session.cookies if domain in c.domain or c.domain == '')
else:
cookies = tuple(x for x in self.session.cookies)
cookies = tuple(c for c in self.session.cookies)
if as_dict:
return {x.name: x.value for x in cookies}
elif all_info:
return [cookie_to_dict(cookie) for cookie in cookies]
if all_info:
r = CookiesList()
for c in cookies:
r.append(cookie_to_dict(c))
else:
r = []
r = CookiesList()
for c in cookies:
c = cookie_to_dict(c)
r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']})
return r
return r
def close(self):
"""关闭Session对象"""
self._session.close()
if self._response is not None:
self._response.close()
def _s_connect(self, url, mode, show_errmsg=False, retry=None, interval=None, **kwargs):
"""执行get或post连接
:param url: 目标url
:param mode: 'get' 'post'
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param kwargs: 连接参数
:return: url是否可用
"""
retry, interval, is_file = self._before_connect(url, retry, interval)
self._response, info = self._make_response(self._url, mode, retry, interval, show_errmsg, **kwargs)
@ -283,18 +198,11 @@ class SessionPage(BasePage):
return self._url_available
def _make_response(self, url, mode='get', retry=None, interval=None, show_errmsg=False, **kwargs):
"""生成Response对象
:param url: 目标url
:param mode: 'get' 'post'
:param show_errmsg: 是否显示和抛出异常
:param kwargs: 其它参数
:return: tuple第一位为Response或None第二位为出错信息或 'Success'
"""
kwargs = CaseInsensitiveDict(kwargs)
if 'headers' not in kwargs:
kwargs['headers'] = CaseInsensitiveDict()
else:
if 'headers' in kwargs:
kwargs['headers'] = CaseInsensitiveDict(format_headers(kwargs['headers']))
else:
kwargs['headers'] = CaseInsensitiveDict()
# 设置referer和host值
parsed_url = urlparse(url)
@ -302,8 +210,12 @@ class SessionPage(BasePage):
scheme = parsed_url.scheme
if not check_headers(kwargs['headers'], self._headers, 'Referer'):
kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
elif not kwargs['headers']['Referer']:
kwargs['headers'].pop('Referer')
if not check_headers(kwargs['headers'], self._headers, 'Host'):
kwargs['headers']['Host'] = hostname
elif not kwargs['headers']['Host']:
kwargs['headers'].pop('Host')
if not check_headers(kwargs, self._headers, 'timeout'):
kwargs['timeout'] = self.timeout
@ -353,17 +265,12 @@ class SessionPage(BasePage):
else:
return None, '连接失败' if err is None else err
def __repr__(self):
return f'<SessionPage url={self.url}>'
def check_headers(kwargs, headers, arg):
    """Return True if ``arg`` is contained in ``kwargs`` or in ``headers``.

    ``headers`` is only consulted when ``arg`` is not found in ``kwargs``.
    """
    for container in (kwargs, headers):
        if arg in container:
            return True
    return False
def set_charset(response):
"""设置Response对象的编码"""
# 在headers中获取编码
content_type = response.headers.get('content-type', '').lower()
if not content_type.endswith(';'):

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Any, Union, Tuple, Optional
@ -14,62 +13,106 @@ from requests.structures import CaseInsensitiveDict
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._elements.session_element import SessionElement
from .._functions.cookies import CookiesList
from .._functions.elements import SessionElementsList
from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
def __init__(self,
session_or_options: Union[Session, SessionOptions] = None,
timeout: float = None):
self._headers: Optional[CaseInsensitiveDict] = ...
self._session: Session = ...
self._session_options: SessionOptions = ...
self._url: str = ...
self._response: Response = ...
self._url_available: bool = ...
self.timeout: float = ...
self.retry_times: int = ...
self.retry_interval: float = ...
self._set: SessionPageSetter = ...
self._encoding: str = ...
self._page: SessionPage = ...
"""SessionPage封装了页面操作的常用功能使用requests来获取、解析网页"""
_session_options: Optional[SessionOptions] = ...
_url: str = ...
_response: Optional[Response] = ...
_url_available: bool = ...
_timeout: float = ...
retry_times: int = ...
retry_interval: float = ...
_set: Optional[SessionPageSetter] = ...
_encoding: Optional[str] = ...
_page: SessionPage = ...
def _s_set_start_options(self, session_or_options: Union[Session, SessionOptions]) -> None: ...
def __init__(self, session_or_options: Union[Session, SessionOptions] = None):
"""
:param session_or_options: Session对象或SessionOptions对象
"""
...
def _s_set_runtime_settings(self) -> None: ...
def _create_session(self) -> None: ...
def _s_set_runtime_settings(self) -> None:
"""设置运行时用到的属性"""
...
def __call__(self,
locator: Union[Tuple[str, str], str, SessionElement],
index: int = 1,
timeout: float = None) -> SessionElement: ...
# -----------------共有属性和方法-------------------
@property
def title(self) -> str: ...
timeout: float = None) -> SessionElement:
"""在内部查找元素
ele2 = ele1('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
...
@property
def url(self) -> str: ...
def title(self) -> str:
"""返回网页title"""
...
@property
def _session_url(self) -> str: ...
def url(self) -> str:
"""返回当前访问url"""
...
@property
def raw_data(self) -> Union[str, bytes]: ...
def _session_url(self) -> str:
"""返回当前访问url"""
...
@property
def html(self) -> str: ...
def raw_data(self) -> Union[str, bytes]:
"""返回页面原始数据"""
...
@property
def json(self) -> Union[dict, None]: ...
def html(self) -> str:
"""返回页面的html文本"""
...
@property
def user_agent(self) -> str: ...
def json(self) -> Union[dict, None]:
"""当返回内容是json格式时返回对应的字典非json格式时返回None"""
...
@property
def download_path(self) -> str: ...
def user_agent(self) -> str:
"""返回user agent"""
...
@property
def session(self) -> Session:
"""返回Session对象"""
...
@property
def response(self) -> Response:
"""返回访问url得到的Response对象"""
...
@property
def encoding(self) -> str:
"""返回设置的编码s模式专用"""
...
@property
def set(self) -> SessionPageSetter:
"""返回用于设置的对象"""
...
@property
def timeout(self) -> float:
"""返回超时设置"""
...
def get(self,
url: Union[Path, str],
@ -89,56 +132,38 @@ class SessionPage(BasePage):
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
def ele(self,
locator: Union[Tuple[str, str], str, SessionElement],
index: int = 1,
timeout: float = None) -> SessionElement: ...
def eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> SessionElementsList: ...
def s_ele(self,
locator: Union[Tuple[str, str], str, SessionElement] = None,
index: int = 1) -> SessionElement: ...
def s_eles(self, loc: Union[Tuple[str, str], str]) -> SessionElementsList: ...
def _find_elements(self,
locator: Union[Tuple[str, str], str, SessionElement],
timeout: float = None,
index: Optional[int] = 1,
relative: bool = True,
raise_err: bool = None) -> Union[SessionElement, SessionElementsList]: ...
def cookies(self,
as_dict: bool = False,
all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
# ----------------session独有属性和方法-----------------------
@property
def session(self) -> Session: ...
@property
def response(self) -> Response: ...
@property
def encoding(self) -> str: ...
@property
def set(self) -> SessionPageSetter: ...
cert: Any | None = ...) -> bool:
"""用get方式跳转到url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间
:param params: url中的参数
:param data: 携带的数据
:param json: 要发送的 JSON 数据会自动设置 Content-Type application/json
:param headers: 请求头
:param cookies: cookies信息
:param files: 要上传的文件可以是一个字典其中键是文件名值是文件对象或文件路径
:param auth: 身份认证信息
:param allow_redirects: 是否允许重定向
:param proxies: 代理信息
:param hooks: 回调方法
:param stream: 是否使用流式传输
:param verify: 是否验证 SSL 证书
:param cert: SSL客户端证书文件的路径(.pem格式)('cert', 'key')元组
:return: s模式时返回url是否可用d模式时返回获取到的Response对象
"""
...
def post(self,
url: str,
show_errmsg: bool = False,
retry: int | None = None,
interval: float | None = None,
data: Union[dict, str, None] = ...,
timeout: float | None = ...,
params: dict | None = ...,
data: Union[dict, str, None] = ...,
json: Union[dict, str, None] = ...,
headers: Union[dict, str, None] = ...,
cookies: Any | None = ...,
@ -149,9 +174,97 @@ class SessionPage(BasePage):
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> bool: ...
cert: Any | None = ...) -> bool:
"""用post方式跳转到url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间
:param params: url中的参数
:param data: 携带的数据
:param json: 要发送的 JSON 数据会自动设置 Content-Type application/json
:param headers: 请求头
:param cookies: cookies信息
:param files: 要上传的文件可以是一个字典其中键是文件名值是文件对象或文件路径
:param auth: 身份认证信息
:param allow_redirects: 是否允许重定向
:param proxies: 代理信息
:param hooks: 回调方法
:param stream: 是否使用流式传输
:param verify: 是否验证 SSL 证书
:param cert: SSL客户端证书文件的路径(.pem格式)('cert', 'key')元组
:return: url是否可用
"""
...
def close(self) -> None: ...
def ele(self,
locator: Union[Tuple[str, str], str, SessionElement],
index: int = 1,
timeout: float = None) -> SessionElement:
"""返回页面中符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本
"""
...
def eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> SessionElementsList:
"""返回页面中所有符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用用于和ChromiumElement对应便于无差别调用
:return: SessionElement对象或属性文本组成的列表
"""
...
def s_ele(self,
locator: Union[Tuple[str, str], str, SessionElement] = None,
index: int = 1) -> SessionElement:
"""返回页面中符合条件的一个元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:return: SessionElement对象或属性文本
"""
...
def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList:
"""返回页面中符合条件的所有元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:return: SessionElement对象或属性文本组成的列表
"""
...
def _find_elements(self,
locator: Union[Tuple[str, str], str, SessionElement],
timeout: float,
index: Optional[int] = 1,
relative: bool = True,
raise_err: bool = None) -> Union[SessionElement, SessionElementsList]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 不起实际作用用于和父类对应
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param raise_err: 找不到元素时是否抛出异常,为None时根据全局设置
:return: SessionElement对象
"""
...
def cookies(self,
all_domains: bool = False,
all_info: bool = False) -> CookiesList:
"""返回cookies
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息False则只返回namevaluedomain
:return: cookies组成的列表
"""
...
def close(self) -> None:
"""关闭Session对象"""
...
def _s_connect(self,
url: str,
@ -159,7 +272,17 @@ class SessionPage(BasePage):
show_errmsg: bool = False,
retry: int = None,
interval: float = None,
**kwargs) -> bool: ...
**kwargs) -> bool:
"""执行get或post连接
:param url: 目标url
:param mode: 'get' 'post'
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param kwargs: 连接参数
:return: url是否可用
"""
...
def _make_response(self,
url: str,
@ -167,12 +290,32 @@ class SessionPage(BasePage):
retry: int = None,
interval: float = None,
show_errmsg: bool = False,
**kwargs) -> tuple: ...
**kwargs) -> tuple:
"""生成Response对象
:param url: 目标url
:param mode: 'get' 'post'
:param show_errmsg: 是否显示和抛出异常
:param kwargs: 其它参数
:return: tuple第一位为Response或None第二位为出错信息或 'Success'
"""
...
def check_headers(kwargs: Union[dict, CaseInsensitiveDict],
headers: Union[dict, CaseInsensitiveDict],
arg: str) -> bool: ...
arg: str) -> bool:
"""检查kwargs或headers中是否有arg所示属性
:param kwargs: 要检查的参数dict
:param headers: 要检查的headers
:param arg: 属性名称
:return: 检查结果
"""
...
def set_charset(response: Response) -> Response: ...
def set_charset(response: Response) -> Response:
"""设置Response对象的编码
:param response: Response对象
:return: Response对象
"""
...

View File

@ -2,43 +2,32 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from .chromium_page import ChromiumPage
from .chromium_tab import WebPageTab
from .session_page import SessionPage
from .._base.base import BasePage
from .._configs.chromium_options import ChromiumOptions
from .._functions.web import set_session_cookies, set_browser_cookies
from .._configs.session_options import SessionOptions
from .._functions.cookies import set_session_cookies, set_tab_cookies
from .._functions.settings import Settings
from .._units.setter import WebPageSetter
class WebPage(SessionPage, ChromiumPage, BasePage):
"""整合浏览器和request的页面类"""
def __new__(cls, mode='d', timeout=None, chromium_options=None, session_or_options=None):
"""初始化函数
:param mode: 'd' 's'即driver模式和session模式
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param chromium_options: Driver对象只使用s模式时应传入False
:param session_or_options: Session对象或SessionOptions对象只使用d模式时应传入False
"""
# 即将废弃timeout
return super().__new__(cls, chromium_options)
def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None, driver_or_options=None):
"""初始化函数
:param mode: 'd' 's'即driver模式和session模式
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param chromium_options: ChromiumOptions对象只使用s模式时应传入False
:param session_or_options: Session对象或SessionOptions对象只使用d模式时应传入False
"""
def __init__(self, mode='d', timeout=None, chromium_options=None, session_or_options=None):
# 即将废弃timeout
if hasattr(self, '_created'):
return
self._mode = mode.lower()
if self._mode not in ('s', 'd'):
mode = mode.lower()
if mode not in ('s', 'd'):
raise ValueError('mode参数只能是s或d。')
self._d_mode = mode == 'd'
self._has_driver = True
self._has_session = True
@ -46,328 +35,179 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
if not chromium_options:
chromium_options = ChromiumOptions(read_file=chromium_options)
chromium_options.set_timeouts(base=self._timeout).set_paths(download_path=self.download_path)
super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout)
super(SessionPage, self).__init__(addr_or_opts=chromium_options, timeout=timeout) # 即将废弃timeout
self._type = 'WebPage'
self.change_mode(self._mode, go=False, copy_cookies=False)
self.change_mode(mode, go=False, copy_cookies=False)
def __call__(self, locator, index=1, timeout=None):
"""在内部查找元素
ele = page('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: 子元素对象
"""
if self._mode == 'd':
if self._d_mode:
return super(SessionPage, self).__call__(locator, index=index, timeout=timeout)
elif self._mode == 's':
return super().__call__(locator, index=index)
return super().__call__(locator, index=index)
def __repr__(self):
return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>'
@property
def latest_tab(self):
return self.browser._get_tab(id_or_num=self.tab_ids[0], mix=True, as_id=not Settings.singleton_tab_obj)
@property
def set(self):
"""返回用于设置的对象"""
if self._set is None:
self._set = WebPageSetter(self)
return self._set
@property
def url(self):
"""返回当前url"""
if self._mode == 'd':
return self._browser_url
elif self._mode == 's':
return self._session_url
return self._browser_url if self._d_mode else self._session_url
@property
def _browser_url(self):
"""返回浏览器当前url"""
return super(SessionPage, self).url if self._driver else None
@property
def title(self):
"""返回当前页面title"""
if self._mode == 's':
return super().title
elif self._mode == 'd':
return super(SessionPage, self).title
return super(SessionPage, self).title if self._d_mode else super().title
@property
def raw_data(self):
"""返回页面原始数据"""
if self._mode == 's':
return super().raw_data
elif self._mode == 'd':
if self._d_mode:
return super(SessionPage, self).html if self._has_driver else ''
return super().raw_data
@property
def html(self):
"""返回页面html文本"""
if self._mode == 's':
return super().html
elif self._mode == 'd':
if self._d_mode:
return super(SessionPage, self).html if self._has_driver else ''
return super().html
@property
def json(self):
"""当返回内容是json格式时返回对应的字典"""
if self._mode == 's':
return super().json
elif self._mode == 'd':
return super(SessionPage, self).json
return super(SessionPage, self).json if self._d_mode else super().json
@property
def response(self):
"""返回 s 模式获取到的 Response 对象"""
return self._response
@property
def mode(self):
"""返回当前模式,'s''d' """
return self._mode
return 'd' if self._d_mode else 's'
@property
def user_agent(self):
"""返回user agent"""
if self._mode == 's':
return super().user_agent
elif self._mode == 'd':
return super(SessionPage, self).user_agent
return super(SessionPage, self).user_agent if self._d_mode else super().user_agent
@property
def session(self):
"""返回Session对象如未初始化则按配置信息创建"""
if self._session is None:
self._create_session()
return self._session
@property
def _session_url(self):
"""返回 session 保存的url"""
return self._response.url if self._response else None
@property
def timeout(self):
"""返回通用timeout设置"""
return self.timeouts.base
return self.timeouts.base if self._d_mode else self._timeout
@timeout.setter
def timeout(self, second):
"""设置通用超时时间
:param second: 秒数
:return: None
"""
self.set.timeouts(base=second)
@property
def download_path(self):
return self.browser.download_path
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间为None时使用页面对象timeouts.page_load属性值
:param kwargs: 连接参数s模式专用
:return: url是否可用d模式返回None时表示不确定
"""
if self._mode == 'd':
if self._d_mode:
return super(SessionPage, self).get(url, show_errmsg, retry, interval, timeout)
elif self._mode == 's':
if timeout is None:
timeout = self.timeouts.page_load if self._has_driver else self.timeout
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
if timeout is None:
timeout = self.timeouts.page_load if self._has_driver else self.timeout
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
def post(self, url, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url会切换到s模式
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param kwargs: 连接参数
:return: s模式时返回url是否可用d模式时返回获取到的Response对象
"""
if self.mode == 'd':
self.cookies_to_session()
super().post(url, show_errmsg, retry, interval, **kwargs)
return self.response
return super().post(url, show_errmsg, retry, interval, **kwargs)
super().post(url, show_errmsg, retry, interval, **kwargs)
return self.response
def ele(self, locator, index=1, timeout=None):
"""返回第一个符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本节点文本
"""
if self._mode == 's':
return super().ele(locator, index=index)
elif self._mode == 'd':
return super(SessionPage, self).ele(locator, index=index, timeout=timeout)
return (super(SessionPage, self).ele(locator, index=index, timeout=timeout)
if self._d_mode else super().ele(locator, index=index))
def eles(self, locator, timeout=None):
"""返回页面中所有符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本组成的列表
"""
if self._mode == 's':
return super().eles(locator)
elif self._mode == 'd':
return super(SessionPage, self).eles(locator, timeout=timeout)
return super(SessionPage, self).eles(locator, timeout=timeout) if self._d_mode else super().eles(locator)
def s_ele(self, locator=None, index=1):
"""查找第一个符合条件的元素以SessionElement形式返回d模式处理复杂页面时效率很高
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:return: SessionElement对象或属性文本
"""
if self._mode == 's':
return super().s_ele(locator, index=index)
elif self._mode == 'd':
return super(SessionPage, self).s_ele(locator, index=index)
def s_ele(self, locator=None, index=1, timeout=None):
    """Find one matching element and return it in SessionElement form.

    :param locator: loc tuple or query string; passed through unchanged to the parent implementation
    :param index: which match to return, counted from 1; negative values count from the end
    :param timeout: element lookup timeout; only forwarded in d mode
    :return: the result of the parent ``s_ele`` selected by the current mode
    """
    if self._d_mode:
        return super(SessionPage, self).s_ele(locator, index=index, timeout=timeout)
    # SessionPage.s_ele only accepts (locator, index); forwarding timeout= would raise TypeError in s mode.
    return super().s_ele(locator, index=index)
def s_eles(self, locator):
"""查找所有符合条件的元素以SessionElement形式返回d模式处理复杂页面时效率很高
:param locator: 元素的定位信息可以是loc元组或查询字符串
:return: SessionElement对象或属性文本组成的列表
"""
if self._mode == 's':
return super().s_eles(locator)
elif self._mode == 'd':
return super(SessionPage, self).s_eles(locator)
def s_eles(self, locator, timeout=None):
    """Find all matching elements and return them in SessionElement form.

    :param locator: loc tuple or query string; passed through unchanged to the parent implementation
    :param timeout: element lookup timeout; only forwarded in d mode
    :return: the result of the parent ``s_eles`` selected by the current mode
    """
    if self._d_mode:
        return super(SessionPage, self).s_eles(locator, timeout=timeout)
    # SessionPage.s_eles only accepts locator; forwarding timeout= would raise TypeError in s mode.
    return super().s_eles(locator)
def change_mode(self, mode=None, go=True, copy_cookies=True):
"""切换模式,接收's''d',除此以外的字符串会切换为 d 模式
如copy_cookies为True切换时会把当前模式的cookies复制到目标模式
切换后如果go是True调用相应的get函数使访问的页面同步
:param mode: 模式字符串
:param go: 是否跳转到原模式的url
:param copy_cookies: 是否复制cookies到目标模式
:return: None
"""
if mode is not None and mode.lower() == self._mode:
if mode:
mode = mode.lower()
if mode is not None and ((mode == 'd' and self._d_mode) or (mode == 's' and not self._d_mode)):
return
self._mode = 's' if self._mode == 'd' else 'd'
self._d_mode = not self._d_mode
# s模式转d模式
if self._mode == 'd':
if self._driver is None:
self._connect_browser(self._chromium_options)
if self._d_mode:
if self._driver is None or not self._driver.is_running:
self._driver_init(self.tab_id)
self._get_document()
self._url = None if not self._has_driver else super(SessionPage, self).url
self._has_driver = True
if self._session_url:
if copy_cookies:
self.cookies_to_browser()
if go:
self.get(self._session_url)
return
# d模式转s模式
elif self._mode == 's':
self._has_session = True
self._url = self._session_url
self._has_session = True
self._url = self._session_url
if self._has_driver:
if copy_cookies:
self.cookies_to_session()
if go and not self.get(super(SessionPage, self).url):
raise ConnectionError('s模式访问失败请设置go=False自行构造连接参数进行访问。')
if self._has_driver and self._driver.is_running:
if copy_cookies:
self.cookies_to_session()
if go and not self.get(super(SessionPage, self).url):
raise ConnectionError('s模式访问失败请设置go=False自行构造连接参数进行访问。')
def cookies_to_session(self, copy_user_agent=True):
"""把driver对象的cookies复制到session对象
:param copy_user_agent: 是否复制ua信息
:return: None
"""
if not self._has_session:
return
if copy_user_agent:
user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
user_agent = self._run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self._headers.update({"User-Agent": user_agent})
set_session_cookies(self.session, super(SessionPage, self).cookies())
def cookies_to_browser(self):
"""把session对象的cookies复制到浏览器"""
if not self._has_driver:
return
set_browser_cookies(self, super().cookies())
set_tab_cookies(self, super().cookies())
def cookies(self, as_dict=False, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 为True时以dict格式返回为False时返回list且all_info无效
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息False则只返回namevaluedomain
:return: cookies信息
"""
if self._mode == 's':
return super().cookies(as_dict, all_domains, all_info)
elif self._mode == 'd':
return super(SessionPage, self).cookies(as_dict, all_domains, all_info)
def cookies(self, all_domains=False, all_info=False):
return super(SessionPage, self).cookies(all_domains, all_info) if self._d_mode \
else super().cookies(all_domains, all_info)
def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', as_id=False):
"""获取一个标签页对象id_or_num不为None时后面几个参数无效
:param id_or_num: 要获取的标签页id或序号序号从1开始可传入负数获取倒数第几个不是视觉排列顺序而是激活顺序
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: WebPageTab对象
"""
if id_or_num is not None:
if isinstance(id_or_num, str):
id_or_num = id_or_num
elif isinstance(id_or_num, int):
id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num]
elif isinstance(id_or_num, WebPageTab):
return id_or_num.tab_id if as_id else id_or_num
elif title == url == tab_type is None:
id_or_num = self.tab_id
else:
id_or_num = self._browser.find_tabs(title, url, tab_type)
if id_or_num:
id_or_num = id_or_num[0]['id']
else:
return None
if as_id:
return id_or_num
with self._lock:
return WebPageTab(self, id_or_num)
return self.browser._get_tab(id_or_num=id_or_num, title=title, url=url,
tab_type=tab_type, mix=True, as_id=as_id)
def get_tabs(self, title=None, url=None, tab_type='page', as_id=False):
"""查找符合条件的tab返回它们组成的列表
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象组成的列表
"""
if as_id:
return [tab['id'] for tab in self._browser.find_tabs(title, url, tab_type)]
with self._lock:
return [WebPageTab(self, tab['id']) for tab in self._browser.find_tabs(title, url, tab_type)]
return self.browser._get_tabs(title=title, url=url, tab_type=tab_type, mix=True, as_id=as_id)
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
"""新建一个标签页
:param url: 新标签页跳转到的网址
:param new_window: 是否在新窗口打开标签页
:param background: 是否不激活新标签页如new_window为True则无效
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
tab = WebPageTab(self, tab_id=self.browser.new_tab(new_window, background, new_context))
if url:
tab.get(url)
return tab
return self.browser._new_tab(url=url, new_window=new_window, background=background, new_context=new_context)
def close_driver(self):
"""关闭driver及浏览器"""
if self._has_driver:
self.change_mode('s')
try:
@ -379,7 +219,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._has_driver = None
def close_session(self):
"""关闭session"""
if self._has_session:
self.change_mode('d')
self._session.close()
@ -390,43 +229,31 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._has_session = None
def close(self):
"""关闭标签页和Session"""
if self._has_driver:
self.close_tabs(self.tab_id)
self.browser._close_tab(self)
if self._session:
self._session.close()
if self._response is not None:
self._response.close()
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间d模式专用
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: WebPage用的表示是否相对定位的参数
:param raise_err: 找不到元素时是否抛出异常,为None时根据全局设置
:return: 元素对象或属性文本节点文本
"""
if self._mode == 's':
return super()._find_elements(locator, index=index)
elif self._mode == 'd':
def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None):
if self._d_mode:
return super(SessionPage, self)._find_elements(locator, timeout=timeout, index=index, relative=relative)
return super()._find_elements(locator, index=index, timeout=timeout)
def quit(self, timeout=5, force=True):
"""关闭浏览器和Session
:param timeout: 等待浏览器关闭超时时间
:param force: 关闭超时是否强制终止进程
:return: None
"""
def quit(self, timeout=5, force=True, del_data=False):
if self._has_session:
self._session.close()
self._session = None
self._response = None
self._has_session = None
if self._has_driver:
super(SessionPage, self).quit(timeout, force)
super(SessionPage, self).quit(timeout, force, del_data=del_data)
self._driver = None
self._has_driver = None
def __repr__(self):
return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def _set_session_options(self, session_or_options=None):
if session_or_options is None:
session_or_options = self.browser._session_options or SessionOptions(
read_file=self.browser._session_options is None)
super()._set_session_options(session_or_options)

View File

@ -2,194 +2,378 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, Tuple, List, Any, Optional
from http.cookiejar import CookieJar
from typing import Union, Tuple, List, Any, Optional, Literal
from requests import Session, Response
from .chromium_frame import ChromiumFrame
from .chromium_page import ChromiumPage
from .chromium_tab import WebPageTab
from .mix_tab import MixTab
from .session_page import SessionPage
from .._base.base import BasePage
from .._base.driver import Driver
from .._configs.chromium_options import ChromiumOptions
from .._configs.session_options import SessionOptions
from .._elements.chromium_element import ChromiumElement
from .._elements.session_element import SessionElement
from .._functions.cookies import CookiesList
from .._functions.elements import SessionElementsList, ChromiumElementsList
from .._units.setter import WebPageSetter
from .._units.waiter import WebPageWaiter
class WebPage(SessionPage, ChromiumPage, BasePage):
"""整合浏览器和request的页面类"""
_d_mode: bool = ...
_set: WebPageSetter = ...
_has_driver: Optional[bool] = ...
_has_session: Optional[bool] = ...
_session_options: Union[SessionOptions, None] = ...
_chromium_options: Union[ChromiumOptions, None] = ...
def __init__(self,
mode: str = 'd',
timeout: float = None,
chromium_options: Union[ChromiumOptions, bool] = None,
session_or_options: Union[Session, SessionOptions, bool] = None) -> None:
self._mode: str = ...
self._set: WebPageSetter = ...
self._has_driver: bool = ...
self._has_session: bool = ...
self._session_options: Union[SessionOptions, None] = ...
self._chromium_options: Union[ChromiumOptions, None] = ...
"""初始化函数
:param mode: 'd' 's'即driver模式和session模式
:param chromium_options: ChromiumOptions对象传入None时从默认ini文件读取传入False时不读取ini文件使用默认配置
:param session_or_options: Session对象或SessionOptions对象传入None时从默认ini文件读取传入False时不读取ini文件使用默认配置
"""
...
def __call__(self,
locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
index: int = 1,
timeout: float = None) -> Union[ChromiumElement, SessionElement]: ...
# -----------------共有属性和方法-------------------
@property
def url(self) -> Union[str, None]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement]:
"""在内部查找元素
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: 子元素对象
"""
...
@property
def _browser_url(self) -> Union[str, None]: ...
def latest_tab(self) -> Union[MixTab, WebPage, str]:
"""返回最新的标签页,最新标签页指最后创建或最后被激活的
当Settings.singleton_tab_obj==True时返回Tab对象否则返回tab id"""
...
@property
def title(self) -> str: ...
def set(self) -> WebPageSetter:
"""返回用于设置的对象"""
...
@property
def raw_data(self) -> Union[str, bytes]: ...
def wait(self) -> WebPageWaiter:
"""返回用于等待的对象"""
...
@property
def html(self) -> str: ...
def url(self) -> Union[str, None]:
"""返回当前url,d模式时为浏览器当前url,s模式时为session保存的url"""
...
@property
def json(self) -> dict: ...
def _browser_url(self) -> Union[str, None]:
"""返回浏览器当前url"""
...
@property
def response(self) -> Response: ...
def title(self) -> str:
"""返回当前页面title"""
...
@property
def mode(self) -> str: ...
def raw_data(self) -> Union[str, bytes]:
"""返回页面原始数据"""
...
@property
def user_agent(self) -> str: ...
def html(self) -> str:
"""返回页面html文本"""
...
@property
def session(self) -> Session: ...
def json(self) -> dict:
"""当返回内容是json格式时返回对应的字典"""
...
@property
def _session_url(self) -> str: ...
def response(self) -> Response:
"""返回 s 模式获取到的 Response 对象"""
...
@property
def timeout(self) -> float: ...
def mode(self) -> Literal['s', 'd']:
"""返回当前模式,'s''d' """
...
@timeout.setter
def timeout(self, second: float) -> None: ...
@property
def user_agent(self) -> str:
"""返回user agent"""
...
@property
def session(self) -> Session:
"""返回Session对象如未初始化则按配置信息创建"""
...
@property
def _session_url(self) -> str:
"""返回 session 保存的url"""
...
@property
def timeout(self) -> float:
"""返回通用timeout设置"""
...
def get(self,
url: str,
show_errmsg: bool = False,
retry: int | None = None,
interval: float | None = None,
timeout: float | None = None,
params: dict | None = ...,
data: Union[dict, str, None] = ...,
json: Union[dict, str, None] = ...,
headers: Union[dict, str, None] = ...,
cookies: Any | None = ...,
files: Any | None = ...,
auth: Any | None = ...,
allow_redirects: bool = ...,
proxies: dict | None = ...,
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> Union[bool, None]: ...
retry: Optional[int] = None,
interval: Optional[float] = None,
timeout: Optional[float] = None,
params: Optional[dict] = None,
data: Union[dict, str, None] = None,
json: Union[dict, str, None] = None,
headers: Optional[dict] = None,
cookies: Union[CookieJar, dict] = None,
files: Optional[Any] = None,
auth: Optional[Any] = None,
allow_redirects: bool = True,
proxies: Optional[dict] = None,
hooks: Optional[Any] = None,
stream: bool = None,
verify: Union[bool, str] = None,
cert: [str, Tuple[str, str]] = None) -> Union[bool, None]:
"""跳转到一个url
:param url: 目标url
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数为None时使用页面对象retry_times属性值
:param interval: 重试间隔为None时使用页面对象retry_interval属性值
:param timeout: 连接超时时间
:param params: url中的参数
:param data: 携带的数据
:param json: 要发送的 JSON 数据会自动设置 Content-Type application/json
:param headers: 请求头
:param cookies: cookies信息
:param files: 要上传的文件可以是一个字典其中键是文件名值是文件对象或文件路径
:param auth: 身份认证信息
:param allow_redirects: 是否允许重定向
:param proxies: 代理信息
:param hooks: 回调方法
:param stream: 是否使用流式传输
:param verify: 是否验证 SSL 证书
:param cert: SSL客户端证书文件的路径(.pem格式)('cert', 'key')元组
:return: url是否可用,d模式返回None时表示不确定
"""
...
def post(self,
         url: str,
         show_errmsg: bool = False,
         retry: Optional[int] = None,
         interval: Optional[float] = None,
         timeout: Optional[float] = None,
         params: Optional[dict] = None,
         data: Union[dict, str, None] = None,
         json: Union[dict, str, None] = None,
         headers: Optional[dict] = None,
         cookies: Union[CookieJar, dict] = None,
         files: Optional[Any] = None,
         auth: Optional[Any] = None,
         allow_redirects: bool = True,
         proxies: Optional[dict] = None,
         hooks: Optional[Any] = None,
         stream: bool = None,
         verify: Union[bool, str] = None,
         cert: Union[str, Tuple[str, str]] = None) -> Response:
    """Navigate to a url with a POST request.

    :param url: target url
    :param show_errmsg: whether to show and raise errors
    :param retry: retry count; when None the page object's retry_times is used
    :param interval: retry interval; when None the page object's retry_interval is used
    :param timeout: connection timeout
    :param params: parameters to put in the url
    :param data: data to send
    :param json: JSON data to send; Content-Type is set to application/json automatically
    :param headers: request headers
    :param cookies: cookies to send
    :param files: files to upload; may be a dict mapping file names to file objects or file paths
    :param auth: authentication information
    :param allow_redirects: whether redirects are allowed
    :param proxies: proxy settings
    :param hooks: callback hooks
    :param stream: whether to use streaming transfer
    :param verify: whether to verify the SSL certificate
    :param cert: path to an SSL client certificate file (.pem), or a ('cert', 'key') tuple
    :return: the Response object that was obtained
    """
    ...
def ele(self,
locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
index: int = 1,
timeout: float = None) -> Union[ChromiumElement, SessionElement]: ...
timeout: float = None) -> Union[ChromiumElement, SessionElement]:
"""返回第一个符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本节点文本
"""
...
def eles(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> Union[SessionElementsList, ChromiumElementsList]: ...
timeout: float = None) -> Union[ChromiumElementsList, SessionElementsList]:
"""返回页面中所有符合条件的元素、属性或节点文本
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: 元素对象或属性文本组成的列表
"""
...
def s_ele(self,
locator: Union[Tuple[str, str], str] = None,
index: int = 1) -> SessionElement: ...
index: int = 1,
timeout: float = None) -> SessionElement:
"""查找第一个符合条件的元素以SessionElement形式返回d模式处理复杂页面时效率很高
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: SessionElement对象或属性文本
"""
...
def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ...
def s_eles(self, locator: Union[Tuple[str, str], str], timeout: float = None) -> SessionElementsList:
"""查找所有符合条件的元素以SessionElement形式返回d模式处理复杂页面时效率很高
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间默认与页面等待时间一致
:return: SessionElement对象或属性文本组成的列表
"""
...
def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ...
def change_mode(self,
mode: str = None,
go: bool = True,
copy_cookies: bool = True) -> None:
"""切换模式,接收's''d',除此以外的字符串会切换为 d 模式
如copy_cookies为True切换时会把当前模式的cookies复制到目标模式
切换后如果go是True调用相应的get函数使访问的页面同步
:param mode: 模式字符串
:param go: 是否跳转到原模式的url
:param copy_cookies: 是否复制cookies到目标模式
:return: None
"""
...
def cookies_to_session(self, copy_user_agent: bool = True) -> None: ...
def cookies_to_session(self, copy_user_agent: bool = True) -> None:
"""把driver对象的cookies复制到session对象
:param copy_user_agent: 是否复制ua信息
:return: None
"""
...
def cookies_to_browser(self) -> None: ...
def cookies_to_browser(self) -> None:
"""把session对象的cookies复制到浏览器"""
...
def cookies(self,
as_dict: bool = False,
all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
all_info: bool = False) -> CookiesList:
"""返回cookies
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息False则只返回namevaluedomain
:return: cookies信息
"""
...
def get_tab(self,
id_or_num: Union[str, WebPageTab, int] = None,
id_or_num: Union[str, MixTab, int] = None,
title: str = None,
url: str = None,
tab_type: Union[str, list, tuple] = 'page',
as_id: bool = False) -> Union[WebPageTab, str, None]: ...
as_id: bool = False) -> Union[MixTab, str, None]:
"""获取一个标签页对象id_or_num不为None时后面几个参数无效
:param id_or_num: 要获取的标签页id或序号序号从1开始可传入负数获取倒数第几个不是视觉排列顺序而是激活顺序
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: MixTab对象
"""
...
def get_tabs(self,
title: str = None,
url: str = None,
tab_type: Union[str, list, tuple] = 'page',
as_id: bool = False) -> Union[List[WebPageTab], List[str]]: ...
as_id: bool = False) -> Union[List[MixTab], List[str]]:
"""查找符合条件的tab返回它们组成的列表
:param title: 要匹配title的文本模糊匹配为None则匹配所有
:param url: 要匹配url的文本模糊匹配为None则匹配所有
:param tab_type: tab类型可用列表输入多个 'page', 'iframe' 为None则匹配所有
:param as_id: 是否返回标签页id而不是标签页对象
:return: MixTab对象或标签页id组成的列表
"""
...
def new_tab(self,
url: str = None,
new_window: bool = False,
background: bool = False,
new_context: bool = False) -> WebPageTab: ...
new_context: bool = False) -> MixTab:
"""新建一个标签页
:param url: 新标签页跳转到的网址
:param new_window: 是否在新窗口打开标签页
:param background: 是否不激活新标签页如new_window为True则无效
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
...
def close_driver(self) -> None: ...
def close_driver(self) -> None:
"""关闭driver及浏览器"""
...
def close_session(self) -> None: ...
def close_session(self) -> None:
"""关闭session"""
...
def close(self) -> None: ...
# ----------------重写SessionPage的函数-----------------------
def post(self,
url: str,
data: Union[dict, str, None] = None,
show_errmsg: bool = False,
retry: int | None = None,
interval: float | None = None,
timeout: float | None = ...,
params: dict | None = ...,
json: Union[dict, str, None] = ...,
headers: Union[dict, str, None] = ...,
cookies: Any | None = ...,
files: Any | None = ...,
auth: Any | None = ...,
allow_redirects: bool = ...,
proxies: dict | None = ...,
hooks: Any | None = ...,
stream: Any | None = ...,
verify: Any | None = ...,
cert: Any | None = ...) -> Union[bool, Response]: ...
@property
def latest_tab(self) -> Union[WebPageTab, WebPage]: ...
@property
def set(self) -> WebPageSetter: ...
def close(self) -> None:
"""关闭标签页和Session"""
...
def _find_elements(self,
locator: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame],
timeout: float = None,
timeout: float,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None) \
-> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]: ...
raise_err: bool = None) -> Union[
ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间d模式专用
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素时是否抛出异常,为None时根据全局设置
:return: 元素对象或属性文本节点文本
"""
...
def _set_start_options(self,
dr_opt: Union[Driver, bool, None],
se_opt: Union[Session, SessionOptions, bool, None]) -> None: ...
def quit(self, timeout: float = 5, force: bool = True) -> None: ...
def quit(self,
timeout: float = 5,
force: bool = True,
del_data: bool = False) -> None:
"""关闭浏览器和Session
:param timeout: 等待浏览器关闭超时时间
:param force: 关闭超时是否强制终止进程
:param del_data: 是否删除用户文件夹
:return: None
"""
...

View File

@ -2,23 +2,17 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from time import sleep, perf_counter
from ..errors import AlertExistsError
from .._functions.keys import modifierBit, keyDescriptionForString, input_text_or_keys, Keys, keyDefinitions
from .._functions.keys import modifierBit, make_input_data, input_text_or_keys, Keys
from .._functions.web import location_in_viewport
class Actions:
"""用于实现动作链的类"""
def __init__(self, owner):
"""
:param owner: ChromiumBase对象
"""
self.owner = owner
self._dr = owner.driver
self.modifier = 0 # 修饰符Alt=1, Ctrl=2, Meta/Command=4, Shift=8
@ -26,16 +20,13 @@ class Actions:
self.curr_y = 0
self._holding = 'left'
def move_to(self, ele_or_loc, offset_x=0, offset_y=0, duration=.5):
"""鼠标移动到元素中点,或页面上的某个绝对坐标。可设置偏移量
当带偏移量时偏移量相对于元素左上角坐标
:param ele_or_loc: 元素对象绝对坐标或文本定位符坐标为tuple(int, int)形式
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: self
"""
def move_to(self, ele_or_loc, offset_x=None, offset_y=None, duration=.5):
is_loc = False
mid_point = offset_x == offset_y is None
if offset_x is None:
offset_x = 0
if offset_y is None:
offset_y = 0
if isinstance(ele_or_loc, (tuple, list)):
is_loc = True
lx = ele_or_loc[0] + offset_x
@ -43,7 +34,7 @@ class Actions:
elif isinstance(ele_or_loc, str) or ele_or_loc._type == 'ChromiumElement':
ele_or_loc = self.owner(ele_or_loc)
self.owner.scroll.to_see(ele_or_loc)
x, y = ele_or_loc.rect.location if offset_x or offset_y else ele_or_loc.rect.midpoint
x, y = ele_or_loc.rect.midpoint if mid_point else ele_or_loc.rect.location
lx = x + offset_x
ly = y + offset_y
else:
@ -51,16 +42,15 @@ class Actions:
if not location_in_viewport(self.owner, lx, ly):
# 把坐标滚动到页面中间
clientWidth = self.owner.run_js('return document.body.clientWidth;')
clientHeight = self.owner.run_js('return document.body.clientHeight;')
clientWidth = self.owner._run_js('return document.body.clientWidth;')
clientHeight = self.owner._run_js('return document.body.clientHeight;')
self.owner.scroll.to_location(lx - clientWidth // 2, ly - clientHeight // 2)
# 这样设计为了应付那些不随滚动条滚动的元素
if is_loc:
cx, cy = location_to_client(self.owner, lx, ly)
else:
x, y = ele_or_loc.rect.viewport_location if offset_x or offset_y \
else ele_or_loc.rect.viewport_midpoint
x, y = ele_or_loc.rect.viewport_midpoint if mid_point else ele_or_loc.rect.viewport_location
cx = x + offset_x
cy = y + offset_y
@ -70,12 +60,6 @@ class Actions:
return self
def move(self, offset_x=0, offset_y=0, duration=.5):
"""鼠标相对当前位置移动若干位置
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: self
"""
duration = .02 if duration < .02 else duration
num = int(duration * 50)
@ -95,99 +79,49 @@ class Actions:
return self
def click(self, on_ele=None):
"""点击鼠标左键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: self
"""
self._hold(on_ele, 'left').wait(.05)._release('left')
def click(self, on_ele=None, times=1):
self._hold(on_ele, 'left', times).wait(.05)._release('left')
return self
def r_click(self, on_ele=None):
"""点击鼠标右键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: self
"""
self._hold(on_ele, 'right').wait(.05)._release('right')
def r_click(self, on_ele=None, times=1):
self._hold(on_ele, 'right', times).wait(.05)._release('right')
return self
def m_click(self, on_ele=None):
"""点击鼠标中键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: self
"""
self._hold(on_ele, 'middle').wait(.05)._release('middle')
return self
def db_click(self, on_ele=None):
"""双击鼠标左键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: self
"""
self._hold(on_ele, 'left', 2).wait(.05)._release('left')
def m_click(self, on_ele=None, times=1):
self._hold(on_ele, 'middle', times).wait(.05)._release('middle')
return self
def hold(self, on_ele=None):
    """Press and hold the left mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    self._hold(on_ele, 'left')
    return self
def release(self, on_ele=None):
    """Release the left mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    if on_ele:
        # move onto the target (0.2 s move) before letting go of the button
        self.move_to(on_ele, duration=.2)
    self._release('left')
    return self
def r_hold(self, on_ele=None):
    """Press and hold the right mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    self._hold(on_ele, 'right')
    return self
def r_release(self, on_ele=None):
    """Release the right mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    if on_ele:
        # move onto the target (0.2 s move) before letting go of the button
        self.move_to(on_ele, duration=.2)
    self._release('right')
    return self
def m_hold(self, on_ele=None):
    """Press and hold the middle mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    self._hold(on_ele, 'middle')
    return self
def m_release(self, on_ele=None):
    """Release the middle mouse button, optionally moving onto an element first.

    :param on_ele: ChromiumElement object or text locator
    :return: self
    """
    if on_ele:
        # move onto the target (0.2 s move) before letting go of the button
        self.move_to(on_ele, duration=.2)
    self._release('middle')
    return self
def _hold(self, on_ele=None, button='left', count=1):
"""按下鼠标按键
:param on_ele: ChromiumElement元素或文本定位符
:param button: 要按下的按键
:param count: 点击次数
:return: self
"""
if on_ele:
self.move_to(on_ele, duration=.2)
self._dr.run('Input.dispatchMouseEvent', type='mousePressed', button=button, clickCount=count,
@ -196,22 +130,12 @@ class Actions:
return self
def _release(self, button):
    """Release a mouse button at the current pointer position.

    :param button: the button to release
    :return: self
    """
    self._dr.run('Input.dispatchMouseEvent', type='mouseReleased', button=button, clickCount=1,
                 x=self.curr_x, y=self.curr_y, modifiers=self.modifier)
    # the held-button marker is reset to 'left' regardless of which button was released
    self._holding = 'left'
    return self
def scroll(self, delta_y=0, delta_x=0, on_ele=None):
"""滚动鼠标滚轮,可先移动到元素上
:param delta_y: 滚轮变化值y
:param delta_x: 滚轮变化值x
:param on_ele: ChromiumElement元素
:return: self
"""
if on_ele:
self.move_to(on_ele, duration=.2)
self._dr.run('Input.dispatchMouseEvent', type='mouseWheel', x=self.curr_x, y=self.curr_y,
@ -219,126 +143,75 @@ class Actions:
return self
def up(self, pixel):
    """Move the mouse pointer upwards from its current position.

    :param pixel: number of pixels to move
    :return: the result of ``self.move()``
    """
    return self.move(offset_x=0, offset_y=-pixel)
def down(self, pixel):
    """Move the mouse pointer downwards from its current position.

    :param pixel: number of pixels to move
    :return: the result of ``self.move()``
    """
    return self.move(offset_x=0, offset_y=pixel)
def left(self, pixel):
    """Move the mouse pointer to the left of its current position.

    :param pixel: number of pixels to move
    :return: the result of ``self.move()``
    """
    return self.move(offset_x=-pixel, offset_y=0)
def right(self, pixel):
    """Move the mouse pointer to the right of its current position.

    :param pixel: number of pixels to move
    :return: the result of ``self.move()``
    """
    return self.move(offset_x=pixel, offset_y=0)
def key_down(self, key):
"""按下键盘上的按键,
:param key: 使用Keys获取的按键'DEL'形式按键名称
:return: self
"""
key = getattr(Keys, key.upper(), key)
if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量
self.modifier |= modifierBit.get(key, 0)
return self
data = self._get_key_data(key, 'keyDown')
data['_ignore'] = AlertExistsError
self.owner.run_cdp('Input.dispatchKeyEvent', **data)
data = make_input_data(self.modifier, key, False)
if not data:
raise ValueError(f'没有这个按键:{key}')
self.owner._run_cdp('Input.dispatchKeyEvent', **data)
return self
def key_up(self, key):
"""提起键盘上的按键
:param key: 按键特殊字符见Keys
:return: self
"""
key = getattr(Keys, key.upper(), key)
if key in ('\ue009', '\ue008', '\ue00a', '\ue03d'): # 如果上修饰符,添加到变量
self.modifier ^= modifierBit.get(key, 0)
return self
data = self._get_key_data(key, 'keyUp')
data['_ignore'] = AlertExistsError
self.owner.run_cdp('Input.dispatchKeyEvent', **data)
data = make_input_data(self.modifier, key, True)
if not data:
raise ValueError(f'没有这个按键:{key}')
self.owner._run_cdp('Input.dispatchKeyEvent', **data)
return self
def type(self, keys):
"""用模拟键盘按键方式输入文本,可输入字符串,也可输入组合键
:param keys: 要按下的按键特殊字符和多个文本可用list或tuple传入
:return: self
"""
def type(self, keys, interval=0):
modifiers = []
if not isinstance(keys, (str, tuple, list)):
keys = str(keys)
for i in keys:
for character in i:
if character in keyDefinitions:
self.key_down(character)
if character in ('\ue009', '\ue008', '\ue00a', '\ue03d'):
modifiers.append(character)
else:
self.key_up(character)
if character in ('\ue009', '\ue008', '\ue00a', '\ue03d'):
self.modifier |= modifierBit.get(character, 0)
modifiers.append(character)
data = make_input_data(self.modifier, character, False)
if data:
self.owner._run_cdp('Input.dispatchKeyEvent', **data)
if character not in ('\ue009', '\ue008', '\ue00a', '\ue03d'):
data['type'] = 'keyUp'
self.owner._run_cdp('Input.dispatchKeyEvent', **data)
else:
self.owner.run_cdp('Input.dispatchKeyEvent', type='char', text=character)
self.owner._run_cdp('Input.dispatchKeyEvent', type='char', text=character)
sleep(interval)
for m in modifiers:
self.key_up(m)
return self
def input(self, text):
    """Input text or a key combination; a key combination is passed as a tuple.

    The work is delegated to ``input_text_or_keys`` with the owner page object.

    :param text: text value or key combination
    :return: self
    """
    input_text_or_keys(self.owner, text)
    return self
def wait(self, second, scope=None):
    """Wait for some seconds; with two arguments, wait a random time between them.

    The wait itself is performed by ``self.owner.wait``.

    :param second: number of seconds
    :param scope: upper bound of the random range
    :return: self, so that calls can be chained
    """
    self.owner.wait(second=second, scope=scope)
    return self
def _get_key_data(self, key, action):
    """Build the parameter dict for an ``Input.dispatchKeyEvent`` call.

    The event type stays 'keyUp' when ``action`` is 'keyUp'. For any other
    action it becomes 'keyDown' when the key description carries text, and
    'rawKeyDown' when it does not.

    :param key: the key
    :param action: 'keyDown' or 'keyUp'
    :return: dict describing the key event
    """
    info = keyDescriptionForString(self.modifier, key)
    typed = info['text']

    if action == 'keyUp':
        event_type = action
    elif typed:
        event_type = 'keyDown'
    else:
        event_type = 'rawKeyDown'

    return {
        'type': event_type,
        'modifiers': self.modifier,
        'windowsVirtualKeyCode': info['keyCode'],
        'code': info['code'],
        'key': info['key'],
        'text': typed,
        'autoRepeat': False,
        'unmodifiedText': typed,
        'location': info['location'],
        # location 3 marks the key as a keypad key
        'isKeypad': info['location'] == 3,
    }
def location_to_client(page, lx, ly):
"""绝对坐标转换为视口坐标"""
scroll_x = page.run_js('return document.documentElement.scrollLeft;')
scroll_y = page.run_js('return document.documentElement.scrollTop;')
scroll_x = page._run_js('return document.documentElement.scrollLeft;')
scroll_y = page._run_js('return document.documentElement.scrollTop;')
return lx - scroll_x, ly - scroll_y

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, Tuple, Any, Literal
@ -11,19 +10,19 @@ from .._base.driver import Driver
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
KEYS = Literal['NULL', 'CANCEL', 'HELP', 'BACKSPACE', 'BACK_SPACE', 'meta',
'TAB', 'CLEAR', 'RETURN', 'ENTER', 'SHIFT', 'LEFT_SHIFT', 'CONTROL', 'command ',
'CTRL', 'LEFT_CONTROL', 'ALT', 'LEFT_ALT', 'PAUSE', 'ESCAPE', 'SPACE',
'PAGE_UP', 'PAGE_DOWN', 'END', 'HOME', 'LEFT', 'ARROW_LEFT', 'UP',
'ARROW_UP', 'RIGHT', 'ARROW_RIGHT', 'DOWN', 'ARROW_DOWN', 'INSERT',
KEYS = Literal['NULL', 'CANCEL', 'HELP', 'BACKSPACE', 'meta',
'TAB', 'CLEAR', 'RETURN', 'ENTER', 'SHIFT', 'CONTROL', 'command ',
'CTRL', 'ALT', 'PAUSE', 'ESCAPE', 'SPACE',
'PAGE_UP', 'PAGE_DOWN', 'END', 'HOME', 'LEFT', 'UP',
'RIGHT', 'DOWN', 'INSERT',
'DELETE', 'DEL', 'SEMICOLON', 'EQUALS', 'NUMPAD0', 'NUMPAD1', 'NUMPAD2',
'NUMPAD3', 'NUMPAD4', 'NUMPAD5', 'NUMPAD6', 'NUMPAD7', 'NUMPAD8', 'NUMPAD9',
'MULTIPLY', 'ADD', 'SUBTRACT', 'DECIMAL', 'DIVIDE', 'F1', 'F2',
'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', 'META', 'COMMAND ',
'null', 'cancel', 'help', 'backspace', 'back_space', 'tab', 'clear', 'return', 'enter',
'shift', 'left_shift', 'control', 'ctrl', 'left_control', 'alt', 'left_alt', 'pause',
'escape', 'space', 'page_up', 'page_down', 'end', 'home', 'left', 'arrow_left', 'up',
'arrow_up', 'right', 'arrow_right', 'down', 'arrow_down', 'insert', 'delete', 'del',
'null', 'cancel', 'help', 'backspace', 'tab', 'clear', 'return', 'enter',
'shift', 'control', 'ctrl', 'alt', 'pause',
'escape', 'space', 'page_up', 'page_down', 'end', 'home', 'left', 'up',
'right', 'down', 'insert', 'delete', 'del',
'semicolon', 'equals', 'numpad0', 'numpad1', 'numpad2', 'numpad3', 'numpad4', 'numpad5',
'numpad6', 'numpad7', 'numpad8', 'numpad9', 'multiply', 'add', 'subtract', 'decimal',
'divide', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
@ -43,67 +42,210 @@ KEYS = Literal['NULL', 'CANCEL', 'HELP', 'BACKSPACE', 'BACK_SPACE', 'meta',
class Actions:
"""用于实现动作链的类"""
owner: ChromiumBase = ...
_dr: Driver = ...
modifier: int = ...
curr_x: float = ...
curr_y: float = ...
_holding: str = ...
def __init__(self, owner: ChromiumBase):
self.owner: ChromiumBase = ...
self._dr: Driver = ...
self.modifier: int = ...
self.curr_x: int = ...
self.curr_y: int = ...
self._holding: str = ...
"""
:param owner: ChromiumBase对象
"""
...
def move_to(self, ele_or_loc: Union[ChromiumElement, Tuple[float, float], str],
offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions: ...
offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions:
"""鼠标移动到元素中点,或页面上的某个绝对坐标。可设置偏移量
当带偏移量时偏移量相对于元素左上角坐标
:param ele_or_loc: 元素对象绝对坐标或文本定位符坐标为tuple(int, int)形式
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: 动作链对象本身
"""
...
def move(self, offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions: ...
def move(self, offset_x: float = 0, offset_y: float = 0, duration: float = .5) -> Actions:
"""鼠标相对当前位置移动若干位置
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:param duration: 拖动用时传入0即瞬间到达
:return: 动作链对象本身
"""
...
def click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def click(self, on_ele: Union[ChromiumElement, str] = None, times: int = 1) -> Actions:
"""点击鼠标左键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:param times: 点击次数
:return: 动作链对象本身
"""
...
def r_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def r_click(self, on_ele: Union[ChromiumElement, str] = None, times: int = 1) -> Actions:
"""点击鼠标右键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:param times: 点击次数
:return: 动作链对象本身
"""
...
def m_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def m_click(self, on_ele: Union[ChromiumElement, str] = None, times: int = 1) -> Actions:
"""点击鼠标中键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:param times: 点击次数
:return: 动作链对象本身
"""
...
def db_click(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""按住鼠标左键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""释放鼠标左键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""按住鼠标右键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def r_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""释放鼠标右键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def r_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""按住鼠标中键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def m_hold(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""释放鼠标中键,可先移动到元素上
:param on_ele: ChromiumElement元素或文本定位符
:return: 动作链对象本身
"""
...
def m_release(self, on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
def _hold(self,
on_ele: Union[ChromiumElement, str] = None,
button: str = 'left',
count: int = 1) -> Actions:
"""按下鼠标按键
:param on_ele: ChromiumElement元素或文本定位符
:param button: 要按下的按键
:param count: 点击次数
:return: 动作链对象本身
"""
...
def _hold(self, on_ele: Union[ChromiumElement, str] = None, button: str = 'left',
count: int = 1) -> Actions: ...
def _release(self, button: str) -> Actions: ...
def _release(self, button: str) -> Actions:
"""释放鼠标按键
:param button: 要释放的按键
:return: 动作链对象本身
"""
...
def scroll(self, delta_y: int = 0, delta_x: int = 0,
on_ele: Union[ChromiumElement, str] = None) -> Actions: ...
on_ele: Union[ChromiumElement, str] = None) -> Actions:
"""滚动鼠标滚轮,可先移动到元素上
:param delta_y: 滚轮变化值y
:param delta_x: 滚轮变化值x
:param on_ele: ChromiumElement元素
:return: 动作链对象本身
"""
...
def up(self, pixel: int) -> Actions: ...
def up(self, pixel: int) -> Actions:
"""鼠标向上移动若干像素
:param pixel: 鼠标移动的像素值
:return: 动作链对象本身
"""
...
def down(self, pixel: int) -> Actions: ...
def down(self, pixel: int) -> Actions:
"""鼠标向下移动若干像素
:param pixel: 鼠标移动的像素值
:return: 动作链对象本身
"""
...
def left(self, pixel: int) -> Actions: ...
def left(self, pixel: int) -> Actions:
"""鼠标向左移动若干像素
:param pixel: 鼠标移动的像素值
:return: 动作链对象本身
"""
...
def right(self, pixel: int) -> Actions: ...
def right(self, pixel: int) -> Actions:
"""鼠标向右移动若干像素
:param pixel: 鼠标移动的像素值
:return: 动作链对象本身
"""
...
def key_down(self, key: Union[KEYS, str]) -> Actions: ...
def key_down(self, key: Union[KEYS, str]) -> Actions:
"""按下键盘上的按键,
:param key: 使用Keys获取的按键 'DEL' 形式按键名称
:return: 动作链对象本身
"""
...
def key_up(self, key: Union[KEYS, str]) -> Actions: ...
def key_up(self, key: Union[KEYS, str]) -> Actions:
"""提起键盘上的按键
:param key: 按键特殊字符见Keys
:return: 动作链对象本身
"""
...
def type(self, keys: Union[KEYS, str, list, tuple]) -> Actions: ...
def type(self,
keys: Union[KEYS, str, list, tuple],
interval: float = 0) -> Actions:
"""用模拟键盘按键方式输入文本,可输入字符串,也可输入组合键
:param keys: 要按下的按键特殊字符和多个文本可用list或tuple传入
:param interval: 每个字符之间间隔时间
:return: 动作链对象本身
"""
...
def input(self, text: Any) -> Actions: ...
def input(self, text: Any) -> Actions:
"""输入文本也可输入组合键组合键用tuple形式输入
:param text: 文本值或按键组合
:return: 动作链对象本身
"""
...
def wait(self, second: float, scope: float = None) -> Actions: ...
def _get_key_data(self, key: str, action: str) -> dict: ...
def wait(self, second: float, scope: float = None) -> Actions:
    """Wait for some seconds; with two arguments, wait a random time between them.

    :param second: number of seconds
    :param scope: upper bound of the random range
    :return: the action chain object itself
    """
    ...
def location_to_client(page, lx: int, ly: int) -> tuple: ...
def location_to_client(page: ChromiumBase, lx: int, ly: int) -> tuple:
"""绝对坐标转换为视口坐标
:param page: 页面对象
:param lx: 绝对坐标x
:param ly: 绝对坐标y
:return: 视口坐标元组
"""
...

View File

@ -2,40 +2,26 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from time import perf_counter, sleep
from .waiter import wait_mission
from .._functions.settings import Settings
from .._functions.web import offset_scroll
from .._units.downloader import TabDownloadSettings
from ..errors import CanNotClickError, CDPError, NoRectError, AlertExistsError
class Clicker(object):
def __init__(self, ele):
"""
:param ele: ChromiumElement
"""
self._ele = ele
def __call__(self, by_js=False, timeout=1.5, wait_stop=True):
    """Click the element; shortcut for ``left()``.

    If the element is covered, a JS click can be used instead.

    :param by_js: None: try a simulated click first and fall back to JS when covered;
                  True: click with JS directly; False: simulated click only
    :param timeout: timeout of the simulated click (waiting for the element to be
                    visible, enabled and inside the viewport)
    :param wait_stop: whether to wait until the element stops moving before clicking
    :return: the result of ``self.left()``
    """
    return self.left(by_js, timeout, wait_stop)
def left(self, by_js=False, timeout=1.5, wait_stop=True):
"""点击元素可选择是否用js点击
:param by_js: 是否用js点击为None时先用模拟点击遇到遮挡改用js为True时直接用js点击为False时只用模拟点击
:param timeout: 模拟点击的超时时间等待元素可见可用进入视口
:param wait_stop: 是否等待元素运动结束再执行点击
:return: 是否点击成功
"""
if self._ele.tag == 'option':
if not self._ele.states.is_selected:
self._ele.parent('t:select').select.by_option(self._ele)
@ -43,11 +29,12 @@ class Clicker(object):
select = self._ele.parent('t:select')
if select.select.is_multi:
self._ele.parent('t:select').select.cancel_by_option(self._ele)
return
return self._ele
if not by_js: # 模拟点击
can_click = False
timeout = self._ele.owner.timeout if timeout is None else timeout
if timeout is None:
timeout = self._ele.timeout
rect = None
if timeout == 0:
try:
@ -87,8 +74,8 @@ class Clicker(object):
x = rect[1][0] - (rect[1][0] - rect[0][0]) / 2
y = rect[0][0] + 3
try:
r = self._ele.owner.run_cdp('DOM.getNodeForLocation', x=int(x), y=int(y),
includeUserAgentShadowDOM=True, ignorePointerEventsNone=True)
r = self._ele.owner._run_cdp('DOM.getNodeForLocation', x=int(x), y=int(y),
includeUserAgentShadowDOM=True, ignorePointerEventsNone=True)
if r['backendNodeId'] != self._ele._backend_id:
vx, vy = self._ele.rect.viewport_midpoint
else:
@ -98,111 +85,124 @@ class Clicker(object):
vx, vy = self._ele.rect.viewport_midpoint
self._click(vx, vy)
return True
return self._ele
if by_js is not False:
self._ele.run_js('this.click();')
return True
self._ele._run_js('this.click();')
return self._ele
if Settings.raise_when_click_failed:
raise CanNotClickError
return False
def right(self):
"""右键单击"""
self._ele.owner.scroll.to_see(self._ele)
x, y = self._ele.rect.viewport_click_point
self._click(x, y, 'right')
return self._click(*self._ele.rect.viewport_click_point, button='right')
def middle(self, get_tab=True):
"""中键单击默认返回新出现的tab对象
:param get_tab: 是否返回新tab对象为False则返回None
:return: Tab对象或None
"""
self._ele.owner.scroll.to_see(self._ele)
x, y = self._ele.rect.viewport_click_point
self._click(x, y, 'middle')
curr_tid = self._ele.tab.browser.tab_ids[0]
self._click(*self._ele.rect.viewport_click_point, button='middle')
if get_tab:
tid = self._ele.page.wait.new_tab()
tid = self._ele.tab.browser.wait.new_tab(curr_tab=curr_tid)
if not tid:
raise RuntimeError('没有出现新标签页。')
return self._ele.page.get_tab(tid)
return self._ele.tab.browser._get_tab(tid, mix=self._ele.tab._type == 'MixTab')
def at(self, offset_x=None, offset_y=None, button='left', count=1):
"""带偏移量点击本元素相对于左上角坐标。不传入x或y值时点击元素中间点
:param offset_x: 相对元素左上角坐标的x轴偏移量
:param offset_y: 相对元素左上角坐标的y轴偏移量
:param button: 点击哪个键可选 left, middle, right, back, forward
:param count: 点击次数
:return: None
"""
self._ele.owner.scroll.to_see(self._ele)
if offset_x is None and offset_y is None:
w, h = self._ele.rect.size
offset_x = w // 2
offset_y = h // 2
x, y = offset_scroll(self._ele, offset_x, offset_y)
self._click(x, y, button, count)
return self._click(*offset_scroll(self._ele, offset_x, offset_y), button=button, count=count)
def multi(self, times=2):
"""多次点击
:param times: 默认双击
:return: None
"""
self.at(count=times)
return self.at(count=times)
def to_download(self, save_path=None, rename=None, suffix=None, new_tab=None, by_js=False, timeout=None):
if not self._ele.tab._browser._dl_mgr._running:
self._ele.tab._browser.set.download_path('.')
when_file_exists = None
tmp_path = None
if self._ele.tab._type.endswith('Page'):
obj = browser = self._ele.owner._browser
tid = 'browser'
elif new_tab:
obj = browser = self._ele.owner._browser
tid = 'browser'
t_settings = TabDownloadSettings(self._ele.owner.tab_id)
b_settings = TabDownloadSettings('browser')
when_file_exists = b_settings.when_file_exists
b_settings.when_file_exists = t_settings.when_file_exists
b_settings.rename = t_settings.rename
b_settings.suffix = t_settings.suffix
t_settings.rename = None
t_settings.suffix = None
if not save_path and b_settings.path != t_settings.path:
tmp_path = b_settings.path
b_settings.path = t_settings.path
else:
obj = self._ele.owner._tab
browser = obj.browser
browser._dl_mgr._waiting_tab.add(self._ele.owner.tab_id)
tid = obj.tab_id
def to_download(self, save_path=None, rename=None, suffix=None, new_tab=False, by_js=False, timeout=None):
"""点击触发下载
:param save_path: 保存路径为None保存在原来设置的如未设置保存到当前路径
:param rename: 重命名文件名
:param suffix: 指定文件后缀
:param new_tab: 该下载是否在新tab中触发
:param by_js: 是否用js方式点击逻辑与click()一致
:param timeout: 等待下载触发的超时时间为None则使用页面对象设置
:return: DownloadMission对象
"""
if save_path:
self._ele.owner.tab.set.download_path(save_path)
elif not self._ele.page._browser._dl_mgr._running:
self._ele.page.set.download_path('.')
tmp_path = obj.download_path
TabDownloadSettings(tid).path = str(Path(save_path).absolute())
if rename or suffix:
self._ele.owner.tab.set.download_file_name(rename, suffix)
tab = self._ele.page if new_tab else self._ele.owner
obj.set.download_file_name(rename, suffix)
if timeout is None:
timeout = obj.timeout
browser._dl_mgr.set_flag(tid, True)
self.left(by_js=by_js)
return tab.wait.download_begin(timeout=timeout)
m = wait_mission(browser, tid, timeout)
if tmp_path:
TabDownloadSettings(tid).path = tmp_path
if when_file_exists:
browser.set.when_download_file_exists(when_file_exists)
if m and new_tab:
self._ele.owner.browser._dl_mgr._tab_missions.setdefault(self._ele.owner.tab_id, set()).add(m)
m.from_tab = self._ele.owner.tab_id
browser._dl_mgr._waiting_tab.discard(self._ele.owner.tab_id)
return m
def to_upload(self, file_paths, by_js=False):
    """Click to open the file chooser and have the given paths filled in automatically.

    :param file_paths: file path(s); when the upload input accepts several files this may be
                       a list, or a single string with the paths separated by line breaks
    :param by_js: whether to click with JS, same logic as click()
    :return: None
    """
    # register the paths first so that they are ready when the click opens the chooser
    self._ele.owner.set.upload_files(file_paths)
    self.left(by_js=by_js)
    self._ele.owner.wait.upload_paths_inputted()
def for_new_tab(self, by_js=False):
"""点击后等待新tab出现并返回其对象
:param by_js: 是否使用js点击逻辑与click()一致
:return: 新标签页对象如果没有等到新标签页出现则抛出异常
"""
def for_new_tab(self, by_js=False, timeout=3):
curr_tid = self._ele.tab.browser._newest_tab_id
self.left(by_js=by_js)
tid = self._ele.page.wait.new_tab()
tid = self._ele.tab.browser.wait.new_tab(timeout=timeout, curr_tab=curr_tid)
if not tid:
raise RuntimeError('没有出现新标签页。')
return self._ele.page.get_tab(tid)
return self._ele.tab.browser._get_tab(tid, mix=self._ele.tab._type == 'MixTab')
def _click(self, client_x, client_y, button='left', count=1):
    """Perform the click by dispatching a mouse press followed by a mouse release.

    :param client_x: x coordinate inside the viewport
    :param client_y: y coordinate inside the viewport
    :param button: 'left' 'right' 'middle' 'back' 'forward'
    :param count: click count
    :return: None
    """
    self._ele.owner.run_cdp('Input.dispatchMouseEvent', type='mousePressed', x=client_x,
                            y=client_y, button=button, clickCount=count, _ignore=AlertExistsError)
    self._ele.owner.run_cdp('Input.dispatchMouseEvent', type='mouseReleased', x=client_x,
                            y=client_y, button=button, _ignore=AlertExistsError)
def for_url_change(self, text=None, exclude=False, by_js=False, timeout=None):
    """Click the element, then wait for the tab url to contain (or not contain) a text.

    :param text: text to look for; when None the url read before the click is used
                 and ``exclude`` is forced to True
    :param exclude: passed to the tab's ``wait.url_change`` together with ``text``
    :param by_js: passed to ``left()``
    :param timeout: passed to the tab's ``wait.url_change``
    :return: True when the wait result is truthy, otherwise False
    """
    if text is None:
        # no text given: wait for the url to stop containing its current value
        text, exclude = self._ele.tab.url, True

    self.left(by_js=by_js)

    waiter = self._ele.tab.wait
    return bool(waiter.url_change(text=text, exclude=exclude, timeout=timeout))
def for_title_change(self, text=None, exclude=False, by_js=False, timeout=None):
    """Click the element, then wait for the tab title to contain (or not contain) a text.

    :param text: text to look for; when None the title read before the click is used
                 and ``exclude`` is forced to True
    :param exclude: passed to the tab's ``wait.title_change`` together with ``text``
    :param by_js: passed to ``left()``
    :param timeout: passed to the tab's ``wait.title_change``
    :return: True when the wait result is truthy, otherwise False
    """
    if text is None:
        # no text given: wait for the title to stop containing its current value
        text, exclude = self._ele.tab.title, True

    self.left(by_js=by_js)

    waiter = self._ele.tab.wait
    return bool(waiter.title_change(text=text, exclude=exclude, timeout=timeout))
def _click(self, view_x, view_y, button='left', count=1):
    """Perform the click by dispatching a mouse press followed by a mouse release.

    :param view_x: x coordinate inside the viewport
    :param view_y: y coordinate inside the viewport
    :param button: 'left' 'right' 'middle' 'back' 'forward'
    :param count: click count, sent with the press event only
    :return: the element this clicker belongs to
    """
    send = self._ele.owner._run_cdp
    event = 'Input.dispatchMouseEvent'
    send(event, type='mousePressed', x=view_x, y=view_y, button=button,
         clickCount=count, _ignore=AlertExistsError)
    send(event, type='mouseReleased', x=view_x, y=view_y, button=button,
         _ignore=AlertExistsError)
    return self._ele

View File

@ -2,47 +2,145 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union
from .downloader import DownloadMission
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_tab import WebPageTab, ChromiumTab
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
class Clicker(object):
_ele: ChromiumElement = ...
def __init__(self, ele: ChromiumElement):
self._ele: ChromiumElement = ...
"""
:param ele: ChromiumElement
"""
...
def __call__(self, by_js: Union[bool, str, None] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...
def __call__(self, by_js: Union[bool, str, None] = False,
             timeout: float = 1.5, wait_stop: bool = True) -> Union[ChromiumElement, bool]:
    """Click the element.

    If the element is covered, a JS click can be used instead.

    :param by_js: None: try a simulated click first and fall back to JS when covered;
                  True: click with JS directly; False: simulated click only
    :param timeout: timeout of the simulated click (waiting for the element to be
                    visible, enabled and inside the viewport)
    :param wait_stop: whether to wait until the element stops moving before clicking
    :return: the element itself when the click succeeded, otherwise False
    """
    ...
def left(self, by_js: Union[bool, str, None] = False, timeout: float = 1.5, wait_stop: bool = True) -> bool: ...
def left(self, by_js: Union[bool, str, None] = False,
         timeout: float = 1.5, wait_stop: bool = True) -> Union[ChromiumElement, bool]:
    """Click the element, optionally with JS.

    :param by_js: None: try a simulated click first and fall back to JS when covered;
                  True: click with JS directly; False: simulated click only
    :param timeout: timeout of the simulated click (waiting for the element to be
                    visible, enabled and inside the viewport)
    :param wait_stop: whether to wait until the element stops moving before clicking
    :return: the element itself when the click succeeded, otherwise False
    """
    ...
def right(self) -> None: ...
def right(self) -> ChromiumElement:
"""右键单击"""
...
def middle(self, get_tab: bool = True) -> Union[ChromiumTab, WebPageTab, None]: ...
def middle(self, get_tab: bool = True) -> Union[ChromiumTab, MixTab, None]:
"""中键单击默认返回新出现的tab对象
:param get_tab: 是否返回新tab对象为False则返回None
:return: Tab对象或None
"""
...
def at(self,
offset_x: float = None,
offset_y: float = None,
button: str = 'left',
count: int = 1) -> None: ...
count: int = 1) -> ChromiumElement:
"""带偏移量点击本元素相对于左上角坐标。不传入x或y值时点击元素中间点
:param offset_x: 相对元素左上角坐标的x轴偏移量
:param offset_y: 相对元素左上角坐标的y轴偏移量
:param button: 点击哪个键可选 left, middle, right, back, forward
:param count: 点击次数
:return: None
"""
...
def multi(self, times: int = 2) -> None: ...
def multi(self, times: int = 2) -> ChromiumElement:
    """Click the element several times.

    :param times: number of clicks, double click by default
    :return: the element itself
    """
    ...
def to_download(self,
save_path: Union[str, Path] = None,
rename: str = None,
suffix: str = None,
new_tab: bool = False,
new_tab: bool = None,
by_js: bool = False,
timeout: float = None) -> DownloadMission: ...
timeout: float = None) -> DownloadMission:
"""点击触发下载
:param save_path: 保存路径为None保存在原来设置的如未设置保存到当前路径
:param rename: 重命名文件名
:param suffix: 指定文件后缀
:param new_tab: 下载任务是否从新标签页触发为None会自动获取如获取不到设为True
:param by_js: 是否用js方式点击逻辑与click()一致
:param timeout: 等待下载触发的超时时间为None则使用页面对象设置
:return: DownloadMission对象
"""
...
def to_upload(self, file_paths: Union[str, Path, list, tuple], by_js: bool = False) -> None: ...
def to_upload(self, file_paths: Union[str, Path, list, tuple], by_js: bool = False) -> None:
"""触发上传文件选择框并自动填入指定路径
:param file_paths: 文件路径如果上传框支持多文件可传入列表或字符串字符串时多个文件用回车分隔
:param by_js: 是否用js方式点击逻辑与click()一致
:return: None
"""
...
def for_new_tab(self, by_js: bool = False) -> Union[ChromiumTab, WebPageTab]: ...
def for_new_tab(self, by_js: bool = False, timeout: float = 3) -> Union[ChromiumTab, MixTab]:
"""点击后等待新tab出现并返回其对象
:param by_js: 是否使用js点击逻辑与click()一致
:param timeout: 等待超时时间
:return: 新标签页对象如果没有等到新标签页出现则抛出异常
"""
...
def _click(self, client_x: float, client_y: float, button: str = 'left', count: int = 1) -> None: ...
def for_url_change(self, text: str = None, exclude: bool = False,
by_js: bool = False, timeout: float = None) -> bool:
"""点击并等待tab的url变成包含或不包含指定文本
:param text: 用于识别的文本为None等待当前url变化
:param exclude: 是否排除为True时当url不包含text指定文本时返回Truetext为None时自动设为True
:param by_js: 是否用js点击
:param timeout: 超时时间为None使用页面设置
:return: 是否等待成功
"""
...
def for_title_change(self, text: str = None, exclude: bool = False,
by_js: bool = False, timeout: float = None) -> bool:
"""点击并等待tab的title变成包含或不包含指定文本
:param text: 用于识别的文本为None等待当前title变化
:param exclude: 是否排除为True时当title不包含text指定文本时返回Truetext为None时自动设为True
:param by_js: 是否用js点击
:param timeout: 超时时间为None使用页面设置
:return: 是否等待成功
"""
...
def _click(self,
           view_x: float,
           view_y: float,
           button: str = 'left',
           count: int = 1) -> ChromiumElement:
    """Perform the click.

    :param view_x: x coordinate inside the viewport
    :param view_y: y coordinate inside the viewport
    :param button: 'left' 'right' 'middle' 'back' 'forward'
    :param count: click count
    :return: the element itself
    """
    ...

View File

@ -0,0 +1,93 @@
# -*- coding:utf-8 -*-
from queue import Queue
from time import perf_counter, sleep
class Console(object):
    """Collect the browser console messages of a page object.

    Messages delivered through the ``Console.messageAdded`` callback are
    buffered in a queue until they are read with ``messages``, ``wait()``
    or ``steps()``.
    """

    def __init__(self, owner):
        """
        :param owner: page object; its ``_driver`` and ``_run_cdp`` are used
        """
        self._owner = owner
        self._caught = None  # message queue, created by start() / clear()
        self._not_enabled = True  # True until 'Console.enable' has been sent once
        self.listening = False

    @property
    def messages(self):
        """Return all buffered messages as a list; the buffer is emptied by the read."""
        if self._caught is None:
            return []
        lst = []
        while not self._caught.empty():
            lst.append(self._caught.get_nowait())
        return lst

    def start(self):
        """Start listening; any previously buffered messages are discarded."""
        self._caught = Queue(maxsize=0)
        self._owner._driver.set_callback("Console.messageAdded", self._console)
        if self._not_enabled:
            # the Console domain only has to be enabled once per owner
            self._owner._run_cdp("Console.enable")
            self._not_enabled = False
        self.listening = True

    def stop(self):
        """Stop listening; messages already buffered stay readable."""
        if self.listening:
            self._owner._driver.set_callback('Console.messageAdded', None)
            self.listening = False

    def clear(self):
        """Discard all buffered messages."""
        self._caught = Queue(maxsize=0)

    def wait(self, timeout=None):
        """Wait for the next message and return it.

        :param timeout: seconds to wait; None waits until a message arrives or
                        the driver / listening stops; 0 only polls the buffer
        :return: the message; None when waiting without timeout ended without a
                 message; False when the timeout elapsed without a message
        :raises RuntimeError: when listening has not been started
        """
        if not self.listening:
            raise RuntimeError('监听未启动。')

        if timeout is None:
            while self._owner._driver.is_running and self.listening and not self._caught.qsize():
                sleep(.03)
            return self._caught.get_nowait() if self._caught.qsize() else None

        end = perf_counter() + timeout
        while self._owner._driver.is_running and self.listening and perf_counter() < end:
            if self._caught.qsize():
                return self._caught.get_nowait()
            sleep(0.05)

        # Look once more after the loop: with timeout=0 the loop body never runs,
        # and a message may have arrived during the last sleep before the deadline.
        return self._caught.get_nowait() if self._caught.qsize() else False

    def steps(self, timeout=None):
        """Yield messages one by one as they arrive.

        :param timeout: maximum seconds to wait for each next message; None keeps
                        going for as long as the driver runs and listening is on
        :return: generator of messages
        """
        if timeout is None:
            while self._owner._driver.is_running and self.listening:
                if self._caught.qsize():
                    yield self._caught.get_nowait()
                sleep(0.05)

        else:
            end = perf_counter() + timeout
            while self._owner._driver.is_running and self.listening and perf_counter() < end:
                if self._caught.qsize():
                    yield self._caught.get_nowait()
                    # every delivered message restarts the timeout window
                    end = perf_counter() + timeout
                sleep(0.05)
            return False

    def _console(self, **kwargs):
        """Callback for ``Console.messageAdded``: wrap the message and buffer it."""
        self._caught.put(ConsoleData(kwargs['message']))
class ConsoleData(object):
    """Attribute-style wrapper around the dict of one console message."""
    # Only _data is ever assigned; reading any other name falls through to __getattr__.
    __slots__ = ('_data', 'source', 'level', 'text', 'url', 'line', 'column')

    def __init__(self, data):
        """
        :param data: the message dict
        """
        self._data = data

    def __getattr__(self, item):
        """Return the field named item from the message dict, or None if absent."""
        if item == '_data':
            # _data itself is unset (instance created without __init__); looking it
            # up through self._data here would recurse without end.
            raise AttributeError(item)
        return self._data.get(item, None)

    def __repr__(self):
        return (f'<ConsoleData source={self.source} level={self.level} text={self.text} url={self.url} '
                f'line={self.line} column={self.column} >')

    @property
    def body(self):
        """Return the message text parsed as JSON, or the raw text if it is not JSON."""
        from json import loads
        text = self.text
        try:
            return loads(text)
        except (TypeError, ValueError):  # text is None / not str, or not valid JSON
            return text

View File

@ -0,0 +1,62 @@
# -*- coding:utf-8 -*-
from queue import Queue
from typing import Optional, Iterable, List, Union, Any
from .._pages.chromium_base import ChromiumBase
class Console(object):
    listening: bool = ...
    _owner: ChromiumBase = ...
    _caught: Optional[Queue] = ...
    _not_enabled: bool = ...

    def __init__(self, owner: ChromiumBase) -> None:
        """
        :param owner: page object
        """
        ...

    @property
    def messages(self) -> List[ConsoleData]:
        """Return the captured messages as a list; the list is emptied once returned."""
        ...

    def start(self) -> None:
        """Start listening to the console."""
        ...

    def stop(self) -> None:
        """Stop listening.
        NOTE(review): the implementation only unregisters the callback and resets
        ``listening``; it does not clear messages that were already captured.
        """
        ...

    def clear(self) -> None:
        """Discard messages that were captured but not yet returned."""
        ...

    # NOTE(review): a bare ``False`` is not a valid type inside Union; Literal[False] or
    # bool is presumably what is meant.
    def wait(self, timeout: float = None) -> Union[ConsoleData, False]:
        """Wait for one message.
        :param timeout: timeout
        :return: a ConsoleData object, or a falsy value when no message was obtained
        """
        ...

    def steps(self, timeout: Optional[float] = None) -> Iterable[ConsoleData]:
        """Yield each message as soon as it is captured; intended for use in a for loop.
        :param timeout: timeout while waiting for one message; None waits without limit
        :return: iterable yielding ConsoleData objects
        """
        ...

    def _console(self, **kwargs) -> None: ...
class ConsoleData(object):
    """Wrapper around the data of one console message."""
    __slots__ = ('_data', 'source', 'level', 'text', 'url', 'line', 'column')

    def __init__(self, data: dict) -> None: ...

    # NOTE(review): the implementation returns whatever value the message dict holds
    # for the name, or None when it is absent, so ``str`` looks too narrow.
    def __getattr__(self, item: str) -> str: ...

    @property
    def body(self) -> Any: ...

View File

@ -2,34 +2,27 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from .._functions.web import set_browser_cookies, set_session_cookies
from .._functions.cookies import set_tab_cookies, set_session_cookies, set_browser_cookies
class CookiesSetter(object):
class BrowserCookiesSetter(object):
def __init__(self, owner):
"""
:param owner: ChromiumBase对象
"""
self._owner = owner
def __call__(self, cookies):
"""设置一个或多个cookie
:param cookies: cookies信息
:return: None
"""
set_browser_cookies(self._owner, cookies)
def clear(self):
self._owner._run_cdp('Storage.clearCookies')
class CookiesSetter(BrowserCookiesSetter):
def __call__(self, cookies):
set_tab_cookies(self._owner, cookies)
def remove(self, name, url=None, domain=None, path=None):
"""删除一个cookie
:param name: cookie的name字段
:param url: cookie的url字段可选
:param domain: cookie的domain字段可选
:param path: cookie的path字段可选
:return: None
"""
d = {'name': name}
if url is not None:
d['url'] = url
@ -37,13 +30,14 @@ class CookiesSetter(object):
d['domain'] = domain
if not url and not domain:
d['url'] = self._owner.url
if not d['url'].startswith('http'):
raise ValueError('需设置domain或url值。如设置url值需以http开头。')
if path is not None:
d['path'] = path
self._owner.run_cdp('Network.deleteCookies', **d)
self._owner._run_cdp('Network.deleteCookies', **d)
def clear(self):
"""清除cookies"""
self._owner.run_cdp('Network.clearBrowserCookies')
self._owner._run_cdp('Network.clearBrowserCookies')
class SessionCookiesSetter(object):
@ -51,54 +45,54 @@ class SessionCookiesSetter(object):
self._owner = owner
def __call__(self, cookies):
"""设置多个cookie注意不要传入单个
:param cookies: cookies信息
:return: None
"""
set_session_cookies(self._owner.session, cookies)
def remove(self, name):
"""删除一个cookie
:param name: cookie的name字段
:return: None
"""
self._owner.session.cookies.set(name, None)
def clear(self):
"""清除cookies"""
self._owner.session.cookies.clear()
class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter):
class WebPageCookiesSetter(CookiesSetter):
def __call__(self, cookies):
"""设置多个cookie注意不要传入单个
:param cookies: cookies信息
:return: None
"""
if self._owner.mode == 'd' and self._owner._has_driver:
super().__call__(cookies)
elif self._owner.mode == 's' and self._owner._has_session:
super(CookiesSetter, self).__call__(cookies)
set_session_cookies(self._owner.session, cookies)
def remove(self, name, url=None, domain=None, path=None):
"""删除一个cookie
:param name: cookie的name字段
:param url: cookie的url字段可选d模式时才有效
:param domain: cookie的domain字段可选d模式时才有效
:param path: cookie的path字段可选d模式时才有效
:return: None
"""
if self._owner.mode == 'd' and self._owner._has_driver:
super().remove(name, url, domain, path)
elif self._owner.mode == 's' and self._owner._has_session:
if url or domain or path:
raise AttributeError('url、domain、path参数只有d模式下有效。')
super(CookiesSetter, self).remove(name)
self._owner.session.cookies.set(name, None)
def clear(self):
"""清除cookies"""
if self._owner.mode == 'd' and self._owner._has_driver:
super().clear()
elif self._owner.mode == 's' and self._owner._has_session:
super(CookiesSetter, self).clear()
self._owner.session.cookies.clear()
class MixTabCookiesSetter(CookiesSetter):
    """Cookie setter for a tab that works either through the browser (d mode) or a session."""

    def __call__(self, cookies):
        """Set cookies on whichever side of the tab is currently active.
        :param cookies: cookie data
        :return: None
        """
        if self._owner._d_mode:
            # d mode: only act while the driver is alive
            if self._owner._driver.is_running:
                super().__call__(cookies)
        elif self._owner._session:
            set_session_cookies(self._owner.session, cookies)

    def remove(self, name, url=None, domain=None, path=None):
        """Delete one cookie.
        :param name: the cookie's name field
        :param url: the cookie's url field; only accepted in d mode
        :param domain: the cookie's domain field; only accepted in d mode
        :param path: the cookie's path field; only accepted in d mode
        :return: None
        :raises AttributeError: if url, domain or path is given outside d mode
        """
        if self._owner._d_mode:
            if self._owner._driver.is_running:
                super().remove(name, url, domain, path)
            return

        if not self._owner._session:
            return
        if url or domain or path:
            raise AttributeError('url、domain、path参数只有d模式下有效。')
        self._owner.session.cookies.set(name, None)

    def clear(self):
        """Remove the cookies of whichever side of the tab is currently active."""
        if self._owner._d_mode:
            if self._owner._driver.is_running:
                super().clear()
        elif self._owner._session:
            self._owner.session.cookies.clear()

View File

@ -2,49 +2,167 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from http.cookiejar import Cookie, CookieJar
from typing import Union
from .._base.chromium import Chromium
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_tab import WebPageTab
from .._pages.mix_tab import MixTab
from .._pages.session_page import SessionPage
from .._pages.web_page import WebPage
class CookiesSetter(object):
_owner: ChromiumBase
class BrowserCookiesSetter(object):
_owner: Chromium = ...
def __init__(self, page: ChromiumBase): ...
def __init__(self, owner: Chromium):
"""
:param owner: Chromium对象
"""
...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None: ...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None:
"""设置一个或多个cookie
:param cookies: cookies信息
:return: None
"""
...
def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ...
def clear(self) -> None:
"""清除cookies"""
...
def clear(self) -> None: ...
class CookiesSetter(BrowserCookiesSetter):
_owner: ChromiumBase = ...
def __init__(self, owner: ChromiumBase):
"""
:param owner: 页面对象
"""
...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None:
"""设置一个或多个cookie
:param cookies: cookies信息
:return: None
"""
...
def remove(self,
name: str,
url: str = None,
domain: str = None,
path: str = None) -> None:
"""删除一个cookie
:param name: cookie的name字段
:param url: cookie的url字段可选
:param domain: cookie的domain字段可选
:param path: cookie的path字段可选
:return: None
"""
...
def clear(self) -> None:
"""清除cookies"""
...
class SessionCookiesSetter(object):
_owner: SessionPage
_owner: SessionPage = ...
def __init__(self, page: SessionPage): ...
def __init__(self, owner: SessionPage):
"""
:param owner: SessionPage对象
"""
...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None: ...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None:
"""设置一个或多个cookie
:param cookies: cookies信息
:return: None
"""
...
def remove(self, name: str) -> None: ...
def remove(self, name: str) -> None:
"""删除一个cookie
:param name: cookie的name字段
:return: None
"""
...
def clear(self) -> None: ...
def clear(self) -> None:
"""清除cookies"""
...
class WebPageCookiesSetter(CookiesSetter, SessionCookiesSetter):
_owner: Union[WebPage, WebPageTab]
_owner: WebPage = ...
def __init__(self, page: SessionPage): ...
def __init__(self, owner: WebPage):
"""
:param owner: WebPage对象
"""
...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None: ...
def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None:
"""设置一个或多个cookie
:param cookies: cookies信息
:return: None
"""
...
def remove(self, name: str, url: str = None, domain: str = None, path: str = None) -> None: ...
def remove(self,
name: str,
url: str = None,
domain: str = None,
path: str = None) -> None:
"""删除一个cookie
:param name: cookie的name字段
:param url: cookie的url字段可选d模式时才有效
:param domain: cookie的domain字段可选d模式时才有效
:param path: cookie的path字段可选d模式时才有效
:return: None
"""
...
def clear(self) -> None: ...
def clear(self) -> None:
"""清除cookies"""
...
# NOTE(review): the runtime class in the implementation module derives from CookiesSetter
# only; confirm whether SessionCookiesSetter should be listed as a base here.
class MixTabCookiesSetter(CookiesSetter, SessionCookiesSetter):
    _owner: MixTab = ...

    def __init__(self, owner: MixTab):
        """
        :param owner: MixTab object
        """
        ...

    def __call__(self, cookies: Union[CookieJar, Cookie, list, tuple, str, dict]) -> None:
        """Set one or more cookies.
        :param cookies: cookie data
        :return: None
        """
        ...

    def remove(self,
               name: str,
               url: str = None,
               domain: str = None,
               path: str = None) -> None:
        """Delete one cookie.
        :param name: the cookie's name field
        :param url: the cookie's url field, optional; only effective in d mode
        :param domain: the cookie's domain field, optional; only effective in d mode
        :param path: the cookie's path field, optional; only effective in d mode
        :return: None
        """
        ...

    def clear(self) -> None:
        """Clear the cookies."""
        ...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from os.path import sep
from pathlib import Path
@ -16,143 +15,101 @@ from DataRecorder.tools import get_usable_path
class DownloadManager(object):
def __init__(self, browser):
"""
:param browser: Browser对象
"""
self._browser = browser
self._page = browser.page
self._when_download_file_exists = 'rename'
self._save_path = None
t = TabDownloadSettings(self._page.tab_id)
t.path = self._page.download_path
t = TabDownloadSettings('browser')
t.path = self._browser.download_path
t.rename = None
t.suffix = None
t.when_file_exists = 'rename'
self._missions = {} # {guid: DownloadMission}
self._tab_missions = {} # {tab_id: DownloadMission}
self._tab_missions = {} # {tab_id: [DownloadMission, ...]}
self._flags = {} # {tab_id: [bool, DownloadMission]}
self._waiting_tab = set() # click.to_download()专用
self._tmp_path = '.'
self._page_id = None
if self._page.download_path:
self.set_path(self._page, self._page.download_path)
else:
self._running = False
self._running = False
@property
def missions(self):
"""返回所有未完成的下载任务"""
return self._missions
def set_path(self, tab, path):
"""设置某个tab的下载路径
:param tab: 页面对象
:param path: 下载路径绝对路径str
:return: None
"""
TabDownloadSettings(tab.tab_id).path = path
if tab is self._page or not self._running:
self._browser.driver.set_callback('Browser.downloadProgress', self._onDownloadProgress)
self._browser.driver.set_callback('Browser.downloadWillBegin', self._onDownloadWillBegin)
r = self._browser.run_cdp('Browser.setDownloadBehavior', downloadPath=path,
behavior='allowAndName', eventsEnabled=True)
self._save_path = path
tid = tab if isinstance(tab, str) else tab.tab_id
TabDownloadSettings(tid).path = path
if not self._running or tid == 'browser':
self._browser._driver.set_callback('Browser.downloadProgress', self._onDownloadProgress)
self._browser._driver.set_callback('Browser.downloadWillBegin', self._onDownloadWillBegin)
r = self._browser._run_cdp('Browser.setDownloadBehavior', downloadPath=self._browser._download_path,
behavior='allowAndName', eventsEnabled=True)
self._tmp_path = self._browser._download_path
if 'error' in r:
print('浏览器版本太低无法使用下载管理功能。')
self._running = True
def set_rename(self, tab_id, rename=None, suffix=None):
"""设置某个tab的重命名文件名
:param tab_id: tab id
:param rename: 文件名可不含后缀会自动使用远程文件后缀
:param suffix: 后缀名显式设置后缀名不使用远程文件后缀
:return: None
"""
ts = TabDownloadSettings(tab_id)
ts.rename = rename
ts.suffix = suffix
def set_file_exists(self, tab_id, mode):
"""设置某个tab下载文件重名时执行的策略
:param tab_id: tab id
:param mode: 下载路径
:return: None
"""
TabDownloadSettings(tab_id).when_file_exists = mode
def set_flag(self, tab_id, flag):
"""设置某个tab的重命名文件名
:param tab_id: tab id
:param flag: 等待标志
:return: None
"""
self._flags[tab_id] = flag
def get_flag(self, tab_id):
"""获取tab下载等待标记
:param tab_id: tab id
:return: 任务对象或False
"""
return self._flags.get(tab_id, None)
def get_tab_missions(self, tab_id):
"""获取某个tab正在下载的任务
:param tab_id:
:return: 下载任务组成的列表
"""
return self._tab_missions.get(tab_id, [])
return self._tab_missions.get(tab_id, set())
def set_done(self, mission, state, final_path=None):
"""设置任务结束
:param mission: 任务对象
:param state: 任务状态
:param final_path: 最终路径
:return: None
"""
if mission.state not in ('canceled', 'skipped'):
mission.state = state
mission.final_path = final_path
if mission.tab_id in self._tab_missions and mission.id in self._tab_missions[mission.tab_id]:
self._tab_missions[mission.tab_id].remove(mission.id)
if mission.tab_id in self._tab_missions and mission in self._tab_missions[mission.tab_id]:
self._tab_missions[mission.tab_id].discard(mission)
if (mission.from_tab and mission.from_tab in self._tab_missions
and mission in self._tab_missions[mission.from_tab]):
self._tab_missions[mission.from_tab].discard(mission)
self._missions.pop(mission.id, None)
mission._is_done = True
def cancel(self, mission):
"""取消任务
:param mission: 任务对象
:return: None
"""
mission.state = 'canceled'
try:
self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)
self._browser._run_cdp('Browser.cancelDownload', guid=mission.id)
except:
pass
if mission.final_path:
Path(mission.final_path).unlink(True)
def skip(self, mission):
"""跳过任务
:param mission: 任务对象
:return: None
"""
mission.state = 'skipped'
try:
self._browser.run_cdp('Browser.cancelDownload', guid=mission.id)
self._browser._run_cdp('Browser.cancelDownload', guid=mission.id)
except:
pass
def clear_tab_info(self, tab_id):
"""当tab关闭时清除有关信息
:param tab_id: 标签页id
:return: None
"""
self._tab_missions.pop(tab_id, None)
self._flags.pop(tab_id, None)
TabDownloadSettings.TABS.pop(tab_id, None)
self._waiting_tab.discard(tab_id)
def _onDownloadWillBegin(self, **kwargs):
"""用于获取弹出新标签页触发的下载任务"""
guid = kwargs['guid']
tab_id = self._browser._frames.get(kwargs['frameId'], self._page.tab_id)
tab_id = self._browser._frames.get(kwargs['frameId'], 'browser')
tab = 'browser' if tab_id in ('browser', self._page_id) or self.get_flag('browser') is not None else tab_id
opener = self._browser._relation.get(tab_id, None)
from_tab = None
if opener and opener in self._waiting_tab:
tab = from_tab = opener
settings = TabDownloadSettings(tab_id if tab_id in TabDownloadSettings.TABS else self._page.tab_id)
settings = TabDownloadSettings(tab)
if settings.rename:
if settings.suffix is not None:
name = f'{settings.rename}.{settings.suffix}' if settings.suffix else settings.rename
@ -177,28 +134,35 @@ class DownloadManager(object):
name = kwargs['suggestedFilename']
skip = False
overwrite = None # 存在且重命名
goal_path = Path(settings.path) / name
if goal_path.exists():
if settings.when_file_exists == 'skip':
skip = True
elif settings.when_file_exists == 'overwrite':
goal_path.unlink()
overwrite = True # 存在且覆盖
else: # 不存在
overwrite = False
m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._save_path)
m = DownloadMission(self, tab_id, guid, settings.path, name, kwargs['url'], self._tmp_path, overwrite)
if from_tab:
m.from_tab = from_tab
self._tab_missions.setdefault(from_tab, set()).add(m)
self._missions[guid] = m
if self.get_flag(tab_id) is False: # 取消该任务
if self.get_flag('browser') is False or self.get_flag(tab) is False: # 取消该任务
self.cancel(m)
elif skip:
self.skip(m)
else:
self._tab_missions.setdefault(tab_id, []).append(guid)
self._tab_missions.setdefault(tab_id, set()).add(m)
if self.get_flag(tab_id) is not None:
self._flags[tab_id] = m
if self.get_flag('browser') is not None:
self._flags['browser'] = m
elif self.get_flag(tab) is not None:
self._flags[tab] = m
def _onDownloadProgress(self, **kwargs):
"""下载状态变化时执行"""
if kwargs['guid'] in self._missions:
mission = self._missions[kwargs['guid']]
if kwargs['state'] == 'inProgress':
@ -207,14 +171,28 @@ class DownloadManager(object):
elif kwargs['state'] == 'completed':
if mission.state == 'skipped':
Path(f'{mission.save_path}{sep}{mission.id}').unlink(True)
Path(f'{mission.tmp_path}{sep}{mission.id}').unlink(True)
self.set_done(mission, 'skipped')
return
mission.received_bytes = kwargs['receivedBytes']
mission.total_bytes = kwargs['totalBytes']
form_path = f'{mission.save_path}{sep}{mission.id}'
to_path = str(get_usable_path(f'{mission.path}{sep}{mission.name}'))
move(form_path, to_path)
form_path = f'{mission.tmp_path}{sep}{mission.id}'
if mission._overwrite is None:
to_path = str(get_usable_path(f'{mission.folder}{sep}{mission.name}'))
else:
to_path = f'{mission.folder}{sep}{mission.name}'
Path(mission.folder).mkdir(parents=True, exist_ok=True)
not_moved = True
for _ in range(10):
try:
move(form_path, to_path)
not_moved = False
break
except PermissionError:
sleep(.5)
if not_moved:
from shutil import copy
copy(form_path, to_path)
self.set_done(mission, 'completed', final_path=to_path)
else: # 'canceled'
@ -233,43 +211,33 @@ class TabDownloadSettings(object):
return object.__new__(cls)
def __init__(self, tab_id):
"""
:param tab_id: tab id
"""
if hasattr(self, '_created'):
return
self._created = True
self.tab_id = tab_id
self.rename = None
self.suffix = None
self.path = ''
self.when_file_exists = 'rename'
self.path = '' if tab_id == 'browser' else self.TABS['browser'].path
self.when_file_exists = 'rename' if tab_id == 'browser' else self.TABS['browser'].when_file_exists
TabDownloadSettings.TABS[tab_id] = self
class DownloadMission(object):
def __init__(self, mgr, tab_id, _id, path, name, url, save_path):
"""
:param mgr: BrowserDownloadManager对象
:param tab_id: 标签页id
:param _id: 任务id
:param path: 保存路径
:param name: 文件名
:param url: url
:param save_path: 下载路径
"""
def __init__(self, mgr, tab_id, _id, folder, name, url, tmp_path, overwrite):
self._mgr = mgr
self.url = url
self.tab_id = tab_id
self.from_tab = None
self.id = _id
self.path = path
self.folder = folder
self.name = name
self.state = 'running'
self.total_bytes = None
self.received_bytes = 0
self.final_path = None
self.save_path = save_path
self.tmp_path = tmp_path
self._overwrite = overwrite
self._is_done = False
def __repr__(self):
@ -277,32 +245,23 @@ class DownloadMission(object):
@property
def rate(self):
"""以百分比形式返回下载进度"""
return round((self.received_bytes / self.total_bytes) * 100, 2) if self.total_bytes else None
@property
def is_done(self):
"""返回任务是否在运行中"""
return self._is_done
def cancel(self):
"""取消该任务,如任务已完成,删除已下载的文件"""
self._mgr.cancel(self)
def wait(self, show=True, timeout=None, cancel_if_timeout=True):
"""等待任务结束
:param show: 是否显示下载信息
:param timeout: 超时时间为None则无限等待
:param cancel_if_timeout: 超时时是否取消任务
:return: 等待成功返回完整路径否则返回False
"""
if show:
print(f'url{self.url}')
end_time = perf_counter()
while self.name is None and perf_counter() < end_time:
sleep(0.01)
print(f'文件名:{self.name}')
print(f'标路径:{self.path}')
print(f'文件名:{self.name or "未知"}')
print(f'录路径:{self.folder}')
if timeout is None:
while not self.is_done:
@ -322,11 +281,17 @@ class DownloadMission(object):
if show:
if self.state == 'completed':
print(f'下载完成 {self.final_path}')
print('\r100% ', end='')
if self._overwrite is None:
print(f'完成并重命名 {self.final_path}')
elif self._overwrite is False:
print(f'下载完成 {self.final_path}')
else:
print(f'已覆盖 {self.final_path}')
elif self.state == 'canceled':
print(f'下载取消')
elif self.state == 'skipped':
print(f'已跳过')
print(f'已跳过 {self.folder}{sep}{self.name}')
print()
return self.final_path if self.final_path else False

View File

@ -2,53 +2,127 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Dict, Optional, Union, Literal
from typing import Dict, Optional, Union, Literal, Set
from .._base.browser import Browser
from .._base.chromium import Chromium
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_page import ChromiumPage
FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']
class DownloadManager(object):
_browser: Browser = ...
_page: ChromiumPage = ...
_browser: Chromium = ...
_missions: Dict[str, DownloadMission] = ...
_tab_missions: dict = ...
_tab_missions: Dict[str, Set[DownloadMission]] = ...
_flags: dict = ...
_waiting_tab: set = ...
_running: bool = ...
_save_path: Optional[str] = ...
_tmp_path: str = ...
_page_id: Optional[str] = ...
def __init__(self, browser: Browser): ...
def __init__(self, browser: Chromium):
"""
:param browser: Browser对象
"""
...
@property
def missions(self) -> Dict[str, DownloadMission]: ...
def missions(self) -> Dict[str, DownloadMission]:
"""返回所有未完成的下载任务"""
...
def set_path(self, tab: ChromiumBase, path: str) -> None: ...
def set_path(self, tab: Union[str, ChromiumBase], path: str) -> None:
"""设置某个tab的下载路径
:param tab: 页面对象
:param path: 下载路径绝对路径str
:return: None
"""
...
def set_rename(self, tab_id: str, rename: str = None, suffix: str = None) -> None: ...
def set_rename(self,
tab_id: str,
rename: str = None,
suffix: str = None) -> None:
"""设置某个tab的重命名文件名
:param tab_id: tab id
:param rename: 文件名可不含后缀会自动使用远程文件后缀
:param suffix: 后缀名显式设置后缀名不使用远程文件后缀
:return: None
"""
...
def set_file_exists(self, tab_id: str, mode: Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']) -> None: ...
def set_file_exists(self, tab_id: str, mode: FILE_EXISTS) -> None:
"""设置某个tab下载文件重名时执行的策略
:param tab_id: tab id
:param mode: 下载路径
:return: None
"""
...
def set_flag(self, tab_id: str, flag: Union[bool, DownloadMission, None]) -> None: ...
def set_flag(self, tab_id: str, flag: Union[bool, DownloadMission, None]) -> None:
"""设置某个tab的重命名文件名
:param tab_id: tab id
:param flag: 等待标志
:return: None
"""
...
def get_flag(self, tab_id: str) -> Union[bool, DownloadMission, None]: ...
def get_flag(self, tab_id: str) -> Union[bool, DownloadMission, None]:
"""获取tab下载等待标记
:param tab_id: tab id
:return: 任务对象或False
"""
...
def get_tab_missions(self, tab_id: str) -> list: ...
def get_tab_missions(self, tab_id: str) -> list:
"""获取某个tab正在下载的任务
:param tab_id:
:return: 下载任务组成的列表
"""
...
def set_done(self, mission: DownloadMission, state: str, final_path: str = None) -> None: ...
def set_done(self,
mission: DownloadMission,
state: str,
final_path: str = None) -> None:
"""设置任务结束
:param mission: 任务对象
:param state: 任务状态
:param final_path: 最终路径
:return: None
"""
...
def cancel(self, mission: DownloadMission) -> None: ...
def cancel(self, mission: DownloadMission) -> None:
"""取消任务
:param mission: 任务对象
:return: None
"""
...
def skip(self, mission: DownloadMission) -> None: ...
def skip(self, mission: DownloadMission) -> None:
"""跳过任务
:param mission: 任务对象
:return: None
"""
...
def clear_tab_info(self, tab_id: str) -> None: ...
def clear_tab_info(self, tab_id: str) -> None:
"""当tab关闭时清除有关信息
:param tab_id: 标签页id
:return: None
"""
...
def _onDownloadWillBegin(self, **kwargs) -> None: ...
def _onDownloadWillBegin(self, **kwargs) -> None:
"""用于获取弹出新标签页触发的下载任务"""
...
def _onDownloadProgress(self, **kwargs) -> None: ...
def _onDownloadProgress(self, **kwargs) -> None:
"""下载状态变化时执行"""
...
class TabDownloadSettings(object):
@ -58,34 +132,74 @@ class TabDownloadSettings(object):
rename: Optional[str] = ...
suffix: Optional[str] = ...
path: Optional[str] = ...
when_file_exists: str = ...
when_file_exists: FILE_EXISTS = ...
def __init__(self, tab_id: str): ...
def __init__(self, tab_id: str):
"""
:param tab_id: tab id
"""
...
class DownloadMission(object):
tab_id: str = ...
from_tab: Optional[str] = ...
_mgr: DownloadManager = ...
url: str = ...
id: str = ...
path: str = ...
folder: str = ...
name: str = ...
state: str = ...
total_bytes: Optional[int] = ...
received_bytes: int = ...
final_path: Optional[str] = ...
save_path: str = ...
tmp_path: str = ...
_overwrite: bool = ...
_is_done: bool = ...
def __init__(self, mgr: DownloadManager, tab_id: str, _id: str, path: str, name: str, url: str,
save_path: str): ...
def __init__(self,
mgr: DownloadManager,
tab_id: str,
_id: str,
folder: str,
name: str,
url: str,
tmp_path: str,
overwrite: bool):
"""
:param mgr: BrowserDownloadManager对象
:param tab_id: 标签页id
:param _id: 任务id
:param folder: 最终保存文件夹路径
:param name: 文件名
:param url: url
:param tmp_path: 下载临时路径
:param overwrite: 是否已存在同名文件None表示重命名
"""
...
@property
def rate(self) -> float: ...
def rate(self) -> float:
"""以百分比形式返回下载进度"""
...
@property
def is_done(self) -> bool: ...
def is_done(self) -> bool:
"""返回任务是否在运行中"""
...
def cancel(self) -> None: ...
def cancel(self) -> None:
"""取消该任务,如任务已完成,删除已下载的文件"""
...
def wait(self, show: bool = True, timeout=None, cancel_if_timeout=True) -> Union[bool, str]: ...
def wait(self,
show: bool = True,
timeout=None,
cancel_if_timeout=True) -> Union[bool, str]:
"""等待任务结束
:param show: 是否显示下载信息
:param timeout: 超时时间为None则无限等待
:param cancel_if_timeout: 超时时是否取消任务
:return: 等待成功返回完整路径否则返回False
"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from base64 import b64decode
from json import JSONDecodeError, loads
@ -22,11 +21,8 @@ class Listener(object):
"""监听器基类"""
def __init__(self, owner):
"""
:param owner: ChromiumBase对象
"""
self._owner = owner
self._address = owner.address
self._address = owner.browser.address
self._target_id = owner._target_id
self._driver = None
self._running_requests = 0
@ -41,7 +37,7 @@ class Listener(object):
self._targets = True
self._is_regex = False
self._method = ('GET', 'POST')
self._method = {'GET', 'POST'}
self._res_type = True
@property
@ -50,15 +46,6 @@ class Listener(object):
return self._targets
def set_targets(self, targets=True, is_regex=False, method=('GET', 'POST'), res_type=True):
"""指定要等待的数据包
:param targets: 要匹配的数据包url特征可用list等传入多个为True时获取所有
:param is_regex: 设置的target是否正则表达式
:param method: 设置监听的请求类型可指定多个为True时监听全部
:param res_type: 设置监听的资源类型可指定多个为True时监听全部可指定的值有
Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
:return: None
"""
if targets is not None:
if not isinstance(targets, (str, list, tuple, set)) and targets is not True:
raise TypeError('targets只能是str、list、tuple、set、True。')
@ -91,15 +78,6 @@ class Listener(object):
raise TypeError('res_type参数只能是str、list、tuple、set、True类型。')
def start(self, targets=None, is_regex=None, method=None, res_type=None):
"""拦截目标请求,每次拦截前清空结果
:param targets: 要匹配的数据包url特征可用list等传入多个为True时获取所有
:param is_regex: 设置的target是否正则表达式为None时保持原来设置
:param method: 设置监听的请求类型可指定多个默认('GET', 'POST')为True时监听全部为None时保持原来设置
:param res_type: 设置监听的资源类型可指定多个默认为True时监听全部为None时保持原来设置可指定的值有
Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
:return: None
"""
if targets is not None:
if is_regex is None:
is_regex = False
@ -117,23 +95,16 @@ class Listener(object):
self.listening = True
def wait(self, count=1, timeout=None, fit_count=True, raise_err=None):
"""等待符合要求的数据包到达指定数量
:param count: 需要捕捉的数据包数量
:param timeout: 超时时间为None无限等待
:param fit_count: 是否必须满足总数要求发生超时为True返回False为False返回已捕捉到的数据包
:param raise_err: 超时时是否抛出错误为None时根据Settings设置
:return: count为1时返回数据包对象大于1时返回列表超时且fit_count为True时返回False
"""
if not self.listening:
raise RuntimeError('监听未启动或已暂停。')
if not timeout:
while self._caught.qsize() < count:
while self._driver.is_running and self.listening and self._caught.qsize() < count:
sleep(.03)
fail = False
else:
end = perf_counter() + timeout
while True:
while self._driver.is_running and self.listening:
if perf_counter() > end:
fail = True
break
@ -157,31 +128,33 @@ class Listener(object):
return [self._caught.get_nowait() for _ in range(count)]
def steps(self, count=None, timeout=None, gap=1):
"""用于单步操作,可实现每收到若干个数据包执行一步操作(如翻页)
:param count: 需捕获的数据包总数为None表示无限
:param timeout: 每个数据包等待时间为None表示无限
:param gap: 每接收到多少个数据包返回一次数据
:return: 用于在接收到监听目标时触发动作的可迭代对象
"""
if not self.listening:
raise RuntimeError('监听未启动或已暂停。')
caught = 0
end = perf_counter() + timeout if timeout else None
while True:
if (timeout and perf_counter() > end) or self._driver._stopped.is_set():
return
if self._caught.qsize() >= gap:
yield self._caught.get_nowait() if gap == 1 else [self._caught.get_nowait() for _ in range(gap)]
if timeout:
if timeout is None:
while self._driver.is_running and self.listening:
if self._caught.qsize() >= gap:
yield self._caught.get_nowait() if gap == 1 else [self._caught.get_nowait() for _ in range(gap)]
if count:
caught += gap
if caught >= count:
return
sleep(.03)
else:
end = perf_counter() + timeout
while self._driver.is_running and self.listening and perf_counter() < end:
if self._caught.qsize() >= gap:
yield self._caught.get_nowait() if gap == 1 else [self._caught.get_nowait() for _ in range(gap)]
end = perf_counter() + timeout
if count:
caught += gap
if caught >= count:
return
sleep(.03)
if count:
caught += gap
if caught >= count:
return
sleep(.03)
return False
def stop(self):
"""停止监听,清空已监听到的列表"""
if self.listening:
self.pause()
self.clear()
@ -189,10 +162,6 @@ class Listener(object):
self._driver = None
def pause(self, clear=True):
"""暂停监听
:param clear: 是否清空已获取队列
:return: None
"""
if self.listening:
self._driver.set_callback('Network.requestWillBeSent', None)
self._driver.set_callback('Network.responseReceived', None)
@ -203,14 +172,12 @@ class Listener(object):
self.clear()
def resume(self):
"""继续暂停的监听"""
if self.listening:
return
self._set_callback()
self.listening = True
def clear(self):
"""清空结果"""
self._request_ids = {}
self._extra_info_ids = {}
self._caught = Queue(maxsize=0)
@ -218,18 +185,12 @@ class Listener(object):
self._running_targets = 0
def wait_silent(self, timeout=None, targets_only=False, limit=0):
"""等待所有请求结束
:param timeout: 超时时间为None时无限等待
:param targets_only: 是否只等待targets指定的请求结束
:param limit: 剩下多少个连接时视为结束
:return: 返回是否等待成功
"""
if not self.listening:
raise RuntimeError('监听未启动或已暂停。')
if timeout is None:
while ((not targets_only and self._running_requests > limit)
or (targets_only and self._running_targets > limit)):
sleep(.1)
sleep(.01)
return True
end_time = perf_counter() + timeout
@ -237,17 +198,11 @@ class Listener(object):
if ((not targets_only and self._running_requests <= limit)
or (targets_only and self._running_targets <= limit)):
return True
sleep(.1)
sleep(.01)
else:
return False
def _to_target(self, target_id, address, owner):
"""切换监听的页面对象
:param target_id: 新页面对象_target_id
:param address: 新页面对象address
:param owner: 新页面对象
:return: None
"""
self._target_id = target_id
self._address = address
self._owner = owner
@ -262,7 +217,6 @@ class Listener(object):
self._set_callback()
def _set_callback(self):
"""设置监听请求的回调函数"""
self._driver.set_callback('Network.requestWillBeSent', self._requestWillBeSent)
self._driver.set_callback('Network.requestWillBeSentExtraInfo', self._requestWillBeSentExtraInfo)
self._driver.set_callback('Network.responseReceived', self._response_received)
@ -271,7 +225,6 @@ class Listener(object):
self._driver.set_callback('Network.loadingFailed', self._loading_failed)
def _requestWillBeSent(self, **kwargs):
"""接收到请求时的回调函数"""
self._running_requests += 1
p = None
if self._targets is True:
@ -300,19 +253,16 @@ class Listener(object):
self._extra_info_ids.setdefault(kwargs['requestId'], {})['obj'] = p if p else False
def _requestWillBeSentExtraInfo(self, **kwargs):
    """Callback for ``Network.requestWillBeSentExtraInfo``.

    Increments the running-request counter and stores the raw event under the
    ``'request'`` key of this request id's entry in ``self._extra_info_ids``
    (creating the entry when it does not exist yet).
    """
    self._running_requests += 1
    entry = self._extra_info_ids.setdefault(kwargs['requestId'], {})
    entry['request'] = kwargs
def _response_received(self, **kwargs):
    """Callback for ``Network.responseReceived``.

    Looks up the tracked packet for ``kwargs['requestId']`` in
    ``self._request_ids`` and, when one is found, attaches the raw response
    and the resource type to it. Events for untracked ids are ignored.
    """
    packet = self._request_ids.get(kwargs['requestId'])
    if not packet:
        return
    packet._raw_response = kwargs['response']
    packet._resource_type = kwargs['type']
def _responseReceivedExtraInfo(self, **kwargs):
"""接收到返回额外信息时的回调函数"""
self._running_requests -= 1
r = self._extra_info_ids.get(kwargs['requestId'], None)
if r:
@ -327,7 +277,6 @@ class Listener(object):
r['response'] = kwargs
def _loading_finished(self, **kwargs):
"""请求完成时处理方法"""
self._running_requests -= 1
rid = kwargs['requestId']
packet = self._request_ids.get(rid)
@ -363,7 +312,6 @@ class Listener(object):
self._running_targets -= 1
def _loading_failed(self, **kwargs):
"""请求失败时的回调方法"""
self._running_requests -= 1
r_id = kwargs['requestId']
data_packet = self._request_ids.get(r_id, None)
@ -393,26 +341,19 @@ class Listener(object):
class FrameListener(Listener):
    """Listener that filters network events by the owning frame.

    Unless the owner is flagged ``_is_diff_domain``, events whose ``frameId``
    differs from the owner's ``_frame_id`` are dropped before they reach the
    base-class handlers.
    """

    def _requestWillBeSent(self, **kwargs):
        """Forward a request event to ``Listener`` unless it belongs to another frame."""
        owner = self._owner
        if owner._is_diff_domain or kwargs.get('frameId') == owner._frame_id:
            super()._requestWillBeSent(**kwargs)

    def _response_received(self, **kwargs):
        """Forward a response event to ``Listener`` unless it belongs to another frame."""
        owner = self._owner
        if owner._is_diff_domain or kwargs.get('frameId') == owner._frame_id:
            super()._response_received(**kwargs)
class DataPacket(object):
"""返回的数据包管理类"""
def __init__(self, tab_id, target):
"""
:param tab_id: 产生这个数据包的tab的id
:param target: 监听目标
"""
self.tab_id = tab_id
self.target = target
self.is_failed = False
@ -479,13 +420,9 @@ class DataPacket(object):
return self._fail_info
def wait_extra_info(self, timeout=None):
"""等待额外的信息加载完成
:param timeout: 超时时间None为无限等待
:return: 是否等待成功
"""
if timeout is None:
while self._responseExtraInfo is None:
sleep(.1)
sleep(.01)
return True
else:
@ -493,7 +430,7 @@ class DataPacket(object):
while perf_counter() < end_time:
if self._responseExtraInfo is not None:
return True
sleep(.1)
sleep(.01)
else:
return False
@ -511,14 +448,22 @@ class Request(object):
@property
def headers(self):
    """Return the request headers as a case-insensitive dict.

    The result is built once and cached in ``self._headers``. It starts from
    the ``'headers'`` entry of the raw request; headers found in
    ``extra_info.headers`` are then added only for names not already present.
    """
    if self._headers is None:
        merged = self._headers = CaseInsensitiveDict(self._request['headers'])
        extra = self.extra_info.headers
        if extra:
            for name, value in CaseInsensitiveDict(extra).items():
                if name not in merged:
                    merged[name] = value
    return self._headers
@property
def params(self):
    """Return the query-string parameters of the request URL as a dict.

    Blank values are kept. When a key occurs more than once the last value
    wins, because the parsed pairs are collected into a plain dict.
    """
    from urllib.parse import parse_qsl, urlparse
    query = urlparse(self.url).query
    pairs = parse_qsl(query, keep_blank_values=True)
    return dict(pairs)
@property
def postData(self):
"""返回postData数据"""
if self._postData is None:
if self._raw_post_data:
postData = self._raw_post_data
@ -534,12 +479,10 @@ class Request(object):
@property
def cookies(self):
    """Return the cookies sent with the request as a list.

    Entries of ``extra_info.associatedCookies`` that carry a non-empty
    ``blockedReasons`` value are left out.
    """
    sent = []
    for item in self.extra_info.associatedCookies:
        if item['blockedReasons']:
            continue
        sent.append(item['cookie'])
    return sent
@property
def extra_info(self):
"""返回额外数据"""
return RequestExtraInfo(self._data_packet._request_extra_info or {})
@ -557,19 +500,21 @@ class Response(object):
@property
def headers(self):
    """Return the response headers as a case-insensitive dict.

    The result is built once and cached in ``self._headers``. It starts from
    the ``'headers'`` entry of the raw response; headers found in
    ``extra_info.headers`` are then added only for names not already present.
    """
    if self._headers is None:
        merged = self._headers = CaseInsensitiveDict(self._response['headers'])
        extra = self.extra_info.headers
        if extra:
            for name, value in CaseInsensitiveDict(extra).items():
                if name not in merged:
                    merged[name] = value
    return self._headers
@property
def raw_body(self):
"""返回未被处理的body文本"""
return self._raw_body
@property
def body(self):
"""返回body内容如果是json格式自动进行转换如果时图片格式进行base64转换其它格式直接返回文本"""
if self._body is None:
if self._is_base64_body:
self._body = b64decode(self._raw_body)
@ -593,7 +538,6 @@ class ExtraInfo(object):
@property
def all_info(self):
"""以dict形式返回所有额外信息"""
return self._extra_info
def __getattr__(self, item):

View File

@ -2,11 +2,10 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from queue import Queue
from typing import Union, Dict, List, Iterable, Optional, Literal, Any
from typing import Union, List, Iterable, Optional, Literal, Any
from requests.structures import CaseInsensitiveDict
@ -19,21 +18,26 @@ __RES_TYPE__ = Literal['Document', 'Stylesheet', 'Image', 'Media', 'Font', 'Scri
class Listener(object):
_owner: ChromiumBase = ...
_address: str = ...
_target_id: str = ...
_targets: Union[str, dict, True, None] = ...
_method: Union[set, True] = ...
_res_type: Union[set, True] = ...
_caught: Optional[Queue] = ...
_is_regex: bool = ...
_driver: Optional[Driver] = ...
_request_ids: Optional[dict] = ...
_extra_info_ids: Optional[dict] = ...
_running_requests: int = ...
_running_targets: int = ...
listening: bool = ...
def __init__(self, owner: ChromiumBase):
self._owner: ChromiumBase = ...
self._address: str = ...
self._target_id: str = ...
self._targets: Union[str, dict, None] = ...
self._method: set = ...
self._res_type: set = ...
self._caught: Queue = ...
self._is_regex: bool = ...
self._driver: Driver = ...
self._request_ids: dict = ...
self._extra_info_ids: dict = ...
self.listening: bool = ...
self._running_requests: int = ...
self._running_targets: int = ...
"""
:param owner: 页面对象
"""
...
@property
def targets(self) -> Optional[set]: ...
@ -42,42 +46,101 @@ class Listener(object):
targets: Union[str, list, tuple, set, bool, None] = True,
is_regex: Optional[bool] = False,
method: Union[str, list, tuple, set, bool, None] = ('GET', 'POST'),
res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = True) -> None: ...
res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = True) -> None:
"""指定要等待的数据包
:param targets: 要匹配的数据包url特征可用list等传入多个为True时获取所有
:param is_regex: 设置的target是否正则表达式
:param method: 设置监听的请求类型可指定多个为True时监听全部
:param res_type: 设置监听的资源类型可指定多个为True时监听全部可指定的值有
Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
:return: None
"""
...
def start(self,
targets: Union[str, list, tuple, set, bool, None] = None,
is_regex: Optional[bool] = None,
method: Union[str, list, tuple, set, bool, None] = None,
res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = None) -> None: ...
def stop(self) -> None: ...
def pause(self, clear: bool = True) -> None: ...
def resume(self) -> None: ...
res_type: Union[__RES_TYPE__, list, tuple, set, bool, None] = None) -> None:
"""拦截目标请求,每次拦截前清空结果
:param targets: 要匹配的数据包url特征可用list等传入多个为True时获取所有
:param is_regex: 设置的target是否正则表达式为None时保持原来设置
:param method: 设置监听的请求类型可指定多个默认('GET', 'POST')为True时监听全部为None时保持原来设置
:param res_type: 设置监听的资源类型可指定多个默认为True时监听全部为None时保持原来设置可指定的值有
Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket,
Manifest, SignedExchange, Ping, CSPViolationReport, Preflight, Other
:return: None
"""
...
def wait(self,
count: int = 1,
timeout: float = None,
fit_count: bool = True,
raise_err: bool = None) -> Union[List[DataPacket], DataPacket, None]: ...
raise_err: bool = None) -> Union[List[DataPacket], DataPacket, None]:
"""等待符合要求的数据包到达指定数量
:param count: 需要捕捉的数据包数量
:param timeout: 超时时间为None无限等待
:param fit_count: 是否必须满足总数要求发生超时为True返回False为False返回已捕捉到的数据包
:param raise_err: 超时时是否抛出错误为None时根据Settings设置
:return: count为1时返回数据包对象大于1时返回列表超时且fit_count为True时返回False
"""
...
def steps(self,
count: int = None,
timeout: float = None,
gap=1) -> Iterable[Union[DataPacket, List[DataPacket]]]: ...
gap=1) -> Iterable[Union[DataPacket, List[DataPacket]]]:
"""用于单步操作,可实现每收到若干个数据包执行一步操作(如翻页)
:param count: 需捕获的数据包总数为None表示无限
:param timeout: 每个数据包等待时间为None表示无限
:param gap: 每接收到多少个数据包返回一次数据
:return: 用于在接收到监听目标时触发动作的可迭代对象
"""
...
@property
def results(self) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ...
def stop(self) -> None:
"""停止监听,清空已监听到的列表"""
...
def clear(self) -> None: ...
def pause(self, clear: bool = True) -> None:
"""暂停监听
:param clear: 是否清空已获取队列
:return: None
"""
...
def resume(self) -> None:
"""继续暂停的监听"""
...
def clear(self) -> None:
"""清空监听到但还没返回的结果"""
...
def wait_silent(self,
timeout: float = None,
targets_only: bool = False,
limit: int = 0) -> bool: ...
limit: int = 0) -> bool:
"""等待所有请求结束
:param timeout: 超时时间为None时无限等待
:param targets_only: 是否只等待targets指定的请求结束
:param limit: 剩下多少个连接时视为结束
:return: 返回是否等待成功
"""
...
def _to_target(self, target_id: str, address: str, owner: ChromiumBase) -> None: ...
def _to_target(self, target_id: str, address: str, owner: ChromiumBase) -> None:
"""切换监听的页面对象
:param target_id: 新页面对象_target_id
:param address: 新页面对象address
:param owner: 新页面对象
:return: None
"""
...
def _set_callback(self) -> None: ...
def _requestWillBeSent(self, **kwargs) -> None: ...
@ -91,34 +154,85 @@ class Listener(object):
def _loading_failed(self, **kwargs) -> None: ...
def _set_callback(self) -> None: ...
class FrameListener(Listener):
_owner: ChromiumFrame = ...
_is_diff: bool = ...
def __init__(self, owner: ChromiumFrame):
self._owner: ChromiumFrame = ...
self._is_diff: bool = ...
"""
:param owner: ChromiumFrame对象
"""
...
class DataPacket(object):
"""返回的数据包管理类"""
"""数据包类"""
tab_id: str = ...
target: str = ...
is_failed: bool = ...
_raw_request: Optional[dict] = ...
_raw_response: Optional[dict] = ...
_raw_post_data: Optional[str] = ...
_raw_body: Optional[str] = ...
_raw_fail_info: Optional[dict] = ...
_base64_body: bool = ...
_request: Optional[Request] = ...
_response: Optional[Response] = ...
_fail_info: Optional[FailInfo] = ...
_resource_type: Optional[str] = ...
_requestExtraInfo: Optional[dict] = ...
_responseExtraInfo: Optional[dict] = ...
def __init__(self, tab_id: str, target: [str, bool]):
self.tab_id: str = ...
self.target: str = ...
self.is_failed: bool = ...
self._raw_request: Optional[dict] = ...
self._raw_response: Optional[dict] = ...
self._raw_post_data: str = ...
self._raw_body: str = ...
self._raw_fail_info: Optional[dict] = ...
self._base64_body: bool = ...
self._request: Request = ...
self._response: Response = ...
self._fail_info: Optional[FailInfo] = ...
self._resource_type: str = ...
self._requestExtraInfo: Optional[dict] = ...
self._responseExtraInfo: Optional[dict] = ...
"""
:param tab_id: 产生这个数据包的tab的id
:param target: 监听目标
"""
...
@property
def url(self) -> str:
"""请求网址"""
...
@property
def method(self) -> str:
"""请求类型"""
...
@property
def frameId(self) -> str:
"""发出请求的frame id"""
...
@property
def resourceType(self) -> str:
"""资源类型"""
...
@property
def request(self) -> Request:
"""数据"""
...
@property
def response(self) -> Response:
"""Response数据"""
...
@property
def fail_info(self) -> Optional[FailInfo]:
"""请求失败数据"""
...
def wait_extra_info(self, timeout: float = None) -> bool:
"""等待额外的信息加载完成
:param timeout: 超时时间None为无限等待
:return: 是否等待成功
"""
...
@property
def _request_extra_info(self) -> Optional[dict]: ...
@ -126,116 +240,147 @@ class DataPacket(object):
@property
def _response_extra_info(self) -> Optional[dict]: ...
@property
def url(self) -> str: ...
@property
def method(self) -> str: ...
@property
def frameId(self) -> str: ...
@property
def resourceType(self) -> str: ...
@property
def request(self) -> Request: ...
@property
def response(self) -> Response: ...
@property
def fail_info(self) -> Optional[FailInfo]: ...
def wait_extra_info(self, timeout: float = None) -> bool: ...
class Request(object):
_data_packet: DataPacket = ...
_request: dict = ...
_raw_post_data: str = ...
_postData: Optional[str] = ...
url: str = ...
_headers: Union[CaseInsensitiveDict, None] = ...
method: str = ...
urlFragment = ...
hasPostData = ...
postDataEntries = ...
mixedContentType = ...
initialPriority = ...
referrerPolicy = ...
isLinkPreload = ...
trustTokenParams = ...
isSameSite = ...
urlFragment: str = ...
hasPostData: bool = ...
postDataEntries: List[dict] = ...
mixedContentType: Literal['blockable', 'optionally-blockable', 'none'] = ...
initialPriority: Literal['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh'] = ...
referrerPolicy: Literal['unsafe-url', 'no-referrer-when-downgrade', 'no-referrer', 'origin',
'origin-when-cross-origin', 'same-origin', 'strict-origin', 'strict-origin-when-cross-origin'] = ...
isLinkPreload: bool = ...
trustTokenParams: dict = ...
isSameSite: bool = ...
def __init__(self, data_packet: DataPacket, raw_request: dict, post_data: str):
self._data_packet: DataPacket = ...
self._request: dict = ...
self._raw_post_data: str = ...
self._postData: str = ...
def __init__(self,
data_packet: DataPacket,
raw_request: dict,
post_data: str):
"""
:param data_packet: DataPacket对象
:param raw_request: 未处理的请求数据
:param post_data: post发送的数据
"""
...
@property
def headers(self) -> dict: ...
def headers(self) -> dict:
"""以大小写不敏感字典返回headers数据"""
...
@property
def postData(self) -> Any: ...
def params(self) -> dict:
"""dict格式返回请求url中的参数"""
...
@property
def cookies(self) -> List[dict]: ...
def postData(self) -> Any:
"""返回postData数据"""
...
@property
def extra_info(self) -> Optional[RequestExtraInfo]: ...
def cookies(self) -> List[dict]:
"""以list形式返回发送的cookies"""
...
@property
def extra_info(self) -> Optional[RequestExtraInfo]:
"""返回额外数据"""
...
class Response(object):
url = ...
status = ...
statusText = ...
headersText = ...
mimeType = ...
requestHeaders = ...
requestHeadersText = ...
connectionReused = ...
_data_packet: DataPacket = ...
_response: dict = ...
_raw_body: str = ...
_is_base64_body: bool = ...
_body: Union[str, dict, bytes, None] = ...
_headers: Union[dict, CaseInsensitiveDict, None] = ...
url: str = ...
status: int = ...
statusText: str = ...
headersText: str = ...
mimeType: str = ...
requestHeaders: dict = ...
requestHeadersText: str = ...
connectionReused: bool = ...
connectionId = ...
remoteIPAddress = ...
remotePort = ...
fromDiskCache = ...
fromServiceWorker = ...
fromPrefetchCache = ...
encodedDataLength = ...
timing = ...
serviceWorkerResponseSource = ...
responseTime = ...
cacheStorageCacheName = ...
protocol = ...
alternateProtocolUsage = ...
securityState = ...
securityDetails = ...
remoteIPAddress: str = ...
remotePort: int = ...
fromDiskCache: bool = ...
fromServiceWorker: bool = ...
fromPrefetchCache: bool = ...
fromEarlyHints: bool = ...
serviceWorkerRouterInfo: dict = ...
encodedDataLength: int = ...
timing: dict = ...
serviceWorkerResponseSource: Literal['cache-storage', 'http-cache', 'fallback-code', 'network'] = ...
responseTime: float = ...
cacheStorageCacheName: str = ...
protocol: str = ...
alternateProtocolUsage: Literal['alternativeJobWonWithoutRace', 'alternativeJobWonRace', 'mainJobWonRace',
'mappingMissing', 'broken', 'dnsAlpnH3JobWonWithoutRace', 'dnsAlpnH3JobWonRace', 'unspecifiedReason'] = ...
securityState: Literal['unknown', 'neutral', 'insecure', 'secure', 'info', 'insecure-broken'] = ...
securityDetails: dict = ...
def __init__(self, data_packet: DataPacket, raw_response: dict, raw_body: str, base64_body: bool):
self._data_packet: DataPacket = ...
self._response: dict = ...
self._raw_body: str = ...
self._is_base64_body: bool = ...
self._body: Union[str, dict, None] = ...
self._headers: dict = ...
def __init__(self,
data_packet: DataPacket,
raw_response: dict,
raw_body: str,
base64_body: bool):
"""
:param data_packet: DataPacket对象
:param raw_response: 未处理的response信息
:param raw_body: 未处理的body
:param base64_body: body是否base64格式
"""
...
@property
def extra_info(self) -> Optional[ResponseExtraInfo]: ...
def headers(self) -> CaseInsensitiveDict:
"""以大小写不敏感字典返回headers数据"""
...
@property
def headers(self) -> CaseInsensitiveDict: ...
def raw_body(self) -> str:
"""返回未被处理的body文本"""
...
@property
def raw_body(self) -> str: ...
def body(self) -> Any:
"""返回body内容如果是json格式自动进行转换如果时图片格式进行base64转换其它格式直接返回文本"""
...
@property
def body(self) -> Any: ...
def extra_info(self) -> Optional[ResponseExtraInfo]:
"""额外信息"""
...
class ExtraInfo(object):
_extra_info: dict = ...
def __init__(self, extra_info: dict):
self._extra_info: dict = ...
"""
:param extra_info: dict格式信息
"""
...
@property
def all_info(self) -> dict: ...
def all_info(self) -> dict:
"""以dict形式返回所有额外信息"""
...
class RequestExtraInfo(ExtraInfo):
@ -261,10 +406,14 @@ class ResponseExtraInfo(ExtraInfo):
class FailInfo(object):
    """Type stub for the object describing a failed request."""
    _data_packet: DataPacket
    # Declared once as ``dict`` to match the ``fail_info: dict`` constructor
    # argument; the conflicting second ``float`` annotation was dropped.
    _fail_info: dict
    errorText: str
    canceled: bool
    blockedReason: Optional[str]
    corsErrorStatus: Optional[str]

    def __init__(self, data_packet: DataPacket, fail_info: dict):
        """
        :param data_packet: DataPacket object
        :param fail_info: the failure data that was returned
        """
        ...

View File

@ -2,112 +2,104 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
class ElementRect(object):
    """Geometry of an element in page, viewport and screen coordinates.

    Values come from CDP ``DOM.getBoxModel`` / ``Page.getLayoutMetrics`` results
    and small JS snippets, all issued through the element's ``owner``.
    Box-model quads are flat lists ``[x1, y1, x2, y2, x3, y3, x4, y4]``; the
    original docstrings give the corner order as top-left, top-right,
    bottom-right, bottom-left.
    """

    def __init__(self, ele):
        """
        :param ele: element object; this class reads its ``owner``, ``_backend_id``,
                    ``_node_id``, ``_obj_id`` and ``_run_js``
        """
        self._ele = ele

    @property
    def corners(self):
        """Return the four border-box corners in page coordinates as a list of (x, y) tuples.

        NOTE(review): the original docstring says elements without a size raise
        NoRectError; that would come from the owner's CDP call - confirm.
        """
        quad = self._get_viewport_rect('border')
        # One layout-metrics query serves all four corners.
        view = self._ele.owner._run_cdp_loaded('Page.getLayoutMetrics')['visualViewport']
        sx = view['pageX']
        sy = view['pageY']
        return [(quad[i] + sx, quad[i + 1] + sy) for i in range(0, 8, 2)]

    @property
    def viewport_corners(self):
        """Return the four border-box corners in viewport coordinates as a tuple of (x, y) tuples."""
        quad = self._get_viewport_rect('border')
        return tuple((quad[i], quad[i + 1]) for i in range(0, 8, 2))

    @property
    def size(self):
        """Return the element size as (width, height), computed from the border quad."""
        ele = self._ele
        border = ele.owner._run_cdp('DOM.getBoxModel', backendNodeId=ele._backend_id,
                                    nodeId=ele._node_id, objectId=ele._obj_id)['model']['border']
        return border[2] - border[0], border[5] - border[1]

    @property
    def location(self):
        """Return the page coordinates of the element's top-left corner."""
        return self._get_page_coord(*self.viewport_location)

    @property
    def midpoint(self):
        """Return the page coordinates of the element's midpoint."""
        return self._get_page_coord(*self.viewport_midpoint)

    @property
    def click_point(self):
        """Return the page coordinates of the point used for clicking the element."""
        return self._get_page_coord(*self.viewport_click_point)

    @property
    def viewport_location(self):
        """Return the viewport coordinates of the element's top-left corner."""
        quad = self._get_viewport_rect('border')
        return quad[0], quad[1]

    @property
    def viewport_midpoint(self):
        """Return the viewport coordinates of the element's midpoint (integer-halved extents)."""
        m = self._get_viewport_rect('border')
        return m[0] + (m[2] - m[0]) // 2, m[3] + (m[5] - m[3]) // 2

    @property
    def viewport_click_point(self):
        """Return the viewport coordinates of the click point.

        x is the horizontal midpoint of the border box; y is 3 units below the
        top edge of the padding box.
        """
        padding = self._get_viewport_rect('padding')
        return self.viewport_midpoint[0], padding[1] + 3

    @property
    def screen_location(self):
        """Return the screen coordinates of the element's top-left corner."""
        return self._get_screen_coord(*self.viewport_location)

    @property
    def screen_midpoint(self):
        """Return the screen coordinates of the element's midpoint."""
        return self._get_screen_coord(*self.viewport_midpoint)

    @property
    def screen_click_point(self):
        """Return the screen coordinates of the element's click point."""
        return self._get_screen_coord(*self.viewport_click_point)

    @property
    def scroll_position(self):
        """Return the element's scroll offsets as ints ``(scrollLeft, scrollTop)``."""
        raw = self._ele._run_js('return this.scrollLeft.toString() + " " + this.scrollTop.toString();')
        left, top = raw.split(' ')
        return int(left), int(top)

    def _get_viewport_rect(self, quad):
        """Return one box-model quad of the element in viewport coordinates.

        :param quad: key of the box model to read, e.g. 'margin', 'border', 'padding'
        :return: flat list with the four corner coordinates
        """
        return self._ele.owner._run_cdp('DOM.getBoxModel', backendNodeId=self._ele._backend_id)['model'][quad]

    def _get_page_coord(self, x, y):
        """Convert viewport coordinates to page coordinates by adding the visual viewport offset."""
        view = self._ele.owner._run_cdp_loaded('Page.getLayoutMetrics')['visualViewport']
        return x + view['pageX'], y + view['pageY']

    def _get_screen_coord(self, ex, ey):
        """Convert viewport coordinates of this element to screen coordinates.

        Adds the owner's viewport position and scales by ``window.devicePixelRatio``.
        When the owner has a truthy ``_is_diff_domain`` attribute, the owner's
        own ``rect.screen_location`` is added on top.
        """
        owner = self._ele.owner
        vx, vy = owner.rect.viewport_location
        ratio = owner._run_js('return window.devicePixelRatio;')
        sx, sy = (vx + ex) * ratio, (ey + vy) * ratio
        if getattr(owner, '_is_diff_domain', None):
            ox, oy = owner.rect.screen_location
            return sx + ox, sy + oy
        return sx, sy
@ -115,19 +107,14 @@ class ElementRect(object):
class TabRect(object):
def __init__(self, owner):
"""
:param owner: Page对象和Tab对象
"""
self._owner = owner
@property
def window_state(self):
"""返回窗口状态normal、fullscreen、maximized、minimized"""
return self._get_window_rect()['windowState']
@property
def window_location(self):
"""返回窗口在屏幕上的坐标,左上角为(0, 0)"""
r = self._get_window_rect()
if r['windowState'] in ('maximized', 'fullscreen'):
return 0, 0
@ -135,7 +122,6 @@ class TabRect(object):
@property
def window_size(self):
"""返回窗口大小"""
r = self._get_window_rect()
if r['windowState'] == 'fullscreen':
return r['width'], r['height']
@ -146,14 +132,12 @@ class TabRect(object):
@property
def page_location(self):
"""返回页面左上角在屏幕中坐标,左上角为(0, 0)"""
w, h = self.viewport_location
r = self._get_page_rect()['layoutViewport']
return w - r['pageX'], h - r['pageY']
@property
def viewport_location(self):
"""返回视口在屏幕中坐标,左上角为(0, 0)"""
w_bl, h_bl = self.window_location
w_bs, h_bs = self.window_size
w_vs, h_vs = self.viewport_size_with_scrollbar
@ -161,74 +145,71 @@ class TabRect(object):
@property
def size(self):
    """Return the full page size as (width, height), read from ``contentSize`` of the page metrics."""
    content = self._get_page_rect()['contentSize']
    return content['width'], content['height']
@property
def viewport_size(self):
    """Return the viewport size as (width, height), read from ``clientWidth``/``clientHeight`` of ``visualViewport``.

    The original docstring notes that scrollbars are not included.
    """
    view = self._get_page_rect()['visualViewport']
    return view['clientWidth'], view['clientHeight']
@property
def viewport_size_with_scrollbar(self):
    """Return ``(window.innerWidth, window.innerHeight)`` as ints.

    The original docstring describes this as the viewport size including
    scrollbars. The values are fetched with a single JS call that joins both
    numbers with a space.
    """
    raw = self._owner._run_js('return window.innerWidth.toString() + " " + window.innerHeight.toString();')
    width, height = raw.split(' ')
    return int(width), int(height)
@property
def scroll_position(self):
    """Return ``(pageX, pageY)`` of the ``visualViewport`` entry of the page metrics."""
    view = self._get_page_rect()['visualViewport']
    return view['pageX'], view['pageY']
def _get_page_rect(self):
    """Return the result of the CDP ``Page.getLayoutMetrics`` call made through the owner."""
    return self._owner._run_cdp_loaded('Page.getLayoutMetrics')
def _get_window_rect(self):
    """Return the ``bounds`` dict of the browser window hosting this tab.

    Issues CDP ``Browser.getWindowForTarget`` for the owner's ``tab_id`` through
    the browser-level driver.
    """
    owner = self._owner
    reply = owner.browser._driver.run('Browser.getWindowForTarget', targetId=owner.tab_id)
    return reply['bounds']
class FrameRect(object):
    """Rect helper for a frame object (the original docstring says: used for cross-domain iframes).

    Position values are delegated to the rect of the frame's ``frame_ele``;
    content size and scroll offsets are read via JS on the frame's ``doc_ele``.
    """

    def __init__(self, frame):
        """
        :param frame: frame object exposing ``frame_ele`` and ``doc_ele``
        """
        self._frame = frame

    @property
    def location(self):
        """Return ``frame_ele.rect.location``."""
        return self._frame.frame_ele.rect.location

    @property
    def viewport_location(self):
        """Return ``frame_ele.rect.viewport_location``."""
        return self._frame.frame_ele.rect.viewport_location

    @property
    def screen_location(self):
        """Return ``frame_ele.rect.screen_location``."""
        return self._frame.frame_ele.rect.screen_location

    @property
    def size(self):
        """Return ``(body.scrollWidth, body.scrollHeight)`` of the frame's document."""
        doc = self._frame.doc_ele
        width = doc._run_js('return this.body.scrollWidth')
        height = doc._run_js('return this.body.scrollHeight')
        return width, height

    @property
    def viewport_size(self):
        """Return the size of the frame element itself (``frame_ele.rect.size``)."""
        return self._frame.frame_ele.rect.size

    @property
    def corners(self):
        """Return ``frame_ele.rect.corners``."""
        return self._frame.frame_ele.rect.corners

    @property
    def viewport_corners(self):
        """Return ``frame_ele.rect.viewport_corners``."""
        return self._frame.frame_ele.rect.viewport_corners

    @property
    def scroll_position(self):
        """Return the document element's ``(scrollLeft, scrollTop)`` as ints."""
        raw = self._frame.doc_ele._run_js('return this.documentElement.scrollLeft.toString() + " " '
                                          '+ this.documentElement.scrollTop.toString();')
        left, top = raw.split(' ')
        return int(left), int(top)

View File

@ -2,119 +2,214 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Tuple, Union
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab, WebPageTab
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._pages.web_page import WebPage
class ElementRect(object):
    """Type stub for the element geometry helper."""
    _ele: ChromiumElement = ...

    def __init__(self, ele: ChromiumElement):
        """
        :param ele: ChromiumElement object
        """
        ...

    @property
    def corners(self) -> Tuple[Tuple[float, float], ...]:
        """Return the element's four corner coordinates, in the order top-left, top-right,
        bottom-right, bottom-left; an element without a size raises NoRectError"""
        ...

    @property
    def viewport_corners(self) -> Tuple[Tuple[float, float], ...]:
        """Return the element's four corner coordinates in the viewport, in the order top-left,
        top-right, bottom-right, bottom-left; an element without a size raises NoRectError"""
        ...

    @property
    def size(self) -> Tuple[float, float]:
        """Return the element size as (width, height)"""
        ...

    @property
    def location(self) -> Tuple[float, float]:
        """Return the absolute coordinates of the element's top-left corner"""
        ...

    @property
    def midpoint(self) -> Tuple[float, float]:
        """Return the absolute coordinates of the element's midpoint"""
        ...

    @property
    def click_point(self) -> Tuple[float, float]:
        """Return the absolute coordinates of the point where the element receives clicks"""
        ...

    @property
    def viewport_location(self) -> Tuple[float, float]:
        """Return the viewport coordinates of the element's top-left corner"""
        ...

    @property
    def viewport_midpoint(self) -> Tuple[float, float]:
        """Return the viewport coordinates of the element's midpoint"""
        ...

    @property
    def viewport_click_point(self) -> Tuple[float, float]:
        """Return the viewport coordinates of the point where the element receives clicks"""
        ...

    @property
    def screen_location(self) -> Tuple[float, float]:
        """Return the screen coordinates of the element's top-left corner; the screen's top-left is (0, 0)"""
        ...

    @property
    def screen_midpoint(self) -> Tuple[float, float]:
        """Return the screen coordinates of the element's midpoint; the screen's top-left is (0, 0)"""
        ...

    @property
    def screen_click_point(self) -> Tuple[float, float]:
        """Return the screen coordinates of the point where the element receives clicks;
        the screen's top-left is (0, 0)"""
        ...

    @property
    def scroll_position(self) -> Tuple[float, float]:
        """Return the scrollbar position as (x, y)"""
        ...

    def _get_viewport_rect(self, quad: str) -> Union[list, None]:
        """Return the range of the requested box in the visible window
        :param quad: box type: margin, border, padding
        :return: coordinates of the four corners
        """
        ...

    def _get_page_coord(self, x: float, y: float) -> Tuple[float, float]:
        """Convert viewport coordinates to absolute coordinates
        :param x: viewport x coordinate
        :param y: viewport y coordinate
        :return: coordinate tuple
        """
        ...
class TabRect(object):
def __init__(self, owner: ChromiumBase):
self._owner: Union[ChromiumPage, ChromiumTab, WebPage, WebPageTab] = ...
"""
:param owner: Page对象和Tab对象
"""
self._owner: Union[ChromiumPage, ChromiumTab, WebPage, MixTab] = ...
@property
def window_state(self) -> str: ...
def window_state(self) -> str:
"""返回窗口状态normal、fullscreen、maximized、minimized"""
...
@property
def window_location(self) -> Tuple[int, int]: ...
def window_location(self) -> Tuple[int, int]:
"""返回窗口在屏幕上的坐标,左上角为(0, 0)"""
...
@property
def page_location(self) -> Tuple[int, int]: ...
def window_size(self) -> Tuple[int, int]:
"""返回窗口大小"""
...
@property
def viewport_location(self) -> Tuple[int, int]: ...
def page_location(self) -> Tuple[int, int]:
"""返回页面左上角在屏幕中坐标,左上角为(0, 0)"""
...
@property
def window_size(self) -> Tuple[int, int]: ...
def viewport_location(self) -> Tuple[int, int]:
"""返回视口在屏幕中坐标,左上角为(0, 0)"""
...
@property
def size(self) -> Tuple[int, int]: ...
def size(self) -> Tuple[int, int]:
"""返回页面总宽高,格式:(宽, 高)"""
...
@property
def viewport_size(self) -> Tuple[int, int]: ...
def viewport_size(self) -> Tuple[int, int]:
"""返回视口宽高,不包括滚动条,格式:(宽, 高)"""
...
@property
def viewport_size_with_scrollbar(self) -> Tuple[int, int]: ...
def viewport_size_with_scrollbar(self) -> Tuple[int, int]:
"""返回视口宽高,包括滚动条,格式:(宽, 高)"""
...
def _get_page_rect(self) -> dict: ...
@property
def scroll_position(self) -> Tuple[int, int]:
"""返回滚动条位置,格式:(x, y)"""
...
def _get_window_rect(self) -> dict: ...
def _get_page_rect(self) -> dict:
"""获取页面范围信息"""
...
def _get_window_rect(self) -> dict:
"""获取窗口范围信息"""
...
class FrameRect(object):
_frame: ChromiumFrame = ...
def __init__(self, frame: ChromiumFrame):
self._frame: ChromiumFrame = ...
"""
:param frame: ChromiumFrame对象
"""
...
@property
def location(self) -> Tuple[float, float]: ...
def location(self) -> Tuple[float, float]:
"""返回iframe元素左上角的绝对坐标"""
...
@property
def viewport_location(self) -> Tuple[float, float]: ...
def viewport_location(self) -> Tuple[float, float]:
"""返回元素在视口中坐标,左上角为(0, 0)"""
...
@property
def screen_location(self) -> Tuple[float, float]: ...
def screen_location(self) -> Tuple[float, float]:
"""返回元素左上角在屏幕上坐标,左上角为(0, 0)"""
...
@property
def size(self) -> Tuple[float, float]: ...
def size(self) -> Tuple[float, float]:
"""返回frame内页面尺寸格式(宽, 高)"""
...
@property
def viewport_size(self) -> Tuple[float, float]: ...
def viewport_size(self) -> Tuple[float, float]:
"""返回视口宽高,格式:(宽, 高)"""
...
@property
def corners(self) -> Tuple[Tuple[float, float], ...]: ...
def corners(self) -> Tuple[Tuple[float, float], ...]:
"""返回元素四个角坐标,顺序:左上、右上、右下、左下"""
...
@property
def viewport_corners(self) -> Tuple[Tuple[float, float], ...]: ...
def viewport_corners(self) -> Tuple[Tuple[float, float], ...]:
"""返回元素四个角视口坐标,顺序:左上、右上、右下、左下"""
...
@property
def scroll_position(self) -> Tuple[float, float]:
"""返回滚动条位置,格式:(x, y)"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from base64 import b64decode
from os.path import sep
@ -26,29 +25,24 @@ class Screencast(object):
@property
def set_mode(self):
"""返回用于设置录屏幕式的对象"""
return ScreencastMode(self)
def start(self, save_path=None):
"""开始录屏
:param save_path: 录屏保存位置
:return: None
"""
self.set_save_path(save_path)
if self._path is None:
raise ValueError('save_path必须设置。')
if self._mode in ('frugal_video', 'video'):
if self._owner.browser.page._chromium_options.tmp_path:
if self._owner.browser._chromium_options.tmp_path:
self._tmp_path = Path(
self._owner.browser.page._chromium_options.tmp_path) / f'screencast_tmp_{time()}_{randint(0, 100)}'
self._owner.browser._chromium_options.tmp_path) / f'screencast_tmp_{time()}_{randint(0, 100)}'
else:
self._tmp_path = Path(gettempdir()) / 'DrissionPage' / f'screencast_tmp_{time()}_{randint(0, 100)}'
self._tmp_path.mkdir(parents=True, exist_ok=True)
if self._mode.startswith('frugal'):
self._owner.driver.set_callback('Page.screencastFrame', self._onScreencastFrame)
self._owner.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100)
self._owner._run_cdp('Page.startScreencast', everyNthFrame=1, quality=100)
elif not self._mode.startswith('js'):
self._running = True
@ -63,7 +57,7 @@ class Screencast(object):
? "video/webm; codecs=vp9"
: "video/webm"
mediaRecorder = new MediaRecorder(stream, {mimeType: mime})
DrissionPage_Screencast_chunks = []
DrissionPage_Screencast_chunks = [];
mediaRecorder.addEventListener('dataavailable', function(e) {
DrissionPage_Screencast_blob_ok = false;
DrissionPage_Screencast_chunks.push(e.data);
@ -79,39 +73,38 @@ class Screencast(object):
}
'''
print('请手动选择要录制的目标。')
self._owner.run_js('var DrissionPage_Screencast_blob;var DrissionPage_Screencast_blob_ok=false;')
self._owner.run_js(js)
self._owner._run_js('var DrissionPage_Screencast_blob;var DrissionPage_Screencast_blob_ok=false;')
self._owner._run_js(js)
print('开始录制')
def stop(self, video_name=None):
"""停止录屏
:param video_name: 视频文件名为None时以当前时间名命
:return: 文件路径
"""
if video_name and not video_name.endswith('mp4'):
video_name = f'{video_name}.mp4'
name = f'{time()}.mp4' if not video_name else video_name
path = f'{self._path}{sep}{name}'
if self._mode.startswith('js'):
self._owner.run_js('mediaRecorder.stop();', as_expr=True)
while not self._owner.run_js('return DrissionPage_Screencast_blob_ok;'):
sleep(.1)
blob = self._owner.run_js('return DrissionPage_Screencast_blob;')
uuid = self._owner.run_cdp('IO.resolveBlob', objectId=blob['result']['objectId'])['uuid']
data = self._owner.run_cdp('IO.read', handle=f'blob:{uuid}')['data']
self._owner._run_js('mediaRecorder.stop();', as_expr=True)
while not self._owner._run_js('return DrissionPage_Screencast_blob_ok;'):
sleep(.05)
with open(path, 'wb') as f:
f.write(b64decode(data))
f.write(b64decode(self._owner._run_js('return DrissionPage_Screencast_blob;')))
self._owner._run_js('DrissionPage_Screencast_blob_ok = false;'
'DrissionPage_Screencast_chunks = [];'
'DrissionPage_Screencast_blob = null', as_expr=True)
print('停止录制')
return path
if self._mode.startswith('frugal'):
self._owner.driver.set_callback('Page.screencastFrame', None)
self._owner.run_cdp('Page.stopScreencast')
self._owner._run_cdp('Page.stopScreencast')
else:
self._enable = False
while self._running:
sleep(.1)
sleep(.01)
if self._mode.endswith('imgs'):
print('停止录制')
return str(Path(self._path).absolute())
if not str(self._path).isascii():
@ -128,7 +121,7 @@ class Screencast(object):
imgInfo = img.shape
size = (imgInfo[1], imgInfo[0])
videoWrite = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size)
videoWrite = VideoWriter(path, VideoWriter_fourcc(*"H264"), 5, size)
for i in pic_list:
img = imread(str(i))
@ -136,13 +129,10 @@ class Screencast(object):
rmtree(self._tmp_path)
self._tmp_path = None
print('停止录制')
return f'{self._path}{sep}{name}'
def set_save_path(self, save_path=None):
"""设置保存路径
:param save_path: 保存路径
:return: None
"""
if save_path:
save_path = Path(save_path)
if save_path.exists() and save_path.is_file():
@ -151,7 +141,6 @@ class Screencast(object):
self._path = save_path
def _run(self):
"""非节俭模式运行方法"""
self._running = True
path = self._tmp_path or self._path
while self._enable:
@ -160,11 +149,10 @@ class Screencast(object):
self._running = False
def _onScreencastFrame(self, **kwargs):
"""节俭模式运行方法"""
path = self._tmp_path or self._path
with open(f'{path}{sep}{kwargs["metadata"]["timestamp"]}.jpg', 'wb') as f:
f.write(b64decode(kwargs['data']))
self._owner.run_cdp('Page.screencastFrameAck', sessionId=kwargs['sessionId'])
self._owner._run_cdp('Page.screencastFrameAck', sessionId=kwargs['sessionId'])
class ScreencastMode(object):
@ -172,21 +160,16 @@ class ScreencastMode(object):
self._screencast = screencast
def video_mode(self):
"""持续视频模式,生成的视频没有声音"""
self._screencast._mode = 'video'
def frugal_video_mode(self):
"""设置节俭视频模式,页面有变化时才录制,生成的视频没有声音"""
self._screencast._mode = 'frugal_video'
def js_video_mode(self):
"""设置使用js录制视频模式可生成有声音的视频但需要手动启动"""
self._screencast._mode = 'js_video'
def frugal_imgs_mode(self):
"""设置节俭视频模式,页面有变化时才截图"""
self._screencast._mode = 'frugal_imgs'
def imgs_mode(self):
"""设置图片模式,持续对页面进行截图"""
self._screencast._mode = 'imgs'

View File

@ -2,48 +2,87 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union
from typing import Union, Optional
from .._pages.chromium_base import ChromiumBase
class Screencast(object):
_owner: ChromiumBase = ...
_path: Optional[Path] = ...
_tmp_path: Optional[Path] = ...
_running: bool = ...
_enable: bool = ...
_mode: str = ...
def __init__(self, owner: ChromiumBase):
self._owner: ChromiumBase = ...
self._path: Path = ...
self._tmp_path: Path = ...
self._running: bool = ...
self._enable: bool = ...
self._mode: str = ...
"""
:param owner: 页面对象
"""
@property
def set_mode(self) -> ScreencastMode: ...
def set_mode(self) -> ScreencastMode:
"""返回用于设置录屏模式的对象"""
...
def start(self, save_path: Union[str, Path] = None) -> None: ...
def start(self, save_path: Union[str, Path] = None) -> None:
"""开始录屏
:param save_path: 录屏保存位置
:return: None
"""
...
def stop(self, video_name: str = None) -> str: ...
def stop(self, video_name: str = None) -> str:
"""停止录屏
:param video_name: 视频文件名,为None时以当前时间命名
:return: 文件路径
"""
...
def set_save_path(self, save_path: Union[str, Path] = None) -> None: ...
def set_save_path(self, save_path: Union[str, Path] = None) -> None:
"""设置保存路径
:param save_path: 保存路径
:return: None
"""
...
def _run(self) -> None: ...
def _run(self) -> None:
"""非节俭模式运行方法"""
...
def _onScreencastFrame(self, **kwargs) -> None: ...
def _onScreencastFrame(self, **kwargs) -> None:
"""节俭模式运行方法"""
...
class ScreencastMode(object):
_screencast: Screencast = ...
def __init__(self, screencast: Screencast):
self._screencast: Screencast = ...
"""
:param screencast: Screencast对象
"""
...
def video_mode(self) -> None: ...
def video_mode(self) -> None:
"""持续视频模式,生成的视频没有声音"""
...
def frugal_video_mode(self) -> None: ...
def frugal_video_mode(self) -> None:
"""设置节俭视频模式,页面有变化时才录制,生成的视频没有声音"""
...
def js_video_mode(self) -> None: ...
def js_video_mode(self) -> None:
"""设置使用js录制视频模式可生成有声音的视频但需要手动启动"""
...
def frugal_imgs_mode(self) -> None: ...
def frugal_imgs_mode(self) -> None:
"""设置节俭图片模式,页面有变化时才截图"""
...
def imgs_mode(self) -> None: ...
def imgs_mode(self) -> None:
"""设置图片模式,持续对页面进行截图"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from time import sleep, perf_counter
@ -11,91 +10,74 @@ from time import sleep, perf_counter
class Scroller(object):
"""用于滚动的对象"""
def __init__(self, ele):
"""
:param ele: 元素对象
"""
self._driver = ele
self.t1 = self.t2 = 'this'
def __init__(self, owner):
self._owner = owner
self._t1 = self._t2 = 'this'
self._wait_complete = False
def __call__(self, pixel=300):
return self.down(pixel)
def _run_js(self, js):
js = js.format(self.t1, self.t2, self.t2)
self._driver.run_js(js)
js = js.format(self._t1, self._t2, self._t2)
self._owner._run_js(js)
self._wait_scrolled()
def to_top(self):
"""滚动到顶端,水平位置不变"""
self._run_js('{}.scrollTo({}.scrollLeft, 0);')
return self._owner
def to_bottom(self):
"""滚动到底端,水平位置不变"""
self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight);')
return self._owner
def to_half(self):
"""滚动到垂直中间位置,水平位置不变"""
self._run_js('{}.scrollTo({}.scrollLeft, {}.scrollHeight/2);')
return self._owner
def to_rightmost(self):
"""滚动到最右边,垂直位置不变"""
self._run_js('{}.scrollTo({}.scrollWidth, {}.scrollTop);')
return self._owner
def to_leftmost(self):
"""滚动到最左边,垂直位置不变"""
self._run_js('{}.scrollTo(0, {}.scrollTop);')
return self._owner
def to_location(self, x, y):
"""滚动到指定位置
:param x: 水平距离
:param y: 垂直距离
:return: None
"""
self._run_js(f'{{}}.scrollTo({x}, {y});')
return self._owner
def up(self, pixel=300):
"""向上滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: None
"""
pixel = -pixel
self._run_js(f'{{}}.scrollBy(0, {pixel});')
return self._owner
def down(self, pixel=300):
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: None
"""
self._run_js(f'{{}}.scrollBy(0, {pixel});')
return self._owner
def left(self, pixel=300):
"""向左滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: None
"""
pixel = -pixel
self._run_js(f'{{}}.scrollBy({pixel}, 0);')
return self._owner
def right(self, pixel=300):
"""向右滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: None
"""
self._run_js(f'{{}}.scrollBy({pixel}, 0);')
return self._owner
def _wait_scrolled(self):
"""等待滚动结束"""
if not self._wait_complete:
return
owner = self._driver.owner if self._driver._type == 'ChromiumElement' else self._driver
r = owner.run_cdp('Page.getLayoutMetrics')
owner = self._owner.owner if self._owner._type == 'ChromiumElement' else self._owner
r = owner._run_cdp('Page.getLayoutMetrics')
x = r['layoutViewport']['pageX']
y = r['layoutViewport']['pageY']
end_time = perf_counter() + owner.timeout
while perf_counter() < end_time:
sleep(.1)
r = owner.run_cdp('Page.getLayoutMetrics')
sleep(.02)
r = owner._run_cdp('Page.getLayoutMetrics')
x1 = r['layoutViewport']['pageX']
y1 = r['layoutViewport']['pageY']
@ -108,45 +90,30 @@ class Scroller(object):
class ElementScroller(Scroller):
def to_see(self, center=None):
"""滚动页面直到元素可见
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
self._driver.owner.scroll.to_see(self._driver, center=center)
self._owner.owner.scroll.to_see(self._owner, center=center)
return self._owner
def to_center(self):
"""元素尽量滚动到视口中间"""
self._driver.owner.scroll.to_see(self._driver, center=True)
self._owner.owner.scroll.to_see(self._owner, center=True)
return self._owner
class PageScroller(Scroller):
def __init__(self, owner):
"""
:param owner: 页面对象
"""
super().__init__(owner)
self.t1 = 'window'
self.t2 = 'document.documentElement'
self._t1 = 'window'
self._t2 = 'document.documentElement'
def to_see(self, loc_or_ele, center=None):
"""滚动页面直到元素可见
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
ele = self._driver._ele(loc_or_ele)
ele = self._owner._ele(loc_or_ele)
self._to_see(ele, center)
return self._owner
def _to_see(self, ele, center):
"""执行滚动页面直到元素可见
:param ele: 元素对象
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
txt = 'true' if center else 'false'
ele.run_js(f'this.scrollIntoViewIfNeeded({txt});')
ele._run_js(f'this.scrollIntoViewIfNeeded({txt});')
if center or (center is not False and ele.states.is_covered):
ele.run_js('''function getWindowScrollTop() {let scroll_top = 0;
ele._run_js('''function getWindowScrollTop() {let scroll_top = 0;
if (document.documentElement && document.documentElement.scrollTop) {
scroll_top = document.documentElement.scrollTop;
} else if (document.body) {scroll_top = document.body.scrollTop;}
@ -160,18 +127,14 @@ class PageScroller(Scroller):
class FrameScroller(PageScroller):
def __init__(self, frame):
def __init__(self, owner):
"""
:param frame: ChromiumFrame对象
:param owner: ChromiumFrame对象
"""
super().__init__(frame.doc_ele)
self.t1 = self.t2 = 'this.documentElement'
super().__init__(owner.doc_ele)
self._t1 = self._t2 = 'this.documentElement'
def to_see(self, loc_or_ele, center=None):
"""滚动页面直到元素可见
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
ele = loc_or_ele if loc_or_ele._type == 'ChromiumElement' else self._driver._ele(loc_or_ele)
ele = self._owner._ele(loc_or_ele)
self._to_see(ele, center)
return self._owner

View File

@ -2,76 +2,329 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._pages.web_page import WebPage
class Scroller(object):
def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement]):
self.t1: str = ...
self.t2: str = ...
self._driver: Union[ChromiumBase, ChromiumElement] = ...
self._wait_complete: bool = ...
_owner: Union[ChromiumBase, ChromiumElement] = ...
_t1: str = ...
_t2: str = ...
_wait_complete: bool = ...
def __init__(self, owner: Union[ChromiumBase, ChromiumElement]):
"""
:param owner: 元素对象或页面对象
"""
...
def __call__(self, pixel: int = 300) -> None:
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: None
"""
...
def to_top(self) -> None:
"""滚动到顶端,水平位置不变"""
...
def to_bottom(self) -> None:
"""滚动到底端,水平位置不变"""
...
def to_half(self) -> None:
"""滚动到垂直中间位置,水平位置不变"""
...
def to_rightmost(self) -> None:
"""滚动到最右边,垂直位置不变"""
...
def to_leftmost(self) -> None:
"""滚动到最左边,垂直位置不变"""
...
def to_location(self, x: int, y: int) -> None:
"""滚动到指定位置
:param x: 水平距离
:param y: 垂直距离
:return: None
"""
...
def up(self, pixel: int = 300) -> None:
"""向上滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: None
"""
...
def down(self, pixel: int = 300) -> None:
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: None
"""
...
def left(self, pixel: int = 300) -> None:
"""向左滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: None
"""
...
def right(self, pixel: int = 300) -> None:
"""向右滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: None
"""
...
def _run_js(self, js: str): ...
def to_top(self) -> None: ...
def to_bottom(self) -> None: ...
def to_half(self) -> None: ...
def to_rightmost(self) -> None: ...
def to_leftmost(self) -> None: ...
def to_location(self, x: int, y: int) -> None: ...
def up(self, pixel: int = 300) -> None: ...
def down(self, pixel: int = 300) -> None: ...
def left(self, pixel: int = 300) -> None: ...
def right(self, pixel: int = 300) -> None: ...
def _wait_scrolled(self) -> None: ...
def _wait_scrolled(self) -> None:
"""等待滚动结束"""
...
class ElementScroller(Scroller):
_owner: ChromiumElement = ...
def to_see(self, center: Union[bool, None] = None) -> None: ...
def __init__(self, owner: ChromiumElement):
"""
:param owner: 元素对象
"""
...
def to_center(self) -> None: ...
def to_see(self, center: Union[bool, None] = None) -> ChromiumElement:
"""滚动页面直到元素可见
:param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中
:return: 元素对象
"""
...
def to_center(self) -> ChromiumElement:
"""元素尽量滚动到视口中间"""
...
def to_top(self) -> ChromiumElement:
"""滚动到顶端,水平位置不变"""
...
def to_bottom(self) -> ChromiumElement:
"""滚动到底端,水平位置不变"""
...
def to_half(self) -> ChromiumElement:
"""滚动到垂直中间位置,水平位置不变"""
...
def to_rightmost(self) -> ChromiumElement:
"""滚动到最右边,垂直位置不变"""
...
def to_leftmost(self) -> ChromiumElement:
"""滚动到最左边,垂直位置不变"""
...
def to_location(self, x: int, y: int) -> ChromiumElement:
"""滚动到指定位置
:param x: 水平距离
:param y: 垂直距离
:return: 元素对象
"""
...
def up(self, pixel: int = 300) -> ChromiumElement:
"""向上滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: 元素对象
"""
...
def down(self, pixel: int = 300) -> ChromiumElement:
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: 元素对象
"""
...
def left(self, pixel: int = 300) -> ChromiumElement:
"""向左滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: 元素对象
"""
...
def right(self, pixel: int = 300) -> ChromiumElement:
"""向右滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: 元素对象
"""
...
class PageScroller(Scroller):
def __init__(self, owner: ChromiumBase): ...
_owner: Union[ChromiumBase, ChromiumElement] = ...
def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ...
def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ...
class FrameScroller(PageScroller):
def __init__(self, frame):
def __init__(self, owner: Union[ChromiumBase, ChromiumElement]):
"""
:param frame: ChromiumFrame对象
:param owner: 页面对象
"""
self._driver = frame.doc_ele
self.t1 = self.t2 = 'this.documentElement'
self._wait_complete = False
...
def to_see(self, loc_or_ele, center=None):
def to_see(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
center: Union[bool, None] = None) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动页面直到元素可见
:param loc_or_ele: 元素对象或定位信息,定位信息可以是loc元组或查询字符串
:param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中
:return: 页面对象
"""
ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele)
self._to_see(ele, center)
...
def to_top(self) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到顶端,水平位置不变"""
...
def to_bottom(self) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到底端,水平位置不变"""
...
def to_half(self) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到垂直中间位置,水平位置不变"""
...
def to_rightmost(self) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到最右边,垂直位置不变"""
...
def to_leftmost(self) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到最左边,垂直位置不变"""
...
def to_location(self, x: int, y: int) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""滚动到指定位置
:param x: 水平距离
:param y: 垂直距离
:return: 页面对象
"""
...
def up(self, pixel: int = 300) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""向上滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: 页面对象
"""
...
def down(self, pixel: int = 300) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: 页面对象
"""
...
def left(self, pixel: int = 300) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""向左滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: 页面对象
"""
...
def right(self, pixel: int = 300) -> Union[ChromiumTab, MixTab, ChromiumPage, WebPage]:
"""向右滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: 页面对象
"""
...
def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None:
"""执行滚动页面直到元素可见
:param ele: 元素对象
:param center: 是否尽量滚动到页面正中为None时如果被遮挡则滚动到页面正中
:return: None
"""
...
class FrameScroller(PageScroller):
_owner: ChromiumElement = ...
def __init__(self, owner: ChromiumFrame): ...
def to_see(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
center: Union[bool, None] = None) -> ChromiumFrame:
"""滚动页面直到元素可见
:param loc_or_ele: 元素对象或定位信息,定位信息可以是loc元组或查询字符串
:param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中
:return: ChromiumFrame对象
"""
...
def to_top(self) -> ChromiumFrame:
"""滚动到顶端,水平位置不变"""
...
def to_bottom(self) -> ChromiumFrame:
"""滚动到底端,水平位置不变"""
...
def to_half(self) -> ChromiumFrame:
"""滚动到垂直中间位置,水平位置不变"""
...
def to_rightmost(self) -> ChromiumFrame:
"""滚动到最右边,垂直位置不变"""
...
def to_leftmost(self) -> ChromiumFrame:
"""滚动到最左边,垂直位置不变"""
...
def to_location(self, x: int, y: int) -> ChromiumFrame:
"""滚动到指定位置
:param x: 水平距离
:param y: 垂直距离
:return: ChromiumFrame对象
"""
...
def up(self, pixel: int = 300) -> ChromiumFrame:
"""向上滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: ChromiumFrame对象
"""
...
def down(self, pixel: int = 300) -> ChromiumFrame:
"""向下滚动若干像素,水平位置不变
:param pixel: 滚动的像素
:return: ChromiumFrame对象
"""
...
def left(self, pixel: int = 300) -> ChromiumFrame:
"""向左滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: ChromiumFrame对象
"""
...
def right(self, pixel: int = 300) -> ChromiumFrame:
"""向右滚动若干像素,垂直位置不变
:param pixel: 滚动的像素
:return: ChromiumFrame对象
"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from time import perf_counter, sleep
@ -12,181 +11,101 @@ class SelectElement(object):
"""用于处理 select 标签"""
def __init__(self, ele):
"""
:param ele: select 元素对象
"""
if ele.tag != 'select':
raise TypeError("select方法只能在<select>元素使用。")
self._ele = ele
def __call__(self, text_or_index, timeout=None):
"""选定下拉列表中子元素
:param text_or_index: 根据文本值选或序号择选项若允许多选传入list或tuple可多选
:param timeout: 超时时间不输入默认实用页面超时时间
:return: None
"""
para_type = 'index' if isinstance(text_or_index, int) else 'text'
timeout = timeout if timeout is not None else self._ele.owner.timeout
if timeout is None:
timeout = self._ele.timeout
return self._select(text_or_index, para_type, timeout=timeout)
@property
def is_multi(self):
"""返回是否多选表单"""
return self._ele.attr('multiple') is not None
@property
def options(self):
"""返回所有选项元素组成的列表"""
return [i for i in self._ele.eles('xpath://option') if not isinstance(i, int)]
@property
def selected_option(self):
"""返回第一个被选中的option元素
:return: ChromiumElement对象或None
"""
ele = self._ele.run_js('return this.options[this.selectedIndex];')
return ele
return self._ele._run_js('return this.options[this.selectedIndex];')
@property
def selected_options(self):
"""返回所有被选中的option元素列表
:return: ChromiumElement对象组成的列表
"""
return [x for x in self.options if x.states.is_selected]
def all(self):
"""全选"""
if not self.is_multi:
raise TypeError("只能在多选菜单执行此操作。")
return self._by_loc('tag:option', 1, False)
def invert(self):
"""反选"""
if not self.is_multi:
raise TypeError("只能对多项选框执行反选。")
change = False
for i in self.options:
change = True
mode = 'false' if i.states.is_selected else 'true'
i.run_js(f'this.selected={mode};')
i._run_js(f'this.selected={mode};')
if change:
self._dispatch_change()
return self._ele
def clear(self):
"""清除所有已选项"""
if not self.is_multi:
raise TypeError("只能在多选菜单执行此操作。")
return self._by_loc('tag:option', 1, True)
def by_text(self, text, timeout=None):
"""此方法用于根据text值选择项。当元素是多选列表时可以接收list或tuple
:param text: text属性值传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: 是否选择成功
"""
return self._select(text, 'text', False, timeout)
def by_value(self, value, timeout=None):
"""此方法用于根据value值选择项。当元素是多选列表时可以接收list或tuple
:param value: value属性值传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: 是否选择成功
"""
return self._select(value, 'value', False, timeout)
def by_index(self, index, timeout=None):
"""此方法用于根据index值选择项。当元素是多选列表时可以接收list或tuple
:param index: 序号从1开始可传入负数获取倒数第几个传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: 是否选择成功
"""
return self._select(index, 'index', False, timeout)
def by_locator(self, locator, timeout=None):
"""用定位符选择指定的项
:param locator: 定位符
:param timeout: 超时时间
:return: 是否选择成功
"""
return self._by_loc(locator, timeout)
def by_option(self, option):
"""选中单个或多个option元素
:param option: option元素或它们组成的列表
:return: None
"""
self._select_options(option, 'true')
return self._select_options(option, 'true')
def cancel_by_text(self, text, timeout=None):
"""此方法用于根据text值取消选择项。当元素是多选列表时可以接收list或tuple
:param text: 文本传入list或tuple可取消多项
:param timeout: 超时时间不输入默认实用页面超时时间
:return: 是否取消成功
"""
return self._select(text, 'text', True, timeout)
def cancel_by_value(self, value, timeout=None):
"""此方法用于根据value值取消选择项。当元素是多选列表时可以接收list或tuple
:param value: value属性值传入list或tuple可取消多项
:param timeout: 超时时间不输入默认实用页面超时时间
:return: 是否取消成功
"""
return self._select(value, 'value', True, timeout)
def cancel_by_index(self, index, timeout=None):
"""此方法用于根据index值取消选择项。当元素是多选列表时可以接收list或tuple
:param index: 序号从1开始可传入负数获取倒数第几个传入list或tuple可取消多项
:param timeout: 超时时间不输入默认实用页面超时时间
:return: 是否取消成功
"""
return self._select(index, 'index', True, timeout)
def cancel_by_locator(self, locator, timeout=None):
"""用定位符取消选择指定的项
:param locator: 定位符
:param timeout: 超时时间
:return: 是否选择成功
"""
return self._by_loc(locator, timeout, True)
def cancel_by_option(self, option):
"""取消选中单个或多个option元素
:param option: option元素或它们组成的列表
:return: None
"""
self._select_options(option, 'false')
return self._select_options(option, 'false')
def _by_loc(self, loc, timeout=None, cancel=False):
"""用定位符取消选择指定的项
:param loc: 定位符
:param timeout: 超时时间
:param cancel: 是否取消选择
:return: 是否选择成功
"""
eles = self._ele.eles(loc, timeout)
if not eles:
return False
raise RuntimeError('没有找到指定选项。')
mode = 'false' if cancel else 'true'
if self.is_multi:
self._select_options(eles, mode)
else:
self._select_options(eles[0], mode)
return True
if not self.is_multi:
eles = eles[0]
return self._select_options(eles, mode)
def _select(self, condition, para_type='text', cancel=False, timeout=None):
"""选定或取消选定下拉列表中子元素
:param condition: 根据文本值选或序号择选项若允许多选传入list或tuple可多选
:param para_type: 参数类型可选 'text''value''index'
:param cancel: 是否取消选择
:return: 是否选择成功
"""
if not self.is_multi and isinstance(condition, (list, tuple)):
raise TypeError('单选列表只能传入str格式。')
mode = 'false' if cancel else 'true'
timeout = timeout if timeout is not None else self._ele.owner.timeout
if timeout is None:
timeout = self._ele.timeout
condition = set(condition) if isinstance(condition, (list, tuple)) else {condition}
if para_type in ('text', 'value'):
@ -195,14 +114,6 @@ class SelectElement(object):
return self._index(condition, mode, timeout)
def _text_value(self, condition, para_type, mode, timeout):
"""执行text和value搜索
:param condition: 条件set
:param para_type: 参数类型可选 'text''value'
:param mode: 'true' 'false'
:param timeout: 超时时间
:return: 是否选择成功
"""
ok = False
text_len = len(condition)
eles = []
end_time = perf_counter() + timeout
@ -213,57 +124,34 @@ class SelectElement(object):
eles = [i for i in self.options if i.attr('value') in condition]
if len(eles) >= text_len:
ok = True
break
return self._select_options(eles, mode)
sleep(.01)
if ok:
self._select_options(eles, mode)
return True
return False
raise RuntimeError('没有找到指定选项。')
def _index(self, condition, mode, timeout):
"""执行index搜索
:param condition: 条件set
:param mode: 'true' 'false'
:param timeout: 超时时间
:return: 是否选择成功
"""
ok = False
condition = [int(i) for i in condition]
text_len = abs(max(condition, key=abs))
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if len(self.options) >= text_len:
ok = True
break
eles = self.options
eles = [eles[i - 1] if i > 0 else eles[i] for i in condition]
return self._select_options(eles, mode)
sleep(.01)
if ok:
eles = self.options
eles = [eles[i - 1] if i > 0 else eles[i] for i in condition]
self._select_options(eles, mode)
return True
return False
raise RuntimeError('没有找到指定选项。')
def _select_options(self, option, mode):
"""选中或取消某个选项
:param option: options元素对象
:param mode: 选中还是取消
:return: None
"""
if isinstance(option, (list, tuple, set)):
if not self.is_multi and len(option) > 1:
option = option[:1]
for o in option:
o.run_js(f'this.selected={mode};')
o._run_js(f'this.selected={mode};')
self._dispatch_change()
else:
option.run_js(f'this.selected={mode};')
option._run_js(f'this.selected={mode};')
self._dispatch_change()
return self._ele
def _dispatch_change(self):
"""触发修改动作"""
self._ele.run_js('this.dispatchEvent(new CustomEvent("change", {bubbles: true}));')
self._ele._run_js('this.dispatchEvent(new CustomEvent("change", {bubbles: true}));')

View File

@ -2,72 +2,218 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, Tuple, List
from typing import Union, Tuple, List, Optional
from .._elements.chromium_element import ChromiumElement
class SelectElement(object):
_ele: ChromiumElement = ...
def __init__(self, ele: ChromiumElement):
self._ele: ChromiumElement = ...
"""
:param ele: <select>元素对象
"""
...
def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ...
def __call__(self,
text_or_index: Union[str, int, list, tuple],
timeout: float = None) -> ChromiumElement:
"""选定下拉列表中子元素
:param text_or_index: 根据文本值或序号选择选项,若允许多选,传入list或tuple可多选
:param timeout: 超时时间,不输入默认使用页面超时时间
:return: <select>元素对象
"""
...
@property
def is_multi(self) -> bool: ...
def is_multi(self) -> bool:
"""返回是否多选表单"""
...
@property
def options(self) -> List[ChromiumElement]: ...
def options(self) -> List[ChromiumElement]:
"""返回所有选项元素组成的列表"""
...
@property
def selected_option(self) -> Union[ChromiumElement, None]: ...
def selected_option(self) -> Optional[ChromiumElement]:
"""返回第一个被选中的<option>元素"""
...
@property
def selected_options(self) -> List[ChromiumElement]: ...
def selected_options(self) -> List[ChromiumElement]:
"""返回所有被选中的<option>元素列表"""
...
def clear(self) -> None: ...
def all(self) -> ChromiumElement:
"""全选"""
...
def all(self) -> None: ...
def invert(self) -> ChromiumElement:
"""反选"""
...
def by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...
def clear(self) -> ChromiumElement:
"""清除所有已选项"""
...
def by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...
def by_text(self,
text: Union[str, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据text值选择项。当元素是多选列表时可以接收list或tuple
:param text: text属性值传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: <select>元素对象
"""
...
def by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...
def by_value(self,
value: Union[str, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据value值选择项。当元素是多选列表时可以接收list或tuple
:param value: value属性值传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: <select>元素对象
"""
...
def by_locator(self, locator: Union[Tuple[str, str], str], timeout: float = None) -> bool: ...
def by_index(self,
index: Union[int, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据index值选择项。当元素是多选列表时可以接收list或tuple
:param index: 序号,从1开始,可传入负数获取倒数第几个,传入list或tuple可选择多项
:param timeout: 超时时间为None默认使用页面超时时间
:return: <select>元素对象
"""
...
def by_option(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ...
def by_locator(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> ChromiumElement:
"""用定位符选择指定的项
:param locator: 定位符
:param timeout: 超时时间
:return: <select>元素对象
"""
...
def cancel_by_text(self, text: Union[str, list, tuple], timeout: float = None) -> bool: ...
def by_option(self,
option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> ChromiumElement:
"""选中单个或多个<option>元素
:param option: <option>元素或它们组成的列表
:return: <select>元素对象
"""
...
def cancel_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ...
def cancel_by_text(self,
text: Union[str, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据text值取消选择项。当元素是多选列表时可以接收list或tuple
:param text: 文本传入list或tuple可取消多项
:param timeout: 超时时间,不输入默认使用页面超时时间
:return: <select>元素对象
"""
...
def cancel_by_index(self, index: Union[int, list, tuple], timeout: float = None) -> bool: ...
def cancel_by_value(self,
value: Union[str, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据value值取消选择项。当元素是多选列表时可以接收list或tuple
:param value: value属性值传入list或tuple可取消多项
:param timeout: 超时时间,不输入默认使用页面超时时间
:return: <select>元素对象
"""
...
def cancel_by_locator(self, locator: Union[Tuple[str, str], str], timeout: float = None) -> bool: ...
def cancel_by_index(self,
index: Union[int, list, tuple],
timeout: float = None) -> ChromiumElement:
"""此方法用于根据index值取消选择项。当元素是多选列表时可以接收list或tuple
:param index: 序号,从1开始,可传入负数获取倒数第几个,传入list或tuple可取消多项
:param timeout: 超时时间,不输入默认使用页面超时时间
:return: <select>元素对象
"""
...
def cancel_by_locator(self,
locator: Union[Tuple[str, str], str],
timeout: float = None) -> ChromiumElement:
"""用定位符取消选择指定的项
:param locator: 定位符
:param timeout: 超时时间
:return: <select>元素对象
"""
...
def cancel_by_option(self,
option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]]) -> None: ...
option: Union[ChromiumElement, List[ChromiumElement],
Tuple[ChromiumElement]]) -> ChromiumElement:
"""取消选中单个或多个<option>元素
:param option: <option>元素或它们组成的列表
:return: <select>元素对象
"""
...
def invert(self) -> None: ...
def _by_loc(self, loc: Union[str, Tuple[str, str]], timeout: float = None, cancel: bool = False) -> bool: ...
def _by_loc(self,
            loc: Union[str, Tuple[str, str]],
            timeout: float = None,
            cancel: bool = False) -> ChromiumElement:
    """Select, or deselect when `cancel` is set, the items matched by a locator.
    :param loc: locator
    :param timeout: timeout
    :param cancel: whether to deselect instead of select
    :return: the <select> element object
    """
    ...
def _select(self,
condition: Union[str, int, list, tuple] = None,
para_type: str = 'text',
cancel: bool = False,
timeout: float = None) -> bool: ...
timeout: float = None) -> ChromiumElement:
"""选定或取消选定下拉列表中子元素
:param condition: 根据文本值选或序号择选项若允许多选传入list或tuple可多选
:param para_type: 参数类型可选 'text''value''index'
:param cancel: 是否取消选择
:return: <select>元素对象
"""
...
def _text_value(self, condition: Union[list, set], para_type: str, mode: str, timeout: float) -> bool: ...
def _text_value(self,
                condition: Union[list, set],
                para_type: str,
                mode: str,
                timeout: float) -> ChromiumElement:
    """Run the search by text or by value.
    :param condition: set of conditions
    :param para_type: parameter type, either 'text' or 'value'
    :param mode: 'true' or 'false'
    :param timeout: timeout
    :return: the <select> element object
    """
    ...
def _index(self, condition: set, mode: str, timeout: float) -> bool: ...
def _index(self, condition: set, mode: str, timeout: float) -> ChromiumElement:
    """Run the search by index.
    :param condition: set of conditions
    :param mode: 'true' or 'false'
    :param timeout: timeout
    :return: the <select> element object
    """
    ...
def _select_options(self, option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]],
mode: str) -> None: ...
def _select_options(self,
                    option: Union[ChromiumElement, List[ChromiumElement], Tuple[ChromiumElement]],
                    mode: str) -> ChromiumElement:
    """Select or deselect the given option(s).
    :param option: option element object(s)
    :param mode: whether to select or deselect
    :return: the <select> element object
    """
    ...
def _dispatch_change(self) -> None: ...
def _dispatch_change(self) -> None:
    """Trigger the change action."""
    ...

View File

@ -2,82 +2,117 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from time import sleep
from requests.structures import CaseInsensitiveDict
from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter
from .._functions.settings import Settings
from .cookies_setter import (SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter, BrowserCookiesSetter,
MixTabCookiesSetter)
from .._functions.tools import show_or_hide_browser
from .._functions.web import format_headers
from ..errors import ElementLostError, JavaScriptError
class BasePageSetter(object):
class BaseSetter(object):
def __init__(self, owner):
"""
:param owner: BasePage对象
"""
self._owner = owner
def NoneElement_value(self, value=None, on_off=True):
    """Configure whether an empty element returns a preset value.
    :param value: the preset value to return
    :param on_off: whether the feature is enabled
    :return: None
    """
    owner = self._owner
    # Both settings live on the owner; the switch is written first, then the value.
    for attr_name, attr_value in (('_none_ele_return_value', on_off),
                                  ('_none_ele_value', value)):
        setattr(owner, attr_name, attr_value)
def retry_times(self, times):
self._owner.retry_times = times
class ChromiumBaseSetter(BasePageSetter):
def retry_interval(self, interval):
self._owner.retry_interval = interval
def download_path(self, path):
    """Store the download directory on the owner as an absolute path string.
    :param path: download directory; None stands for the current directory ('.')
    :return: None
    """
    chosen = '.' if path is None else path
    self._owner._download_path = str(Path(chosen).absolute())
class SessionPageSetter(BaseSetter):
    """Setter for a SessionPage: cookies, headers, encoding and the options of its session."""

    def __init__(self, owner):
        """
        :param owner: SessionPage object
        """
        super().__init__(owner)
        self._cookies_setter = None  # created on first access of `cookies`

    @property
    def cookies(self):
        """Return the object used to set cookies."""
        if self._cookies_setter is None:
            self._cookies_setter = SessionCookiesSetter(self._owner)
        return self._cookies_setter

    def download_path(self, path):
        """Set the download path.
        :param path: download path
        :return: None
        """
        super().download_path(path)
        # Keep an existing DownloadKit in step with the path stored on the owner.
        if self._owner._DownloadKit:
            self._owner._DownloadKit.set.save_path(self._owner._download_path)

    def timeout(self, second):
        """Set the connection timeout.
        :param second: number of seconds
        :return: None
        """
        self._owner._timeout = second

    def encoding(self, encoding, set_all=True):
        """Set the encoding.
        :param encoding: encoding name; pass None to cancel a previous setting
        :param set_all: whether to store the encoding on the owner; when False only the current response is changed
        :return: None
        """
        if set_all:
            self._owner._encoding = encoding if encoding else None
        # Compare with None explicitly: a requests Response is falsy for 4xx/5xx
        # status codes, and a plain truth test would skip such responses.
        if self._owner.response is not None:
            self._owner.response.encoding = encoding

    def headers(self, headers):
        """Set the general headers.
        :param headers: headers data, normalized through format_headers()
        :return: None
        """
        self._owner._headers = CaseInsensitiveDict(format_headers(headers))

    def header(self, name, value):
        """Set a single item of the headers.
        :param name: item name
        :param value: item value
        :return: None
        """
        self._owner._headers[name] = value

    def user_agent(self, ua):
        """Set the user agent.
        :param ua: user agent
        :return: None
        """
        self._owner._headers['user-agent'] = ua

    def proxies(self, http=None, https=None):
        """Set the proxies of the session.
        :param http: http proxy address
        :param https: https proxy address
        :return: None
        """
        self._owner.session.proxies = {'http': http, 'https': https}

    def auth(self, auth):
        """Set the authentication tuple or object.
        :param auth: authentication tuple or object
        :return: None
        """
        self._owner.session.auth = auth

    def hooks(self, hooks):
        """Set the callback hooks.
        :param hooks: callback hooks
        :return: None
        """
        self._owner.session.hooks = hooks

    def params(self, params):
        """Set the query parameter dict.
        :param params: query parameter dict
        :return: None
        """
        self._owner.session.params = params

    def verify(self, on_off):
        """Set whether SSL certificates are verified.
        :param on_off: whether to verify SSL certificates
        :return: None
        """
        self._owner.session.verify = on_off

    def cert(self, cert):
        """Set the SSL client certificate: path of a .pem file, or a (cert, key) tuple.
        :param cert: certificate path or tuple
        :return: None
        """
        self._owner.session.cert = cert

    def stream(self, on_off):
        """Set whether response content is streamed.
        :param on_off: whether to stream response content
        :return: None
        """
        self._owner.session.stream = on_off

    def trust_env(self, on_off):
        """Set whether the environment is trusted.
        :param on_off: whether to trust the environment
        :return: None
        """
        self._owner.session.trust_env = on_off

    def max_redirects(self, times):
        """Set the maximum number of redirects.
        :param times: maximum number of redirects
        :return: None
        """
        self._owner.session.max_redirects = times

    def add_adapter(self, url, adapter):
        """Mount an adapter on the session.
        :param url: url the adapter applies to
        :param adapter: adapter object
        :return: None
        """
        self._owner.session.mount(url, adapter)
class BrowserBaseSetter(BaseSetter):
def __init__(self, owner):
"""
:param owner: ChromiumBase对象
"""
super().__init__(owner)
self._cookies_setter = None
@property
def load_mode(self):
"""返回用于设置页面加载策略的对象"""
return LoadMode(self._owner)
@property
def scroll(self):
"""返回用于设置页面滚动设置的对象"""
return PageScrollSetter(self._owner.scroll)
@property
def cookies(self):
"""返回用于设置cookies的对象"""
if self._cookies_setter is None:
self._cookies_setter = CookiesSetter(self._owner)
return self._cookies_setter
def retry_times(self, times):
"""设置连接失败重连次数"""
self._owner.retry_times = times
def retry_interval(self, interval):
"""设置连接失败重连间隔"""
self._owner.retry_interval = interval
def timeouts(self, base=None, page_load=None, script=None, implicit=None):
"""设置超时时间,单位为秒
:param base: 基本等待时间除页面加载和脚本超时其它等待默认使用
:param page_load: 页面加载超时时间
:param script: 脚本运行超时时间
:return: None
"""
base = base if base is not None else implicit
def timeouts(self, base=None, page_load=None, script=None):
if base is not None:
self._owner.timeouts.base = base
self._owner._timeout = base
if page_load is not None:
self._owner.timeouts.page_load = page_load
@ -85,57 +120,82 @@ class ChromiumBaseSetter(BasePageSetter):
if script is not None:
self._owner.timeouts.script = script
class BrowserSetter(BrowserBaseSetter):
    """Setter for the browser object: cookies, alert handling and download options."""

    @property
    def cookies(self):
        """Return the object used to set cookies, creating it on first access."""
        if self._cookies_setter is not None:
            return self._cookies_setter
        self._cookies_setter = BrowserCookiesSetter(self._owner)
        return self._cookies_setter

    def auto_handle_alert(self, on_off=True, accept=True):
        """Set whether this browser handles alerts automatically.
        :param on_off: True/False to switch on or off; None stores None
        :param accept: value stored when switched on (accept or cancel)
        :return: None
        """
        if on_off is None:
            setting = None
        elif on_off:
            setting = accept
        else:
            setting = 'close'
        self._owner._auto_handle_alert = setting

    def download_path(self, path):
        """Set the download path and pass it to the download manager.
        :param path: download path
        :return: None
        """
        super().download_path(path)
        manager = self._owner._dl_mgr
        manager.set_path('browser', self._owner._download_path)

    def download_file_name(self, name=None, suffix=None):
        """Set the name of the next downloaded file.
        :param name: file name
        :param suffix: file suffix
        :return: None
        """
        manager = self._owner._dl_mgr
        manager.set_rename('browser', name, suffix)

    def when_download_file_exists(self, mode):
        """Set how a download is handled when a file of the same name exists.
        :param mode: one of 'rename', 'overwrite', 'skip', 'r', 'o', 's'
        :return: None
        :raises ValueError: if mode is none of the accepted values
        """
        # Full names first, then the one-letter aliases (order shows in the error text).
        types = {'rename': 'rename', 'overwrite': 'overwrite', 'skip': 'skip'}
        types.update(r='rename', o='overwrite', s='skip')
        mode = types.get(mode, mode)
        if mode in types:
            self._owner._dl_mgr.set_file_exists('browser', mode)
            return
        raise ValueError(f'''mode参数只能是 '{"', '".join(types.keys())}' 之一,现在是:{mode}''')
class ChromiumBaseSetter(BrowserBaseSetter):
@property
def scroll(self):
return PageScrollSetter(self._owner.scroll)
@property
def cookies(self):
if self._cookies_setter is None:
self._cookies_setter = CookiesSetter(self._owner)
return self._cookies_setter
def headers(self, headers):
self._owner._run_cdp('Network.enable')
self._owner._run_cdp('Network.setExtraHTTPHeaders', headers=format_headers(headers))
def user_agent(self, ua, platform=None):
"""为当前tab设置user agent只在当前tab有效
:param ua: user agent字符串
:param platform: platform字符串
:return: None
"""
keys = {'userAgent': ua}
if platform:
keys['platform'] = platform
self._owner.run_cdp('Emulation.setUserAgentOverride', **keys)
self._owner._run_cdp('Emulation.setUserAgentOverride', **keys)
def session_storage(self, item, value):
"""设置或删除某项sessionStorage信息
:param item: 要设置的项
:param value: 项的值设置为False时删除该项
:return: None
"""
self._owner.run_cdp_loaded('DOMStorage.enable')
i = self._owner.run_cdp('Storage.getStorageKeyForFrame', frameId=self._owner._frame_id)['storageKey']
self._owner._run_cdp_loaded('DOMStorage.enable')
i = self._owner._run_cdp('Storage.getStorageKeyForFrame', frameId=self._owner._frame_id)['storageKey']
if value is False:
self._owner.run_cdp('DOMStorage.removeDOMStorageItem',
storageId={'storageKey': i, 'isLocalStorage': False}, key=item)
self._owner._run_cdp('DOMStorage.removeDOMStorageItem',
storageId={'storageKey': i, 'isLocalStorage': False}, key=item)
else:
self._owner.run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': False},
key=item, value=value)
self._owner.run_cdp_loaded('DOMStorage.disable')
self._owner._run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': False},
key=item, value=value)
self._owner._run_cdp_loaded('DOMStorage.disable')
def local_storage(self, item, value):
"""设置或删除某项localStorage信息
:param item: 要设置的项
:param value: 项的值设置为False时删除该项
:return: None
"""
self._owner.run_cdp_loaded('DOMStorage.enable')
i = self._owner.run_cdp('Storage.getStorageKeyForFrame', frameId=self._owner._frame_id)['storageKey']
self._owner._run_cdp_loaded('DOMStorage.enable')
i = self._owner._run_cdp('Storage.getStorageKeyForFrame', frameId=self._owner._frame_id)['storageKey']
if value is False:
self._owner.run_cdp('DOMStorage.removeDOMStorageItem',
storageId={'storageKey': i, 'isLocalStorage': True}, key=item)
self._owner._run_cdp('DOMStorage.removeDOMStorageItem',
storageId={'storageKey': i, 'isLocalStorage': True}, key=item)
else:
self._owner.run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': True},
key=item, value=value)
self._owner.run_cdp_loaded('DOMStorage.disable')
self._owner._run_cdp('DOMStorage.setDOMStorageItem', storageId={'storageKey': i, 'isLocalStorage': True},
key=item, value=value)
self._owner._run_cdp_loaded('DOMStorage.disable')
def upload_files(self, files):
"""等待上传的文件路径
:param files: 文件路径列表或字符串字符串时多个文件用回车分隔
:return: None
"""
if not self._owner._upload_list:
self._owner.driver.set_callback('Page.fileChooserOpened', self._owner._onFileChooserOpened)
self._owner.run_cdp('Page.setInterceptFileChooserDialog', enabled=True)
self._owner._run_cdp('Page.setInterceptFileChooserDialog', enabled=True)
if isinstance(files, str):
files = files.split('\n')
@ -143,256 +203,77 @@ class ChromiumBaseSetter(BasePageSetter):
files = (files,)
self._owner._upload_list = [str(Path(i).absolute()) for i in files]
def headers(self, headers) -> None:
"""设置固定发送的headers
:param headers: dict格式的headers数据
:return: None
"""
self._owner.run_cdp('Network.enable')
self._owner.run_cdp('Network.setExtraHTTPHeaders', headers=format_headers(headers))
def auto_handle_alert(self, on_off=True, accept=True):
"""设置是否启用自动处理弹窗
:param on_off: bool表示开或关
:param accept: bool表示确定还是取消
:return: None
"""
self._owner._alert.auto = accept if on_off else None
self._owner._alert.auto = None if on_off is None else accept if on_off else 'close'
def blocked_urls(self, urls):
"""设置要忽略的url
:param urls: 要忽略的url可用*通配符可输入多个传入None时清空已设置的内容
:return: None
"""
if not urls:
urls = []
elif isinstance(urls, str):
urls = (urls,)
if not isinstance(urls, (list, tuple)):
raise TypeError('urls需传入str、list或tuple类型。')
self._owner.run_cdp('Network.enable')
self._owner.run_cdp('Network.setBlockedURLs', urls=urls)
self._owner._run_cdp('Network.enable')
self._owner._run_cdp('Network.setBlockedURLs', urls=urls)
class TabSetter(ChromiumBaseSetter):
def __init__(self, owner):
"""
:param owner: 标签页对象
"""
super().__init__(owner)
@property
def window(self):
"""返回用于设置浏览器窗口的对象"""
return WindowSetter(self._owner)
def download_path(self, path):
"""设置下载路径
:param path: 下载路径
:return: None
"""
path = str(Path(path).absolute())
self._owner._download_path = path
self._owner.browser._dl_mgr.set_path(self._owner, path)
super().download_path(path)
self._owner.browser._dl_mgr.set_path(self._owner, self._owner._download_path)
if self._owner._DownloadKit:
self._owner._DownloadKit.set.goal_path(path)
self._owner._DownloadKit.set.save_path(self._owner._download_path)
def download_file_name(self, name=None, suffix=None):
"""设置下一个被下载文件的名称
:param name: 文件名可不含后缀会自动使用远程文件后缀
:param suffix: 后缀名显式设置后缀名不使用远程文件后缀
:return: None
"""
self._owner.browser._dl_mgr.set_rename(self._owner.tab_id, name, suffix)
def when_download_file_exists(self, mode):
"""设置当存在同名文件时的处理方式
:param mode: 可在 'rename', 'overwrite', 'skip', 'r', 'o', 's'中选择
:return: None
"""
types = {'rename': 'rename', 'overwrite': 'overwrite', 'skip': 'skip', 'r': 'rename', 'o': 'overwrite',
's': 'skip'}
types = {'rename': 'rename', 'overwrite': 'overwrite', 'skip': 'skip',
'r': 'rename', 'o': 'overwrite', 's': 'skip'}
mode = types.get(mode, mode)
if mode not in types:
raise ValueError(f'''mode参数只能是 '{"', '".join(types.keys())}' 之一,现在是:{mode}''')
self._owner.browser._dl_mgr.set_file_exists(self._owner.tab_id, mode)
def activate(self):
"""使标签页处于最前面"""
self._owner.browser.activate_tab(self._owner.tab_id)
class ChromiumPageSetter(TabSetter):
def tab_to_front(self, tab_or_id=None):
"""激活标签页使其处于最前面
:param tab_or_id: 标签页对象或id为None表示当前标签页
:return: None
"""
if not tab_or_id:
tab_or_id = self._owner.tab_id
elif not isinstance(tab_or_id, str): # 传入Tab对象
tab_or_id = tab_or_id.tab_id
self._owner.browser.activate_tab(tab_or_id)
def auto_handle_alert(self, on_off=True, accept=True, all_tabs=False):
"""设置是否启用自动处理弹窗
:param on_off: bool表示开或关
:param accept: bool表示确定还是取消
:param all_tabs: 是否为全局设置
:return: None
"""
if all_tabs:
Settings.auto_handle_alert = on_off
else:
self._owner._alert.auto = accept if on_off else None
class SessionPageSetter(BasePageSetter):
def __init__(self, owner):
"""
:param owner: SessionPage对象
"""
super().__init__(owner)
self._cookies_setter = None
@property
def cookies(self):
"""返回用于设置cookies的对象"""
if self._cookies_setter is None:
self._cookies_setter = SessionCookiesSetter(self._owner)
return self._cookies_setter
def NoneElement_value(self, value=None, on_off=True):
super().NoneElement_value(value, on_off)
self._owner.browser._none_ele_return_value = on_off
self._owner.browser._none_ele_value = value
def retry_times(self, times):
"""设置连接失败时重连次数"""
self._owner.retry_times = times
super().retry_times(times)
self._owner.browser.retry_times = times
def retry_interval(self, interval):
"""设置连接失败时重连间隔"""
self._owner.retry_interval = interval
super().retry_interval(interval)
self._owner.browser.retry_interval = interval
def download_path(self, path):
"""设置下载路径
:param path: 下载路径
:return: None
"""
path = str(Path(path).absolute())
self._owner._download_path = path
if path is None:
path = '.'
self._owner._download_path = str(Path(path).absolute())
self._owner.browser.set.download_path(path)
if self._owner._DownloadKit:
self._owner._DownloadKit.set.goal_path(path)
self._owner._DownloadKit.set.save_path(path)
def timeout(self, second):
"""设置连接超时时间
:param second: 秒数
:return: None
"""
self._owner.timeout = second
def download_file_name(self, name=None, suffix=None):
self._owner.browser.set.download_file_name(name, suffix)
def encoding(self, encoding, set_all=True):
"""设置编码
:param encoding: 编码名称如果要取消之前的设置传入None
:param set_all: 是否设置对象参数为False则只设置当前Response
:return: None
"""
if set_all:
self._owner._encoding = encoding if encoding else None
if self._owner.response:
self._owner.response.encoding = encoding
def headers(self, headers):
"""设置通用的headers
:param headers: dict形式的headers
:return: None
"""
self._owner._headers = CaseInsensitiveDict(format_headers(headers))
def header(self, name, value):
"""设置headers中一个项
:param name: 设置名称
:param value: 设置值
:return: None
"""
self._owner._headers[name] = value
def user_agent(self, ua):
"""设置user agent
:param ua: user agent
:return: None
"""
self._owner._headers['user-agent'] = ua
def proxies(self, http=None, https=None):
"""设置proxies参数
:param http: http代理地址
:param https: https代理地址
:return: None
"""
self._owner.session.proxies = {'http': http, 'https': https}
def auth(self, auth):
"""设置认证元组或对象
:param auth: 认证元组或对象
:return: None
"""
self._owner.session.auth = auth
def hooks(self, hooks):
"""设置回调方法
:param hooks: 回调方法
:return: None
"""
self._owner.session.hooks = hooks
def params(self, params):
"""设置查询参数字典
:param params: 查询参数字典
:return: None
"""
self._owner.session.params = params
def verify(self, on_off):
"""设置是否验证SSL证书
:param on_off: 是否验证 SSL 证书
:return: None
"""
self._owner.session.verify = on_off
def cert(self, cert):
"""SSL客户端证书文件的路径(.pem格式),或(cert, key)元组
:param cert: 证书路径或元组
:return: None
"""
self._owner.session.cert = cert
def stream(self, on_off):
"""设置是否使用流式响应内容
:param on_off: 是否使用流式响应内容
:return: None
"""
self._owner.session.stream = on_off
def trust_env(self, on_off):
"""设置是否信任环境
:param on_off: 是否信任环境
:return: None
"""
self._owner.session.trust_env = on_off
def max_redirects(self, times):
"""设置最大重定向次数
:param times: 最大重定向次数
:return: None
"""
self._owner.session.max_redirects = times
def add_adapter(self, url, adapter):
"""添加适配器
:param url: 适配器对应url
:param adapter: 适配器对象
:return: None
"""
self._owner.session.mount(url, adapter)
def when_download_file_exists(self, mode):
self._owner.browser.set.when_download_file_exists(mode)
class WebPageSetter(ChromiumPageSetter):
@ -403,30 +284,24 @@ class WebPageSetter(ChromiumPageSetter):
@property
def cookies(self):
"""返回用于设置cookies的对象"""
if self._cookies_setter is None:
self._cookies_setter = WebPageCookiesSetter(self._owner)
return self._cookies_setter
def headers(self, headers) -> None:
"""设置固定发送的headers
:param headers: dict格式的headers数据
:return: None
"""
def headers(self, headers):
if self._owner.mode == 's':
self._session_setter.headers(headers)
else:
self._chromium_setter.headers(headers)
def user_agent(self, ua, platform=None):
"""设置user agentd模式下只有当前tab有效"""
if self._owner.mode == 's':
self._session_setter.user_agent(ua)
else:
self._chromium_setter.user_agent(ua, platform)
class WebPageTabSetter(TabSetter):
class MixTabSetter(TabSetter):
def __init__(self, owner):
super().__init__(owner)
self._session_setter = SessionPageSetter(self._owner)
@ -434,198 +309,137 @@ class WebPageTabSetter(TabSetter):
@property
def cookies(self):
"""返回用于设置cookies的对象"""
if self._cookies_setter is None:
self._cookies_setter = WebPageCookiesSetter(self._owner)
self._cookies_setter = MixTabCookiesSetter(self._owner)
return self._cookies_setter
def headers(self, headers) -> None:
"""设置固定发送的headers
:param headers: dict格式的headers数据
:return: None
"""
if self._owner._has_session:
def headers(self, headers):
if self._owner._session:
self._session_setter.headers(headers)
if self._owner._has_driver:
if self._owner._driver and self._owner._driver.is_running:
self._chromium_setter.headers(headers)
def user_agent(self, ua, platform=None):
"""设置user agentd模式下只有当前tab有效"""
if self._owner._has_session:
if self._owner._session:
self._session_setter.user_agent(ua)
if self._owner._has_driver:
if self._owner._driver and self._owner._driver.is_running:
self._chromium_setter.user_agent(ua, platform)
def timeouts(self, base=None, page_load=None, script=None):
super().timeouts(base=base, page_load=page_load, script=script)
if base is not None:
self._owner._timeout = base
class ChromiumElementSetter(object):
def __init__(self, ele):
"""
:param ele: ChromiumElement
"""
self._ele = ele
def attr(self, name, value):
"""设置元素attribute属性
:param name: 属性名
:param value: 属性值
:return: None
"""
def attr(self, name, value=''):
try:
self._ele.owner.run_cdp('DOM.setAttributeValue',
nodeId=self._ele._node_id, name=name, value=str(value))
self._ele.owner._run_cdp('DOM.setAttributeValue',
nodeId=self._ele._node_id, name=name, value=str(value))
except ElementLostError:
self._ele._refresh_id()
self._ele.owner.run_cdp('DOM.setAttributeValue',
nodeId=self._ele._node_id, name=name, value=str(value))
self._ele.owner._run_cdp('DOM.setAttributeValue',
nodeId=self._ele._node_id, name=name, value=str(value))
def property(self, name, value):
    """Set a property of the element through JavaScript.
    :param name: property name
    :param value: property value; strings are escaped, other JSON-serializable values
                  (bool, int, float, None) are written as native JavaScript values
    :return: None
    """
    import json  # local import: only this method needs it

    # json.dumps yields a valid JavaScript literal: quotes, backslashes and line
    # breaks are escaped, and non-string values no longer fail on `.replace`.
    literal = json.dumps(value, ensure_ascii=False)
    self._ele._run_js(f'this.{name}={literal};')
def style(self, name, value):
"""设置元素style样式
:param name: 样式名称
:param value: 样式值
:return: None
"""
try:
self._ele.run_js(f'this.style.{name}="{value}";')
self._ele._run_js(f'this.style.{name}="{value}";')
except JavaScriptError:
raise ValueError(f'设置失败,请检查属性名{name}')
def innerHTML(self, html):
"""设置元素innerHTML
:param html: html文本
:return: None
"""
self.property('innerHTML', html)
def value(self, value):
"""设置元素value值
:param value: value值
:return: None
"""
self.property('value', value)
class ChromiumFrameSetter(ChromiumBaseSetter):
    """Setter for a frame; element-level settings are forwarded to the frame element's setter."""

    def attr(self, name, value):
        """Set an attribute of the frame element.
        :param name: attribute name
        :param value: attribute value
        :return: None
        """
        element_setter = self._owner.frame_ele.set
        element_setter.attr(name, value)

    def property(self, name, value):
        """Set a property of the frame element.
        :param name: property name
        :param value: property value
        :return: None
        """
        element_setter = self._owner.frame_ele.set
        element_setter.property(name=name, value=value)

    def style(self, name, value):
        """Set a style of the frame element.
        :param name: style name
        :param value: style value
        :return: None
        """
        element_setter = self._owner.frame_ele.set
        element_setter.style(name=name, value=value)
class LoadMode(object):
    """Callable helper used to set the page load strategy."""

    def __init__(self, owner):
        """
        :param owner: ChromiumBase object
        """
        self._owner = owner

    def __call__(self, value):
        """Set the load strategy.
        :param value: one of 'normal', 'eager', 'none' (case-insensitive)
        :return: None
        :raises ValueError: if value is not one of the accepted strategies
        """
        # Normalize once so the stored value matches what was validated;
        # previously 'Eager' passed the check but was stored with its capital letter.
        mode = value.lower()
        if mode not in ('normal', 'eager', 'none'):
            raise ValueError("只能选择 'normal', 'eager', 'none'")
        self._owner._load_mode = mode
        # Page-type owners also write the strategy to their browser object.
        if self._owner._type in ('ChromiumPage', 'WebPage'):
            self._owner.browser._load_mode = mode

    def normal(self):
        """Set the load strategy to 'normal'."""
        self.__call__('normal')

    def eager(self):
        """Set the load strategy to 'eager'."""
        self.__call__('eager')

    def none(self):
        """Set the load strategy to 'none'."""
        self.__call__('none')
class PageScrollSetter(object):
def __init__(self, scroll):
"""
:param scroll: PageScroller对象
"""
self._scroll = scroll
def wait_complete(self, on_off=True):
    """Set whether a scroll command waits until scrolling has finished.
    :param on_off: True or False
    :return: None
    :raises TypeError: if on_off is not a bool
    """
    if isinstance(on_off, bool):
        self._scroll._wait_complete = on_off
        return
    raise TypeError('on_off必须为bool。')
def smooth(self, on_off=True):
"""设置页面滚动是否平滑滚动
:param on_off: 开或关
:return: None
"""
if not isinstance(on_off, bool):
raise TypeError('on_off必须为bool。')
b = 'smooth' if on_off else 'auto'
self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");')
self._scroll._owner._run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");')
self._scroll._wait_complete = on_off
class WindowSetter(object):
"""用于设置窗口大小的类"""
def __init__(self, owner):
"""
:param owner: 页面对象
"""
self._owner = owner
self._window_id = self._get_info()['windowId']
def max(self):
    """Maximize the window."""
    state = self._get_info()['bounds']['windowState']
    # A fullscreen or minimized window is put back to normal before maximizing.
    steps = ['maximized']
    if state == 'fullscreen' or state == 'minimized':
        steps.insert(0, 'normal')
    for step in steps:
        self._perform({'windowState': step})
def mini(self):
    """Minimize the window."""
    state = self._get_info()['bounds']['windowState']
    # A fullscreen window is put back to normal before minimizing.
    steps = ['normal', 'minimized'] if state == 'fullscreen' else ['minimized']
    for step in steps:
        self._perform({'windowState': step})
def full(self):
    """Switch the window to fullscreen."""
    state = self._get_info()['bounds']['windowState']
    # A minimized window is put back to normal before going fullscreen.
    steps = ['normal', 'fullscreen'] if state == 'minimized' else ['fullscreen']
    for step in steps:
        self._perform({'windowState': step})
def normal(self):
    """Switch the window to the normal state."""
    state = self._get_info()['bounds']['windowState']
    # From fullscreen the 'normal' request is sent twice, otherwise once.
    repeats = 2 if state == 'fullscreen' else 1
    for _ in range(repeats):
        self._perform({'windowState': 'normal'})
def size(self, width=None, height=None):
"""设置窗口大小
:param width: 窗口宽度
:param height: 窗口高度
:return: None
"""
if width or height:
s = self._get_info()['bounds']['windowState']
if s != 'normal':
@ -636,11 +450,6 @@ class WindowSetter(object):
self._perform({'width': width, 'height': height})
def location(self, x=None, y=None):
"""设置窗口在屏幕中的位置,相对左上角坐标
:param x: 距离顶部距离
:param y: 距离左边距离
:return: None
"""
if x is not None or y is not None:
self.normal()
info = self._get_info()['bounds']
@ -648,42 +457,22 @@ class WindowSetter(object):
y = y if y is not None else info['top']
self._perform({'left': x - 8, 'top': y})
def _get_info(self):
"""获取窗口位置及大小信息"""
for _ in range(50):
try:
return self._owner.run_cdp('Browser.getWindowForTarget')
except:
sleep(.1)
def _perform(self, bounds):
"""执行改变窗口大小操作
:param bounds: 控制数据
:return: None
"""
try:
self._owner.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds)
except:
raise RuntimeError('浏览器全屏或最小化状态时请先调用set.window.normal()恢复正常状态。')
# ------------即将废除----------
def maximized(self):
"""窗口最大化"""
self.max()
def minimized(self):
"""窗口最小化"""
self.mini()
def fullscreen(self):
"""设置窗口为全屏"""
self.full()
def hide(self):
"""隐藏浏览器窗口只在Windows系统可用"""
show_or_hide_browser(self._owner, hide=True)
def show(self):
"""显示浏览器窗口只在Windows系统可用"""
show_or_hide_browser(self._owner, hide=False)
def _get_info(self):
    """Fetch the window information of the owner's target.
    Makes up to 50 attempts, pausing 0.02 s after each failed one.
    :return: result of the 'Browser.getWindowForTarget' CDP command
    :raises RuntimeError: if every attempt fails
    """
    for _ in range(50):
        try:
            return self._owner._run_cdp('Browser.getWindowForTarget')
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and SystemExit
            # must propagate instead of being swallowed by the retry loop.
            sleep(.02)
    raise RuntimeError('获取窗口信息失败。')
def _perform(self, bounds):
    """Apply new bounds to the window.
    :param bounds: dict of bounds data passed to 'Browser.setWindowBounds'
    :return: None
    :raises RuntimeError: if the CDP command fails
    """
    try:
        self._owner._run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds)
    except Exception as err:
        # Chain the original error so its cause stays visible; KeyboardInterrupt
        # and SystemExit are no longer turned into RuntimeError.
        raise RuntimeError('浏览器全屏或最小化状态时请先调用set.window.normal()恢复正常状态。') from err

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from pathlib import Path
from typing import Union, Tuple, Literal, Any, Optional
@ -11,131 +10,399 @@ from typing import Union, Tuple, Literal, Any, Optional
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter
from .cookies_setter import SessionCookiesSetter, CookiesSetter, WebPageCookiesSetter, BrowserCookiesSetter
from .scroller import PageScroller
from .._base.base import BasePage
from .._base.chromium import Chromium
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab, WebPageTab
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._pages.session_page import SessionPage
from .._pages.web_page import WebPage
FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']
class BasePageSetter(object):
def __init__(self, owner: BasePage):
self._owner: BasePage = ...
class BaseSetter(object):
_owner: Union[Chromium, BasePage] = ...
def NoneElement_value(self, value: Any = None, on_off: bool = True) -> None: ...
def __init__(self, owner: Union[Chromium, BasePage]):
    """
    :param owner: the Chromium or BasePage object these settings apply to
    """
    ...
def NoneElement_value(self,
                      value: Any = None,
                      on_off: bool = True) -> None:
    """Set whether an empty element returns a preset value.
    :param value: the preset value to return
    :param on_off: whether the feature is enabled
    :return: None
    """
    ...
def retry_times(self, times: int) -> None:
    """Set the number of retries after a failed connection.
    :param times: number of retries
    :return: None
    """
    ...
def retry_interval(self, interval: float) -> None:
    """Set the interval, in seconds, between retries after a failed connection.
    :param interval: retry interval
    :return: None
    """
    ...
def download_path(self, path: Union[str, Path, None]) -> None:
    """Set the download path.
    :param path: download path
    :return: None
    """
    ...
class ChromiumBaseSetter(BasePageSetter):
def __init__(self, owner):
self._owner: ChromiumBase = ...
self._cookies_setter: CookiesSetter = ...
class SessionPageSetter(BaseSetter):
_owner: SessionPage = ...
_cookies_setter: Optional[SessionCookiesSetter] = ...
def __init__(self, owner: SessionPage):
    """
    :param owner: SessionPage object
    """
    ...
@property
def load_mode(self) -> LoadMode: ...
def cookies(self) -> SessionCookiesSetter:
"""返回用于设置cookies的对象"""
...
def download_path(self, path: Union[str, Path, None]) -> None:
    """Set the download path.
    :param path: download path
    :return: None
    """
    ...
def timeout(self, second: float) -> None:
    """Set the connection timeout.
    :param second: number of seconds
    :return: None
    """
    ...
def encoding(self, encoding: Union[str, None], set_all: bool = True) -> None:
    """Set the encoding.
    :param encoding: encoding name; pass None to cancel a previous setting
    :param set_all: whether to set it on the object; when False only the current Response is changed
    :return: None
    """
    ...
def headers(self, headers: Union[str, dict]) -> None:
    """Set the general headers.
    :param headers: headers in dict form
    :return: None
    """
    ...
def header(self, name: str, value: str) -> None:
    """Set a single item of the headers.
    :param name: item name
    :param value: item value
    :return: None
    """
    ...
def user_agent(self, ua: str) -> None:
    """Set the user agent.
    :param ua: user agent
    :return: None
    """
    ...
def proxies(self, http: str = None, https: str = None) -> None:
    """Set the proxies parameter.
    :param http: http proxy address
    :param https: https proxy address
    :return: None
    """
    ...
def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None:
    """Set the authentication tuple or object.
    :param auth: authentication tuple or object
    :return: None
    """
    ...
def hooks(self, hooks: Union[dict, None]) -> None:
    """Set the callback hooks.
    :param hooks: callback hooks
    :return: None
    """
    ...
def params(self, params: Union[dict, None]) -> None:
    """Set the query parameter dict.
    :param params: query parameter dict
    :return: None
    """
    ...
def verify(self, on_off: Union[bool, None]) -> None:
    """Set whether SSL certificates are verified.
    :param on_off: whether to verify SSL certificates
    :return: None
    """
    ...
def cert(self, cert: Union[str, Tuple[str, str], None]) -> None:
    """Set the SSL client certificate: path of a .pem file, or a ('cert', 'key') tuple.
    :param cert: certificate path or tuple
    :return: None
    """
    ...
def stream(self, on_off: Union[bool, None]) -> None:
    """Set whether response content is streamed.
    :param on_off: whether to stream response content
    :return: None
    """
    ...
def trust_env(self, on_off: Union[bool, None]) -> None:
    """Set whether the environment is trusted.
    :param on_off: whether to trust the environment
    :return: None
    """
    ...
def max_redirects(self, times: Union[int, None]) -> None:
    """Set the maximum number of redirects.
    :param times: maximum number of redirects
    :return: None
    """
    ...
def add_adapter(self, url: str, adapter: HTTPAdapter) -> None:
    """Add an adapter.
    :param url: url the adapter applies to
    :param adapter: adapter object
    :return: None
    """
    ...
class BrowserBaseSetter(BaseSetter):
"""Browser和ChromiumBase设置"""
_cookies_setter: Optional[CookiesSetter] = ...
def __init__(self, owner: ChromiumBase):
"""
:param owner: ChromiumBase对象
"""
...
@property
def scroll(self) -> PageScrollSetter: ...
def load_mode(self) -> LoadMode:
"""返回用于设置页面加载模式的对象"""
...
def timeouts(self,
             base: float = None,
             page_load: float = None,
             script: float = None) -> None:
    """Set the timeouts, in seconds.
    :param base: basic wait time, used by default for every wait other than page load and script timeouts
    :param page_load: page load timeout
    :param script: script run timeout
    :return: None
    """
    ...
class BrowserSetter(BrowserBaseSetter):
_owner: Chromium = ...
_cookies_setter: BrowserCookiesSetter = ...
def __init__(self, owner: Chromium):
"""
:param owner: Chromium对象
"""
...
@property
def cookies(self) -> CookiesSetter: ...
def cookies(self) -> BrowserCookiesSetter:
"""返回用于设置cookies的对象"""
...
def retry_times(self, times: int) -> None: ...
@property
def window(self)->WindowSetter:...
def retry_interval(self, interval: float) -> None: ...
def auto_handle_alert(self,
                      on_off: Optional[bool] = True,
                      accept: bool = True) -> None:
    """Set whether this browser handles alerts automatically.
    :param on_off: bool switching it on or off; pass None to use the Settings value
    :param accept: bool, whether to accept or cancel
    :return: None
    """
    ...
def timeouts(self, base: float = None, page_load: float = None, script: float = None) -> None: ...
def download_path(self, path: Union[Path, str, None]) -> None:
    """Set the download path.
    :param path: download path
    :return: None
    """
    ...
def user_agent(self, ua: str, platform: str = None) -> None: ...
def download_file_name(self,
                       name: str = None,
                       suffix: str = None) -> None:
    """Set the name of the next downloaded file.
    :param name: file name; the suffix may be omitted, in which case the remote file's suffix is used
    :param suffix: suffix; when given explicitly the remote file's suffix is not used
    :return: None
    """
    ...
def session_storage(self, item: str, value: Union[str, bool]) -> None: ...
def when_download_file_exists(self, mode: FILE_EXISTS) -> None:
    """Set how a download is handled when a file of the same name exists.
    :param mode: one of 'rename', 'overwrite', 'skip', 'r', 'o', 's'
    :return: None
    """
    ...
def local_storage(self, item: str, value: Union[str, bool]) -> None: ...
def headers(self, headers: Union[dict, str]) -> None: ...
class ChromiumBaseSetter(BrowserBaseSetter):
_owner: ChromiumBase = ...
_cookies_setter: CookiesSetter = ...
def auto_handle_alert(self, on_off: bool = True, accept: bool = True) -> None: ...
def __init__(self, owner): ...
def upload_files(self, files: Union[str, Path, list, tuple]) -> None: ...
@property
def scroll(self) -> PageScrollSetter:
    """Return the object used to configure page scrolling."""
    ...
def blocked_urls(self, urls: Union[list, tuple, str, None]) -> None: ...
@property
def cookies(self) -> CookiesSetter:
    """Return the object used to set cookies."""
    ...
def headers(self, headers: Union[dict, str]) -> None:
    """Set headers that are sent with every request.
    :param headers: headers as a dict, or headers text copied from the browser with one header per line
    :return: None
    """
    ...
def user_agent(self, ua: str, platform: str = None) -> None:
    """Set the user agent for the current tab; it only takes effect in this tab.
    :param ua: user agent string
    :param platform: platform string
    :return: None
    """
    ...
def session_storage(self, item: str, value: Union[str, bool]) -> None:
"""设置或删除某项sessionStorage信息
:param item: 要设置的项
:param value: 项的值设置为False时删除该项
:return: None
"""
...
def local_storage(self, item: str, value: Union[str, bool]) -> None:
"""设置或删除某项localStorage信息
:param item: 要设置的项
:param value: 项的值设置为False时删除该项
:return: None
"""
...
def upload_files(self, files: Union[str, Path, list, tuple]) -> None:
"""等待上传的文件路径
:param files: 文件路径列表或字符串字符串时多个文件用回车分隔
:return: None
"""
...
def auto_handle_alert(self,
on_off: bool = True,
accept: bool = True) -> None:
"""设置是否启用自动处理弹窗
:param on_off: bool表示开或关
:param accept: bool表示确定还是取消
:return: None
"""
...
def blocked_urls(self, urls: Union[list, tuple, str, None]) -> None:
"""设置要忽略的url
:param urls: 要忽略的url可用*通配符可输入多个传入None时清空已设置的内容
:return: None
"""
...
class TabSetter(ChromiumBaseSetter):
_owner: ChromiumTab = ...
def __init__(self, owner: Union[ChromiumTab, WebPageTab, WebPage, ChromiumPage]): ...
def __init__(self, owner: ChromiumTab):
"""
:param owner: 标签页对象
"""
...
@property
def window(self) -> WindowSetter: ...
def window(self) -> WindowSetter:
"""返回用于设置浏览器窗口的对象"""
...
def download_path(self, path: Union[str, Path]) -> None: ...
def download_path(self, path: Union[str, Path, None]) -> None:
"""设置下载路径
:param path: 下载路径
:return: None
"""
...
def download_file_name(self, name: str = None, suffix: str = None) -> None: ...
def download_file_name(self,
name: str = None,
suffix: str = None) -> None:
"""设置下一个被下载文件的名称
:param name: 文件名可不含后缀会自动使用远程文件后缀
:param suffix: 后缀名显式设置后缀名不使用远程文件后缀
:return: None
"""
...
def when_download_file_exists(self, mode: FILE_EXISTS) -> None: ...
def when_download_file_exists(self, mode: FILE_EXISTS) -> None:
"""设置当存在同名文件时的处理方式
:param mode: 可在 'rename', 'overwrite', 'skip', 'r', 'o', 's'中选择
:return: None
"""
...
def activate(self) -> None: ...
def activate(self) -> None:
"""使标签页处于最前面"""
...
class ChromiumPageSetter(TabSetter):
_owner: ChromiumPage = ...
def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ...
def auto_handle_alert(self, on_off: bool = True, accept: bool = True, all_tabs: bool = False) -> None: ...
class SessionPageSetter(BasePageSetter):
_owner: SessionPage = ...
_cookies_setter: Optional[SessionCookiesSetter] = ...
def __init__(self, owner: SessionPage): ...
@property
def cookies(self) -> SessionCookiesSetter: ...
def retry_times(self, times: int) -> None: ...
def retry_interval(self, interval: float) -> None: ...
def download_path(self, path: Union[str, Path]) -> None: ...
def timeout(self, second: float) -> None: ...
def encoding(self, encoding: Union[str, None], set_all: bool = True) -> None: ...
def headers(self, headers: Union[str, dict]) -> None: ...
def header(self, name: str, value: str) -> None: ...
def user_agent(self, ua: str) -> None: ...
def proxies(self, http: str = None, https: str = None) -> None: ...
def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ...
def hooks(self, hooks: Union[dict, None]) -> None: ...
def params(self, params: Union[dict, None]) -> None: ...
def verify(self, on_off: Union[bool, None]) -> None: ...
def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ...
def stream(self, on_off: Union[bool, None]) -> None: ...
def trust_env(self, on_off: Union[bool, None]) -> None: ...
def max_redirects(self, times: Union[int, None]) -> None: ...
def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ...
def __init__(self, owner: ChromiumPage):
"""
:param owner: ChromiumPage对象
"""
...
class WebPageSetter(ChromiumPageSetter):
@ -143,91 +410,235 @@ class WebPageSetter(ChromiumPageSetter):
_session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumPageSetter = ...
def user_agent(self, ua: str, platform: str = None) -> None: ...
def headers(self, headers: Union[str, dict]) -> None: ...
def __init__(self, owner: WebPage):
"""
:param owner: WebPage对象
"""
...
@property
def cookies(self) -> WebPageCookiesSetter: ...
def cookies(self) -> WebPageCookiesSetter:
"""返回用于设置cookies的对象"""
...
class WebPageTabSetter(TabSetter):
_owner: WebPageTab = ...
class MixTabSetter(TabSetter):
_owner: MixTab = ...
_session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumBaseSetter = ...
def user_agent(self, ua: str, platform: str = None) -> None: ...
def headers(self, headers: Union[str, dict]) -> None: ...
def __init__(self, owner: MixTab):
"""
:param owner: MixTab对象
"""
...
@property
def cookies(self) -> WebPageCookiesSetter: ...
def cookies(self) -> WebPageCookiesSetter:
"""返回用于设置cookies的对象"""
...
def timeouts(self,
base: float = None,
page_load: float = None,
script: float = None) -> None:
"""设置超时时间,单位为秒
:param base: 基本等待时间除页面加载和脚本超时其它等待默认使用
:param page_load: 页面加载超时时间
:param script: 脚本运行超时时间
:return: None
"""
...
class ChromiumElementSetter(object):
_ele: ChromiumElement = ...
def __init__(self, ele: ChromiumElement):
self._ele: ChromiumElement = ...
"""
:param ele: ChromiumElement
"""
...
def attr(self, name: str, value: str) -> None: ...
def attr(self, name: str, value: str = '') -> None:
"""设置元素attribute属性
:param name: 属性名
:param value: 属性值
:return: None
"""
...
def property(self, name: str, value: str) -> None: ...
def property(self, name: str, value: str) -> None:
"""设置元素property属性
:param name: 属性名
:param value: 属性值
:return: None
"""
...
def style(self, name: str, value: str) -> None: ...
def style(self, name: str, value: str) -> None:
"""设置元素style样式
:param name: 样式名称
:param value: 样式值
:return: None
"""
...
def innerHTML(self, html: str) -> None: ...
def innerHTML(self, html: str) -> None:
"""设置元素innerHTML
:param html: html文本
:return: None
"""
...
def value(self, value: str) -> None: ...
def value(self, value: str) -> None:
"""设置元素value值
:param value: value值
:return: None
"""
...
class ChromiumFrameSetter(ChromiumBaseSetter):
_owner: ChromiumFrame = ...
def attr(self, name: str, value: str) -> None: ...
def attr(self, name: str, value: str) -> None:
"""设置frame元素attribute属性
:param name: 属性名
:param value: 属性值
:return: None
"""
...
def property(self, name, value) -> None:
"""设置元素property属性
:param name: 属性名
:param value: 属性值
:return: None
"""
...
def style(self, name, value) -> None:
"""设置元素style样式
:param name: 样式名称
:param value: 样式值
:return: None
"""
...
class LoadMode(object):
def __init__(self, owner: ChromiumBase):
self._owner: ChromiumBase = ...
"""用于设置页面加载策略的类"""
_owner: Union[Chromium, ChromiumBase] = ...
def __call__(self, value: str) -> None: ...
def __init__(self, owner: Union[Chromium, ChromiumBase]):
"""
:param owner: ChromiumBase对象
"""
...
def normal(self) -> None: ...
def __call__(self, value: Literal['normal', 'eager', 'none']) -> None:
"""设置加载策略
:param value: 可选 'normal', 'eager', 'none'
:return: None
"""
...
def eager(self) -> None: ...
def normal(self) -> None:
"""设置页面加载策略为normal"""
...
def none(self) -> None: ...
def eager(self) -> None:
"""设置页面加载策略为eager"""
...
def none(self) -> None:
"""设置页面加载策略为none"""
...
class PageScrollSetter(object):
_scroll: PageScroller = ...
def __init__(self, scroll: PageScroller):
self._scroll: PageScroller = ...
"""
:param scroll: PageScroller对象
"""
...
def wait_complete(self, on_off: bool = True): ...
def wait_complete(self, on_off: bool = True):
"""设置滚动命令后是否等待完成
:param on_off: 开或关
:return: None
"""
...
def smooth(self, on_off: bool = True): ...
def smooth(self, on_off: bool = True):
"""设置页面滚动是否平滑滚动
:param on_off: 开或关
:return: None
"""
...
class WindowSetter(object):
def __init__(self, owner: ChromiumBase):
self._owner: ChromiumBase = ...
self._window_id: str = ...
"""用于设置窗口大小的类"""
_owner: ChromiumBase = ...
_window_id: str = ...
def max(self) -> None: ...
def __init__(self, owner: Union[ChromiumTab, ChromiumPage]):
"""
:param owner: Tab或Page对象
"""
...
def mini(self) -> None: ...
def max(self) -> None:
"""窗口最大化"""
...
def full(self) -> None: ...
def mini(self) -> None:
"""窗口最小化"""
...
def normal(self) -> None: ...
def full(self) -> None:
"""设置窗口为全屏"""
...
def size(self, width: int = None, height: int = None) -> None: ...
def normal(self) -> None:
"""设置窗口为常规模式"""
...
def location(self, x: int = None, y: int = None) -> None: ...
def size(self, width: int = None, height: int = None) -> None:
"""设置窗口大小
:param width: 窗口宽度
:param height: 窗口高度
:return: None
"""
...
def _get_info(self) -> dict: ...
def location(self, x: int = None, y: int = None) -> None:
"""设置窗口在屏幕中的位置,相对左上角坐标
:param x: 距离顶部距离
:param y: 距离左边距离
:return: None
"""
...
def _perform(self, bounds: dict) -> None: ...
def hide(self) -> None:
"""隐藏浏览器窗口只在Windows系统可用"""
...
def hide(self) -> None: ...
def show(self) -> None:
"""显示浏览器窗口只在Windows系统可用"""
...
def show(self) -> None: ...
def _get_info(self) -> dict:
"""获取窗口位置及大小信息"""
...
def _perform(self, bounds: dict) -> None:
"""执行改变窗口大小操作
:param bounds: 控制数据
:return: None
"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from .._functions.web import location_in_viewport
from ..errors import CDPError, NoRectError, PageDisconnectedError, ElementLostError
@ -11,51 +10,41 @@ from ..errors import CDPError, NoRectError, PageDisconnectedError, ElementLostEr
class ElementStates(object):
def __init__(self, ele):
"""
:param ele: ChromiumElement
"""
self._ele = ele
@property
def is_selected(self):
"""返回列表元素是否被选择"""
return self._ele.run_js('return this.selected;')
return self._ele._run_js('return this.selected;')
@property
def is_checked(self):
"""返回元素是否被选择"""
return self._ele.run_js('return this.checked;')
return self._ele._run_js('return this.checked;')
@property
def is_displayed(self):
"""返回元素是否显示"""
return not (self._ele.style('visibility') == 'hidden' or
self._ele.run_js('return this.offsetParent === null;')
or self._ele.style('display') == 'none' or self._ele.property('hidden'))
return not (self._ele.style('visibility') == 'hidden'
or self._ele.style('display') == 'none'
or self._ele.property('hidden'))
@property
def is_enabled(self):
"""返回元素是否可用"""
return not self._ele.run_js('return this.disabled;')
return not self._ele._run_js('return this.disabled;')
@property
def is_alive(self):
"""返回元素是否仍在DOM中"""
try:
return self._ele.owner.run_cdp('DOM.describeNode',
backendNodeId=self._ele._backend_id)['node']['nodeId'] != 0
return self._ele.owner._run_cdp('DOM.describeNode',
backendNodeId=self._ele._backend_id)['node']['nodeId'] != 0
except ElementLostError:
return False
@property
def is_in_viewport(self):
"""返回元素是否出现在视口中以元素click_point为判断"""
x, y = self._ele.rect.click_point
return location_in_viewport(self._ele.owner, x, y) if x else False
@property
def is_whole_in_viewport(self):
"""返回元素是否整个都在视口内"""
x1, y1 = self._ele.rect.location
w, h = self._ele.rect.size
x2, y2 = x1 + w, y1 + h
@ -63,22 +52,19 @@ class ElementStates(object):
@property
def is_covered(self):
"""返回元素是否被覆盖与是否在视口中无关如被覆盖返回覆盖元素的backend id否则返回False"""
lx, ly = self._ele.rect.click_point
try:
bid = self._ele.owner.run_cdp('DOM.getNodeForLocation', x=int(lx), y=int(ly)).get('backendNodeId')
bid = self._ele.owner._run_cdp('DOM.getNodeForLocation', x=int(lx), y=int(ly)).get('backendNodeId')
return bid if bid != self._ele._backend_id else False
except CDPError:
return False
@property
def is_clickable(self):
"""返回元素是否可被模拟点击,从是否有大小、是否可用、是否显示、是否响应点击判断,不判断是否被遮挡"""
return self.has_rect and self.is_enabled and self.is_displayed and self._ele.style('pointer-events') != 'none'
@property
def has_rect(self):
"""返回元素是否拥有位置和大小没有返回False有返回四个角在页面中坐标组成的列表"""
try:
return self._ele.rect.corners
except NoRectError:
@ -87,95 +73,111 @@ class ElementStates(object):
class ShadowRootStates(object):
    """State queries for a shadow-root element."""

    def __init__(self, ele):
        """
        :param ele: the element object whose state is queried
        """
        self._ele = ele

    @property
    def is_enabled(self):
        """Return whether the element is enabled (its JS ``disabled`` flag is falsy)."""
        # Only the underscore-prefixed runner is used; the older public-call
        # line that preceded it made this statement unreachable.
        return not self._ele._run_js('return this.disabled;')

    @property
    def is_alive(self):
        """Return whether the element is still in the DOM.

        ``DOM.describeNode`` is issued for the element's backend id; a node id
        of 0 in the reply, or an ``ElementLostError``, means the element is gone.
        """
        try:
            node = self._ele.owner._run_cdp('DOM.describeNode',
                                            backendNodeId=self._ele._backend_id)['node']
            return node['nodeId'] != 0
        except ElementLostError:
            return False
class BrowserStates(object):
    """Read-only state flags of a browser object."""

    def __init__(self, browser):
        """
        :param browser: browser object whose state is queried
        """
        # Cached answer of the incognito check; None until first requested.
        self._incognito = None
        self._browser = browser

    @property
    def is_alive(self):
        """Return the ``is_running`` flag of the browser's driver."""
        driver = self._browser._driver
        return driver.is_running

    @property
    def is_headless(self):
        """Return the browser's ``_is_headless`` flag."""
        return self._browser._is_headless

    @property
    def is_existed(self):
        """Return the browser's ``_is_exists`` flag."""
        return self._browser._is_exists

    @property
    def is_incognito(self):
        """Return whether the browser reports an incognito window histogram.

        The CDP call is made once; later reads return the cached result.
        """
        if self._incognito is not None:
            return self._incognito
        histograms = self._browser._run_cdp('Browser.getHistograms')
        # The quoted name is searched in the textual form of the whole reply.
        self._incognito = "'Browser.WindowCount.Incognito'" in str(histograms)
        return self._incognito
class PageStates(object):
    """State queries used by Page and Tab objects."""

    def __init__(self, owner):
        """
        :param owner: the page/tab object whose state is queried
        """
        self._owner = owner

    @property
    def is_loading(self):
        """Return whether the page is currently loading."""
        return self._owner._is_loading

    @property
    def is_alive(self):
        """Return whether the page object is still usable.

        A single ``Page.getLayoutMetrics`` command is sent; a
        ``PageDisconnectedError`` means the page is no longer reachable.
        """
        try:
            # One probe is enough: the command used to be sent twice, once
            # through the public runner and once through the internal one.
            self._owner._run_cdp('Page.getLayoutMetrics')
        except PageDisconnectedError:
            return False
        return True

    @property
    def ready_state(self):
        """Return the page load state: 'connecting', 'loading', 'interactive' or 'complete'."""
        return self._owner._ready_state

    @property
    def has_alert(self):
        """Return whether an alert dialog is currently present on the page."""
        return self._owner._has_alert

    @property
    def is_headless(self):
        """Return the ``is_headless`` state of the owning browser."""
        return self._owner.browser.states.is_headless

    @property
    def is_existed(self):
        """Return the ``is_existed`` state of the owning browser."""
        return self._owner.browser.states.is_existed

    @property
    def is_incognito(self):
        """Return the ``is_incognito`` state of the owning browser."""
        return self._owner.browser.states.is_incognito
class FrameStates(object):
    """State queries for a frame object."""

    def __init__(self, frame):
        """
        :param frame: the frame object whose state is queried
        """
        self._frame = frame

    @property
    def is_loading(self):
        """Return whether the frame's page is currently loading."""
        return self._frame._is_loading

    @property
    def is_alive(self):
        """Return whether the frame element is usable and still hosts a frame.

        ``DOM.describeNode`` is sent once for the frame element; the frame is
        alive when the returned node carries a ``frameId`` key.
        """
        try:
            # Single lookup through the internal runner (it used to be issued twice).
            node = self._frame._target_page._run_cdp(
                'DOM.describeNode', backendNodeId=self._frame._frame_ele._backend_id)['node']
        except (ElementLostError, PageDisconnectedError):
            return False
        return 'frameId' in node

    @property
    def ready_state(self):
        """Return the frame's load state."""
        return self._frame._ready_state

    @property
    def is_displayed(self):
        """Return whether the iframe element is displayed.

        The element counts as hidden when its visibility style is 'hidden',
        its ``offsetParent`` is null, or its display style is 'none'.
        """
        # The offsetParent script is evaluated once, via the internal runner.
        return not (self._frame.frame_ele.style('visibility') == 'hidden'
                    or self._frame.frame_ele._run_js('return this.offsetParent === null;')
                    or self._frame.frame_ele.style('display') == 'none')

    @property
    def has_alert(self):
        """Return whether an alert dialog is currently present."""
        return self._frame._has_alert

View File

@ -2,97 +2,202 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, Tuple, List, Optional, Literal
from .._base.chromium import Chromium
from .._elements.chromium_element import ShadowRoot, ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
class ElementStates(object):
_ele: ChromiumElement = ...
def __init__(self, ele: ChromiumElement):
self._ele: ChromiumElement = ...
"""
:param ele: ChromiumElement
"""
...
@property
def is_selected(self) -> bool: ...
def is_selected(self) -> bool:
"""返回列表元素是否被选择"""
...
@property
def is_checked(self) -> bool: ...
def is_checked(self) -> bool:
"""返回元素是否被选择"""
...
@property
def is_displayed(self) -> bool: ...
def is_displayed(self) -> bool:
"""返回元素是否显示"""
...
@property
def is_enabled(self) -> bool: ...
def is_enabled(self) -> bool:
"""返回元素是否可用"""
...
@property
def is_alive(self) -> bool: ...
def is_alive(self) -> bool:
"""返回元素是否仍在DOM中"""
...
@property
def is_in_viewport(self) -> bool: ...
def is_in_viewport(self) -> bool:
"""返回元素是否出现在视口中以元素click_point为判断"""
...
@property
def is_whole_in_viewport(self) -> bool: ...
def is_whole_in_viewport(self) -> bool:
"""返回元素是否整个都在视口内"""
...
@property
def is_covered(self) -> Union[Literal[False], int]: ...
def is_covered(self) -> Union[Literal[False], int]:
"""返回元素是否被覆盖与是否在视口中无关如被覆盖返回覆盖元素的backend id否则返回False"""
...
@property
def is_clickable(self) -> bool: ...
def is_clickable(self) -> bool:
"""返回元素是否可被模拟点击,从是否有大小、是否可用、是否显示、是否响应点击判断,不判断是否被遮挡"""
...
@property
def has_rect(self) -> Union[Literal[False], List[Tuple[float, float]]]: ...
def has_rect(self) -> Union[Literal[False], List[Tuple[float, float]]]:
"""返回元素是否拥有位置和大小没有返回False有返回四个角在页面中坐标组成的列表"""
...
class ShadowRootStates(object):
_ele: ShadowRoot = ...
def __init__(self, ele: ShadowRoot):
"""
:param ele: ChromiumElement
"""
self._ele: ShadowRoot = ...
...
@property
def is_enabled(self) -> bool: ...
def is_enabled(self) -> bool:
"""返回元素是否可用"""
...
@property
def is_alive(self) -> bool: ...
def is_alive(self) -> bool:
"""返回元素是否仍在DOM中"""
...
class BrowserStates(object):
    _browser: Chromium = ...
    _incognito: Optional[bool] = ...
    def __init__(self, browser: Chromium):
        """
        :param browser: Chromium object
        """
        ...
    @property
    def is_alive(self) -> bool:
        """Return whether the browser is still usable"""
        ...
    @property
    def is_headless(self) -> bool:
        """Return whether the browser is in headless mode"""
        ...
    @property
    def is_existed(self) -> bool:
        """Return whether the browser is one that was taken over"""
        ...
    @property
    def is_incognito(self) -> bool:
        """Return whether the browser is in incognito mode"""
        ...
class PageStates(object):
_owner: ChromiumBase = ...
def __init__(self, owner: ChromiumBase):
self._owner: ChromiumBase = ...
"""
:param owner: ChromiumBase对象
"""
...
@property
def is_loading(self) -> bool: ...
def is_loading(self) -> bool:
"""返回页面是否在加载状态"""
...
@property
def is_alive(self) -> bool: ...
def is_alive(self) -> bool:
"""返回页面对象是否仍然可用"""
...
@property
def ready_state(self) -> Optional[str]: ...
def ready_state(self) -> Optional[str]:
"""返回当前页面加载状态,'connecting' 'loading' 'interactive' 'complete'"""
...
@property
def has_alert(self) -> bool: ...
def has_alert(self) -> bool:
"""返回当前页面是否存在弹窗"""
...
@property
def is_headless(self) -> bool:
"""返回浏览器是否无头模式"""
...
@property
def is_existed(self) -> bool:
"""返回浏览器是否接管的"""
...
@property
def is_incognito(self) -> bool:
"""返回浏览器是否无痕模式"""
...
class FrameStates(object):
_frame: ChromiumFrame = ...
def __init__(self, frame: ChromiumFrame):
self._frame: ChromiumFrame = ...
"""
:param frame: ChromiumFrame对象
"""
...
@property
def is_loading(self) -> bool: ...
def is_loading(self) -> bool:
"""返回页面是否在加载状态"""
...
@property
def is_alive(self) -> bool: ...
def is_alive(self) -> bool:
"""返回frame元素是否可用且里面仍挂载有frame"""
...
@property
def ready_state(self) -> str: ...
def ready_state(self) -> str:
"""返回加载状态"""
...
@property
def is_displayed(self) -> bool: ...
def is_displayed(self) -> bool:
"""返回iframe是否显示"""
...
@property
def has_alert(self) -> bool: ...
def has_alert(self) -> bool:
"""返回当前页面是否存在弹窗"""
...

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from time import sleep, perf_counter
@ -13,49 +12,81 @@ from ..errors import WaitTimeoutError, NoRectError
class OriginWaiter(object):
    """Base waiter: calling it pauses, then hands back the owner object."""

    def __init__(self, owner):
        """
        :param owner: object this waiter belongs to; returned by ``__call__``
        """
        self._owner = owner

    def __call__(self, second, scope=None):
        """Sleep for a number of seconds.

        With two arguments the pause is a random value between them.
        :param second: seconds to sleep, or lower bound of the random range
        :param scope: upper bound of the random range; None for a fixed pause
        :return: the owner object
        """
        if scope is not None:
            from random import uniform
            second = uniform(second, scope)
        sleep(second)
        return self._owner
class BrowserWaiter(OriginWaiter):
    """Waiter bound to a browser object (held in ``self._owner``)."""

    def new_tab(self, timeout=None, curr_tab=None, raise_err=None):
        """Wait until the browser's newest tab id differs from a reference tab.

        :param timeout: seconds to wait; None uses the owner's ``timeout``
        :param curr_tab: reference tab id, or an object with a ``_type`` attribute
                         whose ``tab_id`` is used; falsy uses the current newest tab id
        :param raise_err: True raises on failure; otherwise ``Settings.raise_when_wait_failed`` decides
        :return: the newest tab id on success, False on timeout
        """
        if curr_tab and hasattr(curr_tab, '_type'):
            curr_tab = curr_tab.tab_id
        elif not curr_tab:
            curr_tab = self._owner._newest_tab_id

        if timeout is None:
            timeout = self._owner.timeout

        deadline = perf_counter() + timeout
        while perf_counter() < deadline:
            if curr_tab != self._owner._newest_tab_id:
                return self._owner._newest_tab_id
            sleep(.01)

        must_raise = raise_err is True or Settings.raise_when_wait_failed is True
        if not must_raise:
            return False
        raise WaitTimeoutError(f'等待新标签页失败(等待{timeout}秒)。')

    def download_begin(self, timeout=None, cancel_it=False):
        """Wait for a browser-level download to begin.

        :param timeout: seconds to wait; None uses the owner's ``timeout``
        :param cancel_it: when truthy the 'browser' flag is set to False instead of True
        :return: whatever ``wait_mission`` returns
        :raises RuntimeError: if the download manager is not running
        """
        manager = self._owner._dl_mgr
        if not manager._running:
            raise RuntimeError('此功能需显式设置下载路径才能使用。使用set.download_path()方法、配置对象或ini文件均可。')

        self._owner._dl_mgr.set_flag('browser', not cancel_it)
        wait_for = self._owner.timeout if timeout is None else timeout
        return wait_mission(self._owner, 'browser', wait_for)

    def downloads_done(self, timeout=None, cancel_if_timeout=True):
        """Wait until the download manager has no missions left.

        :param timeout: seconds to wait; a falsy value waits without limit
        :param cancel_if_timeout: cancel the remaining missions when the wait times out
        :return: True if all missions finished, False if the wait timed out
        :raises RuntimeError: if the download manager is not running
        """
        if not self._owner._dl_mgr._running:
            raise RuntimeError('此功能需显式设置下载路径(使用set.download_path()方法、配置对象或ini文件均可)')

        if not timeout:
            # No limit given: block until the mission table empties.
            while self._owner._dl_mgr._missions:
                sleep(.5)
            return True

        deadline = perf_counter() + timeout
        while perf_counter() < deadline:
            if not self._owner._dl_mgr._missions:
                return True
            sleep(.5)

        # Final check after the deadline before declaring failure.
        if not self._owner._dl_mgr._missions:
            return True
        if cancel_if_timeout:
            for mission in list(self._owner._dl_mgr._missions.values()):
                mission.cancel()
        return False
class BaseWaiter(OriginWaiter):
def __init__(self, page_or_ele):
"""
:param page_or_ele: 页面对象或元素对象
"""
self._driver = page_or_ele
def ele_deleted(self, loc_or_ele, timeout=None, raise_err=None):
"""等待元素从DOM中删除
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0)
ele = self._owner._ele(loc_or_ele, raise_err=False, timeout=0)
return ele.wait.deleted(timeout, raise_err=raise_err) if ele else True
def ele_displayed(self, loc_or_ele, timeout=None, raise_err=None):
"""等待元素变成显示状态
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if timeout is None:
timeout = self._driver.timeout
timeout = self._owner.timeout
end_time = perf_counter() + timeout
ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout)
ele = self._owner._ele(loc_or_ele, raise_err=False, timeout=timeout)
timeout = end_time - perf_counter()
if timeout <= 0:
if timeout <= 0 or not ele:
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待元素显示失败(等待{timeout}秒)。')
else:
@ -63,16 +94,10 @@ class BaseWaiter(OriginWaiter):
return ele.wait.displayed(timeout, raise_err=raise_err)
def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None):
"""等待元素变成隐藏状态
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if timeout is None:
timeout = self._driver.timeout
timeout = self._owner.timeout
end_time = perf_counter() + timeout
ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=timeout)
ele = self._owner._ele(loc_or_ele, raise_err=False, timeout=timeout)
timeout = end_time - perf_counter()
if timeout <= 0:
if raise_err is True or Settings.raise_when_wait_failed is True:
@ -82,13 +107,6 @@ class BaseWaiter(OriginWaiter):
return ele.wait.hidden(timeout, raise_err=raise_err)
def eles_loaded(self, locators, timeout=None, any_one=False, raise_err=None):
"""等待元素加载到DOM可等待全部或任意一个
:param locators: 要等待的元素输入定位符用list输入多个
:param timeout: 超时时间默认读取页面超时时间
:param any_one: 是否等待到一个就返回
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回True失败返回False
"""
def _find(loc, driver):
r = driver.run('DOM.performSearch', query=loc, includeUserAgentShadowDOM=True)
@ -118,13 +136,14 @@ class BaseWaiter(OriginWaiter):
by = ('id', 'xpath', 'link text', 'partial link text', 'name', 'tag name', 'class name', 'css selector')
locators = ((get_loc(locators)[1],) if (isinstance(locators, str) or isinstance(locators, tuple)
and locators[0] in by and len(locators) == 2)
else [get_loc(l)[1] for l in locators])
else [get_loc(x)[1] for x in locators])
method = any if any_one else all
timeout = self._driver.timeout if timeout is None else timeout
if timeout is None:
timeout = self._owner.timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if method([_find(l, self._driver.driver) for l in locators]):
if method([_find(l, self._owner.driver) for l in locators]):
return True
sleep(.01)
if raise_err is True or Settings.raise_when_wait_failed is True:
@ -133,95 +152,54 @@ class BaseWaiter(OriginWaiter):
return False
def load_start(self, timeout=None, raise_err=None):
"""等待页面开始加载
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._loading(timeout=timeout, gap=.002, raise_err=raise_err)
def doc_loaded(self, timeout=None, raise_err=None):
"""等待页面加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._loading(timeout=timeout, start=False, raise_err=raise_err)
def upload_paths_inputted(self):
"""等待自动填写上传文件路径"""
end_time = perf_counter() + self._driver.timeout
end_time = perf_counter() + self._owner.timeout
while perf_counter() < end_time:
if not self._driver._upload_list:
if not self._owner._upload_list:
return True
sleep(.01)
return False
def download_begin(self, timeout=None, cancel_it=False):
"""等待浏览器下载开始,可将其拦截
:param timeout: 超时时间None使用页面对象超时时间
:param cancel_it: 是否取消该任务
:return: 成功返回任务对象失败返回False
"""
if not self._driver.browser._dl_mgr._running:
if not self._owner.browser._dl_mgr._running:
raise RuntimeError('此功能需显式设置下载路径使用set.download_path()方法、配置对象或ini文件均可')
self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, False if cancel_it else True)
self._owner.browser._dl_mgr.set_flag(self._owner.tab_id, False if cancel_it else True)
if timeout is None:
timeout = self._driver.timeout
r = False
end_time = perf_counter() + timeout
while perf_counter() < end_time:
v = self._driver.browser._dl_mgr.get_flag(self._driver.tab_id)
if not isinstance(v, bool):
r = v
break
sleep(.005)
self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, None)
return r
timeout = self._owner.timeout
return wait_mission(self._owner.browser, self._owner.tab_id, timeout)
def url_change(self, text, exclude=False, timeout=None, raise_err=None):
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._change('url', text, exclude, timeout, raise_err)
return self._owner if self._change('url', text, exclude, timeout, raise_err) else False
def title_change(self, text, exclude=False, timeout=None, raise_err=None):
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._change('title', text, exclude, timeout, raise_err)
return self._owner if self._change('title', text, exclude, timeout, raise_err) else False
def _change(self, arg, text, exclude=False, timeout=None, raise_err=None):
"""等待指定属性变成包含或不包含指定文本
:param arg: 要被匹配的属性
:param text: 用于识别的文本
:param exclude: 是否排除为True时当属性不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
def do():
if arg == 'url':
v = self._owner._run_cdp('Target.getTargetInfo', targetId=self._owner._target_id)['targetInfo']['url']
elif arg == 'title':
v = self._owner._run_cdp('Target.getTargetInfo', targetId=self._owner._target_id)['targetInfo']['title']
else:
raise ValueError
if (not exclude and text in v) or (exclude and text not in v):
return True
if do():
return True
if timeout is None:
timeout = self._driver.timeout
timeout = self._owner.timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if arg == 'url':
val = self._driver.url
elif arg == 'title':
val = self._driver.title
else:
raise ValueError
if (not exclude and text in val) or (exclude and text not in val):
if do():
return True
sleep(.05)
@ -231,197 +209,114 @@ class BaseWaiter(OriginWaiter):
return False
def _loading(self, timeout=None, start=True, gap=.01, raise_err=None):
"""等待页面开始加载或加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param start: 等待开始还是结束
:param gap: 间隔秒数
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if timeout != 0:
if timeout is None or timeout is True:
timeout = self._driver.timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if self._driver._is_loading == start:
return True
sleep(gap)
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待页面加载失败(等待{timeout}秒)。')
else:
return False
class TabWaiter(BaseWaiter):
def downloads_done(self, timeout=None, cancel_if_timeout=True):
"""等待所有浏览器下载任务结束
:param timeout: 超时时间为None时无限等待
:param cancel_if_timeout: 超时时是否取消剩余任务
:return: 是否等待成功
"""
if not self._driver.browser._dl_mgr._running:
raise RuntimeError('此功能需显式设置下载路径使用set.download_path()方法、配置对象或ini文件均可')
if not timeout:
while self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
sleep(.5)
return True
else:
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
return True
sleep(.5)
if self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
if cancel_if_timeout:
for m in self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
m.cancel()
return False
else:
return True
def alert_closed(self):
"""等待弹出框关闭"""
while not self._driver.states.has_alert:
sleep(.2)
while self._driver.states.has_alert:
sleep(.2)
class PageWaiter(TabWaiter):
def __init__(self, page):
super().__init__(page)
def new_tab(self, timeout=None, raise_err=None):
"""等待新标签页出现
:param timeout: 超时时间为None则使用页面对象timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等到新标签页返回其id否则返回False
"""
timeout = timeout if timeout is not None else self._driver.timeout
if timeout is None:
timeout = self._owner.timeout
timeout = .1 if timeout <= 0 else timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
latest_tid = self._driver.tab_ids[0]
if self._driver.tab_id != latest_tid:
return latest_tid
sleep(.01)
if self._owner._is_loading == start:
return True
sleep(gap)
if raise_err is True or Settings.raise_when_wait_failed is True:
raise WaitTimeoutError(f'等待新标签页失败(等待{timeout}秒)。')
raise WaitTimeoutError(f'等待页面加载失败(等待{timeout}秒)。')
else:
return False
def all_downloads_done(self, timeout=None, cancel_if_timeout=True):
"""等待所有浏览器下载任务结束
:param timeout: 超时时间为None时无限等待
:param cancel_if_timeout: 超时时是否取消剩余任务
:return: 是否等待成功
"""
if not self._driver.browser._dl_mgr._running:
class TabWaiter(BaseWaiter):
def downloads_done(self, timeout=None, cancel_if_timeout=True):
if not self._owner.browser._dl_mgr._running:
raise RuntimeError('此功能需显式设置下载路径使用set.download_path()方法、配置对象或ini文件均可')
if not timeout:
while self._driver.browser._dl_mgr._missions:
while self._owner.browser._dl_mgr.get_tab_missions(self._owner.tab_id):
sleep(.5)
return True
return self._owner
else:
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if not self._driver.browser._dl_mgr._missions:
return True
if not self._owner.browser._dl_mgr.get_tab_missions(self._owner.tab_id):
return self._owner
sleep(.5)
if self._driver.browser._dl_mgr._missions:
if self._owner.browser._dl_mgr.get_tab_missions(self._owner.tab_id):
if cancel_if_timeout:
for m in list(self._driver.browser._dl_mgr._missions.values()):
for m in self._owner.browser._dl_mgr.get_tab_missions(self._owner.tab_id):
m.cancel()
return False
else:
return True
return self._owner
def alert_closed(self, timeout=None):
    """Wait for an alert to appear and then to go away.

    :param timeout: seconds shared by both waiting phases; None waits without limit
    :return: the owner object if no alert is present afterwards, otherwise False
    """
    deadline = None if timeout is None else perf_counter() + timeout

    def time_left():
        # Without a deadline there is always time left.
        return deadline is None or perf_counter() < deadline

    # Phase 1: wait for an alert to show up.
    while not self._owner.states.has_alert and time_left():
        sleep(.2)
    # Phase 2: wait for that alert to be dismissed.
    while self._owner.states.has_alert and time_left():
        sleep(.2)

    if self._owner.states.has_alert:
        return False
    return self._owner
class ChromiumPageWaiter(TabWaiter):
    """Page-level waiter whose browser-wide waits are forwarded to the browser's waiter."""

    def new_tab(self, timeout=None, raise_err=None):
        """Forward to ``browser.wait.new_tab`` with the same ``timeout`` and ``raise_err``."""
        browser_wait = self._owner.browser.wait
        return browser_wait.new_tab(timeout=timeout, raise_err=raise_err)

    def download_begin(self, timeout=None, cancel_it=False):
        """Forward to ``browser.wait.download_begin`` with the same arguments."""
        browser_wait = self._owner.browser.wait
        return browser_wait.download_begin(timeout=timeout, cancel_it=cancel_it)

    def all_downloads_done(self, timeout=None, cancel_if_timeout=True):
        """Forward to ``browser.wait.downloads_done`` with the same arguments."""
        browser_wait = self._owner.browser.wait
        return browser_wait.downloads_done(timeout=timeout, cancel_if_timeout=cancel_if_timeout)
class ElementWaiter(OriginWaiter):
"""等待元素在dom中某种状态如删除、显示、隐藏"""
def __init__(self, owner):
super().__init__(owner)
self._ele = owner
def __init__(self, owner, ele):
"""等待元素在dom中某种状态如删除、显示、隐藏
:param owner: 元素所在页面
:param ele: 要等待的元素
"""
self._owner = owner
self._ele = ele
@property
def _timeout(self):
return self._ele.timeout
def deleted(self, timeout=None, raise_err=None):
"""等待元素从dom删除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_alive', False, timeout, raise_err, err_text='等待元素被删除失败。')
def displayed(self, timeout=None, raise_err=None):
"""等待元素从dom显示
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_displayed', True, timeout, raise_err, err_text='等待元素显示失败。')
def hidden(self, timeout=None, raise_err=None):
"""等待元素从dom隐藏
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_displayed', False, timeout, raise_err, err_text='等待元素隐藏失败。')
def covered(self, timeout=None, raise_err=None):
"""等待当前元素被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回覆盖元素id返回False
"""
return self._wait_state('is_covered', True, timeout, raise_err, err_text='等待元素被覆盖失败。')
return self._ele if self._wait_state('is_covered', True, timeout, raise_err,
err_text='等待元素被覆盖失败。') else False
def not_covered(self, timeout=None, raise_err=None):
"""等待当前元素不被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_covered', False, timeout, raise_err, err_text='等待元素不被覆盖失败。')
def enabled(self, timeout=None, raise_err=None):
"""等待当前元素变成可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_enabled', True, timeout, raise_err, err_text='等待元素变成可用失败。')
def disabled(self, timeout=None, raise_err=None):
"""等待当前元素变成不可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
return self._wait_state('is_enabled', False, timeout, raise_err, err_text='等待元素变成不可用失败。')
def disabled_or_deleted(self, timeout=None, raise_err=None):
"""等待当前元素变成不可用或从DOM移除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
if not self._ele.states.is_enabled or not self._ele.states.is_alive:
return self._ele
if timeout is None:
timeout = self._owner.timeout
timeout = self._timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
if not self._ele.states.is_enabled or not self._ele.states.is_alive:
return True
return self._ele
sleep(.05)
if raise_err is True or Settings.raise_when_wait_failed is True:
@ -429,15 +324,26 @@ class ElementWaiter(OriginWaiter):
else:
return False
def stop_moving(self, timeout=None, gap=.1, raise_err=None):
"""等待当前元素停止运动
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param gap: 检测间隔时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
def clickable(self, wait_moved=True, timeout=None, raise_err=None):
if timeout is None:
timeout = self._owner.timeout
timeout = self._timeout
t1 = perf_counter()
r = self._wait_state('is_clickable', True, timeout, raise_err, err_text='等待元素可点击失败(等{}秒)。')
r = self.stop_moving(timeout=timeout - perf_counter() + t1) if wait_moved and r else r
if raise_err and not r:
raise WaitTimeoutError(f'等待元素可点击失败(等{timeout}秒)。')
return r
def has_rect(self, timeout=None, raise_err=None):
return self._ele if self._wait_state('has_rect', True, timeout, raise_err,
err_text='等待元素拥有大小及位置失败(等{}秒)。') else False
def stop_moving(self, timeout=None, gap=.1, raise_err=None):
if timeout is None:
timeout = self._timeout
if timeout <= 0:
timeout = .1
end_time = perf_counter() + timeout
while perf_counter() < end_time:
try:
@ -453,7 +359,7 @@ class ElementWaiter(OriginWaiter):
while perf_counter() < end_time:
sleep(gap)
if self._ele.rect.size == size and self._ele.rect.location == location:
return True
return self._ele
size = self._ele.rect.size
location = self._ele.rect.location
@ -462,48 +368,18 @@ class ElementWaiter(OriginWaiter):
else:
return False
def clickable(self, wait_moved=True, timeout=None, raise_err=None):
"""等待当前元素可被点击
:param wait_moved: 是否等待元素运动结束
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
t1 = perf_counter()
r = self._wait_state('is_clickable', True, timeout, raise_err, err_text='等待元素可点击失败(等{}秒)。')
r = self.stop_moving(timeout=perf_counter() - t1) if wait_moved and r else r
if raise_err and not r:
raise WaitTimeoutError(f'等待元素可点击失败(等{timeout}秒)。')
return r
def has_rect(self, timeout=None, raise_err=None):
"""等待当前元素有大小及位置属性
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素四角坐标左上 右上 右下 左下失败返回False
"""
return self._wait_state('has_rect', True, timeout, raise_err, err_text='等待元素拥有大小及位置失败(等{}秒)。')
def _wait_state(self, attr, mode=False, timeout=None, raise_err=None, err_text=None):
"""等待元素某个元素状态到达指定状态
:param attr: 状态名称
:param mode: 等待True还是False
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:param err_text: 抛出错误时显示的信息
:return: 是否等待成功
"""
a = self._ele.states.__getattribute__(attr)
if (a and mode) or (not a and not mode):
return True if isinstance(a, bool) else a
return self._ele if isinstance(a, bool) else a
if timeout is None:
timeout = self._owner.timeout
timeout = self._timeout
end_time = perf_counter() + timeout
while perf_counter() < end_time:
a = self._ele.states.__getattribute__(attr)
if (a and mode) or (not a and not mode):
return True if isinstance(a, bool) else a
return self._ele if isinstance(a, bool) else a
sleep(.05)
err_text = err_text or '等待元素状态改变失败(等待{}秒)。'
@ -514,9 +390,24 @@ class ElementWaiter(OriginWaiter):
class FrameWaiter(BaseWaiter, ElementWaiter):
def __init__(self, frame):
"""
:param frame: ChromiumFrame对象
"""
super().__init__(frame)
super(BaseWaiter, self).__init__(frame, frame.frame_ele)
def __init__(self, owner):
super().__init__(owner)
self._ele = owner.frame_ele
@property
def _timeout(self):
return self._owner.timeout
def wait_mission(browser, tid, timeout=None):
    """Poll the browser's download manager until the flag stored for a tab stops being a bool.

    The flag is read with ``browser._dl_mgr.get_flag(tid)`` every 5 ms. Once the
    wait ends (result found or deadline passed) the flag is reset to ``None``.

    :param browser: object exposing ``_dl_mgr`` with ``get_flag(tid)`` and ``set_flag(tid, value)``
    :param tid: tab id whose flag is polled
    :param timeout: seconds to wait; ``None`` waits without a deadline
                    (previously ``None`` raised TypeError on ``perf_counter() + None``)
    :return: the first non-bool flag value seen, or False if the deadline passed first
    """
    r = False
    # None means "no deadline", the same convention the other waiters use for timeout=None.
    end_time = None if timeout is None else perf_counter() + timeout
    while end_time is None or perf_counter() < end_time:
        v = browser._dl_mgr.get_flag(tid)
        if not isinstance(v, bool):
            r = v
            break
        sleep(.005)

    browser._dl_mgr.set_flag(tid, None)
    return r

View File

@ -2,120 +2,778 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from typing import Union, Tuple, Literal, List
from typing import Union, Tuple, Any
from .downloader import DownloadMission
from .._base.chromium import Chromium
from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase
from .._pages.chromium_frame import ChromiumFrame
from .._pages.chromium_page import ChromiumPage
from .._pages.chromium_tab import ChromiumTab
from .._pages.mix_tab import MixTab
from .._pages.web_page import WebPage
class OriginWaiter(object):
def __call__(self, second: float, scope: float = None) -> None: ...
_owner: Any = ...
def __init__(self, owner: Any): ...
def __call__(self, second: float, scope: float = None):
"""等待若干秒,如传入两个参数,等待时间为这两个数间的一个随机数
:param second: 秒数
:param scope: 随机数范围
:return: 调用等待的对象
"""
...
class BrowserWaiter(OriginWaiter):
    """Type stub for the waiter attached to a Chromium browser object."""
    _owner: Chromium = ...

    def __init__(self, owner: Chromium):
        """
        :param owner: Chromium object
        """
        ...

    def __call__(self, second: float, scope: float = None) -> Chromium:
        """Wait for some seconds; when two arguments are given, wait a random time between them.
        :param second: number of seconds
        :param scope: range of the random number
        :return: the Chromium object
        """
        ...

    def new_tab(self,
                timeout: float = None,
                curr_tab: Union[str, ChromiumTab, MixTab] = None,
                raise_err: bool = None) -> Union[str, bool]:
        """Wait for a new tab to appear.
        :param timeout: timeout in seconds; None uses the object's timeout attribute
        :param curr_tab: the currently newest tab object or tab id, used to detect the new tab; None detects it automatically
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: id of the new tab, or False if none appeared
        """
        ...

    # A bare ``False`` is not a valid type expression, so ``bool`` is used for the failure value.
    def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]:
        """Wait for a browser download to begin; the download can be intercepted.
        :param timeout: timeout in seconds; None uses the page object's timeout
        :param cancel_it: whether to cancel the mission
        :return: the mission object on success, False on failure
        """
        ...

    def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool:
        """Wait for all browser download missions to finish.
        :param timeout: timeout in seconds; None waits indefinitely
        :param cancel_if_timeout: whether to cancel the remaining missions on timeout
        :return: whether the wait succeeded
        """
        ...
class BaseWaiter(OriginWaiter):
def __init__(self, page: ChromiumBase):
self._driver: ChromiumBase = ...
def __call__(self, second: float, scope: float = None) -> None: ...
_owner: ChromiumBase = ...
def ele_deleted(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
timeout: float = None,
raise_err: bool = None) -> bool: ...
raise_err: bool = None) -> bool:
"""等待元素从DOM中删除
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def ele_displayed(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
timeout: float = None,
raise_err: bool = None) -> bool: ...
raise_err: bool = None) -> bool:
"""等待元素变成显示状态
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None,
raise_err: bool = None) -> bool: ...
def ele_hidden(self,
loc_or_ele: Union[str, tuple, ChromiumElement],
timeout: float = None,
raise_err: bool = None) -> bool:
"""等待元素变成隐藏状态
:param loc_or_ele: 要等待的元素可以是已有元素定位符
:param timeout: 超时时间默认读取页面超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def eles_loaded(self,
locators: Union[Tuple[str, str], str, list, tuple],
timeout: float = None,
any_one: bool = False,
raise_err: bool = None) -> bool: ...
raise_err: bool = None) -> bool:
"""等待元素加载到DOM可等待全部或任意一个
:param locators: 要等待的元素输入定位符用list输入多个
:param timeout: 超时时间默认读取页面超时时间
:param any_one: 是否等待到一个就返回
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回True失败返回False
"""
...
def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ...
def load_start(self, timeout: float = None, raise_err: bool = None) -> bool:
"""等待页面开始加载
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def doc_loaded(self, timeout: float = None, raise_err: bool = None) -> bool:
"""等待页面加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def doc_loaded(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def upload_paths_inputted(self) -> bool:
"""等待自动填写上传文件路径"""
...
def upload_paths_inputted(self) -> bool: ...
def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool, dict]:
"""等待浏览器下载开始,可将其拦截
:param timeout: 超时时间None使用页面对象超时时间
:param cancel_it: 是否取消该任务
:return: 成功返回任务对象cancel_it为True时返回dict格式的下载信息失败返回False
"""
...
def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ...
def url_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> ChromiumBase:
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
def title_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> ChromiumBase:
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间为None使用页面设置
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def url_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ...
def _change(self,
arg: str,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> bool:
"""等待指定属性变成包含或不包含指定文本
:param arg: 要被匹配的属性
:param text: 用于识别的文本
:param exclude: 是否排除为True时当属性不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
def title_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ...
def _change(self, arg: str, text: str, exclude: bool = False, timeout: float = None,
raise_err: bool = None) -> bool: ...
def _loading(self,
timeout: float = None,
start: bool = True,
gap: float = .01,
raise_err: bool = None) -> bool:
"""等待页面开始加载或加载完成
:param timeout: 超时时间为None时使用页面timeout属性
:param start: 等待开始还是结束
:param gap: 间隔秒数
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等待成功
"""
...
class TabWaiter(BaseWaiter):
_owner: ChromiumTab = ...
def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
def __init__(self, owner: ChromiumTab):
"""
:param owner: Tab对象
"""
...
def alert_closed(self) -> None: ...
def __call__(self,
second: float,
scope: float = None) -> ChromiumTab:
"""等待若干秒,如传入两个参数,等待时间为这两个数间的一个随机数
:param second: 秒数
:param scope: 随机数范围
:return: ChromiumTab对象
"""
...
def downloads_done(self,
timeout: float = None,
cancel_if_timeout: bool = True) -> Union[False, ChromiumTab]:
"""等待所有浏览器下载任务结束
:param timeout: 超时时间为None时无限等待
:param cancel_if_timeout: 超时时是否取消剩余任务
:return: 是否等待成功
"""
...
def alert_closed(self, timeout: float = None) -> ChromiumTab:
"""等待弹出框关闭
:param timeout: 超时时间为None无限等待
:return: 标签页对象自己
"""
...
def url_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, ChromiumTab]:
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def title_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, ChromiumTab]:
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间为None使用页面设置
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
class PageWaiter(TabWaiter):
_driver: ChromiumPage = ...
class MixTabWaiter(BaseWaiter):
_owner: MixTab = ...
def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ...
def __init__(self, owner: MixTab):
"""
:param owner: Tab对象
"""
...
def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...
def __call__(self,
second: float,
scope: float = None) -> MixTab:
"""等待若干秒,如传入两个参数,等待时间为这两个数间的一个随机数
:param second: 秒数
:param scope: 随机数范围
:return: MixTab对象
"""
...
def downloads_done(self,
timeout: float = None,
cancel_if_timeout: bool = True) -> Union[False, MixTab]:
"""等待所有浏览器下载任务结束
:param timeout: 超时时间为None时无限等待
:param cancel_if_timeout: 超时时是否取消剩余任务
:return: 是否等待成功
"""
...
def alert_closed(self, timeout: float = None) -> MixTab:
"""等待弹出框关闭
:param timeout: 超时时间为None无限等待
:return: 标签页对象自己
"""
...
def url_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, MixTab]:
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def title_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, MixTab]:
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间为None使用页面设置
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
class ChromiumPageWaiter(TabWaiter):
    """Type stub for the waiter attached to a ChromiumPage object."""
    _owner: Union[ChromiumPage, WebPage] = ...

    def __init__(self, owner: ChromiumPage):
        """
        :param owner: Page object
        """
        ...

    def __call__(self,
                 second: float,
                 scope: float = None) -> ChromiumPage:
        """Wait for some seconds; when two arguments are given, wait a random time between them.
        :param second: number of seconds
        :param scope: range of the random number
        :return: the ChromiumPage object
        """
        ...

    def new_tab(self,
                timeout: float = None,
                raise_err: bool = None) -> Union[str, bool]:
        """Wait for a new tab to appear.
        :param timeout: timeout in seconds; None uses the page object's timeout attribute
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: id of the new tab, or False if none appeared
        """
        ...

    def all_downloads_done(self,
                           timeout: float = None,
                           cancel_if_timeout: bool = True) -> bool:
        """Wait for all browser download missions to finish.
        :param timeout: timeout in seconds; None waits indefinitely
        :param cancel_if_timeout: whether to cancel the remaining missions on timeout
        :return: whether the wait succeeded
        """
        ...

    # A bare ``False`` is not a valid type expression, so ``bool`` is used for the failure value.
    def url_change(self,
                   text: str,
                   exclude: bool = False,
                   timeout: float = None,
                   raise_err: bool = None) -> Union[ChromiumPage, bool]:
        """Wait until the url contains, or no longer contains, the given text.
        :param text: text to look for
        :param exclude: when True, succeed once the url does NOT contain ``text``
        :param timeout: timeout in seconds
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: the page object on success, otherwise False
        """
        ...

    def title_change(self,
                     text: str,
                     exclude: bool = False,
                     timeout: float = None,
                     raise_err: bool = None) -> Union[ChromiumPage, bool]:
        """Wait until the title contains, or no longer contains, the given text.
        :param text: text to look for
        :param exclude: when True, succeed once the title does NOT contain ``text``
        :param timeout: timeout in seconds; None uses the page setting
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: the page object on success, otherwise False
        """
        ...
class WebPageWaiter(TabWaiter):
    """Type stub for the waiter attached to a WebPage object."""
    _owner: Union[ChromiumPage, WebPage] = ...

    def __init__(self, owner: WebPage):
        """
        :param owner: Page object
        """
        ...

    def __call__(self,
                 second: float,
                 scope: float = None) -> WebPage:
        """Wait for some seconds; when two arguments are given, wait a random time between them.
        :param second: number of seconds
        :param scope: range of the random number
        :return: the WebPage object
        """
        ...

    def new_tab(self,
                timeout: float = None,
                raise_err: bool = None) -> Union[str, bool]:
        """Wait for a new tab to appear.
        :param timeout: timeout in seconds; None uses the page object's timeout attribute
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: id of the new tab, or False if none appeared
        """
        ...

    def all_downloads_done(self,
                           timeout: float = None,
                           cancel_if_timeout: bool = True) -> bool:
        """Wait for all browser download missions to finish.
        :param timeout: timeout in seconds; None waits indefinitely
        :param cancel_if_timeout: whether to cancel the remaining missions on timeout
        :return: whether the wait succeeded
        """
        ...

    # A bare ``False`` is not a valid type expression, so ``bool`` is used for the failure value.
    def url_change(self,
                   text: str,
                   exclude: bool = False,
                   timeout: float = None,
                   raise_err: bool = None) -> Union[WebPage, bool]:
        """Wait until the url contains, or no longer contains, the given text.
        :param text: text to look for
        :param exclude: when True, succeed once the url does NOT contain ``text``
        :param timeout: timeout in seconds
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: the page object on success, otherwise False
        """
        ...

    def title_change(self,
                     text: str,
                     exclude: bool = False,
                     timeout: float = None,
                     raise_err: bool = None) -> Union[WebPage, bool]:
        """Wait until the title contains, or no longer contains, the given text.
        :param text: text to look for
        :param exclude: when True, succeed once the title does NOT contain ``text``
        :param timeout: timeout in seconds; None uses the page setting
        :param raise_err: whether to raise when the wait fails; None follows Settings
        :return: the page object on success, otherwise False
        """
        ...
class ElementWaiter(OriginWaiter):
def __init__(self, owner: ChromiumBase, ele: ChromiumElement):
self._ele: ChromiumElement = ...
self._owner: ChromiumBase = ...
_owner: ChromiumElement = ...
_ele: ChromiumElement = ...
def __call__(self, second: float, scope: float = None) -> None: ...
def __init__(self, owner: ChromiumElement):
"""
:param owner: ChromiumElement对象
"""
...
def deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def __call__(self,
second: float,
scope: float = None) -> ChromiumElement:
"""等待若干秒,如传入两个参数,等待时间为这两个数间的一个随机数
:param second: 秒数
:param scope: 随机数范围
:return: ChromiumElement对象
"""
...
def displayed(self, timeout: float = None, raise_err: bool = None) -> bool: ...
@property
def _timeout(self) -> float:
"""返回超时设置"""
...
def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def deleted(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待元素从dom删除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def covered(self, timeout: float = None, raise_err: bool = None) -> Union[Literal[False], int]: ...
def displayed(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待元素从dom显示
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def hidden(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待元素从dom隐藏
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def covered(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回覆盖元素id返回False
"""
...
def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def not_covered(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素不被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def clickable(self, wait_moved: bool = True, timeout: float = None, raise_err: bool = None) -> bool: ...
def enabled(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素变成可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def disabled(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素变成不可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool:
"""等待当前元素变成不可用或从DOM移除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def clickable(self,
wait_moved: bool = True,
timeout: float = None,
raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素可被点击
:param wait_moved: 是否等待元素运动结束
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def has_rect(self,
timeout: float = None,
raise_err: bool = None) -> Union[Literal[False], List[Tuple[float, float]]]: ...
raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素有大小及位置属性
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def stop_moving(self, timeout: float = None, gap: float = .1, raise_err: bool = None) -> bool: ...
def stop_moving(self,
timeout: float = None,
gap: float = .1,
raise_err: bool = None) -> Union[ChromiumElement, False]:
"""等待当前元素停止运动
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param gap: 检测间隔时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def _wait_state(self,
attr: str,
mode: bool = False,
timeout: float = None,
raise_err: bool = None,
err_text: str = None) -> bool: ...
err_text: str = None) -> Union[ChromiumElement, False]:
"""等待元素某个元素状态到达指定状态
:param attr: 状态名称
:param mode: 等待True还是False
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:param err_text: 抛出错误时显示的信息
:return: 成功返回元素对象失败返回False
"""
...
class FrameWaiter(BaseWaiter, ElementWaiter):
def __init__(self, frame: ChromiumFrame): ...
_owner: ChromiumFrame = ...
def __init__(self, owner: ChromiumFrame):
"""
:param owner: ChromiumFrame对象
"""
...
def __call__(self,
second: float,
scope: float = None) -> ChromiumFrame:
"""等待若干秒,如传入两个参数,等待时间为这两个数间的一个随机数
:param second: 秒数
:param scope: 随机数范围
:return: ChromiumFrame对象
"""
...
def url_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, ChromiumFrame]:
"""等待url变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当url不包含text指定文本时返回True
:param timeout: 超时时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def title_change(self,
text: str,
exclude: bool = False,
timeout: float = None,
raise_err: bool = None) -> Union[False, ChromiumFrame]:
"""等待title变成包含或不包含指定文本
:param text: 用于识别的文本
:param exclude: 是否排除为True时当title不包含text指定文本时返回True
:param timeout: 超时时间为None使用页面设置
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 等待成功返回页面对象否则返回False
"""
...
def deleted(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待元素从dom删除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def displayed(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待元素从dom显示
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def hidden(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待元素从dom隐藏
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def has_rect(self,
timeout: float = None,
raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素有大小及位置属性
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def covered(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回覆盖元素id返回False
"""
...
def not_covered(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素不被遮盖
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def enabled(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素变成可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def disabled(self, timeout: float = None, raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素变成不可用
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def disabled_or_deleted(self, timeout: float = None, raise_err: bool = None) -> bool:
"""等待当前元素变成不可用或从DOM移除
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def clickable(self,
wait_moved: bool = True,
timeout: float = None,
raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素可被点击
:param wait_moved: 是否等待元素运动结束
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
def stop_moving(self,
timeout: float = None,
gap: float = .1,
raise_err: bool = None) -> Union[ChromiumFrame, False]:
"""等待当前元素停止运动
:param timeout: 超时时间为None使用元素所在页面timeout属性
:param gap: 检测间隔时间
:param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 成功返回元素对象失败返回False
"""
...
# A bare ``False`` is not a valid type expression, so ``bool`` is used for the failure value.
def wait_mission(browser: Chromium, tid: str, timeout: float = None) -> Union[DownloadMission, bool]:
    """Wait for a download mission.
    :param browser: Chromium object
    :param tid: tab id
    :param timeout: timeout in seconds
    :return: the mission object on success, False on timeout
             (NOTE(review): confirm against the implementation whether other value types can be returned)
    """
    ...

View File

@ -2,33 +2,34 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from ._base.chromium import Chromium
from ._configs.chromium_options import ChromiumOptions
from ._elements.session_element import make_session_ele
from ._functions.by import By
from ._functions.elements import get_eles
from ._functions.keys import Keys
from ._functions.settings import Settings
from ._functions.tools import wait_until, configs_to_here
from ._functions.web import get_blob, tree
from ._pages.chromium_page import ChromiumPage
from ._units.actions import Actions
__all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here', 'get_blob',
'tree', 'from_selenium', 'from_playwright', 'get_eles']
'tree', 'from_selenium', 'from_playwright']
def from_selenium(driver):
"""从selenium的WebDriver对象生成ChromiumPage对象"""
"""从selenium的WebDriver对象生成Chromium对象"""
address, port = driver.caps.get('goog:chromeOptions', {}).get('debuggerAddress', ':').split(':')
if not address:
raise RuntimeError('获取失败。')
return ChromiumPage(f'{address}:{port}')
co = ChromiumOptions().set_local_port(port)
co._ua_set = True
return Chromium(co)
def from_playwright(page_or_browser):
"""从playwright的Page或Browser对象生成ChromiumPage对象"""
"""从playwright的Page或Browser对象生成Chromium对象"""
if hasattr(page_or_browser, 'context'):
page_or_browser = page_or_browser.context.browser
try:
@ -48,5 +49,7 @@ def from_playwright(page_or_browser):
port = con_info.laddr.port
break
else:
raise RuntimeError('获取失败。')
return ChromiumPage(f'127.0.0.1:{port}')
raise RuntimeError('获取失败,请用管理员权限运行。')
co = ChromiumOptions().set_local_port(f'127.0.0.1:{port}')
co._ua_set = True
return Chromium(co)

View File

@ -2,8 +2,7 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""

View File

@ -2,14 +2,15 @@
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
@Copyright: (c) 2020 by g1879, Inc. All Rights Reserved.
"""
from ._elements.chromium_element import ChromiumElement, ShadowRoot
from ._elements.none_element import NoneElement
from ._elements.session_element import SessionElement
from ._pages.chromium_frame import ChromiumFrame
from ._pages.chromium_tab import ChromiumTab, WebPageTab
from ._pages.chromium_tab import ChromiumTab
from ._pages.mix_tab import MixTab
from ._pages.mix_tab import MixTab as WebPageTab
__all__ = ['ChromiumElement', 'ShadowRoot', 'NoneElement', 'SessionElement', 'ChromiumFrame', 'ChromiumTab',
'WebPageTab']
'MixTab', 'WebPageTab']

64
LICENSE
View File

@ -1,29 +1,49 @@
BSD 3-Clause License
Copyright (c) 2020, g1879
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
允许任何人以个人身份使用或分发本项目源代码,但仅限于学习和合法非盈利目的。
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
个人或组织如未获得版权持有人授权,不得将本项目以源代码或二进制形式用于商业行为。
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
使用本项目需满足以下条款,如使用过程中出现违反任意一项条款的情形,授权自动失效。
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
* 禁止将DrissionPage应用到任何可能违反当地法律规定和道德约束的项目中
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 禁止将DrissionPage用于任何可能有损他人利益的项目中
* 禁止将DrissionPage用于攻击与骚扰行为
* 遵守Robots协议禁止将DrissionPage用于采集法律或系统Robots协议不允许的数据
使用DrissionPage发生的一切行为均由使用人自行负责。
因使用DrissionPage进行任何行为所产生的一切纠纷及后果均与版权持有人无关
版权持有人不承担任何使用DrissionPage带来的风险和损失。
版权持有人不对DrissionPage可能存在的缺陷导致的任何损失负任何责任。
---------------------------------------------------------
Anyone may use or distribute the source code of this project in their personal capacity,
but only for the purpose of learning and legal non-profit activities.
An individual or organization may not use the project's source code or binary form for
commercial purposes without authorization from the copyright holder.
The following terms and conditions must be met in order to use this project. Authorization
will automatically expire if any of the terms are violated during use.
* It is strictly prohibited to use the DrissionPage app for any project that may violate local
laws and ethical constraints.
* It is strictly prohibited to use DrissionPage for any project that may harm the interests of others.
* It is strictly prohibited to use DrissionPage for attack and harassment.
* Follow the Robots protocol and do not use the DrissionPage to collect data that is prohibited
by law or the system's Robots protocol.
All actions taken using DrissionPage are the responsibility of the user.
The copyright holder is not involved in any disputes or consequences arising from the use of
DrissionPage for any actions, and the copyright holder shall not bear any risks and losses arising
from the use of DrissionPage.
The copyright holder shall not bear any responsibility for any losses resulting from any defects in
DrissionPage.

View File

@ -1,4 +1,5 @@
include DrissionPage/_configs/configs.ini
include DrissionPage/_functions/suffixes.dat
include DrissionPage/*.pyi
include DrissionPage/*/*.py
include DrissionPage/*/*.pyi

View File

@ -12,11 +12,9 @@ DrissionPage 是一个基于 python 的网页自动化工具。
---
官方网站:[https://drissionpage.cn](https://drissionpage.cn)
官方网站:[https://DrissionPage.cn](https://drissionpage.cn)
<a href='https://gitee.com/g1879/DrissionPage/stargazers'><img src='https://gitee.com/g1879/DrissionPage/badge/star.svg?theme=dark' alt='star'></img></a> <a href='https://gitee.com/g1879/DrissionPage/members'><img src='https://gitee.com/g1879/DrissionPage/badge/fork.svg?theme=dark' alt='fork'></img></a>
项目地址:[gitee](https://gitee.com/g1879/DrissionPage) | [github](https://github.com/g1879/DrissionPage)
项目地址:[gitee](https://gitee.com/g1879/DrissionPage) | [github](https://github.com/g1879/DrissionPage) | [gitcode](https://gitcode.com/g1879/DrissionPage)
您的星星是对我最大的支持💖
@ -32,9 +30,11 @@ python 版本3.6 及以上
# 🛠 如何使用
**📖 使用文档:** [点击查看](https://g1879.gitee.io/drissionpagedocs)
**📖 使用文档:** [点击查看](https://DrissionPage.cn)
**交流 QQ 群:** 636361957
**交流 QQ 群:** 见使用文档
![](https://drissionpage.cn/codes.jpg)
---
@ -55,41 +55,52 @@ python 版本3.6 及以上
- 不基于 webdriver
- 无需为不同版本的浏览器下载不同的驱动
- 运行速度更快
- 可以跨`<iframe>`查找元素,无需切入切出
- 把`<iframe>`看作普通元素,获取后可直接在其中查找元素,逻辑更清晰
- 可同时操作浏览器中的多个标签页,即使标签页为非激活状态,无需切换
- 可以直接读取浏览器缓存保存图片,无需用 GUI 点击另存
- 可以对整个网页截图,包括视口外的部分(90以上版本浏览器支持)
- 可以跨 iframe 查找元素,无需切入切出
- 把 iframe 看作普通元素,逻辑更清晰
- 可同时操作多个标签页,无需切换
- 可以直接读取浏览器缓存保存图片,无需用 GUI 点击另存
- 可以对整个网页截图,包括视口外的部分
- 可处理非`open`状态的 shadow-root
## 🎇 亮点功能
除了以上优点,本库还内置了无数人性化设计。
- 极简的语法规则。集成大量常用功能,代码更优雅
- 定位元素更加容易,功能更强大稳定
- 无处不在的等待和自动重试功能。使不稳定的网络变得易于控制,程序更稳定,编写更省心
- 提供强大的下载工具,操作浏览器时也能享受快捷可靠的下载功能
- 允许反复使用已经打开的浏览器。无须每次运行从头启动浏览器,调试超方便
- 极简的定位语法,查找元素更加容易
- 集成大量常用功能,代码更优雅,功能强大稳定
- 无处不在的等待和自动重试,使不稳定的网络变得易于控制,程序更稳定,编写更省心
- 提供强大的下载工具,操作浏览器时也能享受快捷可靠的下载功能
- 允许反复使用已经打开的浏览器,无需每次运行从头启动浏览器,调试方便
- 使用 ini 文件保存常用配置,自动调用,提供便捷的设置,远离繁杂的配置项
- 内置 lxml 作为解析引擎,解析速度成几个数量级提升
- 使用 POM 模式封装,可直接用于测试,便于扩展
- 高度集成的便利功能,从每个细节中体现
- 还有很多细节,这里不一一列举,欢迎实际使用中体验:
- 还有很多细节,这里不一一列举,欢迎实际使用中体验:D
---
# 🖐🏻 免责声明
# 📝 使用条款
禁止将 DrissionPage 应用到任何可能会违反法律规定和道德约束的项目中。
友善使用 DrissionPage,遵守蜘蛛协议,禁止将 DrissionPage 用于任何可能有损他人的项目中。
如您选择使用 DrissionPage 即代表您遵守此协议,作者不承担任何由于您违反此协议带来任何的法律风险和损失。
同时,作者不对 DrissionPage 可能存在的缺陷导致的损失承担任何责任,一切后果由您承担。
允许任何人以个人身份使用或分发本项目源代码,但仅限于学习和合法非盈利目的。
个人或组织如未获得版权持有人授权,不得将本项目以源代码或二进制形式用于商业行为。
使用本项目需满足以下条款,如使用过程中出现违反任意一项条款的情形,授权自动失效。
- 禁止将DrissionPage应用到任何可能违反当地法律规定和道德约束的项目中
- 禁止将DrissionPage用于任何可能有损他人利益的项目中
- 禁止将DrissionPage用于攻击与骚扰行为
- 遵守Robots协议,禁止将DrissionPage用于采集法律或系统Robots协议不允许的数据
使用DrissionPage发生的一切行为均由使用人自行负责。
因使用DrissionPage进行任何行为所产生的一切纠纷及后果均与版权持有人无关,
版权持有人不承担任何使用DrissionPage带来的风险和损失。
版权持有人不对DrissionPage可能存在的缺陷导致的任何损失负任何责任。
---
# ☕ 请我喝咖啡
如果本项目对您有所帮助,不妨请作者我喝杯咖啡
作者是个人开发者,开发和写文档工作量较为繁重。
![](https://gitee.com/g1879/DrissionPageDocs/raw/master/static/img/code.jpg)
如果本项目对您有所帮助,不妨打赏一下作者
![](https://drissionpage.cn/code2.jpg)

View File

@ -1,34 +0,0 @@
<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="UTF-8">
<meta content="code-wXFsIBcC45" name="baidu-site-verification"/>
<title>DrissionPage</title>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<meta content="Description" name="description">
<meta content="width=device-width, initial-scale=1.0, minimum-scale=1.0" name="viewport">
<meta content="DrissionPage,文档,使用文档,教程,用户手册,api" name="Keywords">
<!-- <link href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/vue.css" rel="stylesheet">-->
<!-- Theme: Simple Dark -->
<!-- <link href="scripts/theme-simple-dark.css" rel="stylesheet">-->
<!--
<link href="//unpkg.com/gitalk/dist/gitalk.css" rel="stylesheet">
<link rel="stylesheet" href="./ignore/font.css">
-->
<!-- <style>
/* body {font-family: PingFang; } */
</style> -->
</head>
<body>
<div id="app"><a href="http://g1879.gitee.io/drissionpagedocs">DrissionPage文档</a></div>
<script>
window.location.replace("http://g1879.gitee.io/drissionpagedocs");
</script>
</body>
</html>

View File

@ -1,8 +1,8 @@
requests
lxml
cssselect
DownloadKit>=2.0.0
DownloadKit>=2.0.7
websocket-client
click
tldextract
tldextract>=3.4.4
psutil

View File

@ -13,9 +13,9 @@ setup(
description="Python based web automation tool. It can control the browser and send and receive data packets.",
long_description=long_description,
long_description_content_type="text/markdown",
license="BSD",
# license="BSD",
keywords="DrissionPage",
url="https://gitee.com/g1879/DrissionPage",
url="https://DrissionPage.cn",
include_package_data=True,
packages=find_packages(),
zip_safe=False,
@ -23,17 +23,17 @@ setup(
'lxml',
'requests',
'cssselect',
'DownloadKit>=2.0.0',
'DownloadKit>=2.0.7',
'websocket-client',
'click',
'tldextract',
'tldextract>=3.4.4',
'psutil'
],
classifiers=[
"Programming Language :: Python :: 3.6",
"Development Status :: 4 - Beta",
"Topic :: Utilities",
"License :: OSI Approved :: BSD License",
# "License :: OSI Approved :: BSD License",
],
python_requires='>=3.6',
entry_points={