继续修改备注

This commit is contained in:
g1879 2024-08-15 17:47:41 +08:00
parent 8a6e225e02
commit 45e64adc1e
13 changed files with 520 additions and 427 deletions

View File

@ -34,7 +34,6 @@ __FRAME_ELEMENT__ = ('iframe', 'frame')
class ChromiumElement(DrissionElement): class ChromiumElement(DrissionElement):
"""控制浏览器元素的对象"""
def __init__(self, owner, node_id=None, obj_id=None, backend_id=None): def __init__(self, owner, node_id=None, obj_id=None, backend_id=None):
super().__init__(owner) super().__init__(owner)
@ -423,7 +422,7 @@ class ChromiumElement(DrissionElement):
def s_ele(self, locator=None, index=1, timeout=None): def s_ele(self, locator=None, index=1, timeout=None):
return (make_session_ele(self, locator, index=index, method='s_ele()') return (make_session_ele(self, locator, index=index, method='s_ele()')
if self.ele(locator, index=index, timeout=timeout) if self.ele(locator, index=index, timeout=timeout)
else NoneElement(self, method='s_ele()', args={'locator': locator, 'index': index})) else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index}))
def s_eles(self, locator=None, timeout=None): def s_eles(self, locator=None, timeout=None):
return (make_session_ele(self, locator, index=None) return (make_session_ele(self, locator, index=None)
@ -686,7 +685,6 @@ class ChromiumElement(DrissionElement):
class ShadowRoot(BaseElement): class ShadowRoot(BaseElement):
"""ShadowRoot是用于处理ShadowRoot的类使用方法和ChromiumElement基本一致"""
def __init__(self, parent_ele, obj_id=None, backend_id=None): def __init__(self, parent_ele, obj_id=None, backend_id=None):
super().__init__(parent_ele.owner) super().__init__(parent_ele.owner)
@ -847,7 +845,7 @@ class ShadowRoot(BaseElement):
def s_ele(self, locator=None, index=1, timeout=None): def s_ele(self, locator=None, index=1, timeout=None):
return (make_session_ele(self, locator, index=index, method='s_ele()') return (make_session_ele(self, locator, index=index, method='s_ele()')
if self.ele(locator, index=index, timeout=timeout) if self.ele(locator, index=index, timeout=timeout)
else NoneElement(self, method='s_ele()', args={'locator': locator, 'index': index})) else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index}))
def s_eles(self, locator, timeout=None): def s_eles(self, locator, timeout=None):
return (make_session_ele(self, locator, index=None) return (make_session_ele(self, locator, index=None)

View File

@ -11,7 +11,8 @@ from .._base.base import BasePage
class NoneElement(object): class NoneElement(object):
def __init__(self, page: BasePage = None, def __init__(self,
page: BasePage = None,
method: str = None, method: str = None,
args: dict = None): args: dict = None):
""" """

View File

@ -341,8 +341,6 @@ sys = system().lower()
def keys_to_typing(value): def keys_to_typing(value):
"""把要输入的内容连成字符串,去掉其中 ctrl 等键。
返回的modifier表示是否有按下组合键"""
typing = [] typing = []
modifier = 0 modifier = 0
for val in value: for val in value:
@ -361,12 +359,6 @@ def keys_to_typing(value):
def make_input_data(modifiers, key, key_up=False): def make_input_data(modifiers, key, key_up=False):
"""
:param modifiers: 功能键设置
:param key: 按键字符
:param key_up: 是否提起
:return: None
"""
data = keyDefinitions.get(key) data = keyDefinitions.get(key)
if not data: if not data:
return None return None
@ -414,7 +406,6 @@ def make_input_data(modifiers, key, key_up=False):
def send_key(page, modifier, key): def send_key(page, modifier, key):
"""发送一个字,在键盘中的字符触发按键,其它直接发送文本"""
data = make_input_data(modifier, key) data = make_input_data(modifier, key)
if data: if data:
page._run_cdp('Input.dispatchKeyEvent', **data) page._run_cdp('Input.dispatchKeyEvent', **data)
@ -426,11 +417,6 @@ def send_key(page, modifier, key):
def input_text_or_keys(page, text_or_keys): def input_text_or_keys(page, text_or_keys):
"""输入文本也可输入组合键组合键用tuple形式输入
:param page: ChromiumBase对象
:param text_or_keys: 文本值或按键组合
:return: self
"""
if not isinstance(text_or_keys, (tuple, list)): if not isinstance(text_or_keys, (tuple, list)):
text_or_keys = (str(text_or_keys),) text_or_keys = (str(text_or_keys),)
modifier, text_or_keys = keys_to_typing(text_or_keys) modifier, text_or_keys = keys_to_typing(text_or_keys)

View File

@ -85,13 +85,38 @@ keyDefinitions: dict = ...
modifierBit: dict = ... modifierBit: dict = ...
def keys_to_typing(value: Union[str, int, list, tuple]) -> Tuple[int, str]: ... def keys_to_typing(value: Union[str, int, list, tuple]) -> Tuple[int, str]:
"""把要输入的内容连成字符串,去掉其中 ctrl 等键。
返回的modifier表示是否有按下组合键"""
...
def make_input_data(modifiers: int, key: str, key_up: bool = False) -> dict: ... def make_input_data(modifiers: int,
key: str,
key_up: bool = False) -> dict:
"""
:param modifiers: 功能键设置
:param key: 按键字符
:param key_up: 是否提起
:return: 按键数据dict,找不到对应按键定义时返回None
"""
...
def send_key(page: ChromiumBase, modifier: int, key: str) -> None: ... def send_key(page: ChromiumBase, modifier: int, key: str) -> None:
"""发送一个字,在键盘中的字符触发按键,其它直接发送文本
:param page: 动作所在页面
:param modifier: 功能键信息
:param key: 要输入的按键
:return: None
"""
...
def input_text_or_keys(page: ChromiumBase, text_or_keys: Any) -> None: ... def input_text_or_keys(page: ChromiumBase, text_or_keys: Any) -> None:
"""输入文本也可输入组合键组合键用tuple形式输入
:param page: ChromiumBase对象
:param text_or_keys: 文本值或按键组合
:return: None
"""
...

View File

@ -10,10 +10,6 @@ from .by import By
def locator_to_tuple(loc): def locator_to_tuple(loc):
"""解析定位字符串生成dict格式数据
:param loc: 待处理的字符串
:return: 格式 {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]}
"""
loc = _preprocess(loc) loc = _preprocess(loc)
# 多属性查找 # 多属性查找
@ -84,18 +80,11 @@ def _get_arg(text) -> list:
def is_loc(text): def is_loc(text):
"""返回text是否定位符"""
return text.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=', return text.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=',
'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c=')) 'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c='))
def get_loc(loc, translate_css=False, css_mode=False): def get_loc(loc, translate_css=False, css_mode=False):
"""接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath
:param loc: 本库定位语法或selenium定位元组
:param translate_css: 是否翻译css selector为xpath用于相对定位
:param css_mode: 是否尽量用css selector方式
:return: DrissionPage定位元组
"""
if isinstance(loc, tuple): if isinstance(loc, tuple):
loc = translate_css_loc(loc) if css_mode else translate_loc(loc) loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
@ -118,10 +107,6 @@ def get_loc(loc, translate_css=False, css_mode=False):
def str_to_xpath_loc(loc): def str_to_xpath_loc(loc):
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
loc_by = 'xpath' loc_by = 'xpath'
loc = _preprocess(loc) loc = _preprocess(loc)
@ -173,10 +158,6 @@ def str_to_xpath_loc(loc):
def str_to_css_loc(loc): def str_to_css_loc(loc):
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
loc_by = 'css selector' loc_by = 'css selector'
loc = _preprocess(loc) loc = _preprocess(loc)
@ -444,10 +425,6 @@ def _make_single_css_str(tag: str, text: str) -> tuple:
def translate_loc(loc): def translate_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2: if len(loc) != 2:
raise ValueError('定位符长度必须为2。') raise ValueError('定位符长度必须为2。')
@ -486,10 +463,6 @@ def translate_loc(loc):
def translate_css_loc(loc): def translate_css_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2: if len(loc) != 2:
raise ValueError('定位符长度必须为2。') raise ValueError('定位符长度必须为2。')

View File

@ -8,25 +8,66 @@
from typing import Union from typing import Union
def locator_to_tuple(loc: str) -> dict: ... def locator_to_tuple(loc: str) -> dict:
"""解析定位字符串生成dict格式数据
:param loc: 待处理的字符串
:return: 格式 {'and': bool, 'args': ['属性名称', '匹配方式', '属性值', 是否否定]}
"""
...
def is_loc(text: str) -> bool: ... def is_loc(text: str) -> bool:
"""返回text是否定位符"""
...
def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool = False) -> tuple: ... def get_loc(loc: Union[tuple, str],
translate_css: bool = False,
css_mode: bool = False) -> tuple:
"""接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath
:param loc: 本库定位语法或selenium定位元组
:param translate_css: 是否翻译css selector为xpath用于相对定位
:param css_mode: 是否尽量用css selector方式
:return: DrissionPage定位元组
"""
...
def str_to_xpath_loc(loc: str) -> tuple: ... def str_to_xpath_loc(loc: str) -> tuple:
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
...
def str_to_css_loc(loc: str) -> tuple: ... def str_to_css_loc(loc: str) -> tuple:
"""处理元素查找语句
:param loc: 查找语法字符串
:return: 匹配符元组
"""
...
def translate_loc(loc: tuple) -> tuple: ... def translate_loc(loc: tuple) -> tuple:
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
...
def translate_css_loc(loc: tuple) -> tuple: ... def translate_css_loc(loc: tuple) -> tuple:
"""把By类型的loc元组转换为css selector或xpath类型的
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
...
def css_trans(txt: str) -> str: ... def css_trans(txt: str) -> str:
"""css字符串中特殊字符转义
:param txt: 要处理的文本
:return: 处理后的文本
"""
...

View File

@ -24,9 +24,6 @@ class PortFinder(object):
checked_paths = set() checked_paths = set()
def __init__(self, path=None): def __init__(self, path=None):
"""
:param path: 临时文件保存路径为None时使用系统临时文件夹
"""
tmp = Path(path) if path else Path(gettempdir()) / 'DrissionPage' tmp = Path(path) if path else Path(gettempdir()) / 'DrissionPage'
self.tmp_dir = tmp / 'autoPortData' self.tmp_dir = tmp / 'autoPortData'
self.tmp_dir.mkdir(parents=True, exist_ok=True) self.tmp_dir.mkdir(parents=True, exist_ok=True)
@ -37,10 +34,6 @@ class PortFinder(object):
PortFinder.checked_paths.add(str(self.tmp_dir.absolute())) PortFinder.checked_paths.add(str(self.tmp_dir.absolute()))
def get_port(self, scope=None): def get_port(self, scope=None):
"""查找一个可用端口
:param scope: 指定端口范围不含最后的数字为None则使用[9600-59600)
:return: 可以使用的端口和用户文件夹路径组成的元组
"""
from random import randint from random import randint
with PortFinder.lock: with PortFinder.lock:
if PortFinder.prev_time and perf_counter() - PortFinder.prev_time > 60: if PortFinder.prev_time and perf_counter() - PortFinder.prev_time > 60:
@ -67,11 +60,6 @@ class PortFinder(object):
def port_is_using(ip, port): def port_is_using(ip, port):
"""检查端口是否被占用
:param ip: 浏览器地址
:param port: 浏览器端口
:return: bool
"""
from socket import socket, AF_INET, SOCK_STREAM from socket import socket, AF_INET, SOCK_STREAM
s = socket(AF_INET, SOCK_STREAM) s = socket(AF_INET, SOCK_STREAM)
s.settimeout(.1) s.settimeout(.1)
@ -81,11 +69,6 @@ def port_is_using(ip, port):
def clean_folder(folder_path, ignore=None): def clean_folder(folder_path, ignore=None):
"""清空一个文件夹除了ignore里的文件和文件夹
:param folder_path: 要清空的文件夹路径
:param ignore: 忽略列表
:return: None
"""
ignore = [] if not ignore else ignore ignore = [] if not ignore else ignore
p = Path(folder_path) p = Path(folder_path)
@ -98,11 +81,6 @@ def clean_folder(folder_path, ignore=None):
def show_or_hide_browser(page, hide=True): def show_or_hide_browser(page, hide=True):
"""执行显示或隐藏浏览器窗口
:param page: ChromePage对象
:param hide: 是否隐藏
:return: None
"""
if not page.browser.address.startswith(('127.0.0.1', 'localhost')): if not page.browser.address.startswith(('127.0.0.1', 'localhost')):
return return
@ -125,11 +103,6 @@ def show_or_hide_browser(page, hide=True):
def get_browser_progress_id(progress, address): def get_browser_progress_id(progress, address):
"""获取浏览器进程id
:param progress: 已知的进程对象没有时传入None
:param address: 浏览器管理地址含端口
:return: 进程id或None
"""
if progress: if progress:
return progress.pid return progress.pid
@ -148,11 +121,6 @@ def get_browser_progress_id(progress, address):
def get_hwnds_from_pid(pid, title): def get_hwnds_from_pid(pid, title):
"""通过PID查询句柄ID
:param pid: 进程id
:param title: 窗口标题
:return: 进程句柄组成的列表
"""
try: try:
from win32gui import IsWindow, GetWindowText, EnumWindows from win32gui import IsWindow, GetWindowText, EnumWindows
from win32process import GetWindowThreadProcessId from win32process import GetWindowThreadProcessId
@ -172,12 +140,6 @@ def get_hwnds_from_pid(pid, title):
def wait_until(function, kwargs=None, timeout=10): def wait_until(function, kwargs=None, timeout=10):
"""等待传入的方法返回值不为假
:param function: 要执行的方法
:param kwargs: 方法参数
:param timeout: 超时时间
:return: 执行结果超时抛出TimeoutError
"""
if kwargs is None: if kwargs is None:
kwargs = {} kwargs = {}
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
@ -190,22 +152,12 @@ def wait_until(function, kwargs=None, timeout=10):
def configs_to_here(save_name=None): def configs_to_here(save_name=None):
"""把默认ini文件复制到当前目录
:param save_name: 指定文件名为None则命名为'dp_configs.ini'
:return: None
"""
om = OptionsManager('default') om = OptionsManager('default')
save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini' save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini'
om.save(save_name) om.save(save_name)
def raise_error(result, ignore=None, user=False): def raise_error(result, ignore=None, user=False):
"""抛出error对应报错
:param result: 包含error的dict
:param ignore: 要忽略的错误
:param user: 是否用户调用的
:return: None
"""
error = result['error'] error = result['error']
if error in ('Cannot find context with specified id', 'Inspected target navigated or closed', if error in ('Cannot find context with specified id', 'Inspected target navigated or closed',
'No frame with given id found'): 'No frame with given id found'):

View File

@ -20,31 +20,89 @@ class PortFinder(object):
tmp_dir: Path = ... tmp_dir: Path = ...
checked_paths: set = ... checked_paths: set = ...
def __init__(self, path: Union[str, Path] = None): ... def __init__(self, path: Union[str, Path] = None):
"""
:param path: 临时文件保存路径为None时使用系统临时文件夹
"""
...
@staticmethod @staticmethod
def get_port(scope: Tuple[int, int] = None) -> Tuple[int, str]: ... def get_port(scope: Tuple[int, int] = None) -> Tuple[int, str]:
"""查找一个可用端口
:param scope: 指定端口范围不含最后的数字为None则使用[9600-59600)
:return: 可以使用的端口和用户文件夹路径组成的元组
"""
...
def port_is_using(ip: str, port: Union[str, int]) -> bool: ... def port_is_using(ip: str, port: Union[str, int]) -> bool:
"""检查端口是否被占用
:param ip: 浏览器地址
:param port: 浏览器端口
:return: bool
"""
...
def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None:
"""清空一个文件夹除了ignore里的文件和文件夹
:param folder_path: 要清空的文件夹路径
:param ignore: 忽略列表
:return: None
"""
...
def show_or_hide_browser(page: ChromiumBase, hide: bool = True) -> None: ... def show_or_hide_browser(page: ChromiumBase, hide: bool = True) -> None:
"""执行显示或隐藏浏览器窗口
:param page: ChromePage对象
:param hide: 是否隐藏
:return: None
"""
...
def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]:
"""获取浏览器进程id
:param progress: 已知的进程对象没有时传入None
:param address: 浏览器管理地址含端口
:return: 进程id或None
"""
...
def get_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... def get_hwnds_from_pid(pid: Union[str, int], title: str) -> list:
"""通过PID查询句柄ID
:param pid: 进程id
:param title: 窗口标题
:return: 进程句柄组成的列表
"""
...
def wait_until(function: callable, kwargs: dict = None, timeout: float = 10): ... def wait_until(function: callable, kwargs: dict = None, timeout: float = 10):
"""等待传入的方法返回值不为假
:param function: 要执行的方法
:param kwargs: 方法参数
:param timeout: 超时时间
:return: 执行结果超时抛出TimeoutError
"""
...
def configs_to_here(file_name: Union[Path, str] = None) -> None: ... def configs_to_here(file_name: Union[Path, str] = None) -> None:
"""把默认ini文件复制到当前目录
:param save_name: 指定文件名为None则命名为'dp_configs.ini'
:return: None
"""
...
def raise_error(result: dict, ignore=None, user: bool = False) -> None: ... def raise_error(result: dict, ignore=None, user: bool = False) -> None:
"""抛出error对应报错
:param result: 包含error的dict
:param ignore: 要忽略的错误
:param user: 是否用户调用的
:return: None
"""
...

View File

@ -16,10 +16,6 @@ from requests.structures import CaseInsensitiveDict
def get_ele_txt(e): def get_ele_txt(e):
"""获取元素内所有文本
:param e: 元素对象
:return: 元素内所有文本
"""
# 前面无须换行的元素 # 前面无须换行的元素
nowrap_list = ('br', 'sub', 'sup', 'em', 'strong', 'a', 'font', 'b', 'span', 's', 'i', 'del', 'ins', 'img', 'td', nowrap_list = ('br', 'sub', 'sup', 'em', 'strong', 'a', 'font', 'b', 'span', 's', 'i', 'del', 'ins', 'img', 'td',
'th', 'abbr', 'bdi', 'bdo', 'cite', 'code', 'data', 'dfn', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 'th', 'abbr', 'bdi', 'bdo', 'cite', 'code', 'data', 'dfn', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby',
@ -106,20 +102,10 @@ def get_ele_txt(e):
def format_html(text): def format_html(text):
"""处理html编码字符
:param text: html文本
:return: 格式化后的html文本
"""
return unescape(text).replace('\xa0', ' ') if text else text return unescape(text).replace('\xa0', ' ') if text else text
def location_in_viewport(page, loc_x, loc_y): def location_in_viewport(page, loc_x, loc_y):
"""判断给定的坐标是否在视口中 |n
:param page: ChromePage对象
:param loc_x: 页面绝对坐标x
:param loc_y: 页面绝对坐标y
:return: bool
"""
js = f'''function(){{let x = {loc_x}; let y = {loc_y}; js = f'''function(){{let x = {loc_x}; let y = {loc_y};
const scrollLeft = document.documentElement.scrollLeft; const scrollLeft = document.documentElement.scrollLeft;
const scrollTop = document.documentElement.scrollTop; const scrollTop = document.documentElement.scrollTop;
@ -131,13 +117,6 @@ def location_in_viewport(page, loc_x, loc_y):
def offset_scroll(ele, offset_x, offset_y): def offset_scroll(ele, offset_x, offset_y):
"""接收元素及偏移坐标,把坐标滚动到页面中间,返回该点坐标
有偏移量时以元素左上角坐标为基准没有时以click_point为基准
:param ele: 元素对象
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:return: 相对坐标
"""
loc_x, loc_y = ele.rect.location loc_x, loc_y = ele.rect.location
cp_x, cp_y = ele.rect.click_point cp_x, cp_y = ele.rect.click_point
lx = loc_x + offset_x if offset_x else cp_x lx = loc_x + offset_x if offset_x else cp_x
@ -154,11 +133,6 @@ def offset_scroll(ele, offset_x, offset_y):
def make_absolute_link(link, baseURI=None): def make_absolute_link(link, baseURI=None):
"""获取绝对url
:param link: 超链接
:param baseURI: 页面或iframe的url
:return: 绝对链接
"""
if not link: if not link:
return link return link
@ -182,7 +156,6 @@ def make_absolute_link(link, baseURI=None):
def is_js_func(func): def is_js_func(func):
"""检查文本是否js函数"""
func = func.strip() func = func.strip()
if (func.startswith('function') or func.startswith('async ')) and func.endswith('}'): if (func.startswith('function') or func.startswith('async ')) and func.endswith('}'):
return True return True
@ -192,12 +165,6 @@ def is_js_func(func):
def get_blob(page, url, as_bytes=True): def get_blob(page, url, as_bytes=True):
"""获取知道blob资源
:param page: 资源所在页面对象
:param url: 资源url
:param as_bytes: 是否以字节形式返回
:return: 资源内容
"""
if not url.startswith('blob'): if not url.startswith('blob'):
raise TypeError('该链接非blob类型。') raise TypeError('该链接非blob类型。')
js = """ js = """
@ -227,14 +194,6 @@ def get_blob(page, url, as_bytes=True):
def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None): def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None):
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param tab: Tab或Page对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为Ture保存为pdf否则为mhtml且忽略kwargs参数
:param kwargs: pdf生成参数
:return: as_pdf为True时返回bytes否则返回文件文本
"""
if name: if name:
if name.endswith('.pdf'): if name.endswith('.pdf'):
name = name[:-4] name = name[:-4]
@ -258,12 +217,6 @@ def save_page(tab, path=None, name=None, as_pdf=False, kwargs=None):
def get_mhtml(page, path=None, name=None): def get_mhtml(page, path=None, name=None):
"""把当前页面保存为mhtml文件如果path和name参数都为None只返回mhtml文本
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:return: mhtml文本
"""
r = page._run_cdp('Page.captureSnapshot')['data'] r = page._run_cdp('Page.captureSnapshot')['data']
if path is None and name is None: if path is None and name is None:
return r return r
@ -277,13 +230,6 @@ def get_mhtml(page, path=None, name=None):
def get_pdf(page, path=None, name=None, kwargs=None): def get_pdf(page, path=None, name=None, kwargs=None):
"""把当前页面保存为pdf文件如果path和name参数都为None只返回字节
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param kwargs: pdf生成参数
:return: pdf文本
"""
if not kwargs: if not kwargs:
kwargs = {} kwargs = {}
kwargs['transferMode'] = 'ReturnAsBase64' kwargs['transferMode'] = 'ReturnAsBase64'
@ -307,13 +253,6 @@ def get_pdf(page, path=None, name=None, kwargs=None):
def tree(ele_or_page, text=False, show_js=False, show_css=False): def tree(ele_or_page, text=False, show_js=False, show_css=False):
"""把页面或元素对象DOM结构打印出来
:param ele_or_page: 页面或元素对象
:param text: 是否打印文本输入数字可指定打印文本长度上线
:param show_js: 打印文本时是否包含<script>内文本text参数为False时无效
:param show_css: 打印文本时是否包含<style>内文本text参数为False时无效
:return: None
"""
def _tree(obj, last_one=True, body=''): def _tree(obj, last_one=True, body=''):
list_ele = obj.children() list_ele = obj.children()
@ -362,10 +301,6 @@ def tree(ele_or_page, text=False, show_js=False, show_css=False):
def format_headers(txt): def format_headers(txt):
"""从浏览器复制的文本生成dict格式headers文本用换行分隔
:param txt: 从浏览器复制的原始文本格式headers
:return: dict格式headers
"""
if isinstance(txt, (dict, CaseInsensitiveDict)): if isinstance(txt, (dict, CaseInsensitiveDict)):
for k, v in txt.items(): for k, v in txt.items():
if k in (':method', ':scheme', ':authority', ':path'): if k in (':method', ':scheme', ':authority', ':path'):

View File

@ -15,49 +15,126 @@ from .._pages.chromium_page import ChromiumPage
from .._pages.tabs import ChromiumTab from .._pages.tabs import ChromiumTab
def get_ele_txt(e: DrissionElement) -> str: ... def get_ele_txt(e: DrissionElement) -> str:
"""获取元素内所有文本
:param e: 元素对象
:return: 元素内所有文本
"""
...
def format_html(text: str) -> str: ... def format_html(text: str) -> str:
"""处理html编码字符
:param text: html文本
:return: 格式化后的html文本
"""
...
def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool: ... def location_in_viewport(page: ChromiumBase, loc_x: float, loc_y: float) -> bool:
"""判断给定的坐标是否在视口中
:param page: ChromePage对象
:param loc_x: 页面绝对坐标x
:param loc_y: 页面绝对坐标y
:return: bool
"""
...
def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> Tuple[int, int]: ... def offset_scroll(ele: ChromiumElement, offset_x: float, offset_y: float) -> Tuple[int, int]:
"""接收元素及偏移坐标,把坐标滚动到页面中间,返回该点坐标
有偏移量时以元素左上角坐标为基准没有时以click_point为基准
:param ele: 元素对象
:param offset_x: 偏移量x
:param offset_y: 偏移量y
:return: 相对坐标
"""
...
def make_absolute_link(link: str, baseURI: str = None) -> str: ... def make_absolute_link(link: str, baseURI: str = None) -> str:
"""获取绝对url
:param link: 超链接
:param baseURI: 页面或iframe的url
:return: 绝对链接
"""
...
def is_js_func(func: str) -> bool: ... def is_js_func(func: str) -> bool:
"""检查文本是否js函数"""
...
def get_blob(page: ChromiumBase, url: str, as_bytes: bool = True) -> bytes: ... def get_blob(page: ChromiumBase, url: str, as_bytes: bool = True) -> bytes:
"""获取指定的blob资源
:param page: 资源所在页面对象
:param url: 资源url
:param as_bytes: 是否以字节形式返回
:return: 资源内容
"""
...
def save_page(tab: Union[ChromiumPage, ChromiumTab], def save_page(tab: Union[ChromiumPage, ChromiumTab],
path: Union[Path, str, None] = None, path: Union[Path, str, None] = None,
name: Optional[str] = None, name: Optional[str] = None,
as_pdf: bool = False, as_pdf: bool = False,
kwargs: dict = None) -> Union[bytes, str]: ... kwargs: dict = None) -> Union[bytes, str]:
"""把当前页面保存为文件如果path和name参数都为None只返回文本
:param tab: Tab或Page对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param as_pdf: 为True时保存为pdf,否则为mhtml且忽略kwargs参数
:param kwargs: pdf生成参数
:return: as_pdf为True时返回bytes否则返回文件文本
"""
...
def get_mhtml(page: Union[ChromiumPage, ChromiumTab], def get_mhtml(page: Union[ChromiumPage, ChromiumTab],
path: Optional[Path] = None, path: Optional[Path] = None,
name: Optional[str] = None) -> Union[bytes, str]: ... name: Optional[str] = None) -> Union[bytes, str]:
"""把当前页面保存为mhtml文件如果path和name参数都为None只返回mhtml文本
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:return: mhtml文本
"""
...
def get_pdf(page: Union[ChromiumPage, ChromiumTab], def get_pdf(page: Union[ChromiumPage, ChromiumTab],
path: Optional[Path] = None, path: Optional[Path] = None,
name: Optional[str] = None, name: Optional[str] = None,
kwargs: dict = None) -> Union[bytes, str]: ... kwargs: dict = None) -> Union[bytes, str]:
"""把当前页面保存为pdf文件如果path和name参数都为None只返回字节
:param page: 要保存的页面对象
:param path: 保存路径为None且name不为None时保存在当前路径
:param name: 文件名为None且path不为None时用title属性值
:param kwargs: pdf生成参数
:return: pdf文本
"""
...
def tree(ele_or_page: BaseParser, def tree(ele_or_page: BaseParser,
text: Union[int, bool] = False, text: Union[int, bool] = False,
show_js: bool = False, show_js: bool = False,
show_css: bool = False) -> None: ... show_css: bool = False) -> None:
"""把页面或元素对象DOM结构打印出来
:param ele_or_page: 页面或元素对象
:param text: 是否打印文本,输入数字可指定打印文本长度上限
:param show_js: 打印文本时是否包含<script>内文本text参数为False时无效
:param show_css: 打印文本时是否包含<style>内文本text参数为False时无效
:return: None
"""
...
def format_headers(txt: str) -> dict: ... def format_headers(txt: str) -> dict:
"""从浏览器复制的文本生成dict格式headers文本用换行分隔
:param txt: 从浏览器复制的原始文本格式headers
:return: dict格式headers
"""
...

View File

@ -42,11 +42,6 @@ class ChromiumFrame(ChromiumBase):
return r return r
def __init__(self, owner, ele, info=None): def __init__(self, owner, ele, info=None):
"""
:param owner: frame所在的页面对象
:param ele: frame所在元素
:param info: frame所在元素信息
"""
if Settings.singleton_tab_obj and hasattr(self, '_created'): if Settings.singleton_tab_obj and hasattr(self, '_created'):
return return
self._created = True self._created = True
@ -73,13 +68,6 @@ class ChromiumFrame(ChromiumBase):
self._type = 'ChromiumFrame' self._type = 'ChromiumFrame'
def __call__(self, locator, index=1, timeout=None): def __call__(self, locator, index=1, timeout=None):
"""在内部查找元素
ele2 = ele1('@id=ele_id')
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: ChromiumElement对象或属性文本
"""
return self.ele(locator, index=index, timeout=timeout) return self.ele(locator, index=index, timeout=timeout)
def __eq__(self, other): def __eq__(self, other):
@ -90,7 +78,6 @@ class ChromiumFrame(ChromiumBase):
return f'<ChromiumFrame {self.frame_ele.tag} {" ".join(attrs)}>' return f'<ChromiumFrame {self.frame_ele.tag} {" ".join(attrs)}>'
def _d_set_runtime_settings(self): def _d_set_runtime_settings(self):
"""重写设置浏览器运行参数方法"""
if not hasattr(self, '_timeouts'): if not hasattr(self, '_timeouts'):
self._timeouts = copy(self._target_page.timeouts) self._timeouts = copy(self._target_page.timeouts)
self.retry_times = self._target_page.retry_times self.retry_times = self._target_page.retry_times
@ -99,10 +86,6 @@ class ChromiumFrame(ChromiumBase):
self._load_mode = self._target_page._load_mode if not self._is_diff_domain else 'normal' self._load_mode = self._target_page._load_mode if not self._is_diff_domain else 'normal'
def _driver_init(self, target_id, is_init=True): def _driver_init(self, target_id, is_init=True):
"""避免出现服务器500错误
:param target_id: 要跳转到的target id
:return: None
"""
try: try:
super()._driver_init(target_id) super()._driver_init(target_id)
except: except:
@ -113,7 +96,6 @@ class ChromiumFrame(ChromiumBase):
self._driver.set_callback('Page.frameDetached', self._onFrameDetached, immediate=True) self._driver.set_callback('Page.frameDetached', self._onFrameDetached, immediate=True)
def _reload(self): def _reload(self):
"""重新获取document"""
self._is_loading = True self._is_loading = True
# d_debug = self.driver._debug # d_debug = self.driver._debug
self._reloading = True self._reloading = True
@ -159,10 +141,6 @@ class ChromiumFrame(ChromiumBase):
self._reloading = False self._reloading = False
def _get_document(self, timeout=10): def _get_document(self, timeout=10):
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._is_reading: if self._is_reading:
return return
@ -193,11 +171,11 @@ class ChromiumFrame(ChromiumBase):
self._is_reading = False self._is_reading = False
def _onInspectorDetached(self, **kwargs): def _onInspectorDetached(self, **kwargs):
"""异域转同域或退出""" # 异域转同域或退出
self._reload() self._reload()
def _onFrameDetached(self, **kwargs): def _onFrameDetached(self, **kwargs):
"""同域变异域""" # 同域变异域
self.browser._frames.pop(kwargs['frameId'], None) self.browser._frames.pop(kwargs['frameId'], None)
ChromiumFrame._Frames.pop(kwargs['frameId'], None) ChromiumFrame._Frames.pop(kwargs['frameId'], None)
if kwargs['frameId'] == self._frame_id: if kwargs['frameId'] == self._frame_id:
@ -206,7 +184,6 @@ class ChromiumFrame(ChromiumBase):
# ----------挂件---------- # ----------挂件----------
@property @property
def scroll(self): def scroll(self):
"""返回用于滚动的对象"""
self.wait.doc_loaded() self.wait.doc_loaded()
if self._scroll is None: if self._scroll is None:
self._scroll = FrameScroller(self) self._scroll = FrameScroller(self)
@ -214,35 +191,30 @@ class ChromiumFrame(ChromiumBase):
@property @property
def set(self): def set(self):
"""返回用于设置的对象"""
if self._set is None: if self._set is None:
self._set = ChromiumFrameSetter(self) self._set = ChromiumFrameSetter(self)
return self._set return self._set
@property @property
def states(self): def states(self):
"""返回用于获取状态信息的对象"""
if self._states is None: if self._states is None:
self._states = FrameStates(self) self._states = FrameStates(self)
return self._states return self._states
@property @property
def wait(self): def wait(self):
"""返回用于等待的对象"""
if self._wait is None: if self._wait is None:
self._wait = FrameWaiter(self) self._wait = FrameWaiter(self)
return self._wait return self._wait
@property @property
def rect(self): def rect(self):
"""返回获取坐标和大小的对象"""
if self._rect is None: if self._rect is None:
self._rect = FrameRect(self) self._rect = FrameRect(self)
return self._rect return self._rect
@property @property
def listen(self): def listen(self):
"""返回用于聆听数据包的对象"""
if self._listener is None: if self._listener is None:
self._listener = FrameListener(self) self._listener = FrameListener(self)
return self._listener return self._listener
@ -251,32 +223,26 @@ class ChromiumFrame(ChromiumBase):
@property @property
def _obj_id(self): def _obj_id(self):
"""返回frame元素的object id"""
return self.frame_ele._obj_id return self.frame_ele._obj_id
@property @property
def _node_id(self): def _node_id(self):
"""返回cdp中的node id"""
return self.frame_ele._node_id return self.frame_ele._node_id
@property @property
def owner(self): def owner(self):
"""返回所属页面对象"""
return self.frame_ele.owner return self.frame_ele.owner
@property @property
def frame_ele(self): def frame_ele(self):
"""返回总页面上的frame元素"""
return self._frame_ele return self._frame_ele
@property @property
def tag(self): def tag(self):
"""返回元素tag"""
return self.frame_ele.tag return self.frame_ele.tag
@property @property
def url(self): def url(self):
"""返回frame当前访问的url"""
try: try:
return self.doc_ele._run_js('return this.location.href;') return self.doc_ele._run_js('return this.location.href;')
except JavaScriptError: except JavaScriptError:
@ -284,7 +250,6 @@ class ChromiumFrame(ChromiumBase):
@property @property
def html(self): def html(self):
"""返回元素outerHTML文本"""
tag = self.tag tag = self.tag
out_html = self._target_page._run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML'] out_html = self._target_page._run_cdp('DOM.getOuterHTML', backendNodeId=self.frame_ele._backend_id)['outerHTML']
sign = search(rf'<{tag}.*?>', out_html, DOTALL).group(0) sign = search(rf'<{tag}.*?>', out_html, DOTALL).group(0)
@ -292,43 +257,35 @@ class ChromiumFrame(ChromiumBase):
@property @property
def inner_html(self): def inner_html(self):
"""返回元素innerHTML文本"""
return self.doc_ele._run_js('return this.documentElement.outerHTML;') return self.doc_ele._run_js('return this.documentElement.outerHTML;')
@property @property
def title(self): def title(self):
"""返回页面title"""
r = self._ele('t:title', raise_err=False) r = self._ele('t:title', raise_err=False)
return r.text if r else None return r.text if r else None
@property @property
def attrs(self): def attrs(self):
"""返回frame元素所有attribute属性"""
return self.frame_ele.attrs return self.frame_ele.attrs
@property @property
def active_ele(self): def active_ele(self):
"""返回当前焦点所在元素"""
return self.doc_ele._run_js('return this.activeElement;') return self.doc_ele._run_js('return this.activeElement;')
@property @property
def xpath(self): def xpath(self):
"""返回frame的xpath绝对路径"""
return self.frame_ele.xpath return self.frame_ele.xpath
@property @property
def css_path(self): def css_path(self):
"""返回frame的css selector绝对路径"""
return self.frame_ele.css_path return self.frame_ele.css_path
@property @property
def tab(self): def tab(self):
"""返回frame所在tab的id"""
return self._tab return self._tab
@property @property
def tab_id(self): def tab_id(self):
"""返回frame所在tab的id"""
return self.tab.tab_id return self.tab.tab_id
@property @property
@ -337,17 +294,14 @@ class ChromiumFrame(ChromiumBase):
@property @property
def sr(self): def sr(self):
"""返回iframe的shadow-root元素对象"""
return self.frame_ele.sr return self.frame_ele.sr
@property @property
def shadow_root(self): def shadow_root(self):
"""返回iframe的shadow-root元素对象"""
return self.frame_ele.sr return self.frame_ele.sr
@property @property
def _js_ready_state(self): def _js_ready_state(self):
"""返回当前页面加载状态,'loading' 'interactive' 'complete'"""
if self._is_diff_domain: if self._is_diff_domain:
return super()._js_ready_state return super()._js_ready_state
@ -363,165 +317,58 @@ class ChromiumFrame(ChromiumBase):
return None return None
def refresh(self): def refresh(self):
"""刷新frame页面"""
self.doc_ele._run_js('this.location.reload();') self.doc_ele._run_js('this.location.reload();')
def property(self, name): def property(self, name):
"""返回frame元素一个property属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
return self.frame_ele.property(name) return self.frame_ele.property(name)
def attr(self, name): def attr(self, name):
"""返回frame元素一个attribute属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
return self.frame_ele.attr(name) return self.frame_ele.attr(name)
def remove_attr(self, name): def remove_attr(self, name):
"""删除frame元素attribute属性
:param name: 属性名
:return: None
"""
self.frame_ele.remove_attr(name) self.frame_ele.remove_attr(name)
def run_js(self, script, *args, as_expr=False, timeout=None): def run_js(self, script, *args, as_expr=False, timeout=None):
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
return self._run_js(script, *args, as_expr=as_expr, timeout=timeout) return self._run_js(script, *args, as_expr=as_expr, timeout=timeout)
def _run_js(self, script, *args, as_expr=False, timeout=None): def _run_js(self, script, *args, as_expr=False, timeout=None):
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
if script.startswith('this.scrollIntoView'): if script.startswith('this.scrollIntoView'):
return self.frame_ele._run_js(script, *args, as_expr=as_expr, timeout=timeout) return self.frame_ele._run_js(script, *args, as_expr=as_expr, timeout=timeout)
else: else:
return self.doc_ele._run_js(script, *args, as_expr=as_expr, timeout=timeout) return self.doc_ele._run_js(script, *args, as_expr=as_expr, timeout=timeout)
def parent(self, level_or_loc=1, index=1, timeout=0): def parent(self, level_or_loc=1, index=1, timeout=0):
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:param timeout: 查找超时时间
:return: 上级元素对象
"""
return self.frame_ele.parent(level_or_loc, index, timeout=timeout) return self.frame_ele.parent(level_or_loc, index, timeout=timeout)
def prev(self, locator='', index=1, timeout=0, ele_only=True): def prev(self, locator='', index=1, timeout=0, ele_only=True):
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
return self.frame_ele.prev(locator, index, timeout, ele_only=ele_only) return self.frame_ele.prev(locator, index, timeout, ele_only=ele_only)
def next(self, locator='', index=1, timeout=0, ele_only=True): def next(self, locator='', index=1, timeout=0, ele_only=True):
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
return self.frame_ele.next(locator, index, timeout, ele_only=ele_only) return self.frame_ele.next(locator, index, timeout, ele_only=ele_only)
def before(self, locator='', index=1, timeout=None, ele_only=True): def before(self, locator='', index=1, timeout=None, ele_only=True):
"""返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
return self.frame_ele.before(locator, index, timeout, ele_only=ele_only) return self.frame_ele.before(locator, index, timeout, ele_only=ele_only)
def after(self, locator='', index=1, timeout=None, ele_only=True): def after(self, locator='', index=1, timeout=None, ele_only=True):
"""返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
return self.frame_ele.after(locator, index, timeout, ele_only=ele_only) return self.frame_ele.after(locator, index, timeout, ele_only=ele_only)
def prevs(self, locator='', timeout=0, ele_only=True): def prevs(self, locator='', timeout=0, ele_only=True):
"""返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
return self.frame_ele.prevs(locator, timeout, ele_only=ele_only) return self.frame_ele.prevs(locator, timeout, ele_only=ele_only)
def nexts(self, locator='', timeout=0, ele_only=True): def nexts(self, locator='', timeout=0, ele_only=True):
"""返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
return self.frame_ele.nexts(locator, timeout, ele_only=ele_only) return self.frame_ele.nexts(locator, timeout, ele_only=ele_only)
def befores(self, locator='', timeout=None, ele_only=True): def befores(self, locator='', timeout=None, ele_only=True):
"""返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
return self.frame_ele.befores(locator, timeout, ele_only=ele_only) return self.frame_ele.befores(locator, timeout, ele_only=ele_only)
def afters(self, locator='', timeout=None, ele_only=True): def afters(self, locator='', timeout=None, ele_only=True):
"""返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
return self.frame_ele.afters(locator, timeout, ele_only=ele_only) return self.frame_ele.afters(locator, timeout, ele_only=ele_only)
def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None): def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None):
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:return: 图片完整路径或字节文本
"""
return self.frame_ele.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64) return self.frame_ele.get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64)
def _get_screenshot(self, path=None, name=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None, def _get_screenshot(self, path=None, name=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None,
full_page=False, left_top=None, right_bottom=None, ele=None): full_page=False, left_top=None, right_bottom=None, ele=None):
"""实现截图
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角角坐标
:param ele: 为异域iframe内元素截图设置
:return: 图片完整路径或字节文本
"""
if not self._is_diff_domain: if not self._is_diff_domain:
return super().get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64, return super().get_screenshot(path=path, name=name, as_bytes=as_bytes, as_base64=as_base64,
full_page=full_page, left_top=left_top, right_bottom=right_bottom) full_page=full_page, left_top=left_top, right_bottom=right_bottom)
@ -587,14 +434,6 @@ class ChromiumFrame(ChromiumBase):
return r return r
def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None): def _find_elements(self, locator, timeout=None, index=1, relative=False, raise_err=None):
"""在frame内查找单个元素
:param locator: 定位符或元素对象
:param timeout: 查找超时时间
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象
"""
if isinstance(locator, ChromiumElement): if isinstance(locator, ChromiumElement):
return locator return locator
self.wait.doc_loaded() self.wait.doc_loaded()
@ -602,5 +441,4 @@ class ChromiumFrame(ChromiumBase):
raise_err=raise_err) if index is not None else self.doc_ele.eles(locator, timeout) raise_err=raise_err) if index is not None else self.doc_ele.eles(locator, timeout)
def _is_inner_frame(self): def _is_inner_frame(self):
"""返回当前frame是否同域"""
return self._frame_id in str(self._target_page._run_cdp('Page.getFrameTree')['frameTree']) return self._frame_id in str(self._target_page._run_cdp('Page.getFrameTree')['frameTree'])

View File

@ -6,7 +6,7 @@
@License : BSD 3-Clause. @License : BSD 3-Clause.
""" """
from pathlib import Path from pathlib import Path
from typing import Union, Tuple, List, Any, Optional from typing import Union, Tuple, List, Any, Optional, Literal
from .chromium_base import ChromiumBase from .chromium_base import ChromiumBase
from .tabs import ChromiumTab, MixTab from .tabs import ChromiumTab, MixTab
@ -22,76 +22,101 @@ from .._units.waiter import FrameWaiter
class ChromiumFrame(ChromiumBase): class ChromiumFrame(ChromiumBase):
_Frames: dict = ... _Frames: dict = ...
_target_page: Union[ChromiumTab, ChromiumFrame] = ...
_tab: Union[MixTab, ChromiumTab] = ...
_set: ChromiumFrameSetter = ...
_frame_ele: ChromiumElement = ...
_backend_id: int = ...
_doc_ele: ChromiumElement = ...
_is_diff_domain: bool = ...
doc_ele: ChromiumElement = ...
_states: FrameStates = ...
_reloading: bool = ...
_rect: Optional[FrameRect] = ...
_listener: FrameListener = ...
def __init__(self, def __init__(self,
owner: Union[ChromiumTab, ChromiumFrame], owner: Union[ChromiumTab, ChromiumFrame],
ele: ChromiumElement, ele: ChromiumElement,
info: dict = None): info: dict = None):
self._target_page: Union[ChromiumTab, ChromiumFrame] = ... """
self._tab: Union[MixTab, ChromiumTab] = ... :param owner: frame所在的页面对象
self._set: ChromiumFrameSetter = ... :param ele: frame所在元素
self._frame_ele: ChromiumElement = ... :param info: frame所在元素信息
self._backend_id: int = ... """
self._doc_ele: ChromiumElement = ... ...
self._is_diff_domain: bool = ...
self.doc_ele: ChromiumElement = ...
self._states: FrameStates = ...
self._reloading: bool = ...
self._rect: Optional[FrameRect] = ...
self._listener: FrameListener = ...
def __call__(self, def __call__(self,
locator: Union[Tuple[str, str], str], locator: Union[Tuple[str, str], str],
index: int = 1, index: int = 1,
timeout: float = None) -> ChromiumElement: ... timeout: float = None) -> ChromiumElement:
"""在内部查找元素
:param locator: 元素的定位信息可以是loc元组或查询字符串
:param index: 获取第几个从1开始可传入负数获取倒数第几个
:param timeout: 超时时间
:return: ChromiumElement对象或属性文本
"""
...
def __eq__(self, other: ChromiumFrame) -> bool: ... def __eq__(self, other: ChromiumFrame) -> bool: ...
def _check_alive(self) -> None: ...
def __repr__(self) -> str: ... def __repr__(self) -> str: ...
def _d_set_runtime_settings(self) -> None: ... def _d_set_runtime_settings(self) -> None:
"""重写设置浏览器运行参数方法"""
...
def _driver_init(self, target_id: str, is_init: bool = True) -> None: ... def _driver_init(self, target_id: str, is_init: bool = True) -> None:
"""避免出现服务器500错误
:param target_id: 要跳转到的target id
:return: None
"""
...
def _reload(self) -> None: ... def _reload(self) -> None:
"""重新获取document"""
...
def _get_document(self, timeout: float = 10) -> bool: ... def _get_document(self, timeout: float = 10) -> bool:
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
...
def _onFrameStoppedLoading(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...
def _onInspectorDetached(self, **kwargs): ... def _onInspectorDetached(self, **kwargs): ...
@property @property
def owner(self) -> ChromiumBase: ... def scroll(self) -> FrameScroller:
"""返回用于滚动的对象"""
...
@property @property
def frame_ele(self) -> ChromiumElement: ... def set(self) -> ChromiumFrameSetter:
"""返回用于设置的对象"""
...
@property @property
def tag(self) -> str: ... def states(self) -> FrameStates:
"""返回用于获取状态信息的对象"""
...
@property @property
def url(self) -> str: ... def wait(self) -> FrameWaiter:
"""返回用于等待的对象"""
...
@property @property
def html(self) -> str: ... def rect(self) -> FrameRect:
"""返回获取坐标和大小的对象"""
...
@property @property
def inner_html(self) -> str: ... def listen(self) -> FrameListener:
"""返回用于聆听数据包的对象"""
@property ...
def title(self) -> str: ...
@property
def attrs(self) -> dict: ...
@property
def rect(self) -> FrameRect: ...
@property
def listen(self) -> FrameListener: ...
@property @property
def _obj_id(self) -> str: ... def _obj_id(self) -> str: ...
@ -100,115 +125,276 @@ class ChromiumFrame(ChromiumBase):
def _node_id(self) -> int: ... def _node_id(self) -> int: ...
@property @property
def active_ele(self) -> ChromiumElement: ... def owner(self) -> ChromiumBase:
"""返回所属页面对象"""
...
@property @property
def xpath(self) -> str: ... def frame_ele(self) -> ChromiumElement:
"""返回总页面上的frame元素"""
...
@property @property
def css_path(self) -> str: ... def tag(self) -> str:
"""返回元素tag"""
...
@property @property
def scroll(self) -> FrameScroller: ... def url(self) -> str:
"""返回frame当前访问的url"""
...
@property @property
def set(self) -> ChromiumFrameSetter: ... def html(self) -> str:
"""返回元素outerHTML文本"""
...
@property @property
def states(self) -> FrameStates: ... def inner_html(self) -> str:
"""返回元素innerHTML文本"""
...
@property @property
def wait(self) -> FrameWaiter: ... def title(self) -> str:
"""返回页面title"""
...
@property @property
def tab(self) -> Union[ChromiumTab, MixTab]: ... def attrs(self) -> dict:
"""返回frame元素所有attribute属性"""
...
@property @property
def tab_id(self) -> str: ... def active_ele(self) -> ChromiumElement:
"""返回当前焦点所在元素"""
...
@property @property
def download_path(self) -> str: ... def xpath(self) -> str:
"""返回frame的xpath绝对路径"""
...
@property @property
def sr(self) -> Union[None, ShadowRoot]: ... def css_path(self) -> str:
"""返回frame的css selector绝对路径"""
...
@property @property
def shadow_root(self) -> Union[None, ShadowRoot]: ... def tab(self) -> Union[ChromiumTab, MixTab]:
"""返回frame所在的tab对象"""
...
def refresh(self) -> None: ... @property
def tab_id(self) -> str:
"""返回frame所在tab的id"""
...
def property(self, name: str) -> Union[str, None]: ... @property
def download_path(self) -> str:
"""返回下载文件保存路径"""
...
def attr(self, name: str) -> Union[str, None]: ... @property
def sr(self) -> Union[None, ShadowRoot]:
"""返回iframe的shadow-root元素对象"""
...
def remove_attr(self, name: str) -> None: ... @property
def shadow_root(self) -> Union[None, ShadowRoot]:
"""返回iframe的shadow-root元素对象"""
...
@property
def _js_ready_state(self)->Literal['loading', 'interactive', 'complete']:
"""返回当前页面加载状态"""
...
def refresh(self) -> None:
"""刷新frame页面"""
...
def property(self, name: str) -> Union[str, None]:
"""返回frame元素一个property属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
...
def attr(self, name: str) -> Union[str, None]:
"""返回frame元素一个attribute属性值
:param name: 属性名
:return: 属性值文本没有该属性返回None
"""
...
def remove_attr(self, name: str) -> None:
"""删除frame元素attribute属性
:param name: 属性名
:return: None
"""
...
def run_js(self, def run_js(self,
script: str, script: str,
*args, *args,
as_expr: bool = False, as_expr: bool = False,
timeout: float = None) -> Any: ... timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def _run_js(self, def _run_js(self,
script: str, script: str,
*args, *args,
as_expr: bool = False, as_expr: bool = False,
timeout: float = None) -> Any: ... timeout: float = None) -> Any:
"""运行javascript代码
:param script: js文本
:param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效
:param timeout: js超时时间为None则使用页面timeouts.script设置
:return: 运行的结果
"""
...
def parent(self, def parent(self,
level_or_loc: Union[Tuple[str, str], str, int] = 1, level_or_loc: Union[Tuple[str, str], str, int] = 1,
index: int = 1, index: int = 1,
timeout: float = 0) -> ChromiumElement: ... timeout: float = 0) -> ChromiumElement:
"""返回上面某一级父元素,可指定层数或用查询语法定位
:param level_or_loc: 第几级父元素1开始或定位符
:param index: 当level_or_loc传入定位符使用此参数选择第几个结果1开始
:param timeout: 查找超时时间
:return: 上级元素对象
"""
...
def prev(self, def prev(self,
locator: Union[Tuple[str, str], str, int] = '', locator: Union[Tuple[str, str], str, int] = '',
index: int = 1, index: int = 1,
timeout: float = 0, timeout: float = 0,
ele_only: bool = True) -> Union[ChromiumElement, str]: ... ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
...
def next(self, def next(self,
locator: Union[Tuple[str, str], str, int] = '', locator: Union[Tuple[str, str], str, int] = '',
index: int = 1, index: int = 1,
timeout: float = 0, timeout: float = 0,
ele_only: bool = True) -> Union[ChromiumElement, str]: ... ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点
"""
...
def before(self, def before(self,
locator: Union[Tuple[str, str], str, int] = '', locator: Union[Tuple[str, str], str, int] = '',
index: int = 1, index: int = 1,
timeout: float = None, timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str]: ... ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 前面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的某个元素或节点
"""
...
def after(self, def after(self,
locator: Union[Tuple[str, str], str, int] = '', locator: Union[Tuple[str, str], str, int] = '',
index: int = 1, index: int = 1,
timeout: float = None, timeout: float = None,
ele_only: bool = True) -> Union[ChromiumElement, str]: ... ele_only: bool = True) -> Union[ChromiumElement, str]:
"""返回文档中当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param index: 后面第几个查询结果1开始
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的某个元素或节点
"""
...
def prevs(self, def prevs(self,
locator: Union[Tuple[str, str], str] = '', locator: Union[Tuple[str, str], str] = '',
timeout: float = 0, timeout: float = 0,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回当前元素前面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def nexts(self, def nexts(self,
locator: Union[Tuple[str, str], str] = '', locator: Union[Tuple[str, str], str] = '',
timeout: float = 0, timeout: float = 0,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回当前元素后面符合条件的同级元素或节点组成的列表,可用查询语法筛选
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 同级元素或节点文本组成的列表
"""
...
def befores(self, def befores(self,
locator: Union[Tuple[str, str], str] = '', locator: Union[Tuple[str, str], str] = '',
timeout: float = None, timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回文档中当前元素前面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素前面的元素或节点组成的列表
"""
...
def afters(self, def afters(self,
locator: Union[Tuple[str, str], str] = '', locator: Union[Tuple[str, str], str] = '',
timeout: float = None, timeout: float = None,
ele_only: bool = True) -> List[Union[ChromiumElement, str]]: ... ele_only: bool = True) -> List[Union[ChromiumElement, str]]:
"""返回文档中当前元素后面符合条件的元素或节点组成的列表,可用查询语法筛选
查找范围不限同级元素而是整个DOM文档
:param locator: 用于筛选的查询语法
:param timeout: 查找节点的超时时间
:param ele_only: 是否只获取元素为False时把文本注释节点也纳入
:return: 本元素后面的元素或节点组成的列表
"""
...
def get_screenshot(self, def get_screenshot(self,
path: [str, Path] = None, path: [str, Path] = None,
name: str = None, name: str = None,
as_bytes: [bool, str] = None, as_bytes: [bool, str] = None,
as_base64: [bool, str] = None) -> Union[str, bytes]: ... as_base64: [bool, str] = None) -> Union[str, bytes]:
"""对页面进行截图可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:return: 图片完整路径或字节文本
"""
...
def _get_screenshot(self, def _get_screenshot(self,
path: [str, Path] = None, path: [str, Path] = None,
@ -218,13 +404,36 @@ class ChromiumFrame(ChromiumBase):
full_page: bool = False, full_page: bool = False,
left_top: Tuple[int, int] = None, left_top: Tuple[int, int] = None,
right_bottom: Tuple[int, int] = None, right_bottom: Tuple[int, int] = None,
ele: ChromiumElement = None) -> Union[str, bytes]: ... ele: ChromiumElement = None) -> Union[str, bytes]:
"""实现截图
:param path: 文件保存路径
:param name: 完整文件名后缀可选 'jpg','jpeg','png','webp'
:param as_bytes: 是否以字节形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数和as_base64参数无效
:param as_base64: 是否以base64字符串形式返回图片可选 'jpg','jpeg','png','webp'生效时path参数无效
:param full_page: 是否整页截图为True截取整个网页为False截取可视窗口
:param left_top: 截取范围左上角坐标
:param right_bottom: 截取范围右下角坐标
:param ele: 为异域iframe内元素截图设置
:return: 图片完整路径或字节文本
"""
...
def _find_elements(self, def _find_elements(self,
locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], locator: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],
timeout: float = None, timeout: float = None,
index: Optional[int] = 1, index: Optional[int] = 1,
relative: bool = False, relative: bool = False,
raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, None, ChromiumElementsList]: ... raise_err: bool = None) -> Union[ChromiumElement, ChromiumFrame, None, ChromiumElementsList]:
"""在frame内查找元素
:param locator: 定位符或元素对象
:param timeout: 查找超时时间
:param index: 第几个结果从1开始可传入负数获取倒数第几个为None返回所有
:param relative: MixTab用的表示是否相对定位的参数
:param raise_err: 找不到元素时是否抛出异常,为None时根据全局设置
:return: ChromiumElement对象
"""
...
def _is_inner_frame(self) -> bool: ... def _is_inner_frame(self) -> bool:
"""返回当前frame是否同域"""
...

View File

@ -6,7 +6,7 @@
@License : BSD 3-Clause. @License : BSD 3-Clause.
""" """
from queue import Queue from queue import Queue
from typing import Union, Dict, List, Iterable, Optional, Literal, Any from typing import Union, List, Iterable, Optional, Literal, Any
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict