3.0.20修复页面刷新时自动切换d模式的问题

This commit is contained in:
g1879 2022-12-06 23:23:09 +08:00
parent 9810f0f280
commit ecae46ae19
6 changed files with 288 additions and 18 deletions

View File

@ -116,16 +116,14 @@ class ChromiumBase(BasePage):
return False
def _onFrameStartedLoading(self, **kwargs):
    """Handle the event fired when a frame starts loading.

    The loading flag is raised only when the event belongs to the frame
    identified by ``self.tab_id``; events for any other frame are ignored.

    :param kwargs: event payload; must contain the key ``'frameId'``
    :return: None
    """
    if kwargs['frameId'] == self.tab_id:
        self._is_loading = True
        # NOTE(review): the debug print is assumed to live inside the frame
        # check (the original indentation was lost) -- confirm upstream.
        if self._debug:
            print('FrameStartedLoading')
def _onFrameStoppedLoading(self, **kwargs):
"""页面跳转时触发"""
# print('FrameStoppedLoading')
"""页面加载完成后触发"""
if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading:
if self._debug:
print('FrameStoppedLoading')
@ -216,7 +214,7 @@ class ChromiumBase(BasePage):
@property
def ready_state(self) -> str:
    """Return the current page load state: 'loading', 'interactive' or 'complete'."""
    # Evaluate document.readyState through self._tab_obj; the earlier
    # self._driver based return is dropped so that this one is actually reached.
    response = self._tab_obj.Runtime.evaluate(expression='document.readyState;')
    return response['result']['value']
@property
def size(self) -> dict:
@ -300,14 +298,32 @@ class ChromiumBase(BasePage):
:param timeout: 连接超时时间
:return: 目标url是否可用返回None表示不确定
"""
retry, interval = self._before_connect(url, retry, interval)
self._url_available = self._d_connect(self._url,
times=retry,
interval=interval,
show_errmsg=show_errmsg,
timeout=timeout)
self._url_available = self._get(url, show_errmsg, retry, interval, timeout)
return self._url_available
def _get(self,
         url: str,
         show_errmsg: bool = False,
         retry: int = None,
         interval: float = None,
         timeout: float = None,
         frame_id: str = None) -> Union[None, bool]:
    """Visit a url, optionally targeting a specific frame.

    :param url: target url
    :param show_errmsg: whether to display and raise errors
    :param retry: number of retries
    :param interval: interval between retries
    :param timeout: connection timeout
    :param frame_id: id of the frame to navigate; forwarded unchanged to ``_d_connect``
    :return: whether the target url is available; None means it could not be determined
    """
    retry, interval = self._before_connect(url, retry, interval)
    # self._url (not the raw ``url`` argument) is what gets navigated to;
    # presumably _before_connect stores the prepared url there -- confirm.
    return self._d_connect(self._url,
                           times=retry,
                           interval=interval,
                           show_errmsg=show_errmsg,
                           timeout=timeout,
                           frame_id=frame_id)
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
"""获取cookies信息 \n
:param as_dict: 为True时返回由{name: value}键值对组成的dict
@ -499,7 +515,7 @@ class ChromiumBase(BasePage):
"""页面停止加载"""
if self._debug:
print('stopLoading')
self._driver.Page.stopLoading()
self._tab_obj.Page.stopLoading()
self._get_document()
def run_cdp(self, cmd: str, **cmd_args) -> dict:
@ -582,7 +598,8 @@ class ChromiumBase(BasePage):
times: int = 0,
interval: float = 1,
show_errmsg: bool = False,
timeout: float = None) -> Union[bool, None]:
timeout: float = None,
frame_id: str = None) -> Union[bool, None]:
"""尝试连接,重试若干次 \n
:param to_url: 要访问的url
:param times: 重试次数
@ -596,7 +613,10 @@ class ChromiumBase(BasePage):
for _ in range(times + 1):
err = None
result = self._driver.Page.navigate(url=to_url)
if frame_id:
result = self._driver.Page.navigate(url=to_url, frameId=frame_id)
else:
result = self._driver.Page.navigate(url=to_url)
is_timeout = not self._wait_loading(timeout)
if is_timeout:
@ -638,7 +658,8 @@ class ChromiumFrame(ChromiumBase):
super().__init__(page.address, frame_id, page.timeout)
def __repr__(self) -> str:
    """Return a debug representation shaped like ``<ChromiumFrame tag name='value' ...>``."""
    # Read self.attrs once and reuse that mapping for every attribute.
    attrs = self.attrs
    pairs = [f"{name}='{attrs[name]}'" for name in attrs]
    return f'<ChromiumFrame {self.tag} {" ".join(pairs)}>'
@property

View File

@ -32,7 +32,7 @@ class ChromiumElement(DrissionElement):
self._scroll = None
self._tag = None
if not node_id and not obj_id:
raise TypeError('node_id或obj_id必须传入一个')
raise RuntimeError('元素可能已失效')
if node_id:
self._node_id = node_id
@ -42,7 +42,8 @@ class ChromiumElement(DrissionElement):
self._obj_id = obj_id
def __repr__(self) -> str:
    """Return a debug representation shaped like ``<ChromiumElement tag name='value' ...>``."""
    # Read self.attrs once and reuse that mapping for every attribute.
    attrs = self.attrs
    pairs = [f"{name}='{attrs[name]}'" for name in attrs]
    return f'<ChromiumElement {self.tag} {" ".join(pairs)}>'
def __call__(self,
@ -1101,6 +1102,251 @@ class ChromiumShadowRootElement(BaseElement):
return self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node']['backendNodeId']
# class ChromiumFrame(object):
# def __init__(self, page, ele: ChromiumElement):
# from .chromium_base import ChromiumBase
# self.page: ChromiumBase = page
# self._inner_ele = ele
# self._is_diff_domain = False
# self.frame_id = page.run_cdp('DOM.describeNode', nodeId=ele.node_id)['node'].get('frameId', None)
#
# src = ele.attr('src')
# if src:
# netloc1 = urlparse(src).netloc
# netloc2 = urlparse(page.url).netloc
# if netloc1 != netloc2:
# self._is_diff_domain = True
# from .chromium_base import ChromiumBase
# self.inner_page = ChromiumBase(page.address, self.frame_id, page.timeout)
# self.inner_page.set_page_load_strategy(self.page.page_load_strategy)
# self.inner_page.timeouts = self.page.timeouts
#
# def __repr__(self) -> str:
# attrs = self._inner_ele.attrs
# attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs]
# return f'<ChromiumFrame {self._inner_ele.tag} {" ".join(attrs)}>'
#
# @property
# def tag(self) -> str:
# """返回元素tag"""
# return self._inner_ele.tag
#
# @property
# def url(self) -> str:
# """"""
# if self._is_diff_domain:
# return self.inner_page.url
# else:
# r = self.page.run_cdp('DOM.describeNode', nodeId=self._inner_ele.node_id)
# return r['node']['contentDocument']['documentURL']
#
# @property
# def html(self) -> str:
# """返回元素outerHTML文本"""
# if self._is_diff_domain:
# tag = self.tag
# out_html = self.page.run_cdp('DOM.getOuterHTML', nodeId=self._inner_ele.node_id)['outerHTML']
# in_html = self.inner_page.html
# sign = search(rf'<{tag}.*?>', out_html).group(0)
# return f'{sign}{in_html}</{tag}>'
#
# else:
# return self._inner_ele.html
#
# @property
# def title(self) -> str:
# d = self.inner_page if self._is_diff_domain else self._inner_ele
# ele = d.ele('xpath://title')
# return ele.text if ele else None
#
# @property
# def cookies(self):
# return self.inner_page.cookies if self._is_diff_domain else self.page.cookies
#
# @property
# def inner_html(self) -> str:
# """返回元素innerHTML文本"""
# return self.inner_page.html if self._is_diff_domain else self._inner_ele.inner_html
#
# @property
# def attrs(self) -> dict:
# return self._inner_ele.attrs
#
# @property
# def frame_size(self) -> dict:
# if self._is_diff_domain:
# return self.inner_page.size
# else:
# h = self._inner_ele.run_script('return this.contentDocument.body.scrollHeight;')
# w = self._inner_ele.run_script('return this.contentDocument.body.scrollWidth;')
# return {'height': h, 'width': w}
#
# @property
# def size(self) -> dict:
# """返回frame元素大小"""
# return self._inner_ele.size
#
# @property
# def obj_id(self) -> str:
# """返回js中的object id"""
# return self._inner_ele.obj_id
#
# @property
# def node_id(self) -> str:
# """返回cdp中的node id"""
# return self._inner_ele.node_id
#
# @property
# def location(self) -> dict:
# """返回frame元素左上角的绝对坐标"""
# return self._inner_ele.location
#
# @property
# def is_displayed(self) -> bool:
# """返回frame元素是否显示"""
# return self._inner_ele.is_displayed
#
# def get(self, url):
# self.page._get(url, False, None, None, None, self.frame_id)
#
# def ele(self,
# loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, 'ChromiumFrame'],
# timeout: float = None):
# d = self.inner_page if self._is_diff_domain else self._inner_ele
# return d.ele(loc_or_ele, timeout)
#
# def eles(self,
# loc_or_ele: Union[Tuple[str, str], str],
# timeout: float = None):
# d = self.inner_page if self._is_diff_domain else self._inner_ele
# return d.eles(loc_or_ele, timeout)
#
# # def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement] = None) \
# # -> Union[SessionElement, str, None]:
# # """查找第一个符合条件的元素以SessionElement形式返回处理复杂页面时效率很高 \n
# # :param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
# # :return: SessionElement对象或属性、文本
# # """
# # if isinstance(loc_or_ele, ChromiumElement):
# # return make_session_ele(loc_or_ele)
# # else:
# # return make_session_ele(self, loc_or_ele)
# #
# # def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[Union[SessionElement, str]]:
# # """查找所有符合条件的元素以SessionElement列表形式返回 \n
# # :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
# # :return: SessionElement对象组成的列表
# # """
# # return make_session_ele(self, loc_or_str, single=False)
#
# def attr(self, attr: str) -> Union[str, None]:
# """返回frame元素attribute属性值 \n
# :param attr: 属性名
# :return: 属性值文本没有该属性返回None
# """
# return self._inner_ele.attr(attr)
#
# def set_attr(self, attr: str, value: str) -> None:
# """设置frame元素attribute属性 \n
# :param attr: 属性名
# :param value: 属性值
# :return: None
# """
# self._inner_ele.set_attr(attr, value)
#
# def remove_attr(self, attr: str) -> None:
# """删除frame元素attribute属性 \n
# :param attr: 属性名
# :return: None
# """
# self._inner_ele.remove_attr(attr)
#
# def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['ChromiumElement', None]:
# """返回上面某一级父元素,可指定层数或用查询语法定位 \n
# :param level_or_loc: 第几级父元素,或定位符
# :return: 上级元素对象
# """
# return self._inner_ele.parent(level_or_loc)
#
# def prev(self,
# index: int = 1,
# filter_loc: Union[tuple, str] = '',
# timeout: float = 0) -> Union['ChromiumElement', str, None]:
# """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
# :param index: 前面第几个查询结果元素
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 兄弟元素
# """
# return self._inner_ele.prev(index, filter_loc, timeout)
#
# def next(self,
# index: int = 1,
# filter_loc: Union[tuple, str] = '',
# timeout: float = 0) -> Union['ChromiumElement', str, None]:
# """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
# :param index: 后面第几个查询结果元素
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 兄弟元素
# """
# return self._inner_ele.next(index, filter_loc, timeout)
#
# def before(self,
# index: int = 1,
# filter_loc: Union[tuple, str] = '',
# timeout: float = None) -> Union['ChromiumElement', str, None]:
# """返回当前元素前面的一个元素可指定筛选条件和第几个。查找范围不限兄弟元素而是整个DOM文档 \n
# :param index: 前面第几个查询结果元素
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 本元素前面的某个元素或节点
# """
# return self._inner_ele.before(index, filter_loc, timeout)
#
# def after(self,
# index: int = 1,
# filter_loc: Union[tuple, str] = '',
# timeout: float = None) -> Union['ChromiumElement', str, None]:
# """返回当前元素后面的一个元素可指定筛选条件和第几个。查找范围不限兄弟元素而是整个DOM文档 \n
# :param index: 后面第几个查询结果元素
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 本元素后面的某个元素或节点
# """
# return self._inner_ele.after(index, filter_loc, timeout)
#
# def prevs(self,
# filter_loc: Union[tuple, str] = '',
# timeout: float = 0) -> List[Union['ChromiumElement', str]]:
# """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 兄弟元素或节点文本组成的列表
# """
# return self._inner_ele.prevs(filter_loc, timeout)
#
# def nexts(self,
# filter_loc: Union[tuple, str] = '',
# timeout: float = 0) -> List[Union['ChromiumElement', str]]:
# """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 兄弟元素或节点文本组成的列表
# """
# return self._inner_ele.nexts(filter_loc, timeout)
#
# def befores(self,
# filter_loc: Union[tuple, str] = '',
# timeout: float = None) -> List[Union['ChromiumElement', str]]:
# """返回当前元素前面符合条件的全部元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元素,而是整个DOM文档 \n
# :param filter_loc: 用于筛选元素的查询语法
# :param timeout: 查找元素的超时时间
# :return: 本元素前面的元素或节点组成的列表
# """
# return self._inner_ele.befores(filter_loc, timeout)
def make_chromium_ele(ele: ChromiumElement,
loc: Union[str, Tuple[str, str]],
single: bool = True,

View File

@ -350,6 +350,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
@property
def download(self) -> DownloadKit:
    """Return the downloader object.

    When ``self.mode`` is ``'d'``, ``cookies_to_session()`` is called first;
    the value returned is always the parent class's ``download``.
    """
    in_d_mode = self.mode == 'd'
    if in_d_mode:
        self.cookies_to_session()
    downloader = super().download
    return downloader

View File

@ -194,6 +194,7 @@ class SessionPage(BasePage):
@property
def download(self) -> DownloadKit:
"""返回下载器对象"""
if not hasattr(self, '_download_kit'):
self._download_kit = DownloadKit(session=self)

View File

@ -383,6 +383,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property
def download(self) -> DownloadKit:
    """Return the downloader object.

    When ``self.mode`` is ``'d'``, ``cookies_to_session()`` is called first;
    the value returned is always the parent class's ``download``.
    """
    in_d_mode = self.mode == 'd'
    if in_d_mode:
        self.cookies_to_session()
    downloader = super().download
    return downloader

View File

@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup(
name="DrissionPage",
version="3.0.19",
version="3.0.20",
author="g1879",
author_email="g1879@qq.com",
description="A module that integrates selenium and requests session, encapsulates common page operations.",