3.0.22,修复读取页面小概率出错问题;增加_debug_recorder属性

This commit is contained in:
g1879 2022-12-14 15:51:50 +08:00
parent 55d7e78ef2
commit d8f6487a05
6 changed files with 63 additions and 18 deletions

View File

@ -27,6 +27,7 @@ class ChromiumBase(BasePage):
self._is_loading = None self._is_loading = None
self._root_id = None self._root_id = None
self._debug = False self._debug = False
self._debug_recorder = None
self._connect_browser(address, tab_id) self._connect_browser(address, tab_id)
def _connect_browser(self, addr_tab_opts=None, tab_id=None): def _connect_browser(self, addr_tab_opts=None, tab_id=None):
@ -74,11 +75,29 @@ class ChromiumBase(BasePage):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if not self._is_reading: if not self._is_reading:
self._is_reading = True self._is_reading = True
if self._debug: if self._debug:
print('获取document') print('获取document')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
self._wait_loading() self._wait_loading()
root_id = self._tab_obj.DOM.getDocument()['root']['nodeId'] while True:
self._root_id = self._tab_obj.DOM.resolveNode(nodeId=root_id)['object']['objectId'] try:
root_id = self._tab_obj.DOM.getDocument()['root']['nodeId']
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id{root_id}'))
self._root_id = self._tab_obj.DOM.resolveNode(nodeId=root_id)['object']['objectId']
break
except:
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
if self._debug:
print('获取document结束')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
self._is_loading = False self._is_loading = False
self._is_reading = False self._is_reading = False
@ -92,8 +111,10 @@ class ChromiumBase(BasePage):
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
while perf_counter() < end_time: while perf_counter() < end_time:
state = self.ready_state state = self.ready_state
# if self._debug:
# print(f'{state=}') if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), 'waiting', state))
if state == 'complete': if state == 'complete':
return True return True
elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'): elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'):
@ -111,34 +132,42 @@ class ChromiumBase(BasePage):
"""页面开始加载时触发""" """页面开始加载时触发"""
if kwargs['frameId'] == self.tab_id: if kwargs['frameId'] == self.tab_id:
self._is_loading = True self._is_loading = True
if self._debug: if self._debug:
print('页面开始加载 FrameStartedLoading') print('页面开始加载 FrameStartedLoading')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading'))
def _onFrameStoppedLoading(self, **kwargs): def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后触发""" """页面加载完成后触发"""
if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading: if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading:
if self._debug: if self._debug:
print('页面停止加载 FrameStoppedLoading') print('页面停止加载 FrameStoppedLoading')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading'))
self._get_document() self._get_document()
def _onLoadEventFired(self, **kwargs): def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容""" """在页面刷新、变化后重新读取页面内容"""
if self._debug: if self._debug:
print('loadEventFired') print('loadEventFired')
# if self._first_run is False and self._is_loading: if self._debug_recorder:
# if self._debug: self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
# print('loadEventFired')
# self._get_document()
def _onDocumentUpdated(self, **kwargs): def _onDocumentUpdated(self, **kwargs):
"""页面跳转时触发""" """页面跳转时触发"""
if self._debug: if self._debug:
print('documentUpdated') print('documentUpdated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated'))
def _onFrameNavigated(self, **kwargs): def _onFrameNavigated(self, **kwargs):
"""页面跳转时触发""" """页面跳转时触发"""
if self._debug and not kwargs['frame'].get('parentId', None): if self._debug and not kwargs['frame'].get('parentId', None):
print('navigated') print('navigated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
def _set_options(self): def _set_options(self):
pass pass
@ -152,6 +181,11 @@ class ChromiumBase(BasePage):
""" """
return self.ele(loc_or_str, timeout) return self.ele(loc_or_str, timeout)
@property
def title(self):
"""返回当前页面title"""
return self._tab_obj.Target.getTargetInfo(targetId=self.tab_id)['targetInfo']['title']
@property @property
def driver(self): def driver(self):
"""返回用于控制浏览器的Tab对象""" """返回用于控制浏览器的Tab对象"""
@ -176,8 +210,7 @@ class ChromiumBase(BasePage):
@property @property
def url(self): def url(self):
"""返回当前页面url""" """返回当前页面url"""
json = self._control_session.get(f'http://{self.address}/json').json() return self._tab_obj.Target.getTargetInfo(targetId=self.tab_id)['targetInfo']['url']
return [i['url'] for i in json if i['id'] == self._tab_obj.id][0] # change_mode要调用不能用_driver
@property @property
def html(self): def html(self):
@ -477,6 +510,9 @@ class ChromiumBase(BasePage):
"""页面停止加载""" """页面停止加载"""
if self._debug: if self._debug:
print('停止页面加载') print('停止页面加载')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载'))
self._tab_obj.Page.stopLoading() self._tab_obj.Page.stopLoading()
while self.ready_state != 'complete': while self.ready_state != 'complete':
sleep(.1) sleep(.1)

View File

@ -1,14 +1,15 @@
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from typing import Union, Tuple, List, Any from typing import Union, Tuple, List, Any
from DataRecorder import Recorder
from requests import Session from requests import Session
from requests.cookies import RequestsCookieJar from requests.cookies import RequestsCookieJar
from .chromium_element import ChromiumElementWaiter, ChromeScroll
from .session_element import SessionElement
from .chromium_element import ChromiumElement
from .config import DriverOptions
from .base import BasePage from .base import BasePage
from .chromium_element import ChromiumElement
from .chromium_element import ChromiumElementWaiter, ChromeScroll
from .config import DriverOptions
from .session_element import SessionElement
from .tab import Tab from .tab import Tab
@ -23,7 +24,6 @@ class ChromiumBase(BasePage):
self.address: str = ... self.address: str = ...
self._tab_obj: Tab = ... self._tab_obj: Tab = ...
self._is_reading: bool = ... self._is_reading: bool = ...
self._debug: bool = ...
self.timeouts: Timeout = ... self.timeouts: Timeout = ...
self._first_run: bool = ... self._first_run: bool = ...
self._is_loading: bool = ... self._is_loading: bool = ...
@ -31,6 +31,8 @@ class ChromiumBase(BasePage):
self._scroll: ChromeScroll = ... self._scroll: ChromeScroll = ...
self._url: str = ... self._url: str = ...
self._root_id: str = ... self._root_id: str = ...
self._debug: bool = ...
self._debug_recorder: Recorder = ...
def _connect_browser(self, def _connect_browser(self,
addr_tab_opts: Union[str, Tab, DriverOptions] = ..., addr_tab_opts: Union[str, Tab, DriverOptions] = ...,
@ -57,6 +59,9 @@ class ChromiumBase(BasePage):
def __call__(self, loc_or_str: Union[Tuple[str, str], str, 'ChromiumElement'], def __call__(self, loc_or_str: Union[Tuple[str, str], str, 'ChromiumElement'],
timeout: float = ...) -> Union['ChromiumElement', 'ChromiumFrame', None]: ... timeout: float = ...) -> Union['ChromiumElement', 'ChromiumFrame', None]: ...
@property
def title(self) -> str: ...
@property @property
def driver(self) -> Tab: ... def driver(self) -> Tab: ...

View File

@ -26,6 +26,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
if self._mode not in ('s', 'd'): if self._mode not in ('s', 'd'):
raise ValueError('mode参数只能是s或d。') raise ValueError('mode参数只能是s或d。')
self._debug = False self._debug = False
self._debug_recorder = None
super(ChromiumBase, self).__init__(timeout) # 调用Base的__init__() super(ChromiumBase, self).__init__(timeout) # 调用Base的__init__()
self._session = None self._session = None

View File

@ -1 +1,2 @@
include DrissionPage/configs.ini include DrissionPage/configs.ini
include DrissionPage/*.pyi

View File

@ -1,9 +1,11 @@
# v3.0.21 # v3.0.22
- `change_mode()`增加`copy_cookies`参数 - `change_mode()`增加`copy_cookies`参数
- ###### 调整`WebPage`生成的元素对象的`prev()`、`next()`、`before()`、`after()`参数顺序 - ###### 调整`WebPage`生成的元素对象的`prev()`、`next()`、`before()`、`after()`参数顺序
- 修复读取页面时小概率失效问题
- 用存根文件取代类型注解 - 用存根文件取代类型注解
# v3.0.20 # v3.0.20

View File

@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup( setup(
name="DrissionPage", name="DrissionPage",
version="3.0.21", version="3.0.22",
author="g1879", author="g1879",
author_email="g1879@qq.com", author_email="g1879@qq.com",
description="A module that integrates selenium and requests session, encapsulates common page operations.", description="A module that integrates selenium and requests session, encapsulates common page operations.",