From d8f6487a05ffeb98c172bbca75b249c7f4987aa8 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 14 Dec 2022 15:51:50 +0800 Subject: [PATCH] =?UTF-8?q?3.0.22=EF=BC=8C=E4=BF=AE=E5=A4=8D=E8=AF=BB?= =?UTF-8?q?=E5=8F=96=E9=A1=B5=E9=9D=A2=E5=B0=8F=E6=A6=82=E7=8E=87=E5=87=BA?= =?UTF-8?q?=E9=94=99=E9=97=AE=E9=A2=98=EF=BC=9B=E5=A2=9E=E5=8A=A0=5Fdebug?= =?UTF-8?q?=5Frecorder=E5=B1=9E=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 56 ++++++++++++++++++++++++++++------ DrissionPage/chromium_base.pyi | 15 ++++++--- DrissionPage/web_page.py | 1 + MANIFEST.in | 3 +- docs/版本历史.md | 4 ++- setup.py | 2 +- 6 files changed, 63 insertions(+), 18 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index f3461ba..55c85b8 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -27,6 +27,7 @@ class ChromiumBase(BasePage): self._is_loading = None self._root_id = None self._debug = False + self._debug_recorder = None self._connect_browser(address, tab_id) def _connect_browser(self, addr_tab_opts=None, tab_id=None): @@ -74,11 +75,29 @@ class ChromiumBase(BasePage): """刷新cdp使用的document数据""" if not self._is_reading: self._is_reading = True + if self._debug: print('获取document') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) + self._wait_loading() - root_id = self._tab_obj.DOM.getDocument()['root']['nodeId'] - self._root_id = self._tab_obj.DOM.resolveNode(nodeId=root_id)['object']['objectId'] + while True: + try: + root_id = self._tab_obj.DOM.getDocument()['root']['nodeId'] + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{root_id}')) + self._root_id = self._tab_obj.DOM.resolveNode(nodeId=root_id)['object']['objectId'] + break + except: + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错')) + + if self._debug: + print('获取document结束') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) + self._is_loading = False self._is_reading = False @@ -92,8 +111,10 @@ class ChromiumBase(BasePage): end_time = perf_counter() + timeout while perf_counter() < end_time: state = self.ready_state - # if self._debug: - # print(f'{state=}') + + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), 'waiting', state)) + if state == 'complete': return True elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'): @@ -111,34 +132,42 @@ class ChromiumBase(BasePage): """页面开始加载时触发""" if kwargs['frameId'] == self.tab_id: self._is_loading = True + if self._debug: print('页面开始加载 FrameStartedLoading') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading')) def _onFrameStoppedLoading(self, **kwargs): """页面加载完成后触发""" if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading: if self._debug: print('页面停止加载 FrameStoppedLoading') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading')) + self._get_document() def _onLoadEventFired(self, **kwargs): """在页面刷新、变化后重新读取页面内容""" if self._debug: print('loadEventFired') - # if self._first_run is False and self._is_loading: - # if self._debug: - # print('loadEventFired') - # self._get_document() + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired')) def _onDocumentUpdated(self, **kwargs): """页面跳转时触发""" if self._debug: print('documentUpdated') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated')) def _onFrameNavigated(self, **kwargs): """页面跳转时触发""" if self._debug and not kwargs['frame'].get('parentId', None): print('navigated') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) def _set_options(self): pass @@ -152,6 +181,11 @@ class ChromiumBase(BasePage): """ return self.ele(loc_or_str, timeout) + @property + def title(self): + """返回当前页面title""" + return self._tab_obj.Target.getTargetInfo(targetId=self.tab_id)['targetInfo']['title'] + @property def driver(self): """返回用于控制浏览器的Tab对象""" @@ -176,8 +210,7 @@ class ChromiumBase(BasePage): @property def url(self): """返回当前页面url""" - json = self._control_session.get(f'http://{self.address}/json').json() - return [i['url'] for i in json if i['id'] == self._tab_obj.id][0] # change_mode要调用,不能用_driver + return self._tab_obj.Target.getTargetInfo(targetId=self.tab_id)['targetInfo']['url'] @property def html(self): @@ -477,6 +510,9 @@ class ChromiumBase(BasePage): """页面停止加载""" if self._debug: print('停止页面加载') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载')) + self._tab_obj.Page.stopLoading() while self.ready_state != 'complete': sleep(.1) diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 427c5d7..c39576e 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -1,14 +1,15 @@ # -*- coding:utf-8 -*- from typing import Union, Tuple, List, Any +from DataRecorder import Recorder from requests import Session from requests.cookies import RequestsCookieJar -from .chromium_element import ChromiumElementWaiter, ChromeScroll -from .session_element import SessionElement -from .chromium_element import ChromiumElement -from .config import DriverOptions from .base import BasePage +from .chromium_element import ChromiumElement +from .chromium_element import ChromiumElementWaiter, ChromeScroll +from .config import DriverOptions +from .session_element import SessionElement from .tab import Tab @@ -23,7 +24,6 @@ class ChromiumBase(BasePage): self.address: str = ... self._tab_obj: Tab = ... self._is_reading: bool = ... - self._debug: bool = ... self.timeouts: Timeout = ... self._first_run: bool = ... self._is_loading: bool = ... @@ -31,6 +31,8 @@ class ChromiumBase(BasePage): self._scroll: ChromeScroll = ... self._url: str = ... self._root_id: str = ... + self._debug: bool = ... + self._debug_recorder: Recorder = ... def _connect_browser(self, addr_tab_opts: Union[str, Tab, DriverOptions] = ..., @@ -57,6 +59,9 @@ class ChromiumBase(BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, 'ChromiumElement'], timeout: float = ...) -> Union['ChromiumElement', 'ChromiumFrame', None]: ... + @property + def title(self) -> str: ... + @property def driver(self) -> Tab: ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 6cf8d8a..fbc7076 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -26,6 +26,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._mode not in ('s', 'd'): raise ValueError('mode参数只能是s或d。') self._debug = False + self._debug_recorder = None super(ChromiumBase, self).__init__(timeout) # 调用Base的__init__() self._session = None diff --git a/MANIFEST.in b/MANIFEST.in index bc87fa9..247a544 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -include DrissionPage/configs.ini \ No newline at end of file +include DrissionPage/configs.ini +include DrissionPage/*.pyi \ No newline at end of file diff --git a/docs/版本历史.md b/docs/版本历史.md index ded6328..c114b08 100644 --- a/docs/版本历史.md +++ b/docs/版本历史.md @@ -1,9 +1,11 @@ -# v3.0.21 +# v3.0.22 - `change_mode()`增加`copy_cookies`参数 - ###### 调整`WebPage`生成的元素对象的`prev()`、`next()`、`before()`、`after()`参数顺序 +- 修复读取页面时小概率失效问题 + - 用存根文件取代类型注解 # v3.0.20 diff --git a/setup.py b/setup.py index 5831d04..63e1845 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.0.21", + version="3.0.22", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.",