From 368665df576563657107128167f419fdc2cf0556 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 30 Oct 2023 01:57:18 +0800 Subject: [PATCH] =?UTF-8?q?4.0.0b1=E5=AF=B9=E8=BF=9E=E6=8E=A5=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E8=BF=9B=E8=A1=8C=E5=AE=8C=E5=85=A8=E9=87=8D=E6=9E=84?= =?UTF-8?q?=EF=BC=8C'none'=E6=A8=A1=E5=BC=8F=E4=B8=8D=E4=B8=BB=E5=8A=A8?= =?UTF-8?q?=E5=81=9C=E6=AD=A2=E7=BD=91=E9=A1=B5=E4=B8=94=E6=97=A0=E8=A7=86?= =?UTF-8?q?timeout=EF=BC=9Bwait.new=5Ftab()=E6=88=90=E5=8A=9F=E6=97=B6?= =?UTF-8?q?=E8=BF=94=E5=9B=9Eid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/_base/base.py | 2 +- DrissionPage/_base/chromium_driver.py | 15 +- DrissionPage/_elements/chromium_element.py | 17 +- DrissionPage/_pages/chromium_base.py | 236 +++++++++------------ DrissionPage/_pages/chromium_base.pyi | 15 +- DrissionPage/_pages/chromium_frame.py | 173 ++++++++------- DrissionPage/_pages/chromium_frame.pyi | 4 +- DrissionPage/_units/download_manager.py | 4 +- DrissionPage/_units/waiter.py | 17 +- DrissionPage/_units/waiter.pyi | 4 +- setup.py | 2 +- 11 files changed, 227 insertions(+), 262 deletions(-) diff --git a/DrissionPage/_base/base.py b/DrissionPage/_base/base.py index fe6f9a8..521f982 100644 --- a/DrissionPage/_base/base.py +++ b/DrissionPage/_base/base.py @@ -367,7 +367,7 @@ class BasePage(BaseParser): self.retry_times = 3 self.retry_interval = 2 self._DownloadKit = None - self._download_path = str(Path('../..').absolute()) + self._download_path = str(Path('.').absolute()) @property def title(self): diff --git a/DrissionPage/_base/chromium_driver.py b/DrissionPage/_base/chromium_driver.py index c593222..b6781e2 100644 --- a/DrissionPage/_base/chromium_driver.py +++ b/DrissionPage/_base/chromium_driver.py @@ -6,7 +6,7 @@ from json import dumps, loads from queue import Queue, Empty from threading import Thread, Event -from time import perf_counter, sleep +from time import perf_counter from requests import get from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ @@ -56,8 +56,8 @@ class ChromiumDriver(object): message_json = dumps(message) if self._debug: - if self._debug is True or ( - isinstance(self._debug, str) and message.get('method', '').startswith(self._debug)): + if self._debug is True or (isinstance(self._debug, str) and + message.get('method', '').startswith(self._debug)): print(f'发> {message_json}') elif isinstance(self._debug, (list, tuple, set)): for m in self._debug: @@ -74,17 +74,16 @@ class ChromiumDriver(object): while not self._stopped.is_set(): try: - return self.method_results[message['id']].get_nowait() + return self.method_results[message['id']].get(.2) except Empty: if self.alert_flag: self.alert_flag = False - return {'result': []} + return {'error': {'message': 'alert exists.'}} if timeout is not None and perf_counter() > timeout: return {'error': {'message': 'timeout'}} - sleep(.02) continue except Exception: @@ -138,7 +137,11 @@ class ChromiumDriver(object): function = self.event_handlers.get(event['method']) if function: + if self._debug: + print(f'开始执行 {function.__name__}') function(**event['params']) + if self._debug: + print(f'执行 {function.__name__}完毕') self.event_queue.task_done() diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index 68ee961..e51a0d6 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -376,7 +376,7 @@ class ChromiumElement(DrissionElement): def run_js(self, script, *args, as_expr=False): """对本元素执行javascript代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: 运行的结果 """ @@ -385,7 +385,7 @@ class ChromiumElement(DrissionElement): def run_async_js(self, script, *args, as_expr=False): """以异步方式对本元素执行javascript代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: None """ @@ -841,7 +841,7 @@ class ChromiumShadowRoot(BaseElement): def run_js(self, script, *args, as_expr=False): """运行javascript代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: 运行的结果 """ @@ -850,7 +850,7 @@ class ChromiumShadowRoot(BaseElement): def run_async_js(self, script, *args, as_expr=False): """以异步方式执行js代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: None """ @@ -1042,9 +1042,9 @@ class ChromiumShadowRoot(BaseElement): loc = loc[0], loc[1][5:] timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() + end_time = perf_counter() + timeout eles = make_session_ele(self.html).eles(loc) - while not eles and perf_counter() - t1 <= timeout: + while not eles and perf_counter() <= end_time: eles = make_session_ele(self.html).eles(loc) if not eles: @@ -1299,7 +1299,7 @@ def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None): :param script: js文本 :param as_expr: 是否作为表达式运行,为True时args无效 :param timeout: 超时时间 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :return: js执行结果 """ if isinstance(page_or_ele, (ChromiumElement, ChromiumShadowRoot)): @@ -1701,7 +1701,8 @@ class ChromiumScroll(object): x = r['layoutViewport']['pageX'] y = r['layoutViewport']['pageY'] - while True: + end_time = perf_counter() + self._driver.page.timeout + while perf_counter() < end_time: sleep(.1) r = page.run_cdp('Page.getLayoutMetrics') x1 = r['layoutViewport']['pageX'] diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index 0d6ceb6..bc36b5d 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -24,8 +24,8 @@ from .._units.network_listener import NetworkListener from .._units.screencast import Screencast from .._units.setter import ChromiumBaseSetter from .._units.waiter import ChromiumBaseWaiter -from ..errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ - NoRectError, BrowserConnectError, GetDocumentError +from ..errors import (ContextLossError, ElementLossError, CDPError, TabClosedError, NoRectError, BrowserConnectError, + AlertExistsError) class ChromiumBase(BasePage): @@ -41,14 +41,14 @@ class ChromiumBase(BasePage): self._is_loading = None self._root_id = None # object id self._debug = False - self._debug_recorder = None self._set = None self._screencast = None self._actions = None self._listener = None self._has_alert = False + self._ready_state = None - self._download_path = str(Path('../..').absolute()) + self._download_path = str(Path('.').absolute()) if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): address = f'127.0.0.1:{address}' @@ -76,7 +76,6 @@ class ChromiumBase(BasePage): :param tab_id: 要控制的标签页id,不指定默认为激活的 :return: None """ - self._first_run = True self._is_reading = False self._upload_list = None self._wait = None @@ -89,9 +88,15 @@ class ChromiumBase(BasePage): if not tab_id: raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') tab_id = tab_id[0] + self._driver_init(tab_id) - self._get_document() - self._first_run = False + if self.ready_state == 'complete' and self._ready_state is None: + self._get_document() + self._ready_state = 'complete' + + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id def _driver_init(self, tab_id): """新建页面、页面刷新、切换标签页后要进行的cdp参数初始化 @@ -99,6 +104,7 @@ class ChromiumBase(BasePage): :return: None """ self._is_loading = True + self._frame_id = tab_id self._driver = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) self._alert = Alert() self._driver.set_listener('Page.javascriptDialogOpening', self._on_alert_open) @@ -108,59 +114,20 @@ class ChromiumBase(BasePage): self._driver.call_method('Page.enable') self._driver.call_method('Emulation.setFocusEmulationEnabled', enabled=True) - self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading) self._driver.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading) - self._driver.set_listener('DOM.documentUpdated', self._onDocumentUpdated) - self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired) self._driver.set_listener('Page.frameNavigated', self._onFrameNavigated) + self._driver.set_listener('Page.domContentEventFired', self._onDomContentEventFired) + self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired) + self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading) self._driver.set_listener('Page.frameAttached', self._onFrameAttached) self._driver.set_listener('Page.frameDetached', self._onFrameDetached) def _get_document(self): - """刷新cdp使用的document数据""" if self._is_reading: return - self._is_reading = True - - if self._debug: - print('获取document') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) - - try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉 - self._wait_loaded() - except TabClosedError: - return - - end_time = perf_counter() + 10 - while perf_counter() < end_time: - try: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) - break - - except CDPError as e: - err = e - if self._debug: - print('重试获取document') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错')) - - sleep(.1) - - else: - txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ - f'报告网址:https://gitee.com/g1879/DrissionPage/issues' - raise GetDocumentError(txt) - - if self._debug: - print('获取document结束') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) - + b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] + self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] r = self.run_cdp('Page.getFrameTree') for i in findall(r"'id': '(.*?)'", str(r)): self.browser._frames[i] = self.tab_id @@ -173,25 +140,18 @@ class ChromiumBase(BasePage): :param timeout: 超时时间 :return: 是否成功,超时返回False """ - timeout = timeout if timeout is not None else self.timeouts.page_load + if self.page_load_strategy == 'none': + return True + timeout = timeout if timeout is not None else self.timeouts.page_load end_time = perf_counter() + timeout while perf_counter() < end_time: - state = self.ready_state - if state is None: # 存在alert的情况 - return None - - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), 'waiting', state)) - - if state == 'complete': + if self._ready_state == 'complete': return True - elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'): - self.stop_loading() - return True - elif self.page_load_strategy == 'none': + elif self.page_load_strategy == 'eager' and self._ready_state in ('interactive', 'complete'): self.stop_loading() return True + sleep(.1) self.stop_loading() @@ -209,50 +169,44 @@ class ChromiumBase(BasePage): def _onFrameStartedLoading(self, **kwargs): """页面开始加载时执行""" self.browser._frames[kwargs['frameId']] = self.tab_id - if kwargs['frameId'] == self._target_id: + if kwargs['frameId'] == self._frame_id: + self._ready_state = 'loading' self._is_loading = True - if self._debug: - print('页面开始加载 FrameStartedLoading') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading')) + print(f'frameStartedLoading {kwargs}') + + def _onFrameNavigated(self, **kwargs): + """页面跳转时执行""" + if kwargs['frame']['id'] == self._frame_id: + self._ready_state = 'loading' + self._is_loading = True + if self._debug: + print(f'FrameNavigated {kwargs}') + + def _onDomContentEventFired(self, **kwargs): + """在页面刷新、变化后重新读取页面内容""" + self._ready_state = 'interactive' + if self.page_load_strategy == 'eager': + self.run_cdp('Page.stopLoading') + if self._debug: + print(f'DomContentEventFired {kwargs}') + + def _onLoadEventFired(self, **kwargs): + """在页面刷新、变化后重新读取页面内容""" + self._ready_state = 'complete' + if self._debug: + print(f'LoadEventFired {kwargs}') + # self._get_document() def _onFrameStoppedLoading(self, **kwargs): """页面加载完成后执行""" self.browser._frames[kwargs['frameId']] = self.tab_id - if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading: + if kwargs['frameId'] == self._frame_id: + self._ready_state = 'complete' if self._debug: - print('页面停止加载 FrameStoppedLoading') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading')) - + print(f'FrameStoppedLoading {kwargs}') self._get_document() - def _onLoadEventFired(self, **kwargs): - """在页面刷新、变化后重新读取页面内容""" - if self._debug: - print('loadEventFired') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired')) - - self._get_document() - - def _onDocumentUpdated(self, **kwargs): - """页面跳转时执行""" - if self._debug: - print('documentUpdated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated')) - - def _onFrameNavigated(self, **kwargs): - """页面跳转时执行""" - if kwargs['frame'].get('parentId', None) == self._target_id and self._first_run is False and self._is_loading: - self._is_loading = True - if self._debug: - print('navigated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) - def _onFileChooserOpened(self, **kwargs): """文件选择框打开时执行""" if self._upload_list: @@ -344,14 +298,13 @@ class ChromiumBase(BasePage): @property def ready_state(self): - """返回当前页面加载状态,'loading' 'interactive' 'complete',有弹出框时返回None""" - while True: - try: - return self.run_cdp('Runtime.evaluate', expression='document.readyState;')['result']['value'] - except (AlertExistsError, TypeError): - return None - except ContextLossError: - continue + """返回当前页面加载状态,'loading' 'interactive' 'complete','timeout' 表示可能有弹出框""" + try: + return self.run_cdp('Runtime.evaluate', expression='document.readyState;', _timeout=3)['result']['value'] + except ContextLossError: + return None + except TimeoutError: + return 'timeout' @property def size(self): @@ -439,9 +392,6 @@ class ChromiumBase(BasePage): :param cmd_args: 参数 :return: 执行的结果 """ - # if self.driver.has_alert and cmd != HANDLE_ALERT_METHOD: - # raise AlertExistsError - r = self.driver.call_method(cmd, **cmd_args) if ERROR not in r: return r @@ -455,8 +405,10 @@ class ChromiumBase(BasePage): raise ElementLossError elif error == 'tab closed': raise TabClosedError - elif error == 'alert exists': - pass + elif error == 'timeout': + raise TimeoutError + elif error == 'alert exists.': + raise AlertExistsError elif error in ('Node does not have a layout object', 'Could not compute box model.'): raise NoRectError elif r['type'] == 'call_method_error': @@ -476,7 +428,7 @@ class ChromiumBase(BasePage): def run_js(self, script, *args, as_expr=False): """运行javascript代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: 运行的结果 """ @@ -485,7 +437,7 @@ class ChromiumBase(BasePage): def run_js_loaded(self, script, *args, as_expr=False): """运行javascript代码,执行前等待页面加载完毕 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: 运行的结果 """ @@ -495,7 +447,7 @@ class ChromiumBase(BasePage): def run_async_js(self, script, *args, as_expr=False): """以异步方式执行js代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: None """ @@ -691,11 +643,12 @@ class ChromiumBase(BasePage): """页面停止加载""" if self._debug: print('停止页面加载') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载')) - - self.run_cdp('Page.stopLoading') - while self.ready_state not in ('complete', None): + try: + self.run_cdp('Page.stopLoading') + except TabClosedError: + pass + end_time = perf_counter() + self.timeouts.page_load + while self._ready_state != 'complete' and perf_counter() < end_time: sleep(.1) def remove_ele(self, loc_or_ele): @@ -881,32 +834,39 @@ class ChromiumBase(BasePage): """ err = None timeout = timeout if timeout is not None else self.timeouts.page_load - for t in range(times + 1): err = None - result = self.run_cdp('Page.navigate', url=to_url) - - is_timeout = self._wait_loaded(timeout) - if is_timeout is None: - return None - is_timeout = not is_timeout - self.wait.load_complete() - - if is_timeout: + end_time = perf_counter() + timeout + result = self.run_cdp('Page.navigate', url=to_url, _timeout=timeout) + if result.get('error') == 'timeout': err = TimeoutError('页面连接超时。') - if 'errorText' in result: + + elif 'errorText' in result: err = ConnectionError(result['errorText']) + if err: + sleep(interval) + if self._debug or show_errmsg: + print(f'重试{t + 1} {to_url}') + self.stop_loading() + continue + + if self.page_load_strategy == 'none': + return True + + yu = end_time - perf_counter() + ok = self._wait_loaded(1 if yu <= 0 else yu) + if not ok: + err = TimeoutError('页面连接超时。') + sleep(interval) + if self._debug or show_errmsg: + print(f'重试{t + 1} {to_url}') + self.stop_loading() + continue + if not err: break - if t < times: - sleep(interval) - while self.ready_state not in ('complete', None): - sleep(.1) - if self._debug or show_errmsg: - print(f'重试{t + 1} {to_url}') - if err: if show_errmsg: raise err if err is not None else ConnectionError('连接异常。') diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index fb8e148..22812b3 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -4,9 +4,7 @@ @Contact : g1879@qq.com """ from pathlib import Path -from typing import Union, Tuple, List, Any - -from DataRecorder import Recorder +from typing import Union, Tuple, List, Any, Optional from .._base.base import BasePage from .._base.browser import Browser @@ -32,6 +30,7 @@ class ChromiumBase(BasePage): self._page: ChromiumPage = ... self.address: str = ... self._driver: ChromiumDriver = ... + self._frame_id: str = ... self._is_reading: bool = ... self._timeouts: Timeout = ... self._first_run: bool = ... @@ -41,7 +40,6 @@ class ChromiumBase(BasePage): self._url: str = ... self._root_id: str = ... self._debug: bool = ... - self._debug_recorder: Recorder = ... self._upload_list: list = ... self._wait: ChromiumBaseWaiter = ... self._set: ChromiumBaseSetter = ... @@ -50,6 +48,7 @@ class ChromiumBase(BasePage): self._listener: NetworkListener = ... self._alert: Alert = ... self._has_alert: bool = ... + self._ready_state: Optional[str] = ... def _connect_browser(self, tab_id: str = None) -> None: ... @@ -65,13 +64,13 @@ class ChromiumBase(BasePage): def _onFrameStartedLoading(self, **kwargs): ... - def _onFrameStoppedLoading(self, **kwargs): ... + def _onFrameNavigated(self, **kwargs): ... + + def _onDomContentEventFired(self, **kwargs): ... def _onLoadEventFired(self, **kwargs): ... - def _onDocumentUpdated(self, **kwargs): ... - - def _onFrameNavigated(self, **kwargs): ... + def _onFrameStoppedLoading(self, **kwargs): ... def _onFileChooserOpened(self, **kwargs): ... diff --git a/DrissionPage/_pages/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py index d7fb0ed..34214c0 100644 --- a/DrissionPage/_pages/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -4,7 +4,7 @@ @Contact : g1879@qq.com """ from copy import copy -from re import search +from re import search, findall from threading import Thread from time import sleep, perf_counter @@ -14,7 +14,7 @@ from .._elements.chromium_element import ChromiumElement from .._pages.chromium_base import ChromiumBase, ChromiumPageScroll from .._units.setter import ChromiumFrameSetter from .._units.waiter import FrameWaiter -from ..errors import ContextLossError +from ..errors import ContextLossError, ElementLossError, GetDocumentError class ChromiumFrame(ChromiumBase): @@ -40,6 +40,7 @@ class ChromiumFrame(ChromiumBase): self._backend_id = ele.ids.backend_id self._frame_ele = ele self._states = None + self._ids = ChromiumFrameIds(self) if self._is_inner_frame(): self._is_diff_domain = False @@ -50,9 +51,8 @@ class ChromiumFrame(ChromiumBase): super().__init__(page.address, self.frame_id, page.timeout) obj_id = super().run_js('document;', as_expr=True)['objectId'] self.doc_ele = ChromiumElement(self, obj_id=obj_id) - self._ids = ChromiumFrameIds(self) - end_time = perf_counter() + 2 + end_time = perf_counter() + 5 while perf_counter() < end_time and self.url == 'about:blank': sleep(.1) @@ -92,28 +92,46 @@ class ChromiumFrame(ChromiumBase): except: get(f'http://{self.address}/json', headers={'Connection': 'close'}) super()._driver_init(tab_id) + self.driver.set_listener('Inspector.detached', self._onInspectorDetached) def _reload(self): """重新获取document""" debug = self._debug + d_debug = self.driver._debug if debug: print('重新获取document') self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id) node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node'] - if self._is_inner_frame(): - self._is_diff_domain = False - self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) - super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout) - self._debug = debug + end_time = perf_counter() + self.timeout + while perf_counter() < end_time: + try: + if self._is_inner_frame(): + self._is_diff_domain = False + self.doc_ele = ChromiumElement(self._target_page, + backend_id=node['contentDocument']['backendNodeId']) + super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout) + self._debug = debug + self.driver._debug = d_debug + else: + self._is_diff_domain = True + self._driver.stop() + super().__init__(self.address, self.frame_id, self._target_page.timeout) + obj_id = super().run_js('document;', as_expr=True)['objectId'] + self.doc_ele = ChromiumElement(self, obj_id=obj_id) + self._debug = debug + self.driver._debug = d_debug + break + except: + pass + + sleep(.1) + else: - self._is_diff_domain = True - self._driver.stop() - super().__init__(self.address, self.frame_id, self._target_page.timeout) - obj_id = super().run_js('document;', as_expr=True)['objectId'] - self.doc_ele = ChromiumElement(self, obj_id=obj_id) - self._debug = debug + raise GetDocumentError + + self.wait.load_complete() def _check_ok(self): """用于应付同域异域之间跳转导致元素丢失问题""" @@ -122,7 +140,7 @@ class ChromiumFrame(ChromiumBase): try: self._target_page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id) - except Exception: + except ElementLossError: self._reload() # sleep(2) @@ -130,72 +148,42 @@ class ChromiumFrame(ChromiumBase): """刷新cdp使用的document数据""" if self._is_reading: return - self._is_reading = True + if self._is_diff_domain is False: + node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] + self.doc_ele = ChromiumElement(self._target_page, + backend_id=node['contentDocument']['backendNodeId']) - if self._debug: - print('---获取document') + else: + b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] + self.doc_ele = ChromiumElement(self, backend_id=b_id) - end_time = perf_counter() + 3 - while self.is_alive and perf_counter() < end_time: - try: - if self._is_diff_domain is False: - node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] - self.doc_ele = ChromiumElement(self._target_page, - backend_id=node['contentDocument']['backendNodeId']) - - else: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self.doc_ele = ChromiumElement(self, backend_id=b_id) - - break - - except Exception: - sleep(.1) - - # else: - # raise RuntimeError('获取document失败。') - - if self._debug: - print('---获取document结束') + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id self._is_loading = False self._is_reading = False - def _onFrameNavigated(self, **kwargs): - """页面跳转时触发""" - if kwargs['frame']['id'] == self.frame_id and self._first_run is False and self._is_loading: - self._is_loading = True - - if self._debug: - print('navigated') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) - - def _onLoadEventFired(self, **kwargs): - """在页面刷新、变化后重新读取页面内容""" - # 用于覆盖父类方法,不能删 - self._get_new_document() - - if self._debug: - print('loadEventFired') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired')) - - def _onFrameStartedLoading(self, **kwargs): - """页面开始加载时触发""" - if kwargs['frameId'] == self.frame_id: - self._is_loading = True - if self._debug: - print('页面开始加载 FrameStartedLoading') - def _onFrameStoppedLoading(self, **kwargs): """页面加载完成后触发""" - if kwargs['frameId'] == self.frame_id and self._first_run is False and self._is_loading: + self.browser._frames[kwargs['frameId']] = self.tab_id + if kwargs['frameId'] == self.frame_id: + self._ready_state = 'complete' if self._debug: - print('页面停止加载 FrameStoppedLoading') + print(f'FrameStoppedLoading {kwargs}') self._get_new_document() + def _onInspectorDetached(self, **kwargs): + self._is_loading = True + # print('reload') + self._reload() + + # def _onFrameDetached(self, **kwargs): + # if kwargs['frameId'] == self.frame_id: + # self._is_loading = True + # self._reload() + @property def page(self): return self._page @@ -387,7 +375,7 @@ class ChromiumFrame(ChromiumBase): def run_js(self, script, *args, as_expr=False): """运行javascript代码 :param script: js文本 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... + :param args: 参数,按顺序在js文本中对应arguments[0]、arguments[1]... :param as_expr: 是否作为表达式运行,为True时args无效 :return: 运行的结果 """ @@ -614,34 +602,43 @@ class ChromiumFrame(ChromiumBase): for t in range(times + 1): err = None - result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id) - - is_timeout = not self._wait_loaded(timeout) - sleep(.5) - self.wait.load_complete() - - if is_timeout: + end_time = perf_counter() + timeout + result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id, _timeout=timeout) + if result.get('error') == 'timeout': err = TimeoutError('页面连接超时。') - if 'errorText' in result: + + elif 'errorText' in result: err = ConnectionError(result['errorText']) + if err: + sleep(interval) + if self._debug or show_errmsg: + print(f'重试{t + 1} {to_url}') + self.stop_loading() + continue + + if self.page_load_strategy == 'none': + return True + + yu = end_time - perf_counter() + ok = self._wait_loaded(1 if yu <= 0 else yu) + if not ok: + err = TimeoutError('页面连接超时。') + sleep(interval) + if self._debug or show_errmsg: + print(f'重试{t + 1} {to_url}') + self.stop_loading() + continue + if not err: break - if t < times: - sleep(interval) - while self.ready_state not in ('complete', None): - sleep(.1) - if self._debug: - print('重试') - if show_errmsg: - print(f'重试 {to_url}') - if err: if show_errmsg: raise err if err is not None else ConnectionError('连接异常。') return False + self._check_ok() return True def _is_inner_frame(self): diff --git a/DrissionPage/_pages/chromium_frame.pyi b/DrissionPage/_pages/chromium_frame.pyi index 8b4e0bf..4121917 100644 --- a/DrissionPage/_pages/chromium_frame.pyi +++ b/DrissionPage/_pages/chromium_frame.pyi @@ -49,9 +49,9 @@ class ChromiumFrame(ChromiumBase): def _get_new_document(self) -> None: ... - def _onFrameAttached(self, **kwargs): ... + def _onFrameStoppedLoading(self, **kwargs): ... - def _onFrameDetached(self, **kwargs): ... + def _onInspectorDetached(self, **kwargs): ... @property def page(self) -> Union[ChromiumPage, WebPage]: ... diff --git a/DrissionPage/_units/download_manager.py b/DrissionPage/_units/download_manager.py index 029ccb9..be55e9d 100644 --- a/DrissionPage/_units/download_manager.py +++ b/DrissionPage/_units/download_manager.py @@ -266,8 +266,8 @@ class DownloadMission(object): """ if show: print(f'url:{self.url}') - t2 = perf_counter() - while self.name is None and perf_counter() - t2 < 4: + end_time = perf_counter() + while self.name is None and perf_counter() < end_time: sleep(0.01) print(f'文件名:{self.name}') print(f'目标路径:{self.path}') diff --git a/DrissionPage/_units/waiter.py b/DrissionPage/_units/waiter.py index 4baca34..8236b30 100644 --- a/DrissionPage/_units/waiter.py +++ b/DrissionPage/_units/waiter.py @@ -82,8 +82,12 @@ class ChromiumBaseWaiter(object): def upload_paths_inputted(self): """等待自动填写上传文件路径""" - while self._driver._upload_list: + end_time = perf_counter() + self._driver.timeout + while perf_counter() < end_time: + if not self._driver._upload_list: + return True sleep(.01) + return False def download_begin(self, timeout=None, cancel_it=False): """等待浏览器下载开始,可将其拦截 @@ -201,7 +205,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter): else: end_time = perf_counter() + timeout - while end_time > perf_counter(): + while perf_counter() < end_time: if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): return True sleep(.5) @@ -224,13 +228,14 @@ class ChromiumPageWaiter(ChromiumTabWaiter): """等待新标签页出现 :param timeout: 等待超时时间,为None则使用页面对象timeout属性 :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等到新标签页出现 + :return: 等到新标签页返回其id,否则返回False """ timeout = timeout if timeout is not None else self._driver.timeout end_time = perf_counter() + timeout while perf_counter() < end_time: - if self._driver.tab_id != self._driver.latest_tab: - return True + latest_tab = self._driver.latest_tab + if self._driver.tab_id != latest_tab: + return latest_tab sleep(.01) if raise_err is True or Settings.raise_when_wait_failed is True: @@ -251,7 +256,7 @@ class ChromiumPageWaiter(ChromiumTabWaiter): else: end_time = perf_counter() + timeout - while end_time > perf_counter(): + while perf_counter() < end_time: if not self._driver.browser._dl_mgr._missions: return True sleep(.5) diff --git a/DrissionPage/_units/waiter.pyi b/DrissionPage/_units/waiter.pyi index 3bbd19c..1f4da31 100644 --- a/DrissionPage/_units/waiter.pyi +++ b/DrissionPage/_units/waiter.pyi @@ -37,7 +37,7 @@ class ChromiumBaseWaiter(object): def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def upload_paths_inputted(self) -> None: ... + def upload_paths_inputted(self) -> bool: ... def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ... @@ -62,7 +62,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter): class ChromiumPageWaiter(ChromiumTabWaiter): _driver: ChromiumPage = ... - def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... + def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ... def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... diff --git a/setup.py b/setup.py index 473d567..5007555 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="4.0.0b0", + version="4.0.0b1", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.",