diff --git a/DrissionPage/browser_download_manager.py b/DrissionPage/browser_download_manager.py index bf8e7a2..9bcfd3e 100644 --- a/DrissionPage/browser_download_manager.py +++ b/DrissionPage/browser_download_manager.py @@ -143,17 +143,7 @@ class BrowserDownloadManager(object): def _onDownloadWillBegin(self, **kwargs): """用于获取弹出新标签页触发的下载任务""" guid = kwargs['guid'] - - end = perf_counter() + 2 - while perf_counter() < end: - tab_id = self._guid_and_tab.get(guid, None) - if tab_id: - # print('拿到') - break - sleep(.005) - else: - # print('没拿到') - tab_id = self._page.tab_id + tab_id = self._page._frames.get(kwargs['frameId'], self._page.tab_id) settings = TabDownloadSettings(tab_id) if settings.rename: diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index d212eab..0d652d0 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -7,6 +7,7 @@ from base64 import b64decode from json import loads, JSONDecodeError from os.path import sep from pathlib import Path +from re import findall from threading import Thread from time import perf_counter, sleep, time @@ -116,53 +117,60 @@ class ChromiumBase(BasePage): self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated) self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired) self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated) - self._tab_obj.set_listener('Page.downloadWillBegin', self._onDownloadWillBegin) + self._tab_obj.set_listener('Page.frameAttached', self._onFrameAttached) + self._tab_obj.set_listener('Page.frameDetached', self._onFrameDetached) def _get_document(self): """刷新cdp使用的document数据""" - if not self._is_reading: - self._is_reading = True + if self._is_reading: + return - if self._debug: - print('获取document') + self._is_reading = True + + if self._debug: + print('获取document') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) + + try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉 + self._wait_loaded() + except TabClosedError: + return + + end_time = perf_counter() + 10 + while perf_counter() < end_time: + try: + b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] + self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) + self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) + break - try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉 - self._wait_loaded() - except TabClosedError: - return - - end_time = perf_counter() + 10 - while perf_counter() < end_time: - try: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] + except CDPError as e: + err = e + if self._debug: + print('重试获取document') if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) - break + self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错')) - except CDPError as e: - err = e - if self._debug: - print('重试获取document') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错')) + sleep(.1) - sleep(.1) + else: + txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ + f'报告网址:https://gitee.com/g1879/DrissionPage/issues' + raise GetDocumentError(txt) - else: - txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ - f'报告网址:https://gitee.com/g1879/DrissionPage/issues' - raise GetDocumentError(txt) + if self._debug: + print('获取document结束') + if self._debug_recorder: + self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) - if self._debug: - print('获取document结束') - if self._debug_recorder: - self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) + r = self.run_cdp('Page.getFrameTree') + for i in findall(r"'id': '(.*?)'", str(r)): + self.browser._frames[i] = self.tab_id - self._is_loading = False - self._is_reading = False + self._is_loading = False + self._is_reading = False def _wait_loaded(self, timeout=None): """等待页面加载完成,超时触发停止加载 @@ -193,8 +201,18 @@ class ChromiumBase(BasePage): self.stop_loading() return False + def _onFrameDetached(self, **kwargs): + try: + self.browser._frames.pop(kwargs['frameId']) + except KeyError: + pass + + def _onFrameAttached(self, **kwargs): + self.browser._frames[kwargs['frameId']] = self.tab_id + def _onFrameStartedLoading(self, **kwargs): """页面开始加载时执行""" + self.browser._frames[kwargs['frameId']] = self.tab_id if kwargs['frameId'] == self._target_id: self._is_loading = True @@ -205,6 +223,7 @@ class ChromiumBase(BasePage): def _onFrameStoppedLoading(self, **kwargs): """页面加载完成后执行""" + self.browser._frames[kwargs['frameId']] = self.tab_id if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading: if self._debug: print('页面停止加载 FrameStoppedLoading') @@ -248,11 +267,6 @@ class ChromiumBase(BasePage): self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self._upload_list = None - def _onDownloadWillBegin(self, **kwargs): - """下载即将开始时执行""" - print('aaa') - self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid']) - def __call__(self, loc_or_str, timeout=None): """在内部查找元素 例:ele = page('@id=ele_id') diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index db2da7e..358c52c 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -62,6 +62,10 @@ class ChromiumBase(BasePage): def _wait_loaded(self, timeout: float = None) -> bool: ... + def _onFrameDetached(self, **kwargs) -> None: ... + + def _onFrameAttached(self, **kwargs) -> None: ... + def _onFrameStartedLoading(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ... @@ -74,7 +78,7 @@ class ChromiumBase(BasePage): def _onFileChooserOpened(self, **kwargs): ... - def _onDownloadWillBegin(self, **kwargs): ... + # def _onDownloadWillBegin(self, **kwargs): ... def _set_start_options(self, address, none) -> None: ... diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 28319ee..21cd5c7 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -127,37 +127,39 @@ class ChromiumFrame(ChromiumBase): def _get_new_document(self): """刷新cdp使用的document数据""" - if not self._is_reading: - self._is_reading = True + if self._is_reading: + return - if self._debug: - print('---获取document') + self._is_reading = True - end_time = perf_counter() + 3 - while self.is_alive and perf_counter() < end_time: - try: - if self._is_diff_domain is False: - node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] - self.doc_ele = ChromiumElement(self._target_page, - backend_id=node['contentDocument']['backendNodeId']) + if self._debug: + print('---获取document') - else: - b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] - self.doc_ele = ChromiumElement(self, backend_id=b_id) + end_time = perf_counter() + 3 + while self.is_alive and perf_counter() < end_time: + try: + if self._is_diff_domain is False: + node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] + self.doc_ele = ChromiumElement(self._target_page, + backend_id=node['contentDocument']['backendNodeId']) - break + else: + b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] + self.doc_ele = ChromiumElement(self, backend_id=b_id) - except Exception: - sleep(.1) + break - # else: - # raise RuntimeError('获取document失败。') + except Exception: + sleep(.1) - if self._debug: - print('---获取document结束') + # else: + # raise RuntimeError('获取document失败。') - self._is_loading = False - self._is_reading = False + if self._debug: + print('---获取document结束') + + self._is_loading = False + self._is_reading = False def _onFrameNavigated(self, **kwargs): """页面跳转时触发""" diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 58215d3..24fb94f 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -26,8 +26,9 @@ class ChromiumPage(ChromiumBase): :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间 """ - super().__init__(addr_driver_opts, tab_id) self._page = self + self._frames = {} + super().__init__(addr_driver_opts, tab_id) self._dl_mgr = BrowserDownloadManager(self) self.set.timeouts(implicit=timeout) @@ -93,7 +94,7 @@ class ChromiumPage(ChromiumBase): self._first_run = False def _page_init(self): - """页面相关设置""" + """浏览器相关设置""" u = f'http://{self.address}/json/version' ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] self._control_session.get(u, headers={'Connection': 'close'}) diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 0122d1d..a6d81ec 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -27,6 +27,7 @@ class ChromiumPage(ChromiumBase): self._alert: Alert = ... self._browser_driver: ChromiumDriver = ... self._rect: ChromiumTabRect = ... + self._frames: dict = ... def _connect_browser(self, addr_driver_opts: Union[str, ChromiumDriver] = None, diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py index 54cefe8..fefe638 100644 --- a/DrissionPage/setter.py +++ b/DrissionPage/setter.py @@ -355,7 +355,7 @@ class WebPageSetter(ChromiumPageSetter): self._chromium_setter.user_agent(ua, platform) -class WebPageTabSetter(ChromiumBaseSetter): +class WebPageTabSetter(TabSetter): def __init__(self, page): super().__init__(page) self._session_setter = SessionPageSetter(self._page) diff --git a/DrissionPage/setter.pyi b/DrissionPage/setter.pyi index 8803ed8..0ed4134 100644 --- a/DrissionPage/setter.pyi +++ b/DrissionPage/setter.pyi @@ -129,7 +129,7 @@ class WebPageSetter(ChromiumPageSetter): def cookies(self, cookies) -> None: ... -class WebPageTabSetter(ChromiumBaseSetter): +class WebPageTabSetter(TabSetter): _page: WebPage = ... _session_setter: SessionPageSetter = ... _chromium_setter: ChromiumBaseSetter = ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 2d97b0c..3282a54 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -6,6 +6,7 @@ from requests import Session from .base import BasePage +from .browser_download_manager import BrowserDownloadManager from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver from .chromium_page import ChromiumPage @@ -45,13 +46,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._response = None self._set = None self._screencast = None + self._frames = {} + self._page = self self._set_start_options(driver_or_options, session_or_options) self._set_runtime_settings() self._connect_browser() self._create_session() - - t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit + self._dl_mgr = BrowserDownloadManager(self) + self.set.timeouts(implicit=timeout) def _set_start_options(self, dr_opt, se_opt): """处理两种模式的设置 diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index 5734631..15f30f8 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -37,6 +37,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._DownloadKit: DownloadKit = ... self._download_path: str = ... self._tab_obj: ChromiumDriver = ... + self._frames: dict = ... def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],