继续改进下载功能

This commit is contained in:
g1879 2023-10-18 14:14:08 +08:00
parent c450a0c452
commit 0ff5b47a4c
10 changed files with 97 additions and 81 deletions

View File

@ -143,17 +143,7 @@ class BrowserDownloadManager(object):
def _onDownloadWillBegin(self, **kwargs): def _onDownloadWillBegin(self, **kwargs):
"""用于获取弹出新标签页触发的下载任务""" """用于获取弹出新标签页触发的下载任务"""
guid = kwargs['guid'] guid = kwargs['guid']
tab_id = self._page._frames.get(kwargs['frameId'], self._page.tab_id)
end = perf_counter() + 2
while perf_counter() < end:
tab_id = self._guid_and_tab.get(guid, None)
if tab_id:
# print('拿到')
break
sleep(.005)
else:
# print('没拿到')
tab_id = self._page.tab_id
settings = TabDownloadSettings(tab_id) settings = TabDownloadSettings(tab_id)
if settings.rename: if settings.rename:

View File

@ -7,6 +7,7 @@ from base64 import b64decode
from json import loads, JSONDecodeError from json import loads, JSONDecodeError
from os.path import sep from os.path import sep
from pathlib import Path from pathlib import Path
from re import findall
from threading import Thread from threading import Thread
from time import perf_counter, sleep, time from time import perf_counter, sleep, time
@ -116,53 +117,60 @@ class ChromiumBase(BasePage):
self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated) self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired) self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired)
self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated) self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated)
self._tab_obj.set_listener('Page.downloadWillBegin', self._onDownloadWillBegin) self._tab_obj.set_listener('Page.frameAttached', self._onFrameAttached)
self._tab_obj.set_listener('Page.frameDetached', self._onFrameDetached)
def _get_document(self): def _get_document(self):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if not self._is_reading: if self._is_reading:
self._is_reading = True return
if self._debug: self._is_reading = True
print('获取document')
if self._debug:
print('获取document')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉
self._wait_loaded()
except TabClosedError:
return
end_time = perf_counter() + 10
while perf_counter() < end_time:
try:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
if self._debug_recorder: if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '开始')) self._debug_recorder.add_data((perf_counter(), '信息', f'root_id{self._root_id}'))
break
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉 except CDPError as e:
self._wait_loaded() err = e
except TabClosedError: if self._debug:
return print('重试获取document')
end_time = perf_counter() + 10
while perf_counter() < end_time:
try:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
if self._debug_recorder: if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id{self._root_id}')) self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
break
except CDPError as e: sleep(.1)
err = e
if self._debug:
print('重试获取document')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
sleep(.1) else:
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \
f'报告网址https://gitee.com/g1879/DrissionPage/issues'
raise GetDocumentError(txt)
else: if self._debug:
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ print('获取document结束')
f'报告网址https://gitee.com/g1879/DrissionPage/issues' if self._debug_recorder:
raise GetDocumentError(txt) self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
if self._debug: r = self.run_cdp('Page.getFrameTree')
print('获取document结束') for i in findall(r"'id': '(.*?)'", str(r)):
if self._debug_recorder: self.browser._frames[i] = self.tab_id
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
self._is_loading = False self._is_loading = False
self._is_reading = False self._is_reading = False
def _wait_loaded(self, timeout=None): def _wait_loaded(self, timeout=None):
"""等待页面加载完成,超时触发停止加载 """等待页面加载完成,超时触发停止加载
@ -193,8 +201,18 @@ class ChromiumBase(BasePage):
self.stop_loading() self.stop_loading()
return False return False
def _onFrameDetached(self, **kwargs):
try:
self.browser._frames.pop(kwargs['frameId'])
except KeyError:
pass
def _onFrameAttached(self, **kwargs):
self.browser._frames[kwargs['frameId']] = self.tab_id
def _onFrameStartedLoading(self, **kwargs): def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时执行""" """页面开始加载时执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id: if kwargs['frameId'] == self._target_id:
self._is_loading = True self._is_loading = True
@ -205,6 +223,7 @@ class ChromiumBase(BasePage):
def _onFrameStoppedLoading(self, **kwargs): def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后执行""" """页面加载完成后执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading: if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
if self._debug: if self._debug:
print('页面停止加载 FrameStoppedLoading') print('页面停止加载 FrameStoppedLoading')
@ -248,11 +267,6 @@ class ChromiumBase(BasePage):
self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False)
self._upload_list = None self._upload_list = None
def _onDownloadWillBegin(self, **kwargs):
"""下载即将开始时执行"""
print('aaa')
self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid'])
def __call__(self, loc_or_str, timeout=None): def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素 """在内部查找元素
ele = page('@id=ele_id') ele = page('@id=ele_id')

View File

@ -62,6 +62,10 @@ class ChromiumBase(BasePage):
def _wait_loaded(self, timeout: float = None) -> bool: ... def _wait_loaded(self, timeout: float = None) -> bool: ...
# Listener for the `Page.frameDetached` event; takes the event fields as keyword arguments.
def _onFrameDetached(self, **kwargs) -> None: ...
# Listener for the `Page.frameAttached` event; takes the event fields as keyword arguments.
def _onFrameAttached(self, **kwargs) -> None: ...
def _onFrameStartedLoading(self, **kwargs): ... def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...
@ -74,7 +78,7 @@ class ChromiumBase(BasePage):
def _onFileChooserOpened(self, **kwargs): ... def _onFileChooserOpened(self, **kwargs): ...
def _onDownloadWillBegin(self, **kwargs): ... # def _onDownloadWillBegin(self, **kwargs): ...
def _set_start_options(self, address, none) -> None: ... def _set_start_options(self, address, none) -> None: ...

View File

@ -127,37 +127,39 @@ class ChromiumFrame(ChromiumBase):
def _get_new_document(self): def _get_new_document(self):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if not self._is_reading: if self._is_reading:
self._is_reading = True return
if self._debug: self._is_reading = True
print('---获取document')
end_time = perf_counter() + 3 if self._debug:
while self.is_alive and perf_counter() < end_time: print('---获取document')
try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page,
backend_id=node['contentDocument']['backendNodeId'])
else: end_time = perf_counter() + 3
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] while self.is_alive and perf_counter() < end_time:
self.doc_ele = ChromiumElement(self, backend_id=b_id) try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page,
backend_id=node['contentDocument']['backendNodeId'])
break else:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
except Exception: break
sleep(.1)
# else: except Exception:
# raise RuntimeError('获取document失败。') sleep(.1)
if self._debug: # else:
print('---获取document结束') # raise RuntimeError('获取document失败。')
self._is_loading = False if self._debug:
self._is_reading = False print('---获取document结束')
self._is_loading = False
self._is_reading = False
def _onFrameNavigated(self, **kwargs): def _onFrameNavigated(self, **kwargs):
"""页面跳转时触发""" """页面跳转时触发"""

View File

@ -26,8 +26,9 @@ class ChromiumPage(ChromiumBase):
:param tab_id: 要控制的标签页id不指定默认为激活的 :param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间 :param timeout: 超时时间
""" """
super().__init__(addr_driver_opts, tab_id)
self._page = self self._page = self
self._frames = {}
super().__init__(addr_driver_opts, tab_id)
self._dl_mgr = BrowserDownloadManager(self) self._dl_mgr = BrowserDownloadManager(self)
self.set.timeouts(implicit=timeout) self.set.timeouts(implicit=timeout)
@ -93,7 +94,7 @@ class ChromiumPage(ChromiumBase):
self._first_run = False self._first_run = False
def _page_init(self): def _page_init(self):
"""页面相关设置""" """浏览器相关设置"""
u = f'http://{self.address}/json/version' u = f'http://{self.address}/json/version'
ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] ws = self._control_session.get(u).json()['webSocketDebuggerUrl']
self._control_session.get(u, headers={'Connection': 'close'}) self._control_session.get(u, headers={'Connection': 'close'})

View File

@ -27,6 +27,7 @@ class ChromiumPage(ChromiumBase):
self._alert: Alert = ... self._alert: Alert = ...
self._browser_driver: ChromiumDriver = ... self._browser_driver: ChromiumDriver = ...
self._rect: ChromiumTabRect = ... self._rect: ChromiumTabRect = ...
self._frames: dict = ...
def _connect_browser(self, def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver] = None, addr_driver_opts: Union[str, ChromiumDriver] = None,

View File

@ -355,7 +355,7 @@ class WebPageSetter(ChromiumPageSetter):
self._chromium_setter.user_agent(ua, platform) self._chromium_setter.user_agent(ua, platform)
class WebPageTabSetter(ChromiumBaseSetter): class WebPageTabSetter(TabSetter):
def __init__(self, page): def __init__(self, page):
super().__init__(page) super().__init__(page)
self._session_setter = SessionPageSetter(self._page) self._session_setter = SessionPageSetter(self._page)

View File

@ -129,7 +129,7 @@ class WebPageSetter(ChromiumPageSetter):
def cookies(self, cookies) -> None: ... def cookies(self, cookies) -> None: ...
class WebPageTabSetter(ChromiumBaseSetter): class WebPageTabSetter(TabSetter):
_page: WebPage = ... _page: WebPage = ...
_session_setter: SessionPageSetter = ... _session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumBaseSetter = ... _chromium_setter: ChromiumBaseSetter = ...

View File

@ -6,6 +6,7 @@
from requests import Session from requests import Session
from .base import BasePage from .base import BasePage
from .browser_download_manager import BrowserDownloadManager
from .chromium_base import ChromiumBase, Timeout from .chromium_base import ChromiumBase, Timeout
from .chromium_driver import ChromiumDriver from .chromium_driver import ChromiumDriver
from .chromium_page import ChromiumPage from .chromium_page import ChromiumPage
@ -45,13 +46,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._response = None self._response = None
self._set = None self._set = None
self._screencast = None self._screencast = None
self._frames = {}
self._page = self
self._set_start_options(driver_or_options, session_or_options) self._set_start_options(driver_or_options, session_or_options)
self._set_runtime_settings() self._set_runtime_settings()
self._connect_browser() self._connect_browser()
self._create_session() self._create_session()
self._dl_mgr = BrowserDownloadManager(self)
t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit self.set.timeouts(implicit=timeout)
def _set_start_options(self, dr_opt, se_opt): def _set_start_options(self, dr_opt, se_opt):
"""处理两种模式的设置 """处理两种模式的设置

View File

@ -37,6 +37,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._DownloadKit: DownloadKit = ... self._DownloadKit: DownloadKit = ...
self._download_path: str = ... self._download_path: str = ...
self._tab_obj: ChromiumDriver = ... self._tab_obj: ChromiumDriver = ...
self._frames: dict = ...
def __call__(self, def __call__(self,
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],