继续改进下载功能

This commit is contained in:
g1879 2023-10-18 14:14:08 +08:00
parent c450a0c452
commit 0ff5b47a4c
10 changed files with 97 additions and 81 deletions

View File

@ -143,17 +143,7 @@ class BrowserDownloadManager(object):
def _onDownloadWillBegin(self, **kwargs):
"""用于获取弹出新标签页触发的下载任务"""
guid = kwargs['guid']
end = perf_counter() + 2
while perf_counter() < end:
tab_id = self._guid_and_tab.get(guid, None)
if tab_id:
# print('拿到')
break
sleep(.005)
else:
# print('没拿到')
tab_id = self._page.tab_id
tab_id = self._page._frames.get(kwargs['frameId'], self._page.tab_id)
settings = TabDownloadSettings(tab_id)
if settings.rename:

View File

@ -7,6 +7,7 @@ from base64 import b64decode
from json import loads, JSONDecodeError
from os.path import sep
from pathlib import Path
from re import findall
from threading import Thread
from time import perf_counter, sleep, time
@ -116,11 +117,14 @@ class ChromiumBase(BasePage):
self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired)
self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated)
self._tab_obj.set_listener('Page.downloadWillBegin', self._onDownloadWillBegin)
self._tab_obj.set_listener('Page.frameAttached', self._onFrameAttached)
self._tab_obj.set_listener('Page.frameDetached', self._onFrameDetached)
def _get_document(self):
"""刷新cdp使用的document数据"""
if not self._is_reading:
if self._is_reading:
return
self._is_reading = True
if self._debug:
@ -161,6 +165,10 @@ class ChromiumBase(BasePage):
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
self._is_loading = False
self._is_reading = False
@ -193,8 +201,18 @@ class ChromiumBase(BasePage):
self.stop_loading()
return False
def _onFrameDetached(self, **kwargs):
try:
self.browser._frames.pop(kwargs['frameId'])
except KeyError:
pass
def _onFrameAttached(self, **kwargs):
self.browser._frames[kwargs['frameId']] = self.tab_id
def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id:
self._is_loading = True
@ -205,6 +223,7 @@ class ChromiumBase(BasePage):
def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
if self._debug:
print('页面停止加载 FrameStoppedLoading')
@ -248,11 +267,6 @@ class ChromiumBase(BasePage):
self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False)
self._upload_list = None
def _onDownloadWillBegin(self, **kwargs):
"""下载即将开始时执行"""
print('aaa')
self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid'])
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
ele = page('@id=ele_id')

View File

@ -62,6 +62,10 @@ class ChromiumBase(BasePage):
def _wait_loaded(self, timeout: float = None) -> bool: ...
def _onFrameDetached(self, **kwargs) -> None: ...
def _onFrameAttached(self, **kwargs) -> None: ...
def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs): ...
@ -74,7 +78,7 @@ class ChromiumBase(BasePage):
def _onFileChooserOpened(self, **kwargs): ...
def _onDownloadWillBegin(self, **kwargs): ...
# def _onDownloadWillBegin(self, **kwargs): ...
def _set_start_options(self, address, none) -> None: ...

View File

@ -127,7 +127,9 @@ class ChromiumFrame(ChromiumBase):
def _get_new_document(self):
"""刷新cdp使用的document数据"""
if not self._is_reading:
if self._is_reading:
return
self._is_reading = True
if self._debug:

View File

@ -26,8 +26,9 @@ class ChromiumPage(ChromiumBase):
:param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间
"""
super().__init__(addr_driver_opts, tab_id)
self._page = self
self._frames = {}
super().__init__(addr_driver_opts, tab_id)
self._dl_mgr = BrowserDownloadManager(self)
self.set.timeouts(implicit=timeout)
@ -93,7 +94,7 @@ class ChromiumPage(ChromiumBase):
self._first_run = False
def _page_init(self):
"""页面相关设置"""
"""浏览器相关设置"""
u = f'http://{self.address}/json/version'
ws = self._control_session.get(u).json()['webSocketDebuggerUrl']
self._control_session.get(u, headers={'Connection': 'close'})

View File

@ -27,6 +27,7 @@ class ChromiumPage(ChromiumBase):
self._alert: Alert = ...
self._browser_driver: ChromiumDriver = ...
self._rect: ChromiumTabRect = ...
self._frames: dict = ...
def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver] = None,

View File

@ -355,7 +355,7 @@ class WebPageSetter(ChromiumPageSetter):
self._chromium_setter.user_agent(ua, platform)
class WebPageTabSetter(ChromiumBaseSetter):
class WebPageTabSetter(TabSetter):
def __init__(self, page):
super().__init__(page)
self._session_setter = SessionPageSetter(self._page)

View File

@ -129,7 +129,7 @@ class WebPageSetter(ChromiumPageSetter):
def cookies(self, cookies) -> None: ...
class WebPageTabSetter(ChromiumBaseSetter):
class WebPageTabSetter(TabSetter):
_page: WebPage = ...
_session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumBaseSetter = ...

View File

@ -6,6 +6,7 @@
from requests import Session
from .base import BasePage
from .browser_download_manager import BrowserDownloadManager
from .chromium_base import ChromiumBase, Timeout
from .chromium_driver import ChromiumDriver
from .chromium_page import ChromiumPage
@ -45,13 +46,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._response = None
self._set = None
self._screencast = None
self._frames = {}
self._page = self
self._set_start_options(driver_or_options, session_or_options)
self._set_runtime_settings()
self._connect_browser()
self._create_session()
t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit
self._dl_mgr = BrowserDownloadManager(self)
self.set.timeouts(implicit=timeout)
def _set_start_options(self, dr_opt, se_opt):
"""处理两种模式的设置

View File

@ -37,6 +37,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._DownloadKit: DownloadKit = ...
self._download_path: str = ...
self._tab_obj: ChromiumDriver = ...
self._frames: dict = ...
def __call__(self,
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],