继续改进下载功能

This commit is contained in:
g1879 2023-10-18 14:14:08 +08:00
parent c450a0c452
commit 0ff5b47a4c
10 changed files with 97 additions and 81 deletions

View File

@ -143,17 +143,7 @@ class BrowserDownloadManager(object):
def _onDownloadWillBegin(self, **kwargs): def _onDownloadWillBegin(self, **kwargs):
"""用于获取弹出新标签页触发的下载任务""" """用于获取弹出新标签页触发的下载任务"""
guid = kwargs['guid'] guid = kwargs['guid']
tab_id = self._page._frames.get(kwargs['frameId'], self._page.tab_id)
end = perf_counter() + 2
while perf_counter() < end:
tab_id = self._guid_and_tab.get(guid, None)
if tab_id:
# print('拿到')
break
sleep(.005)
else:
# print('没拿到')
tab_id = self._page.tab_id
settings = TabDownloadSettings(tab_id) settings = TabDownloadSettings(tab_id)
if settings.rename: if settings.rename:

View File

@ -7,6 +7,7 @@ from base64 import b64decode
from json import loads, JSONDecodeError from json import loads, JSONDecodeError
from os.path import sep from os.path import sep
from pathlib import Path from pathlib import Path
from re import findall
from threading import Thread from threading import Thread
from time import perf_counter, sleep, time from time import perf_counter, sleep, time
@ -116,11 +117,14 @@ class ChromiumBase(BasePage):
self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated) self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired) self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired)
self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated) self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated)
self._tab_obj.set_listener('Page.downloadWillBegin', self._onDownloadWillBegin) self._tab_obj.set_listener('Page.frameAttached', self._onFrameAttached)
self._tab_obj.set_listener('Page.frameDetached', self._onFrameDetached)
def _get_document(self): def _get_document(self):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if not self._is_reading: if self._is_reading:
return
self._is_reading = True self._is_reading = True
if self._debug: if self._debug:
@ -161,6 +165,10 @@ class ChromiumBase(BasePage):
if self._debug_recorder: if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '结束')) self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
self._is_loading = False self._is_loading = False
self._is_reading = False self._is_reading = False
@ -193,8 +201,18 @@ class ChromiumBase(BasePage):
self.stop_loading() self.stop_loading()
return False return False
def _onFrameDetached(self, **kwargs):
try:
self.browser._frames.pop(kwargs['frameId'])
except KeyError:
pass
def _onFrameAttached(self, **kwargs):
self.browser._frames[kwargs['frameId']] = self.tab_id
def _onFrameStartedLoading(self, **kwargs): def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时执行""" """页面开始加载时执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id: if kwargs['frameId'] == self._target_id:
self._is_loading = True self._is_loading = True
@ -205,6 +223,7 @@ class ChromiumBase(BasePage):
def _onFrameStoppedLoading(self, **kwargs): def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后执行""" """页面加载完成后执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading: if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
if self._debug: if self._debug:
print('页面停止加载 FrameStoppedLoading') print('页面停止加载 FrameStoppedLoading')
@ -248,11 +267,6 @@ class ChromiumBase(BasePage):
self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False)
self._upload_list = None self._upload_list = None
def _onDownloadWillBegin(self, **kwargs):
"""下载即将开始时执行"""
print('aaa')
self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid'])
def __call__(self, loc_or_str, timeout=None): def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素 """在内部查找元素
ele = page('@id=ele_id') ele = page('@id=ele_id')

View File

@ -62,6 +62,10 @@ class ChromiumBase(BasePage):
def _wait_loaded(self, timeout: float = None) -> bool: ... def _wait_loaded(self, timeout: float = None) -> bool: ...
def _onFrameDetached(self, **kwargs) -> None: ...
def _onFrameAttached(self, **kwargs) -> None: ...
def _onFrameStartedLoading(self, **kwargs): ... def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...
@ -74,7 +78,7 @@ class ChromiumBase(BasePage):
def _onFileChooserOpened(self, **kwargs): ... def _onFileChooserOpened(self, **kwargs): ...
def _onDownloadWillBegin(self, **kwargs): ... # def _onDownloadWillBegin(self, **kwargs): ...
def _set_start_options(self, address, none) -> None: ... def _set_start_options(self, address, none) -> None: ...

View File

@ -127,7 +127,9 @@ class ChromiumFrame(ChromiumBase):
def _get_new_document(self): def _get_new_document(self):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if not self._is_reading: if self._is_reading:
return
self._is_reading = True self._is_reading = True
if self._debug: if self._debug:

View File

@ -26,8 +26,9 @@ class ChromiumPage(ChromiumBase):
:param tab_id: 要控制的标签页id不指定默认为激活的 :param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间 :param timeout: 超时时间
""" """
super().__init__(addr_driver_opts, tab_id)
self._page = self self._page = self
self._frames = {}
super().__init__(addr_driver_opts, tab_id)
self._dl_mgr = BrowserDownloadManager(self) self._dl_mgr = BrowserDownloadManager(self)
self.set.timeouts(implicit=timeout) self.set.timeouts(implicit=timeout)
@ -93,7 +94,7 @@ class ChromiumPage(ChromiumBase):
self._first_run = False self._first_run = False
def _page_init(self): def _page_init(self):
"""页面相关设置""" """浏览器相关设置"""
u = f'http://{self.address}/json/version' u = f'http://{self.address}/json/version'
ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] ws = self._control_session.get(u).json()['webSocketDebuggerUrl']
self._control_session.get(u, headers={'Connection': 'close'}) self._control_session.get(u, headers={'Connection': 'close'})

View File

@ -27,6 +27,7 @@ class ChromiumPage(ChromiumBase):
self._alert: Alert = ... self._alert: Alert = ...
self._browser_driver: ChromiumDriver = ... self._browser_driver: ChromiumDriver = ...
self._rect: ChromiumTabRect = ... self._rect: ChromiumTabRect = ...
self._frames: dict = ...
def _connect_browser(self, def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver] = None, addr_driver_opts: Union[str, ChromiumDriver] = None,

View File

@ -355,7 +355,7 @@ class WebPageSetter(ChromiumPageSetter):
self._chromium_setter.user_agent(ua, platform) self._chromium_setter.user_agent(ua, platform)
class WebPageTabSetter(ChromiumBaseSetter): class WebPageTabSetter(TabSetter):
def __init__(self, page): def __init__(self, page):
super().__init__(page) super().__init__(page)
self._session_setter = SessionPageSetter(self._page) self._session_setter = SessionPageSetter(self._page)

View File

@ -129,7 +129,7 @@ class WebPageSetter(ChromiumPageSetter):
def cookies(self, cookies) -> None: ... def cookies(self, cookies) -> None: ...
class WebPageTabSetter(ChromiumBaseSetter): class WebPageTabSetter(TabSetter):
_page: WebPage = ... _page: WebPage = ...
_session_setter: SessionPageSetter = ... _session_setter: SessionPageSetter = ...
_chromium_setter: ChromiumBaseSetter = ... _chromium_setter: ChromiumBaseSetter = ...

View File

@ -6,6 +6,7 @@
from requests import Session from requests import Session
from .base import BasePage from .base import BasePage
from .browser_download_manager import BrowserDownloadManager
from .chromium_base import ChromiumBase, Timeout from .chromium_base import ChromiumBase, Timeout
from .chromium_driver import ChromiumDriver from .chromium_driver import ChromiumDriver
from .chromium_page import ChromiumPage from .chromium_page import ChromiumPage
@ -45,13 +46,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._response = None self._response = None
self._set = None self._set = None
self._screencast = None self._screencast = None
self._frames = {}
self._page = self
self._set_start_options(driver_or_options, session_or_options) self._set_start_options(driver_or_options, session_or_options)
self._set_runtime_settings() self._set_runtime_settings()
self._connect_browser() self._connect_browser()
self._create_session() self._create_session()
self._dl_mgr = BrowserDownloadManager(self)
t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit self.set.timeouts(implicit=timeout)
def _set_start_options(self, dr_opt, se_opt): def _set_start_options(self, dr_opt, se_opt):
"""处理两种模式的设置 """处理两种模式的设置

View File

@ -37,6 +37,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._DownloadKit: DownloadKit = ... self._DownloadKit: DownloadKit = ...
self._download_path: str = ... self._download_path: str = ...
self._tab_obj: ChromiumDriver = ... self._tab_obj: ChromiumDriver = ...
self._frames: dict = ...
def __call__(self, def __call__(self,
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],