mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
继续改进下载功能
This commit is contained in:
parent
c450a0c452
commit
0ff5b47a4c
@ -143,17 +143,7 @@ class BrowserDownloadManager(object):
|
|||||||
def _onDownloadWillBegin(self, **kwargs):
|
def _onDownloadWillBegin(self, **kwargs):
|
||||||
"""用于获取弹出新标签页触发的下载任务"""
|
"""用于获取弹出新标签页触发的下载任务"""
|
||||||
guid = kwargs['guid']
|
guid = kwargs['guid']
|
||||||
|
tab_id = self._page._frames.get(kwargs['frameId'], self._page.tab_id)
|
||||||
end = perf_counter() + 2
|
|
||||||
while perf_counter() < end:
|
|
||||||
tab_id = self._guid_and_tab.get(guid, None)
|
|
||||||
if tab_id:
|
|
||||||
# print('拿到')
|
|
||||||
break
|
|
||||||
sleep(.005)
|
|
||||||
else:
|
|
||||||
# print('没拿到')
|
|
||||||
tab_id = self._page.tab_id
|
|
||||||
|
|
||||||
settings = TabDownloadSettings(tab_id)
|
settings = TabDownloadSettings(tab_id)
|
||||||
if settings.rename:
|
if settings.rename:
|
||||||
|
@ -7,6 +7,7 @@ from base64 import b64decode
|
|||||||
from json import loads, JSONDecodeError
|
from json import loads, JSONDecodeError
|
||||||
from os.path import sep
|
from os.path import sep
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from re import findall
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from time import perf_counter, sleep, time
|
from time import perf_counter, sleep, time
|
||||||
|
|
||||||
@ -116,53 +117,60 @@ class ChromiumBase(BasePage):
|
|||||||
self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
|
self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
|
||||||
self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired)
|
self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired)
|
||||||
self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated)
|
self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated)
|
||||||
self._tab_obj.set_listener('Page.downloadWillBegin', self._onDownloadWillBegin)
|
self._tab_obj.set_listener('Page.frameAttached', self._onFrameAttached)
|
||||||
|
self._tab_obj.set_listener('Page.frameDetached', self._onFrameDetached)
|
||||||
|
|
||||||
def _get_document(self):
|
def _get_document(self):
|
||||||
"""刷新cdp使用的document数据"""
|
"""刷新cdp使用的document数据"""
|
||||||
if not self._is_reading:
|
if self._is_reading:
|
||||||
self._is_reading = True
|
return
|
||||||
|
|
||||||
if self._debug:
|
self._is_reading = True
|
||||||
print('获取document')
|
|
||||||
|
if self._debug:
|
||||||
|
print('获取document')
|
||||||
|
if self._debug_recorder:
|
||||||
|
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
|
||||||
|
|
||||||
|
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉
|
||||||
|
self._wait_loaded()
|
||||||
|
except TabClosedError:
|
||||||
|
return
|
||||||
|
|
||||||
|
end_time = perf_counter() + 10
|
||||||
|
while perf_counter() < end_time:
|
||||||
|
try:
|
||||||
|
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||||
|
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
|
||||||
if self._debug_recorder:
|
if self._debug_recorder:
|
||||||
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
|
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}'))
|
||||||
|
break
|
||||||
|
|
||||||
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉
|
except CDPError as e:
|
||||||
self._wait_loaded()
|
err = e
|
||||||
except TabClosedError:
|
if self._debug:
|
||||||
return
|
print('重试获取document')
|
||||||
|
|
||||||
end_time = perf_counter() + 10
|
|
||||||
while perf_counter() < end_time:
|
|
||||||
try:
|
|
||||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
|
||||||
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
|
|
||||||
if self._debug_recorder:
|
if self._debug_recorder:
|
||||||
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}'))
|
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
|
||||||
break
|
|
||||||
|
|
||||||
except CDPError as e:
|
sleep(.1)
|
||||||
err = e
|
|
||||||
if self._debug:
|
|
||||||
print('重试获取document')
|
|
||||||
if self._debug_recorder:
|
|
||||||
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
|
|
||||||
|
|
||||||
sleep(.1)
|
else:
|
||||||
|
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \
|
||||||
|
f'报告网址:https://gitee.com/g1879/DrissionPage/issues'
|
||||||
|
raise GetDocumentError(txt)
|
||||||
|
|
||||||
else:
|
if self._debug:
|
||||||
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \
|
print('获取document结束')
|
||||||
f'报告网址:https://gitee.com/g1879/DrissionPage/issues'
|
if self._debug_recorder:
|
||||||
raise GetDocumentError(txt)
|
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
|
||||||
|
|
||||||
if self._debug:
|
r = self.run_cdp('Page.getFrameTree')
|
||||||
print('获取document结束')
|
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||||
if self._debug_recorder:
|
self.browser._frames[i] = self.tab_id
|
||||||
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
|
|
||||||
|
|
||||||
self._is_loading = False
|
self._is_loading = False
|
||||||
self._is_reading = False
|
self._is_reading = False
|
||||||
|
|
||||||
def _wait_loaded(self, timeout=None):
|
def _wait_loaded(self, timeout=None):
|
||||||
"""等待页面加载完成,超时触发停止加载
|
"""等待页面加载完成,超时触发停止加载
|
||||||
@ -193,8 +201,18 @@ class ChromiumBase(BasePage):
|
|||||||
self.stop_loading()
|
self.stop_loading()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _onFrameDetached(self, **kwargs):
|
||||||
|
try:
|
||||||
|
self.browser._frames.pop(kwargs['frameId'])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _onFrameAttached(self, **kwargs):
|
||||||
|
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||||
|
|
||||||
def _onFrameStartedLoading(self, **kwargs):
|
def _onFrameStartedLoading(self, **kwargs):
|
||||||
"""页面开始加载时执行"""
|
"""页面开始加载时执行"""
|
||||||
|
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||||
if kwargs['frameId'] == self._target_id:
|
if kwargs['frameId'] == self._target_id:
|
||||||
self._is_loading = True
|
self._is_loading = True
|
||||||
|
|
||||||
@ -205,6 +223,7 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
def _onFrameStoppedLoading(self, **kwargs):
|
def _onFrameStoppedLoading(self, **kwargs):
|
||||||
"""页面加载完成后执行"""
|
"""页面加载完成后执行"""
|
||||||
|
self.browser._frames[kwargs['frameId']] = self.tab_id
|
||||||
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
|
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading:
|
||||||
if self._debug:
|
if self._debug:
|
||||||
print('页面停止加载 FrameStoppedLoading')
|
print('页面停止加载 FrameStoppedLoading')
|
||||||
@ -248,11 +267,6 @@ class ChromiumBase(BasePage):
|
|||||||
self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False)
|
self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False)
|
||||||
self._upload_list = None
|
self._upload_list = None
|
||||||
|
|
||||||
def _onDownloadWillBegin(self, **kwargs):
|
|
||||||
"""下载即将开始时执行"""
|
|
||||||
print('aaa')
|
|
||||||
self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid'])
|
|
||||||
|
|
||||||
def __call__(self, loc_or_str, timeout=None):
|
def __call__(self, loc_or_str, timeout=None):
|
||||||
"""在内部查找元素
|
"""在内部查找元素
|
||||||
例:ele = page('@id=ele_id')
|
例:ele = page('@id=ele_id')
|
||||||
|
@ -62,6 +62,10 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
def _wait_loaded(self, timeout: float = None) -> bool: ...
|
def _wait_loaded(self, timeout: float = None) -> bool: ...
|
||||||
|
|
||||||
|
def _onFrameDetached(self, **kwargs) -> None: ...
|
||||||
|
|
||||||
|
def _onFrameAttached(self, **kwargs) -> None: ...
|
||||||
|
|
||||||
def _onFrameStartedLoading(self, **kwargs): ...
|
def _onFrameStartedLoading(self, **kwargs): ...
|
||||||
|
|
||||||
def _onFrameStoppedLoading(self, **kwargs): ...
|
def _onFrameStoppedLoading(self, **kwargs): ...
|
||||||
@ -74,7 +78,7 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
def _onFileChooserOpened(self, **kwargs): ...
|
def _onFileChooserOpened(self, **kwargs): ...
|
||||||
|
|
||||||
def _onDownloadWillBegin(self, **kwargs): ...
|
# def _onDownloadWillBegin(self, **kwargs): ...
|
||||||
|
|
||||||
def _set_start_options(self, address, none) -> None: ...
|
def _set_start_options(self, address, none) -> None: ...
|
||||||
|
|
||||||
|
@ -127,37 +127,39 @@ class ChromiumFrame(ChromiumBase):
|
|||||||
|
|
||||||
def _get_new_document(self):
|
def _get_new_document(self):
|
||||||
"""刷新cdp使用的document数据"""
|
"""刷新cdp使用的document数据"""
|
||||||
if not self._is_reading:
|
if self._is_reading:
|
||||||
self._is_reading = True
|
return
|
||||||
|
|
||||||
if self._debug:
|
self._is_reading = True
|
||||||
print('---获取document')
|
|
||||||
|
|
||||||
end_time = perf_counter() + 3
|
if self._debug:
|
||||||
while self.is_alive and perf_counter() < end_time:
|
print('---获取document')
|
||||||
try:
|
|
||||||
if self._is_diff_domain is False:
|
|
||||||
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
|
|
||||||
self.doc_ele = ChromiumElement(self._target_page,
|
|
||||||
backend_id=node['contentDocument']['backendNodeId'])
|
|
||||||
|
|
||||||
else:
|
end_time = perf_counter() + 3
|
||||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
while self.is_alive and perf_counter() < end_time:
|
||||||
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
try:
|
||||||
|
if self._is_diff_domain is False:
|
||||||
|
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
|
||||||
|
self.doc_ele = ChromiumElement(self._target_page,
|
||||||
|
backend_id=node['contentDocument']['backendNodeId'])
|
||||||
|
|
||||||
break
|
else:
|
||||||
|
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
||||||
|
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
||||||
|
|
||||||
except Exception:
|
break
|
||||||
sleep(.1)
|
|
||||||
|
|
||||||
# else:
|
except Exception:
|
||||||
# raise RuntimeError('获取document失败。')
|
sleep(.1)
|
||||||
|
|
||||||
if self._debug:
|
# else:
|
||||||
print('---获取document结束')
|
# raise RuntimeError('获取document失败。')
|
||||||
|
|
||||||
self._is_loading = False
|
if self._debug:
|
||||||
self._is_reading = False
|
print('---获取document结束')
|
||||||
|
|
||||||
|
self._is_loading = False
|
||||||
|
self._is_reading = False
|
||||||
|
|
||||||
def _onFrameNavigated(self, **kwargs):
|
def _onFrameNavigated(self, **kwargs):
|
||||||
"""页面跳转时触发"""
|
"""页面跳转时触发"""
|
||||||
|
@ -26,8 +26,9 @@ class ChromiumPage(ChromiumBase):
|
|||||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||||
:param timeout: 超时时间
|
:param timeout: 超时时间
|
||||||
"""
|
"""
|
||||||
super().__init__(addr_driver_opts, tab_id)
|
|
||||||
self._page = self
|
self._page = self
|
||||||
|
self._frames = {}
|
||||||
|
super().__init__(addr_driver_opts, tab_id)
|
||||||
self._dl_mgr = BrowserDownloadManager(self)
|
self._dl_mgr = BrowserDownloadManager(self)
|
||||||
self.set.timeouts(implicit=timeout)
|
self.set.timeouts(implicit=timeout)
|
||||||
|
|
||||||
@ -93,7 +94,7 @@ class ChromiumPage(ChromiumBase):
|
|||||||
self._first_run = False
|
self._first_run = False
|
||||||
|
|
||||||
def _page_init(self):
|
def _page_init(self):
|
||||||
"""页面相关设置"""
|
"""浏览器相关设置"""
|
||||||
u = f'http://{self.address}/json/version'
|
u = f'http://{self.address}/json/version'
|
||||||
ws = self._control_session.get(u).json()['webSocketDebuggerUrl']
|
ws = self._control_session.get(u).json()['webSocketDebuggerUrl']
|
||||||
self._control_session.get(u, headers={'Connection': 'close'})
|
self._control_session.get(u, headers={'Connection': 'close'})
|
||||||
|
@ -27,6 +27,7 @@ class ChromiumPage(ChromiumBase):
|
|||||||
self._alert: Alert = ...
|
self._alert: Alert = ...
|
||||||
self._browser_driver: ChromiumDriver = ...
|
self._browser_driver: ChromiumDriver = ...
|
||||||
self._rect: ChromiumTabRect = ...
|
self._rect: ChromiumTabRect = ...
|
||||||
|
self._frames: dict = ...
|
||||||
|
|
||||||
def _connect_browser(self,
|
def _connect_browser(self,
|
||||||
addr_driver_opts: Union[str, ChromiumDriver] = None,
|
addr_driver_opts: Union[str, ChromiumDriver] = None,
|
||||||
|
@ -355,7 +355,7 @@ class WebPageSetter(ChromiumPageSetter):
|
|||||||
self._chromium_setter.user_agent(ua, platform)
|
self._chromium_setter.user_agent(ua, platform)
|
||||||
|
|
||||||
|
|
||||||
class WebPageTabSetter(ChromiumBaseSetter):
|
class WebPageTabSetter(TabSetter):
|
||||||
def __init__(self, page):
|
def __init__(self, page):
|
||||||
super().__init__(page)
|
super().__init__(page)
|
||||||
self._session_setter = SessionPageSetter(self._page)
|
self._session_setter = SessionPageSetter(self._page)
|
||||||
|
@ -129,7 +129,7 @@ class WebPageSetter(ChromiumPageSetter):
|
|||||||
def cookies(self, cookies) -> None: ...
|
def cookies(self, cookies) -> None: ...
|
||||||
|
|
||||||
|
|
||||||
class WebPageTabSetter(ChromiumBaseSetter):
|
class WebPageTabSetter(TabSetter):
|
||||||
_page: WebPage = ...
|
_page: WebPage = ...
|
||||||
_session_setter: SessionPageSetter = ...
|
_session_setter: SessionPageSetter = ...
|
||||||
_chromium_setter: ChromiumBaseSetter = ...
|
_chromium_setter: ChromiumBaseSetter = ...
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
from .base import BasePage
|
from .base import BasePage
|
||||||
|
from .browser_download_manager import BrowserDownloadManager
|
||||||
from .chromium_base import ChromiumBase, Timeout
|
from .chromium_base import ChromiumBase, Timeout
|
||||||
from .chromium_driver import ChromiumDriver
|
from .chromium_driver import ChromiumDriver
|
||||||
from .chromium_page import ChromiumPage
|
from .chromium_page import ChromiumPage
|
||||||
@ -45,13 +46,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
|||||||
self._response = None
|
self._response = None
|
||||||
self._set = None
|
self._set = None
|
||||||
self._screencast = None
|
self._screencast = None
|
||||||
|
self._frames = {}
|
||||||
|
self._page = self
|
||||||
|
|
||||||
self._set_start_options(driver_or_options, session_or_options)
|
self._set_start_options(driver_or_options, session_or_options)
|
||||||
self._set_runtime_settings()
|
self._set_runtime_settings()
|
||||||
self._connect_browser()
|
self._connect_browser()
|
||||||
self._create_session()
|
self._create_session()
|
||||||
|
self._dl_mgr = BrowserDownloadManager(self)
|
||||||
t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit
|
self.set.timeouts(implicit=timeout)
|
||||||
|
|
||||||
def _set_start_options(self, dr_opt, se_opt):
|
def _set_start_options(self, dr_opt, se_opt):
|
||||||
"""处理两种模式的设置
|
"""处理两种模式的设置
|
||||||
|
@ -37,6 +37,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
|||||||
self._DownloadKit: DownloadKit = ...
|
self._DownloadKit: DownloadKit = ...
|
||||||
self._download_path: str = ...
|
self._download_path: str = ...
|
||||||
self._tab_obj: ChromiumDriver = ...
|
self._tab_obj: ChromiumDriver = ...
|
||||||
|
self._frames: dict = ...
|
||||||
|
|
||||||
def __call__(self,
|
def __call__(self,
|
||||||
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
|
loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user