From e5a2a254735cdf48afef21567be2bccb06569336 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 22 Oct 2023 10:00:53 +0800 Subject: [PATCH] =?UTF-8?q?get=5Fsrc()=E5=A2=9E=E5=8A=A0=E6=94=AF=E6=8C=81?= =?UTF-8?q?blob?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/_base/chromium_driver.py | 9 ++-- DrissionPage/_base/chromium_driver.pyi | 6 ++- DrissionPage/_configs/chromium_options.py | 6 +-- DrissionPage/_elements/chromium_element.py | 63 ++++++++++++++++------ DrissionPage/_pages/chromium_page.py | 4 +- DrissionPage/_units/download_manager.py | 13 +++-- DrissionPage/_units/download_manager.pyi | 5 +- 7 files changed, 68 insertions(+), 38 deletions(-) diff --git a/DrissionPage/_base/chromium_driver.py b/DrissionPage/_base/chromium_driver.py index cd89be0..94a9664 100644 --- a/DrissionPage/_base/chromium_driver.py +++ b/DrissionPage/_base/chromium_driver.py @@ -137,12 +137,9 @@ class ChromiumDriver(object): except Empty: continue - if event['method'] in self.event_handlers: - try: - self.event_handlers[event['method']](**event['params']) - except Exception as e: - raise - # raise RuntimeError(f"\n回调函数错误:\n{e}") + function = self.event_handlers.get(event['method']) + if function: + function(**event['params']) self.event_queue.task_done() diff --git a/DrissionPage/_base/chromium_driver.pyi b/DrissionPage/_base/chromium_driver.pyi index 9e233a0..aeba4f9 100644 --- a/DrissionPage/_base/chromium_driver.pyi +++ b/DrissionPage/_base/chromium_driver.pyi @@ -5,7 +5,9 @@ """ from queue import Queue from threading import Thread, Event -from typing import Union, Callable, Dict +from typing import Union, Callable, Dict, Optional + +from websocket import WebSocket class GenericAttr(object): @@ -24,7 +26,7 @@ class ChromiumDriver(object): has_alert: bool _websocket_url: str _cur_id: int - _ws = None + _ws: Optional[WebSocket] _recv_th: Thread _handle_event_th: Thread _stopped: Event diff --git a/DrissionPage/_configs/chromium_options.py b/DrissionPage/_configs/chromium_options.py index d49ebba..67dfe89 100644 --- a/DrissionPage/_configs/chromium_options.py +++ b/DrissionPage/_configs/chromium_options.py @@ -117,8 +117,7 @@ class ChromiumOptions(object): @debugger_address.setter def debugger_address(self, address): """设置浏览器地址,格式ip:port""" - address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') - self._debugger_address = address + self.set_debugger_address(address) @property def arguments(self): @@ -349,7 +348,8 @@ class ChromiumOptions(object): :param address: 浏览器地址 :return: 当前对象 """ - self.debugger_address = address + address = address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') + self._debugger_address = address return self def set_browser_path(self, path): diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index bb318a9..c52501f 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -454,31 +454,64 @@ class ChromiumElement(DrissionElement): while not self.run_js(js) and perf_counter() < end_time: sleep(.1) + src = self.attr('src') + is_blob = src.startswith('blob') result = None end_time = perf_counter() + timeout while perf_counter() < end_time: - src = self.prop('currentSrc') - if not src: - continue + if is_blob: + js = """ + function fetchData(url) { + return new Promise((resolve, reject) => { + var xhr = new XMLHttpRequest(); + xhr.responseType = 'blob'; + xhr.onload = function() { + var reader = new FileReader(); + reader.onloadend = function() {resolve(reader.result);} + reader.readAsDataURL(xhr.response); + }; + xhr.open('GET', url, true); + xhr.send(); + }); + } + """ + try: + result = self.page.run_js(js, src) + break + except: + continue - node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] - frame = node.get('frameId', None) - frame = frame or self.page._target_id + else: + src = self.prop('currentSrc') + if not src: + continue - try: - result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) - break - except CDPError: - sleep(.1) + node = self.page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] + frame = node.get('frameId', None) + frame = frame or self.page._target_id + + try: + result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) + break + except CDPError: + sleep(.1) if not result: return None - if result['base64Encoded'] and base64_to_bytes: - from base64 import b64decode - return b64decode(result['content']) + if is_blob: + if base64_to_bytes: + from base64 import b64decode + return b64decode(result.split(',', 1)[1]) + else: + return result + else: - return result['content'] + if result['base64Encoded'] and base64_to_bytes: + from base64 import b64decode + return b64decode(result['content']) + else: + return result['content'] def save(self, path=None, name=None, timeout=None): """保存图片或其它有src属性的元素的资源 diff --git a/DrissionPage/_pages/chromium_page.py b/DrissionPage/_pages/chromium_page.py index efea162..6c6ba37 100644 --- a/DrissionPage/_pages/chromium_page.py +++ b/DrissionPage/_pages/chromium_page.py @@ -49,11 +49,11 @@ class ChromiumPage(ChromiumBase): # 接收浏览器地址和端口 elif isinstance(addr_driver_opts, str): self._driver_options = ChromiumOptions() - self._driver_options.debugger_address = addr_driver_opts + self._driver_options.set_debugger_address(addr_driver_opts) elif isinstance(addr_driver_opts, ChromiumDriver): self._driver_options = ChromiumOptions(False) - self._driver_options.debugger_address = addr_driver_opts.address + self._driver_options.set_debugger_address(addr_driver_opts.address) self._driver = addr_driver_opts else: diff --git a/DrissionPage/_units/download_manager.py b/DrissionPage/_units/download_manager.py index 0a0518c..029ccb9 100644 --- a/DrissionPage/_units/download_manager.py +++ b/DrissionPage/_units/download_manager.py @@ -24,7 +24,6 @@ class BrowserDownloadManager(object): t = TabDownloadSettings(self._page.tab_id) t.path = self._page.download_path self._missions = {} # {guid: DownloadMission} - self._tabs_settings = {self._page.tab_id: t} # {tab_id: TabDownloadSettings} self._tab_missions = {} # {tab_id: DownloadMission} self._flags = {} # {tab_id: [bool, DownloadMission]} @@ -44,7 +43,7 @@ class BrowserDownloadManager(object): :param path: 下载路径 :return: None """ - self._tabs_settings.setdefault(tab_id, TabDownloadSettings(tab_id)).path = str(Path(path).absolute()) + TabDownloadSettings(tab_id).path = str(Path(path).absolute()) if tab_id == self._page.tab_id: self._browser.run_cdp('Browser.setDownloadBehavior', downloadPath=str(Path(path).absolute()), behavior='allowAndName', eventsEnabled=True) @@ -55,7 +54,7 @@ class BrowserDownloadManager(object): :param rename: 文件名 :return: None """ - self._tabs_settings.setdefault(tab_id, TabDownloadSettings(tab_id)).rename = rename + TabDownloadSettings(tab_id).rename = rename def set_file_exists(self, tab_id, mode): """设置某个tab下载文件重名时执行的策略 @@ -63,7 +62,7 @@ class BrowserDownloadManager(object): :param mode: 下载路径 :return: None """ - self._tabs_settings.setdefault(tab_id, TabDownloadSettings(tab_id)).when_file_exists = mode + TabDownloadSettings(tab_id).when_file_exists = mode def set_flag(self, tab_id, flag): """设置某个tab的重命名文件名 @@ -125,7 +124,6 @@ class BrowserDownloadManager(object): :param tab_id: 标签页id :return: None """ - self._tabs_settings.pop(tab_id) self._tab_missions.pop(tab_id) self._flags.pop(tab_id) TabDownloadSettings.TABS.pop(tab_id) @@ -135,7 +133,7 @@ class BrowserDownloadManager(object): guid = kwargs['guid'] tab_id = self._browser._frames.get(kwargs['frameId'], self._page.tab_id) - settings = TabDownloadSettings(tab_id) + settings = TabDownloadSettings(tab_id if tab_id in TabDownloadSettings.TABS else self._page.tab_id) if settings.rename: tmp = kwargs['suggestedFilename'].rsplit('.', 1) ext_name = tmp[-1] if len(tmp) > 1 else '' @@ -165,7 +163,8 @@ class BrowserDownloadManager(object): else: self._tab_missions.setdefault(tab_id, []).append(guid) - self._flags[tab_id] = m + if self.get_flag(tab_id) is not None: + self._flags[tab_id] = m def _onDownloadProgress(self, **kwargs): """下载状态变化时执行""" diff --git a/DrissionPage/_units/download_manager.pyi b/DrissionPage/_units/download_manager.pyi index c4d640e..5153aa6 100644 --- a/DrissionPage/_units/download_manager.pyi +++ b/DrissionPage/_units/download_manager.pyi @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, Optional, Union +from typing import Dict, Optional, Union, Literal from .._base.browser import Browser from .._pages.chromium_page import ChromiumPage @@ -10,7 +10,6 @@ class BrowserDownloadManager(object): _page: ChromiumPage = ... _missions: Dict[str, DownloadMission] = ... _tab_missions: dict = ... - _tabs_settings: Dict[str, TabDownloadSettings] = ... _flags: dict = ... def __init__(self, browser: Browser): ... @@ -22,7 +21,7 @@ class BrowserDownloadManager(object): def set_rename(self, tab_id: str, rename: str) -> None: ... - def set_file_exists(self, tab_id: str, mode: str) -> None: ... + def set_file_exists(self, tab_id: str, mode: Literal['rename', 'skip', 'overwrite']) -> None: ... def set_flag(self, tab_id: str, flag: Optional[bool, DownloadMission]) -> None: ...