# -*- coding:utf-8 -*-
"""Browser-wide download bookkeeping (``DrissionPage/browser_download_manager.py``).

One ``BrowserDownloadManager`` exists per browser driver id.  It listens to the
``Browser.downloadWillBegin`` and ``Browser.downloadProgress`` events, creates a
``DownloadMission`` for every download, applies the per-tab settings stored in
``TabDownloadSettings`` and moves finished files to their goal path.
"""
from pathlib import Path
from shutil import move
from threading import Lock
from time import sleep, perf_counter
from typing import TYPE_CHECKING, Dict, List, Optional, Union

if TYPE_CHECKING:  # needed for annotations only
    from .chromium_page import ChromiumPage


class BrowserDownloadManager(object):
    """Tracks the download missions of one browser; cached per browser driver id."""

    BROWSERS: Dict[str, 'BrowserDownloadManager'] = {}

    def __new__(cls, page: 'ChromiumPage'):
        """Return the cached manager of the page's browser, or a new object.

        :param page: ChromiumPage object
        """
        if page.browser_driver.id in cls.BROWSERS:
            return cls.BROWSERS[page.browser_driver.id]
        return object.__new__(cls)

    def __init__(self, page: 'ChromiumPage'):
        """Initialise the manager once per browser and hook the CDP download events.

        :param page: ChromiumPage object
        """
        if page.browser_driver.id in BrowserDownloadManager.BROWSERS:
            return  # __new__ handed back the cached instance; keep its state

        self._page = page
        self._lock = Lock()
        self._when_download_file_exists = 'rename'

        t = TabDownloadSettings(page.tab_id)
        t.path = page.download_path
        self._tabs_settings: Dict[str, TabDownloadSettings] = {page.tab_id: t}
        self._missions: Dict[str, DownloadMission] = {}  # {guid: mission}, unfinished only
        self._tab_missions: Dict[str, List[DownloadMission]] = {}  # {tab_id: [mission, ...]}
        self._guid_and_tab: Dict[str, str] = {}  # which tab a guid belongs to
        self._flags: Dict[str, Union[bool, DownloadMission, None]] = {}  # per-tab waiting flag

        self._page.browser_driver.set_listener('Browser.downloadProgress', self._onDownloadProgress)
        self._page.browser_driver.set_listener('Browser.downloadWillBegin', self._onDownloadWillBegin)

        BrowserDownloadManager.BROWSERS[page.browser_driver.id] = self

    @property
    def missions(self) -> Dict[str, 'DownloadMission']:
        """Return all unfinished download missions, keyed by guid."""
        return self._missions

    def _settings_for(self, tab_id: str) -> 'TabDownloadSettings':
        """Return the settings object of a tab, creating it on first use."""
        if tab_id not in self._tabs_settings:
            self._tabs_settings[tab_id] = TabDownloadSettings(tab_id)
        return self._tabs_settings[tab_id]

    def set_path(self, tab_id: str, path: Union[Path, str]) -> None:
        """Set the download path of a tab (stored as an absolute path string).

        :param tab_id: tab id
        :param path: download path
        :return: None
        """
        self._settings_for(tab_id).path = str(Path(path).absolute())

    def set_rename(self, tab_id: str, rename: str) -> None:
        """Set the file name to use for the next download of a tab.

        :param tab_id: tab id
        :param rename: file name, extension optional
        :return: None
        """
        self._settings_for(tab_id).rename = rename

    def set_file_exists(self, tab_id: str, mode: str) -> None:
        """Set what a tab does when the goal file already exists.

        :param tab_id: tab id
        :param mode: the code below checks for 'skip' and 'overwrite'; any other value
                     lets the finished file be placed via ``get_usable_path``
        :return: None
        """
        self._settings_for(tab_id).when_file_exists = mode

    def set_flag(self, tab_id: str, flag: Optional[Union[bool, 'DownloadMission']]) -> None:
        """Set the download waiting flag of a tab.

        :param tab_id: tab id
        :param flag: waiting flag; ``False`` makes the next download of the tab get canceled
        :return: None
        """
        self._flags[tab_id] = flag

    def get_flag(self, tab_id: str) -> Optional[Union[bool, 'DownloadMission']]:
        """Return the download waiting flag of a tab.

        :param tab_id: tab id
        :return: the stored flag (bool or mission object), None when nothing is stored
        """
        return self._flags.get(tab_id, None)

    def get_tab_missions(self, tab_id: str) -> List['DownloadMission']:
        """Return the unfinished missions of a tab.

        :param tab_id: tab id
        :return: a new list, so callers may cancel missions while iterating over it
        """
        return list(self._tab_missions.get(tab_id, ()))

    def set_mission(self, tab_id: str, guid: str) -> None:
        """Record which tab a download guid belongs to.

        :param tab_id: tab id
        :param guid: download mission id
        :return: None
        """
        self._guid_and_tab[guid] = tab_id

    def set_done(self, mission: 'DownloadMission', state: str, cancel: bool = False,
                 final_path: Optional[str] = None) -> None:
        """Mark a mission as finished and drop it from the bookkeeping.

        :param mission: mission object
        :param state: final state of the mission
        :param cancel: whether to cancel the download; an already saved file is deleted
        :param final_path: final path of the downloaded file
        :return: None
        """
        mission.state = state
        if cancel:
            self._page.browser_driver.call_method('Browser.cancelDownload', guid=mission.id)
            # Read the stored path BEFORE resetting it, otherwise the file of a
            # mission that already completed could never be removed.
            stale = final_path or mission.final_path
            if stale:
                Path(stale).unlink(missing_ok=True)
            mission.final_path = None
        else:
            mission.final_path = final_path
        self._forget(mission)

    def _forget(self, mission: 'DownloadMission') -> None:
        """Remove a mission from every lookup table; safe to call more than once."""
        self._missions.pop(mission.id, None)
        self._guid_and_tab.pop(mission.id, None)
        siblings = self._tab_missions.get(mission.tab_id)
        if siblings and mission in siblings:
            siblings.remove(mission)
        if siblings is not None and not siblings:
            self._tab_missions.pop(mission.tab_id, None)

    @staticmethod
    def _build_name(rename: str, suggested: str) -> str:
        """Combine a user supplied name with the extension suggested by the browser.

        The suggested extension is appended unless ``rename`` already ends with it;
        when the browser suggests no extension, ``rename`` is used unchanged.
        """
        _, dot, ext = suggested.rpartition('.')
        ext_name = ext if dot else ''
        _, dot, ext = rename.rpartition('.')
        ext_rename = ext if dot else ''
        if not ext_name or ext_rename == ext_name:
            return rename
        return f'{rename}.{ext_name}'

    def _tab_of(self, guid: str) -> str:
        """Wait up to 0.3 s for ``set_mission`` to bind the guid; fall back to the page's tab."""
        end = perf_counter() + .3
        while perf_counter() < end:
            tab_id = self._guid_and_tab.get(guid, None)
            if tab_id:
                return tab_id
            sleep(.005)
        return self._page.tab_id

    def _onDownloadWillBegin(self, **kwargs) -> None:
        """Create the mission when the browser announces a download.

        Reads ``guid``, ``suggestedFilename`` and ``url`` from the event.
        """
        guid = kwargs['guid']
        tab_id = self._tab_of(guid)

        settings = self._settings_for(tab_id)
        if settings.rename:
            name = self._build_name(settings.rename, kwargs['suggestedFilename'])
            settings.rename = None  # a rename applies to one download only
        else:
            name = kwargs['suggestedFilename']

        # A tab that never set its own path uses the page's download path.
        path = settings.path or self._page.download_path

        skip = False
        goal_path = Path(path) / name
        if goal_path.exists():
            if settings.when_file_exists == 'skip':
                skip = True
            elif settings.when_file_exists == 'overwrite':
                goal_path.unlink()

        m = DownloadMission(self, tab_id, guid, path, name, kwargs['url'])
        self._missions[guid] = m
        self._tab_missions.setdefault(tab_id, []).append(m)

        if self.get_flag(tab_id) is False:  # the waiter asked for this download to be canceled
            self.set_done(m, 'canceled', True)
        elif skip:
            self.set_done(m, 'skipped', True)

        self._flags[tab_id] = m  # hand the mission to whoever polls the flag

    def _onDownloadProgress(self, **kwargs) -> None:
        """Update a mission when the browser reports a download state change.

        Reads ``guid``, ``state``, ``receivedBytes`` and ``totalBytes`` from the event.
        """
        guid = kwargs['guid']
        if guid not in self._missions:
            return

        with self._lock:
            mission = self._missions.get(guid)
            if mission is None:  # finished while this call waited for the lock
                return

            state = kwargs['state']
            if state == 'inProgress':
                mission.state = 'running'
                mission.received_bytes = kwargs['receivedBytes']
                mission.total_bytes = kwargs['totalBytes']

            elif state == 'completed':
                # Only this branch needs the package helper.
                from .commons.tools import get_usable_path

                mission.received_bytes = kwargs['receivedBytes']
                mission.total_bytes = kwargs['totalBytes']
                # The finished file is expected under the page's download path,
                # named after the guid; pathlib keeps the joins portable.
                from_path = Path(self._page.download_path) / mission.id
                to_path = str(get_usable_path(str(Path(mission.path) / mission.name)))
                move(str(from_path), to_path)
                self.set_done(mission, 'completed', final_path=to_path)

            else:
                self.set_done(mission, 'canceled')


class TabDownloadSettings(object):
    """Download settings of one tab; a single shared instance exists per tab id."""

    TABS: Dict[str, 'TabDownloadSettings'] = {}

    def __new__(cls, tab_id: str):
        """Return the existing settings object of the tab, or a new object.

        :param tab_id: tab id
        """
        if tab_id in cls.TABS:
            return cls.TABS[tab_id]
        return object.__new__(cls)

    def __init__(self, tab_id: str):
        """Initialise the defaults the first time a tab id is seen.

        :param tab_id: tab id
        """
        if TabDownloadSettings.TABS.get(tab_id) is self:
            return  # cached instance: keep the values already stored on it

        self.tab_id = tab_id
        self.rename: Optional[str] = None
        self.path: str = ''
        self.when_file_exists: str = 'rename'
        TabDownloadSettings.TABS[tab_id] = self


class DownloadMission(object):
    """State of one browser download."""

    def __init__(self, mgr: BrowserDownloadManager, tab_id: str, _id: str, path: str, name: str, url: str):
        """
        :param mgr: manager that owns this mission
        :param tab_id: id of the tab the download belongs to
        :param _id: guid of the download
        :param path: goal directory
        :param name: goal file name
        :param url: url of the download
        """
        self._mgr = mgr
        self.url = url
        self.tab_id = tab_id
        self.id = _id
        self.path = path
        self.name = name
        self.state = 'waiting'
        self.total_bytes: Optional[int] = None
        self.received_bytes = 0
        self.final_path: Optional[str] = None

    def __repr__(self):
        return f'<DownloadMission {self.id} {self.state}>'

    @property
    def rate(self) -> Optional[float]:
        """Return the progress as a percentage, None while the total size is unknown."""
        return round((self.received_bytes / self.total_bytes) * 100, 2) if self.total_bytes else None

    def cancel(self) -> None:
        """Cancel the mission; if it already finished, the downloaded file is deleted."""
        self._mgr.set_done(self, state='canceled', cancel=True)

    def wait(self, show: bool = True, timeout: Optional[float] = None,
             cancel_if_timeout: bool = True) -> Union[bool, str]:
        """Wait for the mission to finish.

        :param show: whether to print download information
        :param timeout: timeout in seconds, None waits without limit
        :param cancel_if_timeout: whether to cancel the mission when the timeout is reached
        :return: full path of the file on success, otherwise False
        """
        if show:
            print(f'url:{self.url}')
            t2 = perf_counter()
            while self.name is None and perf_counter() - t2 < 4:
                sleep(0.01)
            print(f'文件名:{self.name}')
            print(f'目标路径:{self.path}')

        if timeout is None:
            while self.id in self._mgr.missions:
                if show:
                    print(f'\r{self.rate}% ', end='')
                sleep(.2)

        else:
            end_time = perf_counter() + timeout
            while perf_counter() < end_time:
                if show:
                    print(f'\r{self.rate}% ', end='')
                if self.id not in self._mgr.missions:
                    break
                sleep(.2)

            # Re-check after the loop: a mission that finished just as the
            # timeout expired must not be canceled (that would delete its file).
            running = self.id in self._mgr.missions
            if running and cancel_if_timeout:
                self.cancel()

        if show:
            if self.state == 'completed':
                print(f'下载完成 {self.final_path}')
            elif self.state == 'canceled':
                print('下载取消')
            elif self.state == 'skipped':
                print('已跳过')
            print()

        return self.final_path if self.final_path else False
diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 80f2d13..1fb3cc2 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -108,7 +108,6 @@ class ChromiumBase(BasePage): self._is_loading = True self._tab_obj = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) - self._tab_obj.start() self._tab_obj.call_method('DOM.enable') self._tab_obj.call_method('Page.enable') @@ -251,7 +250,7 @@ class ChromiumBase(BasePage): def _onDownloadWillBegin(self, **kwargs): """下载即将开始时执行""" - self._page._dl_mgr.set_mission(self.tab_id, kwargs['guid']) + self.browser._dl_mgr.set_mission(self.tab_id, kwargs['guid']) def __call__(self, loc_or_str, timeout=None): """在内部查找元素 @@ -263,7 +262,7 @@ class ChromiumBase(BasePage): return self.ele(loc_or_str, timeout) @property - def page(self): + def browser(self): return self._page @property @@ -324,7 +323,7 @@ class ChromiumBase(BasePage): @property def _target_id(self): """返回当前标签页id""" - return self.driver.id if self.driver.status == 'started' else '' + return self.driver.id if not self.driver._stopped.is_set() else '' @property def ready_state(self): diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 82ed6a2..db2da7e 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -83,6 +83,9 @@ class ChromiumBase(BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement], timeout: float = None) -> ChromiumElement: ... + @property + def browser(self) -> ChromiumPage: ... + @property def title(self) -> str: ... 
diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index b638a0e..611a767 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -12,10 +12,6 @@ from websocket import WebSocketTimeoutException, WebSocketException, WebSocketCo class ChromiumDriver(object): - _INITIAL_ = 'initial' - _STARTED_ = 'started' - _STOPPED_ = 'stopped' - def __init__(self, tab_id, tab_type, address): """ :param tab_id: 标签页id @@ -38,13 +34,13 @@ class ChromiumDriver(object): self._handle_event_th.daemon = True self._stopped = Event() - self._started = False - self.status = self._INITIAL_ self.event_handlers = {} self.method_results = {} self.event_queue = Queue() + self.start() + def _send(self, message, timeout=None): """发送信息到浏览器,并返回浏览器返回的信息 :param message: 发送给浏览器的数据 @@ -105,8 +101,8 @@ class ChromiumDriver(object): while not self._stopped.is_set(): try: self._ws.settimeout(1) - message_json = self._ws.recv() - mes = loads(message_json) + msg_json = self._ws.recv() + msg = loads(msg_json) except WebSocketTimeoutException: continue except (WebSocketException, OSError, WebSocketConnectionClosedException): @@ -114,24 +110,23 @@ class ChromiumDriver(object): return if self._debug: - if self._debug is True or 'id' in mes or (isinstance(self._debug, str) - and mes.get('method', '').startswith(self._debug)): - print(f'<收 {message_json}') + if self._debug is True or 'id' in msg or (isinstance(self._debug, str) + and msg.get('method', '').startswith(self._debug)): + print(f'<收 {msg_json}') elif isinstance(self._debug, (list, tuple, set)): for m in self._debug: - if mes.get('method', '').startswith(m): - print(f'<收 {message_json}') + if msg.get('method', '').startswith(m): + print(f'<收 {msg_json}') break - if "method" in mes: - self.event_queue.put(mes) + if "method" in msg: + self.event_queue.put(msg) - elif "id" in mes: - if mes["id"] in self.method_results: - self.method_results[mes['id']].put(mes) + elif msg.get('id') in 
self.method_results: + self.method_results[msg['id']].put(msg) elif self._debug: - print(f'未知信息:{mes}') + print(f'未知信息:{msg}') def _handle_event_loop(self): """当接收到浏览器信息,执行已绑定的方法""" @@ -157,10 +152,6 @@ class ChromiumDriver(object): :param kwargs: cdp参数 :return: 执行结果 """ - if not self._started: - self.start() - # raise RuntimeError("不能在启动前调用方法。") - if self._stopped.is_set(): return {'error': 'tab closed', 'type': 'tab_closed'} @@ -178,13 +169,6 @@ class ChromiumDriver(object): def start(self): """启动连接""" - if self._started: - return False - if not self._websocket_url: - raise RuntimeError("已存在另一个连接。") - - self._started = True - self.status = self._STARTED_ self._stopped.clear() self._ws = create_connection(self._websocket_url, enable_multithread=True) self._recv_th.start() @@ -195,10 +179,7 @@ class ChromiumDriver(object): """中断连接""" if self._stopped.is_set(): return False - if not self._started: - return True - self.status = self._STOPPED_ self._stopped.set() if self._ws: self._ws.close() @@ -212,22 +193,12 @@ class ChromiumDriver(object): """绑定cdp event和回调方法 :param event: cdp event :param callback: 绑定到cdp event的回调方法 - :return: 回调方法 + :return: None """ - if not callback: - return self.event_handlers.pop(event, None) - if not callable(callback): - raise RuntimeError("方法不能调用。") - - self.event_handlers[event] = callback - return True - - def get_listener(self, event): - """获取cdp event对应的回调方法 - :param event: cdp event - :return: 回调方法 - """ - return self.event_handlers.get(event, None) + if callback: + self.event_handlers[event] = callback + else: + self.event_handlers.pop(event, None) def __str__(self): return f"" @@ -246,8 +217,8 @@ class BrowserDriver(ChromiumDriver): def __init__(self, tab_id, tab_type, address): if tab_id in BrowserDriver.BROWSERS: return - super().__init__(tab_id, tab_type, address) BrowserDriver.BROWSERS[tab_id] = self + super().__init__(tab_id, tab_type, address) def __repr__(self): return f"" diff --git a/DrissionPage/chromium_driver.pyi 
b/DrissionPage/chromium_driver.pyi index 152d4c4..9e233a0 100644 --- a/DrissionPage/chromium_driver.pyi +++ b/DrissionPage/chromium_driver.pyi @@ -17,9 +17,6 @@ class GenericAttr(object): class ChromiumDriver(object): - _INITIAL_: str - _STARTED_: str - _STOPPED_: str id: str address: str type: str @@ -31,8 +28,6 @@ class ChromiumDriver(object): _recv_th: Thread _handle_event_th: Thread _stopped: Event - _started: bool - status: str event_handlers: dict method_results: dict event_queue: Queue @@ -53,9 +48,7 @@ class ChromiumDriver(object): def stop(self) -> bool: ... - def set_listener(self, event: str, callback: Union[Callable, None]) -> Union[Callable, None, bool]: ... - - def get_listener(self, event: str) -> Union[Callable, None]: ... + def set_listener(self, event: str, callback: Union[Callable, None]) -> None: ... def __str__(self) -> str: ... diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index a9336f0..ea6a459 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -3,17 +3,13 @@ @Author : g1879 @Contact : g1879@qq.com """ -from pathlib import Path -from shutil import move -from threading import Lock from time import perf_counter, sleep +from .browser_download_manager import BrowserDownloadManager from .chromium_base import ChromiumBase, Timeout -from .chromium_base import handle_download from .chromium_driver import ChromiumDriver, BrowserDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser -from .commons.tools import get_usable_path from .configs.chromium_options import ChromiumOptions from .errors import BrowserConnectError from .setter import ChromiumPageSetter @@ -101,7 +97,6 @@ class ChromiumPage(ChromiumBase): ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] self._control_session.get(u, headers={'Connection': 'close'}) self._browser_driver = BrowserDriver(ws.split('/')[-1], 'browser', self.address) - self._browser_driver.start() self._alert = 
Alert() self._tab_obj.set_listener('Page.javascriptDialogOpening', self._on_alert_open) @@ -447,90 +442,6 @@ class ChromiumTabRect(object): return self._page.browser_driver.call_method('Browser.getWindowForTarget', targetId=self._page.tab_id)['bounds'] -class BrowserDownloadManager(object): - BROWSERS = {} - - def __new__(cls, page): - """ - :param page: ChromiumPage对象 - """ - if page.browser_driver.id in cls.BROWSERS: - return cls.BROWSERS[page.browser_driver.id] - return object.__new__(cls) - - def __init__(self, page): - """ - :param page: ChromiumPage对象 - """ - if page.browser_driver.id in BrowserDownloadManager.BROWSERS: - return - - self._page = page - self._lock = Lock() - page.set.download_path(page.download_path) - self._page.browser_driver.set_listener('Browser.downloadProgress', self._onDownloadProgress) - self._page.browser_driver.set_listener('Browser.downloadWillBegin', self._onDownloadWillBegin) - self._missions = {} - - BrowserDownloadManager.BROWSERS[page.browser_driver.id] = self - - @property - def missions(self): - """返回所有未完成的下载任务""" - return self._missions - - def add_mission(self, mission): - """添加下载任务信息 - :param mission: DownloadMission对象 - :return: None - """ - self._missions[mission.id] = mission - - def set_done(self, mission, state, cancel=False, final_path=None): - """设置任务结束 - :param mission: 任务对象 - :param state: 任务状态 - :param cancel: 是否取消 - :param final_path: 最终路径 - :return: None - """ - mission.state = state - mission.final_path = final_path - if cancel: - self._page.browser_driver.call_method('Browser.cancelDownload', guid=mission.id) - if mission.final_path: - Path(mission.final_path).unlink(True) - self._missions.pop(mission.id) - - def _onDownloadWillBegin(self, **kwargs): - """用于获取弹出新标签页触发的下载任务""" - sleep(.3) - if kwargs['guid'] not in self._missions: - handle_download(self._page, kwargs) - - def _onDownloadProgress(self, **kwargs): - """下载状态变化时执行""" - if kwargs['guid'] in self._missions: - with self._lock: - if kwargs['guid'] in 
self._missions: - mission = self._missions[kwargs['guid']] - if kwargs['state'] == 'inProgress': - mission.state = 'running' - mission.received_bytes = kwargs['receivedBytes'] - mission.total_bytes = kwargs['totalBytes'] - - elif kwargs['state'] == 'completed': - mission.received_bytes = kwargs['receivedBytes'] - mission.total_bytes = kwargs['totalBytes'] - form_path = f'{self._page.download_path}\\{mission.id}' - to_path = str(get_usable_path(f'{mission.path}\\{mission.name}')) - move(form_path, to_path) - self.set_done(mission, 'completed', final_path=to_path) - - else: - self.set_done(mission, 'canceled') - - class Alert(object): """用于保存alert信息的类""" diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 88d62e0..9347094 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -3,15 +3,15 @@ @Author : g1879 @Contact : g1879@qq.com """ -from threading import Lock -from typing import Union, Tuple, List, Dict +from typing import Union, Tuple, List +from .browser_download_manager import BrowserDownloadManager from .chromium_base import ChromiumBase from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .configs.chromium_options import ChromiumOptions from .setter import ChromiumPageSetter -from .waiter import ChromiumPageWaiter, DownloadMission +from .waiter import ChromiumPageWaiter class ChromiumPage(ChromiumBase): @@ -126,28 +126,6 @@ class ChromiumTabRect(object): def _get_browser_rect(self) -> dict: ... -class BrowserDownloadManager(object): - _page: ChromiumPage = ... - _missions: Dict[str, DownloadMission] = ... - _lock: Lock = ... - BROWSERS: Dict[str, BrowserDownloadManager] = ... - - def __new__(cls, page: ChromiumPage): ... - - def __init__(self, page: ChromiumPage): ... - - @property - def missions(self) -> Dict[str, DownloadMission]: ... - - def add_mission(self, mission: DownloadMission) -> None: ... 
- - def set_done(self, mission: DownloadMission, state: str, cancel: bool = False, final_path: str = None) -> None: ... - - def _onDownloadWillBegin(self, **kwargs) -> None: ... - - def _onDownloadProgress(self, **kwargs) -> None: ... - - class Alert(object): def __init__(self): diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index c5ca0a4..caad458 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -5,6 +5,7 @@ """ from copy import copy +from .waiter import ChromiumTabWaiter from .chromium_base import ChromiumBase from .commons.web import set_session_cookies, set_browser_cookies from .session_page import SessionPage @@ -30,7 +31,6 @@ class ChromiumTab(ChromiumBase): self.retry_interval = self.page.retry_interval self._page_load_strategy = self.page.page_load_strategy self._download_path = self.page.download_path - self._when_download_file_exists = self.page._when_download_file_exists def close(self): """关闭当前标签页""" @@ -53,6 +53,13 @@ class ChromiumTab(ChromiumBase): self._set = TabSetter(self) return self._set + @property + def wait(self): + """返回用于等待的对象""" + if self._wait is None: + self._wait = ChromiumTabWaiter(self) + return self._wait + class WebPageTab(SessionPage, ChromiumTab): def __init__(self, page, tab_id): diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 850c1f3..f38f8fa 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -7,6 +7,7 @@ from typing import Union, Tuple, Any, List from requests import Session, Response +from waiter import ChromiumTabWaiter from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame @@ -36,6 +37,9 @@ class ChromiumTab(ChromiumBase): @property def set(self) -> TabSetter: ... + @property + def wait(self) -> ChromiumTabWaiter: ... 
+ class WebPageTab(SessionPage, ChromiumTab): def __init__(self, page: WebPage, tab_id: str): diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index bad3529..5a438f3 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -164,7 +164,7 @@ def is_js_func(func): def cookie_to_dict(cookie): """把Cookie对象转为dict格式 - :param cookie: Cookie对象 + :param cookie: Cookie对象、字符串或字典 :return: cookie字典 """ if isinstance(cookie, Cookie): @@ -177,7 +177,7 @@ def cookie_to_dict(cookie): cookie_dict = cookie elif isinstance(cookie, str): - cookie = cookie.split(',' if ',' in cookie else ';') + cookie = cookie.rstrip(';,').split(',' if ',' in cookie else ';') cookie_dict = {} for key, attr in enumerate(cookie): diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py index abc6194..c7e0b31 100644 --- a/DrissionPage/setter.py +++ b/DrissionPage/setter.py @@ -126,22 +126,30 @@ class ChromiumBaseSetter(object): if self._page._DownloadKit: self._page._DownloadKit.set.goal_path(path) + +class TabSetter(ChromiumBaseSetter): + def __init__(self, page): + super().__init__(page) + + def download_path(self, path): + """设置下载路径 + :param path: 下载路径 + :return: None + """ + super().download_path(path) + self._page.browser._dl_mgr.set_path(self._page.tab_id, path) + def download_file_name(self, name): """设置下一个被下载文件的名称 :param name: 文件名,可不含后缀 :return: None """ - self._page._download_rename = name + self._page.browser._dl_mgr.set_rename(self._page.tab_id, name) def when_download_file_exists(self, mode): if mode not in ('rename', 'overwrite', 'skip'): raise ValueError(f"mode参数只能是'rename', 'overwrite', 'skip' 之一,现在是:{mode}") - self._page._when_download_file_exists = mode - - -class TabSetter(ChromiumBaseSetter): - def __init__(self, page): - super().__init__(page) + self._page.browser._dl_mgr.set_file_exists(self._page.tab_id, mode) class ChromiumPageSetter(ChromiumBaseSetter): diff --git a/DrissionPage/setter.pyi b/DrissionPage/setter.pyi index 39981d2..26130cb 
100644 --- a/DrissionPage/setter.pyi +++ b/DrissionPage/setter.pyi @@ -52,14 +52,16 @@ class ChromiumBaseSetter(object): def download_path(self, path: Union[str, Path]) -> None: ... - def download_file_name(self, name: str) -> None: ... - - def when_download_file_exists(self, mode: str) -> None: ... - class TabSetter(ChromiumBaseSetter): def __init__(self, page): ... + def download_path(self, path: Union[str, Path]) -> None: ... + + def download_file_name(self, name: str) -> None: ... + + def when_download_file_exists(self, mode: str) -> None: ... + class ChromiumPageSetter(ChromiumBaseSetter): _page: ChromiumPage = ... diff --git a/DrissionPage/waiter.py b/DrissionPage/waiter.py index e693873..7075ae2 100644 --- a/DrissionPage/waiter.py +++ b/DrissionPage/waiter.py @@ -1,5 +1,4 @@ # -*- coding:utf-8 -*- -from pathlib import Path from time import sleep, perf_counter from .commons.constants import Settings @@ -90,48 +89,23 @@ class ChromiumBaseWaiter(object): """等待浏览器下载开始,可将其拦截 :param timeout: 超时时间,None使用页面对象超时时间 :param cancel_it: 是否取消该任务 - :return: 成功返回任务信息dict,失败返回False + :return: 成功返回任务对象,失败返回False """ - self._driver._wait_download_flag = False if cancel_it else True + self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, False if cancel_it else True) if timeout is None: timeout = self._driver.timeout r = False end_time = perf_counter() + timeout while perf_counter() < end_time: - if not isinstance(self._driver._wait_download_flag, bool): - r = self._driver._wait_download_flag + v = self._driver.browser._dl_mgr.get_flag(self._driver.tab_id) + if not isinstance(v, bool): + r = v break - self._driver._wait_download_flag = None + self._driver.browser._dl_mgr.set_flag(self._driver.tab_id, None) return r - def downloads_done(self, timeout=None, cancel_if_timeout=True): - """等待所有浏览器下载任务结束 - :param timeout: 超时时间,为None时无限等待 - :param cancel_if_timeout: 超时时是否取消剩余任务 - :return: 是否等待成功 - """ - if not timeout: - while self._driver._download_missions: - sleep(.5) - return 
True - - else: - end_time = perf_counter() + timeout - while end_time > perf_counter(): - if not self._driver._download_missions: - return True - sleep(.5) - - if self._driver._download_missions: - if cancel_if_timeout: - for m in self._driver._download_missions: - m.cancel() - return False - else: - return True - def url_change(self, text, exclude=False, timeout=None, raise_err=None): """等待url变成包含或不包含指定文本 :param text: 用于识别的文本 @@ -204,7 +178,36 @@ class ChromiumBaseWaiter(object): return False -class ChromiumPageWaiter(ChromiumBaseWaiter): +class ChromiumTabWaiter(ChromiumBaseWaiter): + + def downloads_done(self, timeout=None, cancel_if_timeout=True): + """等待所有浏览器下载任务结束 + :param timeout: 超时时间,为None时无限等待 + :param cancel_if_timeout: 超时时是否取消剩余任务 + :return: 是否等待成功 + """ + if not timeout: + while self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + sleep(.5) + return True + + else: + end_time = perf_counter() + timeout + while end_time > perf_counter(): + if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + return True + sleep(.5) + + if self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + if cancel_if_timeout: + for m in self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): + m.cancel() + return False + else: + return True + + +class ChromiumPageWaiter(ChromiumTabWaiter): def __init__(self, page): super().__init__(page) # self._listener = None @@ -376,75 +379,3 @@ class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): """ super().__init__(frame) super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) - - -class DownloadMission(object): - def __init__(self, tab, _id, path, name, url): - self.url = url - self.tab = tab - self.id = _id - self.path = path - self.name = name - self.state = 'waiting' - self.total_bytes = None - self.received_bytes = 0 - self.final_path = None - - def __repr__(self): - # return f'' - return f'' - - @property - def rate(self): - """以百分比形式返回下载进度""" - return 
round((self.received_bytes / self.total_bytes) * 100, 2) if self.total_bytes else None - - def cancel(self): - """取消该任务,如任务已完成,删除已下载的文件""" - self.tab._page._dl_mgr.set_done(self, state='canceled', cancel=True) - - def wait(self, show=True, timeout=None, cancel_if_timeout=True): - """等待任务结束 - :param show: 是否显示下载信息 - :param timeout: 超时时间,为None则无限等待 - :param cancel_if_timeout: 超时时是否取消任务 - :return: 等待成功返回完整路径,否则返回False - """ - if show: - print(f'url:{self.url}') - t2 = perf_counter() - while self.name is None and perf_counter() - t2 < 4: - sleep(0.01) - print(f'文件名:{self.name}') - print(f'目标路径:{self.path}') - - if timeout is None: - while self.id in self.tab._page._dl_mgr.missions: - if show: - print(f'\r{self.rate}% ', end='') - sleep(.2) - - else: - running = True - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if show: - print(f'\r{self.rate}% ', end='') - if self.id not in self.tab._page._dl_mgr.missions: - running = False - break - sleep(.2) - - if running and cancel_if_timeout: - self.cancel() - - if show: - if self.state == 'completed': - print(f'下载完成 {self.final_path}') - elif self.state == 'canceled': - print(f'下载取消') - elif self.state == 'skipped': - print(f'已跳过') - print() - - return self.final_path if self.final_path else False diff --git a/DrissionPage/waiter.pyi b/DrissionPage/waiter.pyi index ad73281..823ee3b 100644 --- a/DrissionPage/waiter.pyi +++ b/DrissionPage/waiter.pyi @@ -3,8 +3,9 @@ @Author : g1879 @Contact : g1879@qq.com """ -from typing import Union, Optional +from typing import Union +from .browser_download_manager import DownloadMission from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame @@ -49,7 +50,12 @@ class ChromiumBaseWaiter(object): raise_err: bool = None) -> bool: ... 
-class ChromiumPageWaiter(ChromiumBaseWaiter): +class ChromiumTabWaiter(ChromiumBaseWaiter): + + def downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... + + +class ChromiumPageWaiter(ChromiumTabWaiter): _driver: ChromiumPage = ... def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... @@ -87,24 +93,3 @@ class ChromiumElementWaiter(object): class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): def __init__(self, frame: ChromiumFrame): ... - - -class DownloadMission(object): - tab: ChromiumBase = ... - url: str = ... - id: str = ... - path: str = ... - name: str = ... - state: str = ... - total_bytes: Optional[int] = ... - received_bytes: int = ... - final_path: Optional[str] = ... - - def __init__(self, tab: ChromiumBase, _id: str, path: str, name: str, url: str): ... - - @property - def rate(self) -> float: ... - - def cancel(self) -> None: ... - - def wait(self, show: bool = True, timeout=None, cancel_if_timeout=True) -> Union[bool, str]: ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index ec8a872..b506c85 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -27,6 +27,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): :param driver_or_options: ChromiumDriver对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ + super(ChromiumBase, self).__init__() # 调用Base的__init__() self._mode = mode.lower() if self._mode not in ('s', 'd'): raise ValueError('mode参数只能是s或d。') @@ -51,7 +52,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._create_session() t = timeout if isinstance(timeout, (int, float)) else self.timeouts.implicit - super(ChromiumBase, self).__init__(t) # 调用Base的__init__() def _set_start_options(self, dr_opt, se_opt): """处理两种模式的设置