From 4854c3b769bb56670c02da06626345841b0b6285 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 13 Jan 2023 19:18:01 +0800 Subject: [PATCH] =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E5=AF=B9=E8=B1=A1=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E7=BB=9F=E4=B8=80=E7=9A=84=E4=B8=8B=E8=BD=BD=E8=B7=AF?= =?UTF-8?q?=E5=BE=84=E5=B1=9E=E6=80=A7=EF=BC=9B=E6=B5=8F=E8=A7=88=E5=99=A8?= =?UTF-8?q?=E5=8F=AF=E8=AE=BE=E7=BD=AE=E7=9B=B4=E6=8E=A5=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=EF=BC=9B=E5=B0=9D=E8=AF=95=E7=94=A8download=E6=8E=A5=E7=AE=A1?= =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E4=B8=8B=E8=BD=BD=EF=BC=8C=E6=9C=AA?= =?UTF-8?q?=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_page.py | 44 +++++++++++----------------------- DrissionPage/chromium_page.pyi | 13 ++++++---- DrissionPage/common.py | 2 +- DrissionPage/config.py | 41 +++++++++++++++++++++++++------ DrissionPage/config.pyi | 10 ++++++++ DrissionPage/configs.ini | 2 +- DrissionPage/easy_set.py | 9 +------ DrissionPage/easy_set.pyi | 1 - DrissionPage/session_page.py | 23 ++++++++++++++---- DrissionPage/session_page.pyi | 7 ++++++ DrissionPage/web_page.py | 38 ++++++++++++++++++++++++++++- DrissionPage/web_page.pyi | 19 ++++++++++++++- requirements.txt | 4 ++-- 13 files changed, 152 insertions(+), 61 deletions(-) diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 76a818b..9932aa0 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -5,7 +5,6 @@ """ from pathlib import Path from platform import system -from queue import Queue from re import search from time import perf_counter, sleep @@ -43,7 +42,6 @@ class ChromiumPage(ChromiumBase): self._control_session.keep_alive = False self._alert = Alert() self._first_run = True - self._download_list = None # 接管或启动浏览器 if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions): @@ -88,6 +86,7 @@ class ChromiumPage(ChromiumBase): self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close + self._download_path = self.set_download_path(self.options.download_path) def _set_options(self): """从配置中读取设置""" @@ -127,28 +126,22 @@ class ChromiumPage(ChromiumBase): return self._window_setter @property - def download_list(self): - """以list方式返回被拦截的下载列表""" - if self._download_list is None: - return [] - d_list = [] - while not self._download_list.empty(): - d_list.append(self._download_list.get()) - return d_list + def download_path(self): + """返回默认下载路径""" + p = self._download_path or '' + return str(Path(p).absolute()) - def block_download(self, on_off): - """开始或停止拦截下载 \n - :param on_off: 开始或停止拦截 + def set_download_path(self, path): + """设置下载路径 \n + :param path: 下载路径 :return: None """ - if on_off: - self._tab_obj.Page.downloadWillBegin = self._on_download_begin - self._tab_obj.Browser.setDownloadBehavior(behavior='deny') - # self._tab_obj.Browser.downloadWillBegin = self._on_download_begin - else: - self._tab_obj.Browser.setDownloadBehavior(behavior='default') - self._tab_obj.Page.downloadWillBegin = None - # self._tab_obj.Browser.downloadWillBegin = None + path = path or '' + path = Path(path).absolute() + path.mkdir(parents=True, exist_ok=True) + path = str(path) + self._download_path = path + self.run_cdp('Browser.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True) def get_tab(self, tab_id=None): """获取一个标签页对象 \n @@ -377,15 +370,6 @@ class ChromiumPage(ChromiumBase): self._alert.response_text = None self._tab_obj.has_alert = True - def _on_download_begin(self, **kwargs): - if self._download_list is None: - self._download_list = Queue() - gid = kwargs['guid'] - self._tab_obj.Browser.cancelDownload(guid=gid) - url = kwargs['url'] - name = kwargs['suggestedFilename'] - self._download_list.put(item={'url': url, 'name': name}) - class Alert(object): """用于保存alert信息的类""" diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index dd31a3b..25d6094 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -5,7 +5,6 @@ """ from os import popen from pathlib import Path -from queue import Queue from typing import Union, Tuple, List from .chromium_base import ChromiumBase @@ -25,7 +24,7 @@ class ChromiumPage(ChromiumBase): self._window_setter: WindowSetter = ... self._main_tab: str = ... self._alert: Alert = ... - self._download_list: Queue = ... + self._download_path: str = ... def _connect_browser(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, @@ -53,7 +52,13 @@ class ChromiumPage(ChromiumBase): @property def download_list(self) -> list: ... - def block_download(self, on_off: bool) -> None: ... + @property + def set_download(self) -> DownloadSetter: ... + + @property + def download_path(self) -> str: ... + + def set_download_path(self, path: Union[str, Path]) -> None: ... def get_tab(self, tab_id: str = None) -> ChromiumTab: ... @@ -89,8 +94,6 @@ class ChromiumPage(ChromiumBase): def _on_alert_open(self, **kwargs): ... - def _on_download_begin(self, **kwargs): ... - class Alert(object): diff --git a/DrissionPage/common.py b/DrissionPage/common.py index f62d962..30843f6 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -503,7 +503,7 @@ def port_is_using(ip, port): s = socket(AF_INET, SOCK_STREAM) result = s.connect_ex((ip, int(port))) s.close() - return True if result == 0 else False + return result == 0 def connect_browser(option): diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 7aaab16..bcad2ab 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -136,6 +136,7 @@ class SessionOptions(object): :param ini_path: ini文件路径 """ self.ini_path = None + self._download_path = None self._headers = None self._cookies = None self._auth = None @@ -193,11 +194,18 @@ class SessionOptions(object): self._timeout = options_dict.get('timeout', 10) + self._download_path = om.paths.get('download_path', None) + @property def timeout(self): """返回timeout属性信息""" return self._timeout + @property + def download_path(self): + """返回默认下载路径属性信息""" + return self._download_path + @property def headers(self): """返回headers设置信息""" @@ -375,6 +383,7 @@ class SessionOptions(object): :return: 返回当前对象 """ self._timeout = second + return self def set_headers(self, headers): """设置headers参数 \n @@ -415,11 +424,20 @@ class SessionOptions(object): {'http': 'http://xx.xx.xx.xx:xxxx', 'https': 'http://xx.xx.xx.xx:xxxx'} :param proxies: 参数值 - :return: None + :return: 返回当前对象 """ self._proxies = proxies return self + def set_paths(self, download_path=None): + """设置默认下载路径 \n + :param download_path: 下载路径 + :return: 返回当前对象 + """ + if download_path is not None: + self._download_path = str(download_path) + return self + def save(self, path=None): """保存设置到文件 \n :param path: ini文件的路径,传入 'default' 保存到默认ini文件 @@ -449,6 +467,9 @@ class SessionOptions(object): for i in options: om.set_item('session_options', i, options[i]) + om.set_item('paths', 'download_path', self.download_path) + om.set_item('session_options', 'timeout', self.timeout) + path = str(path) om.save(path) @@ -482,6 +503,7 @@ class DriverOptions(Options): options_dict = om.chrome_options self._driver_path = om.paths.get('chromedriver_path', None) + self._download_path = om.paths.get('download_path', None) self._binary_location = options_dict.get('binary_location', '') self._arguments = options_dict.get('arguments', []) self._extensions = options_dict.get('extensions', []) @@ -498,6 +520,7 @@ class DriverOptions(Options): return self._driver_path = None + self._download_path = None self.ini_path = None self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} self._debugger_address = '127.0.0.1:9222' @@ -507,6 +530,11 @@ class DriverOptions(Options): """chromedriver文件路径""" return self._driver_path + @property + def download_path(self): + """默认下载路径文件路径""" + return self._download_path + @property def chrome_path(self): """浏览器启动文件路径""" @@ -596,6 +624,8 @@ class DriverOptions(Options): for i in options: if i == 'driver_path': om.set_item('paths', 'chromedriver_path', options[i]) + elif i == 'download_path': + om.set_item('paths', 'download_path', options[i]) else: om.set_item('chrome_options', i, options[i]) @@ -761,10 +791,7 @@ class DriverOptions(Options): self.debugger_address = debugger_address if download_path is not None: - if 'prefs' not in self.experimental_options: - self.experimental_options['prefs'] = {'download.default_directory': str(download_path)} - else: - self.experimental_options['prefs']['download.default_directory'] = str(download_path) + self._download_path = str(download_path) if user_data_path is not None: self.set_argument('--user-data-dir', str(user_data_path)) @@ -793,12 +820,12 @@ def chrome_options_to_dict(options): re_dict = dict() attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path', - 'page_load_strategy'] + 'page_load_strategy', 'download_path'] options_dir = options.__dir__() for attr in attrs: try: - re_dict[attr] = options.__getattribute__(f'{attr}') if attr in options_dir else None + re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None except Exception: pass diff --git a/DrissionPage/config.pyi b/DrissionPage/config.pyi index 5c6bc54..bb5ec09 100644 --- a/DrissionPage/config.pyi +++ b/DrissionPage/config.pyi @@ -46,6 +46,7 @@ class OptionsManager(object): class SessionOptions(object): def __init__(self, read_file: bool = True, ini_path: str = None): self.ini_path: str = ... + self._download_path: str = ... self._headers: dict = ... self._cookies: list = ... self._auth: tuple = ... @@ -63,6 +64,9 @@ class SessionOptions(object): @property def timeout(self) -> Union[int, float]: ... + @property + def download_path(self) -> str: ... + @property def headers(self) -> dict: ... @@ -148,6 +152,8 @@ class SessionOptions(object): def set_proxies(self, proxies: dict) -> SessionOptions: ... + def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ... + def save(self, path: str = None) -> str: ... def save_to_default(self) -> str: ... @@ -161,10 +167,14 @@ class DriverOptions(Options): self.ini_path: str = ... self._driver_path: str = ... self._user_data_path: str = ... + self._download_path: str = ... @property def driver_path(self) -> str: ... + @property + def download_path(self) -> str: ... + @property def chrome_path(self) -> str: ... diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index 3ea4625..f29c6bf 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -1,6 +1,6 @@ [paths] chromedriver_path = -tmp_path = +download_path = [chrome_options] debugger_address = 127.0.0.1:9222 diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index e568256..77583b8 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -33,7 +33,6 @@ def set_paths(driver_path=None, browser_path=None, local_port=None, debugger_address=None, - tmp_path=None, download_path=None, user_data_path=None, cache_path=None, @@ -46,7 +45,6 @@ def set_paths(driver_path=None, :param local_port: 本地端口号 :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 :param download_path: 下载文件路径 - :param tmp_path: 临时文件夹路径 :param user_data_path: 用户数据路径 :param cache_path: 缓存路径 :param ini_path: 要修改的ini文件路径 @@ -73,13 +71,8 @@ def set_paths(driver_path=None, if debugger_address is not None: om.set_item('chrome_options', 'debugger_address', format_path(debugger_address)) - if tmp_path is not None: - om.set_item('paths', 'tmp_path', format_path(tmp_path)) - if download_path is not None: - experimental_options = om.get_value('chrome_options', 'experimental_options') - experimental_options['prefs']['download.default_directory'] = format_path(download_path) - om.set_item('chrome_options', 'experimental_options', experimental_options) + om.set_item('paths', 'download_path', format_path(download_path)) om.save() diff --git a/DrissionPage/easy_set.pyi b/DrissionPage/easy_set.pyi index 04b1ef1..8c92145 100644 --- a/DrissionPage/easy_set.pyi +++ b/DrissionPage/easy_set.pyi @@ -14,7 +14,6 @@ def set_paths(driver_path: str = None, browser_path: str = None, local_port: Union[int, str] = None, debugger_address: str = None, - tmp_path: str = None, download_path: str = None, user_data_path: str = None, cache_path: str = None, diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index f3a6a9b..7ea3301 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -26,7 +26,6 @@ class SessionPage(BasePage): :param timeout: 连接超时时间,为None时从ini文件读取 """ self._response = None - self.timeout = 10 self._create_session(session_or_options) timeout = timeout if timeout is not None else self.timeout super().__init__(timeout) @@ -40,8 +39,11 @@ class SessionPage(BasePage): options = Session_or_Options or SessionOptions() self._set_session(options.as_dict()) self.timeout = options.timeout + self._download_path = options.download_path elif isinstance(Session_or_Options, Session): self._session = Session_or_Options + self._download_path = None + self._download_kit = None def _set_session(self, data): """根据传入字典对session进行设置 \n @@ -113,6 +115,20 @@ class SessionPage(BasePage): except Exception: return None + @property + def download_path(self): + """返回下载路径""" + return self._download_path + + def set_download_path(self, path): + """设置下载路径 \n + :param path: 下载路径 + :return: None + """ + self._download_path = str(path) + if self._download_kit is not None: + self._download_kit.goal_path = self._download_path + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """用get方式跳转到url \n :param url: 目标url @@ -199,9 +215,8 @@ class SessionPage(BasePage): @property def download(self): """返回下载器对象""" - if not hasattr(self, '_download_kit'): - self._download_kit = DownloadKit(session=self) - + if self._download_kit is None: + self._download_kit = DownloadKit(session=self, goal_path=self.download_path) return self._download_kit def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index 2c8a31c..247ce08 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -3,6 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ +from pathlib import Path from typing import Any, Union, Tuple, List from DownloadKit import DownloadKit @@ -22,6 +23,7 @@ class SessionPage(BasePage): self._session: Session = ... self._url: str = ... self._response: Response = ... + self._download_path: str = ... self._download_kit: DownloadKit = ... self._url_available: bool = ... self.timeout: float = ... @@ -51,6 +53,11 @@ class SessionPage(BasePage): @property def json(self) -> Union[dict, None]: ... + @property + def download_path(self) -> str: ... + + def set_download_path(self, path: Union[str, Path]) -> None: ... + def get(self, url: str, show_errmsg: bool | None = False, diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index a1905ac..fdaa46a 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -37,10 +37,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._tab_obj = None self._is_loading = False self.timeouts = Timeout(self) + self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None) self._set_both_options(driver_or_options, session_or_options) self._setting_tab_id = tab_id - self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None) self._response = None + self._download_kit = None if self._mode == 'd': self._to_d_mode() @@ -153,6 +154,34 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """ self.set_timeouts(implicit=second) + @property + def download_path(self): + """返回默认下载路径""" + return super(SessionPage, self).download_path + + def set_download_tool(self, use_browser=False): + """设置下载释是否使用浏览器 \n + :param use_browser: 是否使用浏览器 + :return: None + """ + if use_browser: + self._tab_obj.Browser.setDownloadBehavior(behavior='allow') + self._tab_obj.Page.downloadWillBegin = None + else: + self._tab_obj.Page.downloadWillBegin = self._on_download_begin + self._tab_obj.Browser.downloadWillBegin = self._on_download_begin + self._tab_obj.Browser.setDownloadBehavior(behavior='deny') + + def set_download_path(self, path): + """设置默认下载路径 + :param path: 下载路径 + :return: None + """ + if self._has_driver: + super(SessionPage, self).set_download_path(path) + else: + super().set_download_path(path) + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url \n :param url: 目标url @@ -450,3 +479,10 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._tab_obj.stop() self._tab_obj = None self._has_driver = None + + def _on_download_begin(self, **kwargs): + gid = kwargs['guid'] + self._tab_obj.Browser.cancelDownload(guid=gid) + url = kwargs['url'] + name = kwargs['suggestedFilename'] + self.download(url, goal_path=self.download_path, rename=name) diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index ce105ee..e278df9 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -3,6 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ +from pathlib import Path from typing import Union, Tuple, List, Any from DownloadKit import DownloadKit @@ -80,6 +81,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @timeout.setter def timeout(self, second: float) -> None: ... + @property + def download_path(self) -> str: ... + + def set_download_tool(self, use_browser:bool=False) -> None: ... + def get(self, url: str, show_errmsg: bool = False, @@ -160,10 +166,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage): Union[ChromiumElement, str, ChromiumFrame]]]: ... def _set_both_options(self, dr_opt: Union[ChromiumDriver, DriverOptions], - se_opt: Union[Session, SessionOptions, dict, bool, None]) -> None: ... + se_opt: Union[Session, SessionOptions, dict, bool, None]) -> None: ... def _set_driver_options(self, driver_or_Options: Union[ChromiumDriver, DriverOptions]) -> None: ... def _set_session_options(self, Session_or_Options: Union[Session, SessionOptions]) -> None: ... def quit(self) -> None: ... + + def _on_download_begin(self, **kwargs): ... + + +class DownloadSetter(object): + def __init__(self, page: ChromiumPage): + self._page: ChromiumPage = ... + + def deny(self) -> None: ... + + def save_path(self, path: Union[str, Path] = '') -> None: ... diff --git a/requirements.txt b/requirements.txt index a2d4027..ba358e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ requests tldextract lxml cssselect -DownloadKit -FlowViewer +DownloadKit>=0.4.1 +FlowViewer>=0.2.1 websocket-client \ No newline at end of file