页面对象增加统一的下载路径属性;浏览器可设置直接下载;尝试用download接管浏览器下载,未完成

This commit is contained in:
g1879 2023-01-13 19:18:01 +08:00
parent 0b60c6c561
commit 4854c3b769
13 changed files with 152 additions and 61 deletions

View File

@ -5,7 +5,6 @@
""" """
from pathlib import Path from pathlib import Path
from platform import system from platform import system
from queue import Queue
from re import search from re import search
from time import perf_counter, sleep from time import perf_counter, sleep
@ -43,7 +42,6 @@ class ChromiumPage(ChromiumBase):
self._control_session.keep_alive = False self._control_session.keep_alive = False
self._alert = Alert() self._alert = Alert()
self._first_run = True self._first_run = True
self._download_list = None
# 接管或启动浏览器 # 接管或启动浏览器
if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions): if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions):
@ -88,6 +86,7 @@ class ChromiumPage(ChromiumBase):
self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open
self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close
self._download_path = self.set_download_path(self.options.download_path)
def _set_options(self): def _set_options(self):
"""从配置中读取设置""" """从配置中读取设置"""
@ -127,28 +126,22 @@ class ChromiumPage(ChromiumBase):
return self._window_setter return self._window_setter
@property @property
def download_list(self): def download_path(self):
"""以list方式返回被拦截的下载列表""" """返回默认下载路径"""
if self._download_list is None: p = self._download_path or ''
return [] return str(Path(p).absolute())
d_list = []
while not self._download_list.empty():
d_list.append(self._download_list.get())
return d_list
def block_download(self, on_off): def set_download_path(self, path):
"""开始或停止拦截下载 \n """设置下载路径 \n
:param on_off: 开始或停止拦截 :param path: 下载路径
:return: None :return: None
""" """
if on_off: path = path or ''
self._tab_obj.Page.downloadWillBegin = self._on_download_begin path = Path(path).absolute()
self._tab_obj.Browser.setDownloadBehavior(behavior='deny') path.mkdir(parents=True, exist_ok=True)
# self._tab_obj.Browser.downloadWillBegin = self._on_download_begin path = str(path)
else: self._download_path = path
self._tab_obj.Browser.setDownloadBehavior(behavior='default') self.run_cdp('Browser.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True)
self._tab_obj.Page.downloadWillBegin = None
# self._tab_obj.Browser.downloadWillBegin = None
def get_tab(self, tab_id=None): def get_tab(self, tab_id=None):
"""获取一个标签页对象 \n """获取一个标签页对象 \n
@ -377,15 +370,6 @@ class ChromiumPage(ChromiumBase):
self._alert.response_text = None self._alert.response_text = None
self._tab_obj.has_alert = True self._tab_obj.has_alert = True
def _on_download_begin(self, **kwargs):
if self._download_list is None:
self._download_list = Queue()
gid = kwargs['guid']
self._tab_obj.Browser.cancelDownload(guid=gid)
url = kwargs['url']
name = kwargs['suggestedFilename']
self._download_list.put(item={'url': url, 'name': name})
class Alert(object): class Alert(object):
"""用于保存alert信息的类""" """用于保存alert信息的类"""

View File

@ -5,7 +5,6 @@
""" """
from os import popen from os import popen
from pathlib import Path from pathlib import Path
from queue import Queue
from typing import Union, Tuple, List from typing import Union, Tuple, List
from .chromium_base import ChromiumBase from .chromium_base import ChromiumBase
@ -25,7 +24,7 @@ class ChromiumPage(ChromiumBase):
self._window_setter: WindowSetter = ... self._window_setter: WindowSetter = ...
self._main_tab: str = ... self._main_tab: str = ...
self._alert: Alert = ... self._alert: Alert = ...
self._download_list: Queue = ... self._download_path: str = ...
def _connect_browser(self, def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None,
@ -53,7 +52,13 @@ class ChromiumPage(ChromiumBase):
@property @property
def download_list(self) -> list: ... def download_list(self) -> list: ...
def block_download(self, on_off: bool) -> None: ... @property
def set_download(self) -> DownloadSetter: ...
@property
def download_path(self) -> str: ...
def set_download_path(self, path: Union[str, Path]) -> None: ...
def get_tab(self, tab_id: str = None) -> ChromiumTab: ... def get_tab(self, tab_id: str = None) -> ChromiumTab: ...
@ -89,8 +94,6 @@ class ChromiumPage(ChromiumBase):
def _on_alert_open(self, **kwargs): ... def _on_alert_open(self, **kwargs): ...
def _on_download_begin(self, **kwargs): ...
class Alert(object): class Alert(object):

View File

@ -503,7 +503,7 @@ def port_is_using(ip, port):
s = socket(AF_INET, SOCK_STREAM) s = socket(AF_INET, SOCK_STREAM)
result = s.connect_ex((ip, int(port))) result = s.connect_ex((ip, int(port)))
s.close() s.close()
return True if result == 0 else False return result == 0
def connect_browser(option): def connect_browser(option):

View File

@ -136,6 +136,7 @@ class SessionOptions(object):
:param ini_path: ini文件路径 :param ini_path: ini文件路径
""" """
self.ini_path = None self.ini_path = None
self._download_path = None
self._headers = None self._headers = None
self._cookies = None self._cookies = None
self._auth = None self._auth = None
@ -193,11 +194,18 @@ class SessionOptions(object):
self._timeout = options_dict.get('timeout', 10) self._timeout = options_dict.get('timeout', 10)
self._download_path = om.paths.get('download_path', None)
@property @property
def timeout(self): def timeout(self):
"""返回timeout属性信息""" """返回timeout属性信息"""
return self._timeout return self._timeout
@property
def download_path(self):
"""Return the default download path stored on this options object"""
return self._download_path
@property @property
def headers(self): def headers(self):
"""返回headers设置信息""" """返回headers设置信息"""
@ -375,6 +383,7 @@ class SessionOptions(object):
:return: 返回当前对象 :return: 返回当前对象
""" """
self._timeout = second self._timeout = second
return self
def set_headers(self, headers): def set_headers(self, headers):
"""设置headers参数 \n """设置headers参数 \n
@ -415,11 +424,20 @@ class SessionOptions(object):
{'http': 'http://xx.xx.xx.xx:xxxx', {'http': 'http://xx.xx.xx.xx:xxxx',
'https': 'http://xx.xx.xx.xx:xxxx'} 'https': 'http://xx.xx.xx.xx:xxxx'}
:param proxies: 参数值 :param proxies: 参数值
:return: None :return: 返回当前对象
""" """
self._proxies = proxies self._proxies = proxies
return self return self
def set_paths(self, download_path=None):
"""Set the default download path \n
:param download_path: download path; the stored value is left unchanged when None
:return: the current object
"""
if download_path is not None:
self._download_path = str(download_path)
return self
def save(self, path=None): def save(self, path=None):
"""保存设置到文件 \n """保存设置到文件 \n
:param path: ini文件的路径传入 'default' 保存到默认ini文件 :param path: ini文件的路径传入 'default' 保存到默认ini文件
@ -449,6 +467,9 @@ class SessionOptions(object):
for i in options: for i in options:
om.set_item('session_options', i, options[i]) om.set_item('session_options', i, options[i])
om.set_item('paths', 'download_path', self.download_path)
om.set_item('session_options', 'timeout', self.timeout)
path = str(path) path = str(path)
om.save(path) om.save(path)
@ -482,6 +503,7 @@ class DriverOptions(Options):
options_dict = om.chrome_options options_dict = om.chrome_options
self._driver_path = om.paths.get('chromedriver_path', None) self._driver_path = om.paths.get('chromedriver_path', None)
self._download_path = om.paths.get('download_path', None)
self._binary_location = options_dict.get('binary_location', '') self._binary_location = options_dict.get('binary_location', '')
self._arguments = options_dict.get('arguments', []) self._arguments = options_dict.get('arguments', [])
self._extensions = options_dict.get('extensions', []) self._extensions = options_dict.get('extensions', [])
@ -498,6 +520,7 @@ class DriverOptions(Options):
return return
self._driver_path = None self._driver_path = None
self._download_path = None
self.ini_path = None self.ini_path = None
self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
self._debugger_address = '127.0.0.1:9222' self._debugger_address = '127.0.0.1:9222'
@ -507,6 +530,11 @@ class DriverOptions(Options):
"""chromedriver文件路径""" """chromedriver文件路径"""
return self._driver_path return self._driver_path
@property
def download_path(self):
"""Default download path"""
return self._download_path
@property @property
def chrome_path(self): def chrome_path(self):
"""浏览器启动文件路径""" """浏览器启动文件路径"""
@ -596,6 +624,8 @@ class DriverOptions(Options):
for i in options: for i in options:
if i == 'driver_path': if i == 'driver_path':
om.set_item('paths', 'chromedriver_path', options[i]) om.set_item('paths', 'chromedriver_path', options[i])
elif i == 'download_path':
om.set_item('paths', 'download_path', options[i])
else: else:
om.set_item('chrome_options', i, options[i]) om.set_item('chrome_options', i, options[i])
@ -761,10 +791,7 @@ class DriverOptions(Options):
self.debugger_address = debugger_address self.debugger_address = debugger_address
if download_path is not None: if download_path is not None:
if 'prefs' not in self.experimental_options: self._download_path = str(download_path)
self.experimental_options['prefs'] = {'download.default_directory': str(download_path)}
else:
self.experimental_options['prefs']['download.default_directory'] = str(download_path)
if user_data_path is not None: if user_data_path is not None:
self.set_argument('--user-data-dir', str(user_data_path)) self.set_argument('--user-data-dir', str(user_data_path))
@ -793,12 +820,12 @@ def chrome_options_to_dict(options):
re_dict = dict() re_dict = dict()
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path', attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path',
'page_load_strategy'] 'page_load_strategy', 'download_path']
options_dir = options.__dir__() options_dir = options.__dir__()
for attr in attrs: for attr in attrs:
try: try:
re_dict[attr] = options.__getattribute__(f'{attr}') if attr in options_dir else None re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None
except Exception: except Exception:
pass pass

View File

@ -46,6 +46,7 @@ class OptionsManager(object):
class SessionOptions(object): class SessionOptions(object):
def __init__(self, read_file: bool = True, ini_path: str = None): def __init__(self, read_file: bool = True, ini_path: str = None):
self.ini_path: str = ... self.ini_path: str = ...
self._download_path: str = ...
self._headers: dict = ... self._headers: dict = ...
self._cookies: list = ... self._cookies: list = ...
self._auth: tuple = ... self._auth: tuple = ...
@ -63,6 +64,9 @@ class SessionOptions(object):
@property @property
def timeout(self) -> Union[int, float]: ... def timeout(self) -> Union[int, float]: ...
@property
def download_path(self) -> str: ...
@property @property
def headers(self) -> dict: ... def headers(self) -> dict: ...
@ -148,6 +152,8 @@ class SessionOptions(object):
def set_proxies(self, proxies: dict) -> SessionOptions: ... def set_proxies(self, proxies: dict) -> SessionOptions: ...
def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ...
def save(self, path: str = None) -> str: ... def save(self, path: str = None) -> str: ...
def save_to_default(self) -> str: ... def save_to_default(self) -> str: ...
@ -161,10 +167,14 @@ class DriverOptions(Options):
self.ini_path: str = ... self.ini_path: str = ...
self._driver_path: str = ... self._driver_path: str = ...
self._user_data_path: str = ... self._user_data_path: str = ...
self._download_path: str = ...
@property @property
def driver_path(self) -> str: ... def driver_path(self) -> str: ...
@property
def download_path(self) -> str: ...
@property @property
def chrome_path(self) -> str: ... def chrome_path(self) -> str: ...

View File

@ -1,6 +1,6 @@
[paths] [paths]
chromedriver_path = chromedriver_path =
tmp_path = download_path =
[chrome_options] [chrome_options]
debugger_address = 127.0.0.1:9222 debugger_address = 127.0.0.1:9222

View File

@ -33,7 +33,6 @@ def set_paths(driver_path=None,
browser_path=None, browser_path=None,
local_port=None, local_port=None,
debugger_address=None, debugger_address=None,
tmp_path=None,
download_path=None, download_path=None,
user_data_path=None, user_data_path=None,
cache_path=None, cache_path=None,
@ -46,7 +45,6 @@ def set_paths(driver_path=None,
:param local_port: 本地端口号 :param local_port: 本地端口号
:param debugger_address: 调试浏览器地址127.0.0.1:9222 :param debugger_address: 调试浏览器地址127.0.0.1:9222
:param download_path: 下载文件路径 :param download_path: 下载文件路径
:param tmp_path: 临时文件夹路径
:param user_data_path: 用户数据路径 :param user_data_path: 用户数据路径
:param cache_path: 缓存路径 :param cache_path: 缓存路径
:param ini_path: 要修改的ini文件路径 :param ini_path: 要修改的ini文件路径
@ -73,13 +71,8 @@ def set_paths(driver_path=None,
if debugger_address is not None: if debugger_address is not None:
om.set_item('chrome_options', 'debugger_address', format_path(debugger_address)) om.set_item('chrome_options', 'debugger_address', format_path(debugger_address))
if tmp_path is not None:
om.set_item('paths', 'tmp_path', format_path(tmp_path))
if download_path is not None: if download_path is not None:
experimental_options = om.get_value('chrome_options', 'experimental_options') om.set_item('paths', 'download_path', format_path(download_path))
experimental_options['prefs']['download.default_directory'] = format_path(download_path)
om.set_item('chrome_options', 'experimental_options', experimental_options)
om.save() om.save()

View File

@ -14,7 +14,6 @@ def set_paths(driver_path: str = None,
browser_path: str = None, browser_path: str = None,
local_port: Union[int, str] = None, local_port: Union[int, str] = None,
debugger_address: str = None, debugger_address: str = None,
tmp_path: str = None,
download_path: str = None, download_path: str = None,
user_data_path: str = None, user_data_path: str = None,
cache_path: str = None, cache_path: str = None,

View File

@ -26,7 +26,6 @@ class SessionPage(BasePage):
:param timeout: 连接超时时间为None时从ini文件读取 :param timeout: 连接超时时间为None时从ini文件读取
""" """
self._response = None self._response = None
self.timeout = 10
self._create_session(session_or_options) self._create_session(session_or_options)
timeout = timeout if timeout is not None else self.timeout timeout = timeout if timeout is not None else self.timeout
super().__init__(timeout) super().__init__(timeout)
@ -40,8 +39,11 @@ class SessionPage(BasePage):
options = Session_or_Options or SessionOptions() options = Session_or_Options or SessionOptions()
self._set_session(options.as_dict()) self._set_session(options.as_dict())
self.timeout = options.timeout self.timeout = options.timeout
self._download_path = options.download_path
elif isinstance(Session_or_Options, Session): elif isinstance(Session_or_Options, Session):
self._session = Session_or_Options self._session = Session_or_Options
self._download_path = None
self._download_kit = None
def _set_session(self, data): def _set_session(self, data):
"""根据传入字典对session进行设置 \n """根据传入字典对session进行设置 \n
@ -113,6 +115,20 @@ class SessionPage(BasePage):
except Exception: except Exception:
return None return None
@property
def download_path(self):
"""Return the download path"""
return self._download_path
def set_download_path(self, path):
    """Set the download path \n
    :param path: download directory (str or Path); None or '' falls back to '.'
    :return: None
    """
    # str(None) would store the literal text 'None' as a directory name, so a
    # falsy path is mapped to the current directory instead.
    self._download_path = str(path) if path else '.'
    # Keep an already created downloader pointed at the same directory.
    if self._download_kit is not None:
        self._download_kit.goal_path = self._download_path
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""用get方式跳转到url \n """用get方式跳转到url \n
:param url: 目标url :param url: 目标url
@ -199,9 +215,8 @@ class SessionPage(BasePage):
@property @property
def download(self): def download(self):
"""返回下载器对象""" """返回下载器对象"""
if not hasattr(self, '_download_kit'): if self._download_kit is None:
self._download_kit = DownloadKit(session=self) self._download_kit = DownloadKit(session=self, goal_path=self.download_path)
return self._download_kit return self._download_kit
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):

View File

@ -3,6 +3,7 @@
@Author : g1879 @Author : g1879
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from pathlib import Path
from typing import Any, Union, Tuple, List from typing import Any, Union, Tuple, List
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
@ -22,6 +23,7 @@ class SessionPage(BasePage):
self._session: Session = ... self._session: Session = ...
self._url: str = ... self._url: str = ...
self._response: Response = ... self._response: Response = ...
self._download_path: str = ...
self._download_kit: DownloadKit = ... self._download_kit: DownloadKit = ...
self._url_available: bool = ... self._url_available: bool = ...
self.timeout: float = ... self.timeout: float = ...
@ -51,6 +53,11 @@ class SessionPage(BasePage):
@property @property
def json(self) -> Union[dict, None]: ... def json(self) -> Union[dict, None]: ...
@property
def download_path(self) -> str: ...
def set_download_path(self, path: Union[str, Path]) -> None: ...
def get(self, def get(self,
url: str, url: str,
show_errmsg: bool | None = False, show_errmsg: bool | None = False,

View File

@ -37,10 +37,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._tab_obj = None self._tab_obj = None
self._is_loading = False self._is_loading = False
self.timeouts = Timeout(self) self.timeouts = Timeout(self)
self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None)
self._set_both_options(driver_or_options, session_or_options) self._set_both_options(driver_or_options, session_or_options)
self._setting_tab_id = tab_id self._setting_tab_id = tab_id
self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None)
self._response = None self._response = None
self._download_kit = None
if self._mode == 'd': if self._mode == 'd':
self._to_d_mode() self._to_d_mode()
@ -153,6 +154,34 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
""" """
self.set_timeouts(implicit=second) self.set_timeouts(implicit=second)
@property
def download_path(self):
"""Return the default download path"""
# super(SessionPage, self) starts the lookup after SessionPage in the MRO,
# so SessionPage.download_path is skipped (presumably reaching ChromiumPage's - confirm)
return super(SessionPage, self).download_path
def set_download_tool(self, use_browser=False):
    """Choose whether downloads are handled by the browser itself \n
    :param use_browser: True lets the browser save files; False denies browser
                        downloads and hands them to ``_on_download_begin``
    :return: None
    """
    browser = self._tab_obj.Browser
    page = self._tab_obj.Page
    if use_browser:
        # CDP requires downloadPath whenever behavior is 'allow'.
        browser.setDownloadBehavior(behavior='allow', downloadPath=self.download_path)
        # Detach both hooks installed by the deny branch so a re-enabled
        # browser download is not cancelled by a stale handler.
        page.downloadWillBegin = None
        browser.downloadWillBegin = None
    else:
        page.downloadWillBegin = self._on_download_begin
        browser.downloadWillBegin = self._on_download_begin
        browser.setDownloadBehavior(behavior='deny')
def set_download_path(self, path):
"""Set the default download path \n
:param path: download path
:return: None
"""
# When self._has_driver is truthy, start the lookup after SessionPage in the MRO
# (presumably reaching ChromiumPage.set_download_path - confirm); otherwise use
# the next class after WebPage.
if self._has_driver:
super(SessionPage, self).set_download_path(path)
else:
super().set_download_path(path)
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url \n """跳转到一个url \n
:param url: 目标url :param url: 目标url
@ -450,3 +479,10 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._tab_obj.stop() self._tab_obj.stop()
self._tab_obj = None self._tab_obj = None
self._has_driver = None self._has_driver = None
def _on_download_begin(self, **kwargs):
"""Cancel a download started by the browser and fetch it through self.download instead \n
:param kwargs: event fields; 'guid', 'url' and 'suggestedFilename' are read
:return: None
"""
gid = kwargs['guid']
# Stop the browser's own download before re-requesting the same url.
self._tab_obj.Browser.cancelDownload(guid=gid)
url = kwargs['url']
name = kwargs['suggestedFilename']
# Save into the page's download path under the name the browser suggested.
self.download(url, goal_path=self.download_path, rename=name)

View File

@ -3,6 +3,7 @@
@Author : g1879 @Author : g1879
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from pathlib import Path
from typing import Union, Tuple, List, Any from typing import Union, Tuple, List, Any
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
@ -80,6 +81,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@timeout.setter @timeout.setter
def timeout(self, second: float) -> None: ... def timeout(self, second: float) -> None: ...
@property
def download_path(self) -> str: ...
def set_download_tool(self, use_browser:bool=False) -> None: ...
def get(self, def get(self,
url: str, url: str,
show_errmsg: bool = False, show_errmsg: bool = False,
@ -160,10 +166,21 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
Union[ChromiumElement, str, ChromiumFrame]]]: ... Union[ChromiumElement, str, ChromiumFrame]]]: ...
def _set_both_options(self, dr_opt: Union[ChromiumDriver, DriverOptions], def _set_both_options(self, dr_opt: Union[ChromiumDriver, DriverOptions],
se_opt: Union[Session, SessionOptions, dict, bool, None]) -> None: ... se_opt: Union[Session, SessionOptions, dict, bool, None]) -> None: ...
def _set_driver_options(self, driver_or_Options: Union[ChromiumDriver, DriverOptions]) -> None: ... def _set_driver_options(self, driver_or_Options: Union[ChromiumDriver, DriverOptions]) -> None: ...
def _set_session_options(self, Session_or_Options: Union[Session, SessionOptions]) -> None: ... def _set_session_options(self, Session_or_Options: Union[Session, SessionOptions]) -> None: ...
def quit(self) -> None: ... def quit(self) -> None: ...
def _on_download_begin(self, **kwargs): ...
class DownloadSetter(object):
# Type stub only.
# NOTE(review): no DownloadSetter implementation is visible in this change - confirm it exists.
def __init__(self, page: ChromiumPage):
self._page: ChromiumPage = ...
def deny(self) -> None: ...
def save_path(self, path: Union[str, Path] = '') -> None: ...
View File

@ -3,6 +3,6 @@ requests
tldextract tldextract
lxml lxml
cssselect cssselect
DownloadKit DownloadKit>=0.4.1
FlowViewer FlowViewer>=0.2.1
websocket-client websocket-client