页面对象增加统一的下载路径属性;浏览器可设置直接下载;尝试用download接管浏览器下载,未完成

This commit is contained in:
g1879 2023-01-13 19:18:01 +08:00
parent 0b60c6c561
commit 4854c3b769
13 changed files with 152 additions and 61 deletions

View File

@ -5,7 +5,6 @@
"""
from pathlib import Path
from platform import system
from queue import Queue
from re import search
from time import perf_counter, sleep
@ -43,7 +42,6 @@ class ChromiumPage(ChromiumBase):
self._control_session.keep_alive = False
self._alert = Alert()
self._first_run = True
self._download_list = None
# 接管或启动浏览器
if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions):
@ -88,6 +86,7 @@ class ChromiumPage(ChromiumBase):
self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open
self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close
self._download_path = self.set_download_path(self.options.download_path)
def _set_options(self):
"""从配置中读取设置"""
@ -127,28 +126,22 @@ class ChromiumPage(ChromiumBase):
return self._window_setter
@property
def download_list(self):
"""以list方式返回被拦截的下载列表"""
if self._download_list is None:
return []
d_list = []
while not self._download_list.empty():
d_list.append(self._download_list.get())
return d_list
def download_path(self):
"""返回默认下载路径"""
p = self._download_path or ''
return str(Path(p).absolute())
def block_download(self, on_off):
"""开始或停止拦截下载 \n
:param on_off: 开始或停止拦截
def set_download_path(self, path):
"""设置下载路径 \n
:param path: 下载路径
:return: None
"""
if on_off:
self._tab_obj.Page.downloadWillBegin = self._on_download_begin
self._tab_obj.Browser.setDownloadBehavior(behavior='deny')
# self._tab_obj.Browser.downloadWillBegin = self._on_download_begin
else:
self._tab_obj.Browser.setDownloadBehavior(behavior='default')
self._tab_obj.Page.downloadWillBegin = None
# self._tab_obj.Browser.downloadWillBegin = None
path = path or ''
path = Path(path).absolute()
path.mkdir(parents=True, exist_ok=True)
path = str(path)
self._download_path = path
self.run_cdp('Browser.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True)
def get_tab(self, tab_id=None):
"""获取一个标签页对象 \n
@ -377,15 +370,6 @@ class ChromiumPage(ChromiumBase):
self._alert.response_text = None
self._tab_obj.has_alert = True
def _on_download_begin(self, **kwargs):
if self._download_list is None:
self._download_list = Queue()
gid = kwargs['guid']
self._tab_obj.Browser.cancelDownload(guid=gid)
url = kwargs['url']
name = kwargs['suggestedFilename']
self._download_list.put(item={'url': url, 'name': name})
class Alert(object):
"""用于保存alert信息的类"""

View File

@ -5,7 +5,6 @@
"""
from os import popen
from pathlib import Path
from queue import Queue
from typing import Union, Tuple, List
from .chromium_base import ChromiumBase
@ -25,7 +24,7 @@ class ChromiumPage(ChromiumBase):
self._window_setter: WindowSetter = ...
self._main_tab: str = ...
self._alert: Alert = ...
self._download_list: Queue = ...
self._download_path: str = ...
def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None,
@ -53,7 +52,13 @@ class ChromiumPage(ChromiumBase):
@property
def download_list(self) -> list: ...
def block_download(self, on_off: bool) -> None: ...
@property
def set_download(self) -> DownloadSetter: ...
@property
def download_path(self) -> str: ...
def set_download_path(self, path: Union[str, Path]) -> None: ...
def get_tab(self, tab_id: str = None) -> ChromiumTab: ...
@ -89,8 +94,6 @@ class ChromiumPage(ChromiumBase):
def _on_alert_open(self, **kwargs): ...
def _on_download_begin(self, **kwargs): ...
class Alert(object):

View File

@ -503,7 +503,7 @@ def port_is_using(ip, port):
s = socket(AF_INET, SOCK_STREAM)
result = s.connect_ex((ip, int(port)))
s.close()
return True if result == 0 else False
return result == 0
def connect_browser(option):

View File

@ -136,6 +136,7 @@ class SessionOptions(object):
:param ini_path: ini文件路径
"""
self.ini_path = None
self._download_path = None
self._headers = None
self._cookies = None
self._auth = None
@ -193,11 +194,18 @@ class SessionOptions(object):
self._timeout = options_dict.get('timeout', 10)
self._download_path = om.paths.get('download_path', None)
@property
def timeout(self):
"""返回timeout属性信息"""
return self._timeout
@property
def download_path(self):
    """Return the default download path held by these options (may be None when unset)."""
    current = self._download_path
    return current
@property
def headers(self):
"""返回headers设置信息"""
@ -375,6 +383,7 @@ class SessionOptions(object):
:return: 返回当前对象
"""
self._timeout = second
return self
def set_headers(self, headers):
"""设置headers参数 \n
@ -415,11 +424,20 @@ class SessionOptions(object):
{'http': 'http://xx.xx.xx.xx:xxxx',
'https': 'http://xx.xx.xx.xx:xxxx'}
:param proxies: 参数值
:return: None
:return: 返回当前对象
"""
self._proxies = proxies
return self
def set_paths(self, download_path=None):
    """Set the default download path.

    :param download_path: download directory; None leaves the stored value unchanged
    :return: the current object, so calls can be chained
    """
    if download_path is None:
        # Nothing to update; still return self to keep the call chainable.
        return self

    self._download_path = str(download_path)
    return self
def save(self, path=None):
"""保存设置到文件 \n
:param path: ini文件的路径传入 'default' 保存到默认ini文件
@ -449,6 +467,9 @@ class SessionOptions(object):
for i in options:
om.set_item('session_options', i, options[i])
om.set_item('paths', 'download_path', self.download_path)
om.set_item('session_options', 'timeout', self.timeout)
path = str(path)
om.save(path)
@ -482,6 +503,7 @@ class DriverOptions(Options):
options_dict = om.chrome_options
self._driver_path = om.paths.get('chromedriver_path', None)
self._download_path = om.paths.get('download_path', None)
self._binary_location = options_dict.get('binary_location', '')
self._arguments = options_dict.get('arguments', [])
self._extensions = options_dict.get('extensions', [])
@ -498,6 +520,7 @@ class DriverOptions(Options):
return
self._driver_path = None
self._download_path = None
self.ini_path = None
self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
self._debugger_address = '127.0.0.1:9222'
@ -507,6 +530,11 @@ class DriverOptions(Options):
"""chromedriver文件路径"""
return self._driver_path
@property
def download_path(self):
    """Return the default download path stored in these driver options."""
    stored = self._download_path
    return stored
@property
def chrome_path(self):
"""浏览器启动文件路径"""
@ -596,6 +624,8 @@ class DriverOptions(Options):
for i in options:
if i == 'driver_path':
om.set_item('paths', 'chromedriver_path', options[i])
elif i == 'download_path':
om.set_item('paths', 'download_path', options[i])
else:
om.set_item('chrome_options', i, options[i])
@ -761,10 +791,7 @@ class DriverOptions(Options):
self.debugger_address = debugger_address
if download_path is not None:
if 'prefs' not in self.experimental_options:
self.experimental_options['prefs'] = {'download.default_directory': str(download_path)}
else:
self.experimental_options['prefs']['download.default_directory'] = str(download_path)
self._download_path = str(download_path)
if user_data_path is not None:
self.set_argument('--user-data-dir', str(user_data_path))
@ -793,12 +820,12 @@ def chrome_options_to_dict(options):
re_dict = dict()
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path',
'page_load_strategy']
'page_load_strategy', 'download_path']
options_dir = options.__dir__()
for attr in attrs:
try:
re_dict[attr] = options.__getattribute__(f'{attr}') if attr in options_dir else None
re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None
except Exception:
pass

View File

@ -46,6 +46,7 @@ class OptionsManager(object):
class SessionOptions(object):
def __init__(self, read_file: bool = True, ini_path: str = None):
self.ini_path: str = ...
self._download_path: str = ...
self._headers: dict = ...
self._cookies: list = ...
self._auth: tuple = ...
@ -63,6 +64,9 @@ class SessionOptions(object):
@property
def timeout(self) -> Union[int, float]: ...
@property
def download_path(self) -> str: ...
@property
def headers(self) -> dict: ...
@ -148,6 +152,8 @@ class SessionOptions(object):
def set_proxies(self, proxies: dict) -> SessionOptions: ...
def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ...
def save(self, path: str = None) -> str: ...
def save_to_default(self) -> str: ...
@ -161,10 +167,14 @@ class DriverOptions(Options):
self.ini_path: str = ...
self._driver_path: str = ...
self._user_data_path: str = ...
self._download_path: str = ...
@property
def driver_path(self) -> str: ...
@property
def download_path(self) -> str: ...
@property
def chrome_path(self) -> str: ...

View File

@ -1,6 +1,6 @@
[paths]
chromedriver_path =
tmp_path =
download_path =
[chrome_options]
debugger_address = 127.0.0.1:9222

View File

@ -33,7 +33,6 @@ def set_paths(driver_path=None,
browser_path=None,
local_port=None,
debugger_address=None,
tmp_path=None,
download_path=None,
user_data_path=None,
cache_path=None,
@ -46,7 +45,6 @@ def set_paths(driver_path=None,
:param local_port: 本地端口号
:param debugger_address: 调试浏览器地址127.0.0.1:9222
:param download_path: 下载文件路径
:param tmp_path: 临时文件夹路径
:param user_data_path: 用户数据路径
:param cache_path: 缓存路径
:param ini_path: 要修改的ini文件路径
@ -73,13 +71,8 @@ def set_paths(driver_path=None,
if debugger_address is not None:
om.set_item('chrome_options', 'debugger_address', format_path(debugger_address))
if tmp_path is not None:
om.set_item('paths', 'tmp_path', format_path(tmp_path))
if download_path is not None:
experimental_options = om.get_value('chrome_options', 'experimental_options')
experimental_options['prefs']['download.default_directory'] = format_path(download_path)
om.set_item('chrome_options', 'experimental_options', experimental_options)
om.set_item('paths', 'download_path', format_path(download_path))
om.save()

View File

@ -14,7 +14,6 @@ def set_paths(driver_path: str = None,
browser_path: str = None,
local_port: Union[int, str] = None,
debugger_address: str = None,
tmp_path: str = None,
download_path: str = None,
user_data_path: str = None,
cache_path: str = None,

View File

@ -26,7 +26,6 @@ class SessionPage(BasePage):
:param timeout: 连接超时时间为None时从ini文件读取
"""
self._response = None
self.timeout = 10
self._create_session(session_or_options)
timeout = timeout if timeout is not None else self.timeout
super().__init__(timeout)
@ -40,8 +39,11 @@ class SessionPage(BasePage):
options = Session_or_Options or SessionOptions()
self._set_session(options.as_dict())
self.timeout = options.timeout
self._download_path = options.download_path
elif isinstance(Session_or_Options, Session):
self._session = Session_or_Options
self._download_path = None
self._download_kit = None
def _set_session(self, data):
"""根据传入字典对session进行设置 \n
@ -113,6 +115,20 @@ class SessionPage(BasePage):
except Exception:
return None
@property
def download_path(self):
    """Return the download path currently recorded on this page object."""
    recorded = self._download_path
    return recorded
def set_download_path(self, path):
    """Set the download path.

    The path is stored as a string; if a download kit object has already been
    created, its ``goal_path`` is updated to the same value.

    :param path: download path
    :return: None
    """
    self._download_path = str(path)
    kit = self._download_kit
    if kit is None:
        return
    kit.goal_path = self._download_path
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""用get方式跳转到url \n
:param url: 目标url
@ -199,9 +215,8 @@ class SessionPage(BasePage):
@property
def download(self):
"""返回下载器对象"""
if not hasattr(self, '_download_kit'):
self._download_kit = DownloadKit(session=self)
if self._download_kit is None:
self._download_kit = DownloadKit(session=self, goal_path=self.download_path)
return self._download_kit
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):

View File

@ -3,6 +3,7 @@
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Any, Union, Tuple, List
from DownloadKit import DownloadKit
@ -22,6 +23,7 @@ class SessionPage(BasePage):
self._session: Session = ...
self._url: str = ...
self._response: Response = ...
self._download_path: str = ...
self._download_kit: DownloadKit = ...
self._url_available: bool = ...
self.timeout: float = ...
@ -51,6 +53,11 @@ class SessionPage(BasePage):
@property
def json(self) -> Union[dict, None]: ...
@property
def download_path(self) -> str: ...
def set_download_path(self, path: Union[str, Path]) -> None: ...
def get(self,
url: str,
show_errmsg: bool | None = False,

View File

@ -37,10 +37,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._tab_obj = None
self._is_loading = False
self.timeouts = Timeout(self)
self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None)
self._set_both_options(driver_or_options, session_or_options)
self._setting_tab_id = tab_id
self._has_driver, self._has_session = (None, True) if self._mode == 's' else (True, None)
self._response = None
self._download_kit = None
if self._mode == 'd':
self._to_d_mode()
@ -153,6 +154,34 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
"""
self.set_timeouts(implicit=second)
@property
def download_path(self):
    """Return the default download path, as resolved by the class that follows SessionPage in the MRO."""
    after_session_page = super(SessionPage, self)
    return after_session_page.download_path
def set_download_tool(self, use_browser=False):
    """Choose whether downloads are performed by the browser or intercepted by this page.

    :param use_browser: True lets the browser save files itself; False denies browser
                        downloads and registers ``_on_download_begin`` for download events
    :return: None
    """
    if use_browser:
        # The DevTools protocol requires downloadPath whenever behavior is 'allow',
        # so hand the browser this page's download path.
        self._tab_obj.Browser.setDownloadBehavior(behavior='allow', downloadPath=self.download_path)
        # Remove both hooks: the interception branch below installs one on Page and one on Browser.
        self._tab_obj.Page.downloadWillBegin = None
        self._tab_obj.Browser.downloadWillBegin = None
    else:
        self._tab_obj.Page.downloadWillBegin = self._on_download_begin
        self._tab_obj.Browser.downloadWillBegin = self._on_download_begin
        self._tab_obj.Browser.setDownloadBehavior(behavior='deny')
def set_download_path(self, path):
    """Set the default download path.

    When ``_has_driver`` is truthy the call is forwarded to the implementation that
    follows SessionPage in the MRO; otherwise to the next implementation after this class.

    :param path: download path
    :return: None
    """
    if not self._has_driver:
        super().set_download_path(path)
        return
    super(SessionPage, self).set_download_path(path)
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url \n
:param url: 目标url
@ -450,3 +479,10 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._tab_obj.stop()
self._tab_obj = None
self._has_driver = None
def _on_download_begin(self, **kwargs):
    """Handle a download-begin event: cancel the browser download and fetch the file via ``self.download``.

    :param kwargs: event fields; 'guid', 'url' and 'suggestedFilename' are read
    :return: None
    """
    download_id = kwargs['guid']
    # Cancel the browser-side transfer first, then start our own download.
    self._tab_obj.Browser.cancelDownload(guid=download_id)
    source_url, file_name = kwargs['url'], kwargs['suggestedFilename']
    self.download(source_url, goal_path=self.download_path, rename=file_name)

View File

@ -3,6 +3,7 @@
@Author : g1879
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Union, Tuple, List, Any
from DownloadKit import DownloadKit
@ -80,6 +81,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@timeout.setter
def timeout(self, second: float) -> None: ...
@property
def download_path(self) -> str: ...
def set_download_tool(self, use_browser:bool=False) -> None: ...
def get(self,
url: str,
show_errmsg: bool = False,
@ -167,3 +173,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def _set_session_options(self, Session_or_Options: Union[Session, SessionOptions]) -> None: ...
def quit(self) -> None: ...
def _on_download_begin(self, **kwargs): ...
class DownloadSetter(object):
def __init__(self, page: ChromiumPage):
self._page: ChromiumPage = ...
def deny(self) -> None: ...
def save_path(self, path: Union[str, Path] = '') -> None: ...

View File

@ -3,6 +3,6 @@ requests
tldextract
lxml
cssselect
DownloadKit
FlowViewer
DownloadKit>=0.4.1
FlowViewer>=0.2.1
websocket-client