页面类_download_kit属性移动到download_set中;增加wait_download_begin()方法

This commit is contained in:
g1879 2023-01-24 00:17:14 +08:00
parent 69801b4c3a
commit 77e35e64c6
8 changed files with 165 additions and 95 deletions

View File

@ -8,7 +8,6 @@ from platform import system
from threading import Thread
from time import perf_counter, sleep
from DownloadKit import DownloadKit
from requests import Session
from .chromium_base import ChromiumBase, Timeout
@ -30,10 +29,8 @@ class ChromiumPage(ChromiumBase):
:param tab_id: 要控制的标签页id不指定默认为激活的
:param timeout: 超时时间
"""
super().__init__(addr_driver_opts, tab_id, timeout)
self._session = None
self._download_set = None
self._download_kit = None
super().__init__(addr_driver_opts, tab_id, timeout)
def _connect_browser(self, addr_driver_opts=None, tab_id=None):
"""连接浏览器,在第一次时运行
@ -99,7 +96,7 @@ class ChromiumPage(ChromiumBase):
self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open
self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close
self._main_tab = self.tab_id
self.download_set.use_DownloadKit()
self.download_set.by_DownloadKit()
@property
def tabs_count(self):
@ -147,10 +144,7 @@ class ChromiumPage(ChromiumBase):
@property
def download(self):
"""返回下载器对象"""
self.cookies_to_session()
if self._download_kit is None:
self._download_kit = DownloadKit(session=self._session, goal_path=self.download_path)
return self._download_kit
return self.download_set._switched_DownloadKit
def get_tab(self, tab_id=None):
"""获取一个标签页对象
@ -286,6 +280,13 @@ class ChromiumPage(ChromiumBase):
if read_doc and self.ready_state == 'complete':
self._get_document()
def wait_download_begin(self, timeout=None):
    """Wait for a browser download to start.

    Thin wrapper that forwards the call to the page's download setter.

    :param timeout: time to wait, forwarded unchanged; per the original docstring, None means the page object's timeout attribute is used
    :return: the setter's result, i.e. whether a download start was observed
    """
    setter = self.download_set
    return setter.wait_download_begin(timeout)
def close_tabs(self, tab_ids=None, others=False):
"""关闭传入的标签页,默认关闭当前页。可传入多个
:param tab_ids: 要关闭的标签页id可传入id组成的列表或元组为None时关闭当前页
@ -357,15 +358,6 @@ class ChromiumPage(ChromiumBase):
"""显示浏览器窗口只在Windows系统可用"""
show_or_hide_browser(self, hide=False)
def cookies_to_session(self):
"""把driver对象的cookies复制到session对象"""
if self._session is None:
self._session = Session()
selenium_user_agent = self._tab_obj.Runtime.evaluate(expression='navigator.userAgent;')['result']['value']
self._session.headers.update({"User-Agent": selenium_user_agent})
set_session_cookies(self._session, self.get_cookies(as_dict=True))
def quit(self):
"""关闭浏览器"""
self._tab_obj.Browser.close()
@ -399,6 +391,22 @@ class ChromiumDownloadSetter(DownloadSetter):
super().__init__(page)
self._behavior = 'allow'
self._download_th = None
self._session = None
self._waiting_download = False
self._download_begin = False
@property
def session(self):
    """Session object used for DownloadKit; created on first access and cached."""
    existing = self._session
    if existing is not None:
        return existing
    self._session = Session()
    return self._session
@property
def _switched_DownloadKit(self):
    """Return the DownloadKit object after copying the browser's cookies to the session.

    The value returned is ``self.DownloadKit`` (not a Session object); the
    cookie sync runs on every access, before the DownloadKit is read.
    """
    self._cookies_to_session()
    return self.DownloadKit
def save_path(self, path):
"""设置下载路径
@ -415,23 +423,46 @@ class ChromiumDownloadSetter(DownloadSetter):
except:
self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True)
if self._page._download_kit is not None:
self._page.download.goal_path = path
self.DownloadKit.goal_path = path
def use_browser(self):
def by_browser(self):
"""设置使用浏览器下载文件"""
self._page.driver.Page.downloadWillBegin = None
self._page.driver.Browser.downloadWillBegin = None
self._page.driver.Page.downloadWillBegin = self._download_by_browser
self._page.driver.Browser.downloadWillBegin = self._download_by_browser
self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
self._behavior = 'allow'
def use_DownloadKit(self):
def by_DownloadKit(self):
"""设置使用DownloadKit下载文件"""
self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit
self._page.driver.Browser.downloadWillBegin = self._download_by_DownloadKit
self._page.driver.Browser.setDownloadBehavior(behavior='deny')
self._behavior = 'deny'
def wait_download_begin(self, timeout=None):
    """Block until a browser download starts or the timeout expires.

    The download callbacks of this class set ``_download_begin`` only while
    ``_waiting_download`` is True; this method raises that flag, polls every
    0.05 seconds, and always lowers it again before returning.

    :param timeout: seconds to wait; None falls back to the page object's ``timeout`` attribute
    :return: True if a download start was observed before the deadline, otherwise False
    """
    if timeout is None:
        timeout = self._page.timeout

    self._waiting_download = True
    try:
        deadline = perf_counter() + timeout
        while perf_counter() < deadline:
            if self._download_begin:
                return True
            sleep(.05)
        return False
    finally:
        # Stop accepting signals first, then clear any pending one, so a
        # callback firing during cleanup cannot leave a stale flag for the
        # next call. ``finally`` also covers an interrupted or failing wait.
        self._waiting_download = False
        self._download_begin = False
def _cookies_to_session(self):
    """Copy the browser's user agent and cookies onto the session object."""
    page = self._page
    evaluated = page.driver.Runtime.evaluate(expression='navigator.userAgent;')
    user_agent = evaluated['result']['value']
    self.session.headers.update({"User-Agent": user_agent})
    set_session_cookies(self.session, page.get_cookies(as_dict=True))
def _download_by_DownloadKit(self, **kwargs):
"""拦截浏览器下载并用downloadKit下载"""
self._page.run_cdp('Browser.cancelDownload', guid=kwargs['guid'], not_change=True)
@ -440,6 +471,13 @@ class ChromiumDownloadSetter(DownloadSetter):
if self._download_th is None or not self._download_th.is_alive():
self._download_th = Thread(target=self._wait_download_complete, daemon=False)
self._download_th.start()
if self._waiting_download:
self._download_begin = True
def _download_by_browser(self, **kwargs):
    """Callback used when the browser itself handles downloads.

    Marks the download as begun, but only while wait_download_begin() is
    waiting; the event payload in ``kwargs`` is not used.
    """
    if not self._waiting_download:
        return
    self._download_begin = True
def _wait_download_complete(self):
"""等待下载完成"""

View File

@ -31,9 +31,7 @@ class ChromiumPage(ChromiumBase):
self._main_tab: str = ...
self._alert: Alert = ...
self._download_path: str = ...
self._session: Session = ...
self._download_set: ChromiumDownloadSetter = ...
self._download_kit: DownloadKit = ...
def _connect_browser(self,
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None,
@ -87,6 +85,8 @@ class ChromiumPage(ChromiumBase):
def _to_tab(self, tab_id: str = None, activate: bool = True, read_doc: bool = True) -> None: ...
def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ...
def close_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None, others: bool = False) -> None: ...
def close_other_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None) -> None: ...
@ -97,8 +97,6 @@ class ChromiumPage(ChromiumBase):
def show_browser(self) -> None: ...
def cookies_to_session(self) -> None: ...
def quit(self) -> None: ...
def _on_alert_close(self, **kwargs): ...
@ -111,17 +109,32 @@ class ChromiumDownloadSetter(DownloadSetter):
self._page: ChromiumPage = ...
self._behavior: str = ...
self._download_th: Thread = ...
self._session: Session = None
self._waiting_download: bool = ...
self._download_begin: bool = ...
def _wait_download_complete(self) -> None: ...
@property
def session(self) -> Session: ...
@property
def _switched_DownloadKit(self) -> DownloadKit: ...
def save_path(self, path: Union[str, Path]) -> None: ...
def use_browser(self) -> None: ...
def by_browser(self) -> None: ...
def use_DownloadKit(self) -> None: ...
def by_DownloadKit(self) -> None: ...
def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ...
def _cookies_to_session(self) -> None: ...
def _download_by_DownloadKit(self, **kwargs) -> None: ...
def _download_by_browser(self, **kwargs) -> None: ...
def _wait_download_complete(self) -> None: ...
class Alert(object):

View File

@ -27,7 +27,6 @@ class SessionPage(BasePage):
:param timeout: 连接超时时间为None时从ini文件读取
"""
self._response = None
self._download_kit = None
self._download_set = None
self._create_session(session_or_options)
timeout = timeout if timeout is not None else self.timeout
@ -131,6 +130,21 @@ class SessionPage(BasePage):
self._download_set = DownloadSetter(self)
return self._download_set
@property
def download(self):
    """Downloader object: the DownloadKit held by this page's download setter."""
    setter = self.download_set
    return setter.DownloadKit
@property
def session(self):
    """The session object stored on this page."""
    current = self._session
    return current
@property
def response(self):
    """The response object obtained from visiting a url."""
    latest = self._response
    return latest
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""用get方式跳转到url
:param url: 目标url
@ -203,24 +217,6 @@ class SessionPage(BasePage):
else:
return [cookie_to_dict(cookie) for cookie in cookies]
# ----------------session独有属性和方法-----------------------
@property
def session(self):
"""返回session对象"""
return self._session
@property
def response(self):
"""返回访问url得到的response对象"""
return self._response
@property
def download(self):
"""返回下载器对象"""
if self._download_kit is None:
self._download_kit = DownloadKit(session=self, goal_path=self.download_path)
return self._download_kit
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url
:param url: 目标url
@ -334,6 +330,13 @@ class DownloadSetter(object):
def __init__(self, page):
    """Bind the setter to its page; the DownloadKit instance starts unset.

    :param page: page object this setter belongs to
    """
    self._DownloadKit = None
    self._page = page
@property
def DownloadKit(self):
    """DownloadKit instance for this setter, built on first access and cached.

    It is constructed with the owning page as ``session`` and the page's
    ``download_path`` as ``goal_path``.
    """
    kit = self._DownloadKit
    if kit is None:
        owner = self._page
        kit = self._DownloadKit = DownloadKit(session=owner, goal_path=owner.download_path)
    return kit
@property
def if_file_exists(self):
@ -345,7 +348,7 @@ class DownloadSetter(object):
:param on_off: 是否启用多线程下载大文件
:return: None
"""
self._page.download.split = on_off
self.DownloadKit.split = on_off
def save_path(self, path):
"""设置下载保存路径
@ -354,8 +357,7 @@ class DownloadSetter(object):
"""
path = path if path is None else str(path)
self._page._download_path = path
if self._page._download_kit is not None:
self._page.download.goal_path = path
self.DownloadKit.goal_path = path
class FileExists(object):
@ -367,17 +369,22 @@ class FileExists(object):
"""
self._setter = setter
def __call__(self, mode):
    """Set the DownloadKit ``file_exists`` mode directly.

    :param mode: one of 'skip', 'rename', 'overwrite'
    :raises ValueError: if mode is any other value
    """
    allowed = ('skip', 'rename', 'overwrite')
    if mode in allowed:
        self._setter.DownloadKit.file_exists = mode
        return
    raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'")
def skip(self):
"""设为跳过"""
self._setter._page.download.file_exists = 'skip'
self._setter.DownloadKit.file_exists = 'skip'
def rename(self):
"""设为重命名,文件名后加序号"""
self._setter._page.download._file_exists = 'rename'
self._setter.DownloadKit._file_exists = 'rename'
def overwrite(self):
"""设为覆盖"""
self._setter._page.download._file_exists = 'overwrite'
self._setter.DownloadKit._file_exists = 'overwrite'
def check_headers(kwargs, headers, arg) -> bool:

View File

@ -27,7 +27,6 @@ class SessionPage(BasePage):
self._url: str = ...
self._response: Response = ...
self._download_path: str = ...
self._download_kit: DownloadKit = ...
self._download_set: DownloadSetter = ...
self._url_available: bool = ...
self.timeout: float = ...
@ -159,6 +158,10 @@ class SessionPage(BasePage):
class DownloadSetter(object):
def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]):
self._page: SessionPage = ...
self._DownloadKit: DownloadKit = ...
@property
def DownloadKit(self) -> DownloadKit: ...
@property
def if_file_exists(self) -> FileExists: ...
@ -172,11 +175,13 @@ class FileExists(object):
def __init__(self, setter: DownloadSetter):
self._setter: DownloadSetter = ...
def skip(self): ...
def __call__(self, mode: str) -> None: ...
def rename(self): ...
def skip(self) -> None: ...
def overwrite(self): ...
def rename(self) -> None: ...
def overwrite(self) -> None: ...
def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict],

View File

@ -46,7 +46,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._session_options = None
self._setting_tab_id = tab_id
self._response = None
self._download_kit = None
self._download_set = None
self._set_both_options(driver_or_options, session_or_options)
@ -132,7 +131,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 's':
return super().__call__(loc_or_str)
# -----------------共有属性和方法-------------------
@property
def url(self):
"""返回当前url"""
@ -237,6 +235,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._download_set = WebPageDownloadSetter(self)
return self._download_set
@property
def download(self):
    """Downloader object, obtained through the download setter's ``_switched_DownloadKit``."""
    setter = self.download_set
    return setter._switched_DownloadKit
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
"""跳转到一个url
:param url: 目标url
@ -254,6 +257,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
timeout = self.timeouts.page_load if self._has_driver else self.timeout
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
    """Go to a url using the POST method; the page is switched to 's' mode first.

    :param url: target url
    :param data: data submitted with the POST request
    :param show_errmsg: whether to show and raise exceptions
    :param retry: number of retries
    :param interval: interval between retries
    :param kwargs: connection arguments
    :return: whether the url is available
    """
    # switch mode without navigating (go=False); the parent's post() does the request
    self.change_mode('s', go=False)
    outcome = super().post(url, data, show_errmsg, retry, interval, **kwargs)
    return outcome
def ele(self, loc_or_ele, timeout=None):
"""返回第一个符合条件的元素、属性或节点文本
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
@ -437,27 +453,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._response = None
self._has_session = None
# ----------------重写SessionPage的函数-----------------------
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url会切换到s模式
:param url: 目标url
:param data: post方式时提交的数据
:param show_errmsg: 是否显示和抛出异常
:param retry: 重试次数
:param interval: 重试间隔
:param kwargs: 连接参数
:return: url是否可用
"""
self.change_mode('s', go=False)
return super().post(url, data, show_errmsg, retry, interval, **kwargs)
@property
def download(self):
"""返回下载器对象"""
if self.mode == 'd':
self.cookies_to_session()
return super().download
def _ele(self, loc_or_ele, timeout=None, single=True, relative=False):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
@ -487,6 +482,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
class WebPageDownloadSetter(ChromiumDownloadSetter):
"""用于设置下载参数的类"""
def __init__(self, page):
    """Initialise the parent setter, then reuse the page's own session.

    :param page: page object this setter belongs to; its ``session`` attribute is read here
    """
    super().__init__(page)
    # must come after super().__init__(): ChromiumDownloadSetter.__init__ sets _session to None
    self._session = page.session
@property
def _switched_DownloadKit(self):
    """Return the DownloadKit object, syncing browser cookies to the session first when in 'd' mode.

    The value returned is ``self.DownloadKit`` (not a Session object). In any
    other mode the cookie sync is skipped.
    """
    if self._page.mode == 'd':
        self._cookies_to_session()
    return self.DownloadKit
def save_path(self, path):
"""设置下载路径
:param path: 下载路径
@ -497,9 +503,7 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
path.mkdir(parents=True, exist_ok=True)
path = str(path)
self._page._download_path = path
if self._page._download_kit is not None:
self._page.download.goal_path = path
self.DownloadKit.goal_path = path
if self._page._has_driver:
try:
@ -509,16 +513,16 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path,
not_change=True)
def use_browser(self):
def by_browser(self):
"""设置使用浏览器下载文件"""
if not self._page._has_driver:
raise RuntimeError('浏览器未连接。')
self._page.driver.Page.downloadWillBegin = None
self._page.driver.Browser.downloadWillBegin = None
self._page.driver.Page.downloadWillBegin = self._download_by_browser
self._page.driver.Browser.downloadWillBegin = self._download_by_browser
self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
self._behavior = 'allow'
def use_DownloadKit(self):
def by_DownloadKit(self):
"""设置使用DownloadKit下载文件"""
if self._page._has_driver:
self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit

View File

@ -35,7 +35,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._session_options: Union[SessionOptions, None] = ...
self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ...
self._setting_tab_id: str = ...
self._download_kit: DownloadKit = ...
self._download_set: WebPageDownloadSetter = ...
self._download_path: str = ...
@ -189,11 +188,15 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
def __init__(self, page: WebPage):
self._page: WebPage = ...
self._behavior: str = ...
self._session: Session = None
@property
def _switched_DownloadKit(self) -> DownloadKit: ...
def save_path(self, path) -> None: ...
def use_browser(self) -> None: ...
def by_browser(self) -> None: ...
def use_DownloadKit(self) -> None: ...
def by_DownloadKit(self) -> None: ...
def _download_by_DownloadKit(self, **kwargs) -> None: ...

View File

@ -3,6 +3,6 @@ requests
tldextract
lxml
cssselect
DownloadKit>=0.4.3
DownloadKit>=0.4.4
FlowViewer>=0.2.1
websocket-client

View File

@ -22,7 +22,7 @@ setup(
"lxml",
"tldextract",
"requests",
"DownloadKit>=0.4.3",
"DownloadKit>=0.4.4",
"FlowViewer",
"websocket-client"
],