mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
页面类_download_kit属性移动到download_set中;增加wait_download_begin()方法
This commit is contained in:
parent
69801b4c3a
commit
77e35e64c6
@ -8,7 +8,6 @@ from platform import system
|
||||
from threading import Thread
|
||||
from time import perf_counter, sleep
|
||||
|
||||
from DownloadKit import DownloadKit
|
||||
from requests import Session
|
||||
|
||||
from .chromium_base import ChromiumBase, Timeout
|
||||
@ -30,10 +29,8 @@ class ChromiumPage(ChromiumBase):
|
||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||
:param timeout: 超时时间
|
||||
"""
|
||||
super().__init__(addr_driver_opts, tab_id, timeout)
|
||||
self._session = None
|
||||
self._download_set = None
|
||||
self._download_kit = None
|
||||
super().__init__(addr_driver_opts, tab_id, timeout)
|
||||
|
||||
def _connect_browser(self, addr_driver_opts=None, tab_id=None):
|
||||
"""连接浏览器,在第一次时运行
|
||||
@ -99,7 +96,7 @@ class ChromiumPage(ChromiumBase):
|
||||
self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open
|
||||
self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close
|
||||
self._main_tab = self.tab_id
|
||||
self.download_set.use_DownloadKit()
|
||||
self.download_set.by_DownloadKit()
|
||||
|
||||
@property
|
||||
def tabs_count(self):
|
||||
@ -147,10 +144,7 @@ class ChromiumPage(ChromiumBase):
|
||||
@property
|
||||
def download(self):
|
||||
"""返回下载器对象"""
|
||||
self.cookies_to_session()
|
||||
if self._download_kit is None:
|
||||
self._download_kit = DownloadKit(session=self._session, goal_path=self.download_path)
|
||||
return self._download_kit
|
||||
return self.download_set._switched_DownloadKit
|
||||
|
||||
def get_tab(self, tab_id=None):
|
||||
"""获取一个标签页对象
|
||||
@ -286,6 +280,13 @@ class ChromiumPage(ChromiumBase):
|
||||
if read_doc and self.ready_state == 'complete':
|
||||
self._get_document()
|
||||
|
||||
def wait_download_begin(self, timeout=None):
    """Wait for a browser download to begin.

    Delegates to the wait_download_begin() method of the page's download setter.

    :param timeout: seconds to wait; when None the page object's timeout attribute is used
    :return: whether a download began within the wait
    """
    setter = self.download_set
    return setter.wait_download_begin(timeout)
|
||||
|
||||
def close_tabs(self, tab_ids=None, others=False):
|
||||
"""关闭传入的标签页,默认关闭当前页。可传入多个
|
||||
:param tab_ids: 要关闭的标签页id,可传入id组成的列表或元组,为None时关闭当前页
|
||||
@ -357,15 +358,6 @@ class ChromiumPage(ChromiumBase):
|
||||
"""显示浏览器窗口,只在Windows系统可用"""
|
||||
show_or_hide_browser(self, hide=False)
|
||||
|
||||
def cookies_to_session(self):
|
||||
"""把driver对象的cookies复制到session对象"""
|
||||
if self._session is None:
|
||||
self._session = Session()
|
||||
selenium_user_agent = self._tab_obj.Runtime.evaluate(expression='navigator.userAgent;')['result']['value']
|
||||
self._session.headers.update({"User-Agent": selenium_user_agent})
|
||||
|
||||
set_session_cookies(self._session, self.get_cookies(as_dict=True))
|
||||
|
||||
def quit(self):
|
||||
"""关闭浏览器"""
|
||||
self._tab_obj.Browser.close()
|
||||
@ -399,6 +391,22 @@ class ChromiumDownloadSetter(DownloadSetter):
|
||||
super().__init__(page)
|
||||
self._behavior = 'allow'
|
||||
self._download_th = None
|
||||
self._session = None
|
||||
self._waiting_download = False
|
||||
self._download_begin = False
|
||||
|
||||
@property
def session(self):
    """Return the Session object used for DownloadKit, creating it on first access."""
    existing = self._session
    if existing is not None:
        return existing
    self._session = Session()
    return self._session
|
||||
|
||||
@property
def _switched_DownloadKit(self):
    """Copy the browser's cookies into the session, then return the DownloadKit object."""
    self._cookies_to_session()
    kit = self.DownloadKit
    return kit
|
||||
|
||||
def save_path(self, path):
|
||||
"""设置下载路径
|
||||
@ -415,23 +423,46 @@ class ChromiumDownloadSetter(DownloadSetter):
|
||||
except:
|
||||
self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True)
|
||||
|
||||
if self._page._download_kit is not None:
|
||||
self._page.download.goal_path = path
|
||||
self.DownloadKit.goal_path = path
|
||||
|
||||
def use_browser(self):
|
||||
def by_browser(self):
|
||||
"""设置使用浏览器下载文件"""
|
||||
self._page.driver.Page.downloadWillBegin = None
|
||||
self._page.driver.Browser.downloadWillBegin = None
|
||||
self._page.driver.Page.downloadWillBegin = self._download_by_browser
|
||||
self._page.driver.Browser.downloadWillBegin = self._download_by_browser
|
||||
self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
|
||||
self._behavior = 'allow'
|
||||
|
||||
def use_DownloadKit(self):
|
||||
def by_DownloadKit(self):
|
||||
"""设置使用DownloadKit下载文件"""
|
||||
self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit
|
||||
self._page.driver.Browser.downloadWillBegin = self._download_by_DownloadKit
|
||||
self._page.driver.Browser.setDownloadBehavior(behavior='deny')
|
||||
self._behavior = 'deny'
|
||||
|
||||
def wait_download_begin(self, timeout=None):
    """Wait for a browser download to begin.

    Marks this setter as waiting, then polls the ``_download_begin`` flag until it
    is raised or the timeout elapses. The ``_download_by_browser`` callback visible
    in this class raises that flag only while ``_waiting_download`` is True.

    :param timeout: seconds to wait; when None the page object's timeout attribute is used
    :return: whether a download began within the wait
    """
    if timeout is None:
        timeout = self._page.timeout

    self._waiting_download = True
    try:
        deadline = perf_counter() + timeout
        while True:
            # Check the flag before the deadline so that a zero timeout, or a flag
            # raised during the final sleep, is still noticed.
            if self._download_begin:
                return True
            remaining = deadline - perf_counter()
            if remaining <= 0:
                return False
            # Never sleep past the deadline.
            sleep(min(.05, remaining))
    finally:
        # Reset both flags even if the wait is interrupted, so stale state cannot
        # leak into the next call.
        self._download_begin = False
        self._waiting_download = False
|
||||
|
||||
def _cookies_to_session(self):
    """Copy the browser's user agent and cookies into the session object."""
    page = self._page
    evaluated = page.driver.Runtime.evaluate(expression='navigator.userAgent;')
    user_agent = evaluated['result']['value']
    self.session.headers.update({"User-Agent": user_agent})
    set_session_cookies(self.session, page.get_cookies(as_dict=True))
|
||||
|
||||
def _download_by_DownloadKit(self, **kwargs):
|
||||
"""拦截浏览器下载并用downloadKit下载"""
|
||||
self._page.run_cdp('Browser.cancelDownload', guid=kwargs['guid'], not_change=True)
|
||||
@ -440,6 +471,13 @@ class ChromiumDownloadSetter(DownloadSetter):
|
||||
if self._download_th is None or not self._download_th.is_alive():
|
||||
self._download_th = Thread(target=self._wait_download_complete, daemon=False)
|
||||
self._download_th.start()
|
||||
if self._waiting_download:
|
||||
self._download_begin = True
|
||||
|
||||
def _download_by_browser(self, **kwargs):
    """Handle a download that starts while the browser is doing the downloading.

    Raises the ``_download_begin`` flag, but only while a wait is in progress.
    """
    if not self._waiting_download:
        return
    self._download_begin = True
|
||||
|
||||
def _wait_download_complete(self):
|
||||
"""等待下载完成"""
|
||||
|
@ -31,9 +31,7 @@ class ChromiumPage(ChromiumBase):
|
||||
self._main_tab: str = ...
|
||||
self._alert: Alert = ...
|
||||
self._download_path: str = ...
|
||||
self._session: Session = ...
|
||||
self._download_set: ChromiumDownloadSetter = ...
|
||||
self._download_kit: DownloadKit = ...
|
||||
|
||||
def _connect_browser(self,
|
||||
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None,
|
||||
@ -87,6 +85,8 @@ class ChromiumPage(ChromiumBase):
|
||||
|
||||
def _to_tab(self, tab_id: str = None, activate: bool = True, read_doc: bool = True) -> None: ...
|
||||
|
||||
def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ...
|
||||
|
||||
def close_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None, others: bool = False) -> None: ...
|
||||
|
||||
def close_other_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None) -> None: ...
|
||||
@ -97,8 +97,6 @@ class ChromiumPage(ChromiumBase):
|
||||
|
||||
def show_browser(self) -> None: ...
|
||||
|
||||
def cookies_to_session(self) -> None: ...
|
||||
|
||||
def quit(self) -> None: ...
|
||||
|
||||
def _on_alert_close(self, **kwargs): ...
|
||||
@ -111,17 +109,32 @@ class ChromiumDownloadSetter(DownloadSetter):
|
||||
self._page: ChromiumPage = ...
|
||||
self._behavior: str = ...
|
||||
self._download_th: Thread = ...
|
||||
self._session: Session = None
|
||||
self._waiting_download: bool = ...
|
||||
self._download_begin: bool = ...
|
||||
|
||||
def _wait_download_complete(self) -> None: ...
|
||||
@property
|
||||
def session(self) -> Session: ...
|
||||
|
||||
@property
|
||||
def _switched_DownloadKit(self) -> DownloadKit: ...
|
||||
|
||||
def save_path(self, path: Union[str, Path]) -> None: ...
|
||||
|
||||
def use_browser(self) -> None: ...
|
||||
def by_browser(self) -> None: ...
|
||||
|
||||
def use_DownloadKit(self) -> None: ...
|
||||
def by_DownloadKit(self) -> None: ...
|
||||
|
||||
def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ...
|
||||
|
||||
def _cookies_to_session(self) -> None: ...
|
||||
|
||||
def _download_by_DownloadKit(self, **kwargs) -> None: ...
|
||||
|
||||
def _download_by_browser(self, **kwargs) -> None: ...
|
||||
|
||||
def _wait_download_complete(self) -> None: ...
|
||||
|
||||
|
||||
class Alert(object):
|
||||
|
||||
|
@ -27,7 +27,6 @@ class SessionPage(BasePage):
|
||||
:param timeout: 连接超时时间,为None时从ini文件读取
|
||||
"""
|
||||
self._response = None
|
||||
self._download_kit = None
|
||||
self._download_set = None
|
||||
self._create_session(session_or_options)
|
||||
timeout = timeout if timeout is not None else self.timeout
|
||||
@ -131,6 +130,21 @@ class SessionPage(BasePage):
|
||||
self._download_set = DownloadSetter(self)
|
||||
return self._download_set
|
||||
|
||||
@property
def download(self):
    """Return the downloader object held by the page's download setter."""
    setter = self.download_set
    return setter.DownloadKit
|
||||
|
||||
@property
def session(self):
    """Return the session object held by this page."""
    current = self._session
    return current
|
||||
|
||||
@property
def response(self):
    """Return the response object obtained from visiting the url."""
    latest = self._response
    return latest
|
||||
|
||||
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
||||
"""用get方式跳转到url
|
||||
:param url: 目标url
|
||||
@ -203,24 +217,6 @@ class SessionPage(BasePage):
|
||||
else:
|
||||
return [cookie_to_dict(cookie) for cookie in cookies]
|
||||
|
||||
# ----------------session独有属性和方法-----------------------
|
||||
@property
|
||||
def session(self):
|
||||
"""返回session对象"""
|
||||
return self._session
|
||||
|
||||
@property
|
||||
def response(self):
|
||||
"""返回访问url得到的response对象"""
|
||||
return self._response
|
||||
|
||||
@property
|
||||
def download(self):
|
||||
"""返回下载器对象"""
|
||||
if self._download_kit is None:
|
||||
self._download_kit = DownloadKit(session=self, goal_path=self.download_path)
|
||||
return self._download_kit
|
||||
|
||||
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
|
||||
"""用post方式跳转到url
|
||||
:param url: 目标url
|
||||
@ -334,6 +330,13 @@ class DownloadSetter(object):
|
||||
|
||||
def __init__(self, page):
|
||||
self._page = page
|
||||
self._DownloadKit = None
|
||||
|
||||
@property
def DownloadKit(self):
    """Return the DownloadKit object, creating it on first access.

    A new object is built with the page as its session and the page's
    download_path as its goal path.
    """
    if self._DownloadKit is not None:
        return self._DownloadKit
    page = self._page
    self._DownloadKit = DownloadKit(session=page, goal_path=page.download_path)
    return self._DownloadKit
|
||||
|
||||
@property
|
||||
def if_file_exists(self):
|
||||
@ -345,7 +348,7 @@ class DownloadSetter(object):
|
||||
:param on_off: 是否启用多线程下载大文件
|
||||
:return: None
|
||||
"""
|
||||
self._page.download.split = on_off
|
||||
self.DownloadKit.split = on_off
|
||||
|
||||
def save_path(self, path):
|
||||
"""设置下载保存路径
|
||||
@ -354,8 +357,7 @@ class DownloadSetter(object):
|
||||
"""
|
||||
path = path if path is None else str(path)
|
||||
self._page._download_path = path
|
||||
if self._page._download_kit is not None:
|
||||
self._page.download.goal_path = path
|
||||
self.DownloadKit.goal_path = path
|
||||
|
||||
|
||||
class FileExists(object):
|
||||
@ -367,17 +369,22 @@ class FileExists(object):
|
||||
"""
|
||||
self._setter = setter
|
||||
|
||||
def __call__(self, mode):
    """Set the file_exists mode on the setter's DownloadKit object.

    :param mode: one of 'skip', 'rename', 'overwrite'
    :raises ValueError: when mode is any other value
    """
    allowed = ('skip', 'rename', 'overwrite')
    if mode in allowed:
        self._setter.DownloadKit.file_exists = mode
        return
    raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'")
|
||||
|
||||
def skip(self):
|
||||
"""设为跳过"""
|
||||
self._setter._page.download.file_exists = 'skip'
|
||||
self._setter.DownloadKit.file_exists = 'skip'
|
||||
|
||||
def rename(self):
|
||||
"""设为重命名,文件名后加序号"""
|
||||
self._setter._page.download._file_exists = 'rename'
|
||||
self._setter.DownloadKit._file_exists = 'rename'
|
||||
|
||||
def overwrite(self):
|
||||
"""设为覆盖"""
|
||||
self._setter._page.download._file_exists = 'overwrite'
|
||||
self._setter.DownloadKit._file_exists = 'overwrite'
|
||||
|
||||
|
||||
def check_headers(kwargs, headers, arg) -> bool:
|
||||
|
@ -27,7 +27,6 @@ class SessionPage(BasePage):
|
||||
self._url: str = ...
|
||||
self._response: Response = ...
|
||||
self._download_path: str = ...
|
||||
self._download_kit: DownloadKit = ...
|
||||
self._download_set: DownloadSetter = ...
|
||||
self._url_available: bool = ...
|
||||
self.timeout: float = ...
|
||||
@ -159,6 +158,10 @@ class SessionPage(BasePage):
|
||||
class DownloadSetter(object):
|
||||
def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]):
|
||||
self._page: SessionPage = ...
|
||||
self._DownloadKit: DownloadKit = ...
|
||||
|
||||
@property
|
||||
def DownloadKit(self) -> DownloadKit: ...
|
||||
|
||||
@property
|
||||
def if_file_exists(self) -> FileExists: ...
|
||||
@ -172,11 +175,13 @@ class FileExists(object):
|
||||
def __init__(self, setter: DownloadSetter):
|
||||
self._setter: DownloadSetter = ...
|
||||
|
||||
def skip(self): ...
|
||||
def __call__(self, mode: str) -> None: ...
|
||||
|
||||
def rename(self): ...
|
||||
def skip(self) -> None: ...
|
||||
|
||||
def overwrite(self): ...
|
||||
def rename(self) -> None: ...
|
||||
|
||||
def overwrite(self) -> None: ...
|
||||
|
||||
|
||||
def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict],
|
||||
|
@ -46,7 +46,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
self._session_options = None
|
||||
self._setting_tab_id = tab_id
|
||||
self._response = None
|
||||
self._download_kit = None
|
||||
self._download_set = None
|
||||
|
||||
self._set_both_options(driver_or_options, session_or_options)
|
||||
@ -132,7 +131,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
elif self._mode == 's':
|
||||
return super().__call__(loc_or_str)
|
||||
|
||||
# -----------------共有属性和方法-------------------
|
||||
@property
|
||||
def url(self):
|
||||
"""返回当前url"""
|
||||
@ -237,6 +235,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
self._download_set = WebPageDownloadSetter(self)
|
||||
return self._download_set
|
||||
|
||||
@property
def download(self):
    """Return the downloader object exposed by the download setter's ``_switched_DownloadKit``."""
    setter = self.download_set
    return setter._switched_DownloadKit
|
||||
|
||||
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
||||
"""跳转到一个url
|
||||
:param url: 目标url
|
||||
@ -254,6 +257,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
timeout = self.timeouts.page_load if self._has_driver else self.timeout
|
||||
return super().get(url, show_errmsg, retry, interval, timeout, **kwargs)
|
||||
|
||||
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
    """Navigate to a url with a POST request; the page is switched to s mode first.

    :param url: target url
    :param data: data submitted with the POST request
    :param show_errmsg: whether to show and raise exceptions
    :param retry: number of retries
    :param interval: interval between retries, in seconds
    :param kwargs: connection arguments
    :return: whether the url is available
    """
    self.change_mode('s', go=False)
    outcome = super().post(url, data, show_errmsg, retry, interval, **kwargs)
    return outcome
|
||||
|
||||
def ele(self, loc_or_ele, timeout=None):
|
||||
"""返回第一个符合条件的元素、属性或节点文本
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
@ -437,27 +453,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
self._response = None
|
||||
self._has_session = None
|
||||
|
||||
# ----------------重写SessionPage的函数-----------------------
|
||||
def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
|
||||
"""用post方式跳转到url,会切换到s模式
|
||||
:param url: 目标url
|
||||
:param data: post方式时提交的数据
|
||||
:param show_errmsg: 是否显示和抛出异常
|
||||
:param retry: 重试次数
|
||||
:param interval: 重试间隔(秒)
|
||||
:param kwargs: 连接参数
|
||||
:return: url是否可用
|
||||
"""
|
||||
self.change_mode('s', go=False)
|
||||
return super().post(url, data, show_errmsg, retry, interval, **kwargs)
|
||||
|
||||
@property
|
||||
def download(self):
|
||||
"""返回下载器对象"""
|
||||
if self.mode == 'd':
|
||||
self.cookies_to_session()
|
||||
return super().download
|
||||
|
||||
def _ele(self, loc_or_ele, timeout=None, single=True, relative=False):
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
@ -487,6 +482,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
class WebPageDownloadSetter(ChromiumDownloadSetter):
|
||||
"""用于设置下载参数的类"""
|
||||
|
||||
def __init__(self, page):
    """Initialise the setter and reuse the page's session object as its own.

    :param page: the page object this setter is created for
    """
    super().__init__(page)
    shared_session = page.session
    self._session = shared_session
|
||||
|
||||
@property
def _switched_DownloadKit(self):
    """Return the DownloadKit object; in d mode the browser's cookies are copied to the session first."""
    in_d_mode = self._page.mode == 'd'
    if in_d_mode:
        self._cookies_to_session()
    return self.DownloadKit
|
||||
|
||||
def save_path(self, path):
|
||||
"""设置下载路径
|
||||
:param path: 下载路径
|
||||
@ -497,9 +503,7 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
path = str(path)
|
||||
self._page._download_path = path
|
||||
|
||||
if self._page._download_kit is not None:
|
||||
self._page.download.goal_path = path
|
||||
self.DownloadKit.goal_path = path
|
||||
|
||||
if self._page._has_driver:
|
||||
try:
|
||||
@ -509,16 +513,16 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
|
||||
self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path,
|
||||
not_change=True)
|
||||
|
||||
def use_browser(self):
|
||||
def by_browser(self):
|
||||
"""设置使用浏览器下载文件"""
|
||||
if not self._page._has_driver:
|
||||
raise RuntimeError('浏览器未连接。')
|
||||
self._page.driver.Page.downloadWillBegin = None
|
||||
self._page.driver.Browser.downloadWillBegin = None
|
||||
self._page.driver.Page.downloadWillBegin = self._download_by_browser
|
||||
self._page.driver.Browser.downloadWillBegin = self._download_by_browser
|
||||
self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path)
|
||||
self._behavior = 'allow'
|
||||
|
||||
def use_DownloadKit(self):
|
||||
def by_DownloadKit(self):
|
||||
"""设置使用DownloadKit下载文件"""
|
||||
if self._page._has_driver:
|
||||
self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit
|
||||
|
@ -35,7 +35,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
self._session_options: Union[SessionOptions, None] = ...
|
||||
self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ...
|
||||
self._setting_tab_id: str = ...
|
||||
self._download_kit: DownloadKit = ...
|
||||
self._download_set: WebPageDownloadSetter = ...
|
||||
self._download_path: str = ...
|
||||
|
||||
@ -189,11 +188,15 @@ class WebPageDownloadSetter(ChromiumDownloadSetter):
|
||||
def __init__(self, page: WebPage):
|
||||
self._page: WebPage = ...
|
||||
self._behavior: str = ...
|
||||
self._session: Session = None
|
||||
|
||||
@property
|
||||
def _switched_DownloadKit(self) -> DownloadKit: ...
|
||||
|
||||
def save_path(self, path) -> None: ...
|
||||
|
||||
def use_browser(self) -> None: ...
|
||||
def by_browser(self) -> None: ...
|
||||
|
||||
def use_DownloadKit(self) -> None: ...
|
||||
def by_DownloadKit(self) -> None: ...
|
||||
|
||||
def _download_by_DownloadKit(self, **kwargs) -> None: ...
|
||||
|
@ -3,6 +3,6 @@ requests
|
||||
tldextract
|
||||
lxml
|
||||
cssselect
|
||||
DownloadKit>=0.4.3
|
||||
DownloadKit>=0.4.4
|
||||
FlowViewer>=0.2.1
|
||||
websocket-client
|
Loading…
x
Reference in New Issue
Block a user