diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 2e595ab..25f03e7 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -8,7 +8,6 @@ from platform import system from threading import Thread from time import perf_counter, sleep -from DownloadKit import DownloadKit from requests import Session from .chromium_base import ChromiumBase, Timeout @@ -30,10 +29,8 @@ class ChromiumPage(ChromiumBase): :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间 """ - super().__init__(addr_driver_opts, tab_id, timeout) - self._session = None self._download_set = None - self._download_kit = None + super().__init__(addr_driver_opts, tab_id, timeout) def _connect_browser(self, addr_driver_opts=None, tab_id=None): """连接浏览器,在第一次时运行 @@ -99,7 +96,7 @@ class ChromiumPage(ChromiumBase): self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close self._main_tab = self.tab_id - self.download_set.use_DownloadKit() + self.download_set.by_DownloadKit() @property def tabs_count(self): @@ -147,10 +144,7 @@ class ChromiumPage(ChromiumBase): @property def download(self): """返回下载器对象""" - self.cookies_to_session() - if self._download_kit is None: - self._download_kit = DownloadKit(session=self._session, goal_path=self.download_path) - return self._download_kit + return self.download_set._switched_DownloadKit def get_tab(self, tab_id=None): """获取一个标签页对象 @@ -286,6 +280,13 @@ class ChromiumPage(ChromiumBase): if read_doc and self.ready_state == 'complete': self._get_document() + def wait_download_begin(self, timeout=None): + """等待浏览器下载开始 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :return: 是否等到下载开始 + """ + return self.download_set.wait_download_begin(timeout) + def close_tabs(self, tab_ids=None, others=False): """关闭传入的标签页,默认关闭当前页。可传入多个 :param tab_ids: 要关闭的标签页id,可传入id组成的列表或元组,为None时关闭当前页 @@ -357,15 +358,6 @@ class ChromiumPage(ChromiumBase): """显示浏览器窗口,只在Windows系统可用""" show_or_hide_browser(self, hide=False) - def cookies_to_session(self): - """把driver对象的cookies复制到session对象""" - if self._session is None: - self._session = Session() - selenium_user_agent = self._tab_obj.Runtime.evaluate(expression='navigator.userAgent;')['result']['value'] - self._session.headers.update({"User-Agent": selenium_user_agent}) - - set_session_cookies(self._session, self.get_cookies(as_dict=True)) - def quit(self): """关闭浏览器""" self._tab_obj.Browser.close() @@ -399,6 +391,22 @@ class ChromiumDownloadSetter(DownloadSetter): super().__init__(page) self._behavior = 'allow' self._download_th = None + self._session = None + self._waiting_download = False + self._download_begin = False + + @property + def session(self): + """返回用于DownloadKit的Session对象""" + if self._session is None: + self._session = Session() + return self._session + + @property + def _switched_DownloadKit(self): + """返回从浏览器同步cookies后的Session对象""" + self._cookies_to_session() + return self.DownloadKit def save_path(self, path): """设置下载路径 @@ -415,23 +423,46 @@ class ChromiumDownloadSetter(DownloadSetter): except: self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path, not_change=True) - if self._page._download_kit is not None: - self._page.download.goal_path = path + self.DownloadKit.goal_path = path - def use_browser(self): + def by_browser(self): """设置使用浏览器下载文件""" - self._page.driver.Page.downloadWillBegin = None - self._page.driver.Browser.downloadWillBegin = None + self._page.driver.Page.downloadWillBegin = self._download_by_browser + self._page.driver.Browser.downloadWillBegin = self._download_by_browser self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) self._behavior = 'allow' - def use_DownloadKit(self): + def by_DownloadKit(self): """设置使用DownloadKit下载文件""" self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit self._page.driver.Browser.downloadWillBegin = self._download_by_DownloadKit self._page.driver.Browser.setDownloadBehavior(behavior='deny') self._behavior = 'deny' + def wait_download_begin(self, timeout=None): + """等待浏览器下载开始 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :return: 是否等到下载开始 + """ + self._waiting_download = True + result = False + timeout = timeout if timeout is not None else self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._download_begin: + result = True + break + sleep(.05) + self._download_begin = False + self._waiting_download = False + return result + + def _cookies_to_session(self): + """把driver对象的cookies复制到session对象""" + ua = self._page.driver.Runtime.evaluate(expression='navigator.userAgent;')['result']['value'] + self.session.headers.update({"User-Agent": ua}) + set_session_cookies(self.session, self._page.get_cookies(as_dict=True)) + def _download_by_DownloadKit(self, **kwargs): """拦截浏览器下载并用downloadKit下载""" self._page.run_cdp('Browser.cancelDownload', guid=kwargs['guid'], not_change=True) @@ -440,6 +471,13 @@ class ChromiumDownloadSetter(DownloadSetter): if self._download_th is None or not self._download_th.is_alive(): self._download_th = Thread(target=self._wait_download_complete, daemon=False) self._download_th.start() + if self._waiting_download: + self._download_begin = True + + def _download_by_browser(self, **kwargs): + """使用浏览器下载时调用""" + if self._waiting_download: + self._download_begin = True def _wait_download_complete(self): """等待下载完成""" diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 7c55366..a0bbaca 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -31,9 +31,7 @@ class ChromiumPage(ChromiumBase): self._main_tab: str = ... self._alert: Alert = ... self._download_path: str = ... - self._session: Session = ... self._download_set: ChromiumDownloadSetter = ... - self._download_kit: DownloadKit = ... def _connect_browser(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, @@ -87,6 +85,8 @@ class ChromiumPage(ChromiumBase): def _to_tab(self, tab_id: str = None, activate: bool = True, read_doc: bool = True) -> None: ... + def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ... + def close_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None, others: bool = False) -> None: ... def close_other_tabs(self, tab_ids: Union[str, List[str], Tuple[str]] = None) -> None: ... @@ -97,8 +97,6 @@ class ChromiumPage(ChromiumBase): def show_browser(self) -> None: ... - def cookies_to_session(self) -> None: ... - def quit(self) -> None: ... def _on_alert_close(self, **kwargs): ... @@ -111,17 +109,32 @@ class ChromiumDownloadSetter(DownloadSetter): self._page: ChromiumPage = ... self._behavior: str = ... self._download_th: Thread = ... + self._session: Session = None + self._waiting_download: bool = ... + self._download_begin: bool = ... - def _wait_download_complete(self) -> None: ... + @property + def session(self) -> Session: ... + + @property + def _switched_DownloadKit(self) -> DownloadKit: ... def save_path(self, path: Union[str, Path]) -> None: ... - def use_browser(self) -> None: ... + def by_browser(self) -> None: ... - def use_DownloadKit(self) -> None: ... + def by_DownloadKit(self) -> None: ... + + def wait_download_begin(self, timeout: Union[int, float] = None) -> bool: ... + + def _cookies_to_session(self) -> None: ... def _download_by_DownloadKit(self, **kwargs) -> None: ... + def _download_by_browser(self, **kwargs) -> None: ... + + def _wait_download_complete(self) -> None: ... + class Alert(object): diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index a1fab0c..04dc91e 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -27,7 +27,6 @@ class SessionPage(BasePage): :param timeout: 连接超时时间,为None时从ini文件读取 """ self._response = None - self._download_kit = None self._download_set = None self._create_session(session_or_options) timeout = timeout if timeout is not None else self.timeout @@ -131,6 +130,21 @@ class SessionPage(BasePage): self._download_set = DownloadSetter(self) return self._download_set + @property + def download(self): + """返回下载器对象""" + return self.download_set.DownloadKit + + @property + def session(self): + """返回session对象""" + return self._session + + @property + def response(self): + """返回访问url得到的response对象""" + return self._response + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """用get方式跳转到url :param url: 目标url @@ -203,24 +217,6 @@ class SessionPage(BasePage): else: return [cookie_to_dict(cookie) for cookie in cookies] - # ----------------session独有属性和方法----------------------- - @property - def session(self): - """返回session对象""" - return self._session - - @property - def response(self): - """返回访问url得到的response对象""" - return self._response - - @property - def download(self): - """返回下载器对象""" - if self._download_kit is None: - self._download_kit = DownloadKit(session=self, goal_path=self.download_path) - return self._download_kit - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): """用post方式跳转到url :param url: 目标url @@ -334,6 +330,13 @@ class DownloadSetter(object): def __init__(self, page): self._page = page + self._DownloadKit = None + + @property + def DownloadKit(self): + if self._DownloadKit is None: + self._DownloadKit = DownloadKit(session=self._page, goal_path=self._page.download_path) + return self._DownloadKit @property def if_file_exists(self): @@ -345,7 +348,7 @@ class DownloadSetter(object): :param on_off: 是否启用多线程下载大文件 :return: None """ - self._page.download.split = on_off + self.DownloadKit.split = on_off def save_path(self, path): """设置下载保存路径 @@ -354,8 +357,7 @@ class DownloadSetter(object): """ path = path if path is None else str(path) self._page._download_path = path - if self._page._download_kit is not None: - self._page.download.goal_path = path + self.DownloadKit.goal_path = path class FileExists(object): @@ -367,17 +369,22 @@ class FileExists(object): """ self._setter = setter + def __call__(self, mode): + if mode not in ('skip', 'rename', 'overwrite'): + raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") + self._setter.DownloadKit.file_exists = mode + def skip(self): """设为跳过""" - self._setter._page.download.file_exists = 'skip' + self._setter.DownloadKit.file_exists = 'skip' def rename(self): """设为重命名,文件名后加序号""" - self._setter._page.download._file_exists = 'rename' + self._setter.DownloadKit._file_exists = 'rename' def overwrite(self): """设为覆盖""" - self._setter._page.download._file_exists = 'overwrite' + self._setter.DownloadKit._file_exists = 'overwrite' def check_headers(kwargs, headers, arg) -> bool: diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index ee0a9bf..4c4f287 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -27,7 +27,6 @@ class SessionPage(BasePage): self._url: str = ... self._response: Response = ... self._download_path: str = ... - self._download_kit: DownloadKit = ... self._download_set: DownloadSetter = ... self._url_available: bool = ... self.timeout: float = ... @@ -159,6 +158,10 @@ class SessionPage(BasePage): class DownloadSetter(object): def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]): self._page: SessionPage = ... + self._DownloadKit: DownloadKit = ... + + @property + def DownloadKit(self) -> DownloadKit: ... @property def if_file_exists(self) -> FileExists: ... @@ -172,11 +175,13 @@ class FileExists(object): def __init__(self, setter: DownloadSetter): self._setter: DownloadSetter = ... - def skip(self): ... + def __call__(self, mode: str) -> None: ... - def rename(self): ... + def skip(self) -> None: ... - def overwrite(self): ... + def rename(self) -> None: ... + + def overwrite(self) -> None: ... def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index e4fbd40..4303718 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -46,7 +46,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._session_options = None self._setting_tab_id = tab_id self._response = None - self._download_kit = None self._download_set = None self._set_both_options(driver_or_options, session_or_options) @@ -132,7 +131,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 's': return super().__call__(loc_or_str) - # -----------------共有属性和方法------------------- @property def url(self): """返回当前url""" @@ -237,6 +235,11 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._download_set = WebPageDownloadSetter(self) return self._download_set + @property + def download(self): + """返回下载器对象""" + return self.download_set._switched_DownloadKit + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url :param url: 目标url @@ -254,6 +257,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage): timeout = self.timeouts.page_load if self._has_driver else self.timeout return super().get(url, show_errmsg, retry, interval, timeout, **kwargs) + def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + """用post方式跳转到url,会切换到s模式 + :param url: 目标url + :param data: post方式时提交的数据 + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + self.change_mode('s', go=False) + return super().post(url, data, show_errmsg, retry, interval, **kwargs) + def ele(self, loc_or_ele, timeout=None): """返回第一个符合条件的元素、属性或节点文本 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 @@ -437,27 +453,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._response = None self._has_session = None - # ----------------重写SessionPage的函数----------------------- - def post(self, url: str, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url,会切换到s模式 - :param url: 目标url - :param data: post方式时提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - self.change_mode('s', go=False) - return super().post(url, data, show_errmsg, retry, interval, **kwargs) - - @property - def download(self): - """返回下载器对象""" - if self.mode == 'd': - self.cookies_to_session() - return super().download - def _ele(self, loc_or_ele, timeout=None, single=True, relative=False): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 @@ -487,6 +482,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage): class WebPageDownloadSetter(ChromiumDownloadSetter): """用于设置下载参数的类""" + def __init__(self, page): + super().__init__(page) + self._session = page.session + + @property + def _switched_DownloadKit(self): + """返回从浏览器同步cookies后的Session对象""" + if self._page.mode == 'd': + self._cookies_to_session() + return self.DownloadKit + def save_path(self, path): """设置下载路径 :param path: 下载路径 @@ -497,9 +503,7 @@ class WebPageDownloadSetter(ChromiumDownloadSetter): path.mkdir(parents=True, exist_ok=True) path = str(path) self._page._download_path = path - - if self._page._download_kit is not None: - self._page.download.goal_path = path + self.DownloadKit.goal_path = path if self._page._has_driver: try: @@ -509,16 +513,16 @@ class WebPageDownloadSetter(ChromiumDownloadSetter): self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path, not_change=True) - def use_browser(self): + def by_browser(self): """设置使用浏览器下载文件""" if not self._page._has_driver: raise RuntimeError('浏览器未连接。') - self._page.driver.Page.downloadWillBegin = None - self._page.driver.Browser.downloadWillBegin = None + self._page.driver.Page.downloadWillBegin = self._download_by_browser + self._page.driver.Browser.downloadWillBegin = self._download_by_browser self._page.driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) self._behavior = 'allow' - def use_DownloadKit(self): + def by_DownloadKit(self): """设置使用DownloadKit下载文件""" if self._page._has_driver: self._page.driver.Page.downloadWillBegin = self._download_by_DownloadKit diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index d75e35c..57bd343 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -35,7 +35,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._session_options: Union[SessionOptions, None] = ... self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ... self._setting_tab_id: str = ... - self._download_kit: DownloadKit = ... self._download_set: WebPageDownloadSetter = ... self._download_path: str = ... @@ -189,11 +188,15 @@ class WebPageDownloadSetter(ChromiumDownloadSetter): def __init__(self, page: WebPage): self._page: WebPage = ... self._behavior: str = ... + self._session: Session = None + + @property + def _switched_DownloadKit(self) -> DownloadKit: ... def save_path(self, path) -> None: ... - def use_browser(self) -> None: ... + def by_browser(self) -> None: ... - def use_DownloadKit(self) -> None: ... + def by_DownloadKit(self) -> None: ... def _download_by_DownloadKit(self, **kwargs) -> None: ... diff --git a/requirements.txt b/requirements.txt index ab49a53..36f1173 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ requests tldextract lxml cssselect -DownloadKit>=0.4.3 +DownloadKit>=0.4.4 FlowViewer>=0.2.1 websocket-client \ No newline at end of file diff --git a/setup.py b/setup.py index f46539a..72f5947 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( "lxml", "tldextract", "requests", - "DownloadKit>=0.4.3", + "DownloadKit>=0.4.4", "FlowViewer", "websocket-client" ],