From 68a5e2e508892c80a35c07a7738e959cf42ced11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=80=81=E5=A4=A7=E5=93=A5?= Date: Thu, 20 Jul 2023 03:13:17 +0000 Subject: [PATCH 01/17] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E4=B8=80?= =?UTF-8?q?=E4=B8=AA=E9=94=99=E5=88=AB=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 老大哥 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b94e27e..0c1b297 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ python 版本:3.6 及以上 用 requests 做数据采集面对要登录的网站时,要分析数据包、JS 源码,构造复杂的请求,往往还要应付验证码、JS 混淆、签名参数等反爬手段,门槛较高,开发效率不高。 使用浏览器,可以很大程度上绕过这些坑,但浏览器运行效率不高。 -因此,这个库设计初衷,是将它们合而为一,同时实现“写得快”和“跑得快”。能够在不同须要时切换相应模式,并提供一种人性化的使用方法,提高开发和运行效率。 +因此,这个库设计初衷,是将它们合而为一,同时实现“写得快”和“跑得快”。能够在不同需要时切换相应模式,并提供一种人性化的使用方法,提高开发和运行效率。 除了合并两者,本库还以网页为单位封装了常用功能,提供非常简便的操作和语句,使用户可减少考虑细节,专注功能实现。 以简单的方式实现强大的功能,使代码更优雅。 以前的版本是对 selenium 进行重新封装实现的。从 3.0 开始,作者另起炉灶,对底层进行了重新开发,摆脱对 selenium 的依赖,增强了功能,提升了运行效率。 From d5da98084bb754f7c13ee7bc570233eb733de020 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 27 Jul 2023 15:23:40 +0800 Subject: [PATCH 02/17] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dback()=E5=90=8E?= =?UTF-8?q?=E9=80=80=E4=B8=8D=E5=87=86=E7=A1=AE=E7=9A=84=E6=83=85=E5=86=B5?= =?UTF-8?q?=EF=BC=9B=E4=BF=AE=E5=A4=8D'Secure-aa'=E5=92=8C'Host-'=E5=BC=80?= =?UTF-8?q?=E5=A4=B4=E7=9A=84cookie=E4=B8=8D=E8=83=BD=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=9Bini=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=88=A0=E9=99=A4chromedriver=5Fpath=EF=BC=9B=E4=BF=AE?= =?UTF-8?q?=E5=A4=8Dwait.load=5Fstart()=E4=B8=8D=E8=83=BD=E6=AD=A3?= =?UTF-8?q?=E7=A1=AE=E8=AE=BE=E7=BD=AE=E8=B6=85=E6=97=B6=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98=EF=BC=9B=E4=BF=AE=E5=A4=8DWebPage=E7=9A=84get=5Fcooki?= =?UTF-8?q?es()=E6=96=B9=E6=B3=95=E4=B8=8D=E8=83=BD=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E6=89=80=E6=9C=89=E5=9F=9F=E5=90=8D=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 21 +++++++++++++++------ DrissionPage/chromium_tab.py | 21 ++------------------- DrissionPage/chromium_tab.pyi | 2 -- DrissionPage/commons/web.py | 22 +++++++++++++++------- DrissionPage/configs/configs.ini | 1 - DrissionPage/web_page.py | 22 +++------------------- DrissionPage/web_page.pyi | 2 -- 7 files changed, 35 insertions(+), 56 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index b87fb7a..5392087 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -542,9 +542,11 @@ class ChromiumBase(BasePage): if ok: try: if single: - return make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) + r = make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) + break else: - return [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] + r = [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] + break except ElementLossError: ok = False @@ -560,6 +562,12 @@ class ChromiumBase(BasePage): sleep(.1) + try: + self.run_cdp('DOM.discardSearchResults', searchId=search_result['searchId']) + except: + pass + return r + def refresh(self, ignore_cache=False): """刷新当前页面 :param ignore_cache: 是否忽略缓存 @@ -595,14 +603,14 @@ class ChromiumBase(BasePage): index = history['currentIndex'] history = history['entries'] direction = 1 if steps > 0 else -1 - curr_url = history[index]['userTypedURL'] + curr_url = history[index]['url'] nid = None for num in range(abs(steps)): for i in history[index::direction]: index += direction - if i['userTypedURL'] != curr_url: + if i['url'] != curr_url: nid = i['id'] - curr_url = i['userTypedURL'] + curr_url = i['url'] break if nid: @@ -1021,7 +1029,8 @@ class ChromiumBaseWaiter(object): :return: 是否等待成功 """ if timeout != 0: - timeout = self._driver.timeout if timeout in (None, True) else timeout + if timeout is None or timeout is True: + timeout = self._driver.timeout end_time = perf_counter() + timeout while 
perf_counter() < end_time: if self._driver.is_loading == start: diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index a97c9e7..50ad5c7 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -292,9 +292,7 @@ class WebPageTab(SessionPage, ChromiumTab): selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] self.session.headers.update({"User-Agent": selenium_user_agent}) - # set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) - # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) - set_session_cookies(self.session, self._get_driver_cookies()) + set_session_cookies(self.session, super(SessionPage, self).get_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" @@ -315,22 +313,7 @@ class WebPageTab(SessionPage, ChromiumTab): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return self._get_driver_cookies(as_dict, all_info) - - def _get_driver_cookies(self, as_dict=False, all_info=False): - """获取浏览器cookies - :param as_dict: 是否以dict形式返回,为True时all_info无效 - :param all_info: 是否返回所有信息,为False时只返回name、value、domain - :return: cookies信息 - """ - cookies = self.run_cdp('Network.getCookies')['cookies'] - if as_dict: - return {cookie['name']: cookie['value'] for cookie in cookies} - elif all_info: - return cookies - else: - return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} - for cookie in cookies] + return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 9def1d7..b2d0428 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -119,8 +119,6 @@ class WebPageTab(SessionPage, 
ChromiumTab): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... - def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... - # ----------------重写SessionPage的函数----------------------- def post(self, url: str, diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index 0a7cd14..e5aabd9 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -344,13 +344,21 @@ def set_browser_cookies(page, cookies): if cookie['value'] is None: cookie['value'] = '' - if cookie.get('domain', None): - try: - page.run_cdp_loaded('Network.setCookie', **cookie) - if is_cookie_in_driver(page, cookie): - continue - except Exception: - pass + if cookie['name'].startswith('__Secure-'): + cookie['secure'] = True + + if cookie['name'].startswith('__Host-'): + cookie['path'] = '/' + cookie['secure'] = True + + else: + if cookie.get('domain', None): + try: + page.run_cdp_loaded('Network.setCookie', **cookie) + if is_cookie_in_driver(page, cookie): + continue + except Exception: + pass ex_url = extract(page._browser_url) d_list = ex_url.subdomain.split('.') diff --git a/DrissionPage/configs/configs.ini b/DrissionPage/configs/configs.ini index 6591d2f..9a5ad35 100644 --- a/DrissionPage/configs/configs.ini +++ b/DrissionPage/configs/configs.ini @@ -1,5 +1,4 @@ [paths] -chromedriver_path = download_path = [chrome_options] diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 62e1a1b..8c3dd5e 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -48,6 +48,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._download_set = None self._set = None self._screencast = None + self._DownloadKit = None self._set_start_options(driver_or_options, session_or_options) self._set_runtime_settings() @@ -363,9 +364,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): selenium_user_agent = self.run_cdp('Runtime.evaluate', 
expression='navigator.userAgent;')['result']['value'] self.session.headers.update({"User-Agent": selenium_user_agent}) - # set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) - # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) - set_session_cookies(self.session, self._get_driver_cookies()) + set_session_cookies(self.session, super(SessionPage, self).get_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" @@ -386,7 +385,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return self._get_driver_cookies(as_dict, all_info) + return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) def get_tab(self, tab_id=None): """获取一个标签页对象 @@ -396,21 +395,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): tab_id = tab_id or self.tab_id return WebPageTab(self, tab_id) - def _get_driver_cookies(self, as_dict=False, all_info=False): - """获取浏览器cookies - :param as_dict: 是否以dict形式返回,为True时all_info无效 - :param all_info: 是否返回所有信息 - :return: cookies信息 - """ - cookies = self.run_cdp('Network.getCookies')['cookies'] - if as_dict: - return {cookie['name']: cookie['value'] for cookie in cookies} - elif all_info: - return cookies - else: - return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} - for cookie in cookies] - def close_driver(self): """关闭driver及浏览器""" if self._has_driver: diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index bc4eb58..475468a 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -129,8 +129,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def get_tab(self, tab_id: str = None) -> WebPageTab: ... - def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... - def close_driver(self) -> None: ... def close_session(self) -> None: ... 
From 8f33a9241e036a019187f219f202f1d2e164cf8b Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 27 Jul 2023 15:33:55 +0800 Subject: [PATCH 03/17] =?UTF-8?q?scroll.to=5Fsee()=E6=96=B9=E6=B3=95center?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E9=BB=98=E8=AE=A4None=EF=BC=9B=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=E6=BB=9A=E5=8A=A8=E5=90=8E=E7=82=B9=E5=87=BB=E8=A2=AB?= =?UTF-8?q?=E5=9B=BA=E5=AE=9A=E5=85=83=E7=B4=A0=E9=81=AE=E6=8C=A1=E9=97=AE?= =?UTF-8?q?=E9=A2=98=EF=BC=9Bget()=E6=94=AF=E6=8C=81ipv6=E5=9C=B0=E5=9D=80?= =?UTF-8?q?=EF=BC=9Bpage=E5=AF=B9=E8=B1=A1=E5=A2=9E=E5=8A=A0user=5Fagent?= =?UTF-8?q?=E5=B1=9E=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/base.py | 2 +- DrissionPage/chromium_base.py | 32 ++++++++++++++++++++----------- DrissionPage/chromium_base.pyi | 9 ++++++--- DrissionPage/chromium_element.py | 4 ++-- DrissionPage/chromium_element.pyi | 2 +- DrissionPage/chromium_frame.py | 4 ++-- DrissionPage/chromium_frame.pyi | 2 +- DrissionPage/chromium_tab.py | 8 ++++++++ DrissionPage/chromium_tab.pyi | 3 +++ DrissionPage/session_page.py | 5 +++++ DrissionPage/session_page.pyi | 3 +++ DrissionPage/web_page.py | 12 ++++++++++-- DrissionPage/web_page.pyi | 3 +++ 13 files changed, 66 insertions(+), 23 deletions(-) diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 58ff3f3..6800ca6 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -387,7 +387,7 @@ class BasePage(BaseParser): :param interval: 重试间隔 :return: 重试次数和间隔组成的tuple """ - self._url = quote(url, safe='/:&?=%;#@+!') + self._url = quote(url, safe='/:&?=%;#@+![]') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 5392087..a86d5a1 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -325,6 +325,11 @@ class ChromiumBase(BasePage): 
"""返回页面加载策略,有3种:'none'、'normal'、'eager'""" return self._page_load_strategy + @property + def user_agent(self): + """返回user agent""" + return self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + @property def scroll(self): """返回用于滚动滚动条的对象""" @@ -1174,10 +1179,10 @@ class ChromiumPageScroll(ChromiumScroll): self.t1 = 'window' self.t2 = 'document.documentElement' - def to_see(self, loc_or_ele, center=False): + def to_see(self, loc_or_ele, center=None): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ ele = self._driver._ele(loc_or_ele) @@ -1186,17 +1191,22 @@ class ChromiumPageScroll(ChromiumScroll): def _to_see(self, ele, center): """执行滚动页面直到元素可见 :param ele: 元素对象 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ - if center: - ele.run_js('this.scrollIntoViewIfNeeded();') - self._wait_scrolled() - return - - ele.run_js('this.scrollIntoViewIfNeeded(false);') - if ele.states.is_covered: - ele.run_js('this.scrollIntoViewIfNeeded();') + txt = 'true' if center else 'false' + ele.run_js(f'this.scrollIntoViewIfNeeded({txt});') + if center or (center is not False and ele.states.is_covered): + ele.run_js('''function getWindowScrollTop() {var scroll_top = 0; + if (document.documentElement && document.documentElement.scrollTop) { + scroll_top = document.documentElement.scrollTop; + } else if (document.body) {scroll_top = document.body.scrollTop;} + return scroll_top;} + const { top, height } = this.getBoundingClientRect(); + const elCenter = top + height / 2; + const center = window.innerHeight / 2; + window.scrollTo({top: getWindowScrollTop() - (center - elCenter), + behavior: 'instant'});''') self._wait_scrolled() diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 9638dc8..94d962d 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi 
@@ -111,6 +111,9 @@ class ChromiumBase(BasePage): @property def page_load_strategy(self) -> str: ... + @property + def user_agent(self) -> str: ... + @property def scroll(self) -> ChromiumPageScroll: ... @@ -267,9 +270,9 @@ class NetworkListener(object): class ChromiumPageScroll(ChromiumScroll): def __init__(self, page: ChromiumBase): ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ... - def _to_see(self, ele: ChromiumElement, center: bool) -> None: ... + def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... class ChromiumBaseSetter(object): @@ -366,4 +369,4 @@ class ScreencastMode(object): def frugal_imgs_mode(self) -> None: ... - def imgs_mode(self) -> None: ... \ No newline at end of file + def imgs_mode(self) -> None: ... diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 59f4eb0..e76ad7a 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1771,9 +1771,9 @@ class ChromiumScroll(object): class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center=False): + def to_see(self, center=None): """滚动页面直到元素可见 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ self._driver.page.scroll.to_see(self._driver, center=center) diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 53538ff..1084c65 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -496,7 +496,7 @@ class ChromiumScroll(object): class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center: bool = False) -> None: ... + def to_see(self, center: Union[bool, None] = None) -> None: ... 
class ChromiumSelect(object): diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index f9118c4..374df89 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -638,10 +638,10 @@ class ChromiumFrameScroll(ChromiumPageScroll): self.t1 = self.t2 = 'this.documentElement' self._wait_complete = False - def to_see(self, loc_or_ele, center=False): + def to_see(self, loc_or_ele, center=None): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 631fb5f..5326e53 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -203,7 +203,7 @@ class ChromiumFrameIds(object): class ChromiumFrameScroll(ChromiumPageScroll): def __init__(self, frame: ChromiumFrame) -> None: ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[None, bool] = None) -> None: ... 
class ChromiumFrameSetter(ChromiumBaseSetter): diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index 50ad5c7..46716b8 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -120,6 +120,14 @@ class WebPageTab(SessionPage, ChromiumTab): """以dict方式返回cookies""" return super().cookies + @property + def user_agent(self): + """返回user agent""" + if self._mode == 's': + return super().user_agent + elif self._mode == 'd': + return super(SessionPage, self).user_agent + @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index b2d0428..fc1b132 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -65,6 +65,9 @@ class WebPageTab(SessionPage, ChromiumTab): @property def cookies(self) -> dict: ... + @property + def user_agent(self) -> str: ... + @property def session(self) -> Session: ... diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 3d934b9..08a12bf 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -98,6 +98,11 @@ class SessionPage(BasePage): except Exception: return None + @property + def user_agent(self): + """返回user agent""" + return self.session.headers.get('user-agent', '') + @property def download_path(self): """返回下载路径""" diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index c551834..3a32942 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -63,6 +63,9 @@ class SessionPage(BasePage): @property def json(self) -> Union[dict, None]: ... + @property + def user_agent(self) -> str: ... + @property def download_path(self) -> str: ... 
diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 8c3dd5e..71dc778 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -187,6 +187,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """以dict方式返回cookies""" return super().cookies + @property + def user_agent(self): + """返回user agent""" + if self._mode == 's': + return super().user_agent + elif self._mode == 'd': + return super(SessionPage, self).user_agent + @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" @@ -361,8 +369,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage): return if copy_user_agent: - selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": selenium_user_agent}) + user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + self.session.headers.update({"User-Agent": user_agent}) set_session_cookies(self.session, super(SessionPage, self).get_cookies()) diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index 475468a..d9ddda2 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -67,6 +67,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def cookies(self) -> dict: ... + @property + def user_agent(self) -> str: ... + @property def session(self) -> Session: ... 
From e46f068218351716001b7f64484751c233a18562 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 27 Jul 2023 15:43:46 +0800 Subject: [PATCH 04/17] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E5=BD=93=E7=BD=91=E7=AB=99headers=E4=B8=8D=E8=A7=84=E8=8C=83?= =?UTF-8?q?=E6=97=B6=E8=8E=B7=E5=8F=96=E4=B8=8D=E5=88=B0=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=9B=E4=BF=AE=E5=A4=8D=E4=B8=80?= =?UTF-8?q?=E4=B8=AA=E7=9B=91=E5=90=AC=E6=97=B6=E5=8F=AF=E8=83=BD=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 4 ++-- DrissionPage/session_page.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index a86d5a1..3ecb753 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -1140,7 +1140,8 @@ class NetworkListener(object): def _loading_finished(self, **kwargs): """请求完成时处理方法""" request_id = kwargs['requestId'] - if request_id in self._requests: + request = self._requests.get(request_id) + if request: try: r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) body = r['body'] @@ -1149,7 +1150,6 @@ class NetworkListener(object): body = '' is_base64 = False - request = self._requests[request_id] target = request['target'] rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) rd.method = request['method'] diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 08a12bf..e237707 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -8,7 +8,7 @@ from time import sleep from urllib.parse import urlparse from DownloadKit import DownloadKit -from requests import Session, Response +from requests import Session from requests.structures import CaseInsensitiveDict from tldextract import extract @@ -507,15 +507,17 @@ class 
FileExists(object): self._setter.DownloadKit._file_exists = 'overwrite' -def check_headers(kwargs, headers, arg) -> bool: +def check_headers(kwargs, headers, arg): """检查kwargs或headers中是否有arg所示属性""" return arg in kwargs['headers'] or arg in headers -def set_charset(response) -> Response: +def set_charset(response): """设置Response对象的编码""" # 在headers中获取编码 content_type = response.headers.get('content-type', '').lower() + if not content_type.endswith(';'): + content_type += ';' charset = search(r'charset[=: ]*(.*)?;', content_type) if charset: From 3a416ec0614e91eb6f4e55f322c0907460aac5b7 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 27 Jul 2023 15:56:40 +0800 Subject: [PATCH 05/17] =?UTF-8?q?3.2.31=20set=5Fargument('--headless')?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E6=94=B9=E4=B8=BA=E6=96=B0=E5=86=99=E6=B3=95?= =?UTF-8?q?=EF=BC=9B=E4=BC=98=E5=8C=96find=5Ftabs()=E7=94=A8=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_page.py | 27 +- DrissionPage/chromium_page.pyi | 4 +- DrissionPage/configs/chromium_options.py | 7 +- DrissionPage/configs/driver_options.py | 364 +++++ DrissionPage/configs/driver_options.pyi | 89 ++ DrissionPage/mixpage/base.py | 324 +++++ DrissionPage/mixpage/base.pyi | 175 +++ DrissionPage/mixpage/drission.py | 458 +++++++ DrissionPage/mixpage/drission.pyi | 96 ++ DrissionPage/mixpage/driver_element.py | 1264 ++++++++++++++++++ DrissionPage/mixpage/driver_element.pyi | 326 +++++ DrissionPage/mixpage/driver_page.py | 611 +++++++++ DrissionPage/mixpage/driver_page.pyi | 189 +++ DrissionPage/mixpage/mix_page.py | 344 +++++ DrissionPage/mixpage/mix_page.pyi | 156 +++ DrissionPage/mixpage/session_element.py | 357 +++++ DrissionPage/mixpage/session_element.pyi | 114 ++ DrissionPage/mixpage/session_page.py | 533 ++++++++ DrissionPage/mixpage/session_page.pyi | 237 ++++ DrissionPage/mixpage/shadow_root_element.py | 219 +++ DrissionPage/mixpage/shadow_root_element.pyi | 84 ++ 
DrissionPage/session_page.py | 2 +- setup.py | 2 +- 23 files changed, 5964 insertions(+), 18 deletions(-) create mode 100644 DrissionPage/configs/driver_options.py create mode 100644 DrissionPage/configs/driver_options.pyi create mode 100644 DrissionPage/mixpage/base.py create mode 100644 DrissionPage/mixpage/base.pyi create mode 100644 DrissionPage/mixpage/drission.py create mode 100644 DrissionPage/mixpage/drission.pyi create mode 100644 DrissionPage/mixpage/driver_element.py create mode 100644 DrissionPage/mixpage/driver_element.pyi create mode 100644 DrissionPage/mixpage/driver_page.py create mode 100644 DrissionPage/mixpage/driver_page.pyi create mode 100644 DrissionPage/mixpage/mix_page.py create mode 100644 DrissionPage/mixpage/mix_page.pyi create mode 100644 DrissionPage/mixpage/session_element.py create mode 100644 DrissionPage/mixpage/session_element.pyi create mode 100644 DrissionPage/mixpage/session_page.py create mode 100644 DrissionPage/mixpage/session_page.pyi create mode 100644 DrissionPage/mixpage/shadow_root_element.py create mode 100644 DrissionPage/mixpage/shadow_root_element.pyi diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 16d7a7b..b66a801 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -194,22 +194,25 @@ class ChromiumPage(ChromiumBase): tab_id = tab_id or self.tab_id return ChromiumTab(self, tab_id) - def find_tabs(self, text=None, by_title=True, by_url=None, special=False): + def find_tabs(self, title=None, url=None, tab_type=None, single=True): """查找符合条件的tab,返回它们的id组成的列表 - :param text: 查询条件 - :param by_title: 是否匹配title - :param by_url: 是否匹配url - :param special: 是否匹配特殊tab,如打印页 - :return: tab id组成的列表 + :param title: 要匹配title的文本 + :param url: 要匹配url的文本 + :param tab_type: tab类型,可用列表输入多个 + :param single: 是否返回首个结果的id,为False返回所有信息 + :return: tab id或tab dict """ tabs = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp - if text is None or not (by_title or 
by_url): - return [i['id'] for i in tabs if (not special and i['type'] == 'page') - or (special and i['type'] not in ('page', 'iframe'))] + if isinstance(tab_type, str): + tab_type = {tab_type} + elif isinstance(tab_type, (list, tuple, set)): + tab_type = set(tab_type) + elif tab_type is not None: + raise TypeError('tab_type只能是set、list、tuple、str、None。') - return [i['id'] for i in tabs if ((not special and i['type'] == 'page') - or (special and i['type'] not in ('page', 'iframe'))) - and ((by_url and text in i['url']) or (by_title and text in i['title']))] + r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) + and (tab_type is None or i['type'] in tab_type))] + return r[0]['id'] if r and single else r def new_tab(self, url=None, switch_to=True): """新建一个标签页,该标签页在最后面 diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index d4ceb86..b6d7023 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -81,8 +81,8 @@ class ChromiumPage(ChromiumBase): def get_tab(self, tab_id: str = None) -> ChromiumTab: ... - def find_tabs(self, text: str = None, by_title: bool = True, by_url: bool = None, - special: bool = False) -> List[str]: ... + def find_tabs(self, title: str = None, url: str = None, + tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ... def new_tab(self, url: str = None, switch_to: bool = True) -> str: ... 
diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/configs/chromium_options.py index e31d97e..f5d32b2 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/configs/chromium_options.py @@ -146,8 +146,11 @@ class ChromiumOptions(object): """ self.remove_argument(arg) if value is not False: - arg_str = arg if value is None else f'{arg}={value}' - self._arguments.append(arg_str) + if arg == '--headless' and value is None: + self._arguments.append('--headless=new') + else: + arg_str = arg if value is None else f'{arg}={value}' + self._arguments.append(arg_str) return self def remove_argument(self, value): diff --git a/DrissionPage/configs/driver_options.py b/DrissionPage/configs/driver_options.py new file mode 100644 index 0000000..ce30964 --- /dev/null +++ b/DrissionPage/configs/driver_options.py @@ -0,0 +1,364 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from pathlib import Path + +from selenium.webdriver.chrome.options import Options + +from .options_manage import OptionsManager + + +class DriverOptions(Options): + """chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类, + 增加了删除配置和保存到文件方法。 + """ + + def __init__(self, read_file=True, ini_path=None): + """初始化,默认从文件读取设置 + :param read_file: 是否从默认ini文件中读取配置信息 + :param ini_path: ini文件路径,为None则读取默认ini文件 + """ + super().__init__() + self._user_data_path = None + + if read_file: + self.ini_path = str(ini_path) if ini_path else str(Path(__file__).parent / 'configs.ini') + om = OptionsManager(self.ini_path) + options_dict = om.chrome_options + + self._driver_path = om.paths.get('chromedriver_path', None) + self._download_path = om.paths.get('download_path', None) + self._binary_location = options_dict.get('binary_location', '') + self._arguments = options_dict.get('arguments', []) + self._extensions = options_dict.get('extensions', []) + self._experimental_options = options_dict.get('experimental_options', {}) + self._debugger_address = 
options_dict.get('debugger_address', None) + self.page_load_strategy = options_dict.get('page_load_strategy', 'normal') + self.system_user_path = options_dict.get('system_user_path', False) + + for arg in self._arguments: + if arg.startswith('--user-data-dir='): + self.set_paths(user_data_path=arg[16:]) + break + + self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30}) + return + + self._driver_path = None + self._download_path = None + self.ini_path = None + self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} + self._debugger_address = '127.0.0.1:9222' + self.system_user_path = False + + @property + def driver_path(self): + """chromedriver文件路径""" + return self._driver_path + + @property + def download_path(self): + """默认下载路径文件路径""" + return self._download_path + + @property + def chrome_path(self): + """浏览器启动文件路径""" + return self.browser_path + + @property + def browser_path(self): + """浏览器启动文件路径""" + return self.binary_location or 'chrome' + + @property + def user_data_path(self): + """返回用户文件夹路径""" + return self._user_data_path + + # -------------重写父类方法,实现链式操作------------- + def add_argument(self, argument): + """添加一个配置项 + :param argument: 配置项内容 + :return: 当前对象 + """ + super().add_argument(argument) + return self + + def set_capability(self, name, value): + """设置一个capability + :param name: capability名称 + :param value: capability值 + :return: 当前对象 + """ + super().set_capability(name, value) + return self + + def add_extension(self, extension): + """添加插件 + :param extension: crx文件路径 + :return: 当前对象 + """ + super().add_extension(extension) + return self + + def add_encoded_extension(self, extension): + """将带有扩展数据的 Base64 编码字符串添加到将用于将其提取到 ChromeDriver 的列表中 + :param extension: 带有扩展数据的 Base64 编码字符串 + :return: 当前对象 + """ + super().add_encoded_extension(extension) + return self + + def add_experimental_option(self, name, value): + """添加一个实验选项到浏览器 + :param name: 选项名称 + :param value: 选项值 + :return: 当前对象 + """ + 
super().add_experimental_option(name, value) + return self + + # -------------重写父类方法结束------------- + + def save(self, path=None): + """保存设置到文件 + :param path: ini文件的路径, None 保存到当前读取的配置文件,传入 'default' 保存到默认ini文件 + :return: 保存文件的绝对路径 + """ + if path == 'default': + path = (Path(__file__).parent / 'configs.ini').absolute() + + elif path is None: + if self.ini_path: + path = Path(self.ini_path).absolute() + else: + path = (Path(__file__).parent / 'configs.ini').absolute() + + else: + path = Path(path).absolute() + + path = path / 'config.ini' if path.is_dir() else path + + if path.exists(): + om = OptionsManager(str(path)) + else: + om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) + + options = self.as_dict() + + for i in options: + if i == 'driver_path': + om.set_item('paths', 'chromedriver_path', options[i]) + elif i == 'download_path': + om.set_item('paths', 'download_path', options[i]) + else: + om.set_item('chrome_options', i, options[i]) + + path = str(path) + om.save(path) + + return path + + def save_to_default(self): + """保存当前配置到默认ini文件""" + return self.save('default') + + def remove_argument(self, value): + """移除一个argument项 + :param value: 设置项名,有值的设置项传入设置名称即可 + :return: 当前对象 + """ + del_list = [] + + for argument in self._arguments: + if argument.startswith(value): + del_list.append(argument) + + for del_arg in del_list: + self._arguments.remove(del_arg) + + return self + + def remove_experimental_option(self, key): + """移除一个实验设置,传入key值删除 + :param key: 实验设置的名称 + :return: 当前对象 + """ + if key in self._experimental_options: + self._experimental_options.pop(key) + + return self + + def remove_all_extensions(self): + """移除所有插件 + :return: 当前对象 + """ + # 因插件是以整个文件储存,难以移除其中一个,故如须设置则全部移除再重设 + self._extensions = [] + return self + + def set_argument(self, arg, value): + """设置浏览器配置的argument属性 + :param arg: 属性名 + :param value: 属性值,有值的属性传入值,没有的传入bool + :return: 当前对象 + """ + self.remove_argument(arg) + + if value: + arg_str = arg if 
isinstance(value, bool) else f'{arg}={value}' + self.add_argument(arg_str) + + return self + + def set_timeouts(self, implicit=None, pageLoad=None, script=None): + """设置超时时间,设置单位为秒,selenium4以上版本有效 + :param implicit: 查找元素超时时间 + :param pageLoad: 页面加载超时时间 + :param script: 脚本运行超时时间 + :return: 当前对象 + """ + if implicit is not None: + self.timeouts['implicit'] = implicit + if pageLoad is not None: + self.timeouts['pageLoad'] = pageLoad + if script is not None: + self.timeouts['script'] = script + + return self + + def set_headless(self, on_off=True): + """设置是否隐藏浏览器界面 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = True if on_off else False + return self.set_argument('--headless', on_off) + + def set_no_imgs(self, on_off=True): + """设置是否加载图片 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = True if on_off else False + return self.set_argument('--blink-settings=imagesEnabled=false', on_off) + + def set_no_js(self, on_off=True): + """设置是否禁用js + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = True if on_off else False + return self.set_argument('--disable-javascript', on_off) + + def set_mute(self, on_off=True): + """设置是否静音 + :param on_off: 开或关 + :return: 当前对象 + """ + on_off = True if on_off else False + return self.set_argument('--mute-audio', on_off) + + def set_user_agent(self, user_agent): + """设置user agent + :param user_agent: user agent文本 + :return: 当前对象 + """ + return self.set_argument('--user-agent', user_agent) + + def set_proxy(self, proxy): + """设置代理 + :param proxy: 代理url和端口 + :return: 当前对象 + """ + return self.set_argument('--proxy-server', proxy) + + def set_page_load_strategy(self, value): + """设置page_load_strategy,可接收 'normal', 'eager', 'none' + selenium4以上版本才支持此功能 + normal:默认情况下使用, 等待所有资源下载完成 + eager:DOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中 + none:完全不阻塞WebDriver + :param value: 可接收 'normal', 'eager', 'none' + :return: 当前对象 + """ + if value not in ('normal', 'eager', 'none'): + raise ValueError("只能选择'normal', 'eager', 'none'。") + self.page_load_strategy = 
value.lower() + return self + + def set_paths(self, driver_path=None, chrome_path=None, browser_path=None, local_port=None, + debugger_address=None, download_path=None, user_data_path=None, cache_path=None): + """快捷的路径设置函数 + :param driver_path: chromedriver.exe路径 + :param chrome_path: chrome.exe路径 + :param browser_path: 浏览器可执行文件路径 + :param local_port: 本地端口号 + :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 + :param download_path: 下载文件路径 + :param user_data_path: 用户数据路径 + :param cache_path: 缓存路径 + :return: 当前对象 + """ + if driver_path is not None: + self._driver_path = str(driver_path) + + if chrome_path is not None: + self.binary_location = str(chrome_path) + + if browser_path is not None: + self.binary_location = str(browser_path) + + if local_port is not None: + self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}' + + if debugger_address is not None: + self.debugger_address = debugger_address + + if download_path is not None: + self._download_path = str(download_path) + + if user_data_path is not None: + self.set_argument('--user-data-dir', str(user_data_path)) + self._user_data_path = user_data_path + + if cache_path is not None: + self.set_argument('--disk-cache-dir', str(cache_path)) + + return self + + def as_dict(self): + """已dict方式返回所有配置信息""" + return chrome_options_to_dict(self) + + +def chrome_options_to_dict(options): + """把chrome配置对象转换为字典 + :param options: chrome配置对象,字典或DriverOptions对象 + :return: 配置字典 + """ + if options in (False, None): + return DriverOptions(read_file=False).as_dict() + + if isinstance(options, dict): + return options + + re_dict = dict() + attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path', + 'page_load_strategy', 'download_path'] + + options_dir = options.__dir__() + for attr in attrs: + try: + re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None + except Exception: + pass + + if 'timeouts' in options_dir and 'timeouts' 
in options._caps: + timeouts = options.__getattribute__('timeouts') + re_dict['timeouts'] = timeouts + + return re_dict diff --git a/DrissionPage/configs/driver_options.pyi b/DrissionPage/configs/driver_options.pyi new file mode 100644 index 0000000..cb16b21 --- /dev/null +++ b/DrissionPage/configs/driver_options.pyi @@ -0,0 +1,89 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from pathlib import Path +from typing import Union, List + +from selenium.webdriver.chrome.options import Options + + +class DriverOptions(Options): + + def __init__(self, read_file: bool = True, ini_path: Union[str, Path] = None): + self.ini_path: str = ... + self._driver_path: str = ... + self._user_data_path: str = ... + self._download_path: str = ... + + @property + def driver_path(self) -> str: ... + + @property + def download_path(self) -> str: ... + + @property + def chrome_path(self) -> str: ... + + @property + def browser_path(self) -> str: ... + + @property + def user_data_path(self) -> str: ... + + # -------------重写父类方法,实现链式操作------------- + def add_argument(self, argument: str) -> DriverOptions: ... + + def set_capability(self, name: str, value: str) -> DriverOptions: ... + + def add_extension(self, extension: str) -> DriverOptions: ... + + def add_encoded_extension(self, extension: str) -> DriverOptions: ... + + def add_experimental_option(self, name: str, value: Union[str, int, dict, List[str]]) -> DriverOptions: ... + + # -------------重写父类方法结束------------- + + def save(self, path: str = None) -> str: ... + + def save_to_default(self) -> str: ... + + def remove_argument(self, value: str) -> DriverOptions: ... + + def remove_experimental_option(self, key: str) -> DriverOptions: ... + + def remove_all_extensions(self) -> DriverOptions: ... + + def set_argument(self, arg: str, value: Union[bool, str]) -> DriverOptions: ... + + def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> DriverOptions: ... 
+ + def set_headless(self, on_off: bool = True) -> DriverOptions: ... + + def set_no_imgs(self, on_off: bool = True) -> DriverOptions: ... + + def set_no_js(self, on_off: bool = True) -> DriverOptions: ... + + def set_mute(self, on_off: bool = True) -> DriverOptions: ... + + def set_user_agent(self, user_agent: str) -> DriverOptions: ... + + def set_proxy(self, proxy: str) -> DriverOptions: ... + + def set_page_load_strategy(self, value: str) -> DriverOptions: ... + + def set_paths(self, + driver_path: Union[str, Path] = None, + chrome_path: Union[str, Path] = None, + browser_path: Union[str, Path] = None, + local_port: Union[int, str] = None, + debugger_address: str = None, + download_path: str = None, + user_data_path: str = None, + cache_path: str = None) -> DriverOptions: ... + + def as_dict(self) -> dict: ... + + +def chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]: ... diff --git a/DrissionPage/mixpage/base.py b/DrissionPage/mixpage/base.py new file mode 100644 index 0000000..1194326 --- /dev/null +++ b/DrissionPage/mixpage/base.py @@ -0,0 +1,324 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from abc import abstractmethod +from re import sub +from urllib.parse import quote + +from DrissionPage.commons.web import format_html +from DrissionPage.commons.locator import get_loc + + +class BaseParser(object): + """所有页面、元素类的基类""" + + def __call__(self, loc_or_str): + return self.ele(loc_or_str) + + def ele(self, loc_or_ele, timeout=None): + return self._ele(loc_or_ele, timeout, True) + + def eles(self, loc_or_str, timeout=None): + return self._ele(loc_or_str, timeout, False) + + # ----------------以下属性或方法待后代实现---------------- + @property + def html(self): + return '' + + def s_ele(self, loc_or_ele): + pass + + def s_eles(self, loc_or_str): + pass + + @abstractmethod + def _ele(self, loc_or_ele, timeout=None, single=True): + pass + + +class BaseElement(BaseParser): + """各元素类的基类""" 
+ + def __init__(self, page=None): + self.page = page + + # ----------------以下属性或方法由后代实现---------------- + @property + def tag(self): + return + + @abstractmethod + def _ele(self, loc_or_str, timeout=None, single=True, relative=False): + pass + + def parent(self, level_or_loc=1): + pass + + def prev(self, index=1): + return None # ShadowRootElement直接继承 + + def prevs(self) -> None: + return None # ShadowRootElement直接继承 + + def next(self, index=1): + pass + + def nexts(self): + pass + + +class DrissionElement(BaseElement): + """DriverElement、ChromiumElement 和 SessionElement的基类 + 但不是ShadowRootElement的基类""" + + @property + def link(self): + """返回href或src绝对url""" + return self.attr('href') or self.attr('src') + + @property + def css_path(self): + """返回css path路径""" + return self._get_ele_path('css') + + @property + def xpath(self): + """返回xpath路径""" + return self._get_ele_path('xpath') + + @property + def comments(self): + """返回元素注释文本组成的列表""" + return self.eles('xpath:.//comment()') + + def texts(self, text_node_only=False): + """返回元素内所有直接子节点的文本,包括元素和文本节点 + :param text_node_only: 是否只返回文本节点 + :return: 文本列表 + """ + if text_node_only: + texts = self.eles('xpath:/text()') + else: + texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] + + return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] + + def parent(self, level_or_loc=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :return: 上级元素对象 + """ + if isinstance(level_or_loc, int): + loc = f'xpath:./ancestor::*[{level_or_loc}]' + + elif isinstance(level_or_loc, (tuple, str)): + loc = get_loc(level_or_loc, True) + + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = f'xpath:./ancestor::{loc[1].lstrip(". 
/ ")}' + + else: + raise TypeError('level_or_loc参数只能是tuple、int或str。') + + return self._ele(loc, timeout=0, relative=True) + + def prev(self, index=1, filter_loc='', timeout=0): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 前面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout) + return nodes[-1] if nodes else None + + def next(self, index=1, filter_loc='', timeout=0): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 后面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout) + return nodes[0] if nodes else None + + def before(self, index=1, filter_loc='', timeout=None): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 前面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素前面的某个元素或节点 + """ + nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout) + return nodes[-1] if nodes else None + + def after(self, index=1, filter_loc='', timeout=None): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 后面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素后面的某个元素或节点 + """ + nodes = self._get_brothers(index, filter_loc, 'following', False, timeout) + return nodes[0] if nodes else None + + def prevs(self, filter_loc='', timeout=0): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout) + + def nexts(self, filter_loc='', timeout=0): + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return self._get_brothers(filter_loc=filter_loc, direction='following', timeout=timeout) + + def befores(self, filter_loc='', 
timeout=None): + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素前面的元素或节点组成的列表 + """ + return self._get_brothers(filter_loc=filter_loc, direction='preceding', brother=False, timeout=timeout) + + def afters(self, filter_loc='', timeout=None): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素后面的元素或节点组成的列表 + """ + return self._get_brothers(filter_loc=filter_loc, direction='following', brother=False, timeout=timeout) + + def _get_brothers(self, index=None, filter_loc='', direction='following', brother=True, timeout=.5): + """按要求返回兄弟元素或节点组成的列表 + :param index: 获取第几个,该参数不为None时只获取该编号的元素 + :param filter_loc: 用于筛选的查询语法 + :param direction: 'following' 或 'preceding',查找的方向 + :param brother: 查找范围,在同级查找还是整个dom前后查找 + :param timeout: 查找等待时间 + :return: DriverElement对象或字符串 + """ + if index is not None and index < 1: + raise ValueError('index必须大于等于1。') + + brother = '-sibling' if brother else '' + + if not filter_loc: + loc = '*' + + else: + loc = get_loc(filter_loc, True) # 把定位符转换为xpath + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + loc = loc[1].lstrip('./') + + loc = f'xpath:./{direction}{brother}::{loc}' + + nodes = self._ele(loc, timeout=timeout, single=False, relative=True) + nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] + + if nodes and index is not None: + index = index - 1 if direction == 'following' else -index + try: + return [nodes[index]] + except IndexError: + return [] + else: + return nodes + + # ----------------以下属性或方法由后代实现---------------- + @property + def attrs(self): + return + + @property + def text(self): + return + + @property + def raw_text(self): + return + + @abstractmethod + def attr(self, attr: str): + return '' + + def _get_ele_path(self, mode): + return '' + + +class BasePage(BaseParser): + """页面类的基类""" + + def __init__(self, timeout=None): + """初始化函数""" + 
self._url = None + self.timeout = timeout if timeout is not None else 10 + self.retry_times = 3 + self.retry_interval = 2 + self._url_available = None + + @property + def title(self): + """返回网页title""" + ele = self.ele('xpath://title') + return ele.text if ele else None + + @property + def timeout(self): + """返回查找元素时等待的秒数""" + return self._timeout + + @timeout.setter + def timeout(self, second): + """设置查找元素时等待的秒数""" + self._timeout = second + + @property + def cookies(self): + """返回cookies""" + return self.get_cookies(True) + + @property + def url_available(self): + """返回当前访问的url有效性""" + return self._url_available + + def _before_connect(self, url, retry, interval): + """连接前的准备 + :param url: 要访问的url + :param retry: 重试次数 + :param interval: 重试间隔 + :return: 重试次数和间隔组成的tuple + """ + self._url = quote(url, safe='/:&?=%;#@+!') + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + return retry, interval + + # ----------------以下属性或方法由后代实现---------------- + @property + def url(self): + return + + @property + def json(self): + return + + @abstractmethod + def get_cookies(self, as_dict=False): + return {} + + @abstractmethod + def get(self, url, show_errmsg=False, retry=None, interval=None): + pass diff --git a/DrissionPage/mixpage/base.pyi b/DrissionPage/mixpage/base.pyi new file mode 100644 index 0000000..1f12e80 --- /dev/null +++ b/DrissionPage/mixpage/base.pyi @@ -0,0 +1,175 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from abc import abstractmethod +from typing import Union, Tuple, List + + +class BaseParser(object): + + def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... + + def ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], timeout: float = None): ... + + def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): ... + + # ----------------以下属性或方法待后代实现---------------- + @property + def html(self) -> str: ... 
+ + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement]): ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... + + @abstractmethod + def _ele(self, loc_or_ele, timeout: float = None, single: bool = True): ... + + +class BaseElement(BaseParser): + + def __init__(self, page: BasePage = None): + self.page: BasePage = ... + + # ----------------以下属性或方法由后代实现---------------- + @property + def tag(self) -> str: ... + + @abstractmethod + def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, + relative: bool = False): ... + + def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... + + def prev(self, index: int = 1) -> None: ... + + def prevs(self) -> None: ... + + def next(self, index: int = 1): ... + + def nexts(self): ... + + +class DrissionElement(BaseElement): + + def __init__(self, + page: BasePage = ...): + self.page: BasePage = ... + + @property + def link(self) -> str: ... + + @property + def css_path(self) -> str: ... + + @property + def xpath(self) -> str: ... + + @property + def comments(self) -> list: ... + + def texts(self, text_node_only: bool = False) -> list: ... + + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... + + def prev(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> Union[DrissionElement, str, None]: ... + + def next(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> Union[DrissionElement, str, None]: ... + + def before(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> Union[DrissionElement, str, None]: ... + + def after(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> Union[DrissionElement, str, None]: ... + + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> List[Union[DrissionElement, str]]: ... 
+ + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> List[Union[DrissionElement, str]]: ... + + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union[DrissionElement, str]]: ... + + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union[DrissionElement, str]]: ... + + def _get_brothers(self, + index: int = None, + filter_loc: Union[tuple, str] = '', + direction: str = 'following', + brother: bool = True, + timeout: float = 0.5) -> List[Union[DrissionElement, str]]: ... + + # ----------------以下属性或方法由后代实现---------------- + @property + def attrs(self) -> dict: ... + + @property + def text(self) -> str: ... + + @property + def raw_text(self) -> str: ... + + @abstractmethod + def attr(self, attr: str) -> str: ... + + def _get_ele_path(self, mode) -> str: ... + + +class BasePage(BaseParser): + + def __init__(self, timeout: float = None): + self._url_available: bool = ... + self.retry_times: int = ... + self.retry_interval: float = ... + self._timeout = float = ... + + @property + def title(self) -> Union[str, None]: ... + + @property + def timeout(self) -> float: ... + + @timeout.setter + def timeout(self, second: float) -> None: ... + + @property + def cookies(self) -> dict: ... + + @property + def url_available(self) -> bool: ... + + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... + + # ----------------以下属性或方法由后代实现---------------- + @property + def url(self) -> str: ... + + @property + def json(self) -> dict: ... + + @abstractmethod + def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... + + @abstractmethod + def get(self, + url: str, + show_errmsg: bool = False, + retry: int = None, + interval: float = None): ... 
diff --git a/DrissionPage/mixpage/drission.py b/DrissionPage/mixpage/drission.py new file mode 100644 index 0000000..c8a6f90 --- /dev/null +++ b/DrissionPage/mixpage/drission.py @@ -0,0 +1,458 @@ +# -*- encoding: utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from platform import system +from sys import exit + +from requests import Session +from requests.structures import CaseInsensitiveDict +from selenium import webdriver +from selenium.common.exceptions import SessionNotCreatedException, WebDriverException +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver +from tldextract import extract + +from DrissionPage.commons.tools import get_pid_from_port, get_exe_from_port +from DrissionPage.commons.browser import connect_browser +from DrissionPage.commons.web import cookies_to_tuple +from DrissionPage.configs.session_options import SessionOptions, session_options_to_dict +from DrissionPage.configs.driver_options import DriverOptions + + +class Drission(object): + """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" + + def __init__(self, driver_or_options=None, session_or_options=None, ini_path=None, proxy=None): + """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象 + :param driver_or_options: driver对象或DriverOptions、Options类,传入False则创建空配置对象 + :param session_or_options: Session对象或设置字典,传入False则创建空配置对象 + :param ini_path: ini文件路径 + :param proxy: 代理设置 + """ + self._session = None + self._driver = None + self._session_options = None + self._driver_options = None + self._debugger = None + self._proxy = proxy + + # ------------------处理session options---------------------- + if session_or_options is None: + self._session_options = SessionOptions(ini_path=ini_path).as_dict() + + elif session_or_options is False: + self._session_options = SessionOptions(read_file=False).as_dict() + + elif isinstance(session_or_options, Session): + self._session = session_or_options + + elif 
isinstance(session_or_options, SessionOptions): + self._session_options = session_or_options.as_dict() + + elif isinstance(session_or_options, dict): + self._session_options = session_or_options + + else: + raise TypeError('session_or_options参数只能接收Session, dict, SessionOptions或False。') + + # ------------------处理driver options---------------------- + if driver_or_options is None: + self._driver_options = DriverOptions(ini_path=ini_path) + + elif driver_or_options is False: + self._driver_options = DriverOptions(read_file=False) + + elif isinstance(driver_or_options, RemoteWebDriver): + self._driver = driver_or_options + + elif isinstance(driver_or_options, (Options, DriverOptions)): + self._driver_options = driver_or_options + + else: + raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。') + + def __del__(self): + """关闭对象时关闭浏览器和Session""" + try: + self.close() + except ImportError: + pass + + @property + def session(self): + """返回Session对象,如未初始化则按配置信息创建""" + if self._session is None: + self._set_session(self._session_options) + + if self._proxy: + self._session.proxies = self._proxy + + return self._session + + @property + def driver(self): + """返回WebDriver对象,如未初始化则按配置信息创建。 + 如设置了本地调试浏览器,可自动接入或打开浏览器进程。 + """ + if self._driver is None: + if not self.driver_options.debugger_address and self._proxy: + self.driver_options.add_argument(f'--proxy-server={self._proxy["http"]}') + + driver_path = self.driver_options.driver_path or 'chromedriver' + chrome_path = self.driver_options.chrome_path + + # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程----------- + if self.driver_options.debugger_address: + # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径 + cp, self._debugger = connect_browser(self.driver_options) + + if cp in (None, 'chrome'): + system_type = system().lower() + ip, port = self.driver_options.debugger_address.split(':') + if ip not in ('127.0.0.1', 'localhost'): + chrome_path = None + elif chrome_path == 'chrome' and system_type == 'windows': + chrome_path 
= get_exe_from_port(port) + + # -----------创建WebDriver对象----------- + self._driver = create_driver(chrome_path, driver_path, self.driver_options) + + # -----------解决接管新版浏览器不能定位到正确的标签页的问题----------- + active_tab = self._driver.window_handles[0] + if active_tab != self._driver.current_window_handle: + self._driver.switch_to.window(active_tab) + + return self._driver + + @property + def driver_options(self): + """返回driver配置信息""" + return self._driver_options + + @property + def session_options(self): + """返回session配置信息""" + return self._session_options + + @session_options.setter + def session_options(self, options): + """设置session配置 + :param options: session配置字典 + :return: None + """ + self._session_options = session_options_to_dict(options) + self._set_session(self._session_options) + + @property + def proxy(self): + """返回代理信息""" + return self._proxy + + @proxy.setter + def proxy(self, proxies=None): + """设置代理信息 + :param proxies: 代理信息字典 + :return: None + """ + self._proxy = proxies + + if self._session: + self._session.proxies = proxies + + if self._driver: + cookies = self._driver.get_cookies() + url = self._driver.current_url + self._driver.quit() + self._driver = None + self._driver = self.driver + self._driver.get(url) + + for cookie in cookies: + self.set_cookies(cookie, set_driver=True) + + @property + def debugger_progress(self): + """调试浏览器进程""" + return self._debugger + + def kill_browser(self): + """关闭浏览器进程(如果可以)""" + pid = self.get_browser_progress_id() + if not kill_progress(pid): + self._driver.quit() + + def get_browser_progress_id(self): + """获取浏览器进程id""" + if self.debugger_progress: + return self.debugger_progress.pid + + address = str(self.driver_options.debugger_address).split(':') + if len(address) == 2: + ip, port = address + if ip not in ('127.0.0.1', 'localhost') or not port.isdigit(): + return None + + from os import popen + txt = '' + progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') + for progress in progresses: + if 
'LISTENING' in progress: + txt = progress + break + if not txt: + return None + + return txt.split(' ')[-1] + + def hide_browser(self): + """隐藏浏览器界面""" + self._show_or_hide_browser() + + def show_browser(self): + """显示浏览器界面""" + self._show_or_hide_browser(False) + + def _show_or_hide_browser(self, hide=True): + if system().lower() != 'windows': + raise OSError('该方法只能在Windows系统使用。') + + try: + from win32gui import ShowWindow + from win32con import SW_HIDE, SW_SHOW + except ImportError: + raise ImportError('请先安装:pip install pypiwin32') + + pid = self.get_browser_progress_id() + if not pid: + print('只有设置了debugger_address参数才能使用 show_browser() 和 hide_browser()') + return + hds = get_chrome_hwnds_from_pid(pid) + sw = SW_HIDE if hide else SW_SHOW + for hd in hds: + ShowWindow(hd, sw) + + def set_cookies(self, cookies, set_session=False, set_driver=False): + """设置cookies + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :param set_session: 是否设置session的cookies + :param set_driver: 是否设置driver的cookies + :return: None + """ + cookies = cookies_to_tuple(cookies) + + for cookie in cookies: + if cookie['value'] is None: + cookie['value'] = '' + + # 添加cookie到session + if set_session: + kwargs = {x: cookie[x] for x in cookie + if x.lower() not in ('name', 'value', 'httponly', 'expiry', 'samesite')} + + if 'expiry' in cookie: + kwargs['expires'] = cookie['expiry'] + + self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) + + # 添加cookie到driver + if set_driver: + if 'expiry' in cookie: + cookie['expiry'] = int(cookie['expiry']) + + try: + browser_domain = extract(self.driver.current_url).fqdn + except AttributeError: + browser_domain = '' + + if not cookie.get('domain', None): + if browser_domain: + url = extract(browser_domain) + cookie_domain = f'{url.domain}.{url.suffix}' + else: + raise ValueError('cookie中没有域名或浏览器未访问过URL。') + + cookie['domain'] = cookie_domain + + else: + cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' 
else cookie['domain'][1:] + + if cookie_domain not in browser_domain: + self.driver.get(cookie_domain if cookie_domain.startswith('http://') + else f'http://{cookie_domain}') + + # 避免selenium自动添加.后无法正确覆盖已有cookie + if cookie['domain'][0] != '.': + c = self.driver.get_cookie(cookie['name']) + if c and c['domain'] == cookie['domain']: + self.driver.delete_cookie(cookie['name']) + + self.driver.add_cookie(cookie) + + def _set_session(self, data): + """根据传入字典对session进行设置 + :param data: session配置字典 + :return: None + """ + if self._session is None: + self._session = Session() + + if 'headers' in data: + self._session.headers = CaseInsensitiveDict(data['headers']) + if 'cookies' in data: + self.set_cookies(data['cookies'], set_session=True) + + attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' + for i in attrs: + if i in data: + self._session.__setattr__(i, data[i]) + + def cookies_to_session(self, copy_user_agent=False): + """把driver对象的cookies复制到session对象 + :param copy_user_agent: 是否复制ua信息 + :return: None + """ + if copy_user_agent: + user_agent_to_session(self.driver, self.session) + + self.set_cookies(self.driver.get_cookies(), set_session=True) + + def cookies_to_driver(self, url): + """把session对象的cookies复制到driver对象 + :param url: 作用域 + :return: None + """ + browser_domain = extract(self.driver.current_url).fqdn + ex_url = extract(url) + + if ex_url.fqdn not in browser_domain: + self.driver.get(url) + + domain = f'{ex_url.domain}.{ex_url.suffix}' + + cookies = [] + for cookie in self.session.cookies: + if cookie.domain == '': + cookie.domain = domain + + if domain in cookie.domain: + cookies.append(cookie) + + self.set_cookies(cookies, set_driver=True) + + def close_driver(self, kill=False): + """关闭driver和浏览器""" + if self._driver: + kill_progress(port=self._driver.service.port) # 关闭chromedriver.exe进程 + + if kill: + self.kill_browser() + else: + self._driver.quit() + + self._driver = None + + def 
close_session(self): + """关闭session""" + if self._session: + self._session.close() + self._session = None + + def close(self): + """关闭session、driver和浏览器""" + if self._driver: + self.close_driver() + + if self._session: + self.close_session() + + +def user_agent_to_session(driver, session): + """把driver的user-agent复制到session + :param driver: 来源driver对象 + :param session: 目标session对象 + :return: None + """ + driver = driver + session = session + selenium_user_agent = driver.execute_script("return navigator.userAgent;") + session.headers.update({"User-Agent": selenium_user_agent}) + + +def create_driver(chrome_path, driver_path, options): + """创建 WebDriver 对象 + :param chrome_path: chrome.exe 路径 + :param driver_path: chromedriver.exe 路径 + :param options: Options 对象 + :return: WebDriver 对象 + """ + try: + debugger_address = options.debugger_address + if options.debugger_address: + options = Options() + options.debugger_address = debugger_address + + return webdriver.Chrome(driver_path, options=options) + + # 若版本不对,获取对应 chromedriver 再试 + except (WebDriverException, SessionNotCreatedException): + print('打开失败,尝试获取driver。\n') + from DrissionPage.easy_set import get_match_driver, get_chrome_path + + if chrome_path == 'chrome': + chrome_path = get_chrome_path(show_msg=False, from_ini=False) + + if chrome_path: + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=True) + if driver_path: + try: + options.binary_location = chrome_path + return webdriver.Chrome(driver_path, options=options) + except Exception: + pass + + print('无法启动,请检查浏览器路径,或手动设置chromedriver。\n下载地址:http://npm.taobao.org/mirrors/chromedriver/') + exit(0) + + +def get_chrome_hwnds_from_pid(pid): + """通过PID查询句柄ID""" + try: + from win32gui import IsWindow, GetWindowText, EnumWindows + from win32process import GetWindowThreadProcessId + except ImportError: + raise ImportError('请先安装win32gui,pip install pypiwin32') + + def callback(hwnd, hds): + if IsWindow(hwnd) and '- Google Chrome' in 
GetWindowText(hwnd): + _, found_pid = GetWindowThreadProcessId(hwnd) + if str(found_pid) == str(pid): + hds.append(hwnd) + return True + + hwnds = [] + EnumWindows(callback, hwnds) + return hwnds + + +def kill_progress(pid=None, port=None): + """关闭浏览器进程 + :param pid: 进程id + :param port: 端口号,如没有进程id,从端口号获取 + :return: 是否成功 + """ + from os import popen + if system().lower() != 'windows': + return False + + pid = pid or get_pid_from_port(port) + if not pid: + return False + + if popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'): + popen(f'taskkill /pid {pid} /F') + return True + else: + return False \ No newline at end of file diff --git a/DrissionPage/mixpage/drission.pyi b/DrissionPage/mixpage/drission.pyi new file mode 100644 index 0000000..3079bca --- /dev/null +++ b/DrissionPage/mixpage/drission.pyi @@ -0,0 +1,96 @@ +# -*- encoding: utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from subprocess import Popen +from typing import Union + +from requests import Session +from requests.cookies import RequestsCookieJar +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.webdriver import WebDriver +from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver + +from DrissionPage.configs.driver_options import DriverOptions +from DrissionPage.configs.session_options import SessionOptions + + +class Drission(object): + + def __init__(self, + driver_or_options: Union[RemoteWebDriver, Options, DriverOptions, bool] = None, + session_or_options: Union[Session, dict, SessionOptions, bool] = None, + ini_path: str = None, + proxy: dict = None): + self._session: Session = ... + self._session_options: dict = ... + self._proxy: dict = ... + self._driver: WebDriver = ... + self._debugger: Popen = ... + self._driver_options: DriverOptions = ... + + def __del__(self): ... + + @property + def session(self) -> Session: ... + + @property + def driver(self) -> WebDriver: ... 
+ + @property + def driver_options(self) -> Union[DriverOptions, Options]: ... + + @property + def session_options(self) -> dict: ... + + @session_options.setter + def session_options(self, options: Union[dict, SessionOptions]) -> None: ... + + @property + def proxy(self) -> Union[None, dict]: ... + + @proxy.setter + def proxy(self, proxies: dict = None) -> None: ... + + @property + def debugger_progress(self): ... + + def kill_browser(self) -> None: ... + + def get_browser_progress_id(self) -> Union[str, None]: ... + + def hide_browser(self) -> None: ... + + def show_browser(self) -> None: ... + + def _show_or_hide_browser(self, hide: bool = True) -> None: ... + + def set_cookies(self, + cookies: Union[RequestsCookieJar, list, tuple, str, dict], + set_session: bool = False, + set_driver: bool = False) -> None: ... + + def _set_session(self, data: dict) -> None: ... + + def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... + + def cookies_to_driver(self, url: str) -> None: ... + + def close_driver(self, kill: bool = False) -> None: ... + + def close_session(self) -> None: ... + + def close(self) -> None: ... + + +def user_agent_to_session(driver: RemoteWebDriver, session: Session) -> None: ... + + +def create_driver(chrome_path: str, driver_path: str, options: Options) -> WebDriver: ... + + +def get_chrome_hwnds_from_pid(pid: str) -> list: ... + + +def kill_progress(pid: str = None, port: int = None) -> bool: ... 
diff --git a/DrissionPage/mixpage/driver_element.py b/DrissionPage/mixpage/driver_element.py new file mode 100644 index 0000000..d083961 --- /dev/null +++ b/DrissionPage/mixpage/driver_element.py @@ -0,0 +1,1264 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from os import sep +from pathlib import Path +from time import time, perf_counter, sleep + +from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException, \ + NoSuchElementException +from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver +from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support import expected_conditions as ec +from selenium.webdriver.support.wait import WebDriverWait + +from .base import DrissionElement, BaseElement +from DrissionPage.commons.locator import str_to_loc, get_loc +from DrissionPage.commons.tools import get_usable_path +from DrissionPage.commons.web import format_html, get_ele_txt +from .session_element import make_session_ele + + +class DriverElement(DrissionElement): + """driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能""" + + def __init__(self, ele, page=None): + """初始化对象 + :param ele: 被包装的WebElement元素 + :param page: 元素所在页面 + """ + super().__init__(page) + self._select = None + self._scroll = None + self._inner_ele = ele + + def __repr__(self): + attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] + return f'' + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间 + :return: DriverElement对象或属性、文本 + """ + return self.ele(loc_or_str, timeout) + + # -----------------共有属性和方法------------------- + @property + def inner_ele(self): + return self._inner_ele + + @property + def tag(self): + """返回元素类型""" + return self._inner_ele.tag_name.lower() + + @property + def html(self): + """返回元素outerHTML文本""" + return 
self.inner_ele.get_attribute('outerHTML') + + @property + def inner_html(self): + """返回元素innerHTML文本""" + return self.inner_ele.get_attribute('innerHTML') + + @property + def attrs(self): + """返回元素所有属性及值""" + js = ''' + var dom=arguments[0]; + var names="("; + var len = dom.attributes.length; + for(var i=0;i1){path = '/' + tag + '[' + nth + ']' + path;} + else{path = '/' + tag + path;}''' + txt5 = '''return path;''' + + elif mode == 'css': + txt1 = '' + # txt2 = '''return '#' + el.id + path;''' + txt3 = '' + txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' + txt5 = '''return path.substr(1);''' + + else: + raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") + + js = ''' + function e(el) { + if (!(el instanceof Element)) return; + var path = ''; + while (el.nodeType === Node.ELEMENT_NODE) { + ''' + txt1 + ''' + var sib = el, nth = 0; + while (sib) { + if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} + sib = sib.previousSibling; + } + ''' + txt4 + ''' + el = el.parentNode; + } + ''' + txt5 + ''' + } + return e(arguments[0]); + ''' + res_txt = self.run_script(js) + return f':root{res_txt}' if mode == 'css' else res_txt + + # -----------------driver独有属性和方法------------------- + @property + def size(self): + """返回元素宽和高""" + return self.inner_ele.size + + @property + def location(self): + """返回元素左上角坐标""" + return self.inner_ele.location + + @property + def shadow_root(self): + """返回当前元素的shadow_root元素对象""" + shadow = self.run_script('return arguments[0].shadowRoot') + if shadow: + from .shadow_root_element import ShadowRootElement + return ShadowRootElement(shadow, self) + + @property + def sr(self): + """返回当前元素的shadow_root元素对象""" + return self.shadow_root + + @property + def pseudo_before(self): + """返回当前元素的::before伪元素内容""" + return self.style('content', 'before') + + @property + def pseudo_after(self): + """返回当前元素的::after伪元素内容""" + return self.style('content', 'after') + + @property + def select(self): + 
"""返回专门处理下拉列表的Select类,非下拉列表元素返回False""" + if self._select is None: + if self.tag != 'select': + self._select = False + else: + self._select = Select(self) + + return self._select + + @property + def scroll(self): + """用于滚动滚动条的对象""" + if self._scroll is None: + self._scroll = Scroll(self) + return self._scroll + + def parent(self, level_or_loc=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :return: 上级元素对象 + """ + return super().parent(level_or_loc) + + def prev(self, index=1, filter_loc='', timeout=0): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 前面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + index, filter_loc = _exchange_arguments(index, filter_loc) + return super().prev(index, filter_loc, timeout) + + def next(self, index=1, filter_loc='', timeout=0): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 后面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + index, filter_loc = _exchange_arguments(index, filter_loc) + return super().next(index, filter_loc, timeout) + + def before(self, index=1, filter_loc='', timeout=None): + """返回当前元素前面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 + :param index: 前面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素前面的某个元素或节点 + """ + index, filter_loc = _exchange_arguments(index, filter_loc) + return super().before(index, filter_loc, timeout) + + def after(self, index=1, filter_loc='', timeout=None): + """返回当前元素后面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 + :param index: 后面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素后面的某个元素或节点 + """ + index, filter_loc = _exchange_arguments(index, filter_loc) + return super().after(index, filter_loc, timeout) + + def prevs(self, filter_loc='', timeout=0): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().prevs(filter_loc, timeout) + + 
def above(self, index=1, filter_loc=''):
    """Return one element displayed above the current element.

    :param index: 1-based position in the result list
    :param filter_loc: filter condition; a selenium (By, str) tuple or this library's locator syntax
    :return: the element at that position, or None when index is out of range
    """
    # Query the 'above' direction; asking for 'left' here returned the wrong elements.
    eles = self._get_relative_eles('above', filter_loc)
    # Require index >= 1 so that 0 or negative values cannot wrap around to the end of the list.
    return eles[index - 1] if 1 <= index <= len(eles) else None


def below(self, index=1, filter_loc=''):
    """Return one element displayed below the current element.

    :param index: 1-based position in the result list
    :param filter_loc: filter condition; a selenium (By, str) tuple or this library's locator syntax
    :return: the element at that position, or None when index is out of range
    """
    # Query the 'below' direction; asking for 'left' here returned the wrong elements.
    eles = self._get_relative_eles('below', filter_loc)
    return eles[index - 1] if 1 <= index <= len(eles) else None
def aboves(self, filter_loc=''):
    """Return all elements displayed above the current element.

    :param filter_loc: filter condition; a selenium (By, str) tuple or this library's locator syntax
    :return: list of elements, in the order produced by the relative lookup
    """
    # Query the 'above' direction; asking for 'left' here returned the wrong elements.
    return self._get_relative_eles('above', filter_loc)


def belows(self, filter_loc=''):
    """Return all elements displayed below the current element.

    :param filter_loc: filter condition; a selenium (By, str) tuple or this library's locator syntax
    :return: list of elements, in the order produced by the relative lookup
    """
    # Query the 'below' direction; asking for 'left' here returned the wrong elements.
    return self._get_relative_eles('below', filter_loc)
r + + def click(self, by_js=None, timeout=None): + """点击元素 + 尝试点击直到超时,若都失败就改用js点击 + :param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js + :param timeout: 尝试点击的超时时间,不指定则使用父页面的超时时间 + :return: 是否点击成功 + """ + + def do_it() -> bool: + try: + self.inner_ele.click() + return True + except Exception: + return False + + if not by_js: + timeout = timeout if timeout is not None else self.page.timeout + t1 = perf_counter() + click = do_it() + while not click and perf_counter() - t1 <= timeout: + click = do_it() + + if click: + return True + + # 若点击失败,用js方式点击 + if by_js is not False: + self.run_script('arguments[0].click()') + return True + + return False + + def click_at(self, x=None, y=None, by_js=False): + """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 + :param x: 相对元素左上角坐标的x轴偏移量 + :param y: 相对元素左上角坐标的y轴偏移量 + :param by_js: 是否用js点击 + :return: None + """ + if by_js: + x = self.location['x'] + int(x) if x is not None else self.location['x'] + self.size['width'] // 2 + y = self.location['y'] + int(y) if y is not None else self.location['y'] + self.size['height'] // 2 + js = f""" + var ev = document.createEvent('HTMLEvents'); + ev.clientX = {x}; + ev.clientY = {y}; + ev.initEvent('click', false, true); + arguments[0].dispatchEvent(ev); + """ + self.run_script(js) + + else: + x = int(x) if x is not None else self.size['width'] // 2 + y = int(y) if y is not None else self.size['height'] // 2 + + from selenium.webdriver import ActionChains + ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform() + + def r_click(self): + """右键单击""" + from selenium.webdriver import ActionChains + ActionChains(self.page.driver).context_click(self.inner_ele).perform() + + def r_click_at(self, x=None, y=None): + """带偏移量右键单击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 + :param x: 相对元素左上角坐标的x轴偏移量 + :param y: 相对元素左上角坐标的y轴偏移量 + :return: None + """ + x = int(x) if x is not None else self.size['width'] // 2 + y = int(y) if y is not None else self.size['height'] // 2 + from 
selenium.webdriver import ActionChains + ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).context_click().perform() + + def input(self, vals, clear=True, insure=True, timeout=None): + """输入文本或组合键,也可用于输入文件路径到input元素(文件间用\n间隔) + :param vals: 文本值或按键组合 + :param clear: 输入前是否清空文本框 + :param insure: 确保输入正确,解决文本框有时输入失效的问题,不能用于输入组合键 + :param timeout: 尝试输入的超时时间,不指定则使用父页面的超时时间,只在insure为True时生效 + :return: bool + """ + if not insure or self.tag != 'input' or self.prop('type') != 'text': # 普通输入 + if not isinstance(vals, (str, tuple)): + vals = str(vals) + if clear: + self.inner_ele.clear() + + self.inner_ele.send_keys(*vals) + return True + + else: # 确保输入正确 + if not isinstance(vals, str): + vals = str(vals) + enter = '\n' if vals.endswith('\n') else None + full_txt = vals if clear else f'{self.attr("value")}{vals}' + full_txt = full_txt.rstrip('\n') + + self.click(by_js=True) + timeout = timeout if timeout is not None else self.page.timeout + t1 = perf_counter() + while self.is_valid() and self.attr('value') != full_txt and perf_counter() - t1 <= timeout: + try: + if clear: + self.inner_ele.send_keys(u'\ue009', 'a', u'\ue017') # 有些ui下clear()不生效,用CTRL+a代替 + self.inner_ele.send_keys(vals) + + except Exception: + pass + + if not self.is_valid(): + return False + else: + if self.attr('value') != full_txt: + return False + else: + if enter: + self.inner_ele.send_keys(enter) + return True + + def run_script(self, script, *args): + """执行js代码,代码中用arguments[0]表示自己 + :param script: js文本 + :param args: 传入的参数 + :return: js执行结果 + """ + return self.inner_ele.parent.execute_script(script, self.inner_ele, *args) + + def submit(self): + """提交表单""" + try: + self.inner_ele.submit() + return True + except Exception: + pass + + def clear(self, insure=True): + """清空元素文本 + :param insure: 是否确保清空 + :return: 是否清空成功,不能清空的元素返回None + """ + if insure: + return self.input('') + + else: + try: + self.inner_ele.clear() + return True + except InvalidElementStateException: + return 
def set_prop(self, prop, value):
    """Set a JavaScript property on the element.

    :param prop: property name; it is inserted into the script text after ``arguments[0].``
    :param value: value to assign
    :return: True if the script ran, False if it raised
    """
    try:
        # Pass the value as a script argument instead of splicing it into the
        # JS source: quotes, backslashes and newlines then need no escaping,
        # and non-string values no longer fail on ``.replace``.
        self.run_script(f'arguments[0].{prop}=arguments[1];', value)
        return True
    except Exception:
        return False
remove_attr(self, attr): + """删除元素attribute属性 + :param attr: 属性名 + :return: 是否删除成功 + """ + try: + self.run_script(f'arguments[0].removeAttribute("{attr}");') + return True + except Exception: + return False + + def drag(self, x, y, speed=40, shake=True): + """拖拽当前元素到相对位置 + :param x: x变化值 + :param y: y变化值 + :param speed: 拖动的速度,传入0即瞬间到达 + :param shake: 是否随机抖动 + :return: None + """ + x += self.location['x'] + self.size['width'] // 2 + y += self.location['y'] + self.size['height'] // 2 + self.drag_to((x, y), speed, shake) + + def drag_to(self, ele_or_loc, speed=40, shake=True): + """拖拽当前元素,目标为另一个元素或坐标元组 + :param ele_or_loc: 另一个元素或坐标元组,坐标为元素中点的坐标 + :param speed: 拖动的速度,传入0即瞬间到达 + :param shake: 是否随机抖动 + :return: None + """ + # x, y:目标点坐标 + if isinstance(ele_or_loc, (DriverElement, WebElement)): + target_x = ele_or_loc.location['x'] + ele_or_loc.size['width'] // 2 + target_y = ele_or_loc.location['y'] + ele_or_loc.size['height'] // 2 + elif isinstance(ele_or_loc, tuple): + target_x, target_y = ele_or_loc + else: + raise TypeError('需要DriverElement、WebElement对象或坐标。') + + current_x = self.location['x'] + self.size['width'] // 2 + current_y = self.location['y'] + self.size['height'] // 2 + width = target_x - current_x + height = target_y - current_y + num = 0 if not speed else int(((abs(width) ** 2 + abs(height) ** 2) ** .5) // speed) + + # 将要经过的点存入列表 + points = [(int(current_x + i * (width / num)), int(current_y + i * (height / num))) for i in range(1, num)] + points.append((target_x, target_y)) + + from selenium.webdriver import ActionChains + from random import randint + actions = ActionChains(self.page.driver) + actions.click_and_hold(self.inner_ele) + + # 逐个访问要经过的点 + for x, y in points: + if shake: + x += randint(-3, 4) + y += randint(-3, 4) + actions.move_by_offset(x - current_x, y - current_y) + current_x, current_y = x, y + actions.release().perform() + + def hover(self, x=None, y=None): + """鼠标悬停,可接受偏移量,偏移量相对于元素左上角坐标。不传入x或y值时悬停在元素中点 + :param x: 相对元素左上角坐标的x轴偏移量 + 
:param y: 相对元素左上角坐标的y轴偏移量 + :return: None + """ + from selenium.webdriver import ActionChains + x = int(x) if x is not None else self.size['width'] // 2 + y = int(y) if y is not None else self.size['height'] // 2 + ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).perform() + + def _get_relative_eles(self, mode, loc=''): + """获取网页上相对于当前元素周围的某个元素,可设置选取条件 + :param mode: 可选:'left', 'right', 'above', 'below', 'near' + :param loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 + :return: DriverElement对象 + """ + from selenium.webdriver.support.relative_locator import RelativeBy + + if isinstance(loc, str): + loc = str_to_loc(loc) + + try: + if mode == 'left': + eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_left_of(self.inner_ele)) + elif mode == 'right': + eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_right_of(self.inner_ele)) + elif mode == 'above': + eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).above(self.inner_ele)) + elif mode == 'below': + eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).below(self.inner_ele)) + else: # 'near' + eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).near(self.inner_ele)) + + return [self.page.ele(e) for e in eles] + + except IndexError: + raise ValueError('未找到元素,请检查浏览器版本,低版本的浏览器无法使用此方法。') + + +def make_driver_ele(page_or_ele, loc, single=True, timeout=None): + """执行driver模式元素的查找 + 页面查找元素及元素查找下级元素皆使用此方法 + :param page_or_ele: DriverPage对象或DriverElement对象 + :param loc: 元素定位元组 + :param single: True则返回第一个,False则返回全部 + :param timeout: 查找元素超时时间 + :return: 返回DriverElement元素或它们组成的列表 + """ + # ---------------处理定位符--------------- + if isinstance(loc, (str, tuple)): + loc = get_loc(loc) + + elif str(type(loc)).endswith('RelativeBy'): + page = page_or_ele.page if isinstance(page_or_ele, BaseElement) else page_or_ele + driver = page.driver + eles = driver.find_elements(loc) + return DriverElement(eles[0], page) if single else 
[DriverElement(ele, page) for ele in eles] + + else: + raise ValueError("定位符必须为str、长度为2的tuple、或RelativeBy对象。") + + # ---------------设置 page 和 driver--------------- + if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc_str}' + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>') and isinstance(page_or_ele, DriverElement): + loc_str = f'{page_or_ele.css_path}{loc[1]}' + loc = loc[0], loc_str + + page = page_or_ele.page + driver = page_or_ele.inner_ele + + else: # 传入的是DriverPage对象 + page = page_or_ele + driver = page_or_ele.driver + + # -----------------设置等待对象----------------- + if timeout is not None and timeout != page.timeout: + wait = WebDriverWait(driver, timeout=timeout) + else: + page.wait_object._driver = driver + wait = page.wait_object + + # ---------------执行查找----------------- + try: + # 使用xpath查找 + if loc[0] == 'xpath': + return wait.until(ElementsByXpath(page, loc[1], single, timeout)) + + # 使用css selector查找 + else: + if single: + return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) + else: + eles = wait.until(ec.presence_of_all_elements_located(loc)) + return [DriverElement(ele, page) for ele in eles] + + except TimeoutException: + return [] if not single else None + + except InvalidElementStateException: + raise ValueError(f'无效的查找语句:{loc}') + + +class ElementsByXpath(object): + """用js通过xpath获取元素、节点或属性,与WebDriverWait配合使用""" + + def __init__(self, page, xpath=None, single=False, timeout=10): + """ + :param page: DrissionPage对象 + :param xpath: xpath文本 + :param single: True则返回第一个,False则返回全部 + :param timeout: 超时时间 + """ + self.page = page + self.xpath = xpath + self.single = single + self.timeout = timeout + + def __call__(self, ele_or_driver): + + def get_nodes(node=None, xpath_txt=None, type_txt='7'): + """用js通过xpath获取元素、节点或属性 + :param node: 'document' 或 元素对象 + :param xpath_txt: xpath语句 + :param 
type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate + :return: 元素对象或属性、文本字符串 + """ + node_txt = 'document' if not node or node == 'document' else 'arguments[0]' + for_txt = '' + + # 获取第一个元素、节点或属性 + if type_txt == '9': + return_txt = ''' + if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} + else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} + else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} + else{return e.singleNodeValue;} + ''' + + # 按顺序获取所有元素、节点或属性 + elif type_txt == '7': + for_txt = """ + var a=new Array(); + for(var i = 0; i 元素使用,现在是:{ele.tag}。") + + from selenium.webdriver.support.select import Select as SeleniumSelect + self.inner_ele = ele + self.select_ele = SeleniumSelect(ele.inner_ele) + + def __call__(self, text_or_index, timeout=None): + """选定下拉列表中子元素 + :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: None + """ + timeout = timeout if timeout is not None else self.inner_ele.page.timeout + return self.select(text_or_index, timeout=timeout) + + @property + def is_multi(self): + """返回是否多选表单""" + return self.select_ele.is_multiple + + @property + def options(self): + """返回所有选项元素组成的列表""" + return self.inner_ele.eles('tag:option') + + @property + def selected_option(self): + """返回第一个被选中的option元素 + :return: DriverElement对象或None + """ + ele = self.inner_ele.run_script('return arguments[0].options[arguments[0].selectedIndex];') + return None if ele is None else DriverElement(ele, self.inner_ele.page) + + @property + def selected_options(self): + """返回所有被选中的option元素列表 + :return: DriverElement对象组成的列表 + """ + return [x for x in self.options if x.is_selected()] + + def clear(self): + """清除所有已选项""" + self.select_ele.deselect_all() + + def select(self, text_or_index, timeout=None): + """选定下拉列表中子元素 + :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param 
timeout: 超时时间,不输入默认实用页面超时时间 + :return: 是否选择成功 + """ + i = 'index' if isinstance(text_or_index, int) else 'text' + timeout = timeout if timeout is not None else self.inner_ele.page.timeout + return self._select(text_or_index, i, False, timeout) + + def select_by_value(self, value, timeout=None): + """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple + :param value: value属性值,传入list或tuple可选择多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: None + """ + timeout = timeout if timeout is not None else self.inner_ele.page.timeout + return self._select(value, 'value', False, timeout) + + def deselect(self, text_or_index, timeout=None): + """取消选定下拉列表中子元素 + :param text_or_index: 根据文本或序号取消择选项,若允许多选,传入list或tuple可取消多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: None + """ + i = 'index' if isinstance(text_or_index, int) else 'text' + timeout = timeout if timeout is not None else self.inner_ele.page.timeout + return self._select(text_or_index, i, True, timeout) + + def deselect_by_value(self, value, timeout=None): + """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple + :param value: value属性值,传入list或tuple可取消多项 + :param timeout: 超时时间,不输入默认实用页面超时时间 + :return: None + """ + timeout = timeout if timeout is not None else self.inner_ele.page.timeout + return self._select(value, 'value', True, timeout) + + def invert(self): + """反选""" + if not self.is_multi: + raise NotImplementedError("只能对多项选框执行反选。") + + for i in self.options: + i.click(by_js=True) + + def _select(self, text_value_index, para_type='text', deselect=False, timeout=None): + """选定或取消选定下拉列表中子元素 + :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param para_type: 参数类型,可选 'text'、'value'、'index' + :param deselect: 是否取消选择 + :return: 是否选择成功 + """ + if not self.is_multi and isinstance(text_value_index, (list, tuple)): + raise TypeError('单选下拉列表不能传入list和tuple') + + def do_select(): + try: + if para_type == 'text': + if deselect: + self.select_ele.deselect_by_visible_text(text_value_index) + else: + 
def _select_multi(self, text_value_index=None, para_type='text', deselect=False, timeout=None) -> bool:
    """Select or deselect several options of the drop-down list.

    :param text_value_index: list or tuple of option texts, values or indexes
    :param para_type: how str items are interpreted: 'text', 'value' or 'index'; int items are always indexes
    :param deselect: True to deselect instead of select
    :param timeout: per-item retry timeout; None falls back to the page timeout
    :return: True only if every item was handled successfully
    :raises ValueError: if para_type is not one of the three accepted names
    :raises TypeError: if the container is not a list/tuple or an item is not str/int
    """
    if para_type not in ('text', 'value', 'index'):
        raise ValueError('para_type参数只能传入“text”、“value”或“index”')

    if not isinstance(text_value_index, (list, tuple)):
        raise TypeError('只能传入list或tuple类型。')

    # _select compares elapsed time with the timeout, so it must never be None.
    if timeout is None:
        timeout = self.inner_ele.page.timeout

    success = True
    for item in text_value_index:
        if not isinstance(item, (int, str)):
            raise TypeError('列表只能由str或int组成')

        kind = 'index' if isinstance(item, int) else para_type
        if not self._select(item, kind, deselect, timeout):
            success = False

    return success
self.driver = page_or_ele.driver + + if isinstance(loc_or_ele, DriverElement): + self.target = loc_or_ele.inner_ele + + elif isinstance(loc_or_ele, WebElement): + self.target = loc_or_ele + + elif isinstance(loc_or_ele, str): + self.target = str_to_loc(loc_or_ele) + + elif isinstance(loc_or_ele, tuple): + self.target = loc_or_ele + + else: + raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型。') + + self.timeout = timeout if timeout is not None else page.timeout + + def delete(self): + """等待元素从dom删除""" + return self._wait_ele('del') + + def display(self): + """等待元素从dom显示""" + return self._wait_ele('display') + + def hidden(self): + """等待元素从dom隐藏""" + return self._wait_ele('hidden') + + def _wait_ele(self, mode): + """执行等待 + :param mode: 等待模式 + :return: 是否等待成功 + """ + if isinstance(self.target, WebElement): + end_time = time() + self.timeout + while time() < end_time: + if mode == 'del': + try: + self.target.is_enabled() + except Exception: + return True + + elif mode == 'display' and self.target.is_displayed(): + return True + + elif mode == 'hidden' and not self.target.is_displayed(): + return True + + return False + + else: + try: + if mode == 'del': + WebDriverWait(self.driver, self.timeout).until_not(ec.presence_of_element_located(self.target)) + + elif mode == 'display': + WebDriverWait(self.driver, self.timeout).until(ec.visibility_of_element_located(self.target)) + + elif mode == 'hidden': + WebDriverWait(self.driver, self.timeout).until_not(ec.visibility_of_element_located(self.target)) + + return True + + except Exception: + return False + + +class Scroll(object): + """用于滚动的对象""" + + def __init__(self, page_or_ele): + """ + :param page_or_ele: DriverPage或DriverElement + """ + self.driver = page_or_ele + if isinstance(page_or_ele, DriverElement): + self.t1 = self.t2 = 'arguments[0]' + else: + self.t1 = 'window' + self.t2 = 'document.documentElement' + + def to_top(self): + """滚动到顶端,水平位置不变""" + 
def _exchange_arguments(index, filter_loc):
    """Normalise ``(index, filter_loc)`` for callers that use the reversed MixPage order.

    :param index: expected to be an int position, but may hold a locator
    :param filter_loc: expected to be a locator, but may hold an int position
    :return: ``(index, filter_loc)`` tuple in the order the current API expects
    """
    # Locator first, number second: plain swap.
    if isinstance(index, (str, tuple)) and isinstance(filter_loc, int):
        return filter_loc, index

    # Both numeric and the second is 1: treat it as "no filter".
    if isinstance(index, int) and filter_loc == 1:
        return index, ''

    # Empty first argument: fall back to the first result.
    if isinstance(filter_loc, str) and index == '':
        return 1, filter_loc

    # A locator given as the only positional argument arrives with the default
    # filter_loc of '', so the int test above never fires. Move it into place.
    # Digit-only strings are left alone; they may be meant as a position.
    looks_like_locator = isinstance(index, tuple) or (
        isinstance(index, str) and index != '' and not index.isdigit())
    if filter_loc == '' and looks_like_locator:
        return 1, index

    return index, filter_loc
coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from typing import Union, List, Any, Tuple + +from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver +from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support.select import Select as SeleniumSelect + +from .driver_page import DriverPage +from .mix_page import MixPage +from .shadow_root_element import ShadowRootElement +from .base import DrissionElement +from .session_element import SessionElement + + +class DriverElement(DrissionElement): + + def __init__(self, ele: WebElement, page: Union[DriverPage, MixPage] = None): + self._inner_ele: WebElement = ... + self._select: Select = ... + self._scroll: Scroll = ... + self.page: Union[DriverPage, MixPage] = ... + + def __repr__(self) -> str: ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['DriverElement', str, None]: ... + + # -----------------共有属性和方法------------------- + @property + def inner_ele(self) -> WebElement: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + @property + def attrs(self) -> dict: ... + + @property + def text(self) -> str: ... + + @property + def raw_text(self) -> str: ... + + def attr(self, attr: str) -> str: ... + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['DriverElement', str, None]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union['DriverElement', str]]: ... + + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... 
# Stub for the relative-position lookup ``left``: takes a 1-based-looking
# ``index`` and an optional ``filter_loc`` locator and yields one DriverElement.
# The return type is written as a quoted forward reference so it matches the
# sibling stubs (right / above / below / near).
def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ...
+ + def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... + + def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... + + def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... + + def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... + + def lefts(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... + + def rights(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... + + def aboves(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... + + def belows(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... + + def nears(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... + + def wait_ele(self, + loc_or_ele: Union[str, tuple, DrissionElement, WebElement], + timeout: float = None) -> 'ElementWaiter': ... + + def style(self, style: str, pseudo_ele: str = '') -> str: ... + + def click(self, by_js: bool = None, timeout: float = None) -> bool: ... + + def click_at(self, + x: Union[int, str] = None, + y: Union[int, str] = None, + by_js: bool = False) -> None: ... + + def r_click(self) -> None: ... + + def r_click_at(self, x: Union[int, str] = None, y: Union[int, str] = None) -> None: ... + + def input(self, + vals: Union[str, tuple], + clear: bool = True, + insure: bool = True, + timeout: float = None) -> bool: ... + + def run_script(self, script: str, *args) -> Any: ... + + def submit(self) -> Union[bool, None]: ... + + def clear(self, insure: bool = True) -> Union[None, bool]: ... + + def is_selected(self) -> bool: ... + + def is_enabled(self) -> bool: ... + + def is_displayed(self) -> bool: ... + + def is_valid(self) -> bool: ... + + def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... + + def prop(self, prop: str) -> str: ... 
class ElementsByXpath(object):
    # Type stub for a callable that is constructed with a page and an xpath
    # and is later invoked with a WebDriver or WebElement.

    def __init__(self, page, xpath: str = None, single: bool = False, timeout: float = 10):
        self.single: bool = ...
        self.xpath: str = ...
        self.page: Union[MixPage, DriverPage] = ...

    # ``str or DriverElement`` is a runtime boolean expression that evaluates
    # to ``str``; the element type of the list must be spelled as a Union.
    def __call__(self, ele_or_driver: Union[RemoteWebDriver, WebElement]) \
            -> Union[str, DriverElement, None, List[Union[str, DriverElement]]]: ...
+ + def deselect(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... + + def deselect_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... + + def invert(self) -> None: ... + + def _select(self, + text_value_index: Union[str, int, list, tuple] = ..., + para_type: str = 'text', + deselect: bool = False, + timeout: float = None) -> bool: ... + + def _select_multi(self, + text_value_index: Union[list, tuple] = None, + para_type: str = 'text', + deselect: bool = False) -> bool: ... + + +class ElementWaiter(object): + + def __init__(self, + page_or_ele, + loc_or_ele: Union[str, tuple, DriverElement, WebElement], + timeout: float = None): + self.target: Union[DriverElement, WebElement, tuple] = ... + self.timeout: float = ... + self.driver: Union[WebElement, RemoteWebDriver] = ... + + def delete(self) -> bool: ... + + def display(self) -> bool: ... + + def hidden(self) -> bool: ... + + def _wait_ele(self, mode: str) -> bool: ... + + +class Scroll(object): + + def __init__(self, page_or_ele): + self.driver: Union[DriverElement, DriverPage] = ... + self.t1: str = ... + self.t2: str = ... + + def to_top(self) -> None: ... + + def to_bottom(self) -> None: ... + + def to_half(self) -> None: ... + + def to_rightmost(self) -> None: ... + + def to_leftmost(self) -> None: ... + + def to_location(self, x: int, y: int) -> None: ... + + def up(self, pixel: int = 300) -> None: ... + + def down(self, pixel: int = 300) -> None: ... + + def left(self, pixel: int = 300) -> None: ... + + def right(self, pixel: int = 300) -> None: ... 
@property
def url(self):
    """Return the URL of the current page.

    :return: the current URL, or None when there is no driver or the
             current URL does not start with 'http'
    """
    if not self._driver:
        return None

    # Read current_url exactly once so the value that is checked is the
    # value that is returned, even if the page navigates in between.
    current = self.driver.current_url
    return current if current.startswith('http') else None
def _ele(self, loc_or_ele, timeout=None, single=True):
    """Return the element(s) on the page that match, the first one by default.

    :param loc_or_ele: an element object, a loc tuple, or a query string
    :param timeout: how long to keep looking for the element
    :param single: True returns the first match, False returns all matches
    :return: DriverElement object (or whatever make_driver_ele yields)
    :raises ValueError: if loc_or_ele is none of the supported types
    """
    # A query string or loc tuple has to be resolved by searching the page.
    if isinstance(loc_or_ele, (str, tuple)):
        return make_driver_ele(self, loc_or_ele, single, timeout)

    # An already wrapped element is handed back unchanged.
    if isinstance(loc_or_ele, DriverElement):
        return loc_or_ele

    # A raw selenium element is wrapped so callers get the same type.
    if isinstance(loc_or_ele, WebElement):
        return DriverElement(loc_or_ele, self)

    # The message names the real parameter (it used to say loc_or_str).
    raise ValueError('loc_or_ele参数只能是tuple、str、DriverElement 或 WebElement类型。')
def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False):
    """Try to open a url with the driver, retrying a number of times.

    :param to_url: the url to visit
    :param times: number of retries after the first attempt
    :param interval: seconds to sleep between attempts
    :param show_errmsg: whether to print retries and raise on final failure
    :return: result of check_page(); False on failure, None means undetermined
    """
    last_error = None
    outcome = False

    for attempt in range(times + 1):
        try:
            self.driver.get(to_url)
        except Exception as exc:
            last_error = exc
            outcome = False
        else:
            # Only a page that actually loaded is passed to the page check.
            outcome = self.check_page()

        if outcome is not False:
            return outcome

        # No pause after the final attempt.
        if attempt < times:
            sleep(interval)
            if show_errmsg:
                print(f'重试 {to_url}')

    if show_errmsg:
        if last_error is not None:
            raise last_error
        raise ConnectionError('连接异常。')

    return outcome
def set_timeouts(self, implicit=None, pageLoad=None, script=None):
    """Set the timeouts, in seconds; arguments left as None are not touched.

    :param implicit: timeout for finding elements
    :param pageLoad: timeout for page loading
    :param script: timeout for script execution
    :return: None
    """
    # Each setter is wrapped in a lambda so that self.driver is only
    # looked up when the corresponding value was actually supplied.
    pending = (
        (implicit, lambda seconds: setattr(self, 'timeout', seconds)),
        (pageLoad, lambda seconds: self.driver.set_page_load_timeout(seconds)),
        (script, lambda seconds: self.driver.set_script_timeout(seconds)),
    )
    for seconds, apply in pending:
        if seconds is not None:
            apply(seconds)
def get_session_storage(self, item=None):
    """Read sessionStorage; without ``item`` the whole storage is returned.

    :param item: the key to read; a falsy value returns everything
    :return: the value of one entry, or the whole sessionStorage
    """
    if not item:
        return self.run_script('return sessionStorage;')

    # The key is handed over as a script argument instead of being spliced
    # into the JS source, so keys containing quotes or backslashes are looked
    # up verbatim rather than breaking (or altering) the script.
    return self.run_script('return sessionStorage.getItem(arguments[0]);', str(item))
def set_session_storage(self, item, value):
    """Set or delete one sessionStorage entry.

    :param item: the key to set
    :param value: the value to store; pass False to delete the entry
    :return: None
    """
    # Deletion is requested through ``value`` (the check used to look at
    # ``item``, so entries could never be removed).
    if value is False:
        self.run_script('sessionStorage.removeItem(arguments[0]);', str(item))
        return

    # Key and value travel as script arguments so quotes inside them (for
    # example JSON text) cannot break the generated JavaScript. str() keeps
    # the Python text form that the previous string formatting produced.
    self.run_script('sessionStorage.setItem(arguments[0],arguments[1]);', str(item), str(value))


def set_local_storage(self, item, value):
    """Set or delete one localStorage entry.

    :param item: the key to set
    :param value: the value to store; pass False to delete the entry
    :return: None
    """
    # Same contract as set_session_storage: False as value means delete.
    if value is False:
        self.run_script('localStorage.removeItem(arguments[0]);', str(item))
        return

    # Arguments instead of string splicing, for the same quoting reason.
    self.run_script('localStorage.setItem(arguments[0],arguments[1]);', str(item), str(value))
def set_window_size(self, width=None, height=None):
    """Resize the browser window.

    With both arguments None the window is maximized; if either is 0 it is
    minimized. Otherwise the window is resized, and a dimension left as None
    keeps its current size.

    :param width: window width
    :param height: window height
    :return: None
    :raises ValueError: if a supplied dimension is negative
    """
    if width is None and height is None:
        self.driver.maximize_window()
        return

    if width == 0 or height == 0:
        self.driver.minimize_window()
        return

    # Only validate dimensions that were supplied; comparing None with 0
    # raises TypeError and made single-dimension calls unusable.
    if (width is not None and width < 0) or (height is not None and height < 0):
        raise ValueError('x 和 y参数必须大于0。')

    if width is None or height is None:
        # Fill the missing dimension from the current window size.
        current = self.driver.get_window_size()
        if width is None:
            width = current['width']
        if height is None:
            height = current['height']

    self.driver.set_window_size(width, height)
def get_handles(handles, num_or_handles):
    """Resolve tab indexes and/or handle strings to a set of tab handles.

    :param handles: list of all tab handles
    :param num_or_handles: an index, a handle string, or a list/tuple of them
    :return: set of the selected tab handles
    :raises TypeError: if num_or_handles is not int, str, list or tuple
    """
    if isinstance(num_or_handles, (int, str)):
        wanted = [num_or_handles]
    elif isinstance(num_or_handles, (list, tuple)):
        wanted = num_or_handles
    else:
        raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。')

    selected = set()
    for entry in wanted:
        # Strings are taken as handles; anything else indexes into handles.
        selected.add(entry if isinstance(entry, str) else handles[entry])
    return selected
+ + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + + def _ele(self, + loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], + timeout: float = None, + single: bool = True) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... + + def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... + + @property + def timeout(self) -> float: ... + + @timeout.setter + def timeout(self, second: float) -> None: ... + + def _d_connect(self, + to_url: str, + times: int = 0, + interval: float = 1, + show_errmsg: bool = False) -> Union[bool, None]: ... + + # ----------------driver独有属性和方法----------------------- + @property + def driver(self) -> WebDriver: ... + + @property + def wait_object(self) -> WebDriverWait: ... + + @property + def timeouts(self) -> dict: ... + + @property + def tabs_count(self) -> int: ... + + @property + def tab_handles(self) -> list: ... + + @property + def current_tab_index(self) -> int: ... + + @property + def current_tab_handle(self) -> str: ... + + @property + def active_ele(self) -> DriverElement: ... + + @property + def scroll(self) -> Scroll: ... + + @property + def to_frame(self) -> ToFrame: ... + + def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> None: ... + + def wait_ele(self, + loc_or_ele: Union[str, tuple, DriverElement, WebElement], + timeout: float = None) -> ElementWaiter: ... + + def check_page(self) -> Union[bool, None]: ... + + def run_script(self, script: str, *args) -> Any: ... + + def run_async_script(self, script: str, *args) -> Any: ... + + def run_cdp(self, cmd: str, **cmd_args) -> Any: ... + + def create_tab(self, url: str = '') -> None: ... + + def close_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... + + def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... 
+ + def to_tab(self, num_or_handle: Union[int, str] = 0) -> None: ... + + def set_ua_to_tab(self, ua: str) -> None: ... + + def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ... + + def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ... + + def set_session_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def set_local_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def clean_cache(self, + session_storage: bool = True, + local_storage: bool = True, + cache: bool = True, + cookies: bool = True) -> None: ... + + def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... + + def scroll_to_see(self, loc_or_ele: Union[str, tuple, WebElement, DriverElement]) -> None: ... + + def refresh(self) -> None: ... + + def stop_loading(self) -> None: ... + + def back(self) -> None: ... + + def forward(self) -> None: ... + + def set_window_size(self, width: int = None, height: int = None) -> None: ... + + def chrome_downloading(self, download_path: str) -> list: ... + + def process_alert(self, ok: bool = True, send: str = None, timeout: float = None) -> Union[str, None]: ... + + +class ToFrame(object): + + def __init__(self, page: DriverPage): + self.page: DriverPage = ... + + def __call__(self, condition: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> Union[ + DriverPage, MixPage]: ... + + def main(self) -> DriverPage: ... + + def parent(self, level: int = 1) -> DriverPage: ... + + def by_id(self, id_: str) -> DriverPage: ... + + def by_name(self, name: str) -> DriverPage: ... + + def by_index(self, index: int) -> DriverPage: ... + + def by_loc(self, loc: Union[str, tuple]) -> DriverPage: ... + + def by_ele(self, ele: Union[DriverElement, WebElement]) -> DriverPage: ... + + +def get_handles(handles: list, num_or_handles: Union[int, str, list, tuple]) -> set: ... 
def __init__(self, mode='d', drission=None, timeout=None, driver_options=None, session_options=None):
    """Initialise the page in driver ('d') or session ('s') mode.

    :param mode: 'd' or 's', i.e. driver mode or session mode (case-insensitive)
    :param drission: Drission object; created automatically when not given. When it is given,
                     driver_options and session_options are not used
    :param timeout: timeout; element-lookup time in d mode, connection time in s mode, 10 seconds by default
    :param driver_options: browser settings used to build the WebDriver of a newly created Drission;
                           pass False to not create one
    :param session_options: requests settings used to build the Session of a newly created Drission;
                            pass False to not create one
    :raises ValueError: if mode is neither 's' nor 'd'
    """
    self._mode = mode.lower()
    if self._mode not in ('s', 'd'):
        raise ValueError('mode参数只能是s或d。')

    # Starts the initialiser chain after DriverPage in the MRO, so the
    # __init__ of SessionPage and DriverPage are not run here
    # (presumably this reaches BasePage.__init__ - confirm against SessionPage's bases).
    super(DriverPage, self).__init__(timeout)
    # Placeholder flags rather than real objects: the side matching the
    # starting mode is None, the other one True.
    # NOTE(review): looks like a "not created yet" marker - confirm in change_mode.
    self._driver, self._session = (None, True) if self._mode == 's' else (True, None)
    # A Drission is only built from the two option arguments when none was passed in.
    self._drission = drission or Drission(driver_options, session_options)
    self._wait_object = None
    self._response = None
    self._scroll = None
    self._download_set = None
    self._download_path = None

    if self._mode == 'd':
        try:
            # Take the three timeouts from the driver options; an explicit
            # numeric timeout argument overrides the 'implicit' one.
            timeouts = self.drission.driver_options.timeouts
            t = timeout if isinstance(timeout, (int, float)) else timeouts['implicit']
            self.set_timeouts(t, timeouts['pageLoad'], timeouts['script'])

        except Exception:
            # Best effort: if the options cannot be read or applied, fall
            # back to the given timeout, or 10 when none was given.
            self.timeout = timeout if timeout is not None else 10
timeout: 超时时间 + :return: 子元素对象或属性文本 + """ + if self._mode == 's': + return super().__call__(loc_or_str) + elif self._mode == 'd': + return super(SessionPage, self).__call__(loc_or_str, timeout) + + # -----------------共有属性和方法------------------- + @property + def url(self): + """返回当前url""" + if self._mode == 'd': + return self._drission.driver.current_url if self._driver else None + elif self._mode == 's': + return self._session_url + + @property + def title(self): + """返回网页title""" + if self._mode == 's': + return super().title + elif self._mode == 'd': + return super(SessionPage, self).title + + @property + def html(self): + """返回页面html文本""" + if self._mode == 's': + return super().html + elif self._mode == 'd': + return super(SessionPage, self).html + + @property + def json(self): + """当返回内容是json格式时,返回对应的字典""" + if self._mode == 's': + return super().json + elif self._mode == 'd': + return super(SessionPage, self).json + + def get(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): + """跳转到一个url + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数,s模式专用 + :return: url是否可用,d模式返回None时表示不确定 + """ + if self._mode == 'd': + return super(SessionPage, self).get(url, show_errmsg, retry, interval) + elif self._mode == 's': + return super().get(url, show_errmsg, retry, interval, **kwargs) + + def ele(self, loc_or_ele, timeout=None): + """返回第一个符合条件的元素、属性或节点文本 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :return: 元素对象或属性、文本节点文本 + """ + if self._mode == 's': + return super().ele(loc_or_ele) + elif self._mode == 'd': + return super(SessionPage, self).ele(loc_or_ele, timeout=timeout) + + def eles(self, loc_or_str, timeout=None): + """返回页面中所有符合条件的元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与页面等待时间一致 + :return: 元素对象或属性、文本组成的列表 + """ + if self._mode == 's': + return super().eles(loc_or_str) + elif self._mode == 
'd': + return super(SessionPage, self).eles(loc_or_str, timeout=timeout) + + def s_ele(self, loc_or_ele=None): + """查找第一个符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + if self._mode == 's': + return super().s_ele(loc_or_ele) + elif self._mode == 'd': + return super(SessionPage, self).s_ele(loc_or_ele) + + def s_eles(self, loc_or_str): + """查找所有符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本组成的列表 + """ + if self._mode == 's': + return super().s_eles(loc_or_str) + elif self._mode == 'd': + return super(SessionPage, self).s_eles(loc_or_str) + + def _ele(self, loc_or_ele, timeout=None, single=True): + """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param timeout: 查找元素超时时间,d模式专用 + :param single: True则返回第一个,False则返回全部 + :return: 元素对象或属性、文本节点文本 + """ + if self._mode == 's': + return super()._ele(loc_or_ele, single=single) + elif self._mode == 'd': + return super(SessionPage, self)._ele(loc_or_ele, timeout=timeout, single=single) + + def get_cookies(self, as_dict=False, all_domains=False): + """返回cookies + :param as_dict: 是否以字典方式返回 + :param all_domains: 是否返回所有域的cookies + :return: cookies信息 + """ + if self._mode == 's': + return super().get_cookies(as_dict, all_domains) + elif self._mode == 'd': + return super(SessionPage, self).get_cookies(as_dict) + + # ----------------MixPage独有属性和方法----------------------- + @property + def drission(self): + """返回当前使用的 Dirssion 对象""" + return self._drission + + @property + def driver(self): + """返回 driver 对象,如没有则创建 + 每次访问时切换到 d 模式,用于独有函数及外部调用 + :return: WebDriver对象 + """ + self.change_mode('d') + return self._drission.driver + + @property + def session(self): + """返回 Session 对象,如没有则创建""" + return self._drission.session + + @property + def response(self): + """返回 s 模式获取到的 Response 对象,切换到 s 模式""" + self.change_mode('s') + return self._response + + 
@property + def mode(self): + """返回当前模式,'s'或'd' """ + return self._mode + + @property + def _session_url(self): + """返回 session 保存的url""" + return self._response.url if self._response else None + + def change_mode(self, mode=None, go=True, copy_cookies=True): + """切换模式,接收's'或'd',除此以外的字符串会切换为 d 模式 + 切换时会把当前模式的cookies复制到目标模式 + 切换后,如果go是True,调用相应的get函数使访问的页面同步 + 注意:s转d时,若浏览器当前网址域名和s模式不一样,必须会跳转 + :param mode: 模式字符串 + :param go: 是否跳转到原模式的url + :param copy_cookies: 是否复制cookies到目标模式 + """ + if mode is not None and mode.lower() == self._mode: + return + + self._mode = 's' if self._mode == 'd' else 'd' + + # s模式转d模式 + if self._mode == 'd': + self._driver = True + self._url = None if not self._driver else self._drission.driver.current_url + + if self._session_url: + if copy_cookies: + self.cookies_to_driver(self._session_url) + + if go: + self.get(self._session_url) + + # d模式转s模式 + elif self._mode == 's': + self._session = True + self._url = self._session_url + + if self._driver: + if copy_cookies: + self.cookies_to_session() + + if go and self._drission.driver.current_url.startswith('http'): + self.get(self._drission.driver.current_url) + + def set_cookies(self, cookies, refresh=True): + """设置cookies + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :param refresh: 设置cookies后是否刷新页面 + :return: None + """ + if self._mode == 's': + self.drission.set_cookies(cookies, set_session=True) + elif self._mode == 'd': + self.drission.set_cookies(cookies, set_driver=True) + if refresh: + self.refresh() + + def cookies_to_session(self, copy_user_agent=False): + """从driver复制cookies到session + :param copy_user_agent : 是否复制user agent信息 + """ + self._drission.cookies_to_session(copy_user_agent) + + def cookies_to_driver(self, url=None): + """从session复制cookies到driver + chrome需要指定域才能接收cookies + :param url: 目标域 + :return: None + """ + url = url or self._session_url + self._drission.cookies_to_driver(url) + + def check_page(self, by_requests=False): + """d模式时检查网页是否符合预期 + 
默认由response状态检查,可重载实现针对性检查 + :param by_requests: 是否用内置response检查 + :return: bool或None,None代表不知道结果 + """ + if self._session_url and self._session_url == self.url: + return self._response.ok + + # 使用requests访问url并判断可用性 + if by_requests: + self.cookies_to_session() + r = self._make_response(self.url, retry=0)[0] + return r.ok if r else False + + def close_driver(self): + """关闭driver及浏览器""" + self._driver = None + self.drission.close_driver(True) + + def close_session(self): + """关闭session""" + self._session = None + self._response = None + self.drission.close_session() + + # ----------------重写SessionPage的函数----------------------- + def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + """用post方式跳转到url,会切换到s模式 + :param url: 目标url + :param data: post方式时提交的数据 + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + self.change_mode('s', go=False) + return super().post(url, data, show_errmsg, retry, interval, **kwargs) + + @property + def download(self): + """返回下载器对象""" + if self.mode == 'd': + self.cookies_to_session() + return super().download + + def chrome_downloading(self, path=None): + """返回浏览器下载中的文件列表 + :param path: 下载文件夹路径,默认读取配置信息 + :return: 正在下载的文件列表 + """ + try: + path = path or self._drission.driver_options.experimental_options['prefs']['download.default_directory'] + if not path: + raise ValueError('未指定下载路径。') + except Exception: + raise IOError('无法找到下载路径。') + + return super().chrome_downloading(path) + + # ----------------MixPage独有函数----------------------- + def hide_browser(self): + """隐藏浏览器窗口""" + self.drission.hide_browser() + + def show_browser(self): + """显示浏览器窗口""" + self.drission.show_browser() diff --git a/DrissionPage/mixpage/mix_page.pyi b/DrissionPage/mixpage/mix_page.pyi new file mode 100644 index 0000000..beea82e --- /dev/null +++ b/DrissionPage/mixpage/mix_page.pyi @@ -0,0 +1,156 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact 
: g1879@qq.com +""" +from typing import Union, List, Tuple, Any + +from DownloadKit import DownloadKit +from requests import Response, Session +from requests.cookies import RequestsCookieJar +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.webdriver import WebDriver +from selenium.webdriver.remote.webelement import WebElement + +from .base import BasePage +from DrissionPage.configs.session_options import SessionOptions +from DrissionPage.configs.driver_options import DriverOptions +from .drission import Drission +from .driver_element import DriverElement +from .driver_page import DriverPage +from .session_element import SessionElement +from .session_page import SessionPage + + +class MixPage(SessionPage, DriverPage, BasePage): + + def __init__(self, + mode: str = 'd', + drission: Union[Drission, str] = None, + timeout: float = None, + driver_options: Union[Options, DriverOptions, bool] = None, + session_options: Union[dict, SessionOptions, bool] = None) -> None: + self._mode: str = ... + self._drission: Drission = ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], + timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... + + # -----------------共有属性和方法------------------- + @property + def url(self) -> Union[str, None]: ... + + @property + def title(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def json(self) -> dict: ... 
+ + def get(self, + url: str, + show_errmsg: bool | None = False, + retry: int | None = None, + interval: float | None = None, + timeout: float | None = ..., + params: dict | None = ..., + data: Union[dict, str, None] = ..., + json: Union[dict, str, None] = ..., + headers: dict | None = ..., + cookies: Any | None = ..., + files: Any | None = ..., + auth: Any | None = ..., + allow_redirects: bool = ..., + proxies: dict | None = ..., + hooks: Any | None = ..., + stream: Any | None = ..., + verify: Any | None = ..., + cert: Any | None = ...) -> Union[bool, None]: ... + + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], + timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union[DriverElement, SessionElement, str]]: ... + + def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement] = None) \ + -> Union[SessionElement, str, None]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + + def _ele(self, + loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], + timeout: float = None, single: bool = False) \ + -> Union[DriverElement, SessionElement, str, None, List[Union[SessionElement, str]], List[ + Union[DriverElement, str]]]: ... + + def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: ... + + # ----------------MixPage独有属性和方法----------------------- + @property + def drission(self) -> Drission: ... + + @property + def driver(self) -> WebDriver: ... + + @property + def session(self) -> Session: ... + + @property + def response(self) -> Response: ... + + @property + def mode(self) -> str: ... + + @property + def _session_url(self) -> str: ... + + def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... 
+ + def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict], refresh: bool = True) -> None: ... + + def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... + + def cookies_to_driver(self, url: str = None) -> None: ... + + def check_page(self, by_requests: bool = False) -> Union[bool, None]: ... + + def close_driver(self) -> None: ... + + def close_session(self) -> None: ... + + # ----------------重写SessionPage的函数----------------------- + def post(self, + url: str, + data: Union[dict, str, None] = None, + show_errmsg: bool = False, + retry: int | None = None, + interval: float | None = None, + timeout: float | None = ..., + params: dict | None = ..., + json: Union[dict, str, None] = ..., + headers: dict | None = ..., + cookies: Any | None = ..., + files: Any | None = ..., + auth: Any | None = ..., + allow_redirects: bool = ..., + proxies: dict | None = ..., + hooks: Any | None = ..., + stream: Any | None = ..., + verify: Any | None = ..., + cert: Any | None = ...) -> bool: ... + + @property + def download(self) -> DownloadKit: ... + + def chrome_downloading(self, path: str = None) -> list: ... + + # ----------------MixPage独有函数----------------------- + def hide_browser(self) -> None: ... + + def show_browser(self) -> None: ... 
diff --git a/DrissionPage/mixpage/session_element.py b/DrissionPage/mixpage/session_element.py new file mode 100644 index 0000000..39b5232 --- /dev/null +++ b/DrissionPage/mixpage/session_element.py @@ -0,0 +1,357 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from html import unescape +from re import match, DOTALL + +from lxml.etree import tostring +from lxml.html import HtmlElement, fromstring + +from .base import DrissionElement, BasePage, BaseElement +from DrissionPage.commons.locator import get_loc +from DrissionPage.commons.web import get_ele_txt, make_absolute_link + + +class SessionElement(DrissionElement): + """session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能""" + + def __init__(self, ele, page=None): + """初始化对象 + :param ele: 被包装的HtmlElement元素 + :param page: 元素所在页面对象,如果是从 html 文本生成的元素,则为 None + """ + super().__init__(page) + self._inner_ele = ele + + @property + def inner_ele(self): + return self._inner_ele + + def __repr__(self): + attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] + return f'' + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本 + """ + return self.ele(loc_or_str) + + @property + def tag(self): + """返回元素类型""" + return self._inner_ele.tag + + @property + def html(self): + """返回outerHTML文本""" + html = tostring(self._inner_ele, method="html").decode() + return unescape(html[:html.rfind('>') + 1]) # tostring()会把跟紧元素的文本节点也带上,因此要去掉 + + @property + def inner_html(self): + """返回元素innerHTML文本""" + r = match(r'<.*?>(.*)', self.html, flags=DOTALL) + return '' if not r else r.group(1) + + @property + def attrs(self): + """返回元素所有属性及值""" + return {attr: self.attr(attr) for attr, val in self.inner_ele.items()} + + @property + def text(self): + """返回元素内所有文本""" + return get_ele_txt(self) + + @property + def raw_text(self): + """返回未格式化处理的元素内文本""" 
+ return str(self._inner_ele.text_content()) + + def parent(self, level_or_loc=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :return: 上级元素对象 + """ + return super().parent(level_or_loc) + + def prev(self, filter_loc='', index=1, timeout=None): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + return super().prev(index, filter_loc, timeout) + + def next(self, filter_loc='', index=1, timeout=None): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 后面第几个查询结果 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素 + """ + return super().next(index, filter_loc, timeout) + + def before(self, filter_loc='', index=1, timeout=None): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果 + :param timeout: 查找节点的超时时间 + :return: 本元素前面的某个元素或节点 + """ + return super().before(index, filter_loc, timeout) + + def after(self, filter_loc='', index=1, timeout=None): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param filter_loc: 用于筛选的查询语法 + :param index: 后面第几个查询结果 + :param timeout: 查找节点的超时时间 + :return: 本元素后面的某个元素或节点 + """ + return super().after(index, filter_loc, timeout) + + def prevs(self, filter_loc='', timeout=None): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().prevs(filter_loc, timeout) + + def nexts(self, filter_loc='', timeout=None): + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().nexts(filter_loc, timeout) + + def befores(self, filter_loc='', timeout=None): + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素前面的元素或节点组成的列表 + """ + return super().befores(filter_loc, timeout) + + def afters(self, filter_loc='', timeout=None): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + 
:param filter_loc: 用于筛选的查询语法 + :param timeout: 查找节点的超时时间 + :return: 本元素后面的元素或节点组成的列表 + """ + return super().afters(filter_loc, timeout) + + def attr(self, attr): + """返回attribute属性值 + :param attr: 属性名 + :return: 属性值文本,没有该属性返回None + """ + # 获取href属性时返回绝对url + if attr == 'href': + link = self.inner_ele.get('href') + # 若为链接为None、js或邮件,直接返回 + if not link or link.lower().startswith(('javascript:', 'mailto:')): + return link + + else: # 其它情况直接返回绝对url + return make_absolute_link(link, self.page) + + elif attr == 'src': + return make_absolute_link(self.inner_ele.get('src'), self.page) + + elif attr == 'text': + return self.text + + elif attr == 'innerText': + return self.raw_text + + elif attr in ('html', 'outerHTML'): + return self.html + + elif attr == 'innerHTML': + return self.inner_html + + else: + return self.inner_ele.get(attr) + + def ele(self, loc_or_str, timeout=None): + """返回当前元素下级符合条件的第一个元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_str) + + def eles(self, loc_or_str, timeout=None): + """返回当前元素下级所有符合条件的子元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, single=False) + + def s_ele(self, loc_or_str=None): + """返回当前元素下级符合条件的第一个元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_str) + + def s_eles(self, loc_or_str): + """返回当前元素下级所有符合条件的子元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, single=False) + + def _ele(self, loc_or_str, timeout=None, single=True, relative=False): + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和父类对应 + :param single: True则返回第一个,False则返回全部 + :param relative: 
WebPage用的表示是否相对定位的参数 + :return: SessionElement对象 + """ + return make_session_ele(self, loc_or_str, single) + + def _get_ele_path(self, mode): + """获取css路径或xpath路径 + :param mode: 'css' 或 'xpath' + :return: css路径或xpath路径 + """ + path_str = '' + ele = self + + while ele: + if mode == 'css': + brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) + path_str = f'>:nth-child({brothers + 1}){path_str}' + else: + brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) + path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' + + ele = ele.parent() + + return f':root{path_str[1:]}' if mode == 'css' else path_str + + +def make_session_ele(html_or_ele, loc=None, single=True): + """从接收到的对象或html文本中查找元素,返回SessionElement对象 + 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 + :param html_or_ele: html文本、BaseParser对象 + :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 + :param single: True则返回第一个,False则返回全部 + :return: 返回SessionElement元素或列表,或属性文本 + """ + # ---------------处理定位符--------------- + if not loc: + if isinstance(html_or_ele, SessionElement): + return html_or_ele if single else [html_or_ele] + + loc = ('xpath', '.') + + elif isinstance(loc, (str, tuple)): + loc = get_loc(loc) + + else: + raise ValueError("定位符必须为str或长度为2的tuple。") + + # ---------------根据传入对象类型获取页面对象和lxml元素对象--------------- + the_type = str(type(html_or_ele)) + # SessionElement + if the_type.endswith(".SessionElement'>"): + page = html_or_ele.page + + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc[1]}' + html_or_ele = html_or_ele.inner_ele + + # 若css以>开头,表示找元素的直接子元素,要用page以绝对路径才能找到 + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): + loc_str = f'{html_or_ele.css_path}{loc[1]}' + if html_or_ele.page: + html_or_ele = fromstring(html_or_ele.page.html) + else: # 接收html文本,无page的情况 + html_or_ele = fromstring(html_or_ele('xpath:/ancestor::*').html) + + else: + html_or_ele = html_or_ele.inner_ele + + loc = loc[0], 
loc_str + + # ChromiumElement, DriverElement + elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc[1]}' + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): + loc_str = f'{html_or_ele.css_path}{loc[1]}' + loc = loc[0], loc_str + + # 获取整个页面html再定位到当前元素,以实现查找上级元素 + page = html_or_ele.page + xpath = html_or_ele.xpath + if hasattr(html_or_ele, 'doc_id'): # ChromiumElement,兼容传入的元素在iframe内的情况 + html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele.doc_id)['outerHTML'] + else: + html = html_or_ele.page.html + html_or_ele = fromstring(html) + html_or_ele = html_or_ele.xpath(xpath)[0] + + # 各种页面对象 + elif isinstance(html_or_ele, BasePage): + page = html_or_ele + html_or_ele = fromstring(html_or_ele.html) + + # 直接传入html文本 + elif isinstance(html_or_ele, str): + page = None + html_or_ele = fromstring(html_or_ele) + + # ShadowRootElement, ChromiumShadowRoot, ChromiumFrame + elif isinstance(html_or_ele, BaseElement) or the_type.endswith(".ChromiumFrame'>"): + page = html_or_ele.page + html_or_ele = fromstring(html_or_ele.html) + + else: + raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') + + # ---------------执行查找----------------- + try: + if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 + ele = html_or_ele.xpath(loc[1]) + else: # 用css selector获取元素对象列表 + ele = html_or_ele.cssselect(loc[1]) + + if not isinstance(ele, list): # 结果不是列表,如数字 + return ele + + # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 + if single: + ele = ele[0] if ele else None + if isinstance(ele, HtmlElement): + return SessionElement(ele, page) + elif isinstance(ele, str): + return ele + else: + return None + + else: # 返回全部 + return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] + + except Exception as e: + if 'Invalid expression' in str(e): + raise SyntaxError(f'无效的xpath语句:{loc}') + elif 'Expected selector' in str(e): + raise 
SyntaxError(f'无效的css select语句:{loc}') + + raise e diff --git a/DrissionPage/mixpage/session_element.pyi b/DrissionPage/mixpage/session_element.pyi new file mode 100644 index 0000000..69dcb35 --- /dev/null +++ b/DrissionPage/mixpage/session_element.pyi @@ -0,0 +1,114 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from typing import Union, List, Tuple + +from lxml.html import HtmlElement + +from .base import DrissionElement, BaseElement +from .driver_element import DriverElement +from .driver_page import DriverPage +from .session_page import SessionPage + + +class SessionElement(DrissionElement): + + def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None): + self._inner_ele: HtmlElement = ... + self.page: SessionPage = ... + + @property + def inner_ele(self) -> HtmlElement: ... + + def __repr__(self) -> str: ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['SessionElement', str, None]: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + @property + def attrs(self) -> dict: ... + + @property + def text(self) -> str: ... + + @property + def raw_text(self) -> str: ... + + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... + + def prev(self, + filter_loc: Union[tuple, str] = '', + index: int = 1, + timeout: float = None) -> Union['SessionElement', str, None]: ... + + def next(self, + filter_loc: Union[tuple, str] = '', + index: int = 1, + timeout: float = None) -> Union['SessionElement', str, None]: ... + + def before(self, + filter_loc: Union[tuple, str] = '', + index: int = 1, + timeout: float = None) -> Union['SessionElement', str, None]: ... + + def after(self, + filter_loc: Union[tuple, str] = '', + index: int = 1, + timeout: float = None) -> Union['SessionElement', str, None]: ... 
+ + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union['SessionElement', str]]: ... + + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union['SessionElement', str]]: ... + + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union['SessionElement', str]]: ... + + def afters(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union['SessionElement', str]]: ... + + def attr(self, attr: str) -> Union[str, None]: ... + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['SessionElement', str, None]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union['SessionElement', str]]: ... + + def s_ele(self, + loc_or_str: Union[Tuple[str, str], str] = None) -> Union['SessionElement', str, None]: ... + + def s_eles(self, + loc_or_str: Union[Tuple[str, str], str]) -> List[Union['SessionElement', str]]: ... + + def _ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + single: bool = True, + relative: bool = False) -> Union['SessionElement', str, None, List[Union['SessionElement', str]]]: ... + + def _get_ele_path(self, mode: str) -> str: ... + + +def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, DriverElement, BaseElement, DriverPage], + loc: Union[str, Tuple[str, str]] = None, + single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... 
diff --git a/DrissionPage/mixpage/session_page.py b/DrissionPage/mixpage/session_page.py new file mode 100644 index 0000000..1ca8c2c --- /dev/null +++ b/DrissionPage/mixpage/session_page.py @@ -0,0 +1,533 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from re import search +from time import sleep +from urllib.parse import urlparse + +from DownloadKit import DownloadKit +from requests import Session, Response +from requests.structures import CaseInsensitiveDict +from tldextract import extract + +from .base import BasePage +from DrissionPage.configs.session_options import SessionOptions +from DrissionPage.commons.web import cookie_to_dict, set_session_cookies +from .session_element import SessionElement, make_session_ele + + +class SessionPage(BasePage): + """SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页""" + + def __init__(self, session_or_options=None, timeout=None): + """ + :param session_or_options: Session对象或SessionOptions对象 + :param timeout: 连接超时时间,为None时从ini文件读取 + """ + self._response = None + self._download_set = None + self._session = None + self._set = None + self._set_start_options(session_or_options, None) + self._set_runtime_settings() + self._create_session() + timeout = timeout if timeout is not None else self.timeout + super().__init__(timeout) + + def _set_start_options(self, session_or_options, none): + """启动配置 + :param session_or_options: Session、SessionOptions + :param none: 用于后代继承 + :return: None + """ + if not session_or_options or isinstance(session_or_options, SessionOptions): + self._session_options = session_or_options or SessionOptions(session_or_options) + + elif isinstance(session_or_options, Session): + self._session_options = SessionOptions() + self._session = session_or_options + + def _set_runtime_settings(self): + """设置运行时用到的属性""" + self._timeout = self._session_options.timeout + self._download_path = self._session_options.download_path + + def _create_session(self): + """创建内建Session对象""" + if not 
self._session: + self._set_session(self._session_options) + + def _set_session(self, opt): + """根据传入字典对session进行设置 + :param opt: session配置字典 + :return: None + """ + self._session = Session() + + if opt.headers: + self._session.headers = CaseInsensitiveDict(opt.headers) + if opt.cookies: + self.set.cookies(opt.cookies) + if opt.adapters: + for url, adapter in opt.adapters: + self._session.mount(url, adapter) + + attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'stream', 'trust_env', 'max_redirects'] + for i in attrs: + attr = opt.__getattribute__(i) + if attr: + self._session.__setattr__(i, attr) + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性文本 + """ + return self.ele(loc_or_str) + + # -----------------共有属性和方法------------------- + @property + def title(self): + """返回网页title""" + ele = self.ele('xpath://title') + return ele.text if ele else None + + @property + def url(self): + """返回当前访问url""" + return self._url + + @property + def html(self): + """返回页面的html文本""" + return self.response.text if self.response else '' + + @property + def json(self): + """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" + try: + return self.response.json() + except Exception: + return None + + @property + def download_path(self): + """返回下载路径""" + return self._download_path + + @property + def download_set(self): + """返回用于设置下载参数的对象""" + if self._download_set is None: + self._download_set = DownloadSetter(self) + return self._download_set + + @property + def download(self): + """返回下载器对象""" + return self.download_set.DownloadKit + + @property + def session(self): + """返回session对象""" + return self._session + + @property + def response(self): + """返回访问url得到的response对象""" + return self._response + + @property + def set(self): + """返回用于等待的对象""" + if self._set is None: + self._set = SessionPageSetter(self) + 
return self._set + + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): + """用get方式跳转到url + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param timeout: 连接超时时间(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs) + + def ele(self, loc_or_ele, timeout=None): + """返回页面中符合条件的第一个元素、属性或节点文本 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_ele) + + def eles(self, loc_or_str, timeout=None): + """返回页面中所有符合条件的元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 + :return: SessionElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, single=False) + + def s_ele(self, loc_or_ele=None): + """返回页面中符合条件的第一个元素、属性或节点文本 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele) + + def s_eles(self, loc_or_str): + """返回页面中符合条件的所有元素、属性或节点文本 + :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return self._ele(loc_or_str, single=False) + + def _ele(self, loc_or_ele, timeout=None, single=True): + """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 + :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param timeout: 不起实际作用,用于和父类对应 + :param single: True则返回第一个,False则返回全部 + :return: SessionElement对象 + """ + return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single) + + def get_cookies(self, as_dict=False, all_domains=False): + """返回cookies + :param as_dict: 是否以字典方式返回 + :param all_domains: 是否返回所有域的cookies + :return: cookies信息 + """ + if all_domains: + cookies = self.session.cookies + else: + if self.url: + url = extract(self.url) + domain = 
f'{url.domain}.{url.suffix}' + cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') + else: + cookies = tuple(x for x in self.session.cookies) + + if as_dict: + return {x.name: x.value for x in cookies} + else: + return [cookie_to_dict(cookie) for cookie in cookies] + + def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + """用post方式跳转到url + :param url: 目标url + :param data: 提交的数据 + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs) + + def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): + """执行get或post连接 + :param url: 目标url + :param mode: 'get' 或 'post' + :param data: 提交的数据 + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param kwargs: 连接参数 + :return: url是否可用 + """ + retry, interval = self._before_connect(url, retry, interval) + self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs) + + if self._response is None: + self._url_available = False + + else: + if self._response.ok: + self._url_available = True + + else: + if show_errmsg: + raise ConnectionError(f'状态码:{self._response.status_code}.') + self._url_available = False + + return self._url_available + + def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs): + """生成Response对象 + :param url: 目标url + :param mode: 'get' 或 'post' + :param data: post方式要提交的数据 + :param show_errmsg: 是否显示和抛出异常 + :param kwargs: 其它参数 + :return: tuple,第一位为Response或None,第二位为出错信息或'Success' + """ + kwargs = CaseInsensitiveDict(kwargs) + if 'headers' not in kwargs: + kwargs['headers'] = {} + else: + kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) + + # 设置referer和host值 + parsed_url = urlparse(url) + hostname = 
parsed_url.hostname + scheme = parsed_url.scheme + if not check_headers(kwargs, self.session.headers, 'Referer'): + kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' + if 'Host' not in kwargs['headers']: + kwargs['headers']['Host'] = hostname + + if not check_headers(kwargs, self.session.headers, 'timeout'): + kwargs['timeout'] = self.timeout + + if 'allow_redirects' not in kwargs: + kwargs['allow_redirects'] = False + + r = err = None + retry = retry if retry is not None else self.retry_times + interval = interval if interval is not None else self.retry_interval + for i in range(retry + 1): + try: + if mode == 'get': + r = self.session.get(url, **kwargs) + elif mode == 'post': + r = self.session.post(url, data=data, **kwargs) + + if r: + return set_charset(r), 'Success' + + except Exception as e: + err = e + + # if r and r.status_code in (403, 404): + # break + + if i < retry: + sleep(interval) + if show_errmsg: + print(f'重试 {url}') + + if r is None: + if show_errmsg: + if err: + raise err + else: + raise ConnectionError('连接失败') + return None, '连接失败' if err is None else err + + if not r.ok: + if show_errmsg: + raise ConnectionError(f'状态码:{r.status_code}') + return r, f'状态码:{r.status_code}' + + +class SessionPageSetter(object): + def __init__(self, page): + self._page = page + + def timeout(self, second): + """设置连接超时时间 + :param second: 秒数 + :return: None + """ + self._page.timeout = second + + def cookies(self, cookies): + """为Session对象设置cookies + :param cookies: cookies信息 + :return: None + """ + set_session_cookies(self._page.session, cookies) + + def headers(self, headers): + """设置通用的headers + :param headers: dict形式的headers + :return: None + """ + self._page.session.headers = CaseInsensitiveDict(headers) + + def header(self, attr, value): + """设置headers中一个项 + :param attr: 设置名称 + :param value: 设置值 + :return: None + """ + self._page.session.headers[attr.lower()] = value + + def user_agent(self, ua): + """设置user agent + :param ua: 
user agent + :return: None + """ + self._page.session.headers['user-agent'] = ua + + def proxies(self, http, https=None): + """设置proxies参数 + :param http: http代理地址 + :param https: https代理地址 + :return: None + """ + proxies = None if http == https is None else {'http': http, 'https': https or http} + self._page.session.proxies = proxies + + def auth(self, auth): + """设置认证元组或对象 + :param auth: 认证元组或对象 + :return: None + """ + self._page.session.auth = auth + + def hooks(self, hooks): + """设置回调方法 + :param hooks: 回调方法 + :return: None + """ + self._page.session.hooks = hooks + + def params(self, params): + """设置查询参数字典 + :param params: 查询参数字典 + :return: None + """ + self._page.session.params = params + + def verify(self, on_off): + """设置是否验证SSL证书 + :param on_off: 是否验证 SSL 证书 + :return: None + """ + self._page.session.verify = on_off + + def cert(self, cert): + """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 + :param cert: 证书路径或元组 + :return: None + """ + self._page.session.cert = cert + + def stream(self, on_off): + """设置是否使用流式响应内容 + :param on_off: 是否使用流式响应内容 + :return: None + """ + self._page.session.stream = on_off + + def trust_env(self, on_off): + """设置是否信任环境 + :param on_off: 是否信任环境 + :return: None + """ + self._page.session.trust_env = on_off + + def max_redirects(self, times): + """设置最大重定向次数 + :param times: 最大重定向次数 + :return: None + """ + self._page.session.max_redirects = times + + def add_adapter(self, url, adapter): + """添加适配器 + :param url: 适配器对应url + :param adapter: 适配器对象 + :return: None + """ + self._page.session.mount(url, adapter) + + +class DownloadSetter(object): + """用于设置下载参数的类""" + + def __init__(self, page): + self._page = page + self._DownloadKit = None + + @property + def DownloadKit(self): + if self._DownloadKit is None: + self._DownloadKit = DownloadKit(session=self._page.session, goal_path=self._page.download_path) + return self._DownloadKit + + @property + def if_file_exists(self): + """返回用于设置存在同名文件时处理方法的对象""" + return FileExists(self) + + def split(self, 
on_off): + """设置是否允许拆分大文件用多线程下载 + :param on_off: 是否启用多线程下载大文件 + :return: None + """ + self.DownloadKit.split = on_off + + def save_path(self, path): + """设置下载保存路径 + :param path: 下载保存路径 + :return: None + """ + path = path if path is None else str(path) + self._page._download_path = path + self.DownloadKit.goal_path = path + + +class FileExists(object): + """用于设置存在同名文件时处理方法""" + + def __init__(self, setter): + """ + :param setter: DownloadSetter对象 + """ + self._setter = setter + + def __call__(self, mode): + if mode not in ('skip', 'rename', 'overwrite'): + raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") + self._setter.DownloadKit.file_exists = mode + + def skip(self): + """设为跳过""" + self._setter.DownloadKit.file_exists = 'skip' + + def rename(self): + """设为重命名,文件名后加序号""" + self._setter.DownloadKit._file_exists = 'rename' + + def overwrite(self): + """设为覆盖""" + self._setter.DownloadKit._file_exists = 'overwrite' + + +def check_headers(kwargs, headers, arg) -> bool: + """检查kwargs或headers中是否有arg所示属性""" + return arg in kwargs['headers'] or arg in headers + + +def set_charset(response) -> Response: + """设置Response对象的编码""" + # 在headers中获取编码 + content_type = response.headers.get('content-type', '').lower() + charset = search(r'charset[=: ]*(.*)?;', content_type) + + if charset: + response.encoding = charset.group(1) + + # 在headers中获取不到编码,且如果是网页 + elif content_type.replace(' ', '').startswith('text/html'): + re_result = search(b']+).*?>', response.content) + + if re_result: + charset = re_result.group(1).decode() + else: + charset = response.apparent_encoding + + response.encoding = charset + + return response diff --git a/DrissionPage/mixpage/session_page.pyi b/DrissionPage/mixpage/session_page.pyi new file mode 100644 index 0000000..95011aa --- /dev/null +++ b/DrissionPage/mixpage/session_page.pyi @@ -0,0 +1,237 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from pathlib import Path +from typing import Any, Union, Tuple, List + 
+from DownloadKit import DownloadKit +from requests import Session, Response +from requests.adapters import HTTPAdapter +from requests.auth import HTTPBasicAuth +from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict + +from .base import BasePage +from DrissionPage.configs.session_options import SessionOptions +from .session_element import SessionElement + + +class SessionPage(BasePage): + def __init__(self, + session_or_options: Union[Session, SessionOptions] = None, + timeout: float = None): + self._session: Session = ... + self._session_options: SessionOptions = ... + self._url: str = ... + self._response: Response = ... + self._download_path: str = ... + self._download_set: DownloadSetter = ... + self._url_available: bool = ... + self.timeout: float = ... + self.retry_times: int = ... + self.retry_interval: float = ... + self._set: SessionPageSetter = ... + + def _set_start_options(self, session_or_options, none) -> None: ... + + def _create_session(self) -> None: ... + + def _set_session(self, opt: SessionOptions) -> None: ... + + def _set_runtime_settings(self) -> None: ... + + def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def set_headers(self, headers: dict) -> None: ... + + def set_user_agent(self, ua: str) -> None: ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str, SessionElement], + timeout: float = None) -> Union[SessionElement, str, None]: ... + + # -----------------共有属性和方法------------------- + @property + def title(self) -> str: ... + + @property + def url(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def json(self) -> Union[dict, None]: ... + + @property + def download_path(self) -> str: ... + + @property + def download_set(self) -> DownloadSetter: ... 
+ + def get(self, + url: str, + show_errmsg: bool | None = False, + retry: int | None = None, + interval: float | None = None, + timeout: float | None = None, + params: dict | None = ..., + data: Union[dict, str, None] = ..., + json: Union[dict, str, None] = ..., + headers: dict | None = ..., + cookies: Any | None = ..., + files: Any | None = ..., + auth: Any | None = ..., + allow_redirects: bool = ..., + proxies: dict | None = ..., + hooks: Any | None = ..., + stream: Any | None = ..., + verify: Any | None = ..., + cert: Any | None = ...) -> bool: ... + + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, SessionElement], + timeout: float = None) -> Union[SessionElement, str, None]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union[SessionElement, str]]: ... + + def s_ele(self, + loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \ + -> Union[SessionElement, str, None]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + + def _ele(self, + loc_or_ele: Union[Tuple[str, str], str, SessionElement], + timeout: float = None, + single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... + + def get_cookies(self, + as_dict: bool = False, + all_domains: bool = False) -> Union[dict, list]: ... + + # ----------------session独有属性和方法----------------------- + @property + def session(self) -> Session: ... + + @property + def response(self) -> Response: ... + + @property + def set(self) -> SessionPageSetter: ... + + @property + def download(self) -> DownloadKit: ... 
+ + def post(self, + url: str, + data: Union[dict, str, None] = ..., + show_errmsg: bool = False, + retry: int | None = None, + interval: float | None = None, + timeout: float | None = ..., + params: dict | None = ..., + json: Union[dict, str, None] = ..., + headers: dict | None = ..., + cookies: Any | None = ..., + files: Any | None = ..., + auth: Any | None = ..., + allow_redirects: bool = ..., + proxies: dict | None = ..., + hooks: Any | None = ..., + stream: Any | None = ..., + verify: Any | None = ..., + cert: Any | None = ...) -> bool: ... + + def _s_connect(self, + url: str, + mode: str, + data: Union[dict, str, None] = None, + show_errmsg: bool = False, + retry: int = None, + interval: float = None, + **kwargs) -> bool: ... + + def _make_response(self, + url: str, + mode: str = 'get', + data: Union[dict, str] = None, + retry: int = None, + interval: float = None, + show_errmsg: bool = False, + **kwargs) -> tuple: ... + + +class SessionPageSetter(object): + def __init__(self, page: SessionPage): + self._page: SessionPage = ... + + def timeout(self, second: float) -> None: ... + + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def header(self, attr: str, value: str) -> None: ... + + def user_agent(self, ua: str) -> None: ... + + def proxies(self, http, https=None) -> None: ... + + def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... + + def hooks(self, hooks: Union[dict, None]) -> None: ... + + def params(self, params: Union[dict, None]) -> None: ... + + def verify(self, on_off: Union[bool, None]) -> None: ... + + def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... + + def stream(self, on_off: Union[bool, None]) -> None: ... + + def trust_env(self, on_off: Union[bool, None]) -> None: ... + + def max_redirects(self, times: Union[int, None]) -> None: ... + + def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... 
+ + +class DownloadSetter(object): + def __init__(self, page: SessionPage): + self._page: SessionPage = ... + self._DownloadKit: DownloadKit = ... + + @property + def DownloadKit(self) -> DownloadKit: ... + + @property + def if_file_exists(self) -> FileExists: ... + + def split(self, on_off: bool) -> None: ... + + def save_path(self, path: Union[str, Path]): ... + + +class FileExists(object): + def __init__(self, setter: DownloadSetter): + self._setter: DownloadSetter = ... + + def __call__(self, mode: str) -> None: ... + + def skip(self) -> None: ... + + def rename(self) -> None: ... + + def overwrite(self) -> None: ... + + +def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], + arg: str) -> bool: ... + + +def set_charset(response: Response) -> Response: ... diff --git a/DrissionPage/mixpage/shadow_root_element.py b/DrissionPage/mixpage/shadow_root_element.py new file mode 100644 index 0000000..1ce0860 --- /dev/null +++ b/DrissionPage/mixpage/shadow_root_element.py @@ -0,0 +1,219 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from time import perf_counter +from typing import Union + +from selenium.webdriver.remote.webelement import WebElement + +from .base import BaseElement +from DrissionPage.commons.locator import get_loc +from .driver_element import make_driver_ele +from .session_element import make_session_ele, SessionElement + + +class ShadowRootElement(BaseElement): + """ShadowRootElement是用于处理ShadowRoot的类,使用方法和DriverElement基本一致""" + + def __init__(self, inner_ele, parent_ele): + super().__init__(parent_ele.page) + self.parent_ele = parent_ele + self._inner_ele = inner_ele + + @property + def inner_ele(self): + return self._inner_ele + + def __repr__(self): + return f'' + + def __call__(self, loc_or_str, timeout=None): + """在内部查找元素 + 例:ele2 = ele1('@id=ele_id') + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间 + :return: DriverElement对象或属性、文本 + """ + return 
self.ele(loc_or_str, timeout) + + @property + def tag(self): + """元素标签名""" + return 'shadow-root' + + @property + def html(self): + return f'{self.inner_html}' + + @property + def inner_html(self): + """返回内部的html文本""" + shadow_root = WebElement(self.page.driver, self.inner_ele._id) + return shadow_root.get_attribute('innerHTML') + + def parent(self, level_or_loc=1): + """返回上面某一级父元素,可指定层数或用查询语法定位 + :param level_or_loc: 第几级父元素,或定位符 + :return: DriverElement对象 + """ + if isinstance(level_or_loc, int): + loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' + + elif isinstance(level_or_loc, (tuple, str)): + loc = get_loc(level_or_loc, True) + + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}' + + else: + raise TypeError('level_or_loc参数只能是tuple、int或str。') + + return self.parent_ele.ele(loc, timeout=0) + + def next(self, index=1, filter_loc=''): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :return: DriverElement对象 + """ + nodes = self.nexts(filter_loc=filter_loc) + return nodes[index - 1] if nodes else None + + def before(self, index=1, filter_loc=''): + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 前面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素前面的某个元素或节点 + """ + nodes = self.befores(filter_loc=filter_loc) + return nodes[index - 1] if nodes else None + + def after(self, index=1, filter_loc=''): + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 + :param index: 后面第几个查询结果 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素后面的某个元素或节点 + """ + nodes = self.afters(filter_loc=filter_loc) + return nodes[index - 1] if nodes else None + + def nexts(self, filter_loc=''): + """返回后面所有兄弟元素或节点组成的列表 + :param filter_loc: 用于筛选的查询语法 + :return: DriverElement对象组成的列表 + """ + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = f'xpath:./{loc}' + 
return self.parent_ele.eles(xpath, timeout=0.1) + + def befores(self, filter_loc=''): + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素前面的元素或节点组成的列表 + """ + loc = get_loc(filter_loc, True) + if loc[0] == 'css selector': + raise ValueError('此css selector语法不受支持,请换成xpath。') + + loc = loc[1].lstrip('./') + xpath = f'xpath:./preceding::{loc}' + return self.parent_ele.eles(xpath, timeout=0.1) + + def afters(self, filter_loc=''): + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 + :param filter_loc: 用于筛选的查询语法 + :return: 本元素后面的元素或节点组成的列表 + """ + eles1 = self.nexts(filter_loc) + loc = get_loc(filter_loc, True)[1].lstrip('./') + xpath = f'xpath:./following::{loc}' + return eles1 + self.parent_ele.eles(xpath, timeout=0.1) + + def ele(self, loc_or_str, timeout=None): + """返回当前元素下级符合条件的第一个元素,默认返回 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 + :return: DriverElement对象或属性、文本 + """ + return self._ele(loc_or_str, timeout) + + def eles(self, loc_or_str, timeout=None): + """返回当前元素下级所有符合条件的子元素 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 + :return: DriverElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, timeout=timeout, single=False) + + def s_ele(self, loc_or_str=None) -> Union[SessionElement, str, None]: + """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return make_session_ele(self, loc_or_str) + + def s_eles(self, loc_or_str): + """查找所有符合条件的元素以SessionElement列表形式返回,处理复杂页面时效率很高 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + return make_session_ele(self, loc_or_str, single=False) + + def _ele(self, loc_or_str, timeout=None, single=True, relative=False): + """返回当前元素下级符合条件的子元素,默认返回第一个 + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间 + :param single: True则返回第一个,False则返回全部 + :param relative: WebPage用的表示是否相对定位的参数 + :return: DriverElement对象 
+ """ + # 先转换为sessionElement,再获取所有元素,获取它们的css selector路径,再用路径在页面上执行查找 + loc = get_loc(loc_or_str) + if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): + loc = loc[0], loc[1][5:] + + timeout = timeout if timeout is not None else self.page.timeout + t1 = perf_counter() + eles = make_session_ele(self.html).eles(loc) + while not eles and perf_counter() - t1 <= timeout: + eles = make_session_ele(self.html).eles(loc) + + if not eles: + return None if single else eles + + css_paths = [i.css_path[47:] for i in eles] + + if single: + return make_driver_ele(self, f'css:{css_paths[0]}', single, timeout) + else: + return [make_driver_ele(self, f'css:{css}', True, timeout) for css in css_paths] + + def run_script(self, script, *args): + """执行js代码,传入自己为第一个参数 + :param script: js文本 + :param args: 传入的参数 + :return: js执行结果 + """ + shadow_root = WebElement(self.page.driver, self.inner_ele._id) + return shadow_root.parent.execute_script(script, shadow_root, *args) + + def is_enabled(self): + """是否可用""" + return self.inner_ele.is_enabled() + + def is_valid(self): + """用于判断元素是否还能用,应对页面跳转元素不能用的情况""" + try: + self.is_enabled() + return True + + except Exception: + return False diff --git a/DrissionPage/mixpage/shadow_root_element.pyi b/DrissionPage/mixpage/shadow_root_element.pyi new file mode 100644 index 0000000..8647391 --- /dev/null +++ b/DrissionPage/mixpage/shadow_root_element.pyi @@ -0,0 +1,84 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from typing import Union, Any, Tuple, List + +from selenium.webdriver.remote.webelement import WebElement + +from .driver_page import DriverPage +from .mix_page import MixPage +from .base import BaseElement +from .driver_element import DriverElement +from .session_element import SessionElement + + +class ShadowRootElement(BaseElement): + + def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): + self._inner_ele: WebElement = ... + self.parent_ele: DriverElement = ... 
+ self.page: Union[MixPage, DriverPage] = ... + + @property + def inner_ele(self) -> WebElement: ... + + def __repr__(self) -> str: ... + + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[DriverElement, str, None]: ... + + @property + def tag(self) -> str: ... + + @property + def html(self) -> str: ... + + @property + def inner_html(self) -> str: ... + + def parent(self, level_or_loc: Union[str, int] = 1) -> DriverElement: ... + + def next(self, + index: int = 1, + filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... + + def before(self, + index: int = 1, + filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... + + def after(self, + index: int = 1, + filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... + + def nexts(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... + + def befores(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... + + def afters(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union[DriverElement, str, None]: ... + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union[DriverElement, str]]: ... + + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... + + def _ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = ..., + single: bool = ..., + relative: bool = ...) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... + + def run_script(self, script: str, *args) -> Any: ... + + def is_enabled(self) -> bool: ... + + def is_valid(self) -> bool: ... 
diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index e237707..9348554 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -518,7 +518,7 @@ def set_charset(response): content_type = response.headers.get('content-type', '').lower() if not content_type.endswith(';'): content_type += ';' - charset = search(r'charset[=: ]*(.*)?;', content_type) + charset = search(r'charset[=: ]*(.*)?;?', content_type) if charset: response.encoding = charset.group(1) diff --git a/setup.py b/setup.py index d621cb6..3e591f4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.30", + version="3.2.31", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", From 874a0b0e2f53a40427f9fe76faadd47c7edbb686 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 5 Aug 2023 15:34:49 +0800 Subject: [PATCH 06/17] =?UTF-8?q?=E5=85=83=E7=B4=A0=E5=B1=8F=E5=B9=95?= =?UTF-8?q?=E5=9D=90=E6=A0=87=E4=B9=98=E4=BB=A5=E5=83=8F=E7=B4=A0=E6=AF=94?= =?UTF-8?q?=EF=BC=9Bnew=5Ftab()=E9=BB=98=E8=AE=A4=E4=B8=8D=E5=88=87?= =?UTF-8?q?=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_element.py | 9 ++++++--- DrissionPage/chromium_page.py | 2 +- DrissionPage/chromium_page.pyi | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index e76ad7a..929310a 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1535,21 +1535,24 @@ class Locations(object): """返回元素左上角在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_location - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * 
pr) @property def screen_midpoint(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_midpoint - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * pr) @property def screen_click_point(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_click_point - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * pr) def _get_viewport_rect(self, quad): """按照类型返回在可视窗口中的范围 diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index b66a801..6b9ff62 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -214,7 +214,7 @@ class ChromiumPage(ChromiumBase): and (tab_type is None or i['type'] in tab_type))] return r[0]['id'] if r and single else r - def new_tab(self, url=None, switch_to=True): + def new_tab(self, url=None, switch_to=False): """新建一个标签页,该标签页在最后面 :param url: 新标签页跳转到的网址 :param switch_to: 新建标签页后是否把焦点移过去 diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index b6d7023..5fde86a 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -84,7 +84,7 @@ class ChromiumPage(ChromiumBase): def find_tabs(self, title: str = None, url: str = None, tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ... - def new_tab(self, url: str = None, switch_to: bool = True) -> str: ... + def new_tab(self, url: str = None, switch_to: bool = False) -> str: ... def to_main_tab(self) -> None: ... 
From 462c65ae639c0e608b77ac3fd840eb0e9ee9ba2f Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 5 Aug 2023 15:48:04 +0800 Subject: [PATCH 07/17] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=BD=95=E5=B1=8F?= =?UTF-8?q?=E8=A7=86=E9=A2=91=E7=BC=96=E7=A0=81=E4=B8=80=E4=BA=9B=E7=94=B5?= =?UTF-8?q?=E8=84=91=E4=B8=8D=E6=94=AF=E6=8C=81=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 3ecb753..b23cd8d 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -1379,7 +1379,7 @@ class Screencast(object): raise TypeError('转换成视频仅支持英文路径和文件名。') try: - from cv2 import VideoWriter, imread + from cv2 import VideoWriter, imread, VideoWriter_fourcc from numpy import fromfile, uint8 except ModuleNotFoundError: raise ModuleNotFoundError('请先安装cv2,pip install opencv-python') @@ -1389,10 +1389,7 @@ class Screencast(object): imgInfo = img.shape size = (imgInfo[1], imgInfo[0]) - # if video_name and not video_name.endswith('mp4'): - # video_name = f'{video_name}.mp4' - # name = f'{time()}.mp4' if not video_name else video_name - videoWrite = VideoWriter(path, 14, 5, size) + videoWrite = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size) for i in pic_list: img = imread(str(i)) From 5bbbeb23d99922c14fcefb2b64eb288b86c263d0 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 9 Aug 2023 11:46:11 +0800 Subject: [PATCH 08/17] =?UTF-8?q?3.2.31=20get=5Fsrc()=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0base64=5Fto=5Fbytes=E5=8F=82=E6=95=B0?= =?UTF-8?q?=EF=BC=9B=E4=BD=BF=E7=94=A81.0.0=E7=89=88DownloadKit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_element.py | 10 +++++++--- DrissionPage/chromium_element.pyi | 2 +- requirements.txt | 2 +- setup.py | 2 +- 4 files changed, 10 
insertions(+), 6 deletions(-) diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 929310a..3367380 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -436,9 +436,10 @@ class ChromiumElement(DrissionElement): js = f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");' return self.run_js(js) - def get_src(self, timeout=None): + def get_src(self, timeout=None, base64_to_bytes=True): """返回元素src资源,base64的会转为bytes返回,其它返回str :param timeout: 等待资源加载的超时时间 + :param base64_to_bytes: 为True时,如果是base64数据,转换为bytes格式 :return: 资源内容 """ timeout = self.page.timeout if timeout is None else timeout @@ -471,8 +472,11 @@ class ChromiumElement(DrissionElement): return None if result['base64Encoded']: - from base64 import b64decode - data = b64decode(result['content']) + if base64_to_bytes: + from base64 import b64decode + data = b64decode(result['content']) + else: + data = result['content'] else: data = result['content'] return data diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 1084c65..37de21f 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -176,7 +176,7 @@ class ChromiumElement(DrissionElement): def style(self, style: str, pseudo_ele: str = '') -> str: ... - def get_src(self, timeout: float = None) -> Union[bytes, str, None]: ... + def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ... def save(self, path: [str, bool] = None, rename: str = None, timeout: float = None) -> None: ... 
diff --git a/requirements.txt b/requirements.txt index d04ff8e..4e712ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ requests lxml cssselect -DownloadKit>=0.5.3 +DownloadKit>=1.0.0 FlowViewer>=0.3.0 websocket-client click diff --git a/setup.py b/setup.py index 3e591f4..cccddb0 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( 'lxml', 'requests', 'cssselect', - 'DownloadKit>=0.5.3', + 'DownloadKit>=1.0.0', 'FlowViewer>=0.3.0', 'websocket-client', 'click', From 0aaff9fb351cc2150e6bb0bff0f529653e6be88a Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 9 Aug 2023 06:31:27 +0000 Subject: [PATCH 09/17] Revert "!19 v3.2.31" This reverts commit 6cc85c920976778ffc254a76e6f6edc0190922ac. --- DrissionPage/__init__.py | 8 + DrissionPage/base.py | 85 +-- DrissionPage/base.pyi | 39 +- DrissionPage/chromium_base.py | 462 +++++++++++++--- DrissionPage/chromium_base.pyi | 147 ++++- DrissionPage/chromium_driver.py | 61 ++- DrissionPage/chromium_driver.pyi | 2 +- DrissionPage/chromium_element.py | 207 +++++-- DrissionPage/chromium_element.pyi | 59 +- DrissionPage/chromium_frame.py | 39 +- DrissionPage/chromium_frame.pyi | 28 +- DrissionPage/chromium_page.py | 671 +++++++++++++---------- DrissionPage/chromium_page.pyi | 179 +++--- DrissionPage/chromium_tab.py | 111 +++- DrissionPage/chromium_tab.pyi | 43 +- DrissionPage/common.pyi | 7 - DrissionPage/commons/browser.py | 23 +- DrissionPage/commons/browser.pyi | 9 +- DrissionPage/commons/constants.py | 5 +- DrissionPage/commons/tools.py | 156 ++---- DrissionPage/commons/tools.pyi | 17 +- DrissionPage/commons/web.py | 114 +++- DrissionPage/commons/web.pyi | 61 +++ DrissionPage/configs/chromium_options.py | 11 +- DrissionPage/configs/configs.ini | 3 +- DrissionPage/configs/session_options.py | 9 +- DrissionPage/easy_set.py | 172 +++++- DrissionPage/easy_set.pyi | 21 +- DrissionPage/errors.py | 10 +- DrissionPage/network_listener.py | 325 ----------- DrissionPage/network_listener.pyi | 140 ----- 
DrissionPage/session_element.py | 24 +- DrissionPage/session_element.pyi | 18 +- DrissionPage/session_page.py | 217 +++++++- DrissionPage/session_page.pyi | 90 ++- DrissionPage/setter.py | 526 ------------------ DrissionPage/setter.pyi | 192 ------- DrissionPage/waiter.py | 291 ---------- DrissionPage/waiter.pyi | 85 --- DrissionPage/web_page.py | 169 +++++- DrissionPage/web_page.pyi | 56 +- requirements.txt | 2 +- setup.py | 4 +- 43 files changed, 2404 insertions(+), 2494 deletions(-) delete mode 100644 DrissionPage/common.pyi delete mode 100644 DrissionPage/network_listener.py delete mode 100644 DrissionPage/network_listener.pyi delete mode 100644 DrissionPage/setter.py delete mode 100644 DrissionPage/setter.pyi delete mode 100644 DrissionPage/waiter.py delete mode 100644 DrissionPage/waiter.pyi diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 1f553d7..335c6a4 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -11,3 +11,11 @@ from .web_page import WebPage # 启动配置类 from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions + +# 旧版页面类和启动配置类 +try: + from .mixpage.mix_page import MixPage + from .mixpage.drission import Drission + from .configs.driver_options import DriverOptions +except ModuleNotFoundError: + pass diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 54b9a7c..58ff3f3 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -4,12 +4,9 @@ @Contact : g1879@qq.com """ from abc import abstractmethod -from pathlib import Path from re import sub from urllib.parse import quote -from DownloadKit import DownloadKit - from .commons.constants import Settings, NoneElement from .commons.locator import get_loc from .commons.web import format_html @@ -61,6 +58,12 @@ class BaseElement(BaseParser): def parent(self, level_or_loc=1): pass + def prev(self, index=1): + return None # ShadowRootElement直接继承 + + def prevs(self) -> None: + return None # 
ShadowRootElement直接继承 + def next(self, index=1): pass @@ -71,7 +74,7 @@ class BaseElement(BaseParser): r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err) if not single or raise_err is False: return r - if not r and (Settings.raise_when_ele_not_found or raise_err is True): + if not r and (Settings.raise_ele_not_found or raise_err is True): raise ElementNotFoundError return r @@ -81,7 +84,7 @@ class BaseElement(BaseParser): class DrissionElement(BaseElement): - """ChromiumElement 和 SessionElement的基类 + """DriverElement、ChromiumElement 和 SessionElement的基类 但不是ShadowRootElement的基类""" @property @@ -116,10 +119,9 @@ class DrissionElement(BaseElement): return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] - def parent(self, level_or_loc=1, index=1): + def parent(self, level_or_loc=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ if isinstance(level_or_loc, int): @@ -131,27 +133,24 @@ class DrissionElement(BaseElement): if loc[0] == 'css selector': raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}[{index}]' + loc = f'xpath:./ancestor::{loc[1].lstrip(". 
/ ")}' else: raise TypeError('level_or_loc参数只能是tuple、int或str。') return self._ele(loc, timeout=0, relative=True, raise_err=False) - def child(self, filter_loc='', index=1, timeout=None, ele_only=True): + def child(self, index=1, filter_loc='', timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 + :param filter_loc: 用于筛选的查询语法 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) if not nodes: - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -159,83 +158,71 @@ class DrissionElement(BaseElement): try: return nodes[index - 1] except IndexError: - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() - def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): + def prev(self, index=1, filter_loc='', timeout=0, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 + :param filter_loc: 用于筛选的查询语法 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() - def next(self, filter_loc='', index=1, timeout=0, ele_only=True): + def next(self, index=1, filter_loc='', timeout=0, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 + :param filter_loc: 用于筛选的查询语法 :param timeout: 查找节点的超时时间 :param 
ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() - def before(self, filter_loc='', index=1, timeout=None, ele_only=True): + def before(self, index=1, filter_loc='', timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 + :param filter_loc: 用于筛选的查询语法 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() - def after(self, filter_loc='', index=1, timeout=None, ele_only=True): + def after(self, index=1, filter_loc='', timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 + :param filter_loc: 用于筛选的查询语法 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - if isinstance(filter_loc, int): - index = filter_loc - filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -305,7 +292,7 @@ class DrissionElement(BaseElement): :param direction: 'following' 或 'preceding',查找的方向 :param brother: 查找范围,在同级查找还是整个dom前后查找 :param timeout: 查找等待时间 - :return: 元素对象或字符串 + :return: DriverElement对象或字符串 """ if 
index is not None and index < 1: raise ValueError('index必须大于等于1。') @@ -366,8 +353,6 @@ class BasePage(BaseParser): self.retry_times = 3 self.retry_interval = 2 self._url_available = None - self._download_path = '' - self._DownloadKit = None @property def title(self): @@ -395,18 +380,6 @@ class BasePage(BaseParser): """返回当前访问的url有效性""" return self._url_available - @property - def download_path(self): - """返回默认下载路径""" - return str(Path(self._download_path).absolute()) - - @property - def download(self): - """返回下载器对象""" - if self._DownloadKit is None: - self._DownloadKit = DownloadKit(session=self, goal_path=self.download_path) - return self._DownloadKit - def _before_connect(self, url, retry, interval): """连接前的准备 :param url: 要访问的url @@ -414,7 +387,7 @@ class BasePage(BaseParser): :param interval: 重试间隔 :return: 重试次数和间隔组成的tuple """ - self._url = quote(url, safe='/:&?=%;#@+![]') + self._url = quote(url, safe='/:&?=%;#@+!') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval @@ -444,7 +417,7 @@ class BasePage(BaseParser): if not single or raise_err is False: return r - if not r and (Settings.raise_when_ele_not_found is True or raise_err is True): + if not r and (Settings().raise_ele_not_found is True or raise_err is True): raise ElementNotFoundError return r diff --git a/DrissionPage/base.pyi b/DrissionPage/base.pyi index eda767f..690241d 100644 --- a/DrissionPage/base.pyi +++ b/DrissionPage/base.pyi @@ -6,8 +6,6 @@ from abc import abstractmethod from typing import Union, Tuple, List -from DownloadKit import DownloadKit - from .commons.constants import NoneElement @@ -80,35 +78,30 @@ class DrissionElement(BaseElement): def texts(self, text_node_only: bool = False) -> list: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ... 
+ def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... - def child(self, - filter_loc: Union[tuple, str, int] = '', - index: int = 1, + def child(self, index: int = 1, + filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def prev(self, - filter_loc: Union[tuple, str, int] = '', - index: int = 1, + def prev(self, index: int = 1, + filter_loc: Union[tuple, str] = '', timeout: float = 0, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def next(self, - filter_loc: Union[tuple, str, int] = '', - index: int = 1, + def next(self, index: int = 1, + filter_loc: Union[tuple, str] = '', timeout: float = 0, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def before(self, - filter_loc: Union[tuple, str, int] = '', - index: int = 1, + def before(self, index: int = 1, + filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def after(self, - filter_loc: Union[tuple, str, int] = '', - index: int = 1, + def after(self, index: int = 1, + filter_loc: Union[tuple, str] = '', timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... @@ -161,9 +154,7 @@ class BasePage(BaseParser): self._url_available: bool = ... self.retry_times: int = ... self.retry_interval: float = ... - self._timeout: float = ... - self._download_path: str = ... - self._DownloadKit: DownloadKit = ... + self._timeout = float = ... @property def title(self) -> Union[str, None]: ... @@ -180,12 +171,6 @@ class BasePage(BaseParser): @property def url_available(self) -> bool: ... - @property - def download_path(self) -> str: ... - - @property - def download(self) -> DownloadKit: ... - def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... 
# ----------------以下属性或方法由后代实现---------------- diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 283c199..b87fb7a 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -7,6 +7,7 @@ from base64 import b64decode from json import loads, JSONDecodeError from os import sep from pathlib import Path +from re import search from threading import Thread from time import perf_counter, sleep, time @@ -18,12 +19,10 @@ from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chro from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder -from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ - NoRectError, BrowserConnectError, GetDocumentError -from .network_listener import NetworkListener +from .commons.web import set_browser_cookies, ResponseData +from .errors import ContextLossError, ElementLossError, AlertExistsError, CallMethodError, TabClosedError, \ + NoRectError, BrowserConnectError from .session_element import make_session_ele -from .setter import ChromiumBaseSetter -from .waiter import ChromiumBaseWaiter class ChromiumBase(BasePage): @@ -42,7 +41,6 @@ class ChromiumBase(BasePage): self._tab_obj = None self._set = None self._screencast = None - self._listener = None if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): address = f'127.0.0.1:{address}' @@ -72,9 +70,7 @@ class ChromiumBase(BasePage): """ self._chromium_init() if not tab_id: - u = f'http://{self.address}/json' - json = self._control_session.get(u).json() - self._control_session.get(u, headers={'Connection': 'close'}) + json = self._control_session.get(f'http://{self.address}/json').json() tab_id = [i['id'] for i in json if i['type'] == 'page'] if not tab_id: raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') @@ -87,7 +83,6 @@ class 
ChromiumBase(BasePage): """浏览器初始设置""" self._control_session = Session() self._control_session.keep_alive = False - self._control_session.proxies = {'http': None, 'https': None} self._first_run = True self._is_reading = False self._upload_list = None @@ -103,14 +98,14 @@ class ChromiumBase(BasePage): self._tab_obj = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) self._tab_obj.start() - self._tab_obj.call_method('DOM.enable') - self._tab_obj.call_method('Page.enable') + self._tab_obj.DOM.enable() + self._tab_obj.Page.enable() - self._tab_obj.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading) - self._tab_obj.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading) - self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated) - self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired) - self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated) + self._tab_obj.Page.frameStoppedLoading = self._onFrameStoppedLoading + self._tab_obj.Page.frameStartedLoading = self._onFrameStartedLoading + self._tab_obj.DOM.documentUpdated = self._onDocumentUpdated + self._tab_obj.Page.loadEventFired = self._onLoadEventFired + self._tab_obj.Page.frameNavigated = self._onFrameNavigated def _get_document(self): """刷新cdp使用的document数据""" @@ -136,8 +131,7 @@ class ChromiumBase(BasePage): self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) break - except CDPError as e: - err = e + except Exception: if self._debug: print('重试获取document') if self._debug_recorder: @@ -146,9 +140,7 @@ class ChromiumBase(BasePage): sleep(.1) else: - txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ - f'报告网址:https://gitee.com/g1879/DrissionPage/issues' - raise GetDocumentError(txt) + raise RuntimeError('获取document失败。') if self._debug: print('获取document结束') @@ -238,7 +230,7 @@ class ChromiumBase(BasePage): files = self._upload_list if kwargs['mode'] == 'selectMultiple' else 
self._upload_list[:1] self.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=kwargs['backendNodeId']) - self.driver.set_listener('Page.fileChooserOpened', None) + self.driver.Page.fileChooserOpened = None self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self._upload_list = None @@ -333,11 +325,6 @@ class ChromiumBase(BasePage): """返回页面加载策略,有3种:'none'、'normal'、'eager'""" return self._page_load_strategy - @property - def user_agent(self): - """返回user agent""" - return self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - @property def scroll(self): """返回用于滚动滚动条的对象""" @@ -377,13 +364,6 @@ class ChromiumBase(BasePage): self._screencast = Screencast(self) return self._screencast - @property - def listener(self): - """返回用于聆听数据包的对象""" - if self._listener is None: - self._listener = NetworkListener(self) - return self._listener - def run_cdp(self, cmd, **cmd_args): """执行Chrome DevTools Protocol语句 :param cmd: 协议项目 @@ -411,7 +391,7 @@ class ChromiumBase(BasePage): elif error in ('Node does not have a layout object', 'Could not compute box model.'): raise NoRectError elif r['type'] == 'call_method_error': - raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}') + raise CallMethodError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}') else: raise RuntimeError(r) @@ -562,12 +542,9 @@ class ChromiumBase(BasePage): if ok: try: if single: - r = make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) - break - + return make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) else: - r = [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] - break + return [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] except ElementLossError: ok = False @@ -583,12 +560,6 @@ class ChromiumBase(BasePage): sleep(.1) - try: - self.run_cdp('DOM.discardSearchResults', searchId=search_result['searchId']) - except: - pass - return r - def refresh(self, ignore_cache=False): """刷新当前页面 
:param ignore_cache: 是否忽略缓存 @@ -624,14 +595,14 @@ class ChromiumBase(BasePage): index = history['currentIndex'] history = history['entries'] direction = 1 if steps > 0 else -1 - curr_url = history[index]['url'] + curr_url = history[index]['userTypedURL'] nid = None for num in range(abs(steps)): for i in history[index::direction]: index += direction - if i['url'] != curr_url: + if i['userTypedURL'] != curr_url: nid = i['id'] - curr_url = i['url'] + curr_url = i['userTypedURL'] break if nid: @@ -813,7 +784,7 @@ class ChromiumBase(BasePage): while self.ready_state not in ('complete', None): sleep(.1) if self._debug or show_errmsg: - print(f'重试{t + 1} {to_url}') + print(f'重试 {to_url}') if err: if show_errmsg: @@ -890,6 +861,301 @@ class ChromiumBase(BasePage): return str(path.absolute()) +class ChromiumBaseSetter(object): + def __init__(self, page): + self._page = page + + @property + def load_strategy(self): + """返回用于设置页面加载策略的对象""" + return PageLoadStrategy(self._page) + + @property + def scroll(self): + """返回用于设置页面滚动设置的对象""" + return PageScrollSetter(self._page.scroll) + + def retry_times(self, times): + """设置连接失败重连次数""" + self._page.retry_times = times + + def retry_interval(self, interval): + """设置连接失败重连间隔""" + self._page.retry_interval = interval + + def timeouts(self, implicit=None, page_load=None, script=None): + """设置超时时间,单位为秒 + :param implicit: 查找元素超时时间 + :param page_load: 页面加载超时时间 + :param script: 脚本运行超时时间 + :return: None + """ + if implicit is not None: + self._page.timeouts.implicit = implicit + + if page_load is not None: + self._page.timeouts.page_load = page_load + + if script is not None: + self._page.timeouts.script = script + + def user_agent(self, ua, platform=None): + """为当前tab设置user agent,只在当前tab有效 + :param ua: user agent字符串 + :param platform: platform字符串 + :return: None + """ + keys = {'userAgent': ua} + if platform: + keys['platform'] = platform + self._page.run_cdp('Emulation.setUserAgentOverride', **keys) + + def session_storage(self, item, 
value): + """设置或删除某项sessionStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + js = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' + return self._page.run_js_loaded(js, as_expr=True) + + def local_storage(self, item, value): + """设置或删除某项localStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' + return self._page.run_js_loaded(js, as_expr=True) + + def cookies(self, cookies): + """设置cookies值 + :param cookies: cookies信息 + :return: None + """ + set_browser_cookies(self._page, cookies) + + def upload_files(self, files): + """等待上传的文件路径 + :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 + :return: None + """ + if not self._page._upload_list: + self._page.driver.Page.fileChooserOpened = self._page._onFileChooserOpened + self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) + + if isinstance(files, str): + files = files.split('\n') + self._page._upload_list = [str(Path(i).absolute()) for i in files] + + def headers(self, headers: dict) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + self._page.run_cdp('Network.enable') + self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) + + +class ChromiumBaseWaiter(object): + def __init__(self, page_or_ele): + """ + :param page_or_ele: 页面对象或元素对象 + """ + self._driver = page_or_ele + self._listener = None + + def ele_delete(self, loc_or_ele, timeout=None): + """等待元素从DOM中删除 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :return: 是否等待成功 + """ + if isinstance(loc_or_ele, (str, tuple)): + ele = self._driver._ele(loc_or_ele, timeout=.3, raise_err=False) + return ele.wait.delete(timeout) if ele else True + return loc_or_ele.wait.delete(timeout) + + def ele_display(self, loc_or_ele, timeout=None): + """等待元素变成显示状态 
+ :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False) + return ele.wait.display(timeout) if ele else False + + def ele_hidden(self, loc_or_ele, timeout=None): + """等待元素变成隐藏状态 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False) + return ele.wait.hidden(timeout) + + def load_start(self, timeout=None): + """等待页面开始加载 + :param timeout: 超时时间,为None时使用页面timeout属性 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, gap=.002) + + def load_complete(self, timeout=None): + """等待页面开始加载 + :param timeout: 超时时间,为None时使用页面timeout属性 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, start=False) + + def upload_paths_inputted(self): + """等待自动填写上传文件路径""" + while self._driver._upload_list: + sleep(.01) + + def _loading(self, timeout=None, start=True, gap=.01): + """等待页面开始加载或加载完成 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param start: 等待开始还是结束 + :param gap: 间隔秒数 + :return: 是否等待成功 + """ + if timeout != 0: + timeout = self._driver.timeout if timeout in (None, True) else timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._driver.is_loading == start: + return True + sleep(gap) + return False + + def set_targets(self, targets, is_regex=False): + """指定要等待的数据包 + :param targets: 要匹配的数据包url特征,可用list等传入多个 + :param is_regex: 设置的target是否正则表达式 + :return: None + """ + if not self._listener: + self._listener = NetworkListener(self._driver) + self._listener.set_targets(targets, is_regex) + + def data_packets(self, timeout=None, any_one=False): + """等待指定数据包加载完成 + :param timeout: 超时时间,为None则使用页面对象timeout + :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 + :return: ResponseData对象或监听结果字典 + """ + if not self._listener: + self._listener = NetworkListener(self._driver) + return self._listener.listen(timeout, any_one) + + def 
stop_listening(self): + """停止监听数据包""" + if not self._listener: + self._listener = NetworkListener(self._driver) + self._listener.stop() + + +class NetworkListener(object): + def __init__(self, page): + self._page = page + self._targets = None + self._is_regex = False + self._results = {} + self._single = False + self._requests = {} + + def set_targets(self, targets, is_regex=False): + """指定要等待的数据包 + :param targets: 要匹配的数据包url特征,可用list等传入多个 + :param is_regex: 设置的target是否正则表达式 + :return: None + """ + if not isinstance(targets, (str, list, tuple, set)): + raise TypeError('targets只能是str、list、tuple、set。') + self._is_regex = is_regex + if isinstance(targets, str): + self._targets = {targets} + self._single = True + else: + self._targets = set(targets) + self._single = False + self._page.run_cdp('Network.enable') + if targets is not None: + self._page.driver.Network.requestWillBeSent = self._requestWillBeSent + self._page.driver.Network.responseReceived = self._response_received + self._page.driver.Network.loadingFinished = self._loading_finished + else: + self.stop() + + def stop(self): + """停止监听数据包""" + self._page.run_cdp('Network.disable') + self._page.driver.Network.requestWillBeSent = None + self._page.driver.Network.responseReceived = None + self._page.driver.Network.loadingFinished = None + + def listen(self, timeout=None, any_one=False): + """等待指定数据包加载完成 + :param timeout: 超时时间,为None则使用页面对象timeout + :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 + :return: ResponseData对象或监听结果字典 + """ + if self._targets is None: + raise RuntimeError('必须先用set_targets()设置等待目标。') + + timeout = timeout if timeout is not None else self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._results and (any_one or set(self._results) == self._targets): + break + sleep(.1) + + self._requests = {} + if not self._results: + return False + r = list(self._results.values())[0] if self._single else self._results + self._results = {} + return r + + 
def _response_received(self, **kwargs): + """接收到返回信息时处理方法""" + if kwargs['requestId'] in self._requests: + self._requests[kwargs['requestId']]['response'] = kwargs['response'] + + def _loading_finished(self, **kwargs): + """请求完成时处理方法""" + request_id = kwargs['requestId'] + if request_id in self._requests: + try: + r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) + body = r['body'] + is_base64 = r['base64Encoded'] + except CallMethodError: + body = '' + is_base64 = False + + request = self._requests[request_id] + target = request['target'] + rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) + rd.method = request['method'] + rd.postData = request['post_data'] + rd._base64_body = is_base64 + rd.requestHeaders = request['request_headers'] + self._results[target] = rd + + def _requestWillBeSent(self, **kwargs): + """接收到请求时的回调函数""" + for target in self._targets: + if (self._is_regex and search(target, kwargs['request']['url'])) or ( + not self._is_regex and target in kwargs['request']['url']): + self._requests[kwargs['requestId']] = {'target': target, + 'method': kwargs['request']['method'], + 'post_data': kwargs['request'].get('postData', None), + 'request_headers': kwargs['request']['headers']} + break + + class ChromiumPageScroll(ChromiumScroll): def __init__(self, page): """ @@ -899,10 +1165,10 @@ class ChromiumPageScroll(ChromiumScroll): self.t1 = 'window' self.t2 = 'document.documentElement' - def to_see(self, loc_or_ele, center=None): + def to_see(self, loc_or_ele, center=False): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :param center: 是否尽量滚动到页面正中 :return: None """ ele = self._driver._ele(loc_or_ele) @@ -911,22 +1177,17 @@ class ChromiumPageScroll(ChromiumScroll): def _to_see(self, ele, center): """执行滚动页面直到元素可见 :param ele: 元素对象 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :param center: 是否尽量滚动到页面正中 :return: None """ - txt = 'true' if 
center else 'false' - ele.run_js(f'this.scrollIntoViewIfNeeded({txt});') - if center or (center is not False and ele.states.is_covered): - ele.run_js('''function getWindowScrollTop() {var scroll_top = 0; - if (document.documentElement && document.documentElement.scrollTop) { - scroll_top = document.documentElement.scrollTop; - } else if (document.body) {scroll_top = document.body.scrollTop;} - return scroll_top;} - const { top, height } = this.getBoundingClientRect(); - const elCenter = top + height / 2; - const center = window.innerHeight / 2; - window.scrollTo({top: getWindowScrollTop() - (center - elCenter), - behavior: 'instant'});''') + if center: + ele.run_js('this.scrollIntoViewIfNeeded();') + self._wait_scrolled() + return + + ele.run_js('this.scrollIntoViewIfNeeded(false);') + if ele.states.is_covered: + ele.run_js('this.scrollIntoViewIfNeeded();') self._wait_scrolled() @@ -949,6 +1210,62 @@ class Timeout(object): return str({'implicit': self.implicit, 'page_load': self.page_load, 'script': self.script}) +class PageLoadStrategy(object): + """用于设置页面加载策略的类""" + + def __init__(self, page): + """ + :param page: ChromiumBase对象 + """ + self._page = page + + def __call__(self, value): + """设置加载策略 + :param value: 可选 'normal', 'eager', 'none' + :return: None + """ + if value.lower() not in ('normal', 'eager', 'none'): + raise ValueError("只能选择 'normal', 'eager', 'none'。") + self._page._page_load_strategy = value + + def normal(self): + """设置页面加载策略为normal""" + self._page._page_load_strategy = 'normal' + + def eager(self): + """设置页面加载策略为eager""" + self._page._page_load_strategy = 'eager' + + def none(self): + """设置页面加载策略为none""" + self._page._page_load_strategy = 'none' + + +class PageScrollSetter(object): + def __init__(self, scroll): + self._scroll = scroll + + def wait_complete(self, on_off=True): + """设置滚动命令后是否等待完成 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + self._scroll._wait_complete = 
on_off + + def smooth(self, on_off=True): + """设置页面滚动是否平滑滚动 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + b = 'smooth' if on_off else 'auto' + self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') + self._scroll._wait_complete = on_off + + class Screencast(object): def __init__(self, page): self._page = page @@ -972,7 +1289,7 @@ class Screencast(object): raise ValueError('save_path必须设置。') clean_folder(self._path) if self._mode.startswith('frugal'): - self._page.driver.set_listener('Page.screencastFrame', self._onScreencastFrame) + self._page.driver.Page.screencastFrame = self._onScreencastFrame self._page.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100) elif not self._mode.startswith('js'): @@ -1029,7 +1346,7 @@ class Screencast(object): return path if self._mode.startswith('frugal'): - self._page.driver.set_listener('Page.screencastFrame', None) + self._page.driver.Page.screencastFrame = None self._page.run_cdp('Page.stopScreencast') else: self._enable = False @@ -1043,7 +1360,7 @@ class Screencast(object): raise TypeError('转换成视频仅支持英文路径和文件名。') try: - from cv2 import VideoWriter, imread, VideoWriter_fourcc + from cv2 import VideoWriter, imread from numpy import fromfile, uint8 except ModuleNotFoundError: raise ModuleNotFoundError('请先安装cv2,pip install opencv-python') @@ -1053,7 +1370,10 @@ class Screencast(object): imgInfo = img.shape size = (imgInfo[1], imgInfo[0]) - videoWrite = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size) + # if video_name and not video_name.endswith('mp4'): + # video_name = f'{video_name}.mp4' + # name = f'{time()}.mp4' if not video_name else video_name + videoWrite = VideoWriter(path, 14, 5, size) for i in pic_list: img = imread(str(i)) diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index ebbbd1b..9638dc8 100644 --- a/DrissionPage/chromium_base.pyi +++ 
b/DrissionPage/chromium_base.pyi @@ -4,23 +4,23 @@ @Contact : g1879@qq.com """ from pathlib import Path -from typing import Union, Tuple, List, Any +from typing import Union, Tuple, List, Any, Dict from DataRecorder import Recorder from requests import Session +from requests.cookies import RequestsCookieJar from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement, ChromiumScroll from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement -from .network_listener import NetworkListener +from .commons.web import ResponseData from .session_element import SessionElement -from .setter import ChromiumBaseSetter -from .waiter import ChromiumBaseWaiter class ChromiumBase(BasePage): + def __init__(self, address: Union[str, int], tab_id: str = None, @@ -42,7 +42,6 @@ class ChromiumBase(BasePage): self._wait: ChromiumBaseWaiter = ... self._set: ChromiumBaseSetter = ... self._screencast: Screencast = ... - self._listener: NetworkListener = ... def _connect_browser(self, tab_id: str = None) -> None: ... @@ -112,9 +111,6 @@ class ChromiumBase(BasePage): @property def page_load_strategy(self) -> str: ... - @property - def user_agent(self) -> str: ... - @property def scroll(self) -> ChromiumPageScroll: ... @@ -133,33 +129,37 @@ class ChromiumBase(BasePage): @property def screencast(self) -> Screencast: ... - @property - def listener(self) -> NetworkListener: ... - def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... def run_js_loaded(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - def get(self, url: str, show_errmsg: bool = False, retry: int = None, - interval: float = None, timeout: float = None) -> Union[None, bool]: ... 
+ def get(self, + url: str, + show_errmsg: bool = False, + retry: int = None, + interval: float = None, + timeout: float = None) -> Union[None, bool]: ... - def get_cookies(self, as_dict: bool = False, all_domains: bool = False, - all_info: bool = False) -> Union[list, dict]: ... + def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[ + list, dict]: ... - def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None) -> Union[ChromiumElement, str]: ... + def ele(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None) -> ChromiumElement: ... - def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[ChromiumElement, str]]: ... + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[ChromiumElement]: ... def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ -> Union[SessionElement, str, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + def _find_elements(self, + loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... @@ -214,12 +214,91 @@ class ChromiumBase(BasePage): timeout: float = None) -> Union[bool, None]: ... +class ChromiumBaseWaiter(object): + def __init__(self, page: ChromiumBase): + self._driver: ChromiumBase = ... + self._listener: NetworkListener = ... + + def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... 
+ + def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + + def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + + def _loading(self, timeout: float = None, start: bool = True, gap: float = .01) -> bool: ... + + def load_start(self, timeout: float = None) -> bool: ... + + def load_complete(self, timeout: float = None) -> bool: ... + + def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... + + def stop_listening(self) -> None: ... + + def data_packets(self, timeout: float = None, + any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... + + def upload_paths_inputted(self) -> None: ... + + +class NetworkListener(object): + def __init__(self, page): + self._page: ChromiumBase = ... + self._targets: Union[str, dict] = ... + self._single: bool = ... + self._results: Union[ResponseData, Dict[str, ResponseData], False] = ... + self._is_regex: bool = ... + self._requests: dict = ... + + def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... + + def stop(self) -> None: ... + + def listen(self, timeout: float = None, + any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... + + def _response_received(self, **kwargs) -> None: ... + + def _loading_finished(self, **kwargs) -> None: ... + + def _requestWillBeSent(self, **kwargs) -> None: ... + + class ChromiumPageScroll(ChromiumScroll): def __init__(self, page: ChromiumBase): ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... - def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... + def _to_see(self, ele: ChromiumElement, center: bool) -> None: ... 
+ + +class ChromiumBaseSetter(object): + def __init__(self, page): + self._page: ChromiumBase = ... + + @property + def load_strategy(self) -> PageLoadStrategy: ... + + @property + def scroll(self) -> PageScrollSetter: ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def session_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def local_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def upload_files(self, files: Union[str, list, tuple]) -> None: ... class Timeout(object): @@ -231,6 +310,28 @@ class Timeout(object): self.script: float = ... +class PageLoadStrategy(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + + def __call__(self, value: str) -> None: ... + + def normal(self) -> None: ... + + def eager(self) -> None: ... + + def none(self) -> None: ... + + +class PageScrollSetter(object): + def __init__(self, scroll: ChromiumPageScroll): + self._scroll: ChromiumPageScroll = ... + + def wait_complete(self, on_off: bool = True): ... + + def smooth(self, on_off: bool = True): ... + + class Screencast(object): def __init__(self, page: ChromiumBase): self._page: ChromiumBase = ... @@ -265,4 +366,4 @@ class ScreencastMode(object): def frugal_imgs_mode(self) -> None: ... - def imgs_mode(self) -> None: ... + def imgs_mode(self) -> None: ... 
\ No newline at end of file diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index 09e9cbf..fbe08e1 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -3,6 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ +from functools import partial from json import dumps, loads from queue import Queue, Empty from threading import Thread, Event @@ -10,6 +11,26 @@ from threading import Thread, Event from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ create_connection +from .errors import CallMethodError + + +class GenericAttr(object): + def __init__(self, name, tab): + self.__dict__['name'] = name + self.__dict__['tab'] = tab + + def __getattr__(self, item): + method_name = f"{self.name}.{item}" + event_listener = self.tab.get_listener(method_name) + + if event_listener: + return event_listener + + return partial(self.tab.call_method, method_name) + + def __setattr__(self, key, value): + self.tab.set_listener(f"{self.name}.{key}", value) + class ChromiumDriver(object): _INITIAL_ = 'initial' @@ -58,13 +79,7 @@ class ChromiumDriver(object): message_json = dumps(message) if self.debug: - if self.debug is True or (isinstance(self.debug, str) and message.get('method', '').startswith(self.debug)): - print(f'发> {message_json}') - elif isinstance(self.debug, (list, tuple, set)): - for m in self.debug: - if message.get('method', '').startswith(m): - print(f'发> {message_json}') - break + print(f"发> {message_json}") if not isinstance(timeout, (int, float)) or timeout > 1: q_timeout = 1 @@ -102,7 +117,7 @@ class ChromiumDriver(object): try: self._ws.settimeout(1) message_json = self._ws.recv() - mes = loads(message_json) + message = loads(message_json) except WebSocketTimeoutException: continue except (WebSocketException, OSError, WebSocketConnectionClosedException): @@ -110,24 +125,17 @@ class ChromiumDriver(object): return if self.debug: - if self.debug is True or 
'id' in mes or (isinstance(self.debug, str) - and mes.get('method', '').startswith(self.debug)): - print(f'<收 {message_json}') - elif isinstance(self.debug, (list, tuple, set)): - for m in self.debug: - if mes.get('method', '').startswith(m): - print(f'<收 {message_json}') - break + print(f'<收 {message_json}') - if "method" in mes: - self.event_queue.put(mes) + if "method" in message: + self.event_queue.put(message) - elif "id" in mes: - if mes["id"] in self.method_results: - self.method_results[mes['id']].put(mes) + elif "id" in message: + if message["id"] in self.method_results: + self.method_results[message['id']].put(message) elif self.debug: - print(f'未知信息:{mes}') + print(f'未知信息:{message}') def _handle_event_loop(self): """当接收到浏览器信息,执行已绑定的方法""" @@ -146,7 +154,12 @@ class ChromiumDriver(object): self.event_queue.task_done() - def call_method(self, _method, **kwargs): + def __getattr__(self, item): + attr = GenericAttr(item, self) + setattr(self, item, attr) + return attr + + def call_method(self, _method, *args, **kwargs): """执行cdp方法 :param _method: cdp方法名 :param args: cdp参数 @@ -156,6 +169,8 @@ class ChromiumDriver(object): if not self._started: self.start() # raise RuntimeError("不能在启动前调用方法。") + if args: + raise CallMethodError("参数必须是key=value形式。") if self._stopped.is_set(): return {'error': 'tab closed', 'type': 'tab_closed'} diff --git a/DrissionPage/chromium_driver.pyi b/DrissionPage/chromium_driver.pyi index 0c63041..df1bf9e 100644 --- a/DrissionPage/chromium_driver.pyi +++ b/DrissionPage/chromium_driver.pyi @@ -47,7 +47,7 @@ class ChromiumDriver(object): def __getattr__(self, item: str) -> Callable: ... - def call_method(self, _method: str, **kwargs) -> dict: ... + def call_method(self, _method: str, *args, **kwargs) -> dict: ... def start(self) -> bool: ... 
diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 3750000..59f4eb0 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -14,10 +14,8 @@ from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinition from .commons.locator import get_loc from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ - CDPError, NoResourceError, CanNotClickError + CallMethodError, NoResourceError, CanNotClickError from .session_element import make_session_ele -from .setter import ChromiumElementSetter -from .waiter import ChromiumElementWaiter class ChromiumElement(DrissionElement): @@ -101,7 +99,7 @@ class ChromiumElement(DrissionElement): try: attrs = self.page.run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} - except CDPError: # 文档根元素不能调用此方法 + except CallMethodError: # 文档根元素不能调用此方法 return {} @property @@ -205,13 +203,12 @@ class ChromiumElement(DrissionElement): return self._select - def parent(self, level_or_loc=1, index=1): + def parent(self, level_or_loc=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - return super().parent(level_or_loc, index) + return super().parent(level_or_loc) def child(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -221,7 +218,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本 """ - return super().child(filter_loc, index, timeout, ele_only=ele_only) + return super().child(index, filter_loc, timeout, ele_only=ele_only) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): 
"""返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -231,7 +228,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本 """ - return super().prev(filter_loc, index, timeout, ele_only=ele_only) + return super().prev(index, filter_loc, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -241,7 +238,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本 """ - return super().next(filter_loc, index, timeout, ele_only=ele_only) + return super().next(index, filter_loc, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -252,7 +249,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - return super().before(filter_loc, index, timeout, ele_only=ele_only) + return super().before(index, filter_loc, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -263,7 +260,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - return super().after(filter_loc, index, timeout, ele_only=ele_only) + return super().after(index, filter_loc, timeout, ele_only=ele_only) def children(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -439,10 +436,9 @@ class ChromiumElement(DrissionElement): js = f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");' return self.run_js(js) - def get_src(self, timeout=None, base64_to_bytes=True): - """返回元素src资源,base64的可转为bytes返回,其它返回str + def get_src(self, timeout=None): + """返回元素src资源,base64的会转为bytes返回,其它返回str :param timeout: 等待资源加载的超时时间 - :param base64_to_bytes: 为True时,如果是base64数据,转换为bytes格式 
:return: 资源内容 """ timeout = self.page.timeout if timeout is None else timeout @@ -468,18 +464,15 @@ class ChromiumElement(DrissionElement): try: result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) break - except CDPError: + except CallMethodError: sleep(.1) if not result: return None if result['base64Encoded']: - if base64_to_bytes: - from base64 import b64decode - data = b64decode(result['content']) - else: - data = result['content'] + from base64 import b64decode + data = b64decode(result['content']) else: data = result['content'] return data @@ -529,24 +522,15 @@ class ChromiumElement(DrissionElement): return self.page._get_screenshot(path, as_bytes=as_bytes, as_base64=as_base64, full_page=False, left_top=left_top, right_bottom=right_bottom, ele=self) - def input(self, vals, clear=True, by_js=False): + def input(self, vals, clear=True): """输入文本或组合键,也可用于输入文件路径到input元素(路径间用\n间隔) :param vals: 文本值或按键组合 :param clear: 输入前是否清空文本框 - :param by_js: 是否用js方式输入,不能输入组合键 :return: None """ if self.tag == 'input' and self.attr('type') == 'file': return self._set_file_input(vals) - if by_js: - if clear: - self.clear(True) - if isinstance(vals, (list, tuple)): - vals = ''.join([str(i) for i in vals]) - self.set.prop('value', str(vals)) - return - if clear and vals not in ('\n', '\ue007'): self.clear(by_js=False) else: @@ -765,7 +749,7 @@ class ChromiumShadowRoot(BaseElement): 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 超时时间 - :return: 元素对象或属性、文本 + :return: DriverElement对象或属性、文本 """ return self.ele(loc_or_str, timeout) @@ -815,10 +799,9 @@ class ChromiumShadowRoot(BaseElement): from threading import Thread Thread(target=run_js, args=(self, script, as_expr, self.page.timeouts.script, args)).start() - def parent(self, level_or_loc=1, index=1): + def parent(self, level_or_loc=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: ChromiumElement对象 """ 
if isinstance(level_or_loc, int): @@ -830,7 +813,7 @@ class ChromiumShadowRoot(BaseElement): if loc[0] == 'css selector': raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}[{index}]' + loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}' else: raise TypeError('level_or_loc参数只能是tuple、int或str。') @@ -845,7 +828,7 @@ class ChromiumShadowRoot(BaseElement): """ nodes = self.children(filter_loc=filter_loc) if not nodes: - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -853,7 +836,7 @@ class ChromiumShadowRoot(BaseElement): try: return nodes[index - 1] except IndexError: - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -867,7 +850,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.nexts(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -882,7 +865,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.befores(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -897,7 +880,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.afters(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_when_ele_not_found: + if Settings.raise_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -1441,7 +1424,7 @@ class ChromiumElementStates(object): lx, ly = self._ele.locations.click_point try: r = self._ele.page.run_cdp('DOM.getNodeForLocation', x=lx, y=ly) - except CDPError: + except CallMethodError: return False if r.get('backendNodeId') != self._ele.ids.backend_id: @@ -1472,6 +1455,38 @@ class ShadowRootStates(object): return False +class 
ChromiumElementSetter(object): + def __init__(self, ele): + """ + :param ele: ChromiumElement + """ + self._ele = ele + + def attr(self, attr, value): + """设置元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele.ids.node_id, name=attr, value=str(value)) + + def prop(self, prop, value): + """设置元素property属性 + :param prop: 属性名 + :param value: 属性值 + :return: None + """ + value = value.replace('"', r'\"') + self._ele.run_js(f'this.{prop}="{value}";') + + def innerHTML(self, html): + """设置元素innerHTML + :param html: html文本 + :return: None + """ + self.prop('innerHTML', html) + + class Locations(object): def __init__(self, ele): """ @@ -1520,24 +1535,21 @@ class Locations(object): """返回元素左上角在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_location - pr = self._ele.page.run_js('return window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) + return vx + ex, ey + vy @property def screen_midpoint(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_midpoint - pr = self._ele.page.run_js('return window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) + return vx + ex, ey + vy @property def screen_click_point(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_click_point - pr = self._ele.page.run_js('return window.devicePixelRatio;') - return int((vx + ex) * pr), int((ey + vy) * pr) + return vx + ex, ey + vy def _get_viewport_rect(self, quad): """按照类型返回在可视窗口中的范围 @@ -1610,9 +1622,9 @@ class Click(object): if by_js is not False: self._ele.run_js('this.click();') return True - if Settings.raise_when_click_failed: - raise CanNotClickError + if Settings.raise_click_failed: + raise CanNotClickError return False def right(self): @@ -1759,9 +1771,9 @@ class ChromiumScroll(object): class 
ChromiumElementScroll(ChromiumScroll): - def to_see(self, center=None): + def to_see(self, center=False): """滚动页面直到元素可见 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :param center: 是否尽量滚动到页面正中 :return: None """ self._driver.page.scroll.to_see(self._driver, center=center) @@ -2005,6 +2017,99 @@ class ChromiumSelect(object): self._ele.run_js('this.dispatchEvent(new UIEvent("change"));') +class ChromiumElementWaiter(object): + """等待元素在dom中某种状态,如删除、显示、隐藏""" + + def __init__(self, page, ele): + """等待元素在dom中某种状态,如删除、显示、隐藏 + :param page: 元素所在页面 + :param ele: 要等待的元素 + """ + self._page = page + self._ele = ele + + def delete(self, timeout=None): + """等待元素从dom删除 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_alive', False, timeout) + + def display(self, timeout=None): + """等待元素从dom显示 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', True, timeout) + + def hidden(self, timeout=None): + """等待元素从dom隐藏 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', False, timeout) + + def covered(self, timeout=None): + """等待当前元素被遮盖 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', True, timeout) + + def not_covered(self, timeout=None): + """等待当前元素被遮盖 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', False, timeout) + + def enabled(self, timeout=None): + """等待当前元素变成可用 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', True, timeout) + + def disabled(self, timeout=None): + """等待当前元素变成可用 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', False, timeout) + + def disabled_or_delete(self, timeout=None): + """等待当前元素变成不可用或从DOM移除 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 
+ """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if not self._ele.states.is_enabled or not self._ele.states.is_alive: + return True + sleep(.05) + + return False + + def _wait_state(self, attr, mode=False, timeout=None): + """等待元素某个bool状态到达指定状态 + :param attr: 状态名称 + :param mode: True或False + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._ele.states.__getattribute__(attr) == mode: + return True + sleep(.05) + + return False + + class Pseudo(object): def __init__(self, ele): """ diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index da4654f..53538ff 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -12,8 +12,6 @@ from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage from .commons.constants import NoneElement from .session_element import SessionElement -from .setter import ChromiumElementSetter -from .waiter import ChromiumElementWaiter from .web_page import WebPage @@ -96,29 +94,29 @@ class ChromiumElement(DrissionElement): @property def click(self) -> Click: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[ChromiumElement, None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... - def child(self, filter_loc: Union[tuple, str, int] = '', + def child(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def prev(self, filter_loc: Union[tuple, str, int] = '', + def prev(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... 
- def next(self, filter_loc: Union[tuple, str, int] = '', + def next(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def before(self, filter_loc: Union[tuple, str, int] = '', + def before(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def after(self, filter_loc: Union[tuple, str, int] = '', + def after(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... @@ -178,14 +176,14 @@ class ChromiumElement(DrissionElement): def style(self, style: str, pseudo_ele: str = '') -> str: ... - def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ... + def get_src(self, timeout: float = None) -> Union[bytes, str, None]: ... def save(self, path: [str, bool] = None, rename: str = None, timeout: float = None) -> None: ... def get_screenshot(self, path: [str, Path] = None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None) -> Union[str, bytes]: ... - def input(self, vals: Any, clear: bool = True, by_js: bool = False) -> None: ... + def input(self, vals: Any, clear: bool = True) -> None: ... def _set_file_input(self, files: Union[str, list, tuple]) -> None: ... @@ -275,7 +273,7 @@ class ChromiumShadowRoot(BaseElement): def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - def parent(self, level_or_loc: Union[str, int] = 1, index: int = 1) -> ChromiumElement: ... + def parent(self, level_or_loc: Union[str, int] = 1) -> ChromiumElement: ... def child(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> Union[ChromiumElement, str, None]: ... @@ -384,6 +382,17 @@ def send_enter(ele: ChromiumElement) -> None: ... def send_key(ele: ChromiumElement, modifier: int, key: str) -> None: ... 
+class ChromiumElementSetter(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + def attr(self, attr: str, value: str) -> None: ... + + def prop(self, prop: str, value: str) -> None: ... + + def innerHTML(self, html: str) -> None: ... + + class ShadowRootStates(object): def __init__(self, ele: ChromiumShadowRoot): """ @@ -487,7 +496,7 @@ class ChromiumScroll(object): class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center: Union[bool, None] = None) -> None: ... + def to_see(self, center: bool = False) -> None: ... class ChromiumSelect(object): @@ -545,6 +554,32 @@ class ChromiumSelect(object): def _dispatch_change(self) -> None: ... +class ChromiumElementWaiter(object): + def __init__(self, + page: ChromiumBase, + ele: ChromiumElement): + self._ele: ChromiumElement = ... + self._page: ChromiumBase = ... + + def delete(self, timeout: float = None) -> bool: ... + + def display(self, timeout: float = None) -> bool: ... + + def hidden(self, timeout: float = None) -> bool: ... + + def covered(self, timeout: float = None) -> bool: ... + + def not_covered(self, timeout: float = None) -> bool: ... + + def enabled(self, timeout: float = None) -> bool: ... + + def disabled(self, timeout: float = None) -> bool: ... + + def disabled_or_delete(self, timeout: float = None) -> bool: ... + + def _wait_state(self, attr: str, mode: bool = False, timeout: float = None) -> bool: ... + + class Pseudo(object): def __init__(self, ele: ChromiumElement): self._ele: ChromiumElement = ... 
diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 34aa115..f9118c4 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -7,12 +7,10 @@ from re import search from threading import Thread from time import sleep, perf_counter -from .chromium_base import ChromiumBase, ChromiumPageScroll -from .chromium_element import ChromiumElement +from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter +from .chromium_element import ChromiumElement, ChromiumElementWaiter from .commons.tools import get_usable_path from .errors import ContextLossError -from .setter import ChromiumFrameSetter -from .waiter import FrameWaiter class ChromiumFrame(ChromiumBase): @@ -71,9 +69,7 @@ class ChromiumFrame(ChromiumBase): try: super()._driver_init(tab_id) except: - u = f'http://{self.address}/json' - self._control_session.get(u) - self._control_session.get(u, headers={'Connection': 'close'}) + self._control_session.get(f'http://{self.address}/json') super()._driver_init(tab_id) def _reload(self): @@ -363,14 +359,13 @@ class ChromiumFrame(ChromiumBase): else: return self.doc_ele.run_js(script, *args, as_expr=as_expr) - def parent(self, level_or_loc=1, index=1): + def parent(self, level_or_loc=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ self._check_ok() - return self.frame_ele.parent(level_or_loc, index) + return self.frame_ele.parent(level_or_loc) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -643,11 +638,31 @@ class ChromiumFrameScroll(ChromiumPageScroll): self.t1 = self.t2 = 'this.documentElement' self._wait_complete = False - def to_see(self, loc_or_ele, center=None): + def to_see(self, loc_or_ele, center=False): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 + :param 
center: 是否尽量滚动到页面正中 :return: None """ ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) self._to_see(ele, center) + + +class ChromiumFrameSetter(ChromiumBaseSetter): + def attr(self, attr, value): + """设置frame元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._page._check_ok() + self._page.frame_ele.set.attr(attr, value) + + +class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): + def __init__(self, frame): + """ + :param frame: ChromiumFrame对象 + """ + super().__init__(frame) + super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index a2bdce8..631fb5f 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -6,10 +6,8 @@ from pathlib import Path from typing import Union, Tuple, List, Any -from .chromium_base import ChromiumBase, ChromiumPageScroll -from .chromium_element import ChromiumElement, Locations, ChromiumElementStates -from .setter import ChromiumFrameSetter -from .waiter import FrameWaiter +from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter +from .chromium_element import ChromiumElement, Locations, ChromiumElementStates, ChromiumElementWaiter class ChromiumFrame(ChromiumBase): @@ -122,24 +120,24 @@ class ChromiumFrame(ChromiumBase): def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[ChromiumElement, None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... - def prev(self, filter_loc: Union[tuple, str, int] = '', + def prev(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str]: ... 
- def next(self, filter_loc: Union[tuple, str, int] = '', + def next(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str]: ... - def before(self, filter_loc: Union[tuple, str, int] = '', + def before(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str]: ... - def after(self, filter_loc: Union[tuple, str, int] = '', + def after(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str]: ... @@ -205,4 +203,14 @@ class ChromiumFrameIds(object): class ChromiumFrameScroll(ChromiumPageScroll): def __init__(self, frame: ChromiumFrame) -> None: ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[None, bool] = None) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... + + +class ChromiumFrameSetter(ChromiumBaseSetter): + _page: ChromiumFrame = ... + + def attr(self, attr: str, value: str) -> None: ... + + +class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): + def __init__(self, frame: ChromiumFrame): ... 
diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 936243e..16d7a7b 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -3,17 +3,23 @@ @Author : g1879 @Contact : g1879@qq.com """ +from pathlib import Path +from platform import system +from threading import Thread from time import perf_counter, sleep +from warnings import warn -from .chromium_base import ChromiumBase, Timeout +from requests import Session + +from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBaseWaiter from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser from .commons.tools import port_is_using +from .commons.web import set_session_cookies from .configs.chromium_options import ChromiumOptions -from .errors import BrowserConnectError -from .setter import ChromiumPageSetter -from .waiter import ChromiumPageWaiter +from .errors import CallMethodError, BrowserConnectError +from .session_page import DownloadSetter class ChromiumPage(ChromiumBase): @@ -25,15 +31,17 @@ class ChromiumPage(ChromiumBase): :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间 """ + self._download_set = None + self._download_path = None super().__init__(addr_driver_opts, tab_id, timeout) def _set_start_options(self, addr_driver_opts, none): """设置浏览器启动属性 - :param addr_driver_opts: 'ip:port'、ChromiumOptions + :param addr_driver_opts: 'ip:port'、ChromiumDriver、ChromiumOptions :param none: 用于后代继承 :return: None """ - if not addr_driver_opts or isinstance(addr_driver_opts, ChromiumOptions): + if not addr_driver_opts or str(type(addr_driver_opts)).endswith(("ChromiumOptions'>", "DriverOptions'>")): self._driver_options = addr_driver_opts or ChromiumOptions(addr_driver_opts) # 接收浏览器地址和端口 @@ -72,9 +80,7 @@ class ChromiumPage(ChromiumBase): if not self._tab_obj: # 不是传入driver的情况 connect_browser(self._driver_options) if not tab_id: - u = f'http://{self.address}/json' - json = 
self._control_session.get(u).json() - self._control_session.get(u, headers={'Connection': 'close'}) + json = self._control_session.get(f'http://{self.address}/json').json() tab_id = [i['id'] for i in json if i['type'] == 'page'] if not tab_id: raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') @@ -88,25 +94,23 @@ class ChromiumPage(ChromiumBase): def _page_init(self): """页面相关设置""" - u = f'http://{self.address}/json/version' - ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] - self._control_session.get(u, headers={'Connection': 'close'}) + ws = self._control_session.get(f'http://{self.address}/json/version').json()['webSocketDebuggerUrl'] self._browser_driver = ChromiumDriver(ws.split('/')[-1], 'browser', self.address) self._browser_driver.start() self._alert = Alert() - self._tab_obj.set_listener('Page.javascriptDialogOpening', self._on_alert_open) - self._tab_obj.set_listener('Page.javascriptDialogClosed', self._on_alert_close) + self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open + self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close self._rect = None self._main_tab = self.tab_id - # try: - # self.download_set.by_browser() - # except CDPError: - # pass + try: + self.download_set.by_browser() + except CallMethodError: + pass self._process_id = None - r = self.browser_driver.call_method('SystemInfo.getProcessInfo') + r = self.browser_driver.SystemInfo.getProcessInfo() if 'processInfo' not in r: return None for i in r['processInfo']: @@ -127,9 +131,7 @@ class ChromiumPage(ChromiumBase): @property def tabs(self): """返回所有标签页id组成的列表""" - u = f'http://{self.address}/json' - j = self._control_session.get(u).json() # 不要改用cdp - self._control_session.get(u, headers={'Connection': 'close'}) + j = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp return [i['id'] for i in j if i['type'] == 'page'] @property @@ -153,6 +155,24 @@ class ChromiumPage(ChromiumBase): self._set = ChromiumPageSetter(self) return self._set 
+ @property + def download_path(self): + """返回默认下载路径""" + p = self._download_path or '' + return str(Path(p).absolute()) + + @property + def download_set(self): + """返回用于设置下载参数的对象""" + if self._download_set is None: + self._download_set = ChromiumDownloadSetter(self) + return self._download_set + + @property + def download(self): + """返回下载器对象""" + return self.download_set._switched_DownloadKit + @property def rect(self): if self._rect is None: @@ -174,29 +194,24 @@ class ChromiumPage(ChromiumBase): tab_id = tab_id or self.tab_id return ChromiumTab(self, tab_id) - def find_tabs(self, title=None, url=None, tab_type=None, single=True): + def find_tabs(self, text=None, by_title=True, by_url=None, special=False): """查找符合条件的tab,返回它们的id组成的列表 - :param title: 要匹配title的文本 - :param url: 要匹配url的文本 - :param tab_type: tab类型,可用列表输入多个 - :param single: 是否返回首个结果的id,为False返回所有信息 - :return: tab id或tab dict + :param text: 查询条件 + :param by_title: 是否匹配title + :param by_url: 是否匹配url + :param special: 是否匹配特殊tab,如打印页 + :return: tab id组成的列表 """ - u = f'http://{self.address}/json' - tabs = self._control_session.get(u).json() # 不要改用cdp - self._control_session.get(u, headers={'Connection': 'close'}) - if isinstance(tab_type, str): - tab_type = {tab_type} - elif isinstance(tab_type, (list, tuple, set)): - tab_type = set(tab_type) - elif tab_type is not None: - raise TypeError('tab_type只能是set、list、tuple、str、None。') + tabs = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp + if text is None or not (by_title or by_url): + return [i['id'] for i in tabs if (not special and i['type'] == 'page') + or (special and i['type'] not in ('page', 'iframe'))] - r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) - and (tab_type is None or i['type'] in tab_type))] - return r[0]['id'] if r and single else r + return [i['id'] for i in tabs if ((not special and i['type'] == 'page') + or (special and i['type'] not in ('page', 'iframe'))) + and 
((by_url and text in i['url']) or (by_title and text in i['title']))] - def new_tab(self, url=None, switch_to=False): + def new_tab(self, url=None, switch_to=True): """新建一个标签页,该标签页在最后面 :param url: 新标签页跳转到的网址 :param switch_to: 新建标签页后是否把焦点移过去 @@ -329,14 +344,14 @@ class ChromiumPage(ChromiumBase): res_text = self._alert.text if self._alert.type == 'prompt': - self.driver.call_method('Page.handleJavaScriptDialog', accept=accept, promptText=send) + self.driver.Page.handleJavaScriptDialog(accept=accept, promptText=send) else: - self.driver.call_method('Page.handleJavaScriptDialog', accept=accept) + self.driver.Page.handleJavaScriptDialog(accept=accept) return res_text def quit(self): """关闭浏览器""" - self._tab_obj.call_method('Browser.close') + self._tab_obj.Browser.close() self._tab_obj.stop() ip, port = self.address.split(':') while port_is_using(ip, port): @@ -363,6 +378,29 @@ class ChromiumPage(ChromiumBase): self._tab_obj.has_alert = True +class ChromiumPageWaiter(ChromiumBaseWaiter): + def __init__(self, page: ChromiumBase): + super().__init__(page) + self._listener = None + + def download_begin(self, timeout=None): + """等待浏览器下载开始 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :return: 是否等到下载开始 + """ + return self._driver.download_set.wait_download_begin(timeout) + + def new_tab(self, timeout=None): + """等待新标签页出现 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :return: 是否等到下载开始 + """ + timeout = timeout if timeout is not None else self._driver.timeout + end_time = perf_counter() + timeout + while self._driver.tab_id == self._driver.latest_tab and perf_counter() < end_time: + sleep(.01) + + class ChromiumTabRect(object): def __init__(self, page): self._page = page @@ -431,250 +469,128 @@ class ChromiumTabRect(object): def _get_browser_rect(self): """获取浏览器范围信息""" - return self._page.browser_driver.call_method('Browser.getWindowForTarget', targetId=self._page.tab_id)['bounds'] + return 
self._page.browser_driver.Browser.getWindowForTarget(targetId=self._page.tab_id)['bounds'] -# class BaseDownloadSetter(DownloadSetter): -# """用于设置下载参数的类""" -# -# def __init__(self, page): -# """ -# :param page: ChromiumPage对象 -# """ -# super().__init__(page) -# self._behavior = 'allowAndName' -# self._session = None -# self._save_path = '' -# self._rename = None -# self._waiting_download = False -# self._download_begin = False -# self._browser_missions = {} -# self._browser_downloading_count = 0 -# self._show_msg = True -# -# @property -# def session(self): -# """返回用于DownloadKit的Session对象""" -# if self._session is None: -# self._session = Session() -# return self._session -# -# @property -# def browser_missions(self): -# """返回浏览器下载任务""" -# return list(self._browser_missions.values()) -# -# @property -# def DownloadKit_missions(self): -# """返回DownloadKit下载任务""" -# return list(self.DownloadKit.missions.values()) -# -# @property -# def _switched_DownloadKit(self): -# """返回从浏览器同步cookies后的Session对象""" -# self._cookies_to_session() -# return self.DownloadKit -# -# def save_path(self, path): -# """设置下载路径 -# :param path: 下载路径 -# :return: None -# """ -# path = path or '' -# path = Path(path).absolute() -# path.mkdir(parents=True, exist_ok=True) -# path = str(path) -# self._save_path = path -# self._page._download_path = path -# try: -# self._page.browser_driver.Browser.setDownloadBehavior(behavior='allowAndName', downloadPath=path, -# eventsEnabled=True) -# except CDPError: -# warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') -# self._page.run_cdp('Page.setDownloadBehavior', behavior='allowAndName', downloadPath=path) -# -# self.DownloadKit.goal_path = path -# -# def rename(self, name): -# """设置浏览器下一个下载任务的文件名 -# :param name: 文件名,不带后缀时自动使用原后缀 -# :return: None -# """ -# self._rename = name -# -# def by_browser(self): -# """设置使用浏览器下载文件""" -# try: -# self._page.browser_driver.Browser.setDownloadBehavior(behavior='allowAndName', eventsEnabled=True, -# 
downloadPath=self._page.download_path) -# self._page.browser_driver.Browser.downloadWillBegin = self._download_will_begin -# self._page.browser_driver.Browser.downloadProgress = self._download_progress -# except CDPError: -# self._page.driver.Page.setDownloadBehavior(behavior='allowAndName', downloadPath=self._page.download_path) -# self._page.driver.Page.downloadWillBegin = self._download_will_begin -# self._page.driver.Page.downloadProgress = self._download_progress -# -# self._behavior = 'allowAndName' -# -# def by_DownloadKit(self): -# """设置使用DownloadKit下载文件""" -# try: -# self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) -# self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit -# except CDPError: -# raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') -# -# self._behavior = 'deny' -# -# def wait_download_begin(self, timeout=None): -# """等待浏览器下载开始 -# :param timeout: 等待超时时间,为None则使用页面对象timeout属性 -# :return: 是否等到下载开始 -# """ -# self._waiting_download = True -# result = False -# timeout = timeout if timeout is not None else self._page.timeout -# end_time = perf_counter() + timeout -# while perf_counter() < end_time: -# if self._download_begin: -# result = True -# break -# sleep(.05) -# self._download_begin = False -# self._waiting_download = False -# return result -# -# def wait_download_finish(self, timeout=None): -# """等待所有下载结束 -# :param timeout: 超时时间 -# :return: 是否等待到下载完成 -# """ -# timeout = timeout if timeout is not None else self._page.timeout -# end_time = perf_counter() + timeout -# while perf_counter() < end_time: -# if (self._DownloadKit is None or not self.DownloadKit.is_running) and self._browser_downloading_count == 0: -# return True -# sleep(.5) -# return False -# -# def show_msg(self, on_off=True): -# """是否显示下载信息 -# :param on_off: bool表示开或关 -# :return: None -# """ -# self._show_msg = on_off -# -# def _cookies_to_session(self): -# """把driver对象的cookies复制到session对象""" -# ua = 
self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] -# self.session.headers.update({"User-Agent": ua}) -# set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False)) -# -# def _download_by_DownloadKit(self, **kwargs): -# """拦截浏览器下载并用downloadKit下载""" -# url = kwargs['url'] -# if url.startswith('blob:'): -# raise TypeError('bolb:开头的链接无法使用DownloadKit下载,请用浏览器下载功能。') -# -# self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid']) -# -# if self._rename: -# rename = get_rename(kwargs['suggestedFilename'], self._rename) -# self._rename = None -# else: -# rename = kwargs['suggestedFilename'] -# -# mission = self._page.download.add(file_url=url, goal_path=self._page.download_path, rename=rename) -# Thread(target=self._wait_download_complete, args=(mission,), daemon=False).start() -# -# if self._waiting_download: -# self._download_begin = True -# -# self._browser_downloading_count += 1 -# -# if self._show_msg: -# print(f'(DownloadKit)开始下载:{Path(self._save_path) / rename}') -# -# def _download_will_begin(self, **kwargs): -# """浏览器下载即将开始时调用""" -# if self._rename: -# rename = get_rename(kwargs['suggestedFilename'], self._rename) -# self._rename = None -# else: -# rename = kwargs['suggestedFilename'] -# -# m = BrowserDownloadMission(kwargs['guid'], kwargs['url'], rename) -# self._browser_missions[kwargs['guid']] = m -# aid_path = Path(self._save_path) / rename -# -# if self._show_msg: -# print(f'(Browser)开始下载:{rename}') -# self._browser_downloading_count += 1 -# -# if self._file_exists == 'skip' and aid_path.exists(): -# m.state = 'skipped' -# m.save_path = aid_path.absolute() -# self._page.browser_driver.call_method('Browser.cancelDownload', guid=kwargs['guid']) -# (Path(self._save_path) / kwargs["guid"]).unlink(missing_ok=True) -# return -# -# if self._waiting_download: -# self._download_begin = True -# -# def _download_progress(self, **kwargs): -# """下载状态产生变化时调用""" -# guid = kwargs['guid'] -# m = 
self._browser_missions.get(guid, None) -# if m: -# m.size = kwargs['totalBytes'] -# m.received = kwargs['receivedBytes'] -# m.state = kwargs['state'] -# -# if m.state == 'completed': -# path = Path(self._save_path) / m.name -# from_path = Path(self._save_path) / guid -# if path.exists(): -# if self._file_exists == 'rename': -# path = get_usable_path(path) -# else: # 'overwrite' -# path.unlink() -# from_path.rename(path) -# m.save_path = path.absolute() -# -# if kwargs['state'] != 'inProgress': -# if self._show_msg and m: -# if kwargs['state'] == 'completed': -# print(f'(Browser)下载完成:{m.save_path}') -# elif m.state != 'skipped': -# print(f'(Browser)下载失败:{m.save_path}') -# else: -# print(f'(Browser)已跳过:{m.save_path}') -# self._browser_downloading_count -= 1 -# -# def _wait_download_complete(self, mission): -# """等待DownloadKit下载完成""" -# mission.wait(show=False) -# if self._show_msg: -# if mission.result == 'skip': -# print(f'(DownloadKit)已跳过:{mission.path}') -# elif not mission.result: -# print(f'(DownloadKit)下载失败:{mission.path}') -# else: -# print(f'(DownloadKit)下载完成:{mission.path}') +class ChromiumDownloadSetter(DownloadSetter): + """用于设置下载参数的类""" + def __init__(self, page): + """ + :param page: ChromiumPage对象 + """ + super().__init__(page) + self._behavior = 'allow' + self._download_th = None + self._session = None + self._waiting_download = False + self._download_begin = False -class BrowserDownloadMission(object): - def __init__(self, guid, url, name): - self.id = guid - self.url = url - self.name = name - self.save_path = None - self.state = None - self.size = None - self.received = None + @property + def session(self): + """返回用于DownloadKit的Session对象""" + if self._session is None: + self._session = Session() + return self._session - def __repr__(self): - return f'' + @property + def _switched_DownloadKit(self): + """返回从浏览器同步cookies后的Session对象""" + self._cookies_to_session() + return self.DownloadKit + + def save_path(self, path): + """设置下载路径 + :param path: 下载路径 
+ :return: None + """ + path = path or '' + path = Path(path).absolute() + path.mkdir(parents=True, exist_ok=True) + path = str(path) + self._page._download_path = path + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=path, + eventsEnabled=True) + except CallMethodError: + warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') + self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path) + + self.DownloadKit.goal_path = path + + def by_browser(self): + """设置使用浏览器下载文件""" + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, + downloadPath=self._page.download_path) + self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser + except CallMethodError: + self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) + self._page.driver.Page.downloadWillBegin = self._download_by_browser + + self._behavior = 'allow' + + def by_DownloadKit(self): + """设置使用DownloadKit下载文件""" + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) + self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit + except CallMethodError: + raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') + self._behavior = 'deny' + + def wait_download_begin(self, timeout=None): + """等待浏览器下载开始 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :return: 是否等到下载开始 + """ + self._waiting_download = True + result = False + timeout = timeout if timeout is not None else self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._download_begin: + result = True + break + sleep(.05) + self._download_begin = False + self._waiting_download = False + return result + + def _cookies_to_session(self): + """把driver对象的cookies复制到session对象""" + ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + 
self.session.headers.update({"User-Agent": ua}) + set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False)) + + def _download_by_DownloadKit(self, **kwargs): + """拦截浏览器下载并用downloadKit下载""" + url = kwargs['url'] + if url.startswith('blob:'): + self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, + downloadPath=self._page.download_path) + sleep(2) + self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) + + else: + self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid']) + self._page.download.add(file_url=url, goal_path=self._page.download_path, + rename=kwargs['suggestedFilename']) + if self._download_th is None or not self._download_th.is_alive(): + self._download_th = Thread(target=self._wait_download_complete, daemon=False) + self._download_th.start() + + if self._waiting_download: + self._download_begin = True + + def _download_by_browser(self, **kwargs): + """使用浏览器下载时调用""" + if self._waiting_download: + self._download_begin = True + + def _wait_download_complete(self): + """等待下载完成""" + self._page.download.wait() class Alert(object): @@ -689,9 +605,186 @@ class Alert(object): self.response_text = None -def get_rename(original, rename): - if '.' in rename: - return rename - else: - suffix = original[original.rfind('.'):] if '.' 
in original else '' - return f'{rename}{suffix}' +class WindowSetter(object): + """用于设置窗口大小的类""" + + def __init__(self, page): + """ + :param page: 页面对象 + """ + self._page = page + self._window_id = self._get_info()['windowId'] + + def maximized(self): + """窗口最大化""" + s = self._get_info()['bounds']['windowState'] + if s in ('fullscreen', 'minimized'): + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'maximized'}) + + def minimized(self): + """窗口最小化""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'minimized'}) + + def fullscreen(self): + """设置窗口为全屏""" + s = self._get_info()['bounds']['windowState'] + if s == 'minimized': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'fullscreen'}) + + def normal(self): + """设置窗口为常规模式""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'normal'}) + + def size(self, width=None, height=None): + """设置窗口大小 + :param width: 窗口宽度 + :param height: 窗口高度 + :return: None + """ + if width or height: + s = self._get_info()['bounds']['windowState'] + if s != 'normal': + self._perform({'windowState': 'normal'}) + info = self._get_info()['bounds'] + width = width - 16 if width else info['width'] + height = height + 7 if height else info['height'] + self._perform({'width': width, 'height': height}) + + def location(self, x=None, y=None): + """设置窗口在屏幕中的位置,相对左上角坐标 + :param x: 距离顶部距离 + :param y: 距离左边距离 + :return: None + """ + if x is not None or y is not None: + self.normal() + info = self._get_info()['bounds'] + x = x if x is not None else info['left'] + y = y if y is not None else info['top'] + self._perform({'left': x - 8, 'top': y}) + + def hide(self): + """隐藏浏览器窗口,只在Windows系统可用""" + show_or_hide_browser(self._page, hide=True) + + def show(self): + """显示浏览器窗口,只在Windows系统可用""" + 
show_or_hide_browser(self._page, hide=False) + + def _get_info(self): + """获取窗口位置及大小信息""" + return self._page.run_cdp('Browser.getWindowForTarget') + + def _perform(self, bounds): + """执行改变窗口大小操作 + :param bounds: 控制数据 + :return: None + """ + self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) + + +class ChromiumPageSetter(ChromiumBaseSetter): + def main_tab(self, tab_id=None): + """设置主tab + :param tab_id: 标签页id,不传入则设置当前tab + :return: None + """ + self._page._main_tab = tab_id or self._page.tab_id + + @property + def window(self): + """返回用于设置浏览器窗口的对象""" + return WindowSetter(self._page) + + def tab_to_front(self, tab_or_id=None): + """激活标签页使其处于最前面 + :param tab_or_id: 标签页对象或id,为None表示当前标签页 + :return: None + """ + if not tab_or_id: + tab_or_id = self._page.tab_id + elif isinstance(tab_or_id, ChromiumTab): + tab_or_id = tab_or_id.tab_id + self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') + + +def show_or_hide_browser(page, hide=True): + """执行显示或隐藏浏览器窗口 + :param page: ChromePage对象 + :param hide: 是否隐藏 + :return: None + """ + if not page.address.startswith(('127.0.0.1', 'localhost')): + return + + if system().lower() != 'windows': + raise OSError('该方法只能在Windows系统使用。') + + try: + from win32gui import ShowWindow + from win32con import SW_HIDE, SW_SHOW + except ImportError: + raise ImportError('请先安装:pip install pypiwin32') + + pid = page.process_id + if not pid: + return None + hds = get_chrome_hwnds_from_pid(pid, page.title) + sw = SW_HIDE if hide else SW_SHOW + for hd in hds: + ShowWindow(hd, sw) + + +def get_browser_progress_id(progress, address): + """获取浏览器进程id + :param progress: 已知的进程对象,没有时传入None + :param address: 浏览器管理地址,含端口 + :return: 进程id或None + """ + if progress: + return progress.pid + + from os import popen + port = address.split(':')[-1] + txt = '' + progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') + for progress in progresses: + if 'LISTENING' in progress: + txt = 
progress + break + if not txt: + return None + + return txt.split(' ')[-1] + + +def get_chrome_hwnds_from_pid(pid, title): + """通过PID查询句柄ID + :param pid: 进程id + :param title: 窗口标题 + :return: 进程句柄组成的列表 + """ + try: + from win32gui import IsWindow, GetWindowText, EnumWindows + from win32process import GetWindowThreadProcessId + except ImportError: + raise ImportError('请先安装win32gui,pip install pypiwin32') + + def callback(hwnd, hds): + if IsWindow(hwnd) and title in GetWindowText(hwnd): + _, found_pid = GetWindowThreadProcessId(hwnd) + if str(found_pid) == str(pid): + hds.append(hwnd) + return True + + hwnds = [] + EnumWindows(callback, hwnds) + return hwnds diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 916d85c..d4ceb86 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -3,35 +3,43 @@ @Author : g1879 @Contact : g1879@qq.com """ +from os import popen +from pathlib import Path +from threading import Thread from typing import Union, Tuple, List -from .setter import ChromiumPageSetter -from .chromium_base import ChromiumBase +from DownloadKit import DownloadKit +from requests import Session + +from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter, NetworkListener from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .configs.chromium_options import ChromiumOptions -from .waiter import ChromiumPageWaiter +from .configs.driver_options import DriverOptions +from .session_page import DownloadSetter class ChromiumPage(ChromiumBase): def __init__(self, - addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver] = None, + addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver, DriverOptions] = None, tab_id: str = None, timeout: float = None): - self._driver_options: ChromiumOptions = ... + self._driver_options: [ChromiumDriver, DriverOptions] = ... self._process_id: str = ... - # self._window_setter: WindowSetter = ... 
+ self._window_setter: WindowSetter = ... self._main_tab: str = ... self._alert: Alert = ... + self._download_path: str = ... + self._download_set: ChromiumDownloadSetter = ... self._browser_driver: ChromiumDriver = ... self._rect: ChromiumTabRect = ... def _connect_browser(self, - addr_driver_opts: Union[str, ChromiumDriver] = None, + addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, tab_id: str = None) -> None: ... - def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver], none) -> None: ... + def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions], none) -> None: ... def _page_init(self) -> None: ... @@ -62,12 +70,21 @@ class ChromiumPage(ChromiumBase): @property def set(self) -> ChromiumPageSetter: ... + @property + def download_set(self) -> ChromiumDownloadSetter: ... + + @property + def download(self) -> DownloadKit: ... + + @property + def download_path(self) -> str: ... + def get_tab(self, tab_id: str = None) -> ChromiumTab: ... - def find_tabs(self, title: str = None, url: str = None, - tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ... + def find_tabs(self, text: str = None, by_title: bool = True, by_url: bool = None, + special: bool = False) -> List[str]: ... - def new_tab(self, url: str = None, switch_to: bool = False) -> str: ... + def new_tab(self, url: str = None, switch_to: bool = True) -> str: ... def to_main_tab(self) -> None: ... @@ -92,6 +109,15 @@ class ChromiumPage(ChromiumBase): def _on_alert_open(self, **kwargs): ... +class ChromiumPageWaiter(ChromiumBaseWaiter): + _driver: ChromiumPage = ... + _listener: Union[NetworkListener, None] = ... + + def download_begin(self, timeout: float = None) -> bool: ... + + def new_tab(self, timeout: float = None) -> bool: ... + + class ChromiumTabRect(object): def __init__(self, page: ChromiumPage): self._page: ChromiumPage = ... 
@@ -125,65 +151,36 @@ class ChromiumTabRect(object): def _get_browser_rect(self) -> dict: ... -# class BaseDownloadSetter(DownloadSetter): -# def __init__(self, page: ChromiumPage): -# self._page: ChromiumPage = ... -# self._behavior: str = ... -# self._session: Session = ... -# self._save_path: str = ... -# self._rename: str = ... -# self._waiting_download: bool = ... -# self._download_begin: bool = ... -# self._browser_missions: Dict[str, BrowserDownloadMission] = ... -# self._browser_downloading_count: int = ... -# self._show_msg: bool = ... -# -# @property -# def session(self) -> Session: ... -# -# @property -# def browser_missions(self) -> List[BrowserDownloadMission]: ... -# -# @property -# def DownloadKit_missions(self) -> List[Mission]: ... -# -# @property -# def _switched_DownloadKit(self) -> DownloadKit: ... -# -# def save_path(self, path: Union[str, Path]) -> None: ... -# -# def rename(self, name: str) -> None: ... -# -# def by_browser(self) -> None: ... -# -# def by_DownloadKit(self) -> None: ... -# -# def wait_download_begin(self, timeout: float = None) -> bool: ... -# -# def wait_download_finish(self, timeout: float = None) -> bool: ... -# -# def show_msg(self, on_off: bool = True) -> None: ... -# -# def _cookies_to_session(self) -> None: ... -# -# def _download_by_DownloadKit(self, **kwargs) -> None: ... -# -# def _download_will_begin(self, **kwargs) -> None: ... -# -# def _download_progress(self, **kwargs) -> None: ... -# -# def _wait_download_complete(self, mission: Mission) -> None: ... +class ChromiumDownloadSetter(DownloadSetter): + def __init__(self, page: ChromiumPage): + self._page: ChromiumPage = ... + self._behavior: str = ... + self._download_th: Thread = ... + self._session: Session = None + self._waiting_download: bool = ... + self._download_begin: bool = ... + @property + def session(self) -> Session: ... -class BrowserDownloadMission(object): - def __init__(self, guid: str, url: str, name: str): - self.id: str = ... 
- self.url: str = ... - self.name: str = ... - self.save_path: str = ... - self.state: str = ... - self.size: str = ... - self.received: str = ... + @property + def _switched_DownloadKit(self) -> DownloadKit: ... + + def save_path(self, path: Union[str, Path]) -> None: ... + + def by_browser(self) -> None: ... + + def by_DownloadKit(self) -> None: ... + + def wait_download_begin(self, timeout: float = None) -> bool: ... + + def _cookies_to_session(self) -> None: ... + + def _download_by_DownloadKit(self, **kwargs) -> None: ... + + def _download_by_browser(self, **kwargs) -> None: ... + + def _wait_download_complete(self) -> None: ... class Alert(object): @@ -197,4 +194,48 @@ class Alert(object): self.response_text: str = ... -def get_rename(original: str, rename: str) -> str: ... +class WindowSetter(object): + + def __init__(self, page: ChromiumPage): + self._page: ChromiumPage = ... + self._window_id: str = ... + + def maximized(self) -> None: ... + + def minimized(self) -> None: ... + + def fullscreen(self) -> None: ... + + def normal(self) -> None: ... + + def size(self, width: int = None, height: int = None) -> None: ... + + def location(self, x: int = None, y: int = None) -> None: ... + + def hide(self) -> None: ... + + def show(self) -> None: ... + + def _get_info(self) -> dict: ... + + def _perform(self, bounds: dict) -> None: ... + + +def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... + + +def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... + + +def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... + + +class ChromiumPageSetter(ChromiumBaseSetter): + _page: ChromiumPage = ... + + def main_tab(self, tab_id: str = None) -> None: ... + + @property + def window(self) -> WindowSetter: ... + + def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... 
diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index 9279600..a97c9e7 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -5,10 +5,9 @@ """ from copy import copy -from .chromium_base import ChromiumBase +from .chromium_base import ChromiumBase, ChromiumBaseSetter from .commons.web import set_session_cookies, set_browser_cookies -from .session_page import SessionPage -from .setter import WebPageTabSetter +from .session_page import SessionPage, SessionPageSetter, DownloadSetter class ChromiumTab(ChromiumBase): @@ -29,10 +28,6 @@ class ChromiumTab(ChromiumBase): self.retry_interval = self.page.retry_interval self._page_load_strategy = self.page.page_load_strategy - def close(self): - """关闭当前标签页""" - self.page.close_tabs(self.tab_id) - @property def rect(self): """返回获取窗口坐标和大小的对象""" @@ -53,12 +48,11 @@ class WebPageTab(SessionPage, ChromiumTab): self._has_driver = True self._has_session = True self._session = copy(page.session) - self._response = None - self._set = None + self._response = None self._download_set = None - self._download_path = page.download_path - self._DownloadKit = None + self._download_path = None + self._set = None super(SessionPage, self)._set_runtime_settings() self._connect_browser(tab_id) @@ -126,14 +120,6 @@ class WebPageTab(SessionPage, ChromiumTab): """以dict方式返回cookies""" return super().cookies - @property - def user_agent(self): - """返回user agent""" - if self._mode == 's': - return super().user_agent - elif self._mode == 'd': - return super(SessionPage, self).user_agent - @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" @@ -166,6 +152,18 @@ class WebPageTab(SessionPage, ChromiumTab): self._set = WebPageTabSetter(self) return self._set + @property + def download_set(self): + """返回下载设置对象""" + if self._download_set is None: + self._download_set = WebPageTabDownloadSetter(self) + return self._download_set + + @property + def download(self): + """返回下载器对象""" + return 
self.download_set._switched_DownloadKit + def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url :param url: 目标url @@ -294,12 +292,17 @@ class WebPageTab(SessionPage, ChromiumTab): selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] self.session.headers.update({"User-Agent": selenium_user_agent}) - set_session_cookies(self.session, super(SessionPage, self).get_cookies()) + # set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) + # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) + set_session_cookies(self.session, self._get_driver_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" if not self._has_driver: return + + # set_browser_cookies(self, super().get_cookies(as_dict=True)) + # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -312,7 +315,22 @@ class WebPageTab(SessionPage, ChromiumTab): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) + return self._get_driver_cookies(as_dict, all_info) + + def _get_driver_cookies(self, as_dict=False, all_info=False): + """获取浏览器cookies + :param as_dict: 是否以dict形式返回,为True时all_info无效 + :param all_info: 是否返回所有信息,为False时只返回name、value、domain + :return: cookies信息 + """ + cookies = self.run_cdp('Network.getCookies')['cookies'] + if as_dict: + return {cookie['name']: cookie['value'] for cookie in cookies} + elif all_info: + return cookies + else: + return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} + for cookie in cookies] def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 @@ -328,3 
+346,54 @@ class WebPageTab(SessionPage, ChromiumTab): elif self._mode == 'd': return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, relative=relative) + + +class WebPageTabSetter(ChromiumBaseSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumBaseSetter(self._page) + + def cookies(self, cookies): + """添加cookies信息到浏览器或session对象 + :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` + :return: None + """ + if self._page.mode == 'd' and self._page._has_driver: + self._chromium_setter.cookies(cookies) + elif self._page.mode == 's' and self._page._has_session: + self._session_setter.cookies(cookies) + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page._has_session: + self._session_setter.headers(headers) + if self._page._has_driver: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if self._page._has_session: + self._session_setter.user_agent(ua) + if self._page._has_driver: + self._chromium_setter.user_agent(ua, platform) + + +class WebPageTabDownloadSetter(DownloadSetter): + """用于设置下载参数的类""" + + def __init__(self, page): + super().__init__(page) + self._session = page.session + + @property + def _switched_DownloadKit(self): + """返回从浏览器同步cookies后的Session对象""" + if self._page.mode == 'd': + ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + self._page.session.headers.update({"User-Agent": ua}) + set_session_cookies(self._page.session, self._page.get_cookies(as_dict=False, all_domains=False)) + return self.DownloadKit diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 04f3ad6..9def1d7 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -5,15 +5,15 @@ """ from 
typing import Union, Tuple, Any, List +from DownloadKit import DownloadKit from requests import Session, Response -from .chromium_base import ChromiumBase +from .chromium_base import ChromiumBase, ChromiumBaseSetter from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage, ChromiumTabRect from .session_element import SessionElement -from .session_page import SessionPage -from .setter import WebPageTabSetter +from .session_page import SessionPage, SessionPageSetter, DownloadSetter from .web_page import WebPage @@ -24,8 +24,6 @@ class ChromiumTab(ChromiumBase): def _set_runtime_settings(self) -> None: ... - def close(self) -> None: ... - @property def rect(self) -> ChromiumTabRect: ... @@ -36,6 +34,8 @@ class WebPageTab(SessionPage, ChromiumTab): self._mode: str = ... self._has_driver = ... self._has_session = ... + self._download_set = ... + self._download_path = ... def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], @@ -65,9 +65,6 @@ class WebPageTab(SessionPage, ChromiumTab): @property def cookies(self) -> dict: ... - @property - def user_agent(self) -> str: ... - @property def session(self) -> Session: ... @@ -122,6 +119,8 @@ class WebPageTab(SessionPage, ChromiumTab): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... + def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... + # ----------------重写SessionPage的函数----------------------- def post(self, url: str, @@ -146,7 +145,35 @@ class WebPageTab(SessionPage, ChromiumTab): @property def set(self) -> WebPageTabSetter: ... + @property + def download(self) -> DownloadKit: ... + + @property + def download_set(self) -> WebPageTabDownloadSetter: ... 
+ def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ Union[ChromiumElement, str, ChromiumFrame]]]: ... + + +class WebPageTabSetter(ChromiumBaseSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumBaseSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def cookies(self, cookies) -> None: ... + + +class WebPageTabDownloadSetter(DownloadSetter): + """用于设置下载参数的类""" + + def __init__(self, page: WebPageTab): + self._page: WebPageTab = ... + + @property + def _switched_DownloadKit(self) -> DownloadKit: ... diff --git a/DrissionPage/common.pyi b/DrissionPage/common.pyi deleted file mode 100644 index 54677db..0000000 --- a/DrissionPage/common.pyi +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding:utf-8 -*- -from .session_element import make_session_ele as make_session_ele - -from .action_chains import ActionChains as ActionChains -from .commons.keys import Keys as Keys -from .commons.by import By as By -from .commons.constants import Settings as Settings diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 86b3e9e..73349aa 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -11,13 +11,14 @@ from time import perf_counter, sleep from requests import get as requests_get +from DrissionPage.configs.chromium_options import ChromiumOptions from DrissionPage.errors import BrowserConnectError from .tools import port_is_using def connect_browser(option): """连接或启动浏览器 - :param option: ChromiumOptions对象 + :param option: DriverOptions对象 :return: chrome 路径和进程对象组成的元组 """ debugger_address = option.debugger_address.replace('localhost', 
'127.0.0.1').lstrip('http://').lstrip('https://') @@ -54,8 +55,8 @@ def connect_browser(option): def get_launch_args(opt): - """从ChromiumOptions获取命令行启动参数 - :param opt: ChromiumOptions + """从DriverOptions获取命令行启动参数 + :param opt: DriverOptions或ChromiumOptions :return: 启动参数列表 """ # ----------处理arguments----------- @@ -86,7 +87,7 @@ def get_launch_args(opt): result = list(result) # ----------处理插件extensions------------- - ext = opt.extensions + ext = opt.extensions if isinstance(opt, ChromiumOptions) else opt._extension_files if ext: ext = ','.join(set(ext)) ext = f'--load-extension={ext}' @@ -97,11 +98,15 @@ def get_launch_args(opt): def set_prefs(opt): """处理启动配置中的prefs项,目前只能对已存在文件夹配置 - :param opt: ChromiumOptions + :param opt: DriverOptions或ChromiumOptions :return: None """ - prefs = opt.preferences - del_list = opt._prefs_to_del + if isinstance(opt, ChromiumOptions): + prefs = opt.preferences + del_list = opt._prefs_to_del + else: + prefs = opt.experimental_options.get('prefs', []) + del_list = [] if not opt.user_data_path: return @@ -145,9 +150,7 @@ def test_connect(ip, port): end_time = perf_counter() + 30 while perf_counter() < end_time: try: - u = f'http://{ip}:{port}/json' - tabs = requests_get(u, timeout=10, proxies={'http': None, 'https': None}).json() - requests_get(u, headers={'Connection': 'close'}, proxies={'http': None, 'https': None}) + tabs = requests_get(f'http://{ip}:{port}/json', timeout=10).json() for tab in tabs: if tab['type'] == 'page': return diff --git a/DrissionPage/commons/browser.pyi b/DrissionPage/commons/browser.pyi index ede46db..2324000 100644 --- a/DrissionPage/commons/browser.pyi +++ b/DrissionPage/commons/browser.pyi @@ -3,13 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ +from typing import Union + from DrissionPage.configs.chromium_options import ChromiumOptions +from DrissionPage.configs.driver_options import DriverOptions -def connect_browser(option: ChromiumOptions) -> tuple: ... 
+def connect_browser(option: Union[ChromiumOptions, DriverOptions]) -> tuple: ... -def get_launch_args(opt: ChromiumOptions) -> list: ... +def get_launch_args(opt: Union[ChromiumOptions, DriverOptions]) -> list: ... -def set_prefs(opt: ChromiumOptions) -> None: ... +def set_prefs(opt: Union[ChromiumOptions, DriverOptions]) -> None: ... diff --git a/DrissionPage/commons/constants.py b/DrissionPage/commons/constants.py index 2a219c7..c06c2c4 100644 --- a/DrissionPage/commons/constants.py +++ b/DrissionPage/commons/constants.py @@ -11,9 +11,8 @@ ERROR = 'error' class Settings(object): - raise_when_ele_not_found = False - raise_when_click_failed = False - raise_when_wait_failed = False + raise_ele_not_found = False + raise_click_failed = False class NoneElement(object): diff --git a/DrissionPage/commons/tools.py b/DrissionPage/commons/tools.py index 5adf7ca..1a70f15 100644 --- a/DrissionPage/commons/tools.py +++ b/DrissionPage/commons/tools.py @@ -3,10 +3,50 @@ @Author : g1879 @Contact : g1879@qq.com """ -from platform import system from pathlib import Path from re import search, sub from shutil import rmtree +from zipfile import ZipFile + + +def get_exe_from_port(port): + """获取端口号第一条进程的可执行文件路径 + :param port: 端口号 + :return: 可执行文件的绝对路径 + """ + from os import popen + + pid = get_pid_from_port(port) + if not pid: + return + else: + file_lst = popen(f'wmic process where processid={pid} get executablepath').read().split('\n') + return file_lst[2].strip() if len(file_lst) > 2 else None + + +def get_pid_from_port(port): + """获取端口号第一条进程的pid + :param port: 端口号 + :return: 进程id + """ + from platform import system + if system().lower() != 'windows' or port is None: + return None + + from os import popen + from time import perf_counter + + try: # 避免Anaconda中可能产生的报错 + process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] + + t = perf_counter() + while not process and perf_counter() - t < 5: + process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] + + 
return process.split(' ')[-1] or None + + except Exception: + return None def get_usable_path(path): @@ -103,114 +143,10 @@ def clean_folder(folder_path, ignore=None): rmtree(f, True) -def show_or_hide_browser(page, hide=True): - """执行显示或隐藏浏览器窗口 - :param page: ChromePage对象 - :param hide: 是否隐藏 - :return: None - """ - if not page.address.startswith(('127.0.0.1', 'localhost')): +def unzip(zip_path, to_path): + """解压下载的chromedriver.zip文件""" + if not zip_path: return - if system().lower() != 'windows': - raise OSError('该方法只能在Windows系统使用。') - - try: - from win32gui import ShowWindow - from win32con import SW_HIDE, SW_SHOW - except ImportError: - raise ImportError('请先安装:pip install pypiwin32') - - pid = page.process_id - if not pid: - return None - hds = get_chrome_hwnds_from_pid(pid, page.title) - sw = SW_HIDE if hide else SW_SHOW - for hd in hds: - ShowWindow(hd, sw) - - -def get_browser_progress_id(progress, address): - """获取浏览器进程id - :param progress: 已知的进程对象,没有时传入None - :param address: 浏览器管理地址,含端口 - :return: 进程id或None - """ - if progress: - return progress.pid - - from os import popen - port = address.split(':')[-1] - txt = '' - progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') - for progress in progresses: - if 'LISTENING' in progress: - txt = progress - break - if not txt: - return None - - return txt.split(' ')[-1] - - -def get_chrome_hwnds_from_pid(pid, title): - """通过PID查询句柄ID - :param pid: 进程id - :param title: 窗口标题 - :return: 进程句柄组成的列表 - """ - try: - from win32gui import IsWindow, GetWindowText, EnumWindows - from win32process import GetWindowThreadProcessId - except ImportError: - raise ImportError('请先安装win32gui,pip install pypiwin32') - - def callback(hwnd, hds): - if IsWindow(hwnd) and title in GetWindowText(hwnd): - _, found_pid = GetWindowThreadProcessId(hwnd) - if str(found_pid) == str(pid): - hds.append(hwnd) - return True - - hwnds = [] - EnumWindows(callback, hwnds) - return hwnds - -# def get_exe_from_port(port): -# 
"""获取端口号第一条进程的可执行文件路径 -# :param port: 端口号 -# :return: 可执行文件的绝对路径 -# """ -# from os import popen -# -# pid = get_pid_from_port(port) -# if not pid: -# return -# else: -# file_lst = popen(f'wmic process where processid={pid} get executablepath').read().split('\n') -# return file_lst[2].strip() if len(file_lst) > 2 else None -# -# -# def get_pid_from_port(port): -# """获取端口号第一条进程的pid -# :param port: 端口号 -# :return: 进程id -# """ -# from platform import system -# if system().lower() != 'windows' or port is None: -# return None -# -# from os import popen -# from time import perf_counter -# -# try: # 避免Anaconda中可能产生的报错 -# process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] -# -# t = perf_counter() -# while not process and perf_counter() - t < 5: -# process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] -# -# return process.split(' ')[-1] or None -# -# except Exception: -# return None + with ZipFile(zip_path, 'r') as f: + return [f.extract(f.namelist()[0], path=to_path)] diff --git a/DrissionPage/commons/tools.pyi b/DrissionPage/commons/tools.pyi index 54b8197..a95722d 100644 --- a/DrissionPage/commons/tools.pyi +++ b/DrissionPage/commons/tools.pyi @@ -3,17 +3,14 @@ @Author : g1879 @Contact : g1879@qq.com """ -from os import popen from pathlib import Path from typing import Union -from chromium_page import ChromiumPage + +def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... -# def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... - - -# def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ... +def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ... def get_usable_path(path: Union[str, Path]) -> Path: ... @@ -31,10 +28,4 @@ def port_is_using(ip: str, port: Union[str, int]) -> bool: ... def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... -def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... 
- - -def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... - - -def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... +def unzip(zip_path: str, to_path: str) -> Union[list, None]: ... diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index 928b545..0a7cd14 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -3,15 +3,103 @@ @Author : g1879 @Contact : g1879@qq.com """ +from base64 import b64decode from html import unescape from http.cookiejar import Cookie +from json import loads, JSONDecodeError from re import sub from urllib.parse import urlparse, urljoin, urlunparse from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict from tldextract import extract +class ResponseData(object): + """返回的数据包管理类""" + __slots__ = ('requestId', 'response', 'rawBody', 'tab', 'target', 'url', 'status', 'statusText', 'securityDetails', + 'headersText', 'mimeType', 'requestHeadersText', 'connectionReused', 'connectionId', 'remoteIPAddress', + 'remotePort', 'fromDiskCache', 'fromServiceWorker', 'fromPrefetchCache', 'encodedDataLength', 'timing', + 'serviceWorkerResponseSource', 'responseTime', 'cacheStorageCacheName', 'protocol', 'securityState', + '_requestHeaders', '_body', '_base64_body', '_rawPostData', '_postData', 'method') + + def __init__(self, request_id, response, body, tab, target): + """ + :param response: response的数据 + :param body: response包含的内容 + :param tab: 产生这个数据包的tab的id + :param target: 监听目标 + """ + self.requestId = request_id + self.response = CaseInsensitiveDict(response) + self.rawBody = body + self.tab = tab + self.target = target + self._requestHeaders = None + self._postData = None + self._body = None + self._base64_body = False + self._rawPostData = None + + def __getattr__(self, item): + return self.response.get(item, None) + + def __getitem__(self, item): + return self.response.get(item, None) + + def 
__repr__(self): + return f'' + + @property + def headers(self): + """以大小写不敏感字典返回headers数据""" + headers = self.response.get('headers', None) + return CaseInsensitiveDict(headers) if headers else None + + @property + def requestHeaders(self): + """以大小写不敏感字典返回requestHeaders数据""" + if self._requestHeaders: + return self._requestHeaders + headers = self.response.get('requestHeaders', None) + return CaseInsensitiveDict(headers) if headers else None + + @requestHeaders.setter + def requestHeaders(self, val): + """设置requestHeaders""" + self._requestHeaders = val + + @property + def postData(self): + """返回postData数据""" + if self._postData is None and self._rawPostData: + try: + self._postData = loads(self._rawPostData) + except (JSONDecodeError, TypeError): + self._postData = self._rawPostData + return self._postData + + @postData.setter + def postData(self, val): + """设置postData""" + self._rawPostData = val + + @property + def body(self): + """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" + if self._body is None: + if self._base64_body: + self._body = b64decode(self.rawBody) + + else: + try: + self._body = loads(self.rawBody) + except (JSONDecodeError, TypeError): + self._body = self.rawBody + + return self._body + + def get_ele_txt(e): """获取元素内所有文本 :param e: 元素对象 @@ -102,6 +190,8 @@ def location_in_viewport(page, loc_x, loc_y): if (x< scrollLeft || y < scrollTop || x > vWidth + scrollLeft || y > vHeight + scrollTop){{return false;}} return true;}}''' return page.run_js(js) + # const vWidth = window.innerWidth || document.documentElement.clientWidth; + # const vHeight = window.innerHeight || document.documentElement.clientHeight; def offset_scroll(ele, offset_x, offset_y): @@ -244,7 +334,8 @@ def set_browser_cookies(page, cookies): :param cookies: cookies信息 :return: None """ - for cookie in cookies_to_tuple(cookies): + cookies = cookies_to_tuple(cookies) + for cookie in cookies: if 'expiry' in cookie: cookie['expires'] = int(cookie['expiry']) 
cookie.pop('expiry') @@ -252,15 +343,6 @@ def set_browser_cookies(page, cookies): cookie['expires'] = int(cookie['expires']) if cookie['value'] is None: cookie['value'] = '' - if cookie['name'].startswith('__Secure-'): - cookie['secure'] = True - - if cookie['name'].startswith('__Host-'): - cookie['path'] = '/' - cookie['secure'] = True - cookie['url'] = page.url - page.run_cdp_loaded('Network.setCookie', **cookie) - continue # 不用设置域名,可退出 if cookie.get('domain', None): try: @@ -294,13 +376,7 @@ def is_cookie_in_driver(page, cookie): :param cookie: dict格式cookie :return: bool """ - if 'domain' in cookie: - for c in page.get_cookies(all_domains=True): - if cookie['name'] == c['name'] and cookie['value'] == c['value'] and cookie['domain'] == c.get('domain', - None): - return True - else: - for c in page.get_cookies(all_domains=True): - if cookie['name'] == c['name'] and cookie['value'] == c['value']: - return True + for c in page.get_cookies(): + if cookie['name'] == c['name'] and cookie['value'] == c['value']: + return True return False diff --git a/DrissionPage/commons/web.pyi b/DrissionPage/commons/web.pyi index b91ba71..b57ed66 100644 --- a/DrissionPage/commons/web.pyi +++ b/DrissionPage/commons/web.pyi @@ -8,12 +8,73 @@ from typing import Union from requests import Session from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict from DrissionPage.base import DrissionElement, BasePage from DrissionPage.chromium_element import ChromiumElement from DrissionPage.chromium_base import ChromiumBase +class ResponseData(object): + + def __init__(self, request_id: str, response: dict, body: str, tab: str, target: str): + self.requestId: str = ... + self.response: CaseInsensitiveDict = ... + self.rawBody: str = ... + self._body: Union[str, dict, bytes] = ... + self._base64_body: bool = ... + self.tab: str = ... + self.target: str = ... + self.method: str = ... + self._postData: dict = ... + self._rawPostData: str = ... 
+ self.url: str = ... + self.status: str = ... + self.statusText: str = ... + self.headersText: str = ... + self.mimeType: str = ... + self.requestHeadersText: str = ... + self.connectionReused: str = ... + self.connectionId: str = ... + self.remoteIPAddress: str = ... + self.remotePort: str = ... + self.fromDiskCache: str = ... + self.fromServiceWorker: str = ... + self.fromPrefetchCache: str = ... + self.encodedDataLength: str = ... + self.timing: str = ... + self.serviceWorkerResponseSource: str = ... + self.responseTime: str = ... + self.cacheStorageCacheName: str = ... + self.protocol: str = ... + self.securityState: str = ... + self.securityDetails: str = ... + + def __getattr__(self, item: str) -> Union[str, None]: ... + + def __getitem__(self, item: str) -> Union[str, None]: ... + + def __repr__(self) -> str: ... + + @property + def headers(self) -> Union[CaseInsensitiveDict, None]: ... + + @property + def requestHeaders(self) -> Union[CaseInsensitiveDict, None]: ... + + @requestHeaders.setter + def requestHeaders(self, val:dict) -> None: ... + + @property + def postData(self) -> Union[dict, str, None]: ... + + @postData.setter + def postData(self, val: Union[str, dict]) -> None: ... + + @property + def body(self) -> Union[str, dict, bytes]: ... + + def get_ele_txt(e: DrissionElement) -> str: ... 
diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/configs/chromium_options.py index 6f32d90..e31d97e 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/configs/chromium_options.py @@ -26,7 +26,7 @@ class ChromiumOptions(object): self.ini_path = om.ini_path options = om.chrome_options - self._download_path = om.paths.get('download_path', '') + self._download_path = om.paths.get('download_path', None) self._arguments = options.get('arguments', []) self._binary_location = options.get('binary_location', '') self._extensions = options.get('extensions', []) @@ -62,7 +62,7 @@ class ChromiumOptions(object): self.ini_path = None self._binary_location = "chrome" self._arguments = [] - self._download_path = '' + self._download_path = None self._extensions = [] self._prefs = {} self._timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} @@ -146,11 +146,8 @@ class ChromiumOptions(object): """ self.remove_argument(arg) if value is not False: - if arg == '--headless' and value is None: - self._arguments.append('--headless=new') - else: - arg_str = arg if value is None else f'{arg}={value}' - self._arguments.append(arg_str) + arg_str = arg if value is None else f'{arg}={value}' + self._arguments.append(arg_str) return self def remove_argument(self, value): diff --git a/DrissionPage/configs/configs.ini b/DrissionPage/configs/configs.ini index 54d20ab..6591d2f 100644 --- a/DrissionPage/configs/configs.ini +++ b/DrissionPage/configs/configs.ini @@ -1,10 +1,11 @@ [paths] +chromedriver_path = download_path = [chrome_options] debugger_address = 127.0.0.1:9222 binary_location = chrome -arguments = ['--remote-allow-origins=*', '--no-first-run', '--disable-infobars', '--disable-popup-blocking'] +arguments = ['--remote-allow-origins=*', '--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking'] extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 
'profile.default_content_setting_values': {'notifications': 2}}} page_load_strategy = normal diff --git a/DrissionPage/configs/session_options.py b/DrissionPage/configs/session_options.py index 03fc800..eaa763b 100644 --- a/DrissionPage/configs/session_options.py +++ b/DrissionPage/configs/session_options.py @@ -21,7 +21,7 @@ class SessionOptions(object): :param ini_path: ini文件路径 """ self.ini_path = None - self._download_path = '' + self._download_path = None self._headers = None self._cookies = None self._auth = None @@ -73,7 +73,7 @@ class SessionOptions(object): self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) self._timeout = om.timeouts.get('implicit', 10) - self._download_path = om.paths.get('download_path', '') + self._download_path = om.paths.get('download_path', None) # ===========须独立处理的项开始============ @property @@ -110,13 +110,14 @@ class SessionOptions(object): self._proxies = {} return self._proxies - def set_proxies(self, http=None, https=None): + def set_proxies(self, http, https=None): """设置proxies参数 :param http: http代理地址 :param https: https代理地址 :return: 返回当前对象 """ - self._sets('proxies', {'http': http, 'https': https}) + proxies = None if http == https is None else {'http': http, 'https': https or http} + self._sets('proxies', proxies) return self # ===========须独立处理的项结束============ diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index d783c5b..98e1260 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -6,9 +6,28 @@ from os import popen from pathlib import Path from re import search +from typing import Union +from .commons.constants import Settings +from .commons.tools import unzip from .configs.chromium_options import ChromiumOptions from .configs.options_manage import OptionsManager +from .session_page import SessionPage + +try: + from selenium import webdriver + from DrissionPage.mixpage.drission import Drission + from .configs.driver_options import DriverOptions +except 
ModuleNotFoundError: + pass + + +def raise_when_ele_not_found(on_off=True): + """设置全局变量,找不到元素时是否抛出异常 + :param on_off: True 或 False + :return: None + """ + Settings.raise_ele_not_found = on_off def configs_to_here(save_name=None): @@ -29,14 +48,19 @@ def show_settings(ini_path=None): OptionsManager(ini_path).show() -def set_paths(browser_path=None, +def set_paths(driver_path=None, + chrome_path=None, + browser_path=None, local_port=None, debugger_address=None, download_path=None, user_data_path=None, cache_path=None, - ini_path=None): + ini_path=None, + check_version=False): """快捷的路径设置函数 + :param driver_path: chromedriver.exe路径 + :param chrome_path: 浏览器可执行文件路径 :param browser_path: 浏览器可执行文件路径 :param local_port: 本地端口号 :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 @@ -44,6 +68,7 @@ def set_paths(browser_path=None, :param user_data_path: 用户数据路径 :param cache_path: 缓存路径 :param ini_path: 要修改的ini文件路径 + :param check_version: 是否检查chromedriver和chrome是否匹配 :return: None """ om = OptionsManager(ini_path) @@ -51,6 +76,12 @@ def set_paths(browser_path=None, def format_path(path: str) -> str: return str(path) if path else '' + if driver_path is not None: + om.set_item('paths', 'chromedriver_path', format_path(driver_path)) + + if chrome_path is not None: + om.set_item('chrome_options', 'binary_location', format_path(chrome_path)) + if browser_path is not None: om.set_item('chrome_options', 'binary_location', format_path(browser_path)) @@ -72,6 +103,9 @@ def set_paths(browser_path=None, if cache_path is not None: set_argument('--disk-cache-dir', format_path(cache_path), ini_path) + if check_version: + check_driver_version(format_path(driver_path), format_path(browser_path)) + def use_auto_port(on_off=True, ini_path=None): """设置启动浏览器时使用自动分配的端口和临时文件夹 @@ -169,6 +203,89 @@ def set_proxy(proxy, ini_path=None): set_argument('--proxy-server', proxy, ini_path) +def check_driver_version(driver_path=None, chrome_path=None): + """检查传入的chrome和chromedriver是否匹配 + :param driver_path: 
chromedriver.exe路径 + :param chrome_path: chrome.exe路径 + :return: 是否匹配 + """ + print('正在检测可用性...') + om = OptionsManager() + driver_path = driver_path or om.get_value('paths', 'chromedriver_path') or 'chromedriver' + chrome_path = str(chrome_path or om.get_value('chrome_options', 'binary_location')) + do = DriverOptions(read_file=False) + do.add_argument('--headless') + + if chrome_path: + do.binary_location = chrome_path + + try: + driver = webdriver.Chrome(driver_path, options=do) + driver.quit() + print('版本匹配,可正常使用。') + + return True + + except Exception as e: + print(f'出现异常:\n{e}\n可执行easy_set.get_match_driver()自动下载匹配的版本。\n' + f'或自行从以下网址下载:http://npm.taobao.org/mirrors/chromedriver/') + + return False + + +# -------------------------自动识别chrome版本号并下载对应driver------------------------ +def get_match_driver(ini_path='default', + save_path=None, + chrome_path=None, + show_msg=True, + check_version=True): + """自动识别chrome版本并下载匹配的driver + :param ini_path: 要读取和修改的ini文件路径 + :param save_path: chromedriver保存路径 + :param chrome_path: 指定chrome.exe位置 + :param show_msg: 是否打印信息 + :param check_version: 是否检查版本匹配 + :return: None + """ + save_path = save_path or str(Path(__file__).parent) + + chrome_path = chrome_path or get_chrome_path(ini_path, show_msg) + chrome_path = Path(chrome_path).absolute() if chrome_path else None + if show_msg: + print('chrome.exe路径', chrome_path) + + ver = _get_chrome_version(str(chrome_path)) + if show_msg: + print('version', ver) + + zip_path = _download_driver(ver, save_path, show_msg=show_msg) + + if not zip_path and show_msg: + print('没有找到对应版本的driver。') + + try: + driver_path = unzip(zip_path, save_path)[0] + except TypeError: + driver_path = None + + if show_msg: + print('解压路径', driver_path) + + if driver_path: + Path(zip_path).unlink() + if ini_path: + set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False) + + if check_version: + if not check_driver_version(driver_path, chrome_path) and show_msg: + 
print('获取失败,请手动配置。') + else: + if show_msg: + print('获取失败,请手动配置。') + + return driver_path + + def get_chrome_path(ini_path=None, show_msg=True, from_ini=True, @@ -248,3 +365,54 @@ def get_chrome_path(ini_path=None, return str(path) except OSError: pass + + +def _get_chrome_version(path: str) -> Union[str, None]: + """根据文件路径获取版本号 + :param path: chrome.exe文件路径 + :return: 版本号 + """ + if not path: + return + + path = str(path).replace('\\', '\\\\') + + try: + return (popen(f'wmic datafile where "name=\'{path}\'" get version').read() + .lower().split('\n')[2].replace(' ', '')) + except Exception: + return None + + +def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]: + """根据传入的版本号到镜像网站查找,下载最相近的 + :param version: 本地版本号 + :return: 保存地址 + """ + if not version: + return + + main_ver = version.split('.')[0] + remote_ver = None + + page = SessionPage(Drission().session) + page.get('https://registry.npmmirror.com/-/binary/chromedriver/') + + for version in page.json: + # 遍历所有版本,跳过大版本不一致的,如果有完全匹配的,获取url,如果没有,获取最后一个版本的url + if not version['name'].startswith(f'{main_ver}.'): + continue + + remote_ver = version['name'] + if version['name'] == f'{version}/': + break + + if remote_ver: + url = f'https://cdn.npmmirror.com/binaries/chromedriver/{remote_ver}chromedriver_win32.zip' + save_path = save_path or str(Path(__file__).parent) + result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg) + + if result[0]: + return result[1] + + return None diff --git a/DrissionPage/easy_set.pyi b/DrissionPage/easy_set.pyi index 3e8fc47..7ea52ca 100644 --- a/DrissionPage/easy_set.pyi +++ b/DrissionPage/easy_set.pyi @@ -7,19 +7,25 @@ from pathlib import Path from typing import Union +def raise_when_ele_not_found(on_off: bool = True) -> None: ... + + def configs_to_here(file_name: Union[Path, str] = None) -> None: ... def show_settings(ini_path: Union[str, Path] = None) -> None: ... 
-def set_paths(browser_path: Union[str, Path] = None, +def set_paths(driver_path: Union[str, Path] = None, + chrome_path: Union[str, Path] = None, + browser_path: Union[str, Path] = None, local_port: Union[int, str] = None, debugger_address: str = None, download_path: Union[str, Path] = None, user_data_path: Union[str, Path] = None, cache_path: Union[str, Path] = None, - ini_path: Union[str, Path] = None) -> None: ... + ini_path: Union[str, Path] = None, + check_version: bool = False) -> None: ... def use_auto_port(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... @@ -49,6 +55,17 @@ def set_user_agent(user_agent: str, ini_path: Union[str, Path] = None) -> None: def set_proxy(proxy: str, ini_path: Union[str, Path] = None) -> None: ... +def check_driver_version(driver_path: Union[str, Path] = None, chrome_path: str = None) -> bool: ... + + +# -------------------------自动识别chrome版本号并下载对应driver------------------------ +def get_match_driver(ini_path: Union[str, None] = 'default', + save_path: str = None, + chrome_path: str = None, + show_msg: bool = True, + check_version: bool = True) -> Union[str, None]: ... 
+ + def get_chrome_path(ini_path: str = None, show_msg: bool = True, from_ini: bool = True, diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 7bab148..3bcfca0 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -24,7 +24,7 @@ class ElementLossError(BaseError): _info = '元素对象因刷新已失效。' -class CDPError(BaseError): +class CallMethodError(BaseError): _info = '方法调用错误。' @@ -54,11 +54,3 @@ class NoResourceError(BaseError): class CanNotClickError(BaseError): _info = '该元素无法滚动到视口或被遮挡,无法点击。' - - -class GetDocumentError(BaseError): - _info = '获取文档失败。' - - -class WaitTimeoutError(BaseError): - _info = '等待失败。' diff --git a/DrissionPage/network_listener.py b/DrissionPage/network_listener.py deleted file mode 100644 index ee11c08..0000000 --- a/DrissionPage/network_listener.py +++ /dev/null @@ -1,325 +0,0 @@ -# -*- coding:utf-8 -*- -from base64 import b64decode -from json import JSONDecodeError, loads -from queue import Queue -from re import search -from threading import Thread -from time import perf_counter, sleep - -from requests.structures import CaseInsensitiveDict - -from .errors import CDPError - - -class NetworkListener(object): - """监听器基类""" - - def __init__(self, page): - """ - :param page: ChromiumBase对象 - """ - self._page = page - self._driver = self._page.driver - - self._tmp = None # 临存捕捉到的数据 - self._request_ids = None # 暂存须要拦截的请求id - - self._total_count = None # 当次监听的数量上限 - self._caught_count = None # 当次已监听到的数量 - self._begin_time = None # 当次监听开始时间 - self._timeout = None # 当次监听超时时间 - - self.listening = False - self._targets = None # 默认监听所有 - self.tab_id = None # 当前tab的id - self._results = [] - - self._is_regex = False - self._method = None - - def set_targets(self, targets=True, is_regex=False, method=None): - """指定要等待的数据包 - :param targets: 要匹配的数据包url特征,可用list等传入多个,为True时获取所有 - :param is_regex: 设置的target是否正则表达式 - :param method: 设置监听的请求类型,可用list等指定多个,为None时监听全部 - :return: None - """ - if targets is not None: - if not isinstance(targets, 
(str, list, tuple, set)) and targets is not True: - raise TypeError('targets只能是str、list、tuple、set、True。') - if targets is True: - targets = '' - - if isinstance(targets, str): - self._targets = {targets} - else: - self._targets = set(targets) - - self._is_regex = is_regex - - if method is not None: - if isinstance(method, str): - self._method = {method.upper()} - elif isinstance(method, (list, tuple, set)): - self._method = set(i.upper() for i in method) - else: - raise TypeError('method参数只能是str、list、tuple、set类型。') - - def listen(self, targets=None, count=None, timeout=None): - """拦截目标请求,直到超时或达到拦截个数,每次拦截前清空结果 - 可监听多个目标,请求url包含这些字符串就会被记录 - :param targets: 要监听的目标字符串或其组成的列表,True监听所有,None则保留之前的目标不变 - :param count: 要记录的个数,到达个数停止监听 - :param timeout: 监听最长时间,到时间即使未达到记录个数也停止,None为无限长 - :return: None - """ - if targets: - self.set_targets(targets) - - self.listening = True - self._results = [] - self._request_ids = {} - self._tmp = Queue(maxsize=0) - - self._caught_count = 0 - self._begin_time = perf_counter() - self._timeout = timeout - - self._set_callback_func() - - self._total_count = len(self._targets) if not count else count - - Thread(target=self._wait_to_stop).start() - - def stop(self): - """停止监听""" - self._stop() - self.listening = False - - def wait(self): - """等待监听结束""" - while self.listening: - sleep(.2) - return self._results - - def get_results(self, target=None): - """获取结果列表 - :param target: 要获取的目标,为None时获取全部 - :return: 结果数据组成的列表 - """ - return self._results if target is None else [i for i in self._results if i.target == target] - - def _wait_to_stop(self): - """当收到停止信号、到达须获取结果数、到时间就停止""" - while self._is_continue(): - sleep(.2) - self.stop() - - def _is_continue(self): - """是否继续当前监听""" - return self.listening \ - and (self._total_count is None or self._caught_count < self._total_count) \ - and (self._timeout is None or perf_counter() - self._begin_time < self._timeout) - - def steps(self, gap=1): - """用于单步操作,可实现没收到若干个数据包执行一步操作(如翻页) - :param gap: 每接收到多少个数据包触发 
- :return: 用于在接收到监听目标时触发动作的可迭代对象 - """ - if not isinstance(gap, int) or gap < 1: - raise ValueError('gap参数必须为大于0的整数。') - while self.listening or not self._tmp.empty(): - while self._tmp.qsize() >= gap: - yield self._tmp.get(False) if gap == 1 else [self._tmp.get(False) for _ in range(gap)] - - sleep(.1) - - def _set_callback_func(self): - """设置监听请求的回调函数""" - self._driver.set_listener('Network.requestWillBeSent', self._requestWillBeSent) - self._driver.set_listener('Network.responseReceived', self._response_received) - self._driver.set_listener('Network.loadingFinished', self._loading_finished) - self._driver.set_listener('Network.loadingFailed', self._loading_failed) - self._driver.call_method('Network.enable') - - def _stop(self) -> None: - """停止监听前要做的工作""" - self._driver.set_listener('Network.requestWillBeSent', None) - self._driver.set_listener('Network.responseReceived', None) - self._driver.set_listener('Network.loadingFinished', None) - self._driver.set_listener('Network.loadingFailed', None) - # self._driver.call_method('Network.disable') - - def _requestWillBeSent(self, **kwargs): - """接收到请求时的回调函数""" - for target in self._targets: - if ((self._is_regex and search(target, kwargs['request']['url'])) or - (not self._is_regex and target in kwargs['request']['url'])) and ( - not self._method or kwargs['request']['method'] in self._method): - self._request_ids[kwargs['requestId']] = DataPacket(self._page.tab_id, target, kwargs) - - if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): - self._request_ids[kwargs['requestId']]._raw_post_data = \ - self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData'] - - break - - def _response_received(self, **kwargs): - """接收到返回信息时处理方法""" - request_id = kwargs['requestId'] - if request_id in self._request_ids: - self._request_ids[request_id]._raw_response = kwargs['response'] - self._request_ids[request_id]._resource_type = kwargs['type'] - - def 
_loading_finished(self, **kwargs): - """请求完成时处理方法""" - request_id = kwargs['requestId'] - dp = self._request_ids.get(request_id) - if dp: - try: - r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) - body = r['body'] - is_base64 = r['base64Encoded'] - except CDPError: - body = '' - is_base64 = False - - dp._raw_body = body - dp._base64_body = is_base64 - - self._tmp.put(dp) - self._results.append(dp) - self._caught_count += 1 - - def _loading_failed(self, **kwargs): - """请求失败时的回调方法""" - request_id = kwargs['requestId'] - if request_id in self._request_ids: - dp = self._request_ids[request_id] - dp.errorText = kwargs['errorText'] - dp._resource_type = kwargs['type'] - - self._tmp.put(dp) - self._results.append(dp) - self._caught_count += 1 - - -class DataPacket(object): - """返回的数据包管理类""" - - def __init__(self, tab, target, raw_request): - """ - :param tab: 产生这个数据包的tab的id - :param target: 监听目标 - :param raw_request: 原始request数据,从cdp获得 - """ - self.tab = tab - self.target = target - - self._raw_request = raw_request - self._raw_post_data = None - - self._raw_response = None - self._raw_body = None - self._base64_body = False - - self._request = None - self._response = None - self.errorText = None - self._resource_type = None - - @property - def url(self): - return self.request.url - - @property - def method(self): - return self.request.method - - @property - def frameId(self): - return self._raw_request.get('frameId') - - @property - def resourceType(self): - return self._resource_type - - @property - def request(self): - if self._request is None: - self._request = Request(self._raw_request['request'], self._raw_post_data) - return self._request - - @property - def response(self): - if self._response is None: - self._response = Response(self._raw_response, self._raw_body, self._base64_body) - return self._response - - -class Request(object): - def __init__(self, raw_request, post_data): - self._request = raw_request - self._raw_post_data = 
post_data - self._postData = None - self._headers = None - - def __getattr__(self, item): - return self._request.get(item, None) - - @property - def headers(self): - """以大小写不敏感字典返回headers数据""" - if self._headers is None: - self._headers = CaseInsensitiveDict(self._request['headers']) - return self._headers - - @property - def postData(self): - """返回postData数据""" - if self._postData is None: - if self._raw_post_data: - postData = self._raw_post_data - elif self._request.get('postData', None): - postData = self._request['postData'] - else: - postData = False - try: - self._postData = loads(postData) - except (JSONDecodeError, TypeError): - self._postData = postData - return self._postData - - -class Response(object): - def __init__(self, raw_response, raw_body, base64_body): - self._response = raw_response - self._raw_body = raw_body - self._is_base64_body = base64_body - self._body = None - self._headers = None - - def __getattr__(self, item): - return self._response.get(item, None) - - @property - def headers(self): - """以大小写不敏感字典返回headers数据""" - if self._headers is None: - self._headers = CaseInsensitiveDict(self._response['headers']) - return self._headers - - @property - def body(self): - """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" - if self._body is None: - if self._is_base64_body: - self._body = b64decode(self._raw_body) - - else: - try: - self._body = loads(self._raw_body) - except (JSONDecodeError, TypeError): - self._body = self._raw_body - - return self._body diff --git a/DrissionPage/network_listener.pyi b/DrissionPage/network_listener.pyi deleted file mode 100644 index 759f7b2..0000000 --- a/DrissionPage/network_listener.pyi +++ /dev/null @@ -1,140 +0,0 @@ -from queue import Queue -from typing import Union, Dict, List, Iterable, Tuple - -from requests.structures import CaseInsensitiveDict - -from chromium_base import ChromiumBase -from chromium_driver import ChromiumDriver - - -class NetworkListener(object): - def __init__(self, page: 
ChromiumBase): - self._page: ChromiumBase = ... - self._total_count: int = ... - self._caught_count: int = ... - self._targets: Union[str, dict] = ... - self._results: list = ... - self._method: set = ... - self._tmp: Queue = ... - self._is_regex: bool = ... - self._driver: ChromiumDriver = ... - self._request_ids: dict = ... - self.listening: bool = ... - self._timeout: float = ... - self._begin_time: float = ... - - def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False, - count: int = None, method: Union[str, list, tuple, set] = None) -> None: ... - - def stop(self) -> None: ... - - @property - def results(self) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... - - def clear(self) -> None: ... - - def listen(self, targets: Union[str, List[str], Tuple, bool, None] = ..., count: int = ..., - timeout: float = ...) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... - - def _listen(self, timeout: float = None, - any_one: bool = False) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... - - def _requestWillBeSent(self, **kwargs) -> None: ... - - def _response_received(self, **kwargs) -> None: ... - - def _loading_finished(self, **kwargs) -> None: ... - - def _loading_failed(self, **kwargs) -> None: ... - - def _request_paused(self, **kwargs) -> None: ... - - def _wait_to_stop(self) -> None: ... - - def _is_continue(self) -> bool: ... - - def steps(self, gap=1) -> Iterable[Union[DataPacket, List[DataPacket]]]: ... - - def _set_callback_func(self) -> None: ... - - def _stop(self) -> None: ... - - -class DataPacket(object): - """返回的数据包管理类""" - - def __init__(self, tab: str, target: str, raw_info: dict): - self.tab: str = ... - self.target: str = ... - self._raw_request: dict = ... - self._raw_response: dict = ... - self._raw_post_data: str = ... - self._raw_body: str = ... - self._base64_body: bool = ... - self._request: Request = ... - self._response: Response = ... - self.errorText: str = ... 
- self._resource_type: str = ... - - @property - def url(self) -> str: ... - - @property - def method(self) -> str: ... - - @property - def frameId(self) -> str: ... - - @property - def resourceType(self) -> str: ... - - @property - def request(self) -> Request: ... - - @property - def response(self) -> Response: ... - - -class Request(object): - url: str = ... - _headers: Union[CaseInsensitiveDict, None] = ... - method: str = ... - - # urlFragment: str = ... - # postDataEntries: list = ... - # mixedContentType: str = ... - # initialPriority: str = ... - # referrerPolicy: str = ... - # isLinkPreload: bool = ... - # trustTokenParams: dict = ... - # isSameSite: bool = ... - - def __init__(self, raw_request: dict, post_data: str): - self._request: dict = ... - self._raw_post_data: str = ... - self._postData: str = ... - - @property - def headers(self) -> dict: ... - - @property - def postData(self) -> Union[str, dict]: ... - - -class Response(object): - status: str = ... - statusText: int = ... - mimeType: str = ... - - def __init__(self, raw_response: dict, raw_body: str, base64_body: bool): - self._response: dict = ... - self._raw_body: str = ... - self._is_base64_body: bool = ... - self._body: Union[str, dict] = ... - self._headers: dict = ... - - @property - def headers(self) -> CaseInsensitiveDict: ... - - @property - def body(self) -> Union[str, dict, bool]: ... 
diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 7e8bb88..e7d375b 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -38,7 +38,7 @@ class SessionElement(DrissionElement): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 :return: SessionElement对象或属性、文本 """ return self.ele(loc_or_str) @@ -75,13 +75,12 @@ class SessionElement(DrissionElement): """返回未格式化处理的元素内文本""" return str(self._inner_ele.text_content()) - def parent(self, level_or_loc=1, index=1): + def parent(self, level_or_loc=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 - :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - return super().parent(level_or_loc, index) + return super().parent(level_or_loc) def child(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -91,7 +90,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本 """ - return super().child(filter_loc, index, timeout, ele_only=ele_only) + return super().child(index, filter_loc, timeout, ele_only=ele_only) def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -101,7 +100,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().prev(filter_loc, index, timeout, ele_only=ele_only) + return super().prev(index, filter_loc, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -111,7 +110,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().next(filter_loc, index, timeout, ele_only=ele_only) + return super().next(index, filter_loc, timeout, 
ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -122,7 +121,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - return super().before(filter_loc, index, timeout, ele_only=ele_only) + return super().before(index, filter_loc, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -133,7 +132,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - return super().after(filter_loc, index, timeout, ele_only=ele_only) + return super().after(index, filter_loc, timeout, ele_only=ele_only) def children(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -218,7 +217,7 @@ class SessionElement(DrissionElement): def ele(self, loc_or_str, timeout=None): """返回当前元素下级符合条件的第一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 :return: SessionElement对象或属性、文本 """ return self._ele(loc_or_str) @@ -226,7 +225,7 @@ class SessionElement(DrissionElement): def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用 + :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 :return: SessionElement对象或属性、文本组成的列表 """ return self._ele(loc_or_str, single=False) @@ -322,7 +321,8 @@ def make_session_ele(html_or_ele, loc=None, single=True): loc = loc[0], loc_str - elif the_type.endswith(".ChromiumElement'>"): + # ChromiumElement, DriverElement + elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): loc_str = loc[1] if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): loc_str = f'.{loc[1]}' diff --git a/DrissionPage/session_element.pyi b/DrissionPage/session_element.pyi index 
c55dcfe..4d455e1 100644 --- a/DrissionPage/session_element.pyi +++ b/DrissionPage/session_element.pyi @@ -12,6 +12,8 @@ from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement +from mixpage.driver_element import DriverElement +from mixpage.driver_page import DriverPage from .session_page import SessionPage @@ -48,29 +50,29 @@ class SessionElement(DrissionElement): @property def raw_text(self) -> str: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union['SessionElement', None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... - def child(self, filter_loc: Union[tuple, str, int] = '', + def child(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def prev(self, filter_loc: Union[tuple, str, int] = '', + def prev(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def next(self, filter_loc: Union[tuple, str, int] = '', + def next(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def before(self, filter_loc: Union[tuple, str, int] = '', + def before(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def after(self, filter_loc: Union[tuple, str, int] = '', + def after(self, filter_loc: Union[tuple, str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... @@ -122,8 +124,8 @@ class SessionElement(DrissionElement): def _get_ele_path(self, mode: str) -> str: ... 
-def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame, -ChromiumBase], +def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, DriverElement, BaseElement, +ChromiumFrame, ChromiumBase, DriverPage], loc: Union[str, Tuple[str, str]] = None, single: bool = True) -> Union[ SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ... diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 7e07aa7..3d934b9 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -7,15 +7,15 @@ from re import search from time import sleep from urllib.parse import urlparse -from requests import Session +from DownloadKit import DownloadKit +from requests import Session, Response from requests.structures import CaseInsensitiveDict from tldextract import extract from .base import BasePage -from .commons.web import cookie_to_dict +from .commons.web import cookie_to_dict, set_session_cookies from .configs.session_options import SessionOptions from .session_element import SessionElement, make_session_ele -from .setter import SessionPageSetter class SessionPage(BasePage): @@ -27,6 +27,7 @@ class SessionPage(BasePage): :param timeout: 连接超时时间,为None时从ini文件读取 """ self._response = None + self._download_set = None self._session = None self._set = None self._set_start_options(session_or_options, None) @@ -98,9 +99,21 @@ class SessionPage(BasePage): return None @property - def user_agent(self): - """返回user agent""" - return self.session.headers.get('user-agent', '') + def download_path(self): + """返回下载路径""" + return self._download_path + + @property + def download_set(self): + """返回用于设置下载参数的对象""" + if self._download_set is None: + self._download_set = DownloadSetter(self) + return self._download_set + + @property + def download(self): + """返回下载器对象""" + return self.download_set.DownloadKit @property def session(self): @@ -305,18 +318,200 @@ class 
SessionPage(BasePage): return r, f'状态码:{r.status_code}' -def check_headers(kwargs, headers, arg): +class SessionPageSetter(object): + def __init__(self, page): + self._page = page + + def retry_times(self, times): + """设置连接失败时重连次数""" + self._page.retry_times = times + + def retry_interval(self, interval): + """设置连接失败时重连间隔""" + self._page.retry_interval = interval + + def timeout(self, second): + """设置连接超时时间 + :param second: 秒数 + :return: None + """ + self._page.timeout = second + + def cookies(self, cookies): + """为Session对象设置cookies + :param cookies: cookies信息 + :return: None + """ + set_session_cookies(self._page.session, cookies) + + def headers(self, headers): + """设置通用的headers + :param headers: dict形式的headers + :return: None + """ + self._page.session.headers = CaseInsensitiveDict(headers) + + def header(self, attr, value): + """设置headers中一个项 + :param attr: 设置名称 + :param value: 设置值 + :return: None + """ + self._page.session.headers[attr.lower()] = value + + def user_agent(self, ua): + """设置user agent + :param ua: user agent + :return: None + """ + self._page.session.headers['user-agent'] = ua + + def proxies(self, http, https=None): + """设置proxies参数 + :param http: http代理地址 + :param https: https代理地址 + :return: None + """ + proxies = None if http == https is None else {'http': http, 'https': https or http} + self._page.session.proxies = proxies + + def auth(self, auth): + """设置认证元组或对象 + :param auth: 认证元组或对象 + :return: None + """ + self._page.session.auth = auth + + def hooks(self, hooks): + """设置回调方法 + :param hooks: 回调方法 + :return: None + """ + self._page.session.hooks = hooks + + def params(self, params): + """设置查询参数字典 + :param params: 查询参数字典 + :return: None + """ + self._page.session.params = params + + def verify(self, on_off): + """设置是否验证SSL证书 + :param on_off: 是否验证 SSL 证书 + :return: None + """ + self._page.session.verify = on_off + + def cert(self, cert): + """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 + :param cert: 证书路径或元组 + :return: None + """ + 
self._page.session.cert = cert + + def stream(self, on_off): + """设置是否使用流式响应内容 + :param on_off: 是否使用流式响应内容 + :return: None + """ + self._page.session.stream = on_off + + def trust_env(self, on_off): + """设置是否信任环境 + :param on_off: 是否信任环境 + :return: None + """ + self._page.session.trust_env = on_off + + def max_redirects(self, times): + """设置最大重定向次数 + :param times: 最大重定向次数 + :return: None + """ + self._page.session.max_redirects = times + + def add_adapter(self, url, adapter): + """添加适配器 + :param url: 适配器对应url + :param adapter: 适配器对象 + :return: None + """ + self._page.session.mount(url, adapter) + + +class DownloadSetter(object): + """用于设置下载参数的类""" + + def __init__(self, page): + self._page = page + self._DownloadKit = None + + @property + def DownloadKit(self): + if self._DownloadKit is None: + self._DownloadKit = DownloadKit(session=self._page, goal_path=self._page.download_path) + return self._DownloadKit + + @property + def if_file_exists(self): + """返回用于设置存在同名文件时处理方法的对象""" + return FileExists(self) + + def split(self, on_off): + """设置是否允许拆分大文件用多线程下载 + :param on_off: 是否启用多线程下载大文件 + :return: None + """ + self.DownloadKit.split = on_off + + def save_path(self, path): + """设置下载保存路径 + :param path: 下载保存路径 + :return: None + """ + path = path if path is None else str(path) + self._page._download_path = path + self.DownloadKit.goal_path = path + + +class FileExists(object): + """用于设置存在同名文件时处理方法""" + + def __init__(self, setter): + """ + :param setter: DownloadSetter对象 + """ + self._setter = setter + + def __call__(self, mode): + if mode not in ('skip', 'rename', 'overwrite'): + raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") + self._setter.DownloadKit.file_exists = mode + + def skip(self): + """设为跳过""" + self._setter.DownloadKit.file_exists = 'skip' + + def rename(self): + """设为重命名,文件名后加序号""" + self._setter.DownloadKit._file_exists = 'rename' + + def overwrite(self): + """设为覆盖""" + self._setter.DownloadKit._file_exists = 'overwrite' + + +def 
check_headers(kwargs, headers, arg) -> bool: """检查kwargs或headers中是否有arg所示属性""" return arg in kwargs['headers'] or arg in headers -def set_charset(response): +def set_charset(response) -> Response: """设置Response对象的编码""" # 在headers中获取编码 content_type = response.headers.get('content-type', '').lower() - if not content_type.endswith(';'): - content_type += ';' - charset = search(r'charset[=: ]*(.*)?;?', content_type) + charset = search(r'charset[=: ]*(.*)?;', content_type) if charset: response.encoding = charset.group(1) diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index 5391a4a..c551834 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -3,16 +3,22 @@ @Author : g1879 @Contact : g1879@qq.com """ +from pathlib import Path from typing import Any, Union, Tuple, List +from DownloadKit import DownloadKit from requests import Session, Response +from requests.adapters import HTTPAdapter +from requests.auth import HTTPBasicAuth +from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict -from .base import BasePage from .commons.constants import NoneElement +from .base import BasePage +from .chromium_page import ChromiumPage from .configs.session_options import SessionOptions from .session_element import SessionElement -from .setter import SessionPageSetter +from .web_page import WebPage class SessionPage(BasePage): @@ -23,8 +29,8 @@ class SessionPage(BasePage): self._session_options: SessionOptions = ... self._url: str = ... self._response: Response = ... - # self._download_path: str = ... - # self._DownloadKit: DownloadKit = ... + self._download_path: str = ... + self._download_set: DownloadSetter = ... self._url_available: bool = ... self.timeout: float = ... self.retry_times: int = ... @@ -58,10 +64,10 @@ class SessionPage(BasePage): def json(self) -> Union[dict, None]: ... @property - def user_agent(self) -> str: ... + def download_path(self) -> str: ... 
@property - def download_path(self) -> str: ... + def download_set(self) -> DownloadSetter: ... def get(self, url: str, @@ -114,8 +120,8 @@ class SessionPage(BasePage): @property def set(self) -> SessionPageSetter: ... - # @property - # def download(self) -> DownloadKit: ... + @property + def download(self) -> DownloadKit: ... def post(self, url: str, @@ -156,6 +162,74 @@ class SessionPage(BasePage): **kwargs) -> tuple: ... +class SessionPageSetter(object): + def __init__(self, page: SessionPage): + self._page: SessionPage = ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def timeout(self, second: float) -> None: ... + + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def header(self, attr: str, value: str) -> None: ... + + def user_agent(self, ua: str) -> None: ... + + def proxies(self, http, https=None) -> None: ... + + def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... + + def hooks(self, hooks: Union[dict, None]) -> None: ... + + def params(self, params: Union[dict, None]) -> None: ... + + def verify(self, on_off: Union[bool, None]) -> None: ... + + def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... + + def stream(self, on_off: Union[bool, None]) -> None: ... + + def trust_env(self, on_off: Union[bool, None]) -> None: ... + + def max_redirects(self, times: Union[int, None]) -> None: ... + + def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... + + +class DownloadSetter(object): + def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]): + self._page: SessionPage = ... + self._DownloadKit: DownloadKit = ... + + @property + def DownloadKit(self) -> DownloadKit: ... + + @property + def if_file_exists(self) -> FileExists: ... + + def split(self, on_off: bool) -> None: ... + + def save_path(self, path: Union[str, Path]): ... 
+ + +class FileExists(object): + def __init__(self, setter: DownloadSetter): + self._setter: DownloadSetter = ... + + def __call__(self, mode: str) -> None: ... + + def skip(self) -> None: ... + + def rename(self) -> None: ... + + def overwrite(self) -> None: ... + + def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool: ... diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py deleted file mode 100644 index 1faeaa2..0000000 --- a/DrissionPage/setter.py +++ /dev/null @@ -1,526 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path - -from requests.structures import CaseInsensitiveDict - -from .commons.tools import show_or_hide_browser -from .commons.web import set_browser_cookies, set_session_cookies - - -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page = page - - @property - def load_strategy(self): - """返回用于设置页面加载策略的对象""" - return PageLoadStrategy(self._page) - - @property - def scroll(self): - """返回用于设置页面滚动设置的对象""" - return PageScrollSetter(self._page.scroll) - - def retry_times(self, times): - """设置连接失败重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败重连间隔""" - self._page.retry_interval = interval - - def timeouts(self, implicit=None, page_load=None, script=None): - """设置超时时间,单位为秒 - :param implicit: 查找元素超时时间 - :param page_load: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: None - """ - if implicit is not None: - self._page.timeouts.implicit = implicit - - if page_load is not None: - self._page.timeouts.page_load = page_load - - if script is not None: - self._page.timeouts.script = script - - def user_agent(self, ua, platform=None): - """为当前tab设置user agent,只在当前tab有效 - :param ua: user agent字符串 - :param platform: platform字符串 - :return: None - """ - keys = {'userAgent': ua} - if platform: - keys['platform'] = platform - self._page.run_cdp('Emulation.setUserAgentOverride', 
**keys) - - def session_storage(self, item, value): - """设置或删除某项sessionStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def local_storage(self, item, value): - """设置或删除某项localStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def cookie(self, cookie): - """设置单个cookie - :param cookie: cookie信息 - :return: None - """ - if isinstance(cookie, str): - self.cookies(cookie) - else: - self.cookies([cookie]) - - def cookies(self, cookies): - """设置多个cookie,注意不要传入单个 - :param cookies: cookies信息 - :return: None - """ - set_browser_cookies(self._page, cookies) - - def upload_files(self, files): - """等待上传的文件路径 - :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 - :return: None - """ - if not self._page._upload_list: - self._page.driver.set_listener('Page.fileChooserOpened', self._page._onFileChooserOpened) - self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) - - if isinstance(files, str): - files = files.split('\n') - self._page._upload_list = [str(Path(i).absolute()) for i in files] - - def headers(self, headers: dict) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - self._page.run_cdp('Network.enable') - self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) - - -class ChromiumPageSetter(ChromiumBaseSetter): - def main_tab(self, tab_id=None): - """设置主tab - :param tab_id: 标签页id,不传入则设置当前tab - :return: None - """ - self._page._main_tab = tab_id or self._page.tab_id - - @property - def window(self): - """返回用于设置浏览器窗口的对象""" - return WindowSetter(self._page) - - def tab_to_front(self, tab_or_id=None): - """激活标签页使其处于最前面 
- :param tab_or_id: 标签页对象或id,为None表示当前标签页 - :return: None - """ - if not tab_or_id: - tab_or_id = self._page.tab_id - elif not isinstance(tab_or_id, str): # 传入Tab对象 - tab_or_id = tab_or_id.tab_id - self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') - - -class SessionPageSetter(object): - def __init__(self, page): - self._page = page - - def retry_times(self, times): - """设置连接失败时重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败时重连间隔""" - self._page.retry_interval = interval - - def timeout(self, second): - """设置连接超时时间 - :param second: 秒数 - :return: None - """ - self._page.timeout = second - - def cookie(self, cookie): - """为Session对象设置单个cookie - :param cookie: cookie信息 - :return: None - """ - if isinstance(cookie, str): - self.cookies(cookie) - else: - self.cookies([cookie]) - - def cookies(self, cookies): - """为Session对象设置多个cookie,注意不要传入单个 - :param cookies: cookies信息 - :return: None - """ - set_session_cookies(self._page.session, cookies) - - def headers(self, headers): - """设置通用的headers - :param headers: dict形式的headers - :return: None - """ - self._page.session.headers = CaseInsensitiveDict(headers) - - def header(self, attr, value): - """设置headers中一个项 - :param attr: 设置名称 - :param value: 设置值 - :return: None - """ - self._page.session.headers[attr.lower()] = value - - def user_agent(self, ua): - """设置user agent - :param ua: user agent - :return: None - """ - self._page.session.headers['user-agent'] = ua - - def proxies(self, http=None, https=None): - """设置proxies参数 - :param http: http代理地址 - :param https: https代理地址 - :return: None - """ - self._page.session.proxies = {'http': http, 'https': https} - - def auth(self, auth): - """设置认证元组或对象 - :param auth: 认证元组或对象 - :return: None - """ - self._page.session.auth = auth - - def hooks(self, hooks): - """设置回调方法 - :param hooks: 回调方法 - :return: None - """ - self._page.session.hooks = hooks - - def params(self, params): - """设置查询参数字典 - :param 
params: 查询参数字典 - :return: None - """ - self._page.session.params = params - - def verify(self, on_off): - """设置是否验证SSL证书 - :param on_off: 是否验证 SSL 证书 - :return: None - """ - self._page.session.verify = on_off - - def cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 - :param cert: 证书路径或元组 - :return: None - """ - self._page.session.cert = cert - - def stream(self, on_off): - """设置是否使用流式响应内容 - :param on_off: 是否使用流式响应内容 - :return: None - """ - self._page.session.stream = on_off - - def trust_env(self, on_off): - """设置是否信任环境 - :param on_off: 是否信任环境 - :return: None - """ - self._page.session.trust_env = on_off - - def max_redirects(self, times): - """设置最大重定向次数 - :param times: 最大重定向次数 - :return: None - """ - self._page.session.max_redirects = times - - def add_adapter(self, url, adapter): - """添加适配器 - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: None - """ - self._page.session.mount(url, adapter) - - -class WebPageSetter(ChromiumPageSetter): - def __init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumPageSetter(self._page) - - def cookies(self, cookies): - """添加cookies信息到浏览器或session对象 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - if self._page.mode == 's': - self._session_setter.headers(headers) - else: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page.mode == 's': - self._session_setter.user_agent(ua) - else: - self._chromium_setter.user_agent(ua, platform) - - -class WebPageTabSetter(ChromiumBaseSetter): - def 
__init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumBaseSetter(self._page) - - def cookies(self, cookies): - """添加多个cookies信息到浏览器或session对象,注意不要传入单个 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - if self._page._has_session: - self._session_setter.headers(headers) - if self._page._has_driver: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page._has_session: - self._session_setter.user_agent(ua) - if self._page._has_driver: - self._chromium_setter.user_agent(ua, platform) - - -class ChromiumElementSetter(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - def attr(self, attr, value): - """设置元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele.ids.node_id, name=attr, value=str(value)) - - def prop(self, prop, value): - """设置元素property属性 - :param prop: 属性名 - :param value: 属性值 - :return: None - """ - value = value.replace('"', r'\"') - self._ele.run_js(f'this.{prop}="{value}";') - - def innerHTML(self, html): - """设置元素innerHTML - :param html: html文本 - :return: None - """ - self.prop('innerHTML', html) - - -class ChromiumFrameSetter(ChromiumBaseSetter): - def attr(self, attr, value): - """设置frame元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._page._check_ok() - self._page.frame_ele.set.attr(attr, value) - - -class PageLoadStrategy(object): - """用于设置页面加载策略的类""" - - def 
__init__(self, page): - """ - :param page: ChromiumBase对象 - """ - self._page = page - - def __call__(self, value): - """设置加载策略 - :param value: 可选 'normal', 'eager', 'none' - :return: None - """ - if value.lower() not in ('normal', 'eager', 'none'): - raise ValueError("只能选择 'normal', 'eager', 'none'。") - self._page._page_load_strategy = value - - def normal(self): - """设置页面加载策略为normal""" - self._page._page_load_strategy = 'normal' - - def eager(self): - """设置页面加载策略为eager""" - self._page._page_load_strategy = 'eager' - - def none(self): - """设置页面加载策略为none""" - self._page._page_load_strategy = 'none' - - -class PageScrollSetter(object): - def __init__(self, scroll): - self._scroll = scroll - - def wait_complete(self, on_off=True): - """设置滚动命令后是否等待完成 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - self._scroll._wait_complete = on_off - - def smooth(self, on_off=True): - """设置页面滚动是否平滑滚动 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - b = 'smooth' if on_off else 'auto' - self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') - self._scroll._wait_complete = on_off - - -class WindowSetter(object): - """用于设置窗口大小的类""" - - def __init__(self, page): - """ - :param page: 页面对象 - """ - self._page = page - self._window_id = self._get_info()['windowId'] - - def maximized(self): - """窗口最大化""" - s = self._get_info()['bounds']['windowState'] - if s in ('fullscreen', 'minimized'): - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'maximized'}) - - def minimized(self): - """窗口最小化""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'minimized'}) - - def fullscreen(self): - """设置窗口为全屏""" - s = self._get_info()['bounds']['windowState'] - if s == 'minimized': - self._perform({'windowState': 
'normal'}) - self._perform({'windowState': 'fullscreen'}) - - def normal(self): - """设置窗口为常规模式""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'normal'}) - - def size(self, width=None, height=None): - """设置窗口大小 - :param width: 窗口宽度 - :param height: 窗口高度 - :return: None - """ - if width or height: - s = self._get_info()['bounds']['windowState'] - if s != 'normal': - self._perform({'windowState': 'normal'}) - info = self._get_info()['bounds'] - width = width - 16 if width else info['width'] - height = height + 7 if height else info['height'] - self._perform({'width': width, 'height': height}) - - def location(self, x=None, y=None): - """设置窗口在屏幕中的位置,相对左上角坐标 - :param x: 距离顶部距离 - :param y: 距离左边距离 - :return: None - """ - if x is not None or y is not None: - self.normal() - info = self._get_info()['bounds'] - x = x if x is not None else info['left'] - y = y if y is not None else info['top'] - self._perform({'left': x - 8, 'top': y}) - - def hide(self): - """隐藏浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, hide=True) - - def show(self): - """显示浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, hide=False) - - def _get_info(self): - """获取窗口位置及大小信息""" - return self._page.run_cdp('Browser.getWindowForTarget') - - def _perform(self, bounds): - """执行改变窗口大小操作 - :param bounds: 控制数据 - :return: None - """ - self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) diff --git a/DrissionPage/setter.pyi b/DrissionPage/setter.pyi deleted file mode 100644 index e750130..0000000 --- a/DrissionPage/setter.pyi +++ /dev/null @@ -1,192 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from http.cookiejar import Cookie -from typing import Union, Tuple - -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar - -from .chromium_base import 
ChromiumBase, ChromiumPageScroll -from .chromium_element import ChromiumElement -from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage -from .chromium_tab import ChromiumTab -from .session_page import SessionPage -from .web_page import WebPage - - -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page: ChromiumBase = ... - - @property - def load_strategy(self) -> PageLoadStrategy: ... - - @property - def scroll(self) -> PageScrollSetter: ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def session_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def local_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def cookie(self, cookies: Union[RequestsCookieJar, str, dict]) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def upload_files(self, files: Union[str, list, tuple]) -> None: ... - - -class ChromiumPageSetter(ChromiumBaseSetter): - _page: ChromiumPage = ... - - def main_tab(self, tab_id: str = None) -> None: ... - - @property - def window(self) -> WindowSetter: ... - - def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... - - -class SessionPageSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeout(self, second: float) -> None: ... - - def cookie(self, cookie: Union[Cookie, str, dict]) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... 
- - def header(self, attr: str, value: str) -> None: ... - - def user_agent(self, ua: str) -> None: ... - - def proxies(self, http: str = None, https: str = None) -> None: ... - - def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... - - def hooks(self, hooks: Union[dict, None]) -> None: ... - - def params(self, params: Union[dict, None]) -> None: ... - - def verify(self, on_off: Union[bool, None]) -> None: ... - - def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... - - def stream(self, on_off: Union[bool, None]) -> None: ... - - def trust_env(self, on_off: Union[bool, None]) -> None: ... - - def max_redirects(self, times: Union[int, None]) -> None: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... - - -class WebPageSetter(ChromiumPageSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumPageSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... - - -class WebPageTabSetter(ChromiumBaseSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumBaseSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... - - -class ChromiumElementSetter(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - def attr(self, attr: str, value: str) -> None: ... - - def prop(self, prop: str, value: str) -> None: ... - - def innerHTML(self, html: str) -> None: ... - - -class ChromiumFrameSetter(ChromiumBaseSetter): - _page: ChromiumFrame = ... - - def attr(self, attr: str, value: str) -> None: ... - - -class PageLoadStrategy(object): - def __init__(self, page: ChromiumBase): - self._page: ChromiumBase = ... - - def __call__(self, value: str) -> None: ... 
- - def normal(self) -> None: ... - - def eager(self) -> None: ... - - def none(self) -> None: ... - - -class PageScrollSetter(object): - def __init__(self, scroll: ChromiumPageScroll): - self._scroll: ChromiumPageScroll = ... - - def wait_complete(self, on_off: bool = True): ... - - def smooth(self, on_off: bool = True): ... - - -class WindowSetter(object): - - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - self._window_id: str = ... - - def maximized(self) -> None: ... - - def minimized(self) -> None: ... - - def fullscreen(self) -> None: ... - - def normal(self) -> None: ... - - def size(self, width: int = None, height: int = None) -> None: ... - - def location(self, x: int = None, y: int = None) -> None: ... - - def hide(self) -> None: ... - - def show(self) -> None: ... - - def _get_info(self) -> dict: ... - - def _perform(self, bounds: dict) -> None: ... diff --git a/DrissionPage/waiter.py b/DrissionPage/waiter.py deleted file mode 100644 index 25b98a1..0000000 --- a/DrissionPage/waiter.py +++ /dev/null @@ -1,291 +0,0 @@ -# -*- coding:utf-8 -*- -from time import sleep, perf_counter - -from .commons.constants import Settings -from .errors import WaitTimeoutError - - -class ChromiumBaseWaiter(object): - def __init__(self, page_or_ele): - """ - :param page_or_ele: 页面对象或元素对象 - """ - self._driver = page_or_ele - - def ele_delete(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素从DOM中删除 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.delete(timeout, raise_err=raise_err) if ele else True - - def ele_display(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素变成显示状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = 
self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.display(timeout, raise_err=raise_err) - - def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素变成隐藏状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.hidden(timeout, raise_err=raise_err) - - def ele_load(self, loc, timeout=None, raise_err=None): - """等待元素加载到DOM - :param loc: 要等待的元素,输入定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 成功返回元素对象,失败返回False - """ - ele = self._driver._ele(loc, raise_err=False, timeout=timeout) - if ele: - return True - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素加载失败。') - else: - return False - - def load_start(self, timeout=None, raise_err=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) - - def load_complete(self, timeout=None, raise_err=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, start=False, raise_err=raise_err) - - def upload_paths_inputted(self): - """等待自动填写上传文件路径""" - while self._driver._upload_list: - sleep(.01) - - def url_change(self, text, exclude=False, timeout=None, raise_err=None): - """等待url变成包含或不包含指定文本 - :param text: 用于识别的文本 - :param exclude: 是否排除,为True时当url不包含text指定文本时返回True - :param timeout: 超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._change('url', text, exclude, timeout, raise_err) - - def title_change(self, text, exclude=False, timeout=None, raise_err=None): - """等待title变成包含或不包含指定文本 - :param text: 用于识别的文本 - 
:param exclude: 是否排除,为True时当title不包含text指定文本时返回True - :param timeout: 超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._change('title', text, exclude, timeout, raise_err) - - def _change(self, arg, text, exclude=False, timeout=None, raise_err=None): - """等待指定属性变成包含或不包含指定文本 - :param arg: 要被匹配的属性 - :param text: 用于识别的文本 - :param exclude: 是否排除,为True时当属性不包含text指定文本时返回True - :param timeout: 超时时间 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._driver.timeout - - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if arg == 'url': - val = self._driver.url - elif arg == 'title': - val = self._driver.title - else: - raise ValueError - if (not exclude and text in val) or (exclude and text not in val): - return True - sleep(.05) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError(f'等待{arg}改变失败。') - else: - return False - - def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): - """等待页面开始加载或加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param start: 等待开始还是结束 - :param gap: 间隔秒数 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout != 0: - if timeout is None or timeout is True: - timeout = self._driver.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._driver.is_loading == start: - return True - sleep(gap) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待页面加载失败。') - else: - return False - - -class ChromiumPageWaiter(ChromiumBaseWaiter): - def __init__(self, page): - super().__init__(page) - # self._listener = None - - def new_tab(self, timeout=None, raise_err=None): - """等待新标签页出现 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等到新标签页出现 - """ - timeout = timeout if timeout is not None else self._driver.timeout - 
end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._driver.tab_id != self._driver.latest_tab: - return True - sleep(.01) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待新标签页失败。') - else: - return False - - -class ChromiumElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" - - def __init__(self, page, ele): - """等待元素在dom中某种状态,如删除、显示、隐藏 - :param page: 元素所在页面 - :param ele: 要等待的元素 - """ - self._page = page - self._ele = ele - - def delete(self, timeout=None, raise_err=None): - """等待元素从dom删除 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_alive', False, timeout, raise_err) - - def display(self, timeout=None, raise_err=None): - """等待元素从dom显示 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', True, timeout, raise_err) - - def hidden(self, timeout=None, raise_err=None): - """等待元素从dom隐藏 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', False, timeout, raise_err) - - def covered(self, timeout=None, raise_err=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', True, timeout, raise_err) - - def not_covered(self, timeout=None, raise_err=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', False, timeout, raise_err) - - def enabled(self, timeout=None, raise_err=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return 
self._wait_state('is_enabled', True, timeout, raise_err) - - def disabled(self, timeout=None, raise_err=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_enabled', False, timeout, raise_err) - - def disabled_or_delete(self, timeout=None, raise_err=None): - """等待当前元素变成不可用或从DOM移除 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if not self._ele.states.is_enabled or not self._ele.states.is_alive: - return True - sleep(.05) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素隐藏或删除失败。') - else: - return False - - def _wait_state(self, attr, mode=False, timeout=None, raise_err=None): - """等待元素某个bool状态到达指定状态 - :param attr: 状态名称 - :param mode: True或False - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待失败时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._ele.states.__getattribute__(attr) == mode: - return True - sleep(.05) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素状态改变失败。') - else: - return False - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame): - """ - :param frame: ChromiumFrame对象 - """ - super().__init__(frame) - super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) diff --git a/DrissionPage/waiter.pyi b/DrissionPage/waiter.pyi deleted file mode 100644 index 548c167..0000000 --- a/DrissionPage/waiter.pyi +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union - -from 
.chromium_base import ChromiumBase -from .chromium_element import ChromiumElement -from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage - - -class ChromiumBaseWaiter(object): - def __init__(self, page: ChromiumBase): - self._driver: ChromiumBase = ... - - def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_load(self, loc: Union[str, tuple], timeout: float = None, - raise_err: bool = None) -> Union[bool, ChromiumElement]: ... - - def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ... - - def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def upload_paths_inputted(self) -> None: ... - - def url_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... - - def title_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... - - def _change(self, arg: str, text: str, exclude: bool = False, timeout: float = None, - raise_err: bool = None) -> bool: ... - - -class ChromiumPageWaiter(ChromiumBaseWaiter): - _driver: ChromiumPage = ... - - # _listener: Union[NetworkListener, None] = ... - - # def download_begin(self, timeout: float = 1.5) -> bool: ... - - # def download_finish(self, timeout: float = None) -> bool: ... - - def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... 
- - -class ChromiumElementWaiter(object): - def __init__(self, - page: ChromiumBase, - ele: ChromiumElement): - self._ele: ChromiumElement = ... - self._page: ChromiumBase = ... - - def delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def display(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def disabled_or_delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def _wait_state(self, attr: str, mode: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame: ChromiumFrame): ... 
diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index b36b09c..62e1a1b 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -3,18 +3,21 @@ @Author : g1879 @Contact : g1879@qq.com """ +from pathlib import Path +from warnings import warn + from requests import Session from .base import BasePage from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver -from .chromium_page import ChromiumPage +from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter from .chromium_tab import WebPageTab from .commons.web import set_session_cookies, set_browser_cookies from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions -from .session_page import SessionPage -from .setter import WebPageSetter +from .errors import CallMethodError +from .session_page import SessionPage, SessionPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): @@ -24,7 +27,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """初始化函数 :param mode: 'd' 或 's',即driver模式和session模式 :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_or_options: ChromiumDriver对象,只使用s模式时应传入False + :param driver_or_options: ChromiumDriver对象或DriverOptions对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ self._mode = mode.lower() @@ -42,6 +45,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._driver_options = None self._session_options = None self._response = None + self._download_set = None self._set = None self._screencast = None @@ -55,7 +59,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _set_start_options(self, dr_opt, se_opt): """处理两种模式的设置 - :param dr_opt: ChromiumDriver或ChromiumOptions对象,为None则从ini读取,为False用默认信息创建 + :param dr_opt: ChromiumDriver或DriverOptions对象,为None则从ini读取,为False用默认信息创建 :param se_opt: Session、SessionOptions对象或配置信息,为None则从ini读取,为False用默认信息创建 :return: None """ @@ -73,7 +77,7 @@ 
class WebPage(SessionPage, ChromiumPage, BasePage): elif dr_opt is False: self._driver_options = ChromiumOptions(read_file=False) - elif isinstance(dr_opt, ChromiumOptions): + elif str(type(dr_opt)).endswith(("ChromiumOptions'>", "DriverOptions'>")): self._driver_options = dr_opt else: @@ -103,6 +107,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._timeouts = Timeout(self) self._page_load_strategy = self._driver_options.page_load_strategy + self._download_path = None if se_opt is not False: self.set.timeouts(implicit=self._session_options.timeout) @@ -181,14 +186,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """以dict方式返回cookies""" return super().cookies - @property - def user_agent(self): - """返回user agent""" - if self._mode == 's': - return super().user_agent - elif self._mode == 'd': - return super(SessionPage, self).user_agent - @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" @@ -214,6 +211,23 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """ self.set.timeouts(implicit=second) + @property + def download_path(self): + """返回默认下载路径""" + return super(SessionPage, self).download_path + + @property + def download_set(self): + """返回下载设置对象""" + if self._download_set is None: + self._download_set = WebPageDownloadSetter(self) + return self._download_set + + @property + def download(self): + """返回下载器对象""" + return self.download_set._switched_DownloadKit + @property def set(self): """返回用于等待的对象""" @@ -346,15 +360,20 @@ class WebPage(SessionPage, ChromiumPage, BasePage): return if copy_user_agent: - user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": user_agent}) + selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + self.session.headers.update({"User-Agent": selenium_user_agent}) - set_session_cookies(self.session, super(SessionPage, self).get_cookies()) + # 
set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) + # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) + set_session_cookies(self.session, self._get_driver_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" if not self._has_driver: return + + # set_browser_cookies(self, super().get_cookies(as_dict=True)) + # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -367,7 +386,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) + return self._get_driver_cookies(as_dict, all_info) def get_tab(self, tab_id=None): """获取一个标签页对象 @@ -377,12 +396,27 @@ class WebPage(SessionPage, ChromiumPage, BasePage): tab_id = tab_id or self.tab_id return WebPageTab(self, tab_id) + def _get_driver_cookies(self, as_dict=False, all_info=False): + """获取浏览器cookies + :param as_dict: 是否以dict形式返回,为True时all_info无效 + :param all_info: 是否返回所有信息 + :return: cookies信息 + """ + cookies = self.run_cdp('Network.getCookies')['cookies'] + if as_dict: + return {cookie['name']: cookie['value'] for cookie in cookies} + elif all_info: + return cookies + else: + return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} + for cookie in cookies] + def close_driver(self): """关闭driver及浏览器""" if self._has_driver: self.change_mode('s') try: - self.driver.call_method('Browser.close') + self.driver.Browser.close() except Exception: pass self._tab_obj.stop() @@ -421,9 +455,104 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._response = None self._has_session = None if self._has_driver: - self._tab_obj.call_method('Browser.close') + self._tab_obj.Browser.close() self._tab_obj.stop() 
self._tab_obj = None self._has_driver = None +class WebPageSetter(ChromiumPageSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumPageSetter(self._page) + + def cookies(self, cookies): + """添加cookies信息到浏览器或session对象 + :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` + :return: None + """ + if self._page.mode == 'd' and self._page._has_driver: + self._chromium_setter.cookies(cookies) + elif self._page.mode == 's' and self._page._has_session: + self._session_setter.cookies(cookies) + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page.mode == 's': + self._session_setter.headers(headers) + else: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if self._page.mode == 's': + self._session_setter.user_agent(ua) + else: + self._chromium_setter.user_agent(ua, platform) + + +class WebPageDownloadSetter(ChromiumDownloadSetter): + """用于设置下载参数的类""" + + def __init__(self, page): + super().__init__(page) + self._session = page.session + + @property + def _switched_DownloadKit(self): + """返回从浏览器同步cookies后的Session对象""" + if self._page.mode == 'd': + self._cookies_to_session() + return self.DownloadKit + + def save_path(self, path): + """设置下载路径 + :param path: 下载路径 + :return: None + """ + path = path or '' + path = Path(path).absolute() + path.mkdir(parents=True, exist_ok=True) + path = str(path) + self._page._download_path = path + self.DownloadKit.goal_path = path + + if self._page._has_driver: + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior=self._behavior, downloadPath=path, + eventsEnabled=True) + except CallMethodError: + warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') + self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path) + + def by_browser(self): + 
"""设置使用浏览器下载文件""" + if not self._page._has_driver: + raise RuntimeError('浏览器未连接。') + + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, + downloadPath=self._page.download_path) + self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser + + except CallMethodError: + warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') + self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) + self._page.driver.Page.downloadWillBegin = self._download_by_browser + + self._behavior = 'allow' + + def by_DownloadKit(self): + """设置使用DownloadKit下载文件""" + if self._page._has_driver: + try: + self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) + self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit + except CallMethodError: + raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') + + self._behavior = 'deny' diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index 1621fd9..bc4eb58 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -12,13 +12,13 @@ from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage +from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter from .chromium_tab import WebPageTab from .configs.chromium_options import ChromiumOptions +from .configs.driver_options import DriverOptions from .configs.session_options import SessionOptions from .session_element import SessionElement -from .session_page import SessionPage -from .setter import WebPageSetter +from .session_page import SessionPage, SessionPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): @@ -26,15 +26,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __init__(self, mode: str = 'd', timeout: float = None, - driver_or_options: 
Union[ChromiumDriver, ChromiumOptions, bool] = None, + driver_or_options: Union[ChromiumDriver, ChromiumOptions, DriverOptions, bool] = None, session_or_options: Union[Session, SessionOptions, bool] = None) -> None: self._mode: str = ... self._has_driver: bool = ... self._has_session: bool = ... self.address: str = ... self._session_options: Union[SessionOptions, None] = ... - self._driver_options: Union[ChromiumOptions, None] = ... - self._DownloadKit: DownloadKit = ... + self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ... + self._download_set: WebPageDownloadSetter = ... self._download_path: str = ... self._tab_obj: ChromiumDriver = ... @@ -67,9 +67,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def cookies(self) -> dict: ... - @property - def user_agent(self) -> str: ... - @property def session(self) -> Session: ... @@ -82,6 +79,12 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @timeout.setter def timeout(self, second: float) -> None: ... + @property + def download_path(self) -> str: ... + + @property + def download_set(self) -> WebPageDownloadSetter: ... + def get(self, url: str, show_errmsg: bool = False, @@ -126,6 +129,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def get_tab(self, tab_id: str = None) -> WebPageTab: ... + def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... + def close_driver(self) -> None: ... def close_session(self) -> None: ... @@ -151,6 +156,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): verify: Any | None = ..., cert: Any | None = ...) -> bool: ... + @property + def download(self) -> DownloadKit: ... + @property def set(self) -> WebPageSetter: ... @@ -159,7 +167,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ Union[ChromiumElement, str, ChromiumFrame]]]: ... 
- def _set_start_options(self, dr_opt: Union[ChromiumDriver, bool, None], + def _set_start_options(self, dr_opt: Union[ChromiumDriver, DriverOptions, bool, None], se_opt: Union[Session, SessionOptions, bool, None]) -> None: ... def quit(self) -> None: ... @@ -167,3 +175,31 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _on_download_begin(self, **kwargs): ... +class WebPageSetter(ChromiumPageSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumPageSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def cookies(self, cookies) -> None: ... + + +class WebPageDownloadSetter(ChromiumDownloadSetter): + def __init__(self, page: WebPage): + self._page: WebPage = ... + self._behavior: str = ... + self._session: Session = None + + @property + def _switched_DownloadKit(self) -> DownloadKit: ... + + def save_path(self, path) -> None: ... + + def by_browser(self) -> None: ... + + def by_DownloadKit(self) -> None: ... + + def _download_by_DownloadKit(self, **kwargs) -> None: ... diff --git a/requirements.txt b/requirements.txt index 4e712ad..d04ff8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ requests lxml cssselect -DownloadKit>=1.0.0 +DownloadKit>=0.5.3 FlowViewer>=0.3.0 websocket-client click diff --git a/setup.py b/setup.py index ab09e66..d621cb6 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.3.0", + version="3.2.30", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. 
It can control the browser and send and receive data packets.", @@ -22,7 +22,7 @@ setup( 'lxml', 'requests', 'cssselect', - 'DownloadKit>=1.0.0', + 'DownloadKit>=0.5.3', 'FlowViewer>=0.3.0', 'websocket-client', 'click', From ca847a4c58c428c5d5cf7d0dad716e0bf5327133 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 11 Sep 2023 20:32:37 +0800 Subject: [PATCH 10/17] 3.2.32 --- DrissionPage/chromium_base.py | 1 + DrissionPage/chromium_driver.py | 3 +++ DrissionPage/chromium_element.py | 6 ++---- DrissionPage/chromium_page.py | 14 ++++++++++---- DrissionPage/commons/browser.py | 16 +++++++++++++--- DrissionPage/easy_set.py | 14 +++++++++++++- DrissionPage/web_page.py | 3 +-- setup.py | 2 +- 8 files changed, 44 insertions(+), 15 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index b23cd8d..83699f8 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -905,6 +905,7 @@ class ChromiumBaseSetter(object): """ if implicit is not None: self._page.timeouts.implicit = implicit + self._page._timeout = implicit if page_load is not None: self._page.timeouts.page_load = page_load diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index fbe08e1..a9dca5d 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -108,6 +108,9 @@ class ChromiumDriver(object): continue + except Exception: + return None + finally: self.method_results.pop(message['id'], None) diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 3367380..d9f507c 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -121,8 +121,8 @@ class ChromiumElement(DrissionElement): @property def size(self): """返回元素宽和高组成的元组""" - model = self.page.run_cdp('DOM.getBoxModel', backendNodeId=self._backend_id)['model'] - return model['width'], model['height'] + border = self.page.run_cdp('DOM.getBoxModel', 
backendNodeId=self._backend_id)['model']['border'] + return int(border[2] - border[0]), int(border[5] - border[1]) @property def set(self): @@ -515,8 +515,6 @@ class ChromiumElement(DrissionElement): while not self.run_js(js) and perf_counter() < end_time: sleep(.1) - self.scroll.to_see(center=True) - sleep(1) left, top = self.location width, height = self.size left_top = (left, top) diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 6b9ff62..240d776 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -15,7 +15,6 @@ from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBa from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser -from .commons.tools import port_is_using from .commons.web import set_session_cookies from .configs.chromium_options import ChromiumOptions from .errors import CallMethodError, BrowserConnectError @@ -356,9 +355,16 @@ class ChromiumPage(ChromiumBase): """关闭浏览器""" self._tab_obj.Browser.close() self._tab_obj.stop() - ip, port = self.address.split(':') - while port_is_using(ip, port): - sleep(.1) + + if self.process_id: + from os import popen + from platform import system + txt = f'tasklist | findstr {self.process_id}' if system().lower() == 'windows' \ + else f'ps -ef | grep {self.process_id}' + while True: + p = popen(txt) + if f' {self.process_id} ' not in p.read(): + break def _on_alert_close(self, **kwargs): """alert关闭时触发的方法""" diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 73349aa..5c4bf4e 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -5,9 +5,10 @@ """ from json import load, dump from pathlib import Path -from subprocess import Popen +from subprocess import Popen, DEVNULL from tempfile import gettempdir from time import perf_counter, sleep +from platform import system from requests import get as requests_get 
@@ -63,6 +64,7 @@ def get_launch_args(opt): result = set() has_user_path = False remote_allow = False + headless = False for i in opt.arguments: if i.startswith(('--load-extension=', '--remote-debugging-port=')): continue @@ -72,6 +74,8 @@ def get_launch_args(opt): continue elif i.startswith('--remote-allow-origins='): remote_allow = True + elif i.startswith('--headless'): + headless = True result.add(i) @@ -84,6 +88,12 @@ def get_launch_args(opt): if not remote_allow: result.add('--remote-allow-origins=*') + if not headless and system().lower() == 'linux': + from os import popen + r = popen('systemctl list-units | grep graphical.target') + if 'graphical.target' not in r.read(): + result.add('--headless=new') + result = list(result) # ----------处理插件extensions------------- @@ -167,7 +177,7 @@ def test_connect(ip, port): def _run_browser(port, path: str, args) -> Popen: """创建chrome进程 :param port: 端口号 - :param path: 浏览器地址 + :param path: 浏览器路径 :param args: 启动参数 :return: 进程对象 """ @@ -176,7 +186,7 @@ def _run_browser(port, path: str, args) -> Popen: arguments = [p, f'--remote-debugging-port={port}'] arguments.extend(args) try: - return Popen(arguments, shell=False) + return Popen(arguments, shell=False, stdout=DEVNULL, stderr=DEVNULL) except FileNotFoundError: raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。') diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 98e1260..2876e0c 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -314,7 +314,19 @@ def get_chrome_path(ini_path=None, return str(path) from platform import system - if system().lower() != 'windows': + sys = system().lower() + if sys in ('macos', 'darwin'): + return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' + + elif sys == 'linux': + paths = ('/usr/bin/google-chrome', '/opt/google/chrome/google-chrome', + '/user/lib/chromium-browser/chromium-browser') + for p in paths: + if Path(p).exists(): + return p + return None + + elif sys != 'windows': return 
None # -----------从注册表中获取-------------- diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 71dc778..cee4c34 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -447,8 +447,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._response = None self._has_session = None if self._has_driver: - self._tab_obj.Browser.close() - self._tab_obj.stop() + super(SessionPage, self).quit() self._tab_obj = None self._has_driver = None diff --git a/setup.py b/setup.py index cccddb0..4aef9a5 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.31", + version="3.2.32", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", From 640059a44528cd0fdd0338bdb0614491924b75dc Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 21 Sep 2023 23:41:42 +0800 Subject: [PATCH 11/17] =?UTF-8?q?=E5=85=83=E7=B4=A0=E5=AF=B9=E8=B1=A1?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0stats.is=5Fwhole=5Fin=5Fviewport=E5=B1=9E?= =?UTF-8?q?=E6=80=A7=EF=BC=9B=E4=BF=AE=E5=A4=8D=E5=85=83=E7=B4=A0=E6=88=AA?= =?UTF-8?q?=E5=9B=BE=E8=A7=86=E5=8F=A3=E5=A4=96=E7=A9=BA=E7=99=BD=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 20 ++++++++++++++++---- DrissionPage/chromium_element.py | 9 +++++++++ DrissionPage/chromium_element.pyi | 3 +++ DrissionPage/commons/web.py | 2 +- DrissionPage/commons/web.pyi | 4 ++-- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 83699f8..8a30804 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -19,7 +19,7 @@ from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chro from .commons.constants import HANDLE_ALERT_METHOD, ERROR, 
NoneElement from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder -from .commons.web import set_browser_cookies, ResponseData +from .commons.web import set_browser_cookies, ResponseData, location_in_viewport from .errors import ContextLossError, ElementLossError, AlertExistsError, CallMethodError, TabClosedError, \ NoRectError, BrowserConnectError from .session_element import make_session_ele @@ -823,7 +823,7 @@ class ChromiumBase(BasePage): pic_type = 'png' else: if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes elif as_base64: @@ -831,7 +831,7 @@ class ChromiumBase(BasePage): pic_type = 'png' else: if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 else: @@ -853,9 +853,21 @@ class ChromiumBase(BasePage): x, y = left_top w = right_bottom[0] - x h = right_bottom[1] - y + v = not (location_in_viewport(self, x, y) and + location_in_viewport(self, right_bottom[0], right_bottom[1])) + + if v: + shu = self.run_js('return document.body.scrollHeight > window.innerHeight;') + heng = self.run_js('return document.body.scrollWidth > window.innerWidth;') + if shu and not heng: + x += 10 + # elif heng and not shu: + # y += 5 + vp = {'x': x, 'y': y, 'width': w, 'height': h, 'scale': 1} png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, - captureBeyondViewport=False, clip=vp)['data'] + captureBeyondViewport=v, clip=vp)['data'] + else: png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type)['data'] diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index d9f507c..f40400d 100644 --- a/DrissionPage/chromium_element.py +++ 
b/DrissionPage/chromium_element.py @@ -515,6 +515,7 @@ class ChromiumElement(DrissionElement): while not self.run_js(js) and perf_counter() < end_time: sleep(.1) + self.scroll.to_see(True) left, top = self.location width, height = self.size left_top = (left, top) @@ -1420,6 +1421,14 @@ class ChromiumElementStates(object): x, y = self._ele.locations.click_point return location_in_viewport(self._ele.page, x, y) if x else False + @property + def is_whole_in_viewport(self): + """返回元素是否整个都在视口内""" + x1, y1 = self._ele.location + w, h = self._ele.size + x2, y2 = x1 + w, y1 + h + return location_in_viewport(self._ele.page, x1, y1) and location_in_viewport(self._ele.page, x2, y2) + @property def is_covered(self): """返回元素是否被覆盖,与是否在视口中无关""" diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 37de21f..fb85d6b 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -230,6 +230,9 @@ class ChromiumElementStates(object): @property def is_in_viewport(self) -> bool: ... + @property + def is_whole_in_viewport(self) -> bool: ... + @property def is_covered(self) -> bool: ... diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index e5aabd9..e6a24e6 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -180,7 +180,7 @@ def location_in_viewport(page, loc_x, loc_y): :param page: ChromePage对象 :param loc_x: 页面绝对坐标x :param loc_y: 页面绝对坐标y - :return: + :return: bool """ js = f'''function(){{var x = {loc_x}; var y = {loc_y}; const scrollLeft = document.documentElement.scrollLeft; diff --git a/DrissionPage/commons/web.pyi b/DrissionPage/commons/web.pyi index b57ed66..6e22253 100644 --- a/DrissionPage/commons/web.pyi +++ b/DrissionPage/commons/web.pyi @@ -63,7 +63,7 @@ class ResponseData(object): def requestHeaders(self) -> Union[CaseInsensitiveDict, None]: ... @requestHeaders.setter - def requestHeaders(self, val:dict) -> None: ... + def requestHeaders(self, val: dict) -> None: ... 
@property def postData(self) -> Union[dict, str, None]: ... @@ -81,7 +81,7 @@ def get_ele_txt(e: DrissionElement) -> str: ... def format_html(text: str) -> str: ... -def location_in_viewport(page, loc_x: int, loc_y: int) -> bool: ... +def location_in_viewport(page: ChromiumBase, loc_x: int, loc_y: int) -> bool: ... def offset_scroll(ele: ChromiumElement, offset_x: int, offset_y: int) -> tuple: ... From 56dac7abd7da12d7609be067528f773bd34834f0 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 22 Sep 2023 18:00:13 +0800 Subject: [PATCH 12/17] =?UTF-8?q?3.2.33=E4=BF=AE=E5=A4=8D=E5=BC=82?= =?UTF-8?q?=E5=9F=9Fiframe=E5=86=85=E5=85=83=E7=B4=A0=E6=88=AA=E5=9B=BE?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=9B=E5=85=83=E7=B4=A0=E5=AF=B9=E8=B1=A1?= =?UTF-8?q?=E5=88=A0=E9=99=A4stats.is=5Fwhole=5Fin=5Fviewport=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 5 ++--- DrissionPage/chromium_element.py | 8 -------- DrissionPage/chromium_element.pyi | 3 --- DrissionPage/chromium_frame.py | 13 +++++++++---- DrissionPage/chromium_frame.pyi | 1 - setup.py | 2 +- 6 files changed, 12 insertions(+), 20 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 8a30804..2575609 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -857,9 +857,8 @@ class ChromiumBase(BasePage): location_in_viewport(self, right_bottom[0], right_bottom[1])) if v: - shu = self.run_js('return document.body.scrollHeight > window.innerHeight;') - heng = self.run_js('return document.body.scrollWidth > window.innerWidth;') - if shu and not heng: + if (self.run_js('return document.body.scrollHeight > window.innerHeight;') and + not self.run_js('return document.body.scrollWidth > window.innerWidth;')): x += 10 # elif heng and not shu: # y += 5 diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index f40400d..f34b02e 100644 
--- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1421,14 +1421,6 @@ class ChromiumElementStates(object): x, y = self._ele.locations.click_point return location_in_viewport(self._ele.page, x, y) if x else False - @property - def is_whole_in_viewport(self): - """返回元素是否整个都在视口内""" - x1, y1 = self._ele.location - w, h = self._ele.size - x2, y2 = x1 + w, y1 + h - return location_in_viewport(self._ele.page, x1, y1) and location_in_viewport(self._ele.page, x2, y2) - @property def is_covered(self): """返回元素是否被覆盖,与是否在视口中无关""" diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index fb85d6b..37de21f 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -230,9 +230,6 @@ class ChromiumElementStates(object): @property def is_in_viewport(self) -> bool: ... - @property - def is_whole_in_viewport(self) -> bool: ... - @property def is_covered(self) -> bool: ... diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 374df89..b6b401c 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -485,7 +485,7 @@ class ChromiumFrame(ChromiumBase): pic_type = 'png' else: if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes elif as_base64: @@ -493,7 +493,7 @@ class ChromiumFrame(ChromiumBase): pic_type = 'png' else: if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + raise TypeError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 else: @@ -522,11 +522,16 @@ class ChromiumFrame(ChromiumBase): arguments[0].insertBefore(img, this); return img;''' new_ele = first_child.run_js(js, body) - new_ele.scroll.to_see(True) + new_ele.scroll.to_see(center=True) top = 
int(self.frame_ele.style('border-top').split('px')[0]) left = int(self.frame_ele.style('border-left').split('px')[0]) + + r = self.page.run_cdp('Page.getLayoutMetrics')['visualViewport'] + sx = r['pageX'] + sy = r['pageY'] r = self.page.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, - left_top=(cx + left, cy + top), right_bottom=(cx + w + left, cy + h + top)) + left_top=(cx + left + sx, cy + top + sy), + right_bottom=(cx + w + left + sx, cy + h + top + sy)) self.page.remove_ele(new_ele) return r diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 5326e53..d8232f7 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -17,7 +17,6 @@ class ChromiumFrame(ChromiumBase): self.frame_id: str = ... self._frame_ele: ChromiumElement = ... self._backend_id: str = ... - self.frame_page: ChromiumBase = ... self._doc_ele: ChromiumElement = ... self._is_diff_domain: bool = ... self.doc_ele: ChromiumElement = ... diff --git a/setup.py b/setup.py index 4aef9a5..d6e1e55 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.32", + version="3.2.33", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", From f9925e01fe5ac8ebe570500bc438a166c0e57097 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 16 Oct 2023 06:40:36 +0000 Subject: [PATCH 13/17] add .gitee/ISSUE_TEMPLATE.zh-CN.md. 
--- .gitee/ISSUE_TEMPLATE.zh-CN.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitee/ISSUE_TEMPLATE.zh-CN.md diff --git a/.gitee/ISSUE_TEMPLATE.zh-CN.md b/.gitee/ISSUE_TEMPLATE.zh-CN.md new file mode 100644 index 0000000..1c9e53c --- /dev/null +++ b/.gitee/ISSUE_TEMPLATE.zh-CN.md @@ -0,0 +1,3 @@ +- 使用上的问题请先查看文档[使用文档](http://g1879.gitee.io/drissionpagedocs) +- 遇到bug请详细描述如何重现,并附上代码 +- 提问前先给本库打个星,谢谢 \ No newline at end of file From f991e3dd81cfb501a68ec3d902e2572cb4469a1e Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 23 Oct 2023 17:53:25 +0800 Subject: [PATCH 14/17] =?UTF-8?q?3.2.34=E6=B5=8F=E8=A7=88=E5=99=A8?= =?UTF-8?q?=E6=9C=80=E5=B0=8F=E5=8C=96=E6=97=B6=E4=B9=9F=E8=83=BD=E5=93=8D?= =?UTF-8?q?=E5=BA=94=E6=A8=A1=E6=8B=9F=E6=93=8D=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 1 + README.md | 8 ++++---- setup.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 2575609..368af2a 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -100,6 +100,7 @@ class ChromiumBase(BasePage): self._tab_obj.start() self._tab_obj.DOM.enable() self._tab_obj.Page.enable() + self._tab_obj.call_method('Emulation.setFocusEmulationEnabled', enabled=True) self._tab_obj.Page.frameStoppedLoading = self._onFrameStoppedLoading self._tab_obj.Page.frameStartedLoading = self._onFrameStartedLoading diff --git a/README.md b/README.md index 0c1b297..d3378ff 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ python 版本:3.6 及以上 --- -**📖 使用文档:** [点击查看](http://g1879.gitee.io/drissionpagedocs) +**📖 使用文档:** [点击查看](https://g1879.gitee.io/drissionpagedocs) **交流 QQ 群:** 897838127[已满]、558778073 @@ -36,7 +36,7 @@ python 版本:3.6 及以上 # 🔥 新版预告 -查看下一步开发计划:[新版预告](http://g1879.gitee.io/drissionpagedocs/whatsnew/3_3/) +查看下一步开发计划:[新版预告](https://g1879.gitee.io/drissionpagedocs/whatsnew/3_3/) --- @@ 
-112,13 +112,13 @@ python 版本:3.6 及以上 # 🛠 使用文档 -[点击跳转到使用文档](http://g1879.gitee.io/drissionpage) +[点击跳转到使用文档](https://g1879.gitee.io/drissionpage) --- # 🔖 版本历史 -[点击查看版本历史](http://g1879.gitee.io/drissionpagedocs/history/3.x/) +[点击查看版本历史](https://g1879.gitee.io/drissionpagedocs/history/3.x/) --- diff --git a/setup.py b/setup.py index d6e1e55..3b50914 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.33", + version="3.2.34", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", From 38dcc88dfa25eeaad4e49c79e23a9c8dfc90195c Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 30 Nov 2023 14:48:50 +0800 Subject: [PATCH 15/17] =?UTF-8?q?3.2.35=E4=BF=AE=E5=A4=8D=E9=97=AE?= =?UTF-8?q?=E9=A2=98=EF=BC=88=E8=AF=A6=EF=BC=89=20tabs=E5=B1=9E=E6=80=A7?= =?UTF-8?q?=E5=BF=BD=E7=95=A5=E9=9A=90=E7=A7=81=E5=A3=B0=E6=98=8E=20?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=208x=20=E7=89=88=E6=B5=8F=E8=A7=88=E5=99=A8?= =?UTF-8?q?=E9=80=89=E6=8B=A9=E4=B8=8B=E6=8B=89=E5=88=97=E8=A1=A8=E6=97=B6?= =?UTF-8?q?=E6=8A=A5=E9=94=99=E9=97=AE=E9=A2=98=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E6=9F=90=E4=BA=9B=E6=83=85=E5=86=B5=E4=B8=8B=E4=B8=8B=E6=8B=89?= =?UTF-8?q?=E6=A1=86=E4=B8=8D=E8=A7=A6=E5=8F=91=E8=81=94=E5=8A=A8=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98=20=E4=BF=AE=E5=A4=8D=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=8D=9F=E5=9D=8F=E6=97=B6=E5=87=BA=E7=8E=B0?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=20=E4=BF=AE=E5=A4=8Dget()=E6=96=B9?= =?UTF-8?q?=E6=B3=95url=E5=90=AB=E6=9F=90=E4=BA=9B=E7=89=B9=E6=AE=8A?= =?UTF-8?q?=E5=AD=97=E7=AC=A6=E6=97=B6=E8=BF=9E=E6=8E=A5=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/__init__.py | 2 ++ DrissionPage/base.py | 2 +- DrissionPage/chromium_element.py | 4 ++-- 
DrissionPage/chromium_page.py | 3 ++- DrissionPage/commons/browser.py | 15 +++++++------ DrissionPage/commons/cli.py | 4 ++-- DrissionPage/commons/constants.py | 2 +- DrissionPage/configs/chromium_options.py | 2 +- DrissionPage/configs/session_options.py | 2 +- DrissionPage/easy_set.py | 2 +- DrissionPage/mixpage/base.py | 4 ++-- DrissionPage/mixpage/drission.py | 12 +++++------ DrissionPage/mixpage/driver_element.py | 6 +++--- DrissionPage/mixpage/driver_page.py | 2 +- DrissionPage/mixpage/session_element.py | 4 ++-- DrissionPage/mixpage/session_page.py | 4 ++-- DrissionPage/mixpage/shadow_root_element.py | 2 +- README.md | 24 ++++++++++++--------- setup.py | 2 +- 19 files changed, 54 insertions(+), 44 deletions(-) diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 335c6a4..2f87351 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -19,3 +19,5 @@ try: from .configs.driver_options import DriverOptions except ModuleNotFoundError: pass + +__version__ = '3.2.35' diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 6800ca6..3bc3469 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -387,7 +387,7 @@ class BasePage(BaseParser): :param interval: 重试间隔 :return: 重试次数和间隔组成的tuple """ - self._url = quote(url, safe='/:&?=%;#@+![]') + self._url = quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index f34b02e..353182b 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1815,7 +1815,7 @@ class ChromiumSelect(object): @property def options(self): """返回所有选项元素组成的列表""" - return self._ele.eles('xpath://option') + return [e for e in self._ele.eles('xpath://option') if isinstance(e, ChromiumElement)] @property def selected_option(self): @@ -2020,7 +2020,7 @@ class 
ChromiumSelect(object): def _dispatch_change(self): """触发修改动作""" - self._ele.run_js('this.dispatchEvent(new UIEvent("change"));') + self._ele.run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') class ChromiumElementWaiter(object): diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 240d776..c654f95 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -131,7 +131,8 @@ class ChromiumPage(ChromiumBase): def tabs(self): """返回所有标签页id组成的列表""" j = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp - return [i['id'] for i in j if i['type'] == 'page'] + return [i['id'] for i in j if i['type'] == 'page' and not i['url'].startswith('devtools://') and i[ + 'url'] != 'chrome://privacy-sandbox-dialog/notice'] @property def main_tab(self): diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 5c4bf4e..368adb9 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -3,18 +3,18 @@ @Author : g1879 @Contact : g1879@qq.com """ -from json import load, dump +from json import load, dump, JSONDecodeError from pathlib import Path +from platform import system from subprocess import Popen, DEVNULL from tempfile import gettempdir from time import perf_counter, sleep -from platform import system from requests import get as requests_get -from DrissionPage.configs.chromium_options import ChromiumOptions -from DrissionPage.errors import BrowserConnectError from .tools import port_is_using +from ..configs.chromium_options import ChromiumOptions +from ..errors import BrowserConnectError def connect_browser(option): @@ -43,7 +43,7 @@ def connect_browser(option): # 传入的路径找不到,主动在ini文件、注册表、系统变量中找 except FileNotFoundError: - from DrissionPage.easy_set import get_chrome_path + from ..easy_set import get_chrome_path chrome_path = get_chrome_path(show_msg=False) if not chrome_path: @@ -136,7 +136,10 @@ def set_prefs(opt): f.write('{}') with 
open(prefs_file, "r", encoding='utf-8') as f: - prefs_dict = load(f) + try: + prefs_dict = load(f) + except JSONDecodeError: + prefs_dict = {} for pref in prefs: value = prefs[pref] diff --git a/DrissionPage/commons/cli.py b/DrissionPage/commons/cli.py index cc82107..f9507db 100644 --- a/DrissionPage/commons/cli.py +++ b/DrissionPage/commons/cli.py @@ -1,7 +1,7 @@ from click import command, option -from DrissionPage import ChromiumPage -from DrissionPage.easy_set import set_paths, configs_to_here as ch +from ..chromium_page import ChromiumPage +from ..easy_set import set_paths, configs_to_here as ch @command() diff --git a/DrissionPage/commons/constants.py b/DrissionPage/commons/constants.py index c06c2c4..612beb3 100644 --- a/DrissionPage/commons/constants.py +++ b/DrissionPage/commons/constants.py @@ -3,7 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ -from DrissionPage.errors import ElementNotFoundError +from ..errors import ElementNotFoundError HANDLE_ALERT_METHOD = 'Page.handleJavaScriptDialog' FRAME_ELEMENT = ('iframe', 'frame') diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/configs/chromium_options.py index f5d32b2..0ade900 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/configs/chromium_options.py @@ -6,8 +6,8 @@ from pathlib import Path from tempfile import gettempdir, TemporaryDirectory -from DrissionPage.commons.tools import port_is_using, clean_folder from .options_manage import OptionsManager +from ..commons.tools import port_is_using, clean_folder class ChromiumOptions(object): diff --git a/DrissionPage/configs/session_options.py b/DrissionPage/configs/session_options.py index eaa763b..01cc3d3 100644 --- a/DrissionPage/configs/session_options.py +++ b/DrissionPage/configs/session_options.py @@ -8,8 +8,8 @@ from pathlib import Path from requests import Session from requests.structures import CaseInsensitiveDict -from DrissionPage.commons.web import cookies_to_tuple, set_session_cookies from 
.options_manage import OptionsManager +from ..commons.web import cookies_to_tuple, set_session_cookies class SessionOptions(object): diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 2876e0c..b8defe1 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -16,7 +16,7 @@ from .session_page import SessionPage try: from selenium import webdriver - from DrissionPage.mixpage.drission import Drission + from .mixpage.drission import Drission from .configs.driver_options import DriverOptions except ModuleNotFoundError: pass diff --git a/DrissionPage/mixpage/base.py b/DrissionPage/mixpage/base.py index 1194326..d38527f 100644 --- a/DrissionPage/mixpage/base.py +++ b/DrissionPage/mixpage/base.py @@ -7,8 +7,8 @@ from abc import abstractmethod from re import sub from urllib.parse import quote -from DrissionPage.commons.web import format_html -from DrissionPage.commons.locator import get_loc +from ..commons.locator import get_loc +from ..commons.web import format_html class BaseParser(object): diff --git a/DrissionPage/mixpage/drission.py b/DrissionPage/mixpage/drission.py index c8a6f90..906fdda 100644 --- a/DrissionPage/mixpage/drission.py +++ b/DrissionPage/mixpage/drission.py @@ -14,11 +14,11 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from tldextract import extract -from DrissionPage.commons.tools import get_pid_from_port, get_exe_from_port -from DrissionPage.commons.browser import connect_browser -from DrissionPage.commons.web import cookies_to_tuple -from DrissionPage.configs.session_options import SessionOptions, session_options_to_dict -from DrissionPage.configs.driver_options import DriverOptions +from ..commons.browser import connect_browser +from ..commons.tools import get_pid_from_port, get_exe_from_port +from ..commons.web import cookies_to_tuple +from ..configs.driver_options import DriverOptions +from ..configs.session_options import 
SessionOptions, session_options_to_dict class Drission(object): @@ -399,7 +399,7 @@ def create_driver(chrome_path, driver_path, options): # 若版本不对,获取对应 chromedriver 再试 except (WebDriverException, SessionNotCreatedException): print('打开失败,尝试获取driver。\n') - from DrissionPage.easy_set import get_match_driver, get_chrome_path + from ..easy_set import get_match_driver, get_chrome_path if chrome_path == 'chrome': chrome_path = get_chrome_path(show_msg=False, from_ini=False) diff --git a/DrissionPage/mixpage/driver_element.py b/DrissionPage/mixpage/driver_element.py index d083961..e7c730c 100644 --- a/DrissionPage/mixpage/driver_element.py +++ b/DrissionPage/mixpage/driver_element.py @@ -15,10 +15,10 @@ from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.support.wait import WebDriverWait from .base import DrissionElement, BaseElement -from DrissionPage.commons.locator import str_to_loc, get_loc -from DrissionPage.commons.tools import get_usable_path -from DrissionPage.commons.web import format_html, get_ele_txt from .session_element import make_session_ele +from ..commons.locator import str_to_loc, get_loc +from ..commons.tools import get_usable_path +from ..commons.web import format_html, get_ele_txt class DriverElement(DrissionElement): diff --git a/DrissionPage/mixpage/driver_page.py b/DrissionPage/mixpage/driver_page.py index 2f9fcfd..0779b4b 100644 --- a/DrissionPage/mixpage/driver_page.py +++ b/DrissionPage/mixpage/driver_page.py @@ -13,9 +13,9 @@ from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support.wait import WebDriverWait from .base import BasePage -from DrissionPage.commons.tools import get_usable_path from .driver_element import DriverElement, make_driver_ele, Scroll, ElementWaiter from .session_element import make_session_ele +from ..commons.tools import get_usable_path class DriverPage(BasePage): diff --git a/DrissionPage/mixpage/session_element.py 
b/DrissionPage/mixpage/session_element.py index 39b5232..a2eb6d0 100644 --- a/DrissionPage/mixpage/session_element.py +++ b/DrissionPage/mixpage/session_element.py @@ -10,8 +10,8 @@ from lxml.etree import tostring from lxml.html import HtmlElement, fromstring from .base import DrissionElement, BasePage, BaseElement -from DrissionPage.commons.locator import get_loc -from DrissionPage.commons.web import get_ele_txt, make_absolute_link +from ..commons.locator import get_loc +from ..commons.web import get_ele_txt, make_absolute_link class SessionElement(DrissionElement): diff --git a/DrissionPage/mixpage/session_page.py b/DrissionPage/mixpage/session_page.py index 1ca8c2c..b866346 100644 --- a/DrissionPage/mixpage/session_page.py +++ b/DrissionPage/mixpage/session_page.py @@ -13,9 +13,9 @@ from requests.structures import CaseInsensitiveDict from tldextract import extract from .base import BasePage -from DrissionPage.configs.session_options import SessionOptions -from DrissionPage.commons.web import cookie_to_dict, set_session_cookies from .session_element import SessionElement, make_session_ele +from ..commons.web import cookie_to_dict, set_session_cookies +from ..configs.session_options import SessionOptions class SessionPage(BasePage): diff --git a/DrissionPage/mixpage/shadow_root_element.py b/DrissionPage/mixpage/shadow_root_element.py index 1ce0860..227d39e 100644 --- a/DrissionPage/mixpage/shadow_root_element.py +++ b/DrissionPage/mixpage/shadow_root_element.py @@ -9,9 +9,9 @@ from typing import Union from selenium.webdriver.remote.webelement import WebElement from .base import BaseElement -from DrissionPage.commons.locator import get_loc from .driver_element import make_driver_ele from .session_element import make_session_ele, SessionElement +from ..commons.locator import get_loc class ShadowRootElement(BaseElement): diff --git a/README.md b/README.md index d3378ff..022a82c 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,23 @@ python 版本:3.6 及以上 **📖 使用文档:** 
[点击查看](https://g1879.gitee.io/drissionpagedocs) -**交流 QQ 群:** 897838127[已满]、558778073 +**交流 QQ 群:** 636361957 --- -# 🔥 新版预告 +# 🔥 新版尝鲜 -查看下一步开发计划:[新版预告](https://g1879.gitee.io/drissionpagedocs/whatsnew/3_3/) +4.0 在 3.x 的基础上对底层进行了大幅重构,新增大量功能,改善运行效率和稳定性,优化项目结构,解决很多存在的问题。对比旧版本有质的提高。 + +现已发布 beta 版,欢迎尝鲜。 + +[4.0功能介绍](https://g1879.gitee.io/drissionpagedocs/whatsnew/4_0/) + +安装(目前是b14,关注文档,可能会有更新版本): + +```console +pip install DrissionPage==4.0.0b14 +``` --- @@ -108,17 +118,11 @@ python 版本:3.6 及以上 - 还有很多细节,这里不一一列举,欢迎实际使用中体验:) ---- - -# 🛠 使用文档 - -[点击跳转到使用文档](https://g1879.gitee.io/drissionpage) - --- # 🔖 版本历史 -[点击查看版本历史](https://g1879.gitee.io/drissionpagedocs/history/3.x/) +[点击查看版本历史](https://g1879.gitee.io/drissionpagedocs/history/introduction/) --- diff --git a/setup.py b/setup.py index 3b50914..dcaef44 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.34", + version="3.2.35", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. 
It can control the browser and send and receive data packets.", From adcc564997fe02961fe499aad49badec31a79641 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 30 Nov 2023 14:54:49 +0800 Subject: [PATCH 16/17] =?UTF-8?q?3.2.35=E4=BF=AE=E5=A4=8D=E9=97=AE?= =?UTF-8?q?=E9=A2=98=EF=BC=88=E8=AF=A6=EF=BC=89=20=E6=8E=A5=E7=AE=A1?= =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E6=97=A0=E9=A1=BB'--remote-allow-or?= =?UTF-8?q?igins=3D*'=E5=8F=82=E6=95=B0=20tabs=E5=B1=9E=E6=80=A7=E5=BF=BD?= =?UTF-8?q?=E7=95=A5=E9=9A=90=E7=A7=81=E5=A3=B0=E6=98=8E=20=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=208x=20=E7=89=88=E6=B5=8F=E8=A7=88=E5=99=A8=E9=80=89?= =?UTF-8?q?=E6=8B=A9=E4=B8=8B=E6=8B=89=E5=88=97=E8=A1=A8=E6=97=B6=E6=8A=A5?= =?UTF-8?q?=E9=94=99=E9=97=AE=E9=A2=98=20=E4=BF=AE=E5=A4=8D=E6=9F=90?= =?UTF-8?q?=E4=BA=9B=E6=83=85=E5=86=B5=E4=B8=8B=E4=B8=8B=E6=8B=89=E6=A1=86?= =?UTF-8?q?=E4=B8=8D=E8=A7=A6=E5=8F=91=E8=81=94=E5=8A=A8=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98=20=E4=BF=AE=E5=A4=8D=E9=85=8D=E7=BD=AE=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E6=8D=9F=E5=9D=8F=E6=97=B6=E5=87=BA=E7=8E=B0=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98=20=E4=BF=AE=E5=A4=8Dget()=E6=96=B9=E6=B3=95u?= =?UTF-8?q?rl=E5=90=AB=E6=9F=90=E4=BA=9B=E7=89=B9=E6=AE=8A=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=E6=97=B6=E8=BF=9E=E6=8E=A5=E5=A4=B1=E8=B4=A5=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_driver.py | 3 ++- DrissionPage/commons/browser.py | 6 ------ DrissionPage/configs/configs.ini | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index a9dca5d..f1a09a3 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -200,7 +200,8 @@ class ChromiumDriver(object): self._started = True self.status = self._STARTED_ self._stopped.clear() - self._ws = create_connection(self._websocket_url, enable_multithread=True) + self._ws = 
create_connection(self._websocket_url, enable_multithread=True, + suppress_origin=True) self._recv_th.start() self._handle_event_th.start() return True diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 368adb9..9a7cbec 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -63,7 +63,6 @@ def get_launch_args(opt): # ----------处理arguments----------- result = set() has_user_path = False - remote_allow = False headless = False for i in opt.arguments: if i.startswith(('--load-extension=', '--remote-debugging-port=')): @@ -72,8 +71,6 @@ def get_launch_args(opt): result.add(f'--user-data-dir={Path(i[16:]).absolute()}') has_user_path = True continue - elif i.startswith('--remote-allow-origins='): - remote_allow = True elif i.startswith('--headless'): headless = True @@ -85,9 +82,6 @@ def get_launch_args(opt): path.mkdir(parents=True, exist_ok=True) result.add(f'--user-data-dir={path}') - if not remote_allow: - result.add('--remote-allow-origins=*') - if not headless and system().lower() == 'linux': from os import popen r = popen('systemctl list-units | grep graphical.target') diff --git a/DrissionPage/configs/configs.ini b/DrissionPage/configs/configs.ini index 9a5ad35..ab4c0a1 100644 --- a/DrissionPage/configs/configs.ini +++ b/DrissionPage/configs/configs.ini @@ -4,7 +4,7 @@ download_path = [chrome_options] debugger_address = 127.0.0.1:9222 binary_location = chrome -arguments = ['--remote-allow-origins=*', '--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking'] +arguments = ['--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking'] extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}} page_load_strategy = normal From cf4ba9cda97fb05f288c4b5a5cdc1c322184dd5a Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 8 Jan 2024 20:24:45 +0800 Subject: [PATCH 
17/17] 4.0.1 --- DrissionPage/mixpage/base.py | 324 ----- DrissionPage/mixpage/base.pyi | 175 --- DrissionPage/mixpage/drission.py | 458 ------- DrissionPage/mixpage/drission.pyi | 96 -- DrissionPage/mixpage/driver_element.py | 1264 ------------------ DrissionPage/mixpage/driver_element.pyi | 326 ----- DrissionPage/mixpage/driver_page.py | 611 --------- DrissionPage/mixpage/driver_page.pyi | 189 --- DrissionPage/mixpage/mix_page.py | 344 ----- DrissionPage/mixpage/mix_page.pyi | 156 --- DrissionPage/mixpage/session_element.py | 357 ----- DrissionPage/mixpage/session_element.pyi | 114 -- DrissionPage/mixpage/session_page.py | 533 -------- DrissionPage/mixpage/session_page.pyi | 237 ---- DrissionPage/mixpage/shadow_root_element.py | 219 --- DrissionPage/mixpage/shadow_root_element.pyi | 84 -- 16 files changed, 5487 deletions(-) delete mode 100644 DrissionPage/mixpage/base.py delete mode 100644 DrissionPage/mixpage/base.pyi delete mode 100644 DrissionPage/mixpage/drission.py delete mode 100644 DrissionPage/mixpage/drission.pyi delete mode 100644 DrissionPage/mixpage/driver_element.py delete mode 100644 DrissionPage/mixpage/driver_element.pyi delete mode 100644 DrissionPage/mixpage/driver_page.py delete mode 100644 DrissionPage/mixpage/driver_page.pyi delete mode 100644 DrissionPage/mixpage/mix_page.py delete mode 100644 DrissionPage/mixpage/mix_page.pyi delete mode 100644 DrissionPage/mixpage/session_element.py delete mode 100644 DrissionPage/mixpage/session_element.pyi delete mode 100644 DrissionPage/mixpage/session_page.py delete mode 100644 DrissionPage/mixpage/session_page.pyi delete mode 100644 DrissionPage/mixpage/shadow_root_element.py delete mode 100644 DrissionPage/mixpage/shadow_root_element.pyi diff --git a/DrissionPage/mixpage/base.py b/DrissionPage/mixpage/base.py deleted file mode 100644 index d38527f..0000000 --- a/DrissionPage/mixpage/base.py +++ /dev/null @@ -1,324 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" 
-from abc import abstractmethod -from re import sub -from urllib.parse import quote - -from ..commons.locator import get_loc -from ..commons.web import format_html - - -class BaseParser(object): - """所有页面、元素类的基类""" - - def __call__(self, loc_or_str): - return self.ele(loc_or_str) - - def ele(self, loc_or_ele, timeout=None): - return self._ele(loc_or_ele, timeout, True) - - def eles(self, loc_or_str, timeout=None): - return self._ele(loc_or_str, timeout, False) - - # ----------------以下属性或方法待后代实现---------------- - @property - def html(self): - return '' - - def s_ele(self, loc_or_ele): - pass - - def s_eles(self, loc_or_str): - pass - - @abstractmethod - def _ele(self, loc_or_ele, timeout=None, single=True): - pass - - -class BaseElement(BaseParser): - """各元素类的基类""" - - def __init__(self, page=None): - self.page = page - - # ----------------以下属性或方法由后代实现---------------- - @property - def tag(self): - return - - @abstractmethod - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - pass - - def parent(self, level_or_loc=1): - pass - - def prev(self, index=1): - return None # ShadowRootElement直接继承 - - def prevs(self) -> None: - return None # ShadowRootElement直接继承 - - def next(self, index=1): - pass - - def nexts(self): - pass - - -class DrissionElement(BaseElement): - """DriverElement、ChromiumElement 和 SessionElement的基类 - 但不是ShadowRootElement的基类""" - - @property - def link(self): - """返回href或src绝对url""" - return self.attr('href') or self.attr('src') - - @property - def css_path(self): - """返回css path路径""" - return self._get_ele_path('css') - - @property - def xpath(self): - """返回xpath路径""" - return self._get_ele_path('xpath') - - @property - def comments(self): - """返回元素注释文本组成的列表""" - return self.eles('xpath:.//comment()') - - def texts(self, text_node_only=False): - """返回元素内所有直接子节点的文本,包括元素和文本节点 - :param text_node_only: 是否只返回文本节点 - :return: 文本列表 - """ - if text_node_only: - texts = self.eles('xpath:/text()') - else: - texts = [x if isinstance(x, str) 
else x.text for x in self.eles('xpath:./text() | *')] - - return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - if isinstance(level_or_loc, int): - loc = f'xpath:./ancestor::*[{level_or_loc}]' - - elif isinstance(level_or_loc, (tuple, str)): - loc = get_loc(level_or_loc, True) - - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}' - - else: - raise TypeError('level_or_loc参数只能是tuple、int或str。') - - return self._ele(loc, timeout=0, relative=True) - - def prev(self, index=1, filter_loc='', timeout=0): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout) - return nodes[-1] if nodes else None - - def next(self, index=1, filter_loc='', timeout=0): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout) - return nodes[0] if nodes else None - - def before(self, index=1, filter_loc='', timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout) - return nodes[-1] if nodes else None - - def after(self, index=1, filter_loc='', timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - nodes = self._get_brothers(index, filter_loc, 'following', False, timeout) - return nodes[0] if 
nodes else None - - def prevs(self, filter_loc='', timeout=0): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout) - - def nexts(self, filter_loc='', timeout=0): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='following', timeout=timeout) - - def befores(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', brother=False, timeout=timeout) - - def afters(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='following', brother=False, timeout=timeout) - - def _get_brothers(self, index=None, filter_loc='', direction='following', brother=True, timeout=.5): - """按要求返回兄弟元素或节点组成的列表 - :param index: 获取第几个,该参数不为None时只获取该编号的元素 - :param filter_loc: 用于筛选的查询语法 - :param direction: 'following' 或 'preceding',查找的方向 - :param brother: 查找范围,在同级查找还是整个dom前后查找 - :param timeout: 查找等待时间 - :return: DriverElement对象或字符串 - """ - if index is not None and index < 1: - raise ValueError('index必须大于等于1。') - - brother = '-sibling' if brother else '' - - if not filter_loc: - loc = '*' - - else: - loc = get_loc(filter_loc, True) # 把定位符转换为xpath - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = loc[1].lstrip('./') - - loc = f'xpath:./{direction}{brother}::{loc}' - - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) - nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] - 
- if nodes and index is not None: - index = index - 1 if direction == 'following' else -index - try: - return [nodes[index]] - except IndexError: - return [] - else: - return nodes - - # ----------------以下属性或方法由后代实现---------------- - @property - def attrs(self): - return - - @property - def text(self): - return - - @property - def raw_text(self): - return - - @abstractmethod - def attr(self, attr: str): - return '' - - def _get_ele_path(self, mode): - return '' - - -class BasePage(BaseParser): - """页面类的基类""" - - def __init__(self, timeout=None): - """初始化函数""" - self._url = None - self.timeout = timeout if timeout is not None else 10 - self.retry_times = 3 - self.retry_interval = 2 - self._url_available = None - - @property - def title(self): - """返回网页title""" - ele = self.ele('xpath://title') - return ele.text if ele else None - - @property - def timeout(self): - """返回查找元素时等待的秒数""" - return self._timeout - - @timeout.setter - def timeout(self, second): - """设置查找元素时等待的秒数""" - self._timeout = second - - @property - def cookies(self): - """返回cookies""" - return self.get_cookies(True) - - @property - def url_available(self): - """返回当前访问的url有效性""" - return self._url_available - - def _before_connect(self, url, retry, interval): - """连接前的准备 - :param url: 要访问的url - :param retry: 重试次数 - :param interval: 重试间隔 - :return: 重试次数和间隔组成的tuple - """ - self._url = quote(url, safe='/:&?=%;#@+!') - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - return retry, interval - - # ----------------以下属性或方法由后代实现---------------- - @property - def url(self): - return - - @property - def json(self): - return - - @abstractmethod - def get_cookies(self, as_dict=False): - return {} - - @abstractmethod - def get(self, url, show_errmsg=False, retry=None, interval=None): - pass diff --git a/DrissionPage/mixpage/base.pyi b/DrissionPage/mixpage/base.pyi deleted file mode 100644 index 1f12e80..0000000 --- 
a/DrissionPage/mixpage/base.pyi +++ /dev/null @@ -1,175 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from abc import abstractmethod -from typing import Union, Tuple, List - - -class BaseParser(object): - - def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... - - def ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], timeout: float = None): ... - - def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): ... - - # ----------------以下属性或方法待后代实现---------------- - @property - def html(self) -> str: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement]): ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... - - @abstractmethod - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True): ... - - -class BaseElement(BaseParser): - - def __init__(self, page: BasePage = None): - self.page: BasePage = ... - - # ----------------以下属性或方法由后代实现---------------- - @property - def tag(self) -> str: ... - - @abstractmethod - def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, - relative: bool = False): ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... - - def prev(self, index: int = 1) -> None: ... - - def prevs(self) -> None: ... - - def next(self, index: int = 1): ... - - def nexts(self): ... - - -class DrissionElement(BaseElement): - - def __init__(self, - page: BasePage = ...): - self.page: BasePage = ... - - @property - def link(self) -> str: ... - - @property - def css_path(self) -> str: ... - - @property - def xpath(self) -> str: ... - - @property - def comments(self) -> list: ... - - def texts(self, text_node_only: bool = False) -> list: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... - - def prev(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union[DrissionElement, str, None]: ... 
- - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union[DrissionElement, str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union[DrissionElement, str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union[DrissionElement, str, None]: ... - - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union[DrissionElement, str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union[DrissionElement, str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union[DrissionElement, str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union[DrissionElement, str]]: ... - - def _get_brothers(self, - index: int = None, - filter_loc: Union[tuple, str] = '', - direction: str = 'following', - brother: bool = True, - timeout: float = 0.5) -> List[Union[DrissionElement, str]]: ... - - # ----------------以下属性或方法由后代实现---------------- - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - @abstractmethod - def attr(self, attr: str) -> str: ... - - def _get_ele_path(self, mode) -> str: ... - - -class BasePage(BaseParser): - - def __init__(self, timeout: float = None): - self._url_available: bool = ... - self.retry_times: int = ... - self.retry_interval: float = ... - self._timeout = float = ... - - @property - def title(self) -> Union[str, None]: ... - - @property - def timeout(self) -> float: ... - - @timeout.setter - def timeout(self, second: float) -> None: ... - - @property - def cookies(self) -> dict: ... - - @property - def url_available(self) -> bool: ... - - def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... 
- - # ----------------以下属性或方法由后代实现---------------- - @property - def url(self) -> str: ... - - @property - def json(self) -> dict: ... - - @abstractmethod - def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... - - @abstractmethod - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None): ... diff --git a/DrissionPage/mixpage/drission.py b/DrissionPage/mixpage/drission.py deleted file mode 100644 index 906fdda..0000000 --- a/DrissionPage/mixpage/drission.py +++ /dev/null @@ -1,458 +0,0 @@ -# -*- encoding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from platform import system -from sys import exit - -from requests import Session -from requests.structures import CaseInsensitiveDict -from selenium import webdriver -from selenium.common.exceptions import SessionNotCreatedException, WebDriverException -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from tldextract import extract - -from ..commons.browser import connect_browser -from ..commons.tools import get_pid_from_port, get_exe_from_port -from ..commons.web import cookies_to_tuple -from ..configs.driver_options import DriverOptions -from ..configs.session_options import SessionOptions, session_options_to_dict - - -class Drission(object): - """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" - - def __init__(self, driver_or_options=None, session_or_options=None, ini_path=None, proxy=None): - """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象 - :param driver_or_options: driver对象或DriverOptions、Options类,传入False则创建空配置对象 - :param session_or_options: Session对象或设置字典,传入False则创建空配置对象 - :param ini_path: ini文件路径 - :param proxy: 代理设置 - """ - self._session = None - self._driver = None - self._session_options = None - self._driver_options = None - self._debugger = None - self._proxy = proxy - - # ------------------处理session options---------------------- - if 
session_or_options is None: - self._session_options = SessionOptions(ini_path=ini_path).as_dict() - - elif session_or_options is False: - self._session_options = SessionOptions(read_file=False).as_dict() - - elif isinstance(session_or_options, Session): - self._session = session_or_options - - elif isinstance(session_or_options, SessionOptions): - self._session_options = session_or_options.as_dict() - - elif isinstance(session_or_options, dict): - self._session_options = session_or_options - - else: - raise TypeError('session_or_options参数只能接收Session, dict, SessionOptions或False。') - - # ------------------处理driver options---------------------- - if driver_or_options is None: - self._driver_options = DriverOptions(ini_path=ini_path) - - elif driver_or_options is False: - self._driver_options = DriverOptions(read_file=False) - - elif isinstance(driver_or_options, RemoteWebDriver): - self._driver = driver_or_options - - elif isinstance(driver_or_options, (Options, DriverOptions)): - self._driver_options = driver_or_options - - else: - raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。') - - def __del__(self): - """关闭对象时关闭浏览器和Session""" - try: - self.close() - except ImportError: - pass - - @property - def session(self): - """返回Session对象,如未初始化则按配置信息创建""" - if self._session is None: - self._set_session(self._session_options) - - if self._proxy: - self._session.proxies = self._proxy - - return self._session - - @property - def driver(self): - """返回WebDriver对象,如未初始化则按配置信息创建。 - 如设置了本地调试浏览器,可自动接入或打开浏览器进程。 - """ - if self._driver is None: - if not self.driver_options.debugger_address and self._proxy: - self.driver_options.add_argument(f'--proxy-server={self._proxy["http"]}') - - driver_path = self.driver_options.driver_path or 'chromedriver' - chrome_path = self.driver_options.chrome_path - - # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程----------- - if self.driver_options.debugger_address: - # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径 - cp, self._debugger = 
connect_browser(self.driver_options) - - if cp in (None, 'chrome'): - system_type = system().lower() - ip, port = self.driver_options.debugger_address.split(':') - if ip not in ('127.0.0.1', 'localhost'): - chrome_path = None - elif chrome_path == 'chrome' and system_type == 'windows': - chrome_path = get_exe_from_port(port) - - # -----------创建WebDriver对象----------- - self._driver = create_driver(chrome_path, driver_path, self.driver_options) - - # -----------解决接管新版浏览器不能定位到正确的标签页的问题----------- - active_tab = self._driver.window_handles[0] - if active_tab != self._driver.current_window_handle: - self._driver.switch_to.window(active_tab) - - return self._driver - - @property - def driver_options(self): - """返回driver配置信息""" - return self._driver_options - - @property - def session_options(self): - """返回session配置信息""" - return self._session_options - - @session_options.setter - def session_options(self, options): - """设置session配置 - :param options: session配置字典 - :return: None - """ - self._session_options = session_options_to_dict(options) - self._set_session(self._session_options) - - @property - def proxy(self): - """返回代理信息""" - return self._proxy - - @proxy.setter - def proxy(self, proxies=None): - """设置代理信息 - :param proxies: 代理信息字典 - :return: None - """ - self._proxy = proxies - - if self._session: - self._session.proxies = proxies - - if self._driver: - cookies = self._driver.get_cookies() - url = self._driver.current_url - self._driver.quit() - self._driver = None - self._driver = self.driver - self._driver.get(url) - - for cookie in cookies: - self.set_cookies(cookie, set_driver=True) - - @property - def debugger_progress(self): - """调试浏览器进程""" - return self._debugger - - def kill_browser(self): - """关闭浏览器进程(如果可以)""" - pid = self.get_browser_progress_id() - if not kill_progress(pid): - self._driver.quit() - - def get_browser_progress_id(self): - """获取浏览器进程id""" - if self.debugger_progress: - return self.debugger_progress.pid - - address = 
str(self.driver_options.debugger_address).split(':') - if len(address) == 2: - ip, port = address - if ip not in ('127.0.0.1', 'localhost') or not port.isdigit(): - return None - - from os import popen - txt = '' - progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') - for progress in progresses: - if 'LISTENING' in progress: - txt = progress - break - if not txt: - return None - - return txt.split(' ')[-1] - - def hide_browser(self): - """隐藏浏览器界面""" - self._show_or_hide_browser() - - def show_browser(self): - """显示浏览器界面""" - self._show_or_hide_browser(False) - - def _show_or_hide_browser(self, hide=True): - if system().lower() != 'windows': - raise OSError('该方法只能在Windows系统使用。') - - try: - from win32gui import ShowWindow - from win32con import SW_HIDE, SW_SHOW - except ImportError: - raise ImportError('请先安装:pip install pypiwin32') - - pid = self.get_browser_progress_id() - if not pid: - print('只有设置了debugger_address参数才能使用 show_browser() 和 hide_browser()') - return - hds = get_chrome_hwnds_from_pid(pid) - sw = SW_HIDE if hide else SW_SHOW - for hd in hds: - ShowWindow(hd, sw) - - def set_cookies(self, cookies, set_session=False, set_driver=False): - """设置cookies - :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict - :param set_session: 是否设置session的cookies - :param set_driver: 是否设置driver的cookies - :return: None - """ - cookies = cookies_to_tuple(cookies) - - for cookie in cookies: - if cookie['value'] is None: - cookie['value'] = '' - - # 添加cookie到session - if set_session: - kwargs = {x: cookie[x] for x in cookie - if x.lower() not in ('name', 'value', 'httponly', 'expiry', 'samesite')} - - if 'expiry' in cookie: - kwargs['expires'] = cookie['expiry'] - - self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) - - # 添加cookie到driver - if set_driver: - if 'expiry' in cookie: - cookie['expiry'] = int(cookie['expiry']) - - try: - browser_domain = extract(self.driver.current_url).fqdn - except AttributeError: - browser_domain = '' 
- - if not cookie.get('domain', None): - if browser_domain: - url = extract(browser_domain) - cookie_domain = f'{url.domain}.{url.suffix}' - else: - raise ValueError('cookie中没有域名或浏览器未访问过URL。') - - cookie['domain'] = cookie_domain - - else: - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] - - if cookie_domain not in browser_domain: - self.driver.get(cookie_domain if cookie_domain.startswith('http://') - else f'http://{cookie_domain}') - - # 避免selenium自动添加.后无法正确覆盖已有cookie - if cookie['domain'][0] != '.': - c = self.driver.get_cookie(cookie['name']) - if c and c['domain'] == cookie['domain']: - self.driver.delete_cookie(cookie['name']) - - self.driver.add_cookie(cookie) - - def _set_session(self, data): - """根据传入字典对session进行设置 - :param data: session配置字典 - :return: None - """ - if self._session is None: - self._session = Session() - - if 'headers' in data: - self._session.headers = CaseInsensitiveDict(data['headers']) - if 'cookies' in data: - self.set_cookies(data['cookies'], set_session=True) - - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' - for i in attrs: - if i in data: - self._session.__setattr__(i, data[i]) - - def cookies_to_session(self, copy_user_agent=False): - """把driver对象的cookies复制到session对象 - :param copy_user_agent: 是否复制ua信息 - :return: None - """ - if copy_user_agent: - user_agent_to_session(self.driver, self.session) - - self.set_cookies(self.driver.get_cookies(), set_session=True) - - def cookies_to_driver(self, url): - """把session对象的cookies复制到driver对象 - :param url: 作用域 - :return: None - """ - browser_domain = extract(self.driver.current_url).fqdn - ex_url = extract(url) - - if ex_url.fqdn not in browser_domain: - self.driver.get(url) - - domain = f'{ex_url.domain}.{ex_url.suffix}' - - cookies = [] - for cookie in self.session.cookies: - if cookie.domain == '': - cookie.domain = domain - - if domain in cookie.domain: - 
cookies.append(cookie) - - self.set_cookies(cookies, set_driver=True) - - def close_driver(self, kill=False): - """关闭driver和浏览器""" - if self._driver: - kill_progress(port=self._driver.service.port) # 关闭chromedriver.exe进程 - - if kill: - self.kill_browser() - else: - self._driver.quit() - - self._driver = None - - def close_session(self): - """关闭session""" - if self._session: - self._session.close() - self._session = None - - def close(self): - """关闭session、driver和浏览器""" - if self._driver: - self.close_driver() - - if self._session: - self.close_session() - - -def user_agent_to_session(driver, session): - """把driver的user-agent复制到session - :param driver: 来源driver对象 - :param session: 目标session对象 - :return: None - """ - driver = driver - session = session - selenium_user_agent = driver.execute_script("return navigator.userAgent;") - session.headers.update({"User-Agent": selenium_user_agent}) - - -def create_driver(chrome_path, driver_path, options): - """创建 WebDriver 对象 - :param chrome_path: chrome.exe 路径 - :param driver_path: chromedriver.exe 路径 - :param options: Options 对象 - :return: WebDriver 对象 - """ - try: - debugger_address = options.debugger_address - if options.debugger_address: - options = Options() - options.debugger_address = debugger_address - - return webdriver.Chrome(driver_path, options=options) - - # 若版本不对,获取对应 chromedriver 再试 - except (WebDriverException, SessionNotCreatedException): - print('打开失败,尝试获取driver。\n') - from ..easy_set import get_match_driver, get_chrome_path - - if chrome_path == 'chrome': - chrome_path = get_chrome_path(show_msg=False, from_ini=False) - - if chrome_path: - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=True) - if driver_path: - try: - options.binary_location = chrome_path - return webdriver.Chrome(driver_path, options=options) - except Exception: - pass - - print('无法启动,请检查浏览器路径,或手动设置chromedriver。\n下载地址:http://npm.taobao.org/mirrors/chromedriver/') - exit(0) - - -def 
get_chrome_hwnds_from_pid(pid): - """通过PID查询句柄ID""" - try: - from win32gui import IsWindow, GetWindowText, EnumWindows - from win32process import GetWindowThreadProcessId - except ImportError: - raise ImportError('请先安装win32gui,pip install pypiwin32') - - def callback(hwnd, hds): - if IsWindow(hwnd) and '- Google Chrome' in GetWindowText(hwnd): - _, found_pid = GetWindowThreadProcessId(hwnd) - if str(found_pid) == str(pid): - hds.append(hwnd) - return True - - hwnds = [] - EnumWindows(callback, hwnds) - return hwnds - - -def kill_progress(pid=None, port=None): - """关闭浏览器进程 - :param pid: 进程id - :param port: 端口号,如没有进程id,从端口号获取 - :return: 是否成功 - """ - from os import popen - if system().lower() != 'windows': - return False - - pid = pid or get_pid_from_port(port) - if not pid: - return False - - if popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'): - popen(f'taskkill /pid {pid} /F') - return True - else: - return False \ No newline at end of file diff --git a/DrissionPage/mixpage/drission.pyi b/DrissionPage/mixpage/drission.pyi deleted file mode 100644 index 3079bca..0000000 --- a/DrissionPage/mixpage/drission.pyi +++ /dev/null @@ -1,96 +0,0 @@ -# -*- encoding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from subprocess import Popen -from typing import Union - -from requests import Session -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver - -from DrissionPage.configs.driver_options import DriverOptions -from DrissionPage.configs.session_options import SessionOptions - - -class Drission(object): - - def __init__(self, - driver_or_options: Union[RemoteWebDriver, Options, DriverOptions, bool] = None, - session_or_options: Union[Session, dict, SessionOptions, bool] = None, - ini_path: str = None, - proxy: dict = None): - self._session: Session 
= ... - self._session_options: dict = ... - self._proxy: dict = ... - self._driver: WebDriver = ... - self._debugger: Popen = ... - self._driver_options: DriverOptions = ... - - def __del__(self): ... - - @property - def session(self) -> Session: ... - - @property - def driver(self) -> WebDriver: ... - - @property - def driver_options(self) -> Union[DriverOptions, Options]: ... - - @property - def session_options(self) -> dict: ... - - @session_options.setter - def session_options(self, options: Union[dict, SessionOptions]) -> None: ... - - @property - def proxy(self) -> Union[None, dict]: ... - - @proxy.setter - def proxy(self, proxies: dict = None) -> None: ... - - @property - def debugger_progress(self): ... - - def kill_browser(self) -> None: ... - - def get_browser_progress_id(self) -> Union[str, None]: ... - - def hide_browser(self) -> None: ... - - def show_browser(self) -> None: ... - - def _show_or_hide_browser(self, hide: bool = True) -> None: ... - - def set_cookies(self, - cookies: Union[RequestsCookieJar, list, tuple, str, dict], - set_session: bool = False, - set_driver: bool = False) -> None: ... - - def _set_session(self, data: dict) -> None: ... - - def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... - - def cookies_to_driver(self, url: str) -> None: ... - - def close_driver(self, kill: bool = False) -> None: ... - - def close_session(self) -> None: ... - - def close(self) -> None: ... - - -def user_agent_to_session(driver: RemoteWebDriver, session: Session) -> None: ... - - -def create_driver(chrome_path: str, driver_path: str, options: Options) -> WebDriver: ... - - -def get_chrome_hwnds_from_pid(pid: str) -> list: ... - - -def kill_progress(pid: str = None, port: int = None) -> bool: ... 
diff --git a/DrissionPage/mixpage/driver_element.py b/DrissionPage/mixpage/driver_element.py deleted file mode 100644 index e7c730c..0000000 --- a/DrissionPage/mixpage/driver_element.py +++ /dev/null @@ -1,1264 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from os import sep -from pathlib import Path -from time import time, perf_counter, sleep - -from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException, \ - NoSuchElementException -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support import expected_conditions as ec -from selenium.webdriver.support.wait import WebDriverWait - -from .base import DrissionElement, BaseElement -from .session_element import make_session_ele -from ..commons.locator import str_to_loc, get_loc -from ..commons.tools import get_usable_path -from ..commons.web import format_html, get_ele_txt - - -class DriverElement(DrissionElement): - """driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能""" - - def __init__(self, ele, page=None): - """初始化对象 - :param ele: 被包装的WebElement元素 - :param page: 元素所在页面 - """ - super().__init__(page) - self._select = None - self._scroll = None - self._inner_ele = ele - - def __repr__(self): - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def inner_ele(self): - return self._inner_ele - - @property - def tag(self): - """返回元素类型""" - return self._inner_ele.tag_name.lower() - - @property - def html(self): - """返回元素outerHTML文本""" - return self.inner_ele.get_attribute('outerHTML') - - @property - def 
inner_html(self): - """返回元素innerHTML文本""" - return self.inner_ele.get_attribute('innerHTML') - - @property - def attrs(self): - """返回元素所有属性及值""" - js = ''' - var dom=arguments[0]; - var names="("; - var len = dom.attributes.length; - for(var i=0;i1){path = '/' + tag + '[' + nth + ']' + path;} - else{path = '/' + tag + path;}''' - txt5 = '''return path;''' - - elif mode == 'css': - txt1 = '' - # txt2 = '''return '#' + el.id + path;''' - txt3 = '' - txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' - txt5 = '''return path.substr(1);''' - - else: - raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") - - js = ''' - function e(el) { - if (!(el instanceof Element)) return; - var path = ''; - while (el.nodeType === Node.ELEMENT_NODE) { - ''' + txt1 + ''' - var sib = el, nth = 0; - while (sib) { - if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} - sib = sib.previousSibling; - } - ''' + txt4 + ''' - el = el.parentNode; - } - ''' + txt5 + ''' - } - return e(arguments[0]); - ''' - res_txt = self.run_script(js) - return f':root{res_txt}' if mode == 'css' else res_txt - - # -----------------driver独有属性和方法------------------- - @property - def size(self): - """返回元素宽和高""" - return self.inner_ele.size - - @property - def location(self): - """返回元素左上角坐标""" - return self.inner_ele.location - - @property - def shadow_root(self): - """返回当前元素的shadow_root元素对象""" - shadow = self.run_script('return arguments[0].shadowRoot') - if shadow: - from .shadow_root_element import ShadowRootElement - return ShadowRootElement(shadow, self) - - @property - def sr(self): - """返回当前元素的shadow_root元素对象""" - return self.shadow_root - - @property - def pseudo_before(self): - """返回当前元素的::before伪元素内容""" - return self.style('content', 'before') - - @property - def pseudo_after(self): - """返回当前元素的::after伪元素内容""" - return self.style('content', 'after') - - @property - def select(self): - """返回专门处理下拉列表的Select类,非下拉列表元素返回False""" - if self._select is None: - if self.tag != 'select': - 
self._select = False - else: - self._select = Select(self) - - return self._select - - @property - def scroll(self): - """用于滚动滚动条的对象""" - if self._scroll is None: - self._scroll = Scroll(self) - return self._scroll - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - return super().parent(level_or_loc) - - def prev(self, index=1, filter_loc='', timeout=0): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().prev(index, filter_loc, timeout) - - def next(self, index=1, filter_loc='', timeout=0): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().next(index, filter_loc, timeout) - - def before(self, index=1, filter_loc='', timeout=None): - """返回当前元素前面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().before(index, filter_loc, timeout) - - def after(self, index=1, filter_loc='', timeout=None): - """返回当前元素后面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().after(index, filter_loc, timeout) - - def prevs(self, filter_loc='', timeout=0): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().prevs(filter_loc, timeout) - - def nexts(self, filter_loc='', timeout=0): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 
用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().nexts(filter_loc, timeout) - - def befores(self, filter_loc='', timeout=None): - """返回当前元素后面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return super().befores(filter_loc, timeout) - - def afters(self, filter_loc='', timeout=None): - """返回当前元素前面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return super().afters(filter_loc, timeout) - - def left(self, index=1, filter_loc=''): - """获取网页上显示在当前元素左边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def right(self, index=1, filter_loc=''): - """获取网页上显示在当前元素右边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('right', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def above(self, index=1, filter_loc=''): - """获取网页上显示在当前元素上边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def below(self, index=1, filter_loc=''): - """获取网页上显示在当前元素下边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def near(self, index=1, filter_loc=''): - """获取网页上显示在当前元素最近的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 
筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('near', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def lefts(self, filter_loc=''): - """获取网页上显示在当前元素左边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def rights(self, filter_loc=''): - """获取网页上显示在当前元素右边的所有元,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('right', filter_loc) - - def aboves(self, filter_loc=''): - """获取网页上显示在当前元素上边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def belows(self, filter_loc=''): - """获取网页上显示在当前元素下边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def nears(self, filter_loc=''): - """获取网页上显示在当前元素附近元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('near', filter_loc) - - def wait_ele(self, loc_or_ele, timeout=None): - """等待子元素从dom删除、显示、隐藏 - :param loc_or_ele: 可以是元素、查询字符串、loc元组 - :param timeout: 等待超时时间 - :return: 等待是否成功 - """ - return ElementWaiter(self, loc_or_ele, timeout) - - def style(self, style, pseudo_ele=''): - """返回元素样式属性值,可获取伪元素属性值 - :param style: 样式属性名称 - :param pseudo_ele: 伪元素名称(如有) - :return: 样式属性的值 - """ - if pseudo_ele: - pseudo_ele = f', "{pseudo_ele}"' if pseudo_ele.startswith(':') else f', "::{pseudo_ele}"' - r = self.run_script(f'return window.getComputedStyle(arguments[0]{pseudo_ele}).getPropertyValue("{style}");') - - return None if r == 'none' else r - - def click(self, by_js=None, timeout=None): - """点击元素 - 尝试点击直到超时,若都失败就改用js点击 - 
:param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js - :param timeout: 尝试点击的超时时间,不指定则使用父页面的超时时间 - :return: 是否点击成功 - """ - - def do_it() -> bool: - try: - self.inner_ele.click() - return True - except Exception: - return False - - if not by_js: - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - click = do_it() - while not click and perf_counter() - t1 <= timeout: - click = do_it() - - if click: - return True - - # 若点击失败,用js方式点击 - if by_js is not False: - self.run_script('arguments[0].click()') - return True - - return False - - def click_at(self, x=None, y=None, by_js=False): - """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - :param by_js: 是否用js点击 - :return: None - """ - if by_js: - x = self.location['x'] + int(x) if x is not None else self.location['x'] + self.size['width'] // 2 - y = self.location['y'] + int(y) if y is not None else self.location['y'] + self.size['height'] // 2 - js = f""" - var ev = document.createEvent('HTMLEvents'); - ev.clientX = {x}; - ev.clientY = {y}; - ev.initEvent('click', false, true); - arguments[0].dispatchEvent(ev); - """ - self.run_script(js) - - else: - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - - from selenium.webdriver import ActionChains - ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform() - - def r_click(self): - """右键单击""" - from selenium.webdriver import ActionChains - ActionChains(self.page.driver).context_click(self.inner_ele).perform() - - def r_click_at(self, x=None, y=None): - """带偏移量右键单击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - :return: None - """ - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - from selenium.webdriver import ActionChains - 
ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).context_click().perform() - - def input(self, vals, clear=True, insure=True, timeout=None): - """输入文本或组合键,也可用于输入文件路径到input元素(文件间用\n间隔) - :param vals: 文本值或按键组合 - :param clear: 输入前是否清空文本框 - :param insure: 确保输入正确,解决文本框有时输入失效的问题,不能用于输入组合键 - :param timeout: 尝试输入的超时时间,不指定则使用父页面的超时时间,只在insure为True时生效 - :return: bool - """ - if not insure or self.tag != 'input' or self.prop('type') != 'text': # 普通输入 - if not isinstance(vals, (str, tuple)): - vals = str(vals) - if clear: - self.inner_ele.clear() - - self.inner_ele.send_keys(*vals) - return True - - else: # 确保输入正确 - if not isinstance(vals, str): - vals = str(vals) - enter = '\n' if vals.endswith('\n') else None - full_txt = vals if clear else f'{self.attr("value")}{vals}' - full_txt = full_txt.rstrip('\n') - - self.click(by_js=True) - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - while self.is_valid() and self.attr('value') != full_txt and perf_counter() - t1 <= timeout: - try: - if clear: - self.inner_ele.send_keys(u'\ue009', 'a', u'\ue017') # 有些ui下clear()不生效,用CTRL+a代替 - self.inner_ele.send_keys(vals) - - except Exception: - pass - - if not self.is_valid(): - return False - else: - if self.attr('value') != full_txt: - return False - else: - if enter: - self.inner_ele.send_keys(enter) - return True - - def run_script(self, script, *args): - """执行js代码,代码中用arguments[0]表示自己 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.inner_ele.parent.execute_script(script, self.inner_ele, *args) - - def submit(self): - """提交表单""" - try: - self.inner_ele.submit() - return True - except Exception: - pass - - def clear(self, insure=True): - """清空元素文本 - :param insure: 是否确保清空 - :return: 是否清空成功,不能清空的元素返回None - """ - if insure: - return self.input('') - - else: - try: - self.inner_ele.clear() - return True - except InvalidElementStateException: - return None - - def is_selected(self): - 
"""是否选中""" - return self.inner_ele.is_selected() - - def is_enabled(self): - """是否可用""" - return self.inner_ele.is_enabled() - - def is_displayed(self): - """是否可见""" - return self.inner_ele.is_displayed() - - def is_valid(self): - """用于判断元素是否还在DOM内,应对页面跳转元素不能用的情况""" - try: - self.is_enabled() - return True - except Exception: - return False - - def screenshot(self, path=None, filename=None, as_bytes=False): - """对元素进行截图 - :param path: 保存路径 - :param filename: 图片文件名,不传入时以元素tag name命名 - :param as_bytes: 是否已字节形式返回图片,为True时上面两个参数失效 - :return: 图片完整路径或字节文本 - """ - # 等待元素加载完成 - if self.tag == 'img': - js = ('return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" ' - '&& arguments[0].naturalWidth > 0 && typeof arguments[0].naturalHeight != "undefined" ' - '&& arguments[0].naturalHeight > 0') - t1 = perf_counter() - while not self.run_script(js) and perf_counter() - t1 < self.page.timeout: - sleep(.1) - - if as_bytes: - return self.inner_ele.screenshot_as_png - - name = filename or self.tag - path = Path(path or '.').absolute() - path.mkdir(parents=True, exist_ok=True) - if not name.lower().endswith('.png'): - name = f'{name}.png' - - img_path = str(get_usable_path(f'{path}{sep}{name}')) - self.inner_ele.screenshot(img_path) - - return img_path - - def prop(self, prop): - """获取property属性值 - :param prop: 属性名 - :return: 属性值文本 - """ - return format_html(self.inner_ele.get_property(prop)) - - def set_prop(self, prop, value): - """设置元素property属性 - :param prop: 属性名 - :param value: 属性值 - :return: 是否设置成功 - """ - try: - value = value.replace("'", "\\'") - self.run_script(f"arguments[0].{prop}='{value}';") - return True - except Exception: - return False - - def set_attr(self, attr, value): - """设置元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: 是否设置成功 - """ - try: - self.run_script(f"arguments[0].setAttribute(arguments[1], arguments[2]);", attr, value) - return True - except Exception: - return False - - def remove_attr(self, attr): - 
"""删除元素attribute属性 - :param attr: 属性名 - :return: 是否删除成功 - """ - try: - self.run_script(f'arguments[0].removeAttribute("{attr}");') - return True - except Exception: - return False - - def drag(self, x, y, speed=40, shake=True): - """拖拽当前元素到相对位置 - :param x: x变化值 - :param y: y变化值 - :param speed: 拖动的速度,传入0即瞬间到达 - :param shake: 是否随机抖动 - :return: None - """ - x += self.location['x'] + self.size['width'] // 2 - y += self.location['y'] + self.size['height'] // 2 - self.drag_to((x, y), speed, shake) - - def drag_to(self, ele_or_loc, speed=40, shake=True): - """拖拽当前元素,目标为另一个元素或坐标元组 - :param ele_or_loc: 另一个元素或坐标元组,坐标为元素中点的坐标 - :param speed: 拖动的速度,传入0即瞬间到达 - :param shake: 是否随机抖动 - :return: None - """ - # x, y:目标点坐标 - if isinstance(ele_or_loc, (DriverElement, WebElement)): - target_x = ele_or_loc.location['x'] + ele_or_loc.size['width'] // 2 - target_y = ele_or_loc.location['y'] + ele_or_loc.size['height'] // 2 - elif isinstance(ele_or_loc, tuple): - target_x, target_y = ele_or_loc - else: - raise TypeError('需要DriverElement、WebElement对象或坐标。') - - current_x = self.location['x'] + self.size['width'] // 2 - current_y = self.location['y'] + self.size['height'] // 2 - width = target_x - current_x - height = target_y - current_y - num = 0 if not speed else int(((abs(width) ** 2 + abs(height) ** 2) ** .5) // speed) - - # 将要经过的点存入列表 - points = [(int(current_x + i * (width / num)), int(current_y + i * (height / num))) for i in range(1, num)] - points.append((target_x, target_y)) - - from selenium.webdriver import ActionChains - from random import randint - actions = ActionChains(self.page.driver) - actions.click_and_hold(self.inner_ele) - - # 逐个访问要经过的点 - for x, y in points: - if shake: - x += randint(-3, 4) - y += randint(-3, 4) - actions.move_by_offset(x - current_x, y - current_y) - current_x, current_y = x, y - actions.release().perform() - - def hover(self, x=None, y=None): - """鼠标悬停,可接受偏移量,偏移量相对于元素左上角坐标。不传入x或y值时悬停在元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - 
:return: None - """ - from selenium.webdriver import ActionChains - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).perform() - - def _get_relative_eles(self, mode, loc=''): - """获取网页上相对于当前元素周围的某个元素,可设置选取条件 - :param mode: 可选:'left', 'right', 'above', 'below', 'near' - :param loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - from selenium.webdriver.support.relative_locator import RelativeBy - - if isinstance(loc, str): - loc = str_to_loc(loc) - - try: - if mode == 'left': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_left_of(self.inner_ele)) - elif mode == 'right': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_right_of(self.inner_ele)) - elif mode == 'above': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).above(self.inner_ele)) - elif mode == 'below': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).below(self.inner_ele)) - else: # 'near' - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).near(self.inner_ele)) - - return [self.page.ele(e) for e in eles] - - except IndexError: - raise ValueError('未找到元素,请检查浏览器版本,低版本的浏览器无法使用此方法。') - - -def make_driver_ele(page_or_ele, loc, single=True, timeout=None): - """执行driver模式元素的查找 - 页面查找元素及元素查找下级元素皆使用此方法 - :param page_or_ele: DriverPage对象或DriverElement对象 - :param loc: 元素定位元组 - :param single: True则返回第一个,False则返回全部 - :param timeout: 查找元素超时时间 - :return: 返回DriverElement元素或它们组成的列表 - """ - # ---------------处理定位符--------------- - if isinstance(loc, (str, tuple)): - loc = get_loc(loc) - - elif str(type(loc)).endswith('RelativeBy'): - page = page_or_ele.page if isinstance(page_or_ele, BaseElement) else page_or_ele - driver = page.driver - eles = driver.find_elements(loc) - return DriverElement(eles[0], page) if single else [DriverElement(ele, page) for 
ele in eles] - - else: - raise ValueError("定位符必须为str、长度为2的tuple、或RelativeBy对象。") - - # ---------------设置 page 和 driver--------------- - if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc_str}' - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>') and isinstance(page_or_ele, DriverElement): - loc_str = f'{page_or_ele.css_path}{loc[1]}' - loc = loc[0], loc_str - - page = page_or_ele.page - driver = page_or_ele.inner_ele - - else: # 传入的是DriverPage对象 - page = page_or_ele - driver = page_or_ele.driver - - # -----------------设置等待对象----------------- - if timeout is not None and timeout != page.timeout: - wait = WebDriverWait(driver, timeout=timeout) - else: - page.wait_object._driver = driver - wait = page.wait_object - - # ---------------执行查找----------------- - try: - # 使用xpath查找 - if loc[0] == 'xpath': - return wait.until(ElementsByXpath(page, loc[1], single, timeout)) - - # 使用css selector查找 - else: - if single: - return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) - else: - eles = wait.until(ec.presence_of_all_elements_located(loc)) - return [DriverElement(ele, page) for ele in eles] - - except TimeoutException: - return [] if not single else None - - except InvalidElementStateException: - raise ValueError(f'无效的查找语句:{loc}') - - -class ElementsByXpath(object): - """用js通过xpath获取元素、节点或属性,与WebDriverWait配合使用""" - - def __init__(self, page, xpath=None, single=False, timeout=10): - """ - :param page: DrissionPage对象 - :param xpath: xpath文本 - :param single: True则返回第一个,False则返回全部 - :param timeout: 超时时间 - """ - self.page = page - self.xpath = xpath - self.single = single - self.timeout = timeout - - def __call__(self, ele_or_driver): - - def get_nodes(node=None, xpath_txt=None, type_txt='7'): - """用js通过xpath获取元素、节点或属性 - :param node: 'document' 或 元素对象 - :param xpath_txt: xpath语句 - :param type_txt: resultType,参考 
https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate - :return: 元素对象或属性、文本字符串 - """ - node_txt = 'document' if not node or node == 'document' else 'arguments[0]' - for_txt = '' - - # 获取第一个元素、节点或属性 - if type_txt == '9': - return_txt = ''' - if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} - else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} - else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} - else{return e.singleNodeValue;} - ''' - - # 按顺序获取所有元素、节点或属性 - elif type_txt == '7': - for_txt = """ - var a=new Array(); - for(var i = 0; i 元素使用,现在是:{ele.tag}。") - - from selenium.webdriver.support.select import Select as SeleniumSelect - self.inner_ele = ele - self.select_ele = SeleniumSelect(ele.inner_ele) - - def __call__(self, text_or_index, timeout=None): - """选定下拉列表中子元素 - :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self.select(text_or_index, timeout=timeout) - - @property - def is_multi(self): - """返回是否多选表单""" - return self.select_ele.is_multiple - - @property - def options(self): - """返回所有选项元素组成的列表""" - return self.inner_ele.eles('tag:option') - - @property - def selected_option(self): - """返回第一个被选中的option元素 - :return: DriverElement对象或None - """ - ele = self.inner_ele.run_script('return arguments[0].options[arguments[0].selectedIndex];') - return None if ele is None else DriverElement(ele, self.inner_ele.page) - - @property - def selected_options(self): - """返回所有被选中的option元素列表 - :return: DriverElement对象组成的列表 - """ - return [x for x in self.options if x.is_selected()] - - def clear(self): - """清除所有已选项""" - self.select_ele.deselect_all() - - def select(self, text_or_index, timeout=None): - """选定下拉列表中子元素 - :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 
超时时间,不输入默认实用页面超时时间 - :return: 是否选择成功 - """ - i = 'index' if isinstance(text_or_index, int) else 'text' - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(text_or_index, i, False, timeout) - - def select_by_value(self, value, timeout=None): - """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可选择多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(value, 'value', False, timeout) - - def deselect(self, text_or_index, timeout=None): - """取消选定下拉列表中子元素 - :param text_or_index: 根据文本或序号取消择选项,若允许多选,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - i = 'index' if isinstance(text_or_index, int) else 'text' - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(text_or_index, i, True, timeout) - - def deselect_by_value(self, value, timeout=None): - """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(value, 'value', True, timeout) - - def invert(self): - """反选""" - if not self.is_multi: - raise NotImplementedError("只能对多项选框执行反选。") - - for i in self.options: - i.click(by_js=True) - - def _select(self, text_value_index, para_type='text', deselect=False, timeout=None): - """选定或取消选定下拉列表中子元素 - :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选 'text'、'value'、'index' - :param deselect: 是否取消选择 - :return: 是否选择成功 - """ - if not self.is_multi and isinstance(text_value_index, (list, tuple)): - raise TypeError('单选下拉列表不能传入list和tuple') - - def do_select(): - try: - if para_type == 'text': - if deselect: - self.select_ele.deselect_by_visible_text(text_value_index) - else: - 
self.select_ele.select_by_visible_text(text_value_index) - elif para_type == 'value': - if deselect: - self.select_ele.deselect_by_value(text_value_index) - else: - self.select_ele.select_by_value(text_value_index) - elif para_type == 'index': - if deselect: - self.select_ele.deselect_by_index(int(text_value_index)) - else: - self.select_ele.select_by_index(int(text_value_index)) - else: - raise ValueError('para_type参数只能传入"text"、"value"或"index"。') - - return True - - except NoSuchElementException: - return False - - if isinstance(text_value_index, (str, int)): - t1 = perf_counter() - ok = do_select() - while not ok and perf_counter() - t1 < timeout: - sleep(.2) - ok = do_select() - return ok - - elif isinstance(text_value_index, (list, tuple)): - return self._select_multi(text_value_index, para_type, deselect) - - else: - raise TypeError('只能传入str、int、list和tuple类型。') - - def _select_multi(self, text_value_index=None, para_type='text', deselect=False) -> bool: - """选定或取消选定下拉列表中多个子元素 - :param text_value_index: 根据文本、值选或序号择选多项 - :param para_type: 参数类型,可选 'text'、'value'、'index' - :param deselect: 是否取消选择 - :return: 是否选择成功 - """ - if para_type not in ('text', 'value', 'index'): - raise ValueError('para_type参数只能传入“text”、“value”或“index”') - - if not isinstance(text_value_index, (list, tuple)): - raise TypeError('只能传入list或tuple类型。') - - success = True - for i in text_value_index: - if not isinstance(i, (int, str)): - raise TypeError('列表只能由str或int组成') - - p = 'index' if isinstance(i, int) else para_type - if not self._select(i, p, deselect): - success = False - - return success - - -class ElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" - - def __init__(self, page_or_ele, loc_or_ele, timeout=None): - """等待元素在dom中某种状态,如删除、显示、隐藏 - :param page_or_ele: 页面或父元素 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - """ - if isinstance(page_or_ele, DriverElement): - page = page_or_ele.page - self.driver = page_or_ele.inner_ele - else: - page = page_or_ele - 
self.driver = page_or_ele.driver - - if isinstance(loc_or_ele, DriverElement): - self.target = loc_or_ele.inner_ele - - elif isinstance(loc_or_ele, WebElement): - self.target = loc_or_ele - - elif isinstance(loc_or_ele, str): - self.target = str_to_loc(loc_or_ele) - - elif isinstance(loc_or_ele, tuple): - self.target = loc_or_ele - - else: - raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型。') - - self.timeout = timeout if timeout is not None else page.timeout - - def delete(self): - """等待元素从dom删除""" - return self._wait_ele('del') - - def display(self): - """等待元素从dom显示""" - return self._wait_ele('display') - - def hidden(self): - """等待元素从dom隐藏""" - return self._wait_ele('hidden') - - def _wait_ele(self, mode): - """执行等待 - :param mode: 等待模式 - :return: 是否等待成功 - """ - if isinstance(self.target, WebElement): - end_time = time() + self.timeout - while time() < end_time: - if mode == 'del': - try: - self.target.is_enabled() - except Exception: - return True - - elif mode == 'display' and self.target.is_displayed(): - return True - - elif mode == 'hidden' and not self.target.is_displayed(): - return True - - return False - - else: - try: - if mode == 'del': - WebDriverWait(self.driver, self.timeout).until_not(ec.presence_of_element_located(self.target)) - - elif mode == 'display': - WebDriverWait(self.driver, self.timeout).until(ec.visibility_of_element_located(self.target)) - - elif mode == 'hidden': - WebDriverWait(self.driver, self.timeout).until_not(ec.visibility_of_element_located(self.target)) - - return True - - except Exception: - return False - - -class Scroll(object): - """用于滚动的对象""" - - def __init__(self, page_or_ele): - """ - :param page_or_ele: DriverPage或DriverElement - """ - self.driver = page_or_ele - if isinstance(page_or_ele, DriverElement): - self.t1 = self.t2 = 'arguments[0]' - else: - self.t1 = 'window' - self.t2 = 'document.documentElement' - - def to_top(self): - """滚动到顶端,水平位置不变""" - 
self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,0);') - - def to_bottom(self): - """滚动到底端,水平位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,{self.t2}.scrollHeight);') - - def to_half(self): - """滚动到垂直中间位置,水平位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,{self.t2}.scrollHeight/2);') - - def to_rightmost(self): - """滚动到最右边,垂直位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollWidth,{self.t2}.scrollTop);') - - def to_leftmost(self): - """滚动到最左边,垂直位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo(0,{self.t2}.scrollTop);') - - def to_location(self, x, y): - """滚动到指定位置 - :param x: 水平距离 - :param y: 垂直距离 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollTo({x},{y});') - - def up(self, pixel=300): - """向上滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self.driver.run_script(f'{self.t1}.scrollBy(0,{pixel});') - - def down(self, pixel=300): - """向下滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollBy(0,{pixel});') - - def left(self, pixel=300): - """向左滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self.driver.run_script(f'{self.t1}.scrollBy({pixel},0);') - - def right(self, pixel=300): - """向右滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollBy({pixel},0);') - - -def _exchange_arguments(index, filter_loc): - # 此方法用于兼容MixPage参数顺序相反的情况 - if isinstance(index, str) and isinstance(filter_loc, int): - index, filter_loc = filter_loc, index - elif isinstance(index, int) and filter_loc == 1: - filter_loc = '' - elif isinstance(filter_loc, str) and index == '': - index = 1 - return index, filter_loc diff --git a/DrissionPage/mixpage/driver_element.pyi b/DrissionPage/mixpage/driver_element.pyi deleted file mode 100644 index 98fa815..0000000 --- a/DrissionPage/mixpage/driver_element.pyi +++ /dev/null @@ -1,326 +0,0 @@ -# 
-*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Any, Tuple - -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.select import Select as SeleniumSelect - -from .driver_page import DriverPage -from .mix_page import MixPage -from .shadow_root_element import ShadowRootElement -from .base import DrissionElement -from .session_element import SessionElement - - -class DriverElement(DrissionElement): - - def __init__(self, ele: WebElement, page: Union[DriverPage, MixPage] = None): - self._inner_ele: WebElement = ... - self._select: Select = ... - self._scroll: Scroll = ... - self.page: Union[DriverPage, MixPage] = ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['DriverElement', str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def inner_ele(self) -> WebElement: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - def attr(self, attr: str) -> str: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... 
- - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True, - relative: bool = False) -> Union['DriverElement', str, None, List[Union['DriverElement', str]]]: ... - - def _get_ele_path(self, mode) -> str: ... - - # -----------------driver独有属性和方法------------------- - @property - def size(self) -> dict: ... - - @property - def location(self) -> dict: ... - - @property - def shadow_root(self) -> ShadowRootElement: ... - - @property - def sr(self) -> ShadowRootElement: ... - - @property - def pseudo_before(self) -> str: ... - - @property - def pseudo_after(self) -> str: ... - - @property - def select(self) -> Select: ... - - @property - def scroll(self) -> Scroll: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['DriverElement', None]: ... - - def prev(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union['DriverElement', str, None]: ... - - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union['DriverElement', str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union['DriverElement', str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union['DriverElement', str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> DriverElement: ... 
- - def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def lefts(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def rights(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def aboves(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def belows(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def nears(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def wait_ele(self, - loc_or_ele: Union[str, tuple, DrissionElement, WebElement], - timeout: float = None) -> 'ElementWaiter': ... - - def style(self, style: str, pseudo_ele: str = '') -> str: ... - - def click(self, by_js: bool = None, timeout: float = None) -> bool: ... - - def click_at(self, - x: Union[int, str] = None, - y: Union[int, str] = None, - by_js: bool = False) -> None: ... - - def r_click(self) -> None: ... - - def r_click_at(self, x: Union[int, str] = None, y: Union[int, str] = None) -> None: ... - - def input(self, - vals: Union[str, tuple], - clear: bool = True, - insure: bool = True, - timeout: float = None) -> bool: ... - - def run_script(self, script: str, *args) -> Any: ... - - def submit(self) -> Union[bool, None]: ... - - def clear(self, insure: bool = True) -> Union[None, bool]: ... - - def is_selected(self) -> bool: ... - - def is_enabled(self) -> bool: ... - - def is_displayed(self) -> bool: ... - - def is_valid(self) -> bool: ... - - def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... - - def prop(self, prop: str) -> str: ... 
- - def set_prop(self, prop: str, value: str) -> bool: ... - - def set_attr(self, attr: str, value: str) -> bool: ... - - def remove_attr(self, attr: str) -> bool: ... - - def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> None: ... - - def drag_to(self, - ele_or_loc: Union[tuple, WebElement, DrissionElement], - speed: int = 40, - shake: bool = True) -> None: ... - - def hover(self, x: int = None, y: int = None) -> None: ... - - def _get_relative_eles(self, - mode: str, - loc: Union[tuple, str] = '') -> Union[List['DriverElement'], 'DriverElement']: ... - - -def make_driver_ele(page_or_ele: Union[DriverPage, MixPage, DriverElement, ShadowRootElement], - loc: Union[str, Tuple[str, str]], - single: bool = True, - timeout: float = None) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - -class ElementsByXpath(object): - - def __init__(self, page, xpath: str = None, single: bool = False, timeout: float = 10): - self.single: bool = ... - self.xpath: str = ... - self.page: Union[MixPage, DriverPage] = ... - - def __call__(self, ele_or_driver: Union[RemoteWebDriver, WebElement]) \ - -> Union[str, DriverElement, None, List[str or DriverElement]]: ... - - -class Select(object): - - def __init__(self, ele: DriverElement): - self.select_ele: SeleniumSelect = ... - self.inner_ele: DriverElement = ... - - def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - @property - def is_multi(self) -> bool: ... - - @property - def options(self) -> List[DriverElement]: ... - - @property - def selected_option(self) -> Union[DriverElement, None]: ... - - @property - def selected_options(self) -> List[DriverElement]: ... - - def clear(self) -> None: ... - - def select(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - def select_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... 
- - def deselect(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - def deselect_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def invert(self) -> None: ... - - def _select(self, - text_value_index: Union[str, int, list, tuple] = ..., - para_type: str = 'text', - deselect: bool = False, - timeout: float = None) -> bool: ... - - def _select_multi(self, - text_value_index: Union[list, tuple] = None, - para_type: str = 'text', - deselect: bool = False) -> bool: ... - - -class ElementWaiter(object): - - def __init__(self, - page_or_ele, - loc_or_ele: Union[str, tuple, DriverElement, WebElement], - timeout: float = None): - self.target: Union[DriverElement, WebElement, tuple] = ... - self.timeout: float = ... - self.driver: Union[WebElement, RemoteWebDriver] = ... - - def delete(self) -> bool: ... - - def display(self) -> bool: ... - - def hidden(self) -> bool: ... - - def _wait_ele(self, mode: str) -> bool: ... - - -class Scroll(object): - - def __init__(self, page_or_ele): - self.driver: Union[DriverElement, DriverPage] = ... - self.t1: str = ... - self.t2: str = ... - - def to_top(self) -> None: ... - - def to_bottom(self) -> None: ... - - def to_half(self) -> None: ... - - def to_rightmost(self) -> None: ... - - def to_leftmost(self) -> None: ... - - def to_location(self, x: int, y: int) -> None: ... - - def up(self, pixel: int = 300) -> None: ... - - def down(self, pixel: int = 300) -> None: ... - - def left(self, pixel: int = 300) -> None: ... - - def right(self, pixel: int = 300) -> None: ... 
diff --git a/DrissionPage/mixpage/driver_page.py b/DrissionPage/mixpage/driver_page.py deleted file mode 100644 index 0779b4b..0000000 --- a/DrissionPage/mixpage/driver_page.py +++ /dev/null @@ -1,611 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from glob import glob -from os import sep -from pathlib import Path -from time import sleep, perf_counter - -from selenium.common.exceptions import NoAlertPresentException -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.wait import WebDriverWait - -from .base import BasePage -from .driver_element import DriverElement, make_driver_ele, Scroll, ElementWaiter -from .session_element import make_session_ele -from ..commons.tools import get_usable_path - - -class DriverPage(BasePage): - """DriverPage封装了页面操作的常用功能,使用selenium来获取、解析、操作网页""" - - def __init__(self, driver, timeout=10): - """初始化函数,接收一个WebDriver对象,用来操作网页""" - super().__init__(timeout) - self._driver = driver - self._wait_object = None - self._scroll = None - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele = page('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def url(self): - """返回当前网页url""" - if not self._driver or not self.driver.current_url.startswith('http'): - return None - else: - return self.driver.current_url - - @property - def html(self): - """返回页面的html文本""" - return self.driver.find_element('xpath', "//*").get_attribute("outerHTML") - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典""" - from json import loads - return loads(self('t:pre').text) - - def get(self, url, show_errmsg=False, retry=None, interval=None): - """访问url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :return: 目标url是否可用,返回None表示不确定 - """ - retry, 
interval = self._before_connect(url, retry, interval) - self._url_available = self._d_connect(self._url, times=retry, interval=interval, show_errmsg=show_errmsg) - return self._url_available - - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: DriverElement对象或属性、文本 - """ - return self._ele(loc_or_ele, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: DriverElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout, single=False) - - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - if isinstance(loc_or_ele, DriverElement): - return make_session_ele(loc_or_ele) - else: - return make_session_ele(self, loc_or_ele) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement列表形式返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象组成的列表 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :return: DriverElement对象 - """ - # 接收到字符串或元组,获取定位loc元组 - if isinstance(loc_or_ele, (str, tuple)): - return make_driver_ele(self, loc_or_ele, single, timeout) - - # 接收到DriverElement对象直接返回 - elif isinstance(loc_or_ele, DriverElement): - return loc_or_ele - - # 接收到WebElement对象打包成DriverElement对象返回 - elif isinstance(loc_or_ele, WebElement): - return DriverElement(loc_or_ele, self) - - # 接收到的类型不正确,抛出异常 - else: - raise ValueError('loc_or_str参数只能是tuple、str、DriverElement 或 WebElement类型。') - - def get_cookies(self, as_dict=False): - """返回当前网站cookies""" - if as_dict: - return {cookie['name']: 
cookie['value'] for cookie in self.driver.get_cookies()} - else: - return self.driver.get_cookies() - - @property - def timeout(self): - """返回查找元素时等待的秒数""" - return self._timeout - - @timeout.setter - def timeout(self, second): - """设置查找元素时等待的秒数""" - self._timeout = second - self._wait_object = None - - def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False): - """尝试连接,重试若干次 - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param show_errmsg: 是否抛出异常 - :return: 是否成功,返回None表示不确定 - """ - err = None - is_ok = False - - for _ in range(times + 1): - try: - self.driver.get(to_url) - go_ok = True - except Exception as e: - err = e - go_ok = False - - is_ok = self.check_page() if go_ok else False - - if is_ok is not False: - break - - if _ < times: - sleep(interval) - if show_errmsg: - print(f'重试 {to_url}') - - if is_ok is False and show_errmsg: - raise err if err is not None else ConnectionError('连接异常。') - - return is_ok - - # ----------------driver独有属性和方法----------------------- - @property - def driver(self): - return self._driver - - @property - def wait_object(self): - """返回WebDriverWait对象,重用避免每次新建对象""" - if self._wait_object is None: - self._wait_object = WebDriverWait(self.driver, timeout=self.timeout) - - return self._wait_object - - @property - def timeouts(self): - """返回三种超时时间,selenium4以上版本可用""" - return {'implicit': self.timeout, - 'pageLoad': self.driver.timeouts.page_load, - 'script': self.driver.timeouts.script} - - @property - def tabs_count(self): - """返回标签页数量""" - try: - return len(self.driver.window_handles) - except Exception: - return 0 - - @property - def tab_handles(self): - """返回所有标签页handle列表""" - return self.driver.window_handles - - @property - def current_tab_index(self): - """返回当前标签页序号""" - return self.driver.window_handles.index(self.driver.current_window_handle) - - @property - def current_tab_handle(self): - """返回当前标签页handle""" - return self.driver.current_window_handle - - @property - def active_ele(self): 
- """返回当前焦点所在元素""" - return DriverElement(self.driver.switch_to.active_element, self) - - @property - def scroll(self): - """用于滚动滚动条的对象""" - if self._scroll is None: - self._scroll = Scroll(self) - return self._scroll - - @property - def to_frame(self): - """用于跳转到frame的对象,调用其方法实现跳转 - 示例: - page.to_frame.by_loc('tag:iframe') - 通过传入frame的查询字符串定位 - page.to_frame.by_loc((By.TAG_NAME, 'iframe')) - 通过传入定位符定位 - page.to_frame.by_id('iframe_id') - 通过frame的id属性定位 - page.to_frame('iframe_name') - 通过frame的name属性定位 - page.to_frame(iframe_element) - 通过传入元素对象定位 - page.to_frame(0) - 通过frame的序号定位 - page.to_frame.main() - 跳到最顶层 - page.to_frame.parent() - 跳到上一层 - """ - return ToFrame(self) - - def set_timeouts(self, implicit=None, pageLoad=None, script=None): - """设置超时时间,单位为秒,selenium4以上版本有效 - :param implicit: 查找元素超时时间 - :param pageLoad: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: None - """ - if implicit is not None: - self.timeout = implicit - - if pageLoad is not None: - self.driver.set_page_load_timeout(pageLoad) - - if script is not None: - self.driver.set_script_timeout(script) - - def wait_ele(self, loc_or_ele, timeout=None): - """等待元素从dom删除、显示、隐藏 - :param loc_or_ele: 可以是元素、查询字符串、loc元组 - :param timeout: 等待超时时间 - :return: 用于等待的ElementWaiter对象 - """ - return ElementWaiter(self, loc_or_ele, timeout) - - def check_page(self): - """检查页面是否符合预期 - 由子类自行实现各页面的判定规则 - """ - return None - - def run_script(self, script, *args): - """执行js代码 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.driver.execute_script(script, *args) - - def run_async_script(self, script, *args): - """以异步方式执行js代码 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.driver.execute_async_script(script, *args) - - def run_cdp(self, cmd, **cmd_args): - """执行Chrome DevTools Protocol语句 - :param cmd: 协议项目 - :param cmd_args: 参数 - :return: 执行的结果 - """ - return self.driver.execute_cdp_cmd(cmd, cmd_args) - - def create_tab(self, url=''): - """新建并定位到一个标签页,该标签页在最后面 - 
:param url: 新标签页跳转到的网址 - :return: None - """ - self.driver.switch_to.new_window('tab') - if url: - self.get(url) - - def close_tabs(self, num_or_handles=None): - """关闭传入的标签页,默认关闭当前页。可传入多个 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致,不能按序号关闭。 - :param num_or_handles:要关闭的标签页序号或handle,可传入handle和序号组成的列表或元组,为None时关闭当前页 - :return: None - """ - tabs = (self.current_tab_handle,) if num_or_handles is None else get_handles(self.tab_handles, num_or_handles) - for i in tabs: - self.driver.switch_to.window(i) - self.driver.close() - - self.to_tab(0) - - def close_other_tabs(self, num_or_handles=None): - """关闭传入的标签页以外标签页,默认保留当前页。可传入多个 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致,不能按序号关闭。 - :param num_or_handles: 要保留的标签页序号或handle,可传入handle和序号组成的列表或元组,为None时保存当前页 - :return: None - """ - all_tabs = self.driver.window_handles - reserve_tabs = {self.current_tab_handle} if num_or_handles is None else get_handles(all_tabs, num_or_handles) - - for i in set(all_tabs) - reserve_tabs: - self.driver.switch_to.window(i) - self.driver.close() - - self.to_tab(0) - - def to_tab(self, num_or_handle=0): - """跳转到标签页 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致 - :param num_or_handle: 标签页序号或handle字符串,序号第一个为0,最后为-1 - :return: None - """ - try: - tab = int(num_or_handle) - except (ValueError, TypeError): - tab = num_or_handle - - tab = self.driver.window_handles[tab] if isinstance(tab, int) else tab - self.driver.switch_to.window(tab) - - def set_ua_to_tab(self, ua): - """为当前tab设置user agent,只在当前tab有效 - :param ua: user agent字符串 - :return: None - """ - self.driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": ua}) - - def get_session_storage(self, item=None): - """获取sessionStorage信息,不设置item则获取全部 - :param item: 要获取的项,不设置则返回全部 - :return: sessionStorage一个或所有项内容 - """ - js = f'return sessionStorage.getItem("{item}");' if item else 'return sessionStorage;' - return self.run_script(js) - - def get_local_storage(self, item=None): - """获取localStorage信息,不设置item则获取全部 - :param item: 要获取的项目,不设置则返回全部 - 
:return: localStorage一个或所有项内容 - """ - js = f'return localStorage.getItem("{item}");' if item else 'return localStorage;' - return self.run_script(js) - - def set_session_storage(self, item, value): - """设置或删除某项sessionStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - s = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' - self.run_script(s) - - def set_local_storage(self, item, value): - """设置或删除某项localStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - s = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' - self.run_script(s) - - def clean_cache(self, session_storage=True, local_storage=True, cache=True, cookies=True): - """清除缓存,可选要清除的项 - :param session_storage: 是否清除sessionStorage - :param local_storage: 是否清除localStorage - :param cache: 是否清除cache - :param cookies: 是否清除cookies - :return: None - """ - if session_storage: - self.run_script('sessionStorage.clear();') - if local_storage: - self.run_script('localStorage.clear();') - if cache: - self.run_cdp('Network.clearBrowserCache') - if cookies: - self.run_cdp('Network.clearBrowserCookies') - - def screenshot(self, path=None, filename=None, as_bytes=False): - """截取页面可见范围截图 - :param path: 保存路径 - :param filename: 图片文件名,不传入时以页面title命名 - :param as_bytes: 是否已字节形式返回图片,为True时上面两个参数失效 - :return: 图片完整路径或字节文本 - """ - if as_bytes: - return self.driver.get_screenshot_as_png() - - name = filename or self.title - if not name.lower().endswith('.png'): - name = f'{name}.png' - path = Path(path or '.').absolute() - path.mkdir(parents=True, exist_ok=True) - img_path = str(get_usable_path(f'{path}{sep}{name}')) - self.driver.save_screenshot(img_path) - return img_path - - def scroll_to_see(self, loc_or_ele): - """滚动页面直到元素可见 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串(详见ele函数注释) - :return: None - """ - ele = self.ele(loc_or_ele) - 
ele.run_script("arguments[0].scrollIntoView();") - - def refresh(self): - """刷新当前页面""" - self.driver.refresh() - - def stop_loading(self): - """强制停止页面加载""" - self.run_cdp('Page.stopLoading') - - def back(self): - """在浏览历史中后退一步""" - self.driver.back() - - def forward(self): - """在浏览历史中前进一步""" - self.driver.forward() - - def set_window_size(self, width=None, height=None): - """设置浏览器窗口大小,默认最大化,任一参数为0最小化 - :param width: 浏览器窗口高 - :param height: 浏览器窗口宽 - :return: None - """ - if width is None and height is None: - self.driver.maximize_window() - - elif width == 0 or height == 0: - self.driver.minimize_window() - - else: - if width < 0 or height < 0: - raise ValueError('x 和 y参数必须大于0。') - - new_x = width or self.driver.get_window_size()['width'] - new_y = height or self.driver.get_window_size()['height'] - self.driver.set_window_size(new_x, new_y) - - def chrome_downloading(self, download_path): - """返回浏览器下载中的文件列表 - :param download_path: 下载文件夹路径 - :return: 文件列表 - """ - return glob(f'{download_path}{sep}*.crdownload') - - def process_alert(self, ok=True, send=None, timeout=None): - """处理提示框 - :param ok: True表示确认,False表示取消,其它值不会按按钮但依然返回文本值 - :param send: 处理prompt提示框时可输入文本 - :param timeout: 等待提示框出现的超时时间 - :return: 提示框内容文本,未等到提示框则返回None - """ - - def do_it(): - try: - return self.driver.switch_to.alert - except NoAlertPresentException: - return False - - timeout = timeout if timeout is not None else self.timeout - t1 = perf_counter() - alert = do_it() - while alert is False and perf_counter() - t1 <= timeout: - alert = do_it() - - if alert is False: - return None - - res_text = alert.text - - if send is not None: - alert.send_keys(send) - - if ok is True: - alert.accept() - elif ok is False: - alert.dismiss() - - return res_text - - -class ToFrame(object): - """用于处理焦点跳转到页面框架的类""" - - def __init__(self, page): - self.page = page - - def __call__(self, condition='main'): - """跳转到(i)frame,可传入id、name、序号、元素对象、定位符 - :param condition: (i)frame,可传入id、name、序号、元素对象、定位符 - :return: 当前页面对象 
- """ - if isinstance(condition, (DriverElement, WebElement)): - self.by_ele(condition) - elif isinstance(condition, int): - self.by_index(condition) - elif ':' not in condition and '=' not in condition and not condition.startswith(('#', '.', '@')): - self.by_id(condition) - else: - self.by_loc(condition) - - return self.page - - def main(self): - """焦点跳转到最高层级框架""" - self.page.driver.switch_to.default_content() - return self.page - - def parent(self, level=1): - """焦点跳转到上级框架,可指定上级层数 - :param level: 上面第几层框架 - :return: 框架所在页面对象 - """ - if level < 1: - raise ValueError('level参数须是大于0的整数。') - for _ in range(level): - self.page.driver.switch_to.parent_frame() - return self.page - - def by_id(self, id_): - """焦点跳转到id为该值的(i)frame - :param id_: (i)frame的id属性值 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(id_) - return self.page - - def by_name(self, name): - """焦点跳转到name为该值的(i)frame - :param name: (i)frame的name属性值 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(name) - return self.page - - def by_index(self, index): - """焦点跳转到页面中第几个(i)frame - :param index: 页面中第几个(i)frame - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(index) - return self.page - - def by_loc(self, loc): - """焦点跳转到根据定位符获取到的(i)frame - :param loc: 定位符,支持selenium原生和DriverPage定位符 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(self.page(loc).inner_ele) - return self.page - - def by_ele(self, ele): - """焦点跳转到传入的(i)frame元素对象 - :param ele: (i)frame元素对象 - :return: 框架所在页面对象 - """ - if isinstance(ele, DriverElement): - ele = ele.inner_ele - self.page.driver.switch_to.frame(ele) - return self.page - - -def get_handles(handles, num_or_handles): - """返回指定标签页组成的set - :param handles: handles列表 - :param num_or_handles: 指定的标签页,可以是多个 - :return: 指定标签页组成的set - """ - if isinstance(num_or_handles, (int, str)): - num_or_handles = (num_or_handles,) - elif not isinstance(num_or_handles, (list, tuple)): - raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。') - - return 
set(i if isinstance(i, str) else handles[i] for i in num_or_handles) diff --git a/DrissionPage/mixpage/driver_page.pyi b/DrissionPage/mixpage/driver_page.pyi deleted file mode 100644 index 542a402..0000000 --- a/DrissionPage/mixpage/driver_page.pyi +++ /dev/null @@ -1,189 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Any, Tuple - -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.wait import WebDriverWait - -from .base import BasePage -from .driver_element import DriverElement, Scroll, ElementWaiter -from .mix_page import MixPage -from .session_element import SessionElement - - -class DriverPage(BasePage): - - def __init__(self, driver: RemoteWebDriver, timeout: float = 10) -> None: - self._driver: RemoteWebDriver = ... - self._url: str = ... - self._wait_object: WebDriverWait = ... - self._scroll: Scroll = ... - - def __call__(self, loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def url(self) -> Union[str, None]: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> dict: ... - - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None) -> Union[None, bool]: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, str]]: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement] = None) \ - -> Union[SessionElement, str, None]: ... 
- - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None, - single: bool = True) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... - - @property - def timeout(self) -> float: ... - - @timeout.setter - def timeout(self, second: float) -> None: ... - - def _d_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - show_errmsg: bool = False) -> Union[bool, None]: ... - - # ----------------driver独有属性和方法----------------------- - @property - def driver(self) -> WebDriver: ... - - @property - def wait_object(self) -> WebDriverWait: ... - - @property - def timeouts(self) -> dict: ... - - @property - def tabs_count(self) -> int: ... - - @property - def tab_handles(self) -> list: ... - - @property - def current_tab_index(self) -> int: ... - - @property - def current_tab_handle(self) -> str: ... - - @property - def active_ele(self) -> DriverElement: ... - - @property - def scroll(self) -> Scroll: ... - - @property - def to_frame(self) -> ToFrame: ... - - def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> None: ... - - def wait_ele(self, - loc_or_ele: Union[str, tuple, DriverElement, WebElement], - timeout: float = None) -> ElementWaiter: ... - - def check_page(self) -> Union[bool, None]: ... - - def run_script(self, script: str, *args) -> Any: ... - - def run_async_script(self, script: str, *args) -> Any: ... - - def run_cdp(self, cmd: str, **cmd_args) -> Any: ... - - def create_tab(self, url: str = '') -> None: ... - - def close_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... - - def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... 
- - def to_tab(self, num_or_handle: Union[int, str] = 0) -> None: ... - - def set_ua_to_tab(self, ua: str) -> None: ... - - def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def set_session_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def set_local_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def clean_cache(self, - session_storage: bool = True, - local_storage: bool = True, - cache: bool = True, - cookies: bool = True) -> None: ... - - def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... - - def scroll_to_see(self, loc_or_ele: Union[str, tuple, WebElement, DriverElement]) -> None: ... - - def refresh(self) -> None: ... - - def stop_loading(self) -> None: ... - - def back(self) -> None: ... - - def forward(self) -> None: ... - - def set_window_size(self, width: int = None, height: int = None) -> None: ... - - def chrome_downloading(self, download_path: str) -> list: ... - - def process_alert(self, ok: bool = True, send: str = None, timeout: float = None) -> Union[str, None]: ... - - -class ToFrame(object): - - def __init__(self, page: DriverPage): - self.page: DriverPage = ... - - def __call__(self, condition: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> Union[ - DriverPage, MixPage]: ... - - def main(self) -> DriverPage: ... - - def parent(self, level: int = 1) -> DriverPage: ... - - def by_id(self, id_: str) -> DriverPage: ... - - def by_name(self, name: str) -> DriverPage: ... - - def by_index(self, index: int) -> DriverPage: ... - - def by_loc(self, loc: Union[str, tuple]) -> DriverPage: ... - - def by_ele(self, ele: Union[DriverElement, WebElement]) -> DriverPage: ... - - -def get_handles(handles: list, num_or_handles: Union[int, str, list, tuple]) -> set: ... 
diff --git a/DrissionPage/mixpage/mix_page.py b/DrissionPage/mixpage/mix_page.py deleted file mode 100644 index 64312d7..0000000 --- a/DrissionPage/mixpage/mix_page.py +++ /dev/null @@ -1,344 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from .base import BasePage -from .drission import Drission -from .driver_page import DriverPage -from .session_page import SessionPage - - -class MixPage(SessionPage, DriverPage, BasePage): - """MixPage整合了DriverPage和SessionPage,封装了对页面的操作, - 可在selenium(d模式)和requests(s模式)间无缝切换。 - 切换的时候会自动同步cookies。 - 获取信息功能为两种模式共有,操作页面元素功能只有d模式有。 - 调用某种模式独有的功能,会自动切换到该模式。 - """ - - def __init__(self, mode='d', drission=None, timeout=None, driver_options=None, session_options=None): - """初始化函数 - :param mode: 'd' 或 's',即driver模式和session模式 - :param drission: Drission对象,不传入时会自动创建,有传入时driver_options和session_options参数无效 - :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_options: 浏览器设置,没传入drission参数时会用这个设置新建Drission对象中的WebDriver对象,传入False则不创建 - :param session_options: requests设置,没传入drission参数时会用这个设置新建Drission对象中的Session对象,传入False则不创建 - """ - self._mode = mode.lower() - if self._mode not in ('s', 'd'): - raise ValueError('mode参数只能是s或d。') - - super(DriverPage, self).__init__(timeout) - self._driver, self._session = (None, True) if self._mode == 's' else (True, None) - self._drission = drission or Drission(driver_options, session_options) - self._wait_object = None - self._response = None - self._scroll = None - self._download_set = None - self._download_path = None - - if self._mode == 'd': - try: - timeouts = self.drission.driver_options.timeouts - t = timeout if isinstance(timeout, (int, float)) else timeouts['implicit'] - self.set_timeouts(t, timeouts['pageLoad'], timeouts['script']) - - except Exception: - self.timeout = timeout if timeout is not None else 10 - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele = page('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param 
timeout: 超时时间 - :return: 子元素对象或属性文本 - """ - if self._mode == 's': - return super().__call__(loc_or_str) - elif self._mode == 'd': - return super(SessionPage, self).__call__(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def url(self): - """返回当前url""" - if self._mode == 'd': - return self._drission.driver.current_url if self._driver else None - elif self._mode == 's': - return self._session_url - - @property - def title(self): - """返回网页title""" - if self._mode == 's': - return super().title - elif self._mode == 'd': - return super(SessionPage, self).title - - @property - def html(self): - """返回页面html文本""" - if self._mode == 's': - return super().html - elif self._mode == 'd': - return super(SessionPage, self).html - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典""" - if self._mode == 's': - return super().json - elif self._mode == 'd': - return super(SessionPage, self).json - - def get(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): - """跳转到一个url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数,s模式专用 - :return: url是否可用,d模式返回None时表示不确定 - """ - if self._mode == 'd': - return super(SessionPage, self).get(url, show_errmsg, retry, interval) - elif self._mode == 's': - return super().get(url, show_errmsg, retry, interval, **kwargs) - - def ele(self, loc_or_ele, timeout=None): - """返回第一个符合条件的元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: 元素对象或属性、文本节点文本 - """ - if self._mode == 's': - return super().ele(loc_or_ele) - elif self._mode == 'd': - return super(SessionPage, self).ele(loc_or_ele, timeout=timeout) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: 元素对象或属性、文本组成的列表 - """ - if self._mode == 's': - return super().eles(loc_or_str) - elif self._mode == 
'd': - return super(SessionPage, self).eles(loc_or_str, timeout=timeout) - - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - if self._mode == 's': - return super().s_ele(loc_or_ele) - elif self._mode == 'd': - return super(SessionPage, self).s_ele(loc_or_ele) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本组成的列表 - """ - if self._mode == 's': - return super().s_eles(loc_or_str) - elif self._mode == 'd': - return super(SessionPage, self).s_eles(loc_or_str) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,d模式专用 - :param single: True则返回第一个,False则返回全部 - :return: 元素对象或属性、文本节点文本 - """ - if self._mode == 's': - return super()._ele(loc_or_ele, single=single) - elif self._mode == 'd': - return super(SessionPage, self)._ele(loc_or_ele, timeout=timeout, single=single) - - def get_cookies(self, as_dict=False, all_domains=False): - """返回cookies - :param as_dict: 是否以字典方式返回 - :param all_domains: 是否返回所有域的cookies - :return: cookies信息 - """ - if self._mode == 's': - return super().get_cookies(as_dict, all_domains) - elif self._mode == 'd': - return super(SessionPage, self).get_cookies(as_dict) - - # ----------------MixPage独有属性和方法----------------------- - @property - def drission(self): - """返回当前使用的 Dirssion 对象""" - return self._drission - - @property - def driver(self): - """返回 driver 对象,如没有则创建 - 每次访问时切换到 d 模式,用于独有函数及外部调用 - :return: WebDriver对象 - """ - self.change_mode('d') - return self._drission.driver - - @property - def session(self): - """返回 Session 对象,如没有则创建""" - return self._drission.session - - @property - def response(self): - """返回 s 模式获取到的 Response 对象,切换到 s 模式""" - self.change_mode('s') - return self._response - - 
@property - def mode(self): - """返回当前模式,'s'或'd' """ - return self._mode - - @property - def _session_url(self): - """返回 session 保存的url""" - return self._response.url if self._response else None - - def change_mode(self, mode=None, go=True, copy_cookies=True): - """切换模式,接收's'或'd',除此以外的字符串会切换为 d 模式 - 切换时会把当前模式的cookies复制到目标模式 - 切换后,如果go是True,调用相应的get函数使访问的页面同步 - 注意:s转d时,若浏览器当前网址域名和s模式不一样,必须会跳转 - :param mode: 模式字符串 - :param go: 是否跳转到原模式的url - :param copy_cookies: 是否复制cookies到目标模式 - """ - if mode is not None and mode.lower() == self._mode: - return - - self._mode = 's' if self._mode == 'd' else 'd' - - # s模式转d模式 - if self._mode == 'd': - self._driver = True - self._url = None if not self._driver else self._drission.driver.current_url - - if self._session_url: - if copy_cookies: - self.cookies_to_driver(self._session_url) - - if go: - self.get(self._session_url) - - # d模式转s模式 - elif self._mode == 's': - self._session = True - self._url = self._session_url - - if self._driver: - if copy_cookies: - self.cookies_to_session() - - if go and self._drission.driver.current_url.startswith('http'): - self.get(self._drission.driver.current_url) - - def set_cookies(self, cookies, refresh=True): - """设置cookies - :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict - :param refresh: 设置cookies后是否刷新页面 - :return: None - """ - if self._mode == 's': - self.drission.set_cookies(cookies, set_session=True) - elif self._mode == 'd': - self.drission.set_cookies(cookies, set_driver=True) - if refresh: - self.refresh() - - def cookies_to_session(self, copy_user_agent=False): - """从driver复制cookies到session - :param copy_user_agent : 是否复制user agent信息 - """ - self._drission.cookies_to_session(copy_user_agent) - - def cookies_to_driver(self, url=None): - """从session复制cookies到driver - chrome需要指定域才能接收cookies - :param url: 目标域 - :return: None - """ - url = url or self._session_url - self._drission.cookies_to_driver(url) - - def check_page(self, by_requests=False): - """d模式时检查网页是否符合预期 - 
默认由response状态检查,可重载实现针对性检查 - :param by_requests: 是否用内置response检查 - :return: bool或None,None代表不知道结果 - """ - if self._session_url and self._session_url == self.url: - return self._response.ok - - # 使用requests访问url并判断可用性 - if by_requests: - self.cookies_to_session() - r = self._make_response(self.url, retry=0)[0] - return r.ok if r else False - - def close_driver(self): - """关闭driver及浏览器""" - self._driver = None - self.drission.close_driver(True) - - def close_session(self): - """关闭session""" - self._session = None - self._response = None - self.drission.close_session() - - # ----------------重写SessionPage的函数----------------------- - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url,会切换到s模式 - :param url: 目标url - :param data: post方式时提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - self.change_mode('s', go=False) - return super().post(url, data, show_errmsg, retry, interval, **kwargs) - - @property - def download(self): - """返回下载器对象""" - if self.mode == 'd': - self.cookies_to_session() - return super().download - - def chrome_downloading(self, path=None): - """返回浏览器下载中的文件列表 - :param path: 下载文件夹路径,默认读取配置信息 - :return: 正在下载的文件列表 - """ - try: - path = path or self._drission.driver_options.experimental_options['prefs']['download.default_directory'] - if not path: - raise ValueError('未指定下载路径。') - except Exception: - raise IOError('无法找到下载路径。') - - return super().chrome_downloading(path) - - # ----------------MixPage独有函数----------------------- - def hide_browser(self): - """隐藏浏览器窗口""" - self.drission.hide_browser() - - def show_browser(self): - """显示浏览器窗口""" - self.drission.show_browser() diff --git a/DrissionPage/mixpage/mix_page.pyi b/DrissionPage/mixpage/mix_page.pyi deleted file mode 100644 index beea82e..0000000 --- a/DrissionPage/mixpage/mix_page.pyi +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 
-@Contact : g1879@qq.com -""" -from typing import Union, List, Tuple, Any - -from DownloadKit import DownloadKit -from requests import Response, Session -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webelement import WebElement - -from .base import BasePage -from DrissionPage.configs.session_options import SessionOptions -from DrissionPage.configs.driver_options import DriverOptions -from .drission import Drission -from .driver_element import DriverElement -from .driver_page import DriverPage -from .session_element import SessionElement -from .session_page import SessionPage - - -class MixPage(SessionPage, DriverPage, BasePage): - - def __init__(self, - mode: str = 'd', - drission: Union[Drission, str] = None, - timeout: float = None, - driver_options: Union[Options, DriverOptions, bool] = None, - session_options: Union[dict, SessionOptions, bool] = None) -> None: - self._mode: str = ... - self._drission: Drission = ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def url(self) -> Union[str, None]: ... - - @property - def title(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> dict: ... 
- - def get(self, - url: str, - show_errmsg: bool | None = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - data: Union[dict, str, None] = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> Union[bool, None]: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, SessionElement, str]]: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement] = None) \ - -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None, single: bool = False) \ - -> Union[DriverElement, SessionElement, str, None, List[Union[SessionElement, str]], List[ - Union[DriverElement, str]]]: ... - - def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: ... - - # ----------------MixPage独有属性和方法----------------------- - @property - def drission(self) -> Drission: ... - - @property - def driver(self) -> WebDriver: ... - - @property - def session(self) -> Session: ... - - @property - def response(self) -> Response: ... - - @property - def mode(self) -> str: ... - - @property - def _session_url(self) -> str: ... - - def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... 
- - def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict], refresh: bool = True) -> None: ... - - def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... - - def cookies_to_driver(self, url: str = None) -> None: ... - - def check_page(self, by_requests: bool = False) -> Union[bool, None]: ... - - def close_driver(self) -> None: ... - - def close_session(self) -> None: ... - - # ----------------重写SessionPage的函数----------------------- - def post(self, - url: str, - data: Union[dict, str, None] = None, - show_errmsg: bool = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - @property - def download(self) -> DownloadKit: ... - - def chrome_downloading(self, path: str = None) -> list: ... - - # ----------------MixPage独有函数----------------------- - def hide_browser(self) -> None: ... - - def show_browser(self) -> None: ... 
diff --git a/DrissionPage/mixpage/session_element.py b/DrissionPage/mixpage/session_element.py deleted file mode 100644 index a2eb6d0..0000000 --- a/DrissionPage/mixpage/session_element.py +++ /dev/null @@ -1,357 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from html import unescape -from re import match, DOTALL - -from lxml.etree import tostring -from lxml.html import HtmlElement, fromstring - -from .base import DrissionElement, BasePage, BaseElement -from ..commons.locator import get_loc -from ..commons.web import get_ele_txt, make_absolute_link - - -class SessionElement(DrissionElement): - """session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能""" - - def __init__(self, ele, page=None): - """初始化对象 - :param ele: 被包装的HtmlElement元素 - :param page: 元素所在页面对象,如果是从 html 文本生成的元素,则为 None - """ - super().__init__(page) - self._inner_ele = ele - - @property - def inner_ele(self): - return self._inner_ele - - def __repr__(self): - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self.ele(loc_or_str) - - @property - def tag(self): - """返回元素类型""" - return self._inner_ele.tag - - @property - def html(self): - """返回outerHTML文本""" - html = tostring(self._inner_ele, method="html").decode() - return unescape(html[:html.rfind('>') + 1]) # tostring()会把跟紧元素的文本节点也带上,因此要去掉 - - @property - def inner_html(self): - """返回元素innerHTML文本""" - r = match(r'<.*?>(.*)', self.html, flags=DOTALL) - return '' if not r else r.group(1) - - @property - def attrs(self): - """返回元素所有属性及值""" - return {attr: self.attr(attr) for attr, val in self.inner_ele.items()} - - @property - def text(self): - """返回元素内所有文本""" - return get_ele_txt(self) - - @property - def raw_text(self): - """返回未格式化处理的元素内文本""" - return 
str(self._inner_ele.text_content()) - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - return super().parent(level_or_loc) - - def prev(self, filter_loc='', index=1, timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - return super().prev(index, filter_loc, timeout) - - def next(self, filter_loc='', index=1, timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 后面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - return super().next(index, filter_loc, timeout) - - def before(self, filter_loc='', index=1, timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - return super().before(index, filter_loc, timeout) - - def after(self, filter_loc='', index=1, timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 后面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - return super().after(index, filter_loc, timeout) - - def prevs(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().prevs(filter_loc, timeout) - - def nexts(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().nexts(filter_loc, timeout) - - def befores(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return super().befores(filter_loc, timeout) - - def afters(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param 
filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return super().afters(filter_loc, timeout) - - def attr(self, attr): - """返回attribute属性值 - :param attr: 属性名 - :return: 属性值文本,没有该属性返回None - """ - # 获取href属性时返回绝对url - if attr == 'href': - link = self.inner_ele.get('href') - # 若为链接为None、js或邮件,直接返回 - if not link or link.lower().startswith(('javascript:', 'mailto:')): - return link - - else: # 其它情况直接返回绝对url - return make_absolute_link(link, self.page) - - elif attr == 'src': - return make_absolute_link(self.inner_ele.get('src'), self.page) - - elif attr == 'text': - return self.text - - elif attr == 'innerText': - return self.raw_text - - elif attr in ('html', 'outerHTML'): - return self.html - - elif attr == 'innerHTML': - return self.inner_html - - else: - return self.inner_ele.get(attr) - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def s_ele(self, loc_or_str=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str) - - def s_eles(self, loc_or_str): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 
- :return: SessionElement对象 - """ - return make_session_ele(self, loc_or_str, single) - - def _get_ele_path(self, mode): - """获取css路径或xpath路径 - :param mode: 'css' 或 'xpath' - :return: css路径或xpath路径 - """ - path_str = '' - ele = self - - while ele: - if mode == 'css': - brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) - path_str = f'>:nth-child({brothers + 1}){path_str}' - else: - brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) - path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' - - ele = ele.parent() - - return f':root{path_str[1:]}' if mode == 'css' else path_str - - -def make_session_ele(html_or_ele, loc=None, single=True): - """从接收到的对象或html文本中查找元素,返回SessionElement对象 - 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 - :param html_or_ele: html文本、BaseParser对象 - :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 - :param single: True则返回第一个,False则返回全部 - :return: 返回SessionElement元素或列表,或属性文本 - """ - # ---------------处理定位符--------------- - if not loc: - if isinstance(html_or_ele, SessionElement): - return html_or_ele if single else [html_or_ele] - - loc = ('xpath', '.') - - elif isinstance(loc, (str, tuple)): - loc = get_loc(loc) - - else: - raise ValueError("定位符必须为str或长度为2的tuple。") - - # ---------------根据传入对象类型获取页面对象和lxml元素对象--------------- - the_type = str(type(html_or_ele)) - # SessionElement - if the_type.endswith(".SessionElement'>"): - page = html_or_ele.page - - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc[1]}' - html_or_ele = html_or_ele.inner_ele - - # 若css以>开头,表示找元素的直接子元素,要用page以绝对路径才能找到 - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): - loc_str = f'{html_or_ele.css_path}{loc[1]}' - if html_or_ele.page: - html_or_ele = fromstring(html_or_ele.page.html) - else: # 接收html文本,无page的情况 - html_or_ele = fromstring(html_or_ele('xpath:/ancestor::*').html) - - else: - html_or_ele = html_or_ele.inner_ele - - loc = loc[0], loc_str - - # 
ChromiumElement, DriverElement - elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc[1]}' - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): - loc_str = f'{html_or_ele.css_path}{loc[1]}' - loc = loc[0], loc_str - - # 获取整个页面html再定位到当前元素,以实现查找上级元素 - page = html_or_ele.page - xpath = html_or_ele.xpath - if hasattr(html_or_ele, 'doc_id'): # ChromiumElement,兼容传入的元素在iframe内的情况 - html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele.doc_id)['outerHTML'] - else: - html = html_or_ele.page.html - html_or_ele = fromstring(html) - html_or_ele = html_or_ele.xpath(xpath)[0] - - # 各种页面对象 - elif isinstance(html_or_ele, BasePage): - page = html_or_ele - html_or_ele = fromstring(html_or_ele.html) - - # 直接传入html文本 - elif isinstance(html_or_ele, str): - page = None - html_or_ele = fromstring(html_or_ele) - - # ShadowRootElement, ChromiumShadowRoot, ChromiumFrame - elif isinstance(html_or_ele, BaseElement) or the_type.endswith(".ChromiumFrame'>"): - page = html_or_ele.page - html_or_ele = fromstring(html_or_ele.html) - - else: - raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') - - # ---------------执行查找----------------- - try: - if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 - ele = html_or_ele.xpath(loc[1]) - else: # 用css selector获取元素对象列表 - ele = html_or_ele.cssselect(loc[1]) - - if not isinstance(ele, list): # 结果不是列表,如数字 - return ele - - # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 - if single: - ele = ele[0] if ele else None - if isinstance(ele, HtmlElement): - return SessionElement(ele, page) - elif isinstance(ele, str): - return ele - else: - return None - - else: # 返回全部 - return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] - - except Exception as e: - if 'Invalid expression' in str(e): - raise SyntaxError(f'无效的xpath语句:{loc}') - elif 'Expected selector' in str(e): - raise 
SyntaxError(f'无效的css select语句:{loc}') - - raise e diff --git a/DrissionPage/mixpage/session_element.pyi b/DrissionPage/mixpage/session_element.pyi deleted file mode 100644 index 69dcb35..0000000 --- a/DrissionPage/mixpage/session_element.pyi +++ /dev/null @@ -1,114 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Tuple - -from lxml.html import HtmlElement - -from .base import DrissionElement, BaseElement -from .driver_element import DriverElement -from .driver_page import DriverPage -from .session_page import SessionPage - - -class SessionElement(DrissionElement): - - def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None): - self._inner_ele: HtmlElement = ... - self.page: SessionPage = ... - - @property - def inner_ele(self) -> HtmlElement: ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... - - def prev(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def next(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def before(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def after(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... 
- - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def attr(self, attr: str) -> Union[str, None]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def s_ele(self, - loc_or_str: Union[Tuple[str, str], str] = None) -> Union['SessionElement', str, None]: ... - - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[Union['SessionElement', str]]: ... - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True, - relative: bool = False) -> Union['SessionElement', str, None, List[Union['SessionElement', str]]]: ... - - def _get_ele_path(self, mode: str) -> str: ... - - -def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, DriverElement, BaseElement, DriverPage], - loc: Union[str, Tuple[str, str]] = None, - single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... 
diff --git a/DrissionPage/mixpage/session_page.py b/DrissionPage/mixpage/session_page.py deleted file mode 100644 index b866346..0000000 --- a/DrissionPage/mixpage/session_page.py +++ /dev/null @@ -1,533 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from re import search -from time import sleep -from urllib.parse import urlparse - -from DownloadKit import DownloadKit -from requests import Session, Response -from requests.structures import CaseInsensitiveDict -from tldextract import extract - -from .base import BasePage -from .session_element import SessionElement, make_session_ele -from ..commons.web import cookie_to_dict, set_session_cookies -from ..configs.session_options import SessionOptions - - -class SessionPage(BasePage): - """SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页""" - - def __init__(self, session_or_options=None, timeout=None): - """ - :param session_or_options: Session对象或SessionOptions对象 - :param timeout: 连接超时时间,为None时从ini文件读取 - """ - self._response = None - self._download_set = None - self._session = None - self._set = None - self._set_start_options(session_or_options, None) - self._set_runtime_settings() - self._create_session() - timeout = timeout if timeout is not None else self.timeout - super().__init__(timeout) - - def _set_start_options(self, session_or_options, none): - """启动配置 - :param session_or_options: Session、SessionOptions - :param none: 用于后代继承 - :return: None - """ - if not session_or_options or isinstance(session_or_options, SessionOptions): - self._session_options = session_or_options or SessionOptions(session_or_options) - - elif isinstance(session_or_options, Session): - self._session_options = SessionOptions() - self._session = session_or_options - - def _set_runtime_settings(self): - """设置运行时用到的属性""" - self._timeout = self._session_options.timeout - self._download_path = self._session_options.download_path - - def _create_session(self): - """创建内建Session对象""" - if not self._session: - 
self._set_session(self._session_options) - - def _set_session(self, opt): - """根据传入字典对session进行设置 - :param opt: session配置字典 - :return: None - """ - self._session = Session() - - if opt.headers: - self._session.headers = CaseInsensitiveDict(opt.headers) - if opt.cookies: - self.set.cookies(opt.cookies) - if opt.adapters: - for url, adapter in opt.adapters: - self._session.mount(url, adapter) - - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] - for i in attrs: - attr = opt.__getattribute__(i) - if attr: - self._session.__setattr__(i, attr) - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性文本 - """ - return self.ele(loc_or_str) - - # -----------------共有属性和方法------------------- - @property - def title(self): - """返回网页title""" - ele = self.ele('xpath://title') - return ele.text if ele else None - - @property - def url(self): - """返回当前访问url""" - return self._url - - @property - def html(self): - """返回页面的html文本""" - return self.response.text if self.response else '' - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" - try: - return self.response.json() - except Exception: - return None - - @property - def download_path(self): - """返回下载路径""" - return self._download_path - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = DownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set.DownloadKit - - @property - def session(self): - """返回session对象""" - return self._session - - @property - def response(self): - """返回访问url得到的response对象""" - return self._response - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = SessionPageSetter(self) - return self._set - - 
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): - """用get方式跳转到url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs) - - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_ele) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def s_ele(self, loc_or_ele=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele) - - def s_eles(self, loc_or_str): - """返回页面中符合条件的所有元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str, single=False) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 - :return: SessionElement对象 - """ - return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single) - - def get_cookies(self, as_dict=False, all_domains=False): - """返回cookies - :param as_dict: 是否以字典方式返回 - :param all_domains: 是否返回所有域的cookies - :return: cookies信息 - """ - if all_domains: - cookies = self.session.cookies - else: - if self.url: - url = extract(self.url) - domain = f'{url.domain}.{url.suffix}' - cookies = 
tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') - else: - cookies = tuple(x for x in self.session.cookies) - - if as_dict: - return {x.name: x.value for x in cookies} - else: - return [cookie_to_dict(cookie) for cookie in cookies] - - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url - :param url: 目标url - :param data: 提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs) - - def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """执行get或post连接 - :param url: 目标url - :param mode: 'get' 或 'post' - :param data: 提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - retry, interval = self._before_connect(url, retry, interval) - self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs) - - if self._response is None: - self._url_available = False - - else: - if self._response.ok: - self._url_available = True - - else: - if show_errmsg: - raise ConnectionError(f'状态码:{self._response.status_code}.') - self._url_available = False - - return self._url_available - - def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs): - """生成Response对象 - :param url: 目标url - :param mode: 'get' 或 'post' - :param data: post方式要提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param kwargs: 其它参数 - :return: tuple,第一位为Response或None,第二位为出错信息或'Success' - """ - kwargs = CaseInsensitiveDict(kwargs) - if 'headers' not in kwargs: - kwargs['headers'] = {} - else: - kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) - - # 设置referer和host值 - parsed_url = urlparse(url) - hostname = parsed_url.hostname - scheme = parsed_url.scheme - if not 
check_headers(kwargs, self.session.headers, 'Referer'): - kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' - if 'Host' not in kwargs['headers']: - kwargs['headers']['Host'] = hostname - - if not check_headers(kwargs, self.session.headers, 'timeout'): - kwargs['timeout'] = self.timeout - - if 'allow_redirects' not in kwargs: - kwargs['allow_redirects'] = False - - r = err = None - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - for i in range(retry + 1): - try: - if mode == 'get': - r = self.session.get(url, **kwargs) - elif mode == 'post': - r = self.session.post(url, data=data, **kwargs) - - if r: - return set_charset(r), 'Success' - - except Exception as e: - err = e - - # if r and r.status_code in (403, 404): - # break - - if i < retry: - sleep(interval) - if show_errmsg: - print(f'重试 {url}') - - if r is None: - if show_errmsg: - if err: - raise err - else: - raise ConnectionError('连接失败') - return None, '连接失败' if err is None else err - - if not r.ok: - if show_errmsg: - raise ConnectionError(f'状态码:{r.status_code}') - return r, f'状态码:{r.status_code}' - - -class SessionPageSetter(object): - def __init__(self, page): - self._page = page - - def timeout(self, second): - """设置连接超时时间 - :param second: 秒数 - :return: None - """ - self._page.timeout = second - - def cookies(self, cookies): - """为Session对象设置cookies - :param cookies: cookies信息 - :return: None - """ - set_session_cookies(self._page.session, cookies) - - def headers(self, headers): - """设置通用的headers - :param headers: dict形式的headers - :return: None - """ - self._page.session.headers = CaseInsensitiveDict(headers) - - def header(self, attr, value): - """设置headers中一个项 - :param attr: 设置名称 - :param value: 设置值 - :return: None - """ - self._page.session.headers[attr.lower()] = value - - def user_agent(self, ua): - """设置user agent - :param ua: user agent - :return: None - """ - 
self._page.session.headers['user-agent'] = ua - - def proxies(self, http, https=None): - """设置proxies参数 - :param http: http代理地址 - :param https: https代理地址 - :return: None - """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._page.session.proxies = proxies - - def auth(self, auth): - """设置认证元组或对象 - :param auth: 认证元组或对象 - :return: None - """ - self._page.session.auth = auth - - def hooks(self, hooks): - """设置回调方法 - :param hooks: 回调方法 - :return: None - """ - self._page.session.hooks = hooks - - def params(self, params): - """设置查询参数字典 - :param params: 查询参数字典 - :return: None - """ - self._page.session.params = params - - def verify(self, on_off): - """设置是否验证SSL证书 - :param on_off: 是否验证 SSL 证书 - :return: None - """ - self._page.session.verify = on_off - - def cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 - :param cert: 证书路径或元组 - :return: None - """ - self._page.session.cert = cert - - def stream(self, on_off): - """设置是否使用流式响应内容 - :param on_off: 是否使用流式响应内容 - :return: None - """ - self._page.session.stream = on_off - - def trust_env(self, on_off): - """设置是否信任环境 - :param on_off: 是否信任环境 - :return: None - """ - self._page.session.trust_env = on_off - - def max_redirects(self, times): - """设置最大重定向次数 - :param times: 最大重定向次数 - :return: None - """ - self._page.session.max_redirects = times - - def add_adapter(self, url, adapter): - """添加适配器 - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: None - """ - self._page.session.mount(url, adapter) - - -class DownloadSetter(object): - """用于设置下载参数的类""" - - def __init__(self, page): - self._page = page - self._DownloadKit = None - - @property - def DownloadKit(self): - if self._DownloadKit is None: - self._DownloadKit = DownloadKit(session=self._page.session, goal_path=self._page.download_path) - return self._DownloadKit - - @property - def if_file_exists(self): - """返回用于设置存在同名文件时处理方法的对象""" - return FileExists(self) - - def split(self, on_off): - """设置是否允许拆分大文件用多线程下载 - 
:param on_off: 是否启用多线程下载大文件 - :return: None - """ - self.DownloadKit.split = on_off - - def save_path(self, path): - """设置下载保存路径 - :param path: 下载保存路径 - :return: None - """ - path = path if path is None else str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - -class FileExists(object): - """用于设置存在同名文件时处理方法""" - - def __init__(self, setter): - """ - :param setter: DownloadSetter对象 - """ - self._setter = setter - - def __call__(self, mode): - if mode not in ('skip', 'rename', 'overwrite'): - raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") - self._setter.DownloadKit.file_exists = mode - - def skip(self): - """设为跳过""" - self._setter.DownloadKit.file_exists = 'skip' - - def rename(self): - """设为重命名,文件名后加序号""" - self._setter.DownloadKit._file_exists = 'rename' - - def overwrite(self): - """设为覆盖""" - self._setter.DownloadKit._file_exists = 'overwrite' - - -def check_headers(kwargs, headers, arg) -> bool: - """检查kwargs或headers中是否有arg所示属性""" - return arg in kwargs['headers'] or arg in headers - - -def set_charset(response) -> Response: - """设置Response对象的编码""" - # 在headers中获取编码 - content_type = response.headers.get('content-type', '').lower() - charset = search(r'charset[=: ]*(.*)?;', content_type) - - if charset: - response.encoding = charset.group(1) - - # 在headers中获取不到编码,且如果是网页 - elif content_type.replace(' ', '').startswith('text/html'): - re_result = search(b']+).*?>', response.content) - - if re_result: - charset = re_result.group(1).decode() - else: - charset = response.apparent_encoding - - response.encoding = charset - - return response diff --git a/DrissionPage/mixpage/session_page.pyi b/DrissionPage/mixpage/session_page.pyi deleted file mode 100644 index 95011aa..0000000 --- a/DrissionPage/mixpage/session_page.pyi +++ /dev/null @@ -1,237 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Any, Union, Tuple, List - -from DownloadKit import 
DownloadKit -from requests import Session, Response -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict - -from .base import BasePage -from DrissionPage.configs.session_options import SessionOptions -from .session_element import SessionElement - - -class SessionPage(BasePage): - def __init__(self, - session_or_options: Union[Session, SessionOptions] = None, - timeout: float = None): - self._session: Session = ... - self._session_options: SessionOptions = ... - self._url: str = ... - self._response: Response = ... - self._download_path: str = ... - self._download_set: DownloadSetter = ... - self._url_available: bool = ... - self.timeout: float = ... - self.retry_times: int = ... - self.retry_interval: float = ... - self._set: SessionPageSetter = ... - - def _set_start_options(self, session_or_options, none) -> None: ... - - def _create_session(self) -> None: ... - - def _set_session(self, opt: SessionOptions) -> None: ... - - def _set_runtime_settings(self) -> None: ... - - def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def set_headers(self, headers: dict) -> None: ... - - def set_user_agent(self, ua: str) -> None: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def title(self) -> str: ... - - @property - def url(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> Union[dict, None]: ... - - @property - def download_path(self) -> str: ... - - @property - def download_set(self) -> DownloadSetter: ... 
- - def get(self, - url: str, - show_errmsg: bool | None = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = None, - params: dict | None = ..., - data: Union[dict, str, None] = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[SessionElement, str]]: ... - - def s_ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \ - -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None, - single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... - - def get_cookies(self, - as_dict: bool = False, - all_domains: bool = False) -> Union[dict, list]: ... - - # ----------------session独有属性和方法----------------------- - @property - def session(self) -> Session: ... - - @property - def response(self) -> Response: ... - - @property - def set(self) -> SessionPageSetter: ... - - @property - def download(self) -> DownloadKit: ... 
- - def post(self, - url: str, - data: Union[dict, str, None] = ..., - show_errmsg: bool = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - def _s_connect(self, - url: str, - mode: str, - data: Union[dict, str, None] = None, - show_errmsg: bool = False, - retry: int = None, - interval: float = None, - **kwargs) -> bool: ... - - def _make_response(self, - url: str, - mode: str = 'get', - data: Union[dict, str] = None, - retry: int = None, - interval: float = None, - show_errmsg: bool = False, - **kwargs) -> tuple: ... - - -class SessionPageSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - - def timeout(self, second: float) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def header(self, attr: str, value: str) -> None: ... - - def user_agent(self, ua: str) -> None: ... - - def proxies(self, http, https=None) -> None: ... - - def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... - - def hooks(self, hooks: Union[dict, None]) -> None: ... - - def params(self, params: Union[dict, None]) -> None: ... - - def verify(self, on_off: Union[bool, None]) -> None: ... - - def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... - - def stream(self, on_off: Union[bool, None]) -> None: ... - - def trust_env(self, on_off: Union[bool, None]) -> None: ... - - def max_redirects(self, times: Union[int, None]) -> None: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... 
- - -class DownloadSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - self._DownloadKit: DownloadKit = ... - - @property - def DownloadKit(self) -> DownloadKit: ... - - @property - def if_file_exists(self) -> FileExists: ... - - def split(self, on_off: bool) -> None: ... - - def save_path(self, path: Union[str, Path]): ... - - -class FileExists(object): - def __init__(self, setter: DownloadSetter): - self._setter: DownloadSetter = ... - - def __call__(self, mode: str) -> None: ... - - def skip(self) -> None: ... - - def rename(self) -> None: ... - - def overwrite(self) -> None: ... - - -def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], - arg: str) -> bool: ... - - -def set_charset(response: Response) -> Response: ... diff --git a/DrissionPage/mixpage/shadow_root_element.py b/DrissionPage/mixpage/shadow_root_element.py deleted file mode 100644 index 227d39e..0000000 --- a/DrissionPage/mixpage/shadow_root_element.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from time import perf_counter -from typing import Union - -from selenium.webdriver.remote.webelement import WebElement - -from .base import BaseElement -from .driver_element import make_driver_ele -from .session_element import make_session_ele, SessionElement -from ..commons.locator import get_loc - - -class ShadowRootElement(BaseElement): - """ShadowRootElement是用于处理ShadowRoot的类,使用方法和DriverElement基本一致""" - - def __init__(self, inner_ele, parent_ele): - super().__init__(parent_ele.page) - self.parent_ele = parent_ele - self._inner_ele = inner_ele - - @property - def inner_ele(self): - return self._inner_ele - - def __repr__(self): - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return 
self.ele(loc_or_str, timeout) - - @property - def tag(self): - """元素标签名""" - return 'shadow-root' - - @property - def html(self): - return f'{self.inner_html}' - - @property - def inner_html(self): - """返回内部的html文本""" - shadow_root = WebElement(self.page.driver, self.inner_ele._id) - return shadow_root.get_attribute('innerHTML') - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: DriverElement对象 - """ - if isinstance(level_or_loc, int): - loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' - - elif isinstance(level_or_loc, (tuple, str)): - loc = get_loc(level_or_loc, True) - - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}' - - else: - raise TypeError('level_or_loc参数只能是tuple、int或str。') - - return self.parent_ele.ele(loc, timeout=0) - - def next(self, index=1, filter_loc=''): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: DriverElement对象 - """ - nodes = self.nexts(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def before(self, index=1, filter_loc=''): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素前面的某个元素或节点 - """ - nodes = self.befores(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def after(self, index=1, filter_loc=''): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素后面的某个元素或节点 - """ - nodes = self.afters(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def nexts(self, filter_loc=''): - """返回后面所有兄弟元素或节点组成的列表 - :param filter_loc: 用于筛选的查询语法 - :return: DriverElement对象组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./{loc}' - 
return self.parent_ele.eles(xpath, timeout=0.1) - - def befores(self, filter_loc=''): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素前面的元素或节点组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./preceding::{loc}' - return self.parent_ele.eles(xpath, timeout=0.1) - - def afters(self, filter_loc=''): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素后面的元素或节点组成的列表 - """ - eles1 = self.nexts(filter_loc) - loc = get_loc(filter_loc, True)[1].lstrip('./') - xpath = f'xpath:./following::{loc}' - return eles1 + self.parent_ele.eles(xpath, timeout=0.1) - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素,默认返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本 - """ - return self._ele(loc_or_str, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def s_ele(self, loc_or_str=None) -> Union[SessionElement, str, None]: - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_str) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement列表形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - """返回当前元素下级符合条件的子元素,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 - :return: DriverElement对象 
- """ - # 先转换为sessionElement,再获取所有元素,获取它们的css selector路径,再用路径在页面上执行查找 - loc = get_loc(loc_or_str) - if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): - loc = loc[0], loc[1][5:] - - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - eles = make_session_ele(self.html).eles(loc) - while not eles and perf_counter() - t1 <= timeout: - eles = make_session_ele(self.html).eles(loc) - - if not eles: - return None if single else eles - - css_paths = [i.css_path[47:] for i in eles] - - if single: - return make_driver_ele(self, f'css:{css_paths[0]}', single, timeout) - else: - return [make_driver_ele(self, f'css:{css}', True, timeout) for css in css_paths] - - def run_script(self, script, *args): - """执行js代码,传入自己为第一个参数 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - shadow_root = WebElement(self.page.driver, self.inner_ele._id) - return shadow_root.parent.execute_script(script, shadow_root, *args) - - def is_enabled(self): - """是否可用""" - return self.inner_ele.is_enabled() - - def is_valid(self): - """用于判断元素是否还能用,应对页面跳转元素不能用的情况""" - try: - self.is_enabled() - return True - - except Exception: - return False diff --git a/DrissionPage/mixpage/shadow_root_element.pyi b/DrissionPage/mixpage/shadow_root_element.pyi deleted file mode 100644 index 8647391..0000000 --- a/DrissionPage/mixpage/shadow_root_element.pyi +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, Any, Tuple, List - -from selenium.webdriver.remote.webelement import WebElement - -from .driver_page import DriverPage -from .mix_page import MixPage -from .base import BaseElement -from .driver_element import DriverElement -from .session_element import SessionElement - - -class ShadowRootElement(BaseElement): - - def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): - self._inner_ele: WebElement = ... - self.parent_ele: DriverElement = ... 
- self.page: Union[MixPage, DriverPage] = ... - - @property - def inner_ele(self) -> WebElement: ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - def parent(self, level_or_loc: Union[str, int] = 1) -> DriverElement: ... - - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, str]]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = ..., - single: bool = ..., - relative: bool = ...) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - def run_script(self, script: str, *args) -> Any: ... - - def is_enabled(self) -> bool: ... - - def is_valid(self) -> bool: ...