From 07d023daad1681ff86ddb59f3a545ce136931b71 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 28 Jun 2023 14:51:12 +0800 Subject: [PATCH 01/13] =?UTF-8?q?=E6=96=B0=E5=BB=BAdev=E5=88=86=E6=94=AF?= =?UTF-8?q?=E7=BB=A7=E7=BB=AD3.3=EF=BC=9B=E7=9B=B8=E5=AF=B9=E5=AE=9A?= =?UTF-8?q?=E4=BD=8D=E7=AC=AC=E4=B8=80=E4=B8=AA=E5=8F=82=E6=95=B0=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E6=95=B0=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/__init__.py | 8 - DrissionPage/base.py | 69 +++- DrissionPage/base.pyi | 39 +- DrissionPage/chromium_base.py | 240 ++++------- DrissionPage/chromium_base.pyi | 78 ++-- DrissionPage/chromium_driver.py | 35 +- DrissionPage/chromium_element.py | 45 ++- DrissionPage/chromium_element.pyi | 18 +- DrissionPage/chromium_frame.py | 13 +- DrissionPage/chromium_frame.pyi | 12 +- DrissionPage/chromium_page.py | 485 +++++++++++++++-------- DrissionPage/chromium_page.pyi | 79 ++-- DrissionPage/chromium_tab.py | 74 +--- DrissionPage/chromium_tab.pyi | 28 +- DrissionPage/common.pyi | 7 + DrissionPage/commons/browser.py | 23 +- DrissionPage/commons/browser.pyi | 9 +- DrissionPage/commons/tools.py | 87 ++-- DrissionPage/commons/tools.pyi | 7 +- DrissionPage/commons/web.py | 114 +----- DrissionPage/commons/web.pyi | 61 --- DrissionPage/configs/chromium_options.py | 4 +- DrissionPage/configs/configs.ini | 3 +- DrissionPage/configs/session_options.py | 9 +- DrissionPage/easy_set.py | 163 +------- DrissionPage/easy_set.pyi | 18 +- DrissionPage/errors.py | 6 +- DrissionPage/network_listener.py | 325 +++++++++++++++ DrissionPage/network_listener.pyi | 140 +++++++ DrissionPage/session_element.py | 24 +- DrissionPage/session_element.pyi | 18 +- DrissionPage/session_page.py | 99 +---- DrissionPage/session_page.pyi | 53 +-- DrissionPage/web_page.py | 130 +----- DrissionPage/web_page.pyi | 43 +- 35 files changed, 1278 insertions(+), 1288 deletions(-) create mode 100644 DrissionPage/common.pyi create mode 100644 DrissionPage/network_listener.py create mode 100644 DrissionPage/network_listener.pyi diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 335c6a4..1f553d7 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -11,11 +11,3 @@ from .web_page import WebPage # 启动配置类 from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions - -# 旧版页面类和启动配置类 -try: - from .mixpage.mix_page import MixPage - from .mixpage.drission import Drission - from .configs.driver_options import DriverOptions -except ModuleNotFoundError: - pass diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 58ff3f3..34044c3 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -4,9 +4,12 @@ @Contact : g1879@qq.com """ from abc import abstractmethod +from pathlib import Path from re import sub from urllib.parse import quote +from DownloadKit import DownloadKit + from .commons.constants import Settings, NoneElement from .commons.locator import get_loc from .commons.web import format_html @@ -58,12 +61,6 @@ class BaseElement(BaseParser): def parent(self, level_or_loc=1): pass - def prev(self, index=1): - return None # ShadowRootElement直接继承 - - def prevs(self) -> None: - return None # ShadowRootElement直接继承 - def next(self, index=1): pass @@ -84,7 +81,7 @@ class BaseElement(BaseParser): class DrissionElement(BaseElement): - """DriverElement、ChromiumElement 和 SessionElement的基类 + """ChromiumElement 和 SessionElement的基类 但不是ShadowRootElement的基类""" @property @@ -119,9 +116,10 @@ class DrissionElement(BaseElement): return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ if isinstance(level_or_loc, int): @@ -133,21 +131,24 @@ class DrissionElement(BaseElement): if loc[0] == 'css selector': raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}' + loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}[{index}]' else: raise TypeError('level_or_loc参数只能是tuple、int或str。') return self._ele(loc, timeout=0, relative=True, raise_err=False) - def child(self, index=1, filter_loc='', timeout=None, ele_only=True): + def child(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回直接子元素元素或节点组成的列表,可用查询语法筛选 - :param index: 第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 + :param index: 第几个查询结果,1开始 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) if not nodes: if Settings.raise_ele_not_found: @@ -163,14 +164,17 @@ class DrissionElement(BaseElement): else: return NoneElement() - def prev(self, index=1, filter_loc='', timeout=0, ele_only=True): + def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] @@ -179,14 +183,17 @@ class DrissionElement(BaseElement): else: return NoneElement() - def next(self, index=1, filter_loc='', timeout=0, ele_only=True): + def next(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 + :param index: 后面第几个查询结果,1开始 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) if nodes: return nodes[0] @@ -195,14 +202,17 @@ class DrissionElement(BaseElement): else: return NoneElement() - def before(self, index=1, filter_loc='', timeout=None, ele_only=True): + def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 + :param index: 前面第几个查询结果,1开始 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] @@ -211,14 +221,17 @@ class DrissionElement(BaseElement): else: return NoneElement() - def after(self, index=1, filter_loc='', timeout=None, ele_only=True): + def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果,1开始 :param filter_loc: 用于筛选的查询语法 + :param index: 后面第几个查询结果,1开始 :param timeout: 查找节点的超时时间 :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ + if isinstance(filter_loc, int): + index = filter_loc + filter_loc = '' nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) if nodes: return nodes[0] @@ -292,7 +305,7 @@ class DrissionElement(BaseElement): :param direction: 'following' 或 'preceding',查找的方向 :param brother: 查找范围,在同级查找还是整个dom前后查找 :param timeout: 查找等待时间 - :return: DriverElement对象或字符串 + :return: 元素对象或字符串 """ if index is not None and index < 1: raise ValueError('index必须大于等于1。') @@ -353,6 +366,8 @@ class BasePage(BaseParser): self.retry_times = 3 self.retry_interval = 2 self._url_available = None + self._download_path = '' + self._DownloadKit = None @property def title(self): @@ -380,6 +395,18 @@ class BasePage(BaseParser): """返回当前访问的url有效性""" return self._url_available + @property + def download_path(self): + """返回默认下载路径""" + return str(Path(self._download_path).absolute()) + + @property + def download(self): + """返回下载器对象""" + if self._DownloadKit is None: + self._DownloadKit = DownloadKit(session=self, goal_path=self.download_path) + return self._DownloadKit + def _before_connect(self, url, retry, interval): """连接前的准备 :param url: 要访问的url @@ -387,7 +414,7 @@ class BasePage(BaseParser): :param interval: 重试间隔 :return: 重试次数和间隔组成的tuple """ - self._url = quote(url, safe='/:&?=%;#@+!') + self._url = quote(url, safe='/:&?=%;#@+![]') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval diff --git a/DrissionPage/base.pyi b/DrissionPage/base.pyi index 690241d..eda767f 100644 --- a/DrissionPage/base.pyi +++ b/DrissionPage/base.pyi @@ -6,6 +6,8 @@ from abc import abstractmethod from typing import Union, Tuple, List +from DownloadKit import DownloadKit + from .commons.constants import NoneElement @@ -78,30 +80,35 @@ class DrissionElement(BaseElement): def texts(self, text_node_only: bool = False) -> list: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[DrissionElement, None]: ... - def child(self, index: int = 1, - filter_loc: Union[tuple, str] = '', + def child(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def prev(self, index: int = 1, - filter_loc: Union[tuple, str] = '', + def prev(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def next(self, index: int = 1, - filter_loc: Union[tuple, str] = '', + def next(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def before(self, index: int = 1, - filter_loc: Union[tuple, str] = '', + def before(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... - def after(self, index: int = 1, - filter_loc: Union[tuple, str] = '', + def after(self, + filter_loc: Union[tuple, str, int] = '', + index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: ... @@ -154,7 +161,9 @@ class BasePage(BaseParser): self._url_available: bool = ... self.retry_times: int = ... self.retry_interval: float = ... - self._timeout = float = ... + self._timeout: float = ... + self._download_path: str = ... + self._DownloadKit: DownloadKit = ... @property def title(self) -> Union[str, None]: ... @@ -171,6 +180,12 @@ class BasePage(BaseParser): @property def url_available(self) -> bool: ... + @property + def download_path(self) -> str: ... + + @property + def download(self) -> DownloadKit: ... + def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... # ----------------以下属性或方法由后代实现---------------- diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index b87fb7a..e972da5 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -7,7 +7,6 @@ from base64 import b64decode from json import loads, JSONDecodeError from os import sep from pathlib import Path -from re import search from threading import Thread from time import perf_counter, sleep, time @@ -19,9 +18,10 @@ from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chro from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder -from .commons.web import set_browser_cookies, ResponseData -from .errors import ContextLossError, ElementLossError, AlertExistsError, CallMethodError, TabClosedError, \ - NoRectError, BrowserConnectError +from .commons.web import set_browser_cookies +from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ + NoRectError, BrowserConnectError, GetDocumentError +from .network_listener import NetworkListener from .session_element import make_session_ele @@ -41,6 +41,7 @@ class ChromiumBase(BasePage): self._tab_obj = None self._set = None self._screencast = None + self._listener = None if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): address = f'127.0.0.1:{address}' @@ -70,7 +71,9 @@ class ChromiumBase(BasePage): """ self._chromium_init() if not tab_id: - json = self._control_session.get(f'http://{self.address}/json').json() + u = f'http://{self.address}/json' + json = self._control_session.get(u).json() + self._control_session.get(u, headers={'Connection': 'close'}) tab_id = [i['id'] for i in json if i['type'] == 'page'] if not tab_id: raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') @@ -83,6 +86,7 @@ class ChromiumBase(BasePage): """浏览器初始设置""" self._control_session = Session() self._control_session.keep_alive = False + self._control_session.proxies = {'http': None, 'https': None} self._first_run = True self._is_reading = False self._upload_list = None @@ -131,7 +135,8 @@ class ChromiumBase(BasePage): self._debug_recorder.add_data((perf_counter(), '信息', f'root_id:{self._root_id}')) break - except Exception: + except CDPError as e: + err = e if self._debug: print('重试获取document') if self._debug_recorder: @@ -140,7 +145,9 @@ class ChromiumBase(BasePage): sleep(.1) else: - raise RuntimeError('获取document失败。') + txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \ + f'报告网址:https://gitee.com/g1879/DrissionPage/issues' + raise GetDocumentError(txt) if self._debug: print('获取document结束') @@ -325,6 +332,11 @@ class ChromiumBase(BasePage): """返回页面加载策略,有3种:'none'、'normal'、'eager'""" return self._page_load_strategy + @property + def user_agent(self): + """返回user agent""" + return self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + @property def scroll(self): """返回用于滚动滚动条的对象""" @@ -364,6 +376,13 @@ class ChromiumBase(BasePage): self._screencast = Screencast(self) return self._screencast + @property + def listener(self): + """返回用于聆听数据包的对象""" + if self._listener is None: + self._listener = NetworkListener(self) + return self._listener + def run_cdp(self, cmd, **cmd_args): """执行Chrome DevTools Protocol语句 :param cmd: 协议项目 @@ -391,7 +410,7 @@ class ChromiumBase(BasePage): elif error in ('Node does not have a layout object', 'Could not compute box model.'): raise NoRectError elif r['type'] == 'call_method_error': - raise CallMethodError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}') + raise CDPError(f'\n错误:{r["error"]}\nmethod:{r["method"]}\nargs:{r["args"]}') else: raise RuntimeError(r) @@ -542,9 +561,12 @@ class ChromiumBase(BasePage): if ok: try: if single: - return make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) + r = make_chromium_ele(self, node_id=nodeIds['nodeIds'][0]) + break + else: - return [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] + r = [make_chromium_ele(self, node_id=i) for i in nodeIds['nodeIds']] + break except ElementLossError: ok = False @@ -560,6 +582,12 @@ class ChromiumBase(BasePage): sleep(.1) + try: + self.run_cdp('DOM.discardSearchResults', searchId=search_result['searchId']) + except: + pass + return r + def refresh(self, ignore_cache=False): """刷新当前页面 :param ignore_cache: 是否忽略缓存 @@ -784,7 +812,7 @@ class ChromiumBase(BasePage): while self.ready_state not in ('complete', None): sleep(.1) if self._debug or show_errmsg: - print(f'重试 {to_url}') + print(f'重试{t + 1} {to_url}') if err: if show_errmsg: @@ -928,8 +956,18 @@ class ChromiumBaseSetter(object): js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' return self._page.run_js_loaded(js, as_expr=True) + def cookie(self, cookie): + """设置单个cookie + :param cookie: cookie信息 + :return: None + """ + if isinstance(cookie, str): + self.cookies(cookie) + else: + self.cookies([cookie]) + def cookies(self, cookies): - """设置cookies值 + """设置多个cookie,注意不要传入单个 :param cookies: cookies信息 :return: None """ @@ -963,7 +1001,6 @@ class ChromiumBaseWaiter(object): :param page_or_ele: 页面对象或元素对象 """ self._driver = page_or_ele - self._listener = None def ele_delete(self, loc_or_ele, timeout=None): """等待元素从DOM中删除 @@ -971,10 +1008,8 @@ class ChromiumBaseWaiter(object): :param timeout: 超时时间,默认读取页面超时时间 :return: 是否等待成功 """ - if isinstance(loc_or_ele, (str, tuple)): - ele = self._driver._ele(loc_or_ele, timeout=.3, raise_err=False) - return ele.wait.delete(timeout) if ele else True - return loc_or_ele.wait.delete(timeout) + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.delete(timeout) if ele else True def ele_display(self, loc_or_ele, timeout=None): """等待元素变成显示状态 @@ -982,8 +1017,8 @@ class ChromiumBaseWaiter(object): :param timeout: 超时时间,默认读取页面超时时间 :return: 是否等待成功 """ - ele = self._driver._ele(loc_or_ele, raise_err=False) - return ele.wait.display(timeout) if ele else False + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.display(timeout) def ele_hidden(self, loc_or_ele, timeout=None): """等待元素变成隐藏状态 @@ -991,9 +1026,18 @@ class ChromiumBaseWaiter(object): :param timeout: 超时时间,默认读取页面超时时间 :return: 是否等待成功 """ - ele = self._driver._ele(loc_or_ele, raise_err=False) + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) return ele.wait.hidden(timeout) + def ele_load(self, loc, timeout=None): + """等待元素加载到DOM + :param loc: 要等待的元素,输入定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :return: 成功返回元素对象,失败返回False + """ + ele = self._driver._ele(loc, raise_err=False, timeout=timeout) + return ele if ele else False + def load_start(self, timeout=None): """等待页面开始加载 :param timeout: 超时时间,为None时使用页面timeout属性 @@ -1021,7 +1065,8 @@ class ChromiumBaseWaiter(object): :return: 是否等待成功 """ if timeout != 0: - timeout = self._driver.timeout if timeout in (None, True) else timeout + if timeout is None or timeout is True: + timeout = self._driver.timeout end_time = perf_counter() + timeout while perf_counter() < end_time: if self._driver.is_loading == start: @@ -1029,132 +1074,6 @@ class ChromiumBaseWaiter(object): sleep(gap) return False - def set_targets(self, targets, is_regex=False): - """指定要等待的数据包 - :param targets: 要匹配的数据包url特征,可用list等传入多个 - :param is_regex: 设置的target是否正则表达式 - :return: None - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.set_targets(targets, is_regex) - - def data_packets(self, timeout=None, any_one=False): - """等待指定数据包加载完成 - :param timeout: 超时时间,为None则使用页面对象timeout - :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 - :return: ResponseData对象或监听结果字典 - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - return self._listener.listen(timeout, any_one) - - def stop_listening(self): - """停止监听数据包""" - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.stop() - - -class NetworkListener(object): - def __init__(self, page): - self._page = page - self._targets = None - self._is_regex = False - self._results = {} - self._single = False - self._requests = {} - - def set_targets(self, targets, is_regex=False): - """指定要等待的数据包 - :param targets: 要匹配的数据包url特征,可用list等传入多个 - :param is_regex: 设置的target是否正则表达式 - :return: None - """ - if not isinstance(targets, (str, list, tuple, set)): - raise TypeError('targets只能是str、list、tuple、set。') - self._is_regex = is_regex - if isinstance(targets, str): - self._targets = {targets} - self._single = True - else: - self._targets = set(targets) - self._single = False - self._page.run_cdp('Network.enable') - if targets is not None: - self._page.driver.Network.requestWillBeSent = self._requestWillBeSent - self._page.driver.Network.responseReceived = self._response_received - self._page.driver.Network.loadingFinished = self._loading_finished - else: - self.stop() - - def stop(self): - """停止监听数据包""" - self._page.run_cdp('Network.disable') - self._page.driver.Network.requestWillBeSent = None - self._page.driver.Network.responseReceived = None - self._page.driver.Network.loadingFinished = None - - def listen(self, timeout=None, any_one=False): - """等待指定数据包加载完成 - :param timeout: 超时时间,为None则使用页面对象timeout - :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 - :return: ResponseData对象或监听结果字典 - """ - if self._targets is None: - raise RuntimeError('必须先用set_targets()设置等待目标。') - - timeout = timeout if timeout is not None else self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._results and (any_one or set(self._results) == self._targets): - break - sleep(.1) - - self._requests = {} - if not self._results: - return False - r = list(self._results.values())[0] if self._single else self._results - self._results = {} - return r - - def _response_received(self, **kwargs): - """接收到返回信息时处理方法""" - if kwargs['requestId'] in self._requests: - self._requests[kwargs['requestId']]['response'] = kwargs['response'] - - def _loading_finished(self, **kwargs): - """请求完成时处理方法""" - request_id = kwargs['requestId'] - if request_id in self._requests: - try: - r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) - body = r['body'] - is_base64 = r['base64Encoded'] - except CallMethodError: - body = '' - is_base64 = False - - request = self._requests[request_id] - target = request['target'] - rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) - rd.method = request['method'] - rd.postData = request['post_data'] - rd._base64_body = is_base64 - rd.requestHeaders = request['request_headers'] - self._results[target] = rd - - def _requestWillBeSent(self, **kwargs): - """接收到请求时的回调函数""" - for target in self._targets: - if (self._is_regex and search(target, kwargs['request']['url'])) or ( - not self._is_regex and target in kwargs['request']['url']): - self._requests[kwargs['requestId']] = {'target': target, - 'method': kwargs['request']['method'], - 'post_data': kwargs['request'].get('postData', None), - 'request_headers': kwargs['request']['headers']} - break - class ChromiumPageScroll(ChromiumScroll): def __init__(self, page): @@ -1165,10 +1084,10 @@ class ChromiumPageScroll(ChromiumScroll): self.t1 = 'window' self.t2 = 'document.documentElement' - def to_see(self, loc_or_ele, center=False): + def to_see(self, loc_or_ele, center=None): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ ele = self._driver._ele(loc_or_ele) @@ -1177,17 +1096,22 @@ class ChromiumPageScroll(ChromiumScroll): def _to_see(self, ele, center): """执行滚动页面直到元素可见 :param ele: 元素对象 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ - if center: - ele.run_js('this.scrollIntoViewIfNeeded();') - self._wait_scrolled() - return - - ele.run_js('this.scrollIntoViewIfNeeded(false);') - if ele.states.is_covered: - ele.run_js('this.scrollIntoViewIfNeeded();') + txt = 'true' if center else 'false' + ele.run_js(f'this.scrollIntoViewIfNeeded({txt});') + if center or (center is not False and ele.states.is_covered): + ele.run_js('''function getWindowScrollTop() {var scroll_top = 0; + if (document.documentElement && document.documentElement.scrollTop) { + scroll_top = document.documentElement.scrollTop; + } else if (document.body) {scroll_top = document.body.scrollTop;} + return scroll_top;} + const { top, height } = this.getBoundingClientRect(); + const elCenter = top + height / 2; + const center = window.innerHeight / 2; + window.scrollTo({top: getWindowScrollTop() - (center - elCenter), + behavior: 'instant'});''') self._wait_scrolled() diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 9638dc8..11c5878 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -4,7 +4,7 @@ @Contact : g1879@qq.com """ from pathlib import Path -from typing import Union, Tuple, List, Any, Dict +from typing import Union, Tuple, List, Any from DataRecorder import Recorder from requests import Session @@ -15,12 +15,11 @@ from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement, ChromiumScroll from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement -from .commons.web import ResponseData +from .network_listener import NetworkListener from .session_element import SessionElement class ChromiumBase(BasePage): - def __init__(self, address: Union[str, int], tab_id: str = None, @@ -42,6 +41,7 @@ class ChromiumBase(BasePage): self._wait: ChromiumBaseWaiter = ... self._set: ChromiumBaseSetter = ... self._screencast: Screencast = ... + self._listener: NetworkListener = ... def _connect_browser(self, tab_id: str = None) -> None: ... @@ -111,6 +111,9 @@ class ChromiumBase(BasePage): @property def page_load_strategy(self) -> str: ... + @property + def user_agent(self) -> str: ... + @property def scroll(self) -> ChromiumPageScroll: ... @@ -129,37 +132,33 @@ class ChromiumBase(BasePage): @property def screencast(self) -> Screencast: ... + @property + def listener(self) -> NetworkListener: ... + def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... def run_js_loaded(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None, - timeout: float = None) -> Union[None, bool]: ... + def get(self, url: str, show_errmsg: bool = False, retry: int = None, + interval: float = None, timeout: float = None) -> Union[None, bool]: ... - def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[ - list, dict]: ... + def get_cookies(self, as_dict: bool = False, all_domains: bool = False, + all_info: bool = False) -> Union[list, dict]: ... - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], - timeout: float = None) -> ChromiumElement: ... + def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + timeout: float = None) -> Union[ChromiumElement, str]: ... - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[ChromiumElement]: ... + def eles(self, loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union[ChromiumElement, str]]: ... def s_ele(self, loc_or_ele: Union[Tuple[str, str], str] = None) \ -> Union[SessionElement, str, NoneElement]: ... def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - def _find_elements(self, - loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], + def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, ChromiumFrame, NoneElement, List[Union[ChromiumElement, ChromiumFrame]]]: ... @@ -217,7 +216,6 @@ class ChromiumBase(BasePage): class ChromiumBaseWaiter(object): def __init__(self, page: ChromiumBase): self._driver: ChromiumBase = ... - self._listener: NetworkListener = ... def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... @@ -225,51 +223,23 @@ class ChromiumBaseWaiter(object): def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + def ele_load(self, loc: Union[str, tuple], timeout: float = None) -> Union[bool, ChromiumElement]: ... + def _loading(self, timeout: float = None, start: bool = True, gap: float = .01) -> bool: ... def load_start(self, timeout: float = None) -> bool: ... def load_complete(self, timeout: float = None) -> bool: ... - def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... - - def stop_listening(self) -> None: ... - - def data_packets(self, timeout: float = None, - any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... - def upload_paths_inputted(self) -> None: ... -class NetworkListener(object): - def __init__(self, page): - self._page: ChromiumBase = ... - self._targets: Union[str, dict] = ... - self._single: bool = ... - self._results: Union[ResponseData, Dict[str, ResponseData], False] = ... - self._is_regex: bool = ... - self._requests: dict = ... - - def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... - - def stop(self) -> None: ... - - def listen(self, timeout: float = None, - any_one: bool = False) -> Union[ResponseData, Dict[str, ResponseData], False]: ... - - def _response_received(self, **kwargs) -> None: ... - - def _loading_finished(self, **kwargs) -> None: ... - - def _requestWillBeSent(self, **kwargs) -> None: ... - - class ChromiumPageScroll(ChromiumScroll): def __init__(self, page: ChromiumBase): ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[bool, None] = None) -> None: ... - def _to_see(self, ele: ChromiumElement, center: bool) -> None: ... + def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... class ChromiumBaseSetter(object): @@ -294,6 +264,8 @@ class ChromiumBaseSetter(object): def local_storage(self, item: str, value: Union[str, bool]) -> None: ... + def cookie(self, cookies: Union[RequestsCookieJar, str, dict]) -> None: ... + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... def headers(self, headers: dict) -> None: ... @@ -366,4 +338,4 @@ class ScreencastMode(object): def frugal_imgs_mode(self) -> None: ... - def imgs_mode(self) -> None: ... \ No newline at end of file + def imgs_mode(self) -> None: ... diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index fbe08e1..9d053f3 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -11,7 +11,7 @@ from threading import Thread, Event from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ create_connection -from .errors import CallMethodError +from .errors import CDPError class GenericAttr(object): @@ -79,7 +79,13 @@ class ChromiumDriver(object): message_json = dumps(message) if self.debug: - print(f"发> {message_json}") + if self.debug is True or (isinstance(self.debug, str) and message.get('method', '').startswith(self.debug)): + print(f'发> {message_json}') + elif isinstance(self.debug, (list, tuple, set)): + for m in self.debug: + if message.get('method', '').startswith(m): + print(f'发> {message_json}') + break if not isinstance(timeout, (int, float)) or timeout > 1: q_timeout = 1 @@ -117,7 +123,7 @@ class ChromiumDriver(object): try: self._ws.settimeout(1) message_json = self._ws.recv() - message = loads(message_json) + mes = loads(message_json) except WebSocketTimeoutException: continue except (WebSocketException, OSError, WebSocketConnectionClosedException): @@ -125,17 +131,24 @@ class ChromiumDriver(object): return if self.debug: - print(f'<收 {message_json}') + if self.debug is True or 'id' in mes or (isinstance(self.debug, str) + and mes.get('method', '').startswith(self.debug)): + print(f'<收 {message_json}') + elif isinstance(self.debug, (list, tuple, set)): + for m in self.debug: + if mes.get('method', '').startswith(m): + print(f'<收 {message_json}') + break - if "method" in message: - self.event_queue.put(message) + if "method" in mes: + self.event_queue.put(mes) - elif "id" in message: - if message["id"] in self.method_results: - self.method_results[message['id']].put(message) + elif "id" in mes: + if mes["id"] in self.method_results: + self.method_results[mes['id']].put(mes) elif self.debug: - print(f'未知信息:{message}') + print(f'未知信息:{mes}') def _handle_event_loop(self): """当接收到浏览器信息,执行已绑定的方法""" @@ -170,7 +183,7 @@ class ChromiumDriver(object): self.start() # raise RuntimeError("不能在启动前调用方法。") if args: - raise CallMethodError("参数必须是key=value形式。") + raise CDPError("参数必须是key=value形式。") if self._stopped.is_set(): return {'error': 'tab closed', 'type': 'tab_closed'} diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 59f4eb0..5aad97a 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -14,7 +14,7 @@ from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinition from .commons.locator import get_loc from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ - CallMethodError, NoResourceError, CanNotClickError + CDPError, NoResourceError, CanNotClickError from .session_element import make_session_ele @@ -99,7 +99,7 @@ class ChromiumElement(DrissionElement): try: attrs = self.page.run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} - except CallMethodError: # 文档根元素不能调用此方法 + except CDPError: # 文档根元素不能调用此方法 return {} @property @@ -203,12 +203,13 @@ class ChromiumElement(DrissionElement): return self._select - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - return super().parent(level_or_loc) + return super().parent(level_or_loc, index) def child(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -218,7 +219,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本 """ - return super().child(index, filter_loc, timeout, ele_only=ele_only) + return super().child(filter_loc, index, timeout, ele_only=ele_only) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -228,7 +229,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本 """ - return super().prev(index, filter_loc, timeout, ele_only=ele_only) + return super().prev(filter_loc, index, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -238,7 +239,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本 """ - return super().next(index, filter_loc, timeout, ele_only=ele_only) + return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -249,7 +250,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - return super().before(index, filter_loc, timeout, ele_only=ele_only) + return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -260,7 +261,7 @@ class ChromiumElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - return super().after(index, filter_loc, timeout, ele_only=ele_only) + return super().after(filter_loc, index, timeout, ele_only=ele_only) def children(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -464,7 +465,7 @@ class ChromiumElement(DrissionElement): try: result = self.page.run_cdp('Page.getResourceContent', frameId=frame, url=src) break - except CallMethodError: + except CDPError: sleep(.1) if not result: @@ -522,15 +523,24 @@ class ChromiumElement(DrissionElement): return self.page._get_screenshot(path, as_bytes=as_bytes, as_base64=as_base64, full_page=False, left_top=left_top, right_bottom=right_bottom, ele=self) - def input(self, vals, clear=True): + def input(self, vals, clear=True, by_js=False): """输入文本或组合键,也可用于输入文件路径到input元素(路径间用\n间隔) :param vals: 文本值或按键组合 :param clear: 输入前是否清空文本框 + :param by_js: 是否用js方式输入,不能输入组合键 :return: None """ if self.tag == 'input' and self.attr('type') == 'file': return self._set_file_input(vals) + if by_js: + if clear: + self.clear(True) + if isinstance(vals, (list, tuple)): + vals = ''.join([str(i) for i in vals]) + self.set.prop('value', str(vals)) + return + if clear and vals not in ('\n', '\ue007'): self.clear(by_js=False) else: @@ -749,7 +759,7 @@ class ChromiumShadowRoot(BaseElement): 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 + :return: 元素对象或属性、文本 """ return self.ele(loc_or_str, timeout) @@ -799,9 +809,10 @@ class ChromiumShadowRoot(BaseElement): from threading import Thread Thread(target=run_js, args=(self, script, as_expr, self.page.timeouts.script, args)).start() - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: ChromiumElement对象 """ if isinstance(level_or_loc, int): @@ -813,7 +824,7 @@ class ChromiumShadowRoot(BaseElement): if loc[0] == 'css selector': raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}' + loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}[{index}]' else: raise TypeError('level_or_loc参数只能是tuple、int或str。') @@ -1424,7 +1435,7 @@ class ChromiumElementStates(object): lx, ly = self._ele.locations.click_point try: r = self._ele.page.run_cdp('DOM.getNodeForLocation', x=lx, y=ly) - except CallMethodError: + except CDPError: return False if r.get('backendNodeId') != self._ele.ids.backend_id: @@ -1771,9 +1782,9 @@ class ChromiumScroll(object): class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center=False): + def to_see(self, center=None): """滚动页面直到元素可见 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ self._driver.page.scroll.to_see(self._driver, center=center) diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 53538ff..168bad7 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -94,29 +94,29 @@ class ChromiumElement(DrissionElement): @property def click(self) -> Click: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[ChromiumElement, None]: ... - def child(self, filter_loc: Union[tuple, str] = '', + def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def prev(self, filter_loc: Union[tuple, str] = '', + def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def next(self, filter_loc: Union[tuple, str] = '', + def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def before(self, filter_loc: Union[tuple, str] = '', + def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... - def after(self, filter_loc: Union[tuple, str] = '', + def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str, None]: ... @@ -183,7 +183,7 @@ class ChromiumElement(DrissionElement): def get_screenshot(self, path: [str, Path] = None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None) -> Union[str, bytes]: ... - def input(self, vals: Any, clear: bool = True) -> None: ... + def input(self, vals: Any, clear: bool = True, by_js: bool = False) -> None: ... def _set_file_input(self, files: Union[str, list, tuple]) -> None: ... @@ -273,7 +273,7 @@ class ChromiumShadowRoot(BaseElement): def run_async_js(self, script: str, *args: Any, as_expr: bool = False) -> None: ... - def parent(self, level_or_loc: Union[str, int] = 1) -> ChromiumElement: ... + def parent(self, level_or_loc: Union[str, int] = 1, index: int = 1) -> ChromiumElement: ... def child(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> Union[ChromiumElement, str, None]: ... @@ -496,7 +496,7 @@ class ChromiumScroll(object): class ChromiumElementScroll(ChromiumScroll): - def to_see(self, center: bool = False) -> None: ... + def to_see(self, center: Union[bool, None] = None) -> None: ... class ChromiumSelect(object): diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index f9118c4..6a9d70c 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -69,7 +69,9 @@ class ChromiumFrame(ChromiumBase): try: super()._driver_init(tab_id) except: - self._control_session.get(f'http://{self.address}/json') + u = f'http://{self.address}/json' + self._control_session.get(u) + self._control_session.get(u, headers={'Connection': 'close'}) super()._driver_init(tab_id) def _reload(self): @@ -359,13 +361,14 @@ class ChromiumFrame(ChromiumBase): else: return self.doc_ele.run_js(script, *args, as_expr=as_expr) - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ self._check_ok() - return self.frame_ele.parent(level_or_loc) + return self.frame_ele.parent(level_or_loc, index) def prev(self, filter_loc='', index=1, timeout=0, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -638,10 +641,10 @@ class ChromiumFrameScroll(ChromiumPageScroll): self.t1 = self.t2 = 'this.documentElement' self._wait_complete = False - def to_see(self, loc_or_ele, center=False): + def to_see(self, loc_or_ele, center=None): """滚动页面直到元素可见 :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :param center: 是否尽量滚动到页面正中 + :param center: 是否尽量滚动到页面正中,为None时如果被遮挡,则滚动到页面正中 :return: None """ ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 631fb5f..47dc8a1 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -120,24 +120,24 @@ class ChromiumFrame(ChromiumBase): def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[ChromiumElement, None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union[ChromiumElement, None]: ... - def prev(self, filter_loc: Union[tuple, str] = '', + def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str]: ... - def next(self, filter_loc: Union[tuple, str] = '', + def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = 0, ele_only: bool = True) -> Union[ChromiumElement, str]: ... - def before(self, filter_loc: Union[tuple, str] = '', + def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str]: ... - def after(self, filter_loc: Union[tuple, str] = '', + def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[ChromiumElement, str]: ... @@ -203,7 +203,7 @@ class ChromiumFrameIds(object): class ChromiumFrameScroll(ChromiumPageScroll): def __init__(self, frame: ChromiumFrame) -> None: ... - def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: bool = False) -> None: ... + def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[None, bool] = None) -> None: ... class ChromiumFrameSetter(ChromiumBaseSetter): diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 16d7a7b..00ec09d 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -3,23 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ -from pathlib import Path from platform import system -from threading import Thread from time import perf_counter, sleep -from warnings import warn - -from requests import Session from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBaseWaiter from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser from .commons.tools import port_is_using -from .commons.web import set_session_cookies from .configs.chromium_options import ChromiumOptions -from .errors import CallMethodError, BrowserConnectError -from .session_page import DownloadSetter +from .errors import BrowserConnectError class ChromiumPage(ChromiumBase): @@ -31,17 +24,15 @@ class ChromiumPage(ChromiumBase): :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间 """ - self._download_set = None - self._download_path = None super().__init__(addr_driver_opts, tab_id, timeout) def _set_start_options(self, addr_driver_opts, none): """设置浏览器启动属性 - :param addr_driver_opts: 'ip:port'、ChromiumDriver、ChromiumOptions + :param addr_driver_opts: 'ip:port'、ChromiumOptions :param none: 用于后代继承 :return: None """ - if not addr_driver_opts or str(type(addr_driver_opts)).endswith(("ChromiumOptions'>", "DriverOptions'>")): + if not addr_driver_opts or isinstance(addr_driver_opts, ChromiumOptions): self._driver_options = addr_driver_opts or ChromiumOptions(addr_driver_opts) # 接收浏览器地址和端口 @@ -80,7 +71,9 @@ class ChromiumPage(ChromiumBase): if not self._tab_obj: # 不是传入driver的情况 connect_browser(self._driver_options) if not tab_id: - json = self._control_session.get(f'http://{self.address}/json').json() + u = f'http://{self.address}/json' + json = self._control_session.get(u).json() + self._control_session.get(u, headers={'Connection': 'close'}) tab_id = [i['id'] for i in json if i['type'] == 'page'] if not tab_id: raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') @@ -94,7 +87,9 @@ class ChromiumPage(ChromiumBase): def _page_init(self): """页面相关设置""" - ws = self._control_session.get(f'http://{self.address}/json/version').json()['webSocketDebuggerUrl'] + u = f'http://{self.address}/json/version' + ws = self._control_session.get(u).json()['webSocketDebuggerUrl'] + self._control_session.get(u, headers={'Connection': 'close'}) self._browser_driver = ChromiumDriver(ws.split('/')[-1], 'browser', self.address) self._browser_driver.start() @@ -104,10 +99,10 @@ class ChromiumPage(ChromiumBase): self._rect = None self._main_tab = self.tab_id - try: - self.download_set.by_browser() - except CallMethodError: - pass + # try: + # self.download_set.by_browser() + # except CDPError: + # pass self._process_id = None r = self.browser_driver.SystemInfo.getProcessInfo() @@ -131,7 +126,9 @@ class ChromiumPage(ChromiumBase): @property def tabs(self): """返回所有标签页id组成的列表""" - j = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp + u = f'http://{self.address}/json' + j = self._control_session.get(u).json() # 不要改用cdp + self._control_session.get(u, headers={'Connection': 'close'}) return [i['id'] for i in j if i['type'] == 'page'] @property @@ -155,23 +152,23 @@ class ChromiumPage(ChromiumBase): self._set = ChromiumPageSetter(self) return self._set - @property - def download_path(self): - """返回默认下载路径""" - p = self._download_path or '' - return str(Path(p).absolute()) - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = ChromiumDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit + # @property + # def download_path(self): + # """返回默认下载路径""" + # p = self._download_path or '' + # return str(Path(p).absolute()) + # + # @property + # def download_set(self): + # """返回用于设置下载参数的对象""" + # if self._download_set is None: + # self._download_set = BaseDownloadSetter(self) + # return self._download_set + # + # @property + # def download(self): + # """返回下载器对象""" + # return self.download_set._switched_DownloadKit @property def rect(self): @@ -194,24 +191,29 @@ class ChromiumPage(ChromiumBase): tab_id = tab_id or self.tab_id return ChromiumTab(self, tab_id) - def find_tabs(self, text=None, by_title=True, by_url=None, special=False): + def find_tabs(self, title=None, url=None, tab_type=None, single=True): """查找符合条件的tab,返回它们的id组成的列表 - :param text: 查询条件 - :param by_title: 是否匹配title - :param by_url: 是否匹配url - :param special: 是否匹配特殊tab,如打印页 - :return: tab id组成的列表 + :param title: 要匹配title的文本 + :param url: 要匹配url的文本 + :param tab_type: tab类型,可用列表输入多个 + :param single: 是否返回首个结果的id,为False返回所有信息 + :return: tab id或tab dict """ - tabs = self._control_session.get(f'http://{self.address}/json').json() # 不要改用cdp - if text is None or not (by_title or by_url): - return [i['id'] for i in tabs if (not special and i['type'] == 'page') - or (special and i['type'] not in ('page', 'iframe'))] + u = f'http://{self.address}/json' + tabs = self._control_session.get(u).json() # 不要改用cdp + self._control_session.get(u, headers={'Connection': 'close'}) + if isinstance(tab_type, str): + tab_type = {tab_type} + elif isinstance(tab_type, (list, tuple, set)): + tab_type = set(tab_type) + elif tab_type is not None: + raise TypeError('tab_type只能是set、list、tuple、str、None。') - return [i['id'] for i in tabs if ((not special and i['type'] == 'page') - or (special and i['type'] not in ('page', 'iframe'))) - and ((by_url and text in i['url']) or (by_title and text in i['title']))] + r = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) + and (tab_type is None or i['type'] in tab_type))] + return r[0]['id'] if r and single else r - def new_tab(self, url=None, switch_to=True): + def new_tab(self, url=None, switch_to=False): """新建一个标签页,该标签页在最后面 :param url: 新标签页跳转到的网址 :param switch_to: 新建标签页后是否把焦点移过去 @@ -383,13 +385,6 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): super().__init__(page) self._listener = None - def download_begin(self, timeout=None): - """等待浏览器下载开始 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 - """ - return self._driver.download_set.wait_download_begin(timeout) - def new_tab(self, timeout=None): """等待新标签页出现 :param timeout: 等待超时时间,为None则使用页面对象timeout属性 @@ -400,6 +395,20 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): while self._driver.tab_id == self._driver.latest_tab and perf_counter() < end_time: sleep(.01) + # def download_begin(self, timeout=1.5): + # """等待浏览器下载开始 + # :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + # :return: 是否等到下载开始 + # """ + # return self._driver.download_set.wait_download_begin(timeout) + # + # def download_finish(self, timeout=None): + # """等待下载结束 + # :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + # :return: 是否等到下载结束 + # """ + # return self._driver.download_set.wait_download_finish(timeout) + class ChromiumTabRect(object): def __init__(self, page): @@ -472,125 +481,247 @@ class ChromiumTabRect(object): return self._page.browser_driver.Browser.getWindowForTarget(targetId=self._page.tab_id)['bounds'] -class ChromiumDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" +# class BaseDownloadSetter(DownloadSetter): +# """用于设置下载参数的类""" +# +# def __init__(self, page): +# """ +# :param page: ChromiumPage对象 +# """ +# super().__init__(page) +# self._behavior = 'allowAndName' +# self._session = None +# self._save_path = '' +# self._rename = None +# self._waiting_download = False +# self._download_begin = False +# self._browser_missions = {} +# self._browser_downloading_count = 0 +# self._show_msg = True +# +# @property +# def session(self): +# """返回用于DownloadKit的Session对象""" +# if self._session is None: +# self._session = Session() +# return self._session +# +# @property +# def browser_missions(self): +# """返回浏览器下载任务""" +# return list(self._browser_missions.values()) +# +# @property +# def DownloadKit_missions(self): +# """返回DownloadKit下载任务""" +# return list(self.DownloadKit.missions.values()) +# +# @property +# def _switched_DownloadKit(self): +# """返回从浏览器同步cookies后的Session对象""" +# self._cookies_to_session() +# return self.DownloadKit +# +# def save_path(self, path): +# """设置下载路径 +# :param path: 下载路径 +# :return: None +# """ +# path = path or '' +# path = Path(path).absolute() +# path.mkdir(parents=True, exist_ok=True) +# path = str(path) +# self._save_path = path +# self._page._download_path = path +# try: +# self._page.browser_driver.Browser.setDownloadBehavior(behavior='allowAndName', downloadPath=path, +# eventsEnabled=True) +# except CDPError: +# warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') +# self._page.run_cdp('Page.setDownloadBehavior', behavior='allowAndName', downloadPath=path) +# +# self.DownloadKit.goal_path = path +# +# def rename(self, name): +# """设置浏览器下一个下载任务的文件名 +# :param name: 文件名,不带后缀时自动使用原后缀 +# :return: None +# """ +# self._rename = name +# +# def by_browser(self): +# """设置使用浏览器下载文件""" +# try: +# self._page.browser_driver.Browser.setDownloadBehavior(behavior='allowAndName', eventsEnabled=True, +# downloadPath=self._page.download_path) +# self._page.browser_driver.Browser.downloadWillBegin = self._download_will_begin +# self._page.browser_driver.Browser.downloadProgress = self._download_progress +# except CDPError: +# self._page.driver.Page.setDownloadBehavior(behavior='allowAndName', downloadPath=self._page.download_path) +# self._page.driver.Page.downloadWillBegin = self._download_will_begin +# self._page.driver.Page.downloadProgress = self._download_progress +# +# self._behavior = 'allowAndName' +# +# def by_DownloadKit(self): +# """设置使用DownloadKit下载文件""" +# try: +# self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) +# self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit +# except CDPError: +# raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') +# +# self._behavior = 'deny' +# +# def wait_download_begin(self, timeout=None): +# """等待浏览器下载开始 +# :param timeout: 等待超时时间,为None则使用页面对象timeout属性 +# :return: 是否等到下载开始 +# """ +# self._waiting_download = True +# result = False +# timeout = timeout if timeout is not None else self._page.timeout +# end_time = perf_counter() + timeout +# while perf_counter() < end_time: +# if self._download_begin: +# result = True +# break +# sleep(.05) +# self._download_begin = False +# self._waiting_download = False +# return result +# +# def wait_download_finish(self, timeout=None): +# """等待所有下载结束 +# :param timeout: 超时时间 +# :return: 是否等待到下载完成 +# """ +# timeout = timeout if timeout is not None else self._page.timeout +# end_time = perf_counter() + timeout +# while perf_counter() < end_time: +# if (self._DownloadKit is None or not self.DownloadKit.is_running) and self._browser_downloading_count == 0: +# return True +# sleep(.5) +# return False +# +# def show_msg(self, on_off=True): +# """是否显示下载信息 +# :param on_off: bool表示开或关 +# :return: None +# """ +# self._show_msg = on_off +# +# def _cookies_to_session(self): +# """把driver对象的cookies复制到session对象""" +# ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] +# self.session.headers.update({"User-Agent": ua}) +# set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False)) +# +# def _download_by_DownloadKit(self, **kwargs): +# """拦截浏览器下载并用downloadKit下载""" +# url = kwargs['url'] +# if url.startswith('blob:'): +# raise TypeError('bolb:开头的链接无法使用DownloadKit下载,请用浏览器下载功能。') +# +# self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid']) +# +# if self._rename: +# rename = get_rename(kwargs['suggestedFilename'], self._rename) +# self._rename = None +# else: +# rename = kwargs['suggestedFilename'] +# +# mission = self._page.download.add(file_url=url, goal_path=self._page.download_path, rename=rename) +# Thread(target=self._wait_download_complete, args=(mission,), daemon=False).start() +# +# if self._waiting_download: +# self._download_begin = True +# +# self._browser_downloading_count += 1 +# +# if self._show_msg: +# print(f'(DownloadKit)开始下载:{Path(self._save_path) / rename}') +# +# def _download_will_begin(self, **kwargs): +# """浏览器下载即将开始时调用""" +# if self._rename: +# rename = get_rename(kwargs['suggestedFilename'], self._rename) +# self._rename = None +# else: +# rename = kwargs['suggestedFilename'] +# +# m = BrowserDownloadMission(kwargs['guid'], kwargs['url'], rename) +# self._browser_missions[kwargs['guid']] = m +# aid_path = Path(self._save_path) / rename +# +# if self._show_msg: +# print(f'(Browser)开始下载:{rename}') +# self._browser_downloading_count += 1 +# +# if self._file_exists == 'skip' and aid_path.exists(): +# m.state = 'skipped' +# m.save_path = aid_path.absolute() +# self._page.browser_driver.call_method('Browser.cancelDownload', guid=kwargs['guid']) +# (Path(self._save_path) / kwargs["guid"]).unlink(missing_ok=True) +# return +# +# if self._waiting_download: +# self._download_begin = True +# +# def _download_progress(self, **kwargs): +# """下载状态产生变化时调用""" +# guid = kwargs['guid'] +# m = self._browser_missions.get(guid, None) +# if m: +# m.size = kwargs['totalBytes'] +# m.received = kwargs['receivedBytes'] +# m.state = kwargs['state'] +# +# if m.state == 'completed': +# path = Path(self._save_path) / m.name +# from_path = Path(self._save_path) / guid +# if path.exists(): +# if self._file_exists == 'rename': +# path = get_usable_path(path) +# else: # 'overwrite' +# path.unlink() +# from_path.rename(path) +# m.save_path = path.absolute() +# +# if kwargs['state'] != 'inProgress': +# if self._show_msg and m: +# if kwargs['state'] == 'completed': +# print(f'(Browser)下载完成:{m.save_path}') +# elif m.state != 'skipped': +# print(f'(Browser)下载失败:{m.save_path}') +# else: +# print(f'(Browser)已跳过:{m.save_path}') +# self._browser_downloading_count -= 1 +# +# def _wait_download_complete(self, mission): +# """等待DownloadKit下载完成""" +# mission.wait(show=False) +# if self._show_msg: +# if mission.result == 'skip': +# print(f'(DownloadKit)已跳过:{mission.path}') +# elif not mission.result: +# print(f'(DownloadKit)下载失败:{mission.path}') +# else: +# print(f'(DownloadKit)下载完成:{mission.path}') - def __init__(self, page): - """ - :param page: ChromiumPage对象 - """ - super().__init__(page) - self._behavior = 'allow' - self._download_th = None - self._session = None - self._waiting_download = False - self._download_begin = False - @property - def session(self): - """返回用于DownloadKit的Session对象""" - if self._session is None: - self._session = Session() - return self._session +class BrowserDownloadMission(object): + def __init__(self, guid, url, name): + self.id = guid + self.url = url + self.name = name + self.save_path = None + self.state = None + self.size = None + self.received = None - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - self._cookies_to_session() - return self.DownloadKit - - def save_path(self, path): - """设置下载路径 - :param path: 下载路径 - :return: None - """ - path = path or '' - path = Path(path).absolute() - path.mkdir(parents=True, exist_ok=True) - path = str(path) - self._page._download_path = path - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', downloadPath=path, - eventsEnabled=True) - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.run_cdp('Page.setDownloadBehavior', behavior='allow', downloadPath=path) - - self.DownloadKit.goal_path = path - - def by_browser(self): - """设置使用浏览器下载文件""" - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser - except CallMethodError: - self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) - self._page.driver.Page.downloadWillBegin = self._download_by_browser - - self._behavior = 'allow' - - def by_DownloadKit(self): - """设置使用DownloadKit下载文件""" - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit - except CallMethodError: - raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') - self._behavior = 'deny' - - def wait_download_begin(self, timeout=None): - """等待浏览器下载开始 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 - """ - self._waiting_download = True - result = False - timeout = timeout if timeout is not None else self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._download_begin: - result = True - break - sleep(.05) - self._download_begin = False - self._waiting_download = False - return result - - def _cookies_to_session(self): - """把driver对象的cookies复制到session对象""" - ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": ua}) - set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False)) - - def _download_by_DownloadKit(self, **kwargs): - """拦截浏览器下载并用downloadKit下载""" - url = kwargs['url'] - if url.startswith('blob:'): - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - sleep(2) - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - - else: - self._page.browser_driver.Browser.cancelDownload(guid=kwargs['guid']) - self._page.download.add(file_url=url, goal_path=self._page.download_path, - rename=kwargs['suggestedFilename']) - if self._download_th is None or not self._download_th.is_alive(): - self._download_th = Thread(target=self._wait_download_complete, daemon=False) - self._download_th.start() - - if self._waiting_download: - self._download_begin = True - - def _download_by_browser(self, **kwargs): - """使用浏览器下载时调用""" - if self._waiting_download: - self._download_begin = True - - def _wait_download_complete(self): - """等待下载完成""" - self._page.download.wait() + def __repr__(self): + return f'' class Alert(object): @@ -788,3 +919,11 @@ def get_chrome_hwnds_from_pid(pid, title): hwnds = [] EnumWindows(callback, hwnds) return hwnds + + +def get_rename(original, rename): + if '.' in rename: + return rename + else: + suffix = original[original.rfind('.'):] if '.' in original else '' + return f'{rename}{suffix}' diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index d4ceb86..cfcb25e 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -5,41 +5,39 @@ """ from os import popen from pathlib import Path -from threading import Thread -from typing import Union, Tuple, List +from typing import Union, Tuple, List, Dict from DownloadKit import DownloadKit +from DownloadKit.mission import Mission from requests import Session -from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter, NetworkListener +from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .configs.chromium_options import ChromiumOptions -from .configs.driver_options import DriverOptions +from .network_listener import NetworkListener from .session_page import DownloadSetter class ChromiumPage(ChromiumBase): def __init__(self, - addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver, DriverOptions] = None, + addr_driver_opts: Union[str, int, ChromiumOptions, ChromiumDriver] = None, tab_id: str = None, timeout: float = None): - self._driver_options: [ChromiumDriver, DriverOptions] = ... + self._driver_options: ChromiumOptions = ... self._process_id: str = ... self._window_setter: WindowSetter = ... self._main_tab: str = ... self._alert: Alert = ... - self._download_path: str = ... - self._download_set: ChromiumDownloadSetter = ... self._browser_driver: ChromiumDriver = ... self._rect: ChromiumTabRect = ... def _connect_browser(self, - addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = None, + addr_driver_opts: Union[str, ChromiumDriver] = None, tab_id: str = None) -> None: ... - def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver, DriverOptions], none) -> None: ... + def _set_start_options(self, addr_driver_opts: Union[str, ChromiumDriver], none) -> None: ... def _page_init(self) -> None: ... @@ -70,21 +68,12 @@ class ChromiumPage(ChromiumBase): @property def set(self) -> ChromiumPageSetter: ... - @property - def download_set(self) -> ChromiumDownloadSetter: ... - - @property - def download(self) -> DownloadKit: ... - - @property - def download_path(self) -> str: ... - def get_tab(self, tab_id: str = None) -> ChromiumTab: ... - def find_tabs(self, text: str = None, by_title: bool = True, by_url: bool = None, - special: bool = False) -> List[str]: ... + def find_tabs(self, title: str = None, url: str = None, + tab_type: Union[str, list, tuple, set] = None, single: bool = True) -> Union[str, List[str]]: ... - def new_tab(self, url: str = None, switch_to: bool = True) -> str: ... + def new_tab(self, url: str = None, switch_to: bool = False) -> str: ... def to_main_tab(self) -> None: ... @@ -113,7 +102,9 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): _driver: ChromiumPage = ... _listener: Union[NetworkListener, None] = ... - def download_begin(self, timeout: float = None) -> bool: ... + def download_begin(self, timeout: float = 1.5) -> bool: ... + + def download_finish(self, timeout: float = None) -> bool: ... def new_tab(self, timeout: float = None) -> bool: ... @@ -151,36 +142,65 @@ class ChromiumTabRect(object): def _get_browser_rect(self) -> dict: ... -class ChromiumDownloadSetter(DownloadSetter): +class BaseDownloadSetter(DownloadSetter): def __init__(self, page: ChromiumPage): self._page: ChromiumPage = ... self._behavior: str = ... - self._download_th: Thread = ... - self._session: Session = None + self._session: Session = ... + self._save_path: str = ... + self._rename: str = ... self._waiting_download: bool = ... self._download_begin: bool = ... + self._browser_missions: Dict[str, BrowserDownloadMission] = ... + self._browser_downloading_count: int = ... + self._show_msg: bool = ... @property def session(self) -> Session: ... + @property + def browser_missions(self) -> List[BrowserDownloadMission]: ... + + @property + def DownloadKit_missions(self) -> List[Mission]: ... + @property def _switched_DownloadKit(self) -> DownloadKit: ... def save_path(self, path: Union[str, Path]) -> None: ... + def rename(self, name: str) -> None: ... + def by_browser(self) -> None: ... def by_DownloadKit(self) -> None: ... def wait_download_begin(self, timeout: float = None) -> bool: ... + def wait_download_finish(self, timeout: float = None) -> bool: ... + + def show_msg(self, on_off: bool = True) -> None: ... + def _cookies_to_session(self) -> None: ... def _download_by_DownloadKit(self, **kwargs) -> None: ... - def _download_by_browser(self, **kwargs) -> None: ... + def _download_will_begin(self, **kwargs) -> None: ... - def _wait_download_complete(self) -> None: ... + def _download_progress(self, **kwargs) -> None: ... + + def _wait_download_complete(self, mission: Mission) -> None: ... + + +class BrowserDownloadMission(object): + def __init__(self, guid: str, url: str, name: str): + self.id: str = ... + self.url: str = ... + self.name: str = ... + self.save_path: str = ... + self.state: str = ... + self.size: str = ... + self.received: str = ... class Alert(object): @@ -239,3 +259,6 @@ class ChromiumPageSetter(ChromiumBaseSetter): def window(self) -> WindowSetter: ... def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... + + +def get_rename(original: str, rename: str) -> str: ... diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index a97c9e7..256569b 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -7,7 +7,7 @@ from copy import copy from .chromium_base import ChromiumBase, ChromiumBaseSetter from .commons.web import set_session_cookies, set_browser_cookies -from .session_page import SessionPage, SessionPageSetter, DownloadSetter +from .session_page import SessionPage, SessionPageSetter class ChromiumTab(ChromiumBase): @@ -28,6 +28,10 @@ class ChromiumTab(ChromiumBase): self.retry_interval = self.page.retry_interval self._page_load_strategy = self.page.page_load_strategy + def close(self): + """关闭当前标签页""" + self.page.close_tabs(self.tab_id) + @property def rect(self): """返回获取窗口坐标和大小的对象""" @@ -48,11 +52,12 @@ class WebPageTab(SessionPage, ChromiumTab): self._has_driver = True self._has_session = True self._session = copy(page.session) - self._response = None - self._download_set = None - self._download_path = None self._set = None + + self._download_set = None + self._download_path = page.download_path + self._DownloadKit = None super(SessionPage, self)._set_runtime_settings() self._connect_browser(tab_id) @@ -120,6 +125,14 @@ class WebPageTab(SessionPage, ChromiumTab): """以dict方式返回cookies""" return super().cookies + @property + def user_agent(self): + """返回user agent""" + if self._mode == 's': + return super().user_agent + elif self._mode == 'd': + return super(SessionPage, self).user_agent + @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" @@ -152,18 +165,6 @@ class WebPageTab(SessionPage, ChromiumTab): self._set = WebPageTabSetter(self) return self._set - @property - def download_set(self): - """返回下载设置对象""" - if self._download_set is None: - self._download_set = WebPageTabDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit - def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """跳转到一个url :param url: 目标url @@ -292,17 +293,12 @@ class WebPageTab(SessionPage, ChromiumTab): selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] self.session.headers.update({"User-Agent": selenium_user_agent}) - # set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) - # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) - set_session_cookies(self.session, self._get_driver_cookies()) + set_session_cookies(self.session, super(SessionPage, self).get_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" if not self._has_driver: return - - # set_browser_cookies(self, super().get_cookies(as_dict=True)) - # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -315,22 +311,7 @@ class WebPageTab(SessionPage, ChromiumTab): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return self._get_driver_cookies(as_dict, all_info) - - def _get_driver_cookies(self, as_dict=False, all_info=False): - """获取浏览器cookies - :param as_dict: 是否以dict形式返回,为True时all_info无效 - :param all_info: 是否返回所有信息,为False时只返回name、value、domain - :return: cookies信息 - """ - cookies = self.run_cdp('Network.getCookies')['cookies'] - if as_dict: - return {cookie['name']: cookie['value'] for cookie in cookies} - elif all_info: - return cookies - else: - return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} - for cookie in cookies] + return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 @@ -355,7 +336,7 @@ class WebPageTabSetter(ChromiumBaseSetter): self._chromium_setter = ChromiumBaseSetter(self._page) def cookies(self, cookies): - """添加cookies信息到浏览器或session对象 + """添加多个cookies信息到浏览器或session对象,注意不要传入单个 :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` :return: None """ @@ -382,18 +363,3 @@ class WebPageTabSetter(ChromiumBaseSetter): self._chromium_setter.user_agent(ua, platform) -class WebPageTabDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page): - super().__init__(page) - self._session = page.session - - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - if self._page.mode == 'd': - ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self._page.session.headers.update({"User-Agent": ua}) - set_session_cookies(self._page.session, self._page.get_cookies(as_dict=False, all_domains=False)) - return self.DownloadKit diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 9def1d7..6a99610 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -5,7 +5,6 @@ """ from typing import Union, Tuple, Any, List -from DownloadKit import DownloadKit from requests import Session, Response from .chromium_base import ChromiumBase, ChromiumBaseSetter @@ -13,7 +12,7 @@ from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage, ChromiumTabRect from .session_element import SessionElement -from .session_page import SessionPage, SessionPageSetter, DownloadSetter +from .session_page import SessionPage, SessionPageSetter from .web_page import WebPage @@ -24,6 +23,8 @@ class ChromiumTab(ChromiumBase): def _set_runtime_settings(self) -> None: ... + def close(self) -> None: ... + @property def rect(self) -> ChromiumTabRect: ... @@ -34,8 +35,6 @@ class WebPageTab(SessionPage, ChromiumTab): self._mode: str = ... self._has_driver = ... self._has_session = ... - self._download_set = ... - self._download_path = ... def __call__(self, loc_or_str: Union[Tuple[str, str], str, ChromiumElement, SessionElement], @@ -65,6 +64,9 @@ class WebPageTab(SessionPage, ChromiumTab): @property def cookies(self) -> dict: ... + @property + def user_agent(self) -> str: ... + @property def session(self) -> Session: ... @@ -119,8 +121,6 @@ class WebPageTab(SessionPage, ChromiumTab): def get_cookies(self, as_dict: bool = False, all_domains: bool = False, all_info: bool = False) -> Union[dict, list]: ... - def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... - # ----------------重写SessionPage的函数----------------------- def post(self, url: str, @@ -145,12 +145,6 @@ class WebPageTab(SessionPage, ChromiumTab): @property def set(self) -> WebPageTabSetter: ... - @property - def download(self) -> DownloadKit: ... - - @property - def download_set(self) -> WebPageTabDownloadSetter: ... - def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, SessionElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ @@ -167,13 +161,3 @@ class WebPageTabSetter(ChromiumBaseSetter): def headers(self, headers: dict) -> None: ... def cookies(self, cookies) -> None: ... - - -class WebPageTabDownloadSetter(DownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page: WebPageTab): - self._page: WebPageTab = ... - - @property - def _switched_DownloadKit(self) -> DownloadKit: ... diff --git a/DrissionPage/common.pyi b/DrissionPage/common.pyi new file mode 100644 index 0000000..54677db --- /dev/null +++ b/DrissionPage/common.pyi @@ -0,0 +1,7 @@ +# -*- coding:utf-8 -*- +from .session_element import make_session_ele as make_session_ele + +from .action_chains import ActionChains as ActionChains +from .commons.keys import Keys as Keys +from .commons.by import By as By +from .commons.constants import Settings as Settings diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 73349aa..86b3e9e 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -11,14 +11,13 @@ from time import perf_counter, sleep from requests import get as requests_get -from DrissionPage.configs.chromium_options import ChromiumOptions from DrissionPage.errors import BrowserConnectError from .tools import port_is_using def connect_browser(option): """连接或启动浏览器 - :param option: DriverOptions对象 + :param option: ChromiumOptions对象 :return: chrome 路径和进程对象组成的元组 """ debugger_address = option.debugger_address.replace('localhost', '127.0.0.1').lstrip('http://').lstrip('https://') @@ -55,8 +54,8 @@ def connect_browser(option): def get_launch_args(opt): - """从DriverOptions获取命令行启动参数 - :param opt: DriverOptions或ChromiumOptions + """从ChromiumOptions获取命令行启动参数 + :param opt: ChromiumOptions :return: 启动参数列表 """ # ----------处理arguments----------- @@ -87,7 +86,7 @@ def get_launch_args(opt): result = list(result) # ----------处理插件extensions------------- - ext = opt.extensions if isinstance(opt, ChromiumOptions) else opt._extension_files + ext = opt.extensions if ext: ext = ','.join(set(ext)) ext = f'--load-extension={ext}' @@ -98,15 +97,11 @@ def get_launch_args(opt): def set_prefs(opt): """处理启动配置中的prefs项,目前只能对已存在文件夹配置 - :param opt: DriverOptions或ChromiumOptions + :param opt: ChromiumOptions :return: None """ - if isinstance(opt, ChromiumOptions): - prefs = opt.preferences - del_list = opt._prefs_to_del - else: - prefs = opt.experimental_options.get('prefs', []) - del_list = [] + prefs = opt.preferences + del_list = opt._prefs_to_del if not opt.user_data_path: return @@ -150,7 +145,9 @@ def test_connect(ip, port): end_time = perf_counter() + 30 while perf_counter() < end_time: try: - tabs = requests_get(f'http://{ip}:{port}/json', timeout=10).json() + u = f'http://{ip}:{port}/json' + tabs = requests_get(u, timeout=10, proxies={'http': None, 'https': None}).json() + requests_get(u, headers={'Connection': 'close'}, proxies={'http': None, 'https': None}) for tab in tabs: if tab['type'] == 'page': return diff --git a/DrissionPage/commons/browser.pyi b/DrissionPage/commons/browser.pyi index 2324000..ede46db 100644 --- a/DrissionPage/commons/browser.pyi +++ b/DrissionPage/commons/browser.pyi @@ -3,16 +3,13 @@ @Author : g1879 @Contact : g1879@qq.com """ -from typing import Union - from DrissionPage.configs.chromium_options import ChromiumOptions -from DrissionPage.configs.driver_options import DriverOptions -def connect_browser(option: Union[ChromiumOptions, DriverOptions]) -> tuple: ... +def connect_browser(option: ChromiumOptions) -> tuple: ... -def get_launch_args(opt: Union[ChromiumOptions, DriverOptions]) -> list: ... +def get_launch_args(opt: ChromiumOptions) -> list: ... -def set_prefs(opt: Union[ChromiumOptions, DriverOptions]) -> None: ... +def set_prefs(opt: ChromiumOptions) -> None: ... diff --git a/DrissionPage/commons/tools.py b/DrissionPage/commons/tools.py index 1a70f15..a95dc7d 100644 --- a/DrissionPage/commons/tools.py +++ b/DrissionPage/commons/tools.py @@ -6,47 +6,6 @@ from pathlib import Path from re import search, sub from shutil import rmtree -from zipfile import ZipFile - - -def get_exe_from_port(port): - """获取端口号第一条进程的可执行文件路径 - :param port: 端口号 - :return: 可执行文件的绝对路径 - """ - from os import popen - - pid = get_pid_from_port(port) - if not pid: - return - else: - file_lst = popen(f'wmic process where processid={pid} get executablepath').read().split('\n') - return file_lst[2].strip() if len(file_lst) > 2 else None - - -def get_pid_from_port(port): - """获取端口号第一条进程的pid - :param port: 端口号 - :return: 进程id - """ - from platform import system - if system().lower() != 'windows' or port is None: - return None - - from os import popen - from time import perf_counter - - try: # 避免Anaconda中可能产生的报错 - process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] - - t = perf_counter() - while not process and perf_counter() - t < 5: - process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] - - return process.split(' ')[-1] or None - - except Exception: - return None def get_usable_path(path): @@ -142,11 +101,41 @@ def clean_folder(folder_path, ignore=None): elif f.is_dir(): rmtree(f, True) - -def unzip(zip_path, to_path): - """解压下载的chromedriver.zip文件""" - if not zip_path: - return - - with ZipFile(zip_path, 'r') as f: - return [f.extract(f.namelist()[0], path=to_path)] +# def get_exe_from_port(port): +# """获取端口号第一条进程的可执行文件路径 +# :param port: 端口号 +# :return: 可执行文件的绝对路径 +# """ +# from os import popen +# +# pid = get_pid_from_port(port) +# if not pid: +# return +# else: +# file_lst = popen(f'wmic process where processid={pid} get executablepath').read().split('\n') +# return file_lst[2].strip() if len(file_lst) > 2 else None +# +# +# def get_pid_from_port(port): +# """获取端口号第一条进程的pid +# :param port: 端口号 +# :return: 进程id +# """ +# from platform import system +# if system().lower() != 'windows' or port is None: +# return None +# +# from os import popen +# from time import perf_counter +# +# try: # 避免Anaconda中可能产生的报错 +# process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] +# +# t = perf_counter() +# while not process and perf_counter() - t < 5: +# process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] +# +# return process.split(' ')[-1] or None +# +# except Exception: +# return None diff --git a/DrissionPage/commons/tools.pyi b/DrissionPage/commons/tools.pyi index a95722d..f7b91e8 100644 --- a/DrissionPage/commons/tools.pyi +++ b/DrissionPage/commons/tools.pyi @@ -7,10 +7,10 @@ from pathlib import Path from typing import Union -def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... +# def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... -def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ... +# def get_pid_from_port(port: Union[str, int]) -> Union[str, None]: ... def get_usable_path(path: Union[str, Path]) -> Path: ... @@ -26,6 +26,3 @@ def port_is_using(ip: str, port: Union[str, int]) -> bool: ... def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... - - -def unzip(zip_path: str, to_path: str) -> Union[list, None]: ... diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index 0a7cd14..928b545 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -3,103 +3,15 @@ @Author : g1879 @Contact : g1879@qq.com """ -from base64 import b64decode from html import unescape from http.cookiejar import Cookie -from json import loads, JSONDecodeError from re import sub from urllib.parse import urlparse, urljoin, urlunparse from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict from tldextract import extract -class ResponseData(object): - """返回的数据包管理类""" - __slots__ = ('requestId', 'response', 'rawBody', 'tab', 'target', 'url', 'status', 'statusText', 'securityDetails', - 'headersText', 'mimeType', 'requestHeadersText', 'connectionReused', 'connectionId', 'remoteIPAddress', - 'remotePort', 'fromDiskCache', 'fromServiceWorker', 'fromPrefetchCache', 'encodedDataLength', 'timing', - 'serviceWorkerResponseSource', 'responseTime', 'cacheStorageCacheName', 'protocol', 'securityState', - '_requestHeaders', '_body', '_base64_body', '_rawPostData', '_postData', 'method') - - def __init__(self, request_id, response, body, tab, target): - """ - :param response: response的数据 - :param body: response包含的内容 - :param tab: 产生这个数据包的tab的id - :param target: 监听目标 - """ - self.requestId = request_id - self.response = CaseInsensitiveDict(response) - self.rawBody = body - self.tab = tab - self.target = target - self._requestHeaders = None - self._postData = None - self._body = None - self._base64_body = False - self._rawPostData = None - - def __getattr__(self, item): - return self.response.get(item, None) - - def __getitem__(self, item): - return self.response.get(item, None) - - def __repr__(self): - return f'' - - @property - def headers(self): - """以大小写不敏感字典返回headers数据""" - headers = self.response.get('headers', None) - return CaseInsensitiveDict(headers) if headers else None - - @property - def requestHeaders(self): - """以大小写不敏感字典返回requestHeaders数据""" - if self._requestHeaders: - return self._requestHeaders - headers = self.response.get('requestHeaders', None) - return CaseInsensitiveDict(headers) if headers else None - - @requestHeaders.setter - def requestHeaders(self, val): - """设置requestHeaders""" - self._requestHeaders = val - - @property - def postData(self): - """返回postData数据""" - if self._postData is None and self._rawPostData: - try: - self._postData = loads(self._rawPostData) - except (JSONDecodeError, TypeError): - self._postData = self._rawPostData - return self._postData - - @postData.setter - def postData(self, val): - """设置postData""" - self._rawPostData = val - - @property - def body(self): - """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" - if self._body is None: - if self._base64_body: - self._body = b64decode(self.rawBody) - - else: - try: - self._body = loads(self.rawBody) - except (JSONDecodeError, TypeError): - self._body = self.rawBody - - return self._body - - def get_ele_txt(e): """获取元素内所有文本 :param e: 元素对象 @@ -190,8 +102,6 @@ def location_in_viewport(page, loc_x, loc_y): if (x< scrollLeft || y < scrollTop || x > vWidth + scrollLeft || y > vHeight + scrollTop){{return false;}} return true;}}''' return page.run_js(js) - # const vWidth = window.innerWidth || document.documentElement.clientWidth; - # const vHeight = window.innerHeight || document.documentElement.clientHeight; def offset_scroll(ele, offset_x, offset_y): @@ -334,8 +244,7 @@ def set_browser_cookies(page, cookies): :param cookies: cookies信息 :return: None """ - cookies = cookies_to_tuple(cookies) - for cookie in cookies: + for cookie in cookies_to_tuple(cookies): if 'expiry' in cookie: cookie['expires'] = int(cookie['expiry']) cookie.pop('expiry') @@ -343,6 +252,15 @@ def set_browser_cookies(page, cookies): cookie['expires'] = int(cookie['expires']) if cookie['value'] is None: cookie['value'] = '' + if cookie['name'].startswith('__Secure-'): + cookie['secure'] = True + + if cookie['name'].startswith('__Host-'): + cookie['path'] = '/' + cookie['secure'] = True + cookie['url'] = page.url + page.run_cdp_loaded('Network.setCookie', **cookie) + continue # 不用设置域名,可退出 if cookie.get('domain', None): try: @@ -376,7 +294,13 @@ def is_cookie_in_driver(page, cookie): :param cookie: dict格式cookie :return: bool """ - for c in page.get_cookies(): - if cookie['name'] == c['name'] and cookie['value'] == c['value']: - return True + if 'domain' in cookie: + for c in page.get_cookies(all_domains=True): + if cookie['name'] == c['name'] and cookie['value'] == c['value'] and cookie['domain'] == c.get('domain', + None): + return True + else: + for c in page.get_cookies(all_domains=True): + if cookie['name'] == c['name'] and cookie['value'] == c['value']: + return True return False diff --git a/DrissionPage/commons/web.pyi b/DrissionPage/commons/web.pyi index b57ed66..b91ba71 100644 --- a/DrissionPage/commons/web.pyi +++ b/DrissionPage/commons/web.pyi @@ -8,73 +8,12 @@ from typing import Union from requests import Session from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict from DrissionPage.base import DrissionElement, BasePage from DrissionPage.chromium_element import ChromiumElement from DrissionPage.chromium_base import ChromiumBase -class ResponseData(object): - - def __init__(self, request_id: str, response: dict, body: str, tab: str, target: str): - self.requestId: str = ... - self.response: CaseInsensitiveDict = ... - self.rawBody: str = ... - self._body: Union[str, dict, bytes] = ... - self._base64_body: bool = ... - self.tab: str = ... - self.target: str = ... - self.method: str = ... - self._postData: dict = ... - self._rawPostData: str = ... - self.url: str = ... - self.status: str = ... - self.statusText: str = ... - self.headersText: str = ... - self.mimeType: str = ... - self.requestHeadersText: str = ... - self.connectionReused: str = ... - self.connectionId: str = ... - self.remoteIPAddress: str = ... - self.remotePort: str = ... - self.fromDiskCache: str = ... - self.fromServiceWorker: str = ... - self.fromPrefetchCache: str = ... - self.encodedDataLength: str = ... - self.timing: str = ... - self.serviceWorkerResponseSource: str = ... - self.responseTime: str = ... - self.cacheStorageCacheName: str = ... - self.protocol: str = ... - self.securityState: str = ... - self.securityDetails: str = ... - - def __getattr__(self, item: str) -> Union[str, None]: ... - - def __getitem__(self, item: str) -> Union[str, None]: ... - - def __repr__(self) -> str: ... - - @property - def headers(self) -> Union[CaseInsensitiveDict, None]: ... - - @property - def requestHeaders(self) -> Union[CaseInsensitiveDict, None]: ... - - @requestHeaders.setter - def requestHeaders(self, val:dict) -> None: ... - - @property - def postData(self) -> Union[dict, str, None]: ... - - @postData.setter - def postData(self, val: Union[str, dict]) -> None: ... - - @property - def body(self) -> Union[str, dict, bytes]: ... - - def get_ele_txt(e: DrissionElement) -> str: ... diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/configs/chromium_options.py index e31d97e..7a6b0f5 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/configs/chromium_options.py @@ -26,7 +26,7 @@ class ChromiumOptions(object): self.ini_path = om.ini_path options = om.chrome_options - self._download_path = om.paths.get('download_path', None) + self._download_path = om.paths.get('download_path', '') self._arguments = options.get('arguments', []) self._binary_location = options.get('binary_location', '') self._extensions = options.get('extensions', []) @@ -62,7 +62,7 @@ class ChromiumOptions(object): self.ini_path = None self._binary_location = "chrome" self._arguments = [] - self._download_path = None + self._download_path = '' self._extensions = [] self._prefs = {} self._timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} diff --git a/DrissionPage/configs/configs.ini b/DrissionPage/configs/configs.ini index 6591d2f..54d20ab 100644 --- a/DrissionPage/configs/configs.ini +++ b/DrissionPage/configs/configs.ini @@ -1,11 +1,10 @@ [paths] -chromedriver_path = download_path = [chrome_options] debugger_address = 127.0.0.1:9222 binary_location = chrome -arguments = ['--remote-allow-origins=*', '--no-first-run', '--disable-gpu', '--disable-infobars', '--disable-popup-blocking'] +arguments = ['--remote-allow-origins=*', '--no-first-run', '--disable-infobars', '--disable-popup-blocking'] extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}}} page_load_strategy = normal diff --git a/DrissionPage/configs/session_options.py b/DrissionPage/configs/session_options.py index eaa763b..03fc800 100644 --- a/DrissionPage/configs/session_options.py +++ b/DrissionPage/configs/session_options.py @@ -21,7 +21,7 @@ class SessionOptions(object): :param ini_path: ini文件路径 """ self.ini_path = None - self._download_path = None + self._download_path = '' self._headers = None self._cookies = None self._auth = None @@ -73,7 +73,7 @@ class SessionOptions(object): self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) self._timeout = om.timeouts.get('implicit', 10) - self._download_path = om.paths.get('download_path', None) + self._download_path = om.paths.get('download_path', '') # ===========须独立处理的项开始============ @property @@ -110,14 +110,13 @@ class SessionOptions(object): self._proxies = {} return self._proxies - def set_proxies(self, http, https=None): + def set_proxies(self, http=None, https=None): """设置proxies参数 :param http: http代理地址 :param https: https代理地址 :return: 返回当前对象 """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._sets('proxies', proxies) + self._sets('proxies', {'http': http, 'https': https}) return self # ===========须独立处理的项结束============ diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 98e1260..91fcacb 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -6,20 +6,10 @@ from os import popen from pathlib import Path from re import search -from typing import Union from .commons.constants import Settings -from .commons.tools import unzip from .configs.chromium_options import ChromiumOptions from .configs.options_manage import OptionsManager -from .session_page import SessionPage - -try: - from selenium import webdriver - from DrissionPage.mixpage.drission import Drission - from .configs.driver_options import DriverOptions -except ModuleNotFoundError: - pass def raise_when_ele_not_found(on_off=True): @@ -48,19 +38,14 @@ def show_settings(ini_path=None): OptionsManager(ini_path).show() -def set_paths(driver_path=None, - chrome_path=None, - browser_path=None, +def set_paths(browser_path=None, local_port=None, debugger_address=None, download_path=None, user_data_path=None, cache_path=None, - ini_path=None, - check_version=False): + ini_path=None): """快捷的路径设置函数 - :param driver_path: chromedriver.exe路径 - :param chrome_path: 浏览器可执行文件路径 :param browser_path: 浏览器可执行文件路径 :param local_port: 本地端口号 :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 @@ -68,7 +53,6 @@ def set_paths(driver_path=None, :param user_data_path: 用户数据路径 :param cache_path: 缓存路径 :param ini_path: 要修改的ini文件路径 - :param check_version: 是否检查chromedriver和chrome是否匹配 :return: None """ om = OptionsManager(ini_path) @@ -76,12 +60,6 @@ def set_paths(driver_path=None, def format_path(path: str) -> str: return str(path) if path else '' - if driver_path is not None: - om.set_item('paths', 'chromedriver_path', format_path(driver_path)) - - if chrome_path is not None: - om.set_item('chrome_options', 'binary_location', format_path(chrome_path)) - if browser_path is not None: om.set_item('chrome_options', 'binary_location', format_path(browser_path)) @@ -103,9 +81,6 @@ def set_paths(driver_path=None, if cache_path is not None: set_argument('--disk-cache-dir', format_path(cache_path), ini_path) - if check_version: - check_driver_version(format_path(driver_path), format_path(browser_path)) - def use_auto_port(on_off=True, ini_path=None): """设置启动浏览器时使用自动分配的端口和临时文件夹 @@ -203,89 +178,6 @@ def set_proxy(proxy, ini_path=None): set_argument('--proxy-server', proxy, ini_path) -def check_driver_version(driver_path=None, chrome_path=None): - """检查传入的chrome和chromedriver是否匹配 - :param driver_path: chromedriver.exe路径 - :param chrome_path: chrome.exe路径 - :return: 是否匹配 - """ - print('正在检测可用性...') - om = OptionsManager() - driver_path = driver_path or om.get_value('paths', 'chromedriver_path') or 'chromedriver' - chrome_path = str(chrome_path or om.get_value('chrome_options', 'binary_location')) - do = DriverOptions(read_file=False) - do.add_argument('--headless') - - if chrome_path: - do.binary_location = chrome_path - - try: - driver = webdriver.Chrome(driver_path, options=do) - driver.quit() - print('版本匹配,可正常使用。') - - return True - - except Exception as e: - print(f'出现异常:\n{e}\n可执行easy_set.get_match_driver()自动下载匹配的版本。\n' - f'或自行从以下网址下载:http://npm.taobao.org/mirrors/chromedriver/') - - return False - - -# -------------------------自动识别chrome版本号并下载对应driver------------------------ -def get_match_driver(ini_path='default', - save_path=None, - chrome_path=None, - show_msg=True, - check_version=True): - """自动识别chrome版本并下载匹配的driver - :param ini_path: 要读取和修改的ini文件路径 - :param save_path: chromedriver保存路径 - :param chrome_path: 指定chrome.exe位置 - :param show_msg: 是否打印信息 - :param check_version: 是否检查版本匹配 - :return: None - """ - save_path = save_path or str(Path(__file__).parent) - - chrome_path = chrome_path or get_chrome_path(ini_path, show_msg) - chrome_path = Path(chrome_path).absolute() if chrome_path else None - if show_msg: - print('chrome.exe路径', chrome_path) - - ver = _get_chrome_version(str(chrome_path)) - if show_msg: - print('version', ver) - - zip_path = _download_driver(ver, save_path, show_msg=show_msg) - - if not zip_path and show_msg: - print('没有找到对应版本的driver。') - - try: - driver_path = unzip(zip_path, save_path)[0] - except TypeError: - driver_path = None - - if show_msg: - print('解压路径', driver_path) - - if driver_path: - Path(zip_path).unlink() - if ini_path: - set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False) - - if check_version: - if not check_driver_version(driver_path, chrome_path) and show_msg: - print('获取失败,请手动配置。') - else: - if show_msg: - print('获取失败,请手动配置。') - - return driver_path - - def get_chrome_path(ini_path=None, show_msg=True, from_ini=True, @@ -365,54 +257,3 @@ def get_chrome_path(ini_path=None, return str(path) except OSError: pass - - -def _get_chrome_version(path: str) -> Union[str, None]: - """根据文件路径获取版本号 - :param path: chrome.exe文件路径 - :return: 版本号 - """ - if not path: - return - - path = str(path).replace('\\', '\\\\') - - try: - return (popen(f'wmic datafile where "name=\'{path}\'" get version').read() - .lower().split('\n')[2].replace(' ', '')) - except Exception: - return None - - -def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]: - """根据传入的版本号到镜像网站查找,下载最相近的 - :param version: 本地版本号 - :return: 保存地址 - """ - if not version: - return - - main_ver = version.split('.')[0] - remote_ver = None - - page = SessionPage(Drission().session) - page.get('https://registry.npmmirror.com/-/binary/chromedriver/') - - for version in page.json: - # 遍历所有版本,跳过大版本不一致的,如果有完全匹配的,获取url,如果没有,获取最后一个版本的url - if not version['name'].startswith(f'{main_ver}.'): - continue - - remote_ver = version['name'] - if version['name'] == f'{version}/': - break - - if remote_ver: - url = f'https://cdn.npmmirror.com/binaries/chromedriver/{remote_ver}chromedriver_win32.zip' - save_path = save_path or str(Path(__file__).parent) - result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg) - - if result[0]: - return result[1] - - return None diff --git a/DrissionPage/easy_set.pyi b/DrissionPage/easy_set.pyi index 7ea52ca..d70e8b9 100644 --- a/DrissionPage/easy_set.pyi +++ b/DrissionPage/easy_set.pyi @@ -16,16 +16,13 @@ def configs_to_here(file_name: Union[Path, str] = None) -> None: ... def show_settings(ini_path: Union[str, Path] = None) -> None: ... -def set_paths(driver_path: Union[str, Path] = None, - chrome_path: Union[str, Path] = None, - browser_path: Union[str, Path] = None, +def set_paths(browser_path: Union[str, Path] = None, local_port: Union[int, str] = None, debugger_address: str = None, download_path: Union[str, Path] = None, user_data_path: Union[str, Path] = None, cache_path: Union[str, Path] = None, - ini_path: Union[str, Path] = None, - check_version: bool = False) -> None: ... + ini_path: Union[str, Path] = None) -> None: ... def use_auto_port(on_off: bool = True, ini_path: Union[str, Path] = None) -> None: ... @@ -55,17 +52,6 @@ def set_user_agent(user_agent: str, ini_path: Union[str, Path] = None) -> None: def set_proxy(proxy: str, ini_path: Union[str, Path] = None) -> None: ... -def check_driver_version(driver_path: Union[str, Path] = None, chrome_path: str = None) -> bool: ... - - -# -------------------------自动识别chrome版本号并下载对应driver------------------------ -def get_match_driver(ini_path: Union[str, None] = 'default', - save_path: str = None, - chrome_path: str = None, - show_msg: bool = True, - check_version: bool = True) -> Union[str, None]: ... - - def get_chrome_path(ini_path: str = None, show_msg: bool = True, from_ini: bool = True, diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 3bcfca0..4eabfa6 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -24,7 +24,7 @@ class ElementLossError(BaseError): _info = '元素对象因刷新已失效。' -class CallMethodError(BaseError): +class CDPError(BaseError): _info = '方法调用错误。' @@ -54,3 +54,7 @@ class NoResourceError(BaseError): class CanNotClickError(BaseError): _info = '该元素无法滚动到视口或被遮挡,无法点击。' + + +class GetDocumentError(BaseError): + _info = '获取文档失败。' diff --git a/DrissionPage/network_listener.py b/DrissionPage/network_listener.py new file mode 100644 index 0000000..92f473b --- /dev/null +++ b/DrissionPage/network_listener.py @@ -0,0 +1,325 @@ +# -*- coding:utf-8 -*- +from base64 import b64decode +from json import JSONDecodeError, loads +from queue import Queue +from re import search +from threading import Thread +from time import perf_counter, sleep + +from requests.structures import CaseInsensitiveDict + +from .errors import CDPError + + +class NetworkListener(object): + """监听器基类""" + + def __init__(self, page): + """ + :param page: ChromiumBase对象 + """ + self._page = page + self._driver = self._page.driver + + self._tmp = None # 临存捕捉到的数据 + self._request_ids = None # 暂存须要拦截的请求id + + self._total_count = None # 当次监听的数量上限 + self._caught_count = None # 当次已监听到的数量 + self._begin_time = None # 当次监听开始时间 + self._timeout = None # 当次监听超时时间 + + self.listening = False + self._targets = None # 默认监听所有 + self.tab_id = None # 当前tab的id + self._results = [] + + self._is_regex = False + self._method = None + + def set_targets(self, targets=True, is_regex=False, method=None): + """指定要等待的数据包 + :param targets: 要匹配的数据包url特征,可用list等传入多个,为True时获取所有 + :param is_regex: 设置的target是否正则表达式 + :param method: 设置监听的请求类型,可用list等指定多个,为None时监听全部 + :return: None + """ + if targets is not None: + if not isinstance(targets, (str, list, tuple, set)) and targets is not True: + raise TypeError('targets只能是str、list、tuple、set、True。') + if targets is True: + targets = '' + + if isinstance(targets, str): + self._targets = {targets} + else: + self._targets = set(targets) + + self._is_regex = is_regex + + if method is not None: + if isinstance(method, str): + self._method = {method.upper()} + elif isinstance(method, (list, tuple, set)): + self._method = set(i.upper() for i in method) + else: + raise TypeError('method参数只能是str、list、tuple、set类型。') + + def listen(self, targets=None, count=None, timeout=None): + """拦截目标请求,直到超时或达到拦截个数,每次拦截前清空结果 + 可监听多个目标,请求url包含这些字符串就会被记录 + :param targets: 要监听的目标字符串或其组成的列表,True监听所有,None则保留之前的目标不变 + :param count: 要记录的个数,到达个数停止监听 + :param timeout: 监听最长时间,到时间即使未达到记录个数也停止,None为无限长 + :return: None + """ + if targets: + self.set_targets(targets) + + self.listening = True + self._results = [] + self._request_ids = {} + self._tmp = Queue(maxsize=0) + + self._caught_count = 0 + self._begin_time = perf_counter() + self._timeout = timeout + + self._set_callback_func() + + self._total_count = len(self._targets) if not count else count + + Thread(target=self._wait_to_stop).start() + + def stop(self): + """停止监听""" + self._stop() + self.listening = False + + def wait(self): + """等待监听结束""" + while self.listening: + sleep(.2) + return self._results + + def get_results(self, target=None): + """获取结果列表 + :param target: 要获取的目标,为None时获取全部 + :return: 结果数据组成的列表 + """ + return self._results if target is None else [i for i in self._results if i.target == target] + + def _wait_to_stop(self): + """当收到停止信号、到达须获取结果数、到时间就停止""" + while self._is_continue(): + sleep(.2) + self.stop() + + def _is_continue(self): + """是否继续当前监听""" + return self.listening \ + and (self._total_count is None or self._caught_count < self._total_count) \ + and (self._timeout is None or perf_counter() - self._begin_time < self._timeout) + + def steps(self, gap=1): + """用于单步操作,可实现没收到若干个数据包执行一步操作(如翻页) + :param gap: 每接收到多少个数据包触发 + :return: 用于在接收到监听目标时触发动作的可迭代对象 + """ + if not isinstance(gap, int) or gap < 1: + raise ValueError('gap参数必须为大于0的整数。') + while self.listening or not self._tmp.empty(): + while self._tmp.qsize() >= gap: + yield self._tmp.get(False) if gap == 1 else [self._tmp.get(False) for _ in range(gap)] + + sleep(.1) + + def _set_callback_func(self): + """设置监听请求的回调函数""" + self._driver.set_listener('Network.requestWillBeSent', self._requestWillBeSent) + self._driver.set_listener('Network.responseReceived', self._response_received) + self._driver.set_listener('Network.loadingFinished', self._loading_finished) + self._driver.set_listener('Network.loadingFailed', self._loading_failed) + self._driver.call_method('Network.enable') + + def _stop(self) -> None: + """停止监听前要做的工作""" + self._driver.set_listener('Network.requestWillBeSent', None) + self._driver.set_listener('Network.responseReceived', None) + self._driver.set_listener('Network.loadingFinished', None) + self._driver.set_listener('Network.loadingFailed', None) + # self._driver.call_method('Network.disable') + + def _requestWillBeSent(self, **kwargs): + """接收到请求时的回调函数""" + for target in self._targets: + if ((self._is_regex and search(target, kwargs['request']['url'])) or + (not self._is_regex and target in kwargs['request']['url'])) and ( + not self._method or kwargs['request']['method'] in self._method): + self._request_ids[kwargs['requestId']] = DataPacket(self._page.tab_id, target, kwargs) + + if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): + self._request_ids[kwargs['requestId']]._raw_post_data = \ + self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData'] + + break + + def _response_received(self, **kwargs): + """接收到返回信息时处理方法""" + request_id = kwargs['requestId'] + if request_id in self._request_ids: + self._request_ids[request_id]._raw_response = kwargs['response'] + self._request_ids[request_id]._resource_type = kwargs['type'] + + def _loading_finished(self, **kwargs): + """请求完成时处理方法""" + request_id = kwargs['requestId'] + if request_id in self._request_ids: + try: + r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) + body = r['body'] + is_base64 = r['base64Encoded'] + except CDPError: + body = '' + is_base64 = False + + dp = self._request_ids[request_id] + dp._raw_body = body + dp._base64_body = is_base64 + + self._tmp.put(dp) + self._results.append(dp) + self._caught_count += 1 + + def _loading_failed(self, **kwargs): + """请求失败时的回调方法""" + request_id = kwargs['requestId'] + if request_id in self._request_ids: + dp = self._request_ids[request_id] + dp.errorText = kwargs['errorText'] + dp._resource_type = kwargs['type'] + + self._tmp.put(dp) + self._results.append(dp) + self._caught_count += 1 + + +class DataPacket(object): + """返回的数据包管理类""" + + def __init__(self, tab, target, raw_request): + """ + :param tab: 产生这个数据包的tab的id + :param target: 监听目标 + :param raw_request: 原始request数据,从cdp获得 + """ + self.tab = tab + self.target = target + + self._raw_request = raw_request + self._raw_post_data = None + + self._raw_response = None + self._raw_body = None + self._base64_body = False + + self._request = None + self._response = None + self.errorText = None + self._resource_type = None + + @property + def url(self): + return self.request.url + + @property + def method(self): + return self.request.method + + @property + def frameId(self): + return self._raw_request.get('frameId') + + @property + def resourceType(self): + return self._resource_type + + @property + def request(self): + if self._request is None: + self._request = Request(self._raw_request['request'], self._raw_post_data) + return self._request + + @property + def response(self): + if self._response is None: + self._response = Response(self._raw_response, self._raw_body, self._base64_body) + return self._response + + +class Request(object): + def __init__(self, raw_request, post_data): + self._request = raw_request + self._raw_post_data = post_data + self._postData = None + self._headers = None + + def __getattr__(self, item): + return self._request.get(item, None) + + @property + def headers(self): + """以大小写不敏感字典返回headers数据""" + if self._headers is None: + self._headers = CaseInsensitiveDict(self._request['headers']) + return self._headers + + @property + def postData(self): + """返回postData数据""" + if self._postData is None: + if self._raw_post_data: + postData = self._raw_post_data + elif self._request.get('postData', None): + postData = self._request['postData'] + else: + postData = False + try: + self._postData = loads(postData) + except (JSONDecodeError, TypeError): + self._postData = postData + return self._postData + + +class Response(object): + def __init__(self, raw_response, raw_body, base64_body): + self._response = raw_response + self._raw_body = raw_body + self._is_base64_body = base64_body + self._body = None + self._headers = None + + def __getattr__(self, item): + return self._response.get(item, None) + + @property + def headers(self): + """以大小写不敏感字典返回headers数据""" + if self._headers is None: + self._headers = CaseInsensitiveDict(self._response['headers']) + return self._headers + + @property + def body(self): + """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" + if self._body is None: + if self._is_base64_body: + self._body = b64decode(self._raw_body) + + else: + try: + self._body = loads(self._raw_body) + except (JSONDecodeError, TypeError): + self._body = self._raw_body + + return self._body diff --git a/DrissionPage/network_listener.pyi b/DrissionPage/network_listener.pyi new file mode 100644 index 0000000..759f7b2 --- /dev/null +++ b/DrissionPage/network_listener.pyi @@ -0,0 +1,140 @@ +from queue import Queue +from typing import Union, Dict, List, Iterable, Tuple + +from requests.structures import CaseInsensitiveDict + +from chromium_base import ChromiumBase +from chromium_driver import ChromiumDriver + + +class NetworkListener(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + self._total_count: int = ... + self._caught_count: int = ... + self._targets: Union[str, dict] = ... + self._results: list = ... + self._method: set = ... + self._tmp: Queue = ... + self._is_regex: bool = ... + self._driver: ChromiumDriver = ... + self._request_ids: dict = ... + self.listening: bool = ... + self._timeout: float = ... + self._begin_time: float = ... + + def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False, + count: int = None, method: Union[str, list, tuple, set] = None) -> None: ... + + def stop(self) -> None: ... + + @property + def results(self) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... + + def clear(self) -> None: ... + + def listen(self, targets: Union[str, List[str], Tuple, bool, None] = ..., count: int = ..., + timeout: float = ...) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... + + def _listen(self, timeout: float = None, + any_one: bool = False) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... + + def _requestWillBeSent(self, **kwargs) -> None: ... + + def _response_received(self, **kwargs) -> None: ... + + def _loading_finished(self, **kwargs) -> None: ... + + def _loading_failed(self, **kwargs) -> None: ... + + def _request_paused(self, **kwargs) -> None: ... + + def _wait_to_stop(self) -> None: ... + + def _is_continue(self) -> bool: ... + + def steps(self, gap=1) -> Iterable[Union[DataPacket, List[DataPacket]]]: ... + + def _set_callback_func(self) -> None: ... + + def _stop(self) -> None: ... + + +class DataPacket(object): + """返回的数据包管理类""" + + def __init__(self, tab: str, target: str, raw_info: dict): + self.tab: str = ... + self.target: str = ... + self._raw_request: dict = ... + self._raw_response: dict = ... + self._raw_post_data: str = ... + self._raw_body: str = ... + self._base64_body: bool = ... + self._request: Request = ... + self._response: Response = ... + self.errorText: str = ... + self._resource_type: str = ... + + @property + def url(self) -> str: ... + + @property + def method(self) -> str: ... + + @property + def frameId(self) -> str: ... + + @property + def resourceType(self) -> str: ... + + @property + def request(self) -> Request: ... + + @property + def response(self) -> Response: ... + + +class Request(object): + url: str = ... + _headers: Union[CaseInsensitiveDict, None] = ... + method: str = ... + + # urlFragment: str = ... + # postDataEntries: list = ... + # mixedContentType: str = ... + # initialPriority: str = ... + # referrerPolicy: str = ... + # isLinkPreload: bool = ... + # trustTokenParams: dict = ... + # isSameSite: bool = ... + + def __init__(self, raw_request: dict, post_data: str): + self._request: dict = ... + self._raw_post_data: str = ... + self._postData: str = ... + + @property + def headers(self) -> dict: ... + + @property + def postData(self) -> Union[str, dict]: ... + + +class Response(object): + status: str = ... + statusText: int = ... + mimeType: str = ... + + def __init__(self, raw_response: dict, raw_body: str, base64_body: bool): + self._response: dict = ... + self._raw_body: str = ... + self._is_base64_body: bool = ... + self._body: Union[str, dict] = ... + self._headers: dict = ... + + @property + def headers(self) -> CaseInsensitiveDict: ... + + @property + def body(self) -> Union[str, dict, bool]: ... diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index e7d375b..7e8bb88 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -38,7 +38,7 @@ class SessionElement(DrissionElement): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本 """ return self.ele(loc_or_str) @@ -75,12 +75,13 @@ class SessionElement(DrissionElement): """返回未格式化处理的元素内文本""" return str(self._inner_ele.text_content()) - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - return super().parent(level_or_loc) + return super().parent(level_or_loc, index) def child(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -90,7 +91,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本 """ - return super().child(index, filter_loc, timeout, ele_only=ele_only) + return super().child(filter_loc, index, timeout, ele_only=ele_only) def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -100,7 +101,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().prev(index, filter_loc, timeout, ele_only=ele_only) + return super().prev(filter_loc, index, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -110,7 +111,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().next(index, filter_loc, timeout, ele_only=ele_only) + return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -121,7 +122,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - return super().before(index, filter_loc, timeout, ele_only=ele_only) + return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中此当前元素后面符合条件的第一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -132,7 +133,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - return super().after(index, filter_loc, timeout, ele_only=ele_only) + return super().after(filter_loc, index, timeout, ele_only=ele_only) def children(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -217,7 +218,7 @@ class SessionElement(DrissionElement): def ele(self, loc_or_str, timeout=None): """返回当前元素下级符合条件的第一个元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本 """ return self._ele(loc_or_str) @@ -225,7 +226,7 @@ class SessionElement(DrissionElement): def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本组成的列表 """ return self._ele(loc_or_str, single=False) @@ -321,8 +322,7 @@ def make_session_ele(html_or_ele, loc=None, single=True): loc = loc[0], loc_str - # ChromiumElement, DriverElement - elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): + elif the_type.endswith(".ChromiumElement'>"): loc_str = loc[1] if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): loc_str = f'.{loc[1]}' diff --git a/DrissionPage/session_element.pyi b/DrissionPage/session_element.pyi index 4d455e1..c55dcfe 100644 --- a/DrissionPage/session_element.pyi +++ b/DrissionPage/session_element.pyi @@ -12,8 +12,6 @@ from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement -from mixpage.driver_element import DriverElement -from mixpage.driver_page import DriverPage from .session_page import SessionPage @@ -50,29 +48,29 @@ class SessionElement(DrissionElement): @property def raw_text(self) -> str: ... - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... + def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1) -> Union['SessionElement', None]: ... - def child(self, filter_loc: Union[tuple, str] = '', + def child(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def prev(self, filter_loc: Union[tuple, str] = '', + def prev(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def next(self, filter_loc: Union[tuple, str] = '', + def next(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def before(self, filter_loc: Union[tuple, str] = '', + def before(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... - def after(self, filter_loc: Union[tuple, str] = '', + def after(self, filter_loc: Union[tuple, str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union['SessionElement', str, None]: ... @@ -124,8 +122,8 @@ class SessionElement(DrissionElement): def _get_ele_path(self, mode: str) -> str: ... -def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, DriverElement, BaseElement, -ChromiumFrame, ChromiumBase, DriverPage], +def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, ChromiumElement, BaseElement, ChromiumFrame, +ChromiumBase], loc: Union[str, Tuple[str, str]] = None, single: bool = True) -> Union[ SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ... diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 3d934b9..cc67f6e 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -7,7 +7,6 @@ from re import search from time import sleep from urllib.parse import urlparse -from DownloadKit import DownloadKit from requests import Session, Response from requests.structures import CaseInsensitiveDict from tldextract import extract @@ -27,7 +26,6 @@ class SessionPage(BasePage): :param timeout: 连接超时时间,为None时从ini文件读取 """ self._response = None - self._download_set = None self._session = None self._set = None self._set_start_options(session_or_options, None) @@ -99,21 +97,9 @@ class SessionPage(BasePage): return None @property - def download_path(self): - """返回下载路径""" - return self._download_path - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = DownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set.DownloadKit + def user_agent(self): + """返回user agent""" + return self.session.headers.get('user-agent', '') @property def session(self): @@ -337,8 +323,18 @@ class SessionPageSetter(object): """ self._page.timeout = second + def cookie(self, cookie): + """为Session对象设置单个cookie + :param cookie: cookie信息 + :return: None + """ + if isinstance(cookie, str): + self.cookies(cookie) + else: + self.cookies([cookie]) + def cookies(self, cookies): - """为Session对象设置cookies + """为Session对象设置多个cookie,注意不要传入单个 :param cookies: cookies信息 :return: None """ @@ -366,14 +362,13 @@ class SessionPageSetter(object): """ self._page.session.headers['user-agent'] = ua - def proxies(self, http, https=None): + def proxies(self, http=None, https=None): """设置proxies参数 :param http: http代理地址 :param https: https代理地址 :return: None """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._page.session.proxies = proxies + self._page.session.proxies = {'http': http, 'https': https} def auth(self, auth): """设置认证元组或对象 @@ -440,68 +435,6 @@ class SessionPageSetter(object): self._page.session.mount(url, adapter) -class DownloadSetter(object): - """用于设置下载参数的类""" - - def __init__(self, page): - self._page = page - self._DownloadKit = None - - @property - def DownloadKit(self): - if self._DownloadKit is None: - self._DownloadKit = DownloadKit(session=self._page, goal_path=self._page.download_path) - return self._DownloadKit - - @property - def if_file_exists(self): - """返回用于设置存在同名文件时处理方法的对象""" - return FileExists(self) - - def split(self, on_off): - """设置是否允许拆分大文件用多线程下载 - :param on_off: 是否启用多线程下载大文件 - :return: None - """ - self.DownloadKit.split = on_off - - def save_path(self, path): - """设置下载保存路径 - :param path: 下载保存路径 - :return: None - """ - path = path if path is None else str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - -class FileExists(object): - """用于设置存在同名文件时处理方法""" - - def __init__(self, setter): - """ - :param setter: DownloadSetter对象 - """ - self._setter = setter - - def __call__(self, mode): - if mode not in ('skip', 'rename', 'overwrite'): - raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") - self._setter.DownloadKit.file_exists = mode - - def skip(self): - """设为跳过""" - self._setter.DownloadKit.file_exists = 'skip' - - def rename(self): - """设为重命名,文件名后加序号""" - self._setter.DownloadKit._file_exists = 'rename' - - def overwrite(self): - """设为覆盖""" - self._setter.DownloadKit._file_exists = 'overwrite' - - def check_headers(kwargs, headers, arg) -> bool: """检查kwargs或headers中是否有arg所示属性""" return arg in kwargs['headers'] or arg in headers diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index c551834..bb803f0 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -3,22 +3,20 @@ @Author : g1879 @Contact : g1879@qq.com """ -from pathlib import Path +from http.cookiejar import Cookie from typing import Any, Union, Tuple, List -from DownloadKit import DownloadKit +# from DownloadKit import DownloadKit from requests import Session, Response from requests.adapters import HTTPAdapter from requests.auth import HTTPBasicAuth from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict -from .commons.constants import NoneElement from .base import BasePage -from .chromium_page import ChromiumPage +from .commons.constants import NoneElement from .configs.session_options import SessionOptions from .session_element import SessionElement -from .web_page import WebPage class SessionPage(BasePage): @@ -29,8 +27,8 @@ class SessionPage(BasePage): self._session_options: SessionOptions = ... self._url: str = ... self._response: Response = ... - self._download_path: str = ... - self._download_set: DownloadSetter = ... + # self._download_path: str = ... + # self._DownloadKit: DownloadKit = ... self._url_available: bool = ... self.timeout: float = ... self.retry_times: int = ... @@ -64,10 +62,10 @@ class SessionPage(BasePage): def json(self) -> Union[dict, None]: ... @property - def download_path(self) -> str: ... + def user_agent(self) -> str: ... @property - def download_set(self) -> DownloadSetter: ... + def download_path(self) -> str: ... def get(self, url: str, @@ -120,8 +118,8 @@ class SessionPage(BasePage): @property def set(self) -> SessionPageSetter: ... - @property - def download(self) -> DownloadKit: ... + # @property + # def download(self) -> DownloadKit: ... def post(self, url: str, @@ -172,6 +170,8 @@ class SessionPageSetter(object): def timeout(self, second: float) -> None: ... + def cookie(self, cookie: Union[Cookie, str, dict]) -> None: ... + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... def headers(self, headers: dict) -> None: ... @@ -180,7 +180,7 @@ class SessionPageSetter(object): def user_agent(self, ua: str) -> None: ... - def proxies(self, http, https=None) -> None: ... + def proxies(self, http: str = None, https: str = None) -> None: ... def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... @@ -201,35 +201,6 @@ class SessionPageSetter(object): def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... -class DownloadSetter(object): - def __init__(self, page: Union[SessionPage, WebPage, ChromiumPage]): - self._page: SessionPage = ... - self._DownloadKit: DownloadKit = ... - - @property - def DownloadKit(self) -> DownloadKit: ... - - @property - def if_file_exists(self) -> FileExists: ... - - def split(self, on_off: bool) -> None: ... - - def save_path(self, path: Union[str, Path]): ... - - -class FileExists(object): - def __init__(self, setter: DownloadSetter): - self._setter: DownloadSetter = ... - - def __call__(self, mode: str) -> None: ... - - def skip(self) -> None: ... - - def rename(self) -> None: ... - - def overwrite(self) -> None: ... - - def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool: ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 62e1a1b..059597b 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -3,20 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ -from pathlib import Path -from warnings import warn - from requests import Session from .base import BasePage from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver -from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter +from .chromium_page import ChromiumPage, ChromiumPageSetter from .chromium_tab import WebPageTab from .commons.web import set_session_cookies, set_browser_cookies from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions -from .errors import CallMethodError from .session_page import SessionPage, SessionPageSetter @@ -27,7 +23,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """初始化函数 :param mode: 'd' 或 's',即driver模式和session模式 :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_or_options: ChromiumDriver对象或DriverOptions对象,只使用s模式时应传入False + :param driver_or_options: ChromiumDriver对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ self._mode = mode.lower() @@ -45,7 +41,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._driver_options = None self._session_options = None self._response = None - self._download_set = None self._set = None self._screencast = None @@ -59,7 +54,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _set_start_options(self, dr_opt, se_opt): """处理两种模式的设置 - :param dr_opt: ChromiumDriver或DriverOptions对象,为None则从ini读取,为False用默认信息创建 + :param dr_opt: ChromiumDriver或ChromiumOptions对象,为None则从ini读取,为False用默认信息创建 :param se_opt: Session、SessionOptions对象或配置信息,为None则从ini读取,为False用默认信息创建 :return: None """ @@ -77,7 +72,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif dr_opt is False: self._driver_options = ChromiumOptions(read_file=False) - elif str(type(dr_opt)).endswith(("ChromiumOptions'>", "DriverOptions'>")): + elif isinstance(dr_opt, ChromiumOptions): self._driver_options = dr_opt else: @@ -107,7 +102,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._timeouts = Timeout(self) self._page_load_strategy = self._driver_options.page_load_strategy - self._download_path = None if se_opt is not False: self.set.timeouts(implicit=self._session_options.timeout) @@ -186,6 +180,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """以dict方式返回cookies""" return super().cookies + @property + def user_agent(self): + """返回user agent""" + if self._mode == 's': + return super().user_agent + elif self._mode == 'd': + return super(SessionPage, self).user_agent + @property def session(self): """返回Session对象,如未初始化则按配置信息创建""" @@ -211,23 +213,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): """ self.set.timeouts(implicit=second) - @property - def download_path(self): - """返回默认下载路径""" - return super(SessionPage, self).download_path - - @property - def download_set(self): - """返回下载设置对象""" - if self._download_set is None: - self._download_set = WebPageDownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set._switched_DownloadKit - @property def set(self): """返回用于等待的对象""" @@ -360,20 +345,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): return if copy_user_agent: - selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] - self.session.headers.update({"User-Agent": selenium_user_agent}) + user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value'] + self.session.headers.update({"User-Agent": user_agent}) - # set_session_cookies(self.session, self._get_driver_cookies(as_dict=True)) - # set_session_cookies(self.session, self._get_driver_cookies(all_domains=True)) - set_session_cookies(self.session, self._get_driver_cookies()) + set_session_cookies(self.session, super(SessionPage, self).get_cookies()) def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" if not self._has_driver: return - - # set_browser_cookies(self, super().get_cookies(as_dict=True)) - # set_browser_cookies(self, super().get_cookies(all_domains=True)) set_browser_cookies(self, super().get_cookies()) def get_cookies(self, as_dict=False, all_domains=False, all_info=False): @@ -386,7 +366,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._mode == 's': return super().get_cookies(as_dict, all_domains, all_info) elif self._mode == 'd': - return self._get_driver_cookies(as_dict, all_info) + return super(SessionPage, self).get_cookies(as_dict, all_domains, all_info) def get_tab(self, tab_id=None): """获取一个标签页对象 @@ -396,21 +376,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): tab_id = tab_id or self.tab_id return WebPageTab(self, tab_id) - def _get_driver_cookies(self, as_dict=False, all_info=False): - """获取浏览器cookies - :param as_dict: 是否以dict形式返回,为True时all_info无效 - :param all_info: 是否返回所有信息 - :return: cookies信息 - """ - cookies = self.run_cdp('Network.getCookies')['cookies'] - if as_dict: - return {cookie['name']: cookie['value'] for cookie in cookies} - elif all_info: - return cookies - else: - return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']} - for cookie in cookies] - def close_driver(self): """关闭driver及浏览器""" if self._has_driver: @@ -493,66 +458,3 @@ class WebPageSetter(ChromiumPageSetter): self._session_setter.user_agent(ua) else: self._chromium_setter.user_agent(ua, platform) - - -class WebPageDownloadSetter(ChromiumDownloadSetter): - """用于设置下载参数的类""" - - def __init__(self, page): - super().__init__(page) - self._session = page.session - - @property - def _switched_DownloadKit(self): - """返回从浏览器同步cookies后的Session对象""" - if self._page.mode == 'd': - self._cookies_to_session() - return self.DownloadKit - - def save_path(self, path): - """设置下载路径 - :param path: 下载路径 - :return: None - """ - path = path or '' - path = Path(path).absolute() - path.mkdir(parents=True, exist_ok=True) - path = str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - if self._page._has_driver: - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior=self._behavior, downloadPath=path, - eventsEnabled=True) - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.run_cdp('Page.setDownloadBehavior', behavior=self._behavior, downloadPath=path) - - def by_browser(self): - """设置使用浏览器下载文件""" - if not self._page._has_driver: - raise RuntimeError('浏览器未连接。') - - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='allow', eventsEnabled=True, - downloadPath=self._page.download_path) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_browser - - except CallMethodError: - warn('\n您的浏览器版本太低,用新标签页下载文件可能崩溃,建议升级。') - self._page.driver.Page.setDownloadBehavior(behavior='allow', downloadPath=self._page.download_path) - self._page.driver.Page.downloadWillBegin = self._download_by_browser - - self._behavior = 'allow' - - def by_DownloadKit(self): - """设置使用DownloadKit下载文件""" - if self._page._has_driver: - try: - self._page.browser_driver.Browser.setDownloadBehavior(behavior='deny', eventsEnabled=True) - self._page.browser_driver.Browser.downloadWillBegin = self._download_by_DownloadKit - except CallMethodError: - raise RuntimeError('您的浏览器版本太低,不支持此方法,请升级。') - - self._behavior = 'deny' diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index bc4eb58..d153c30 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -12,10 +12,9 @@ from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage, ChromiumDownloadSetter, ChromiumPageSetter +from .chromium_page import ChromiumPage, ChromiumPageSetter from .chromium_tab import WebPageTab from .configs.chromium_options import ChromiumOptions -from .configs.driver_options import DriverOptions from .configs.session_options import SessionOptions from .session_element import SessionElement from .session_page import SessionPage, SessionPageSetter @@ -26,15 +25,15 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def __init__(self, mode: str = 'd', timeout: float = None, - driver_or_options: Union[ChromiumDriver, ChromiumOptions, DriverOptions, bool] = None, + driver_or_options: Union[ChromiumDriver, ChromiumOptions, bool] = None, session_or_options: Union[Session, SessionOptions, bool] = None) -> None: self._mode: str = ... self._has_driver: bool = ... self._has_session: bool = ... self.address: str = ... self._session_options: Union[SessionOptions, None] = ... - self._driver_options: Union[ChromiumOptions, DriverOptions, None] = ... - self._download_set: WebPageDownloadSetter = ... + self._driver_options: Union[ChromiumOptions, None] = ... + self._DownloadKit: DownloadKit = ... self._download_path: str = ... self._tab_obj: ChromiumDriver = ... @@ -67,6 +66,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def cookies(self) -> dict: ... + @property + def user_agent(self) -> str: ... + @property def session(self) -> Session: ... @@ -79,12 +81,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @timeout.setter def timeout(self, second: float) -> None: ... - @property - def download_path(self) -> str: ... - - @property - def download_set(self) -> WebPageDownloadSetter: ... - def get(self, url: str, show_errmsg: bool = False, @@ -129,8 +125,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def get_tab(self, tab_id: str = None) -> WebPageTab: ... - def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ... - def close_driver(self) -> None: ... def close_session(self) -> None: ... @@ -156,9 +150,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage): verify: Any | None = ..., cert: Any | None = ...) -> bool: ... - @property - def download(self) -> DownloadKit: ... - @property def set(self) -> WebPageSetter: ... @@ -167,7 +158,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ Union[ChromiumElement, str, ChromiumFrame]]]: ... - def _set_start_options(self, dr_opt: Union[ChromiumDriver, DriverOptions, bool, None], + def _set_start_options(self, dr_opt: Union[ChromiumDriver, bool, None], se_opt: Union[Session, SessionOptions, bool, None]) -> None: ... def quit(self) -> None: ... @@ -185,21 +176,3 @@ class WebPageSetter(ChromiumPageSetter): def headers(self, headers: dict) -> None: ... def cookies(self, cookies) -> None: ... - - -class WebPageDownloadSetter(ChromiumDownloadSetter): - def __init__(self, page: WebPage): - self._page: WebPage = ... - self._behavior: str = ... - self._session: Session = None - - @property - def _switched_DownloadKit(self) -> DownloadKit: ... - - def save_path(self, path) -> None: ... - - def by_browser(self) -> None: ... - - def by_DownloadKit(self) -> None: ... - - def _download_by_DownloadKit(self, **kwargs) -> None: ... From 3f999f066c1261b1524f063ec4d0cc6670ad6dfb Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 29 Jun 2023 15:57:14 +0800 Subject: [PATCH 02/13] =?UTF-8?q?=E7=AD=89=E5=BE=85=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=8F=AF=E6=8A=9B=E5=87=BA=E5=BC=82=E5=B8=B8=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=EF=BC=9Beasy=5Fset=E5=88=A0=E9=99=A4raise=5Fwhen=5Fele=5Fnot?= =?UTF-8?q?=5Ffound()=E6=96=B9=E6=B3=95=EF=BC=9BSettings=E7=9A=84raise=5Fe?= =?UTF-8?q?le=5Fnot=5Ffound=E5=92=8Craise=5Fwait=5Ffailed=E6=94=B9?= =?UTF-8?q?=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/base.py | 16 ++++---- DrissionPage/chromium_base.py | 48 ++++++++++++++-------- DrissionPage/chromium_base.pyi | 18 +++++---- DrissionPage/chromium_element.py | 67 +++++++++++++++++++------------ DrissionPage/chromium_element.pyi | 18 ++++----- DrissionPage/chromium_page.py | 47 ++++++---------------- DrissionPage/chromium_page.pyi | 6 +-- DrissionPage/commons/constants.py | 5 ++- DrissionPage/easy_set.py | 9 ----- DrissionPage/easy_set.pyi | 3 -- DrissionPage/errors.py | 4 ++ 11 files changed, 123 insertions(+), 118 deletions(-) diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 34044c3..54b9a7c 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -71,7 +71,7 @@ class BaseElement(BaseParser): r = self._find_elements(loc_or_str, timeout=timeout, single=single, relative=relative, raise_err=raise_err) if not single or raise_err is False: return r - if not r and (Settings.raise_ele_not_found or raise_err is True): + if not r and (Settings.raise_when_ele_not_found or raise_err is True): raise ElementNotFoundError return r @@ -151,7 +151,7 @@ class DrissionElement(BaseElement): filter_loc = '' nodes = self.children(filter_loc=filter_loc, timeout=timeout, ele_only=ele_only) if not nodes: - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -159,7 +159,7 @@ class DrissionElement(BaseElement): try: return nodes[index - 1] except IndexError: - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -178,7 +178,7 @@ class DrissionElement(BaseElement): nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -197,7 +197,7 @@ class DrissionElement(BaseElement): nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -216,7 +216,7 @@ class DrissionElement(BaseElement): nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout, ele_only=ele_only) if nodes: return nodes[-1] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -235,7 +235,7 @@ class DrissionElement(BaseElement): nodes = self._get_brothers(index, filter_loc, 'following', False, timeout, ele_only=ele_only) if nodes: return nodes[0] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -444,7 +444,7 @@ class BasePage(BaseParser): if not single or raise_err is False: return r - if not r and (Settings().raise_ele_not_found is True or raise_err is True): + if not r and (Settings.raise_when_ele_not_found is True or raise_err is True): raise ElementNotFoundError return r diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index e972da5..abc4257 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -15,12 +15,12 @@ from requests import Session from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chromium_ele -from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement +from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement, Settings from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder from .commons.web import set_browser_cookies from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ - NoRectError, BrowserConnectError, GetDocumentError + NoRectError, BrowserConnectError, GetDocumentError, WaitTimeoutError from .network_listener import NetworkListener from .session_element import make_session_ele @@ -1002,66 +1002,78 @@ class ChromiumBaseWaiter(object): """ self._driver = page_or_ele - def ele_delete(self, loc_or_ele, timeout=None): + def ele_delete(self, loc_or_ele, timeout=None, raise_err=None): """等待元素从DOM中删除 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.delete(timeout) if ele else True + return ele.wait.delete(timeout, raise_err=raise_err) if ele else True - def ele_display(self, loc_or_ele, timeout=None): + def ele_display(self, loc_or_ele, timeout=None, raise_err=None): """等待元素变成显示状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.display(timeout) + return ele.wait.display(timeout, raise_err=raise_err) - def ele_hidden(self, loc_or_ele, timeout=None): + def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): """等待元素变成隐藏状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.hidden(timeout) + return ele.wait.hidden(timeout, raise_err=raise_err) - def ele_load(self, loc, timeout=None): + def ele_load(self, loc, timeout=None, raise_err=None): """等待元素加载到DOM :param loc: 要等待的元素,输入定位符 :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 成功返回元素对象,失败返回False """ ele = self._driver._ele(loc, raise_err=False, timeout=timeout) - return ele if ele else False + if ele: + return True + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素加载失败。') + else: + return False - def load_start(self, timeout=None): + def load_start(self, timeout=None, raise_err=None): """等待页面开始加载 :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._loading(timeout=timeout, gap=.002) + return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) - def load_complete(self, timeout=None): + def load_complete(self, timeout=None, raise_err=None): """等待页面开始加载 :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._loading(timeout=timeout, start=False) + return self._loading(timeout=timeout, start=False, raise_err=raise_err) def upload_paths_inputted(self): """等待自动填写上传文件路径""" while self._driver._upload_list: sleep(.01) - def _loading(self, timeout=None, start=True, gap=.01): + def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): """等待页面开始加载或加载完成 :param timeout: 超时时间,为None时使用页面timeout属性 :param start: 等待开始还是结束 :param gap: 间隔秒数 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout != 0: @@ -1072,7 +1084,11 @@ class ChromiumBaseWaiter(object): if self._driver.is_loading == start: return True sleep(gap) - return False + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待页面加载失败。') + else: + return False class ChromiumPageScroll(ChromiumScroll): diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 11c5878..7b275a8 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -217,19 +217,23 @@ class ChromiumBaseWaiter(object): def __init__(self, page: ChromiumBase): self._driver: ChromiumBase = ... - def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... - def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... - def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None) -> bool: ... + def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... - def ele_load(self, loc: Union[str, tuple], timeout: float = None) -> Union[bool, ChromiumElement]: ... + def ele_load(self, loc: Union[str, tuple], timeout: float = None, + raise_err: bool = None) -> Union[bool, ChromiumElement]: ... - def _loading(self, timeout: float = None, start: bool = True, gap: float = .01) -> bool: ... + def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ... - def load_start(self, timeout: float = None) -> bool: ... + def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def load_complete(self, timeout: float = None) -> bool: ... + def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... def upload_paths_inputted(self) -> None: ... diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 5aad97a..03fb5f9 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -14,7 +14,7 @@ from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinition from .commons.locator import get_loc from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ - CDPError, NoResourceError, CanNotClickError + CDPError, NoResourceError, CanNotClickError, WaitTimeoutError from .session_element import make_session_ele @@ -839,7 +839,7 @@ class ChromiumShadowRoot(BaseElement): """ nodes = self.children(filter_loc=filter_loc) if not nodes: - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -847,7 +847,7 @@ class ChromiumShadowRoot(BaseElement): try: return nodes[index - 1] except IndexError: - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -861,7 +861,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.nexts(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -876,7 +876,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.befores(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -891,7 +891,7 @@ class ChromiumShadowRoot(BaseElement): nodes = self.afters(filter_loc=filter_loc) if nodes: return nodes[index - 1] - if Settings.raise_ele_not_found: + if Settings.raise_when_ele_not_found: raise ElementNotFoundError else: return NoneElement() @@ -1633,9 +1633,9 @@ class Click(object): if by_js is not False: self._ele.run_js('this.click();') return True - - if Settings.raise_click_failed: + if Settings.raise_when_click_failed: raise CanNotClickError + return False def right(self): @@ -2039,58 +2039,66 @@ class ChromiumElementWaiter(object): self._page = page self._ele = ele - def delete(self, timeout=None): + def delete(self, timeout=None, raise_err=None): """等待元素从dom删除 :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_alive', False, timeout) + return self._wait_state('is_alive', False, timeout, raise_err) - def display(self, timeout=None): + def display(self, timeout=None, raise_err=None): """等待元素从dom显示 :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_displayed', True, timeout) + return self._wait_state('is_displayed', True, timeout, raise_err) - def hidden(self, timeout=None): + def hidden(self, timeout=None, raise_err=None): """等待元素从dom隐藏 :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_displayed', False, timeout) + return self._wait_state('is_displayed', False, timeout, raise_err) - def covered(self, timeout=None): + def covered(self, timeout=None, raise_err=None): """等待当前元素被遮盖 :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_covered', True, timeout) + return self._wait_state('is_covered', True, timeout, raise_err) - def not_covered(self, timeout=None): + def not_covered(self, timeout=None, raise_err=None): """等待当前元素被遮盖 :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_covered', False, timeout) + return self._wait_state('is_covered', False, timeout, raise_err) - def enabled(self, timeout=None): + def enabled(self, timeout=None, raise_err=None): """等待当前元素变成可用 :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_enabled', True, timeout) + return self._wait_state('is_enabled', True, timeout, raise_err) - def disabled(self, timeout=None): + def disabled(self, timeout=None, raise_err=None): """等待当前元素变成可用 :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ - return self._wait_state('is_enabled', False, timeout) + return self._wait_state('is_enabled', False, timeout, raise_err) - def disabled_or_delete(self, timeout=None): + def disabled_or_delete(self, timeout=None, raise_err=None): """等待当前元素变成不可用或从DOM移除 :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout is None: @@ -2101,13 +2109,17 @@ class ChromiumElementWaiter(object): return True sleep(.05) - return False + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素隐藏或删除失败。') + else: + return False - def _wait_state(self, attr, mode=False, timeout=None): + def _wait_state(self, attr, mode=False, timeout=None, raise_err=None): """等待元素某个bool状态到达指定状态 :param attr: 状态名称 :param mode: True或False :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout is None: @@ -2118,7 +2130,10 @@ class ChromiumElementWaiter(object): return True sleep(.05) - return False + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素状态改变失败。') + else: + return False class Pseudo(object): diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 168bad7..a218d56 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -561,23 +561,23 @@ class ChromiumElementWaiter(object): self._ele: ChromiumElement = ... self._page: ChromiumBase = ... - def delete(self, timeout: float = None) -> bool: ... + def delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def display(self, timeout: float = None) -> bool: ... + def display(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def hidden(self, timeout: float = None) -> bool: ... + def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def covered(self, timeout: float = None) -> bool: ... + def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def not_covered(self, timeout: float = None) -> bool: ... + def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def enabled(self, timeout: float = None) -> bool: ... + def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def disabled(self, timeout: float = None) -> bool: ... + def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def disabled_or_delete(self, timeout: float = None) -> bool: ... + def disabled_or_delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - def _wait_state(self, attr: str, mode: bool = False, timeout: float = None) -> bool: ... + def _wait_state(self, attr: str, mode: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... class Pseudo(object): diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 00ec09d..81bde60 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -10,9 +10,10 @@ from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBa from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser +from .commons.constants import Settings from .commons.tools import port_is_using from .configs.chromium_options import ChromiumOptions -from .errors import BrowserConnectError +from .errors import BrowserConnectError, WaitTimeoutError class ChromiumPage(ChromiumBase): @@ -152,24 +153,6 @@ class ChromiumPage(ChromiumBase): self._set = ChromiumPageSetter(self) return self._set - # @property - # def download_path(self): - # """返回默认下载路径""" - # p = self._download_path or '' - # return str(Path(p).absolute()) - # - # @property - # def download_set(self): - # """返回用于设置下载参数的对象""" - # if self._download_set is None: - # self._download_set = BaseDownloadSetter(self) - # return self._download_set - # - # @property - # def download(self): - # """返回下载器对象""" - # return self.download_set._switched_DownloadKit - @property def rect(self): if self._rect is None: @@ -385,29 +368,23 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): super().__init__(page) self._listener = None - def new_tab(self, timeout=None): + def new_tab(self, timeout=None, raise_err=None): """等待新标签页出现 :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :return: 是否等到下载开始 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等到新标签页出现 """ timeout = timeout if timeout is not None else self._driver.timeout end_time = perf_counter() + timeout - while self._driver.tab_id == self._driver.latest_tab and perf_counter() < end_time: + while perf_counter() < end_time: + if self._driver.tab_id != self._driver.latest_tab: + return True sleep(.01) - # def download_begin(self, timeout=1.5): - # """等待浏览器下载开始 - # :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - # :return: 是否等到下载开始 - # """ - # return self._driver.download_set.wait_download_begin(timeout) - # - # def download_finish(self, timeout=None): - # """等待下载结束 - # :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - # :return: 是否等到下载结束 - # """ - # return self._driver.download_set.wait_download_finish(timeout) + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待新标签页失败。') + else: + return False class ChromiumTabRect(object): diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index cfcb25e..2a9eda4 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -102,11 +102,11 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): _driver: ChromiumPage = ... _listener: Union[NetworkListener, None] = ... - def download_begin(self, timeout: float = 1.5) -> bool: ... + # def download_begin(self, timeout: float = 1.5) -> bool: ... - def download_finish(self, timeout: float = None) -> bool: ... + # def download_finish(self, timeout: float = None) -> bool: ... - def new_tab(self, timeout: float = None) -> bool: ... + def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... class ChromiumTabRect(object): diff --git a/DrissionPage/commons/constants.py b/DrissionPage/commons/constants.py index c06c2c4..2a219c7 100644 --- a/DrissionPage/commons/constants.py +++ b/DrissionPage/commons/constants.py @@ -11,8 +11,9 @@ ERROR = 'error' class Settings(object): - raise_ele_not_found = False - raise_click_failed = False + raise_when_ele_not_found = False + raise_when_click_failed = False + raise_when_wait_failed = False class NoneElement(object): diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 91fcacb..d783c5b 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -7,19 +7,10 @@ from os import popen from pathlib import Path from re import search -from .commons.constants import Settings from .configs.chromium_options import ChromiumOptions from .configs.options_manage import OptionsManager -def raise_when_ele_not_found(on_off=True): - """设置全局变量,找不到元素时是否抛出异常 - :param on_off: True 或 False - :return: None - """ - Settings.raise_ele_not_found = on_off - - def configs_to_here(save_name=None): """把默认ini文件复制到当前目录 :param save_name: 指定文件名,为None则命名为'dp_configs.ini' diff --git a/DrissionPage/easy_set.pyi b/DrissionPage/easy_set.pyi index d70e8b9..3e8fc47 100644 --- a/DrissionPage/easy_set.pyi +++ b/DrissionPage/easy_set.pyi @@ -7,9 +7,6 @@ from pathlib import Path from typing import Union -def raise_when_ele_not_found(on_off: bool = True) -> None: ... - - def configs_to_here(file_name: Union[Path, str] = None) -> None: ... diff --git a/DrissionPage/errors.py b/DrissionPage/errors.py index 4eabfa6..7bab148 100644 --- a/DrissionPage/errors.py +++ b/DrissionPage/errors.py @@ -58,3 +58,7 @@ class CanNotClickError(BaseError): class GetDocumentError(BaseError): _info = '获取文档失败。' + + +class WaitTimeoutError(BaseError): + _info = '等待失败。' From b62c3cb6a1b4b24566ad316f6b449bc25461e26a Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 29 Jun 2023 17:14:25 +0800 Subject: [PATCH 03/13] =?UTF-8?q?Waiter=E9=87=8D=E6=9E=84=E5=88=B0?= =?UTF-8?q?=E4=B8=93=E5=B1=9E=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 101 +------------ DrissionPage/chromium_base.pyi | 26 +--- DrissionPage/chromium_element.py | 111 +------------- DrissionPage/chromium_element.pyi | 27 +--- DrissionPage/chromium_frame.py | 14 +- DrissionPage/chromium_frame.pyi | 9 +- DrissionPage/chromium_page.py | 30 +--- DrissionPage/chromium_page.pyi | 119 +++++++-------- DrissionPage/waiter.py | 242 ++++++++++++++++++++++++++++++ DrissionPage/waiter.pyi | 78 ++++++++++ 10 files changed, 387 insertions(+), 370 deletions(-) create mode 100644 DrissionPage/waiter.py create mode 100644 DrissionPage/waiter.pyi diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index abc4257..a046cc1 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -15,14 +15,15 @@ from requests import Session from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chromium_ele -from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement, Settings +from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder from .commons.web import set_browser_cookies from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ - NoRectError, BrowserConnectError, GetDocumentError, WaitTimeoutError + NoRectError, BrowserConnectError, GetDocumentError from .network_listener import NetworkListener from .session_element import make_session_ele +from .waiter import ChromiumBaseWaiter class ChromiumBase(BasePage): @@ -995,102 +996,6 @@ class ChromiumBaseSetter(object): self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) -class ChromiumBaseWaiter(object): - def __init__(self, page_or_ele): - """ - :param page_or_ele: 页面对象或元素对象 - """ - self._driver = page_or_ele - - def ele_delete(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素从DOM中删除 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.delete(timeout, raise_err=raise_err) if ele else True - - def ele_display(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素变成显示状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.display(timeout, raise_err=raise_err) - - def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): - """等待元素变成隐藏状态 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) - return ele.wait.hidden(timeout, raise_err=raise_err) - - def ele_load(self, loc, timeout=None, raise_err=None): - """等待元素加载到DOM - :param loc: 要等待的元素,输入定位符 - :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 成功返回元素对象,失败返回False - """ - ele = self._driver._ele(loc, raise_err=False, timeout=timeout) - if ele: - return True - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素加载失败。') - else: - return False - - def load_start(self, timeout=None, raise_err=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) - - def load_complete(self, timeout=None, raise_err=None): - """等待页面开始加载 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._loading(timeout=timeout, start=False, raise_err=raise_err) - - def upload_paths_inputted(self): - """等待自动填写上传文件路径""" - while self._driver._upload_list: - sleep(.01) - - def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): - """等待页面开始加载或加载完成 - :param timeout: 超时时间,为None时使用页面timeout属性 - :param start: 等待开始还是结束 - :param gap: 间隔秒数 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout != 0: - if timeout is None or timeout is True: - timeout = self._driver.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._driver.is_loading == start: - return True - sleep(gap) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待页面加载失败。') - else: - return False - - class ChromiumPageScroll(ChromiumScroll): def __init__(self, page): """ diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 7b275a8..160609d 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -10,6 +10,7 @@ from DataRecorder import Recorder from requests import Session from requests.cookies import RequestsCookieJar +from .waiter import ChromiumBaseWaiter from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement, ChromiumScroll @@ -213,31 +214,6 @@ class ChromiumBase(BasePage): timeout: float = None) -> Union[bool, None]: ... -class ChromiumBaseWaiter(object): - def __init__(self, page: ChromiumBase): - self._driver: ChromiumBase = ... - - def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, - raise_err: bool = None) -> bool: ... - - def ele_load(self, loc: Union[str, tuple], timeout: float = None, - raise_err: bool = None) -> Union[bool, ChromiumElement]: ... - - def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ... - - def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def upload_paths_inputted(self) -> None: ... - - class ChromiumPageScroll(ChromiumScroll): def __init__(self, page: ChromiumBase): ... diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 03fb5f9..68b63fe 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -14,8 +14,9 @@ from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinition from .commons.locator import get_loc from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ - CDPError, NoResourceError, CanNotClickError, WaitTimeoutError + CDPError, NoResourceError, CanNotClickError from .session_element import make_session_ele +from .waiter import ChromiumElementWaiter class ChromiumElement(DrissionElement): @@ -2028,114 +2029,6 @@ class ChromiumSelect(object): self._ele.run_js('this.dispatchEvent(new UIEvent("change"));') -class ChromiumElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" - - def __init__(self, page, ele): - """等待元素在dom中某种状态,如删除、显示、隐藏 - :param page: 元素所在页面 - :param ele: 要等待的元素 - """ - self._page = page - self._ele = ele - - def delete(self, timeout=None, raise_err=None): - """等待元素从dom删除 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_alive', False, timeout, raise_err) - - def display(self, timeout=None, raise_err=None): - """等待元素从dom显示 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', True, timeout, raise_err) - - def hidden(self, timeout=None, raise_err=None): - """等待元素从dom隐藏 - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_displayed', False, timeout, raise_err) - - def covered(self, timeout=None, raise_err=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', True, timeout, raise_err) - - def not_covered(self, timeout=None, raise_err=None): - """等待当前元素被遮盖 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_covered', False, timeout, raise_err) - - def enabled(self, timeout=None, raise_err=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_enabled', True, timeout, raise_err) - - def disabled(self, timeout=None, raise_err=None): - """等待当前元素变成可用 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - return self._wait_state('is_enabled', False, timeout, raise_err) - - def disabled_or_delete(self, timeout=None, raise_err=None): - """等待当前元素变成不可用或从DOM移除 - :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if not self._ele.states.is_enabled or not self._ele.states.is_alive: - return True - sleep(.05) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素隐藏或删除失败。') - else: - return False - - def _wait_state(self, attr, mode=False, timeout=None, raise_err=None): - """等待元素某个bool状态到达指定状态 - :param attr: 状态名称 - :param mode: True或False - :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等待成功 - """ - if timeout is None: - timeout = self._page.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._ele.states.__getattribute__(attr) == mode: - return True - sleep(.05) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待元素状态改变失败。') - else: - return False - - class Pseudo(object): def __init__(self, ele): """ diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index a218d56..ae78c42 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -12,6 +12,7 @@ from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage from .commons.constants import NoneElement from .session_element import SessionElement +from .waiter import ChromiumElementWaiter from .web_page import WebPage @@ -554,32 +555,6 @@ class ChromiumSelect(object): def _dispatch_change(self) -> None: ... -class ChromiumElementWaiter(object): - def __init__(self, - page: ChromiumBase, - ele: ChromiumElement): - self._ele: ChromiumElement = ... - self._page: ChromiumBase = ... - - def delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def display(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def disabled_or_delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - def _wait_state(self, attr: str, mode: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... - - class Pseudo(object): def __init__(self, ele: ChromiumElement): self._ele: ChromiumElement = ... diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 6a9d70c..65a76ae 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -7,10 +7,11 @@ from re import search from threading import Thread from time import sleep, perf_counter -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter -from .chromium_element import ChromiumElement, ChromiumElementWaiter +from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter +from .chromium_element import ChromiumElement from .commons.tools import get_usable_path from .errors import ContextLossError +from .waiter import FrameWaiter class ChromiumFrame(ChromiumBase): @@ -660,12 +661,3 @@ class ChromiumFrameSetter(ChromiumBaseSetter): """ self._page._check_ok() self._page.frame_ele.set.attr(attr, value) - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame): - """ - :param frame: ChromiumFrame对象 - """ - super().__init__(frame) - super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 47dc8a1..0f56dab 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -6,8 +6,9 @@ from pathlib import Path from typing import Union, Tuple, List, Any -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter, ChromiumBaseWaiter -from .chromium_element import ChromiumElement, Locations, ChromiumElementStates, ChromiumElementWaiter +from .waiter import FrameWaiter +from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter +from .chromium_element import ChromiumElement, Locations, ChromiumElementStates class ChromiumFrame(ChromiumBase): @@ -210,7 +211,3 @@ class ChromiumFrameSetter(ChromiumBaseSetter): _page: ChromiumFrame = ... def attr(self, attr: str, value: str) -> None: ... - - -class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): - def __init__(self, frame: ChromiumFrame): ... diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 81bde60..6dd7097 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -6,14 +6,14 @@ from platform import system from time import perf_counter, sleep -from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter, ChromiumBaseWaiter +from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser -from .commons.constants import Settings from .commons.tools import port_is_using from .configs.chromium_options import ChromiumOptions -from .errors import BrowserConnectError, WaitTimeoutError +from .errors import BrowserConnectError +from .waiter import ChromiumPageWaiter class ChromiumPage(ChromiumBase): @@ -363,30 +363,6 @@ class ChromiumPage(ChromiumBase): self._tab_obj.has_alert = True -class ChromiumPageWaiter(ChromiumBaseWaiter): - def __init__(self, page: ChromiumBase): - super().__init__(page) - self._listener = None - - def new_tab(self, timeout=None, raise_err=None): - """等待新标签页出现 - :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 - :return: 是否等到新标签页出现 - """ - timeout = timeout if timeout is not None else self._driver.timeout - end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._driver.tab_id != self._driver.latest_tab: - return True - sleep(.01) - - if raise_err is True or Settings.raise_when_wait_failed is True: - raise WaitTimeoutError('等待新标签页失败。') - else: - return False - - class ChromiumTabRect(object): def __init__(self, page): self._page = page diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 2a9eda4..47115cb 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -4,19 +4,13 @@ @Contact : g1879@qq.com """ from os import popen -from pathlib import Path -from typing import Union, Tuple, List, Dict +from typing import Union, Tuple, List -from DownloadKit import DownloadKit -from DownloadKit.mission import Mission -from requests import Session - -from .chromium_base import ChromiumBase, ChromiumBaseSetter, ChromiumBaseWaiter +from .chromium_base import ChromiumBase, ChromiumBaseSetter from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .configs.chromium_options import ChromiumOptions -from .network_listener import NetworkListener -from .session_page import DownloadSetter +from .waiter import ChromiumPageWaiter class ChromiumPage(ChromiumBase): @@ -98,17 +92,6 @@ class ChromiumPage(ChromiumBase): def _on_alert_open(self, **kwargs): ... -class ChromiumPageWaiter(ChromiumBaseWaiter): - _driver: ChromiumPage = ... - _listener: Union[NetworkListener, None] = ... - - # def download_begin(self, timeout: float = 1.5) -> bool: ... - - # def download_finish(self, timeout: float = None) -> bool: ... - - def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... - - class ChromiumTabRect(object): def __init__(self, page: ChromiumPage): self._page: ChromiumPage = ... @@ -142,54 +125,54 @@ class ChromiumTabRect(object): def _get_browser_rect(self) -> dict: ... -class BaseDownloadSetter(DownloadSetter): - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - self._behavior: str = ... - self._session: Session = ... - self._save_path: str = ... - self._rename: str = ... - self._waiting_download: bool = ... - self._download_begin: bool = ... - self._browser_missions: Dict[str, BrowserDownloadMission] = ... - self._browser_downloading_count: int = ... - self._show_msg: bool = ... - - @property - def session(self) -> Session: ... - - @property - def browser_missions(self) -> List[BrowserDownloadMission]: ... - - @property - def DownloadKit_missions(self) -> List[Mission]: ... - - @property - def _switched_DownloadKit(self) -> DownloadKit: ... - - def save_path(self, path: Union[str, Path]) -> None: ... - - def rename(self, name: str) -> None: ... - - def by_browser(self) -> None: ... - - def by_DownloadKit(self) -> None: ... - - def wait_download_begin(self, timeout: float = None) -> bool: ... - - def wait_download_finish(self, timeout: float = None) -> bool: ... - - def show_msg(self, on_off: bool = True) -> None: ... - - def _cookies_to_session(self) -> None: ... - - def _download_by_DownloadKit(self, **kwargs) -> None: ... - - def _download_will_begin(self, **kwargs) -> None: ... - - def _download_progress(self, **kwargs) -> None: ... - - def _wait_download_complete(self, mission: Mission) -> None: ... +# class BaseDownloadSetter(DownloadSetter): +# def __init__(self, page: ChromiumPage): +# self._page: ChromiumPage = ... +# self._behavior: str = ... +# self._session: Session = ... +# self._save_path: str = ... +# self._rename: str = ... +# self._waiting_download: bool = ... +# self._download_begin: bool = ... +# self._browser_missions: Dict[str, BrowserDownloadMission] = ... +# self._browser_downloading_count: int = ... +# self._show_msg: bool = ... +# +# @property +# def session(self) -> Session: ... +# +# @property +# def browser_missions(self) -> List[BrowserDownloadMission]: ... +# +# @property +# def DownloadKit_missions(self) -> List[Mission]: ... +# +# @property +# def _switched_DownloadKit(self) -> DownloadKit: ... +# +# def save_path(self, path: Union[str, Path]) -> None: ... +# +# def rename(self, name: str) -> None: ... +# +# def by_browser(self) -> None: ... +# +# def by_DownloadKit(self) -> None: ... +# +# def wait_download_begin(self, timeout: float = None) -> bool: ... +# +# def wait_download_finish(self, timeout: float = None) -> bool: ... +# +# def show_msg(self, on_off: bool = True) -> None: ... +# +# def _cookies_to_session(self) -> None: ... +# +# def _download_by_DownloadKit(self, **kwargs) -> None: ... +# +# def _download_will_begin(self, **kwargs) -> None: ... +# +# def _download_progress(self, **kwargs) -> None: ... +# +# def _wait_download_complete(self, mission: Mission) -> None: ... class BrowserDownloadMission(object): diff --git a/DrissionPage/waiter.py b/DrissionPage/waiter.py new file mode 100644 index 0000000..7bfd238 --- /dev/null +++ b/DrissionPage/waiter.py @@ -0,0 +1,242 @@ +# -*- coding:utf-8 -*- +from time import sleep, perf_counter + +from .commons.constants import Settings +from .errors import WaitTimeoutError + + +class ChromiumBaseWaiter(object): + def __init__(self, page_or_ele): + """ + :param page_or_ele: 页面对象或元素对象 + """ + self._driver = page_or_ele + + def ele_delete(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素从DOM中删除 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.delete(timeout, raise_err=raise_err) if ele else True + + def ele_display(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素变成显示状态 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.display(timeout, raise_err=raise_err) + + def ele_hidden(self, loc_or_ele, timeout=None, raise_err=None): + """等待元素变成隐藏状态 + :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) + return ele.wait.hidden(timeout, raise_err=raise_err) + + def ele_load(self, loc, timeout=None, raise_err=None): + """等待元素加载到DOM + :param loc: 要等待的元素,输入定位符 + :param timeout: 超时时间,默认读取页面超时时间 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 成功返回元素对象,失败返回False + """ + ele = self._driver._ele(loc, raise_err=False, timeout=timeout) + if ele: + return True + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素加载失败。') + else: + return False + + def load_start(self, timeout=None, raise_err=None): + """等待页面开始加载 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) + + def load_complete(self, timeout=None, raise_err=None): + """等待页面开始加载 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._loading(timeout=timeout, start=False, raise_err=raise_err) + + def upload_paths_inputted(self): + """等待自动填写上传文件路径""" + while self._driver._upload_list: + sleep(.01) + + def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): + """等待页面开始加载或加载完成 + :param timeout: 超时时间,为None时使用页面timeout属性 + :param start: 等待开始还是结束 + :param gap: 间隔秒数 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout != 0: + if timeout is None or timeout is True: + timeout = self._driver.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._driver.is_loading == start: + return True + sleep(gap) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待页面加载失败。') + else: + return False + + +class ChromiumPageWaiter(ChromiumBaseWaiter): + def __init__(self, page): + super().__init__(page) + # self._listener = None + + def new_tab(self, timeout=None, raise_err=None): + """等待新标签页出现 + :param timeout: 等待超时时间,为None则使用页面对象timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等到新标签页出现 + """ + timeout = timeout if timeout is not None else self._driver.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._driver.tab_id != self._driver.latest_tab: + return True + sleep(.01) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待新标签页失败。') + else: + return False + + +class ChromiumElementWaiter(object): + """等待元素在dom中某种状态,如删除、显示、隐藏""" + + def __init__(self, page, ele): + """等待元素在dom中某种状态,如删除、显示、隐藏 + :param page: 元素所在页面 + :param ele: 要等待的元素 + """ + self._page = page + self._ele = ele + + def delete(self, timeout=None, raise_err=None): + """等待元素从dom删除 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_alive', False, timeout, raise_err) + + def display(self, timeout=None, raise_err=None): + """等待元素从dom显示 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', True, timeout, raise_err) + + def hidden(self, timeout=None, raise_err=None): + """等待元素从dom隐藏 + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_displayed', False, timeout, raise_err) + + def covered(self, timeout=None, raise_err=None): + """等待当前元素被遮盖 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', True, timeout, raise_err) + + def not_covered(self, timeout=None, raise_err=None): + """等待当前元素被遮盖 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_covered', False, timeout, raise_err) + + def enabled(self, timeout=None, raise_err=None): + """等待当前元素变成可用 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', True, timeout, raise_err) + + def disabled(self, timeout=None, raise_err=None): + """等待当前元素变成可用 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._wait_state('is_enabled', False, timeout, raise_err) + + def disabled_or_delete(self, timeout=None, raise_err=None): + """等待当前元素变成不可用或从DOM移除 + :param timeout:超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if not self._ele.states.is_enabled or not self._ele.states.is_alive: + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素隐藏或删除失败。') + else: + return False + + def _wait_state(self, attr, mode=False, timeout=None, raise_err=None): + """等待元素某个bool状态到达指定状态 + :param attr: 状态名称 + :param mode: True或False + :param timeout: 超时时间,为None使用元素所在页面timeout属性 + :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._page.timeout + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if self._ele.states.__getattribute__(attr) == mode: + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError('等待元素状态改变失败。') + else: + return False + + +class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): + def __init__(self, frame): + """ + :param frame: ChromiumFrame对象 + """ + super().__init__(frame) + super(ChromiumBaseWaiter, self).__init__(frame, frame.frame_ele) diff --git a/DrissionPage/waiter.pyi b/DrissionPage/waiter.pyi new file mode 100644 index 0000000..41ea5e9 --- /dev/null +++ b/DrissionPage/waiter.pyi @@ -0,0 +1,78 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from typing import Union + +from .chromium_base import ChromiumBase +from .chromium_element import ChromiumElement +from .chromium_frame import ChromiumFrame +from .chromium_page import ChromiumPage + + +class ChromiumBaseWaiter(object): + def __init__(self, page: ChromiumBase): + self._driver: ChromiumBase = ... + + def ele_delete(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_display(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_hidden(self, loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = None, + raise_err: bool = None) -> bool: ... + + def ele_load(self, loc: Union[str, tuple], timeout: float = None, + raise_err: bool = None) -> Union[bool, ChromiumElement]: ... + + def _loading(self, timeout: float = None, start: bool = True, gap: float = .01, raise_err: bool = None) -> bool: ... + + def load_start(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def upload_paths_inputted(self) -> None: ... + + +class ChromiumPageWaiter(ChromiumBaseWaiter): + _driver: ChromiumPage = ... + + # _listener: Union[NetworkListener, None] = ... + + # def download_begin(self, timeout: float = 1.5) -> bool: ... + + # def download_finish(self, timeout: float = None) -> bool: ... + + def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + +class ChromiumElementWaiter(object): + def __init__(self, + page: ChromiumBase, + ele: ChromiumElement): + self._ele: ChromiumElement = ... + self._page: ChromiumBase = ... + + def delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def display(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def hidden(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def not_covered(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def enabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def disabled(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def disabled_or_delete(self, timeout: float = None, raise_err: bool = None) -> bool: ... + + def _wait_state(self, attr: str, mode: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... + + +class FrameWaiter(ChromiumBaseWaiter, ChromiumElementWaiter): + def __init__(self, frame: ChromiumFrame): ... From 0fd4d724f64b9e561e01add275cd081495665e9d Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 29 Jun 2023 18:51:54 +0800 Subject: [PATCH 04/13] =?UTF-8?q?Setter=E9=87=8D=E6=9E=84=E5=88=B0?= =?UTF-8?q?=E4=B8=93=E5=B1=9E=E6=96=87=E4=BB=B6=EF=BC=8C=E5=BE=85=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 164 +--------- DrissionPage/chromium_base.pyi | 57 +--- DrissionPage/chromium_element.py | 33 +- DrissionPage/chromium_element.pyi | 12 +- DrissionPage/chromium_frame.py | 14 +- DrissionPage/chromium_frame.pyi | 11 +- DrissionPage/chromium_page.py | 189 +---------- DrissionPage/chromium_page.pyi | 53 +-- DrissionPage/chromium_tab.py | 41 +-- DrissionPage/chromium_tab.pyi | 17 +- DrissionPage/commons/tools.py | 75 +++++ DrissionPage/commons/tools.pyi | 12 + DrissionPage/session_page.py | 134 +------- DrissionPage/session_page.pyi | 47 +-- DrissionPage/setter.py | 526 ++++++++++++++++++++++++++++++ DrissionPage/setter.pyi | 192 +++++++++++ DrissionPage/web_page.py | 37 +-- DrissionPage/web_page.pyi | 15 +- 18 files changed, 835 insertions(+), 794 deletions(-) create mode 100644 DrissionPage/setter.py create mode 100644 DrissionPage/setter.pyi diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index a046cc1..a77e9f1 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -18,11 +18,11 @@ from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chro from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement from .commons.locator import get_loc from .commons.tools import get_usable_path, clean_folder -from .commons.web import set_browser_cookies from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ NoRectError, BrowserConnectError, GetDocumentError from .network_listener import NetworkListener from .session_element import make_session_ele +from .setter import ChromiumBaseSetter from .waiter import ChromiumBaseWaiter @@ -890,112 +890,6 @@ class ChromiumBase(BasePage): return str(path.absolute()) -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page = page - - @property - def load_strategy(self): - """返回用于设置页面加载策略的对象""" - return PageLoadStrategy(self._page) - - @property - def scroll(self): - """返回用于设置页面滚动设置的对象""" - return PageScrollSetter(self._page.scroll) - - def retry_times(self, times): - """设置连接失败重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败重连间隔""" - self._page.retry_interval = interval - - def timeouts(self, implicit=None, page_load=None, script=None): - """设置超时时间,单位为秒 - :param implicit: 查找元素超时时间 - :param page_load: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: None - """ - if implicit is not None: - self._page.timeouts.implicit = implicit - - if page_load is not None: - self._page.timeouts.page_load = page_load - - if script is not None: - self._page.timeouts.script = script - - def user_agent(self, ua, platform=None): - """为当前tab设置user agent,只在当前tab有效 - :param ua: user agent字符串 - :param platform: platform字符串 - :return: None - """ - keys = {'userAgent': ua} - if platform: - keys['platform'] = platform - self._page.run_cdp('Emulation.setUserAgentOverride', **keys) - - def session_storage(self, item, value): - """设置或删除某项sessionStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def local_storage(self, item, value): - """设置或删除某项localStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' - return self._page.run_js_loaded(js, as_expr=True) - - def cookie(self, cookie): - """设置单个cookie - :param cookie: cookie信息 - :return: None - """ - if isinstance(cookie, str): - self.cookies(cookie) - else: - self.cookies([cookie]) - - def cookies(self, cookies): - """设置多个cookie,注意不要传入单个 - :param cookies: cookies信息 - :return: None - """ - set_browser_cookies(self._page, cookies) - - def upload_files(self, files): - """等待上传的文件路径 - :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 - :return: None - """ - if not self._page._upload_list: - self._page.driver.Page.fileChooserOpened = self._page._onFileChooserOpened - self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) - - if isinstance(files, str): - files = files.split('\n') - self._page._upload_list = [str(Path(i).absolute()) for i in files] - - def headers(self, headers: dict) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - self._page.run_cdp('Network.enable') - self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) - - class ChromiumPageScroll(ChromiumScroll): def __init__(self, page): """ @@ -1055,62 +949,6 @@ class Timeout(object): return str({'implicit': self.implicit, 'page_load': self.page_load, 'script': self.script}) -class PageLoadStrategy(object): - """用于设置页面加载策略的类""" - - def __init__(self, page): - """ - :param page: ChromiumBase对象 - """ - self._page = page - - def __call__(self, value): - """设置加载策略 - :param value: 可选 'normal', 'eager', 'none' - :return: None - """ - if value.lower() not in ('normal', 'eager', 'none'): - raise ValueError("只能选择 'normal', 'eager', 'none'。") - self._page._page_load_strategy = value - - def normal(self): - """设置页面加载策略为normal""" - self._page._page_load_strategy = 'normal' - - def eager(self): - """设置页面加载策略为eager""" - self._page._page_load_strategy = 'eager' - - def none(self): - """设置页面加载策略为none""" - self._page._page_load_strategy = 'none' - - -class PageScrollSetter(object): - def __init__(self, scroll): - self._scroll = scroll - - def wait_complete(self, on_off=True): - """设置滚动命令后是否等待完成 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - self._scroll._wait_complete = on_off - - def smooth(self, on_off=True): - """设置页面滚动是否平滑滚动 - :param on_off: 开或关 - :return: None - """ - if not isinstance(on_off, bool): - raise TypeError('on_off必须为bool。') - b = 'smooth' if on_off else 'auto' - self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') - self._scroll._wait_complete = on_off - - class Screencast(object): def __init__(self, page): self._page = page diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 160609d..ebbbd1b 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -8,9 +8,7 @@ from typing import Union, Tuple, List, Any from DataRecorder import Recorder from requests import Session -from requests.cookies import RequestsCookieJar -from .waiter import ChromiumBaseWaiter from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement, ChromiumScroll @@ -18,6 +16,8 @@ from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement from .network_listener import NetworkListener from .session_element import SessionElement +from .setter import ChromiumBaseSetter +from .waiter import ChromiumBaseWaiter class ChromiumBase(BasePage): @@ -222,37 +222,6 @@ class ChromiumPageScroll(ChromiumScroll): def _to_see(self, ele: ChromiumElement, center: Union[bool, None]) -> None: ... -class ChromiumBaseSetter(object): - def __init__(self, page): - self._page: ChromiumBase = ... - - @property - def load_strategy(self) -> PageLoadStrategy: ... - - @property - def scroll(self) -> PageScrollSetter: ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def session_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def local_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def cookie(self, cookies: Union[RequestsCookieJar, str, dict]) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def upload_files(self, files: Union[str, list, tuple]) -> None: ... - - class Timeout(object): def __init__(self, page: ChromiumBase, implicit=None, page_load=None, script=None): @@ -262,28 +231,6 @@ class Timeout(object): self.script: float = ... -class PageLoadStrategy(object): - def __init__(self, page: ChromiumBase): - self._page: ChromiumBase = ... - - def __call__(self, value: str) -> None: ... - - def normal(self) -> None: ... - - def eager(self) -> None: ... - - def none(self) -> None: ... - - -class PageScrollSetter(object): - def __init__(self, scroll: ChromiumPageScroll): - self._scroll: ChromiumPageScroll = ... - - def wait_complete(self, on_off: bool = True): ... - - def smooth(self, on_off: bool = True): ... - - class Screencast(object): def __init__(self, page: ChromiumBase): self._page: ChromiumBase = ... diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 68b63fe..ad1930f 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -16,6 +16,7 @@ from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_fun from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \ CDPError, NoResourceError, CanNotClickError from .session_element import make_session_ele +from .setter import ChromiumElementSetter from .waiter import ChromiumElementWaiter @@ -1467,38 +1468,6 @@ class ShadowRootStates(object): return False -class ChromiumElementSetter(object): - def __init__(self, ele): - """ - :param ele: ChromiumElement - """ - self._ele = ele - - def attr(self, attr, value): - """设置元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele.ids.node_id, name=attr, value=str(value)) - - def prop(self, prop, value): - """设置元素property属性 - :param prop: 属性名 - :param value: 属性值 - :return: None - """ - value = value.replace('"', r'\"') - self._ele.run_js(f'this.{prop}="{value}";') - - def innerHTML(self, html): - """设置元素innerHTML - :param html: html文本 - :return: None - """ - self.prop('innerHTML', html) - - class Locations(object): def __init__(self, ele): """ diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index ae78c42..3b5dfe1 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -12,6 +12,7 @@ from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage from .commons.constants import NoneElement from .session_element import SessionElement +from .setter import ChromiumElementSetter from .waiter import ChromiumElementWaiter from .web_page import WebPage @@ -383,17 +384,6 @@ def send_enter(ele: ChromiumElement) -> None: ... def send_key(ele: ChromiumElement, modifier: int, key: str) -> None: ... -class ChromiumElementSetter(object): - def __init__(self, ele: ChromiumElement): - self._ele: ChromiumElement = ... - - def attr(self, attr: str, value: str) -> None: ... - - def prop(self, prop: str, value: str) -> None: ... - - def innerHTML(self, html: str) -> None: ... - - class ShadowRootStates(object): def __init__(self, ele: ChromiumShadowRoot): """ diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 65a76ae..34aa115 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -7,10 +7,11 @@ from re import search from threading import Thread from time import sleep, perf_counter -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter +from .chromium_base import ChromiumBase, ChromiumPageScroll from .chromium_element import ChromiumElement from .commons.tools import get_usable_path from .errors import ContextLossError +from .setter import ChromiumFrameSetter from .waiter import FrameWaiter @@ -650,14 +651,3 @@ class ChromiumFrameScroll(ChromiumPageScroll): """ ele = loc_or_ele if isinstance(loc_or_ele, ChromiumElement) else self._driver._ele(loc_or_ele) self._to_see(ele, center) - - -class ChromiumFrameSetter(ChromiumBaseSetter): - def attr(self, attr, value): - """设置frame元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: None - """ - self._page._check_ok() - self._page.frame_ele.set.attr(attr, value) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 0f56dab..a2bdce8 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -6,9 +6,10 @@ from pathlib import Path from typing import Union, Tuple, List, Any -from .waiter import FrameWaiter -from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter +from .chromium_base import ChromiumBase, ChromiumPageScroll from .chromium_element import ChromiumElement, Locations, ChromiumElementStates +from .setter import ChromiumFrameSetter +from .waiter import FrameWaiter class ChromiumFrame(ChromiumBase): @@ -205,9 +206,3 @@ class ChromiumFrameScroll(ChromiumPageScroll): def __init__(self, frame: ChromiumFrame) -> None: ... def to_see(self, loc_or_ele: Union[str, tuple, ChromiumElement], center: Union[None, bool] = None) -> None: ... - - -class ChromiumFrameSetter(ChromiumBaseSetter): - _page: ChromiumFrame = ... - - def attr(self, attr: str, value: str) -> None: ... diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 6dd7097..4903560 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -3,16 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ -from platform import system from time import perf_counter, sleep -from .chromium_base import ChromiumBase, Timeout, ChromiumBaseSetter +from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .commons.browser import connect_browser from .commons.tools import port_is_using from .configs.chromium_options import ChromiumOptions from .errors import BrowserConnectError +from .setter import ChromiumPageSetter from .waiter import ChromiumPageWaiter @@ -689,191 +689,6 @@ class Alert(object): self.response_text = None -class WindowSetter(object): - """用于设置窗口大小的类""" - - def __init__(self, page): - """ - :param page: 页面对象 - """ - self._page = page - self._window_id = self._get_info()['windowId'] - - def maximized(self): - """窗口最大化""" - s = self._get_info()['bounds']['windowState'] - if s in ('fullscreen', 'minimized'): - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'maximized'}) - - def minimized(self): - """窗口最小化""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'minimized'}) - - def fullscreen(self): - """设置窗口为全屏""" - s = self._get_info()['bounds']['windowState'] - if s == 'minimized': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'fullscreen'}) - - def normal(self): - """设置窗口为常规模式""" - s = self._get_info()['bounds']['windowState'] - if s == 'fullscreen': - self._perform({'windowState': 'normal'}) - self._perform({'windowState': 'normal'}) - - def size(self, width=None, height=None): - """设置窗口大小 - :param width: 窗口宽度 - :param height: 窗口高度 - :return: None - """ - if width or height: - s = self._get_info()['bounds']['windowState'] - if s != 'normal': - self._perform({'windowState': 'normal'}) - info = self._get_info()['bounds'] - width = width - 16 if width else info['width'] - height = height + 7 if height else info['height'] - self._perform({'width': width, 'height': height}) - - def location(self, x=None, y=None): - """设置窗口在屏幕中的位置,相对左上角坐标 - :param x: 距离顶部距离 - :param y: 距离左边距离 - :return: None - """ - if x is not None or y is not None: - self.normal() - info = self._get_info()['bounds'] - x = x if x is not None else info['left'] - y = y if y is not None else info['top'] - self._perform({'left': x - 8, 'top': y}) - - def hide(self): - """隐藏浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, hide=True) - - def show(self): - """显示浏览器窗口,只在Windows系统可用""" - show_or_hide_browser(self._page, hide=False) - - def _get_info(self): - """获取窗口位置及大小信息""" - return self._page.run_cdp('Browser.getWindowForTarget') - - def _perform(self, bounds): - """执行改变窗口大小操作 - :param bounds: 控制数据 - :return: None - """ - self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) - - -class ChromiumPageSetter(ChromiumBaseSetter): - def main_tab(self, tab_id=None): - """设置主tab - :param tab_id: 标签页id,不传入则设置当前tab - :return: None - """ - self._page._main_tab = tab_id or self._page.tab_id - - @property - def window(self): - """返回用于设置浏览器窗口的对象""" - return WindowSetter(self._page) - - def tab_to_front(self, tab_or_id=None): - """激活标签页使其处于最前面 - :param tab_or_id: 标签页对象或id,为None表示当前标签页 - :return: None - """ - if not tab_or_id: - tab_or_id = self._page.tab_id - elif isinstance(tab_or_id, ChromiumTab): - tab_or_id = tab_or_id.tab_id - self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') - - -def show_or_hide_browser(page, hide=True): - """执行显示或隐藏浏览器窗口 - :param page: ChromePage对象 - :param hide: 是否隐藏 - :return: None - """ - if not page.address.startswith(('127.0.0.1', 'localhost')): - return - - if system().lower() != 'windows': - raise OSError('该方法只能在Windows系统使用。') - - try: - from win32gui import ShowWindow - from win32con import SW_HIDE, SW_SHOW - except ImportError: - raise ImportError('请先安装:pip install pypiwin32') - - pid = page.process_id - if not pid: - return None - hds = get_chrome_hwnds_from_pid(pid, page.title) - sw = SW_HIDE if hide else SW_SHOW - for hd in hds: - ShowWindow(hd, sw) - - -def get_browser_progress_id(progress, address): - """获取浏览器进程id - :param progress: 已知的进程对象,没有时传入None - :param address: 浏览器管理地址,含端口 - :return: 进程id或None - """ - if progress: - return progress.pid - - from os import popen - port = address.split(':')[-1] - txt = '' - progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') - for progress in progresses: - if 'LISTENING' in progress: - txt = progress - break - if not txt: - return None - - return txt.split(' ')[-1] - - -def get_chrome_hwnds_from_pid(pid, title): - """通过PID查询句柄ID - :param pid: 进程id - :param title: 窗口标题 - :return: 进程句柄组成的列表 - """ - try: - from win32gui import IsWindow, GetWindowText, EnumWindows - from win32process import GetWindowThreadProcessId - except ImportError: - raise ImportError('请先安装win32gui,pip install pypiwin32') - - def callback(hwnd, hds): - if IsWindow(hwnd) and title in GetWindowText(hwnd): - _, found_pid = GetWindowThreadProcessId(hwnd) - if str(found_pid) == str(pid): - hds.append(hwnd) - return True - - hwnds = [] - EnumWindows(callback, hwnds) - return hwnds - - def get_rename(original, rename): if '.' in rename: return rename diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 47115cb..916d85c 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -3,10 +3,10 @@ @Author : g1879 @Contact : g1879@qq.com """ -from os import popen from typing import Union, Tuple, List -from .chromium_base import ChromiumBase, ChromiumBaseSetter +from .setter import ChromiumPageSetter +from .chromium_base import ChromiumBase from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .configs.chromium_options import ChromiumOptions @@ -21,7 +21,7 @@ class ChromiumPage(ChromiumBase): timeout: float = None): self._driver_options: ChromiumOptions = ... self._process_id: str = ... - self._window_setter: WindowSetter = ... + # self._window_setter: WindowSetter = ... self._main_tab: str = ... self._alert: Alert = ... self._browser_driver: ChromiumDriver = ... @@ -197,51 +197,4 @@ class Alert(object): self.response_text: str = ... -class WindowSetter(object): - - def __init__(self, page: ChromiumPage): - self._page: ChromiumPage = ... - self._window_id: str = ... - - def maximized(self) -> None: ... - - def minimized(self) -> None: ... - - def fullscreen(self) -> None: ... - - def normal(self) -> None: ... - - def size(self, width: int = None, height: int = None) -> None: ... - - def location(self, x: int = None, y: int = None) -> None: ... - - def hide(self) -> None: ... - - def show(self) -> None: ... - - def _get_info(self) -> dict: ... - - def _perform(self, bounds: dict) -> None: ... - - -def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... - - -def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... - - -def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... - - -class ChromiumPageSetter(ChromiumBaseSetter): - _page: ChromiumPage = ... - - def main_tab(self, tab_id: str = None) -> None: ... - - @property - def window(self) -> WindowSetter: ... - - def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... - - def get_rename(original: str, rename: str) -> str: ... diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index 256569b..9279600 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -5,9 +5,10 @@ """ from copy import copy -from .chromium_base import ChromiumBase, ChromiumBaseSetter +from .chromium_base import ChromiumBase from .commons.web import set_session_cookies, set_browser_cookies -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .setter import WebPageTabSetter class ChromiumTab(ChromiumBase): @@ -327,39 +328,3 @@ class WebPageTab(SessionPage, ChromiumTab): elif self._mode == 'd': return super(SessionPage, self)._find_elements(loc_or_ele, timeout=timeout, single=single, relative=relative) - - -class WebPageTabSetter(ChromiumBaseSetter): - def __init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumBaseSetter(self._page) - - def cookies(self, cookies): - """添加多个cookies信息到浏览器或session对象,注意不要传入单个 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - if self._page._has_session: - self._session_setter.headers(headers) - if self._page._has_driver: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page._has_session: - self._session_setter.user_agent(ua) - if self._page._has_driver: - self._chromium_setter.user_agent(ua, platform) - - diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 6a99610..04f3ad6 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -7,12 +7,13 @@ from typing import Union, Tuple, Any, List from requests import Session, Response -from .chromium_base import ChromiumBase, ChromiumBaseSetter +from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame from .chromium_page import ChromiumPage, ChromiumTabRect from .session_element import SessionElement -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .setter import WebPageTabSetter from .web_page import WebPage @@ -149,15 +150,3 @@ class WebPageTab(SessionPage, ChromiumTab): timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \ -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ Union[ChromiumElement, str, ChromiumFrame]]]: ... - - -class WebPageTabSetter(ChromiumBaseSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumBaseSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... diff --git a/DrissionPage/commons/tools.py b/DrissionPage/commons/tools.py index a95dc7d..5adf7ca 100644 --- a/DrissionPage/commons/tools.py +++ b/DrissionPage/commons/tools.py @@ -3,6 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ +from platform import system from pathlib import Path from re import search, sub from shutil import rmtree @@ -101,6 +102,80 @@ def clean_folder(folder_path, ignore=None): elif f.is_dir(): rmtree(f, True) + +def show_or_hide_browser(page, hide=True): + """执行显示或隐藏浏览器窗口 + :param page: ChromePage对象 + :param hide: 是否隐藏 + :return: None + """ + if not page.address.startswith(('127.0.0.1', 'localhost')): + return + + if system().lower() != 'windows': + raise OSError('该方法只能在Windows系统使用。') + + try: + from win32gui import ShowWindow + from win32con import SW_HIDE, SW_SHOW + except ImportError: + raise ImportError('请先安装:pip install pypiwin32') + + pid = page.process_id + if not pid: + return None + hds = get_chrome_hwnds_from_pid(pid, page.title) + sw = SW_HIDE if hide else SW_SHOW + for hd in hds: + ShowWindow(hd, sw) + + +def get_browser_progress_id(progress, address): + """获取浏览器进程id + :param progress: 已知的进程对象,没有时传入None + :param address: 浏览器管理地址,含端口 + :return: 进程id或None + """ + if progress: + return progress.pid + + from os import popen + port = address.split(':')[-1] + txt = '' + progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') + for progress in progresses: + if 'LISTENING' in progress: + txt = progress + break + if not txt: + return None + + return txt.split(' ')[-1] + + +def get_chrome_hwnds_from_pid(pid, title): + """通过PID查询句柄ID + :param pid: 进程id + :param title: 窗口标题 + :return: 进程句柄组成的列表 + """ + try: + from win32gui import IsWindow, GetWindowText, EnumWindows + from win32process import GetWindowThreadProcessId + except ImportError: + raise ImportError('请先安装win32gui,pip install pypiwin32') + + def callback(hwnd, hds): + if IsWindow(hwnd) and title in GetWindowText(hwnd): + _, found_pid = GetWindowThreadProcessId(hwnd) + if str(found_pid) == str(pid): + hds.append(hwnd) + return True + + hwnds = [] + EnumWindows(callback, hwnds) + return hwnds + # def get_exe_from_port(port): # """获取端口号第一条进程的可执行文件路径 # :param port: 端口号 diff --git a/DrissionPage/commons/tools.pyi b/DrissionPage/commons/tools.pyi index f7b91e8..54b8197 100644 --- a/DrissionPage/commons/tools.pyi +++ b/DrissionPage/commons/tools.pyi @@ -3,9 +3,12 @@ @Author : g1879 @Contact : g1879@qq.com """ +from os import popen from pathlib import Path from typing import Union +from chromium_page import ChromiumPage + # def get_exe_from_port(port: Union[str, int]) -> Union[str, None]: ... @@ -26,3 +29,12 @@ def port_is_using(ip: str, port: Union[str, int]) -> bool: ... def clean_folder(folder_path: Union[str, Path], ignore: Union[tuple, list] = None) -> None: ... + + +def show_or_hide_browser(page: ChromiumPage, hide: bool = True) -> None: ... + + +def get_browser_progress_id(progress: Union[popen, None], address: str) -> Union[str, None]: ... + + +def get_chrome_hwnds_from_pid(pid: Union[str, int], title: str) -> list: ... diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index cc67f6e..b6c1193 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -12,9 +12,10 @@ from requests.structures import CaseInsensitiveDict from tldextract import extract from .base import BasePage -from .commons.web import cookie_to_dict, set_session_cookies +from .commons.web import cookie_to_dict from .configs.session_options import SessionOptions from .session_element import SessionElement, make_session_ele +from .setter import SessionPageSetter class SessionPage(BasePage): @@ -304,137 +305,6 @@ class SessionPage(BasePage): return r, f'状态码:{r.status_code}' -class SessionPageSetter(object): - def __init__(self, page): - self._page = page - - def retry_times(self, times): - """设置连接失败时重连次数""" - self._page.retry_times = times - - def retry_interval(self, interval): - """设置连接失败时重连间隔""" - self._page.retry_interval = interval - - def timeout(self, second): - """设置连接超时时间 - :param second: 秒数 - :return: None - """ - self._page.timeout = second - - def cookie(self, cookie): - """为Session对象设置单个cookie - :param cookie: cookie信息 - :return: None - """ - if isinstance(cookie, str): - self.cookies(cookie) - else: - self.cookies([cookie]) - - def cookies(self, cookies): - """为Session对象设置多个cookie,注意不要传入单个 - :param cookies: cookies信息 - :return: None - """ - set_session_cookies(self._page.session, cookies) - - def headers(self, headers): - """设置通用的headers - :param headers: dict形式的headers - :return: None - """ - self._page.session.headers = CaseInsensitiveDict(headers) - - def header(self, attr, value): - """设置headers中一个项 - :param attr: 设置名称 - :param value: 设置值 - :return: None - """ - self._page.session.headers[attr.lower()] = value - - def user_agent(self, ua): - """设置user agent - :param ua: user agent - :return: None - """ - self._page.session.headers['user-agent'] = ua - - def proxies(self, http=None, https=None): - """设置proxies参数 - :param http: http代理地址 - :param https: https代理地址 - :return: None - """ - self._page.session.proxies = {'http': http, 'https': https} - - def auth(self, auth): - """设置认证元组或对象 - :param auth: 认证元组或对象 - :return: None - """ - self._page.session.auth = auth - - def hooks(self, hooks): - """设置回调方法 - :param hooks: 回调方法 - :return: None - """ - self._page.session.hooks = hooks - - def params(self, params): - """设置查询参数字典 - :param params: 查询参数字典 - :return: None - """ - self._page.session.params = params - - def verify(self, on_off): - """设置是否验证SSL证书 - :param on_off: 是否验证 SSL 证书 - :return: None - """ - self._page.session.verify = on_off - - def cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 - :param cert: 证书路径或元组 - :return: None - """ - self._page.session.cert = cert - - def stream(self, on_off): - """设置是否使用流式响应内容 - :param on_off: 是否使用流式响应内容 - :return: None - """ - self._page.session.stream = on_off - - def trust_env(self, on_off): - """设置是否信任环境 - :param on_off: 是否信任环境 - :return: None - """ - self._page.session.trust_env = on_off - - def max_redirects(self, times): - """设置最大重定向次数 - :param times: 最大重定向次数 - :return: None - """ - self._page.session.max_redirects = times - - def add_adapter(self, url, adapter): - """添加适配器 - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: None - """ - self._page.session.mount(url, adapter) - - def check_headers(kwargs, headers, arg) -> bool: """检查kwargs或headers中是否有arg所示属性""" return arg in kwargs['headers'] or arg in headers diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index bb803f0..5391a4a 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -3,20 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ -from http.cookiejar import Cookie from typing import Any, Union, Tuple, List -# from DownloadKit import DownloadKit from requests import Session, Response -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict from .base import BasePage from .commons.constants import NoneElement from .configs.session_options import SessionOptions from .session_element import SessionElement +from .setter import SessionPageSetter class SessionPage(BasePage): @@ -160,47 +156,6 @@ class SessionPage(BasePage): **kwargs) -> tuple: ... -class SessionPageSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - - def retry_times(self, times: int) -> None: ... - - def retry_interval(self, interval: float) -> None: ... - - def timeout(self, second: float) -> None: ... - - def cookie(self, cookie: Union[Cookie, str, dict]) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def header(self, attr: str, value: str) -> None: ... - - def user_agent(self, ua: str) -> None: ... - - def proxies(self, http: str = None, https: str = None) -> None: ... - - def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... - - def hooks(self, hooks: Union[dict, None]) -> None: ... - - def params(self, params: Union[dict, None]) -> None: ... - - def verify(self, on_off: Union[bool, None]) -> None: ... - - def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... - - def stream(self, on_off: Union[bool, None]) -> None: ... - - def trust_env(self, on_off: Union[bool, None]) -> None: ... - - def max_redirects(self, times: Union[int, None]) -> None: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... - - def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], arg: str) -> bool: ... diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py new file mode 100644 index 0000000..e14f23f --- /dev/null +++ b/DrissionPage/setter.py @@ -0,0 +1,526 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from pathlib import Path + +from requests.structures import CaseInsensitiveDict + +from .commons.tools import show_or_hide_browser +from .commons.web import set_browser_cookies, set_session_cookies + + +class ChromiumBaseSetter(object): + def __init__(self, page): + self._page = page + + @property + def load_strategy(self): + """返回用于设置页面加载策略的对象""" + return PageLoadStrategy(self._page) + + @property + def scroll(self): + """返回用于设置页面滚动设置的对象""" + return PageScrollSetter(self._page.scroll) + + def retry_times(self, times): + """设置连接失败重连次数""" + self._page.retry_times = times + + def retry_interval(self, interval): + """设置连接失败重连间隔""" + self._page.retry_interval = interval + + def timeouts(self, implicit=None, page_load=None, script=None): + """设置超时时间,单位为秒 + :param implicit: 查找元素超时时间 + :param page_load: 页面加载超时时间 + :param script: 脚本运行超时时间 + :return: None + """ + if implicit is not None: + self._page.timeouts.implicit = implicit + + if page_load is not None: + self._page.timeouts.page_load = page_load + + if script is not None: + self._page.timeouts.script = script + + def user_agent(self, ua, platform=None): + """为当前tab设置user agent,只在当前tab有效 + :param ua: user agent字符串 + :param platform: platform字符串 + :return: None + """ + keys = {'userAgent': ua} + if platform: + keys['platform'] = platform + self._page.run_cdp('Emulation.setUserAgentOverride', **keys) + + def session_storage(self, item, value): + """设置或删除某项sessionStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + js = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' + return self._page.run_js_loaded(js, as_expr=True) + + def local_storage(self, item, value): + """设置或删除某项localStorage信息 + :param item: 要设置的项 + :param value: 项的值,设置为False时,删除该项 + :return: None + """ + js = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' + return self._page.run_js_loaded(js, as_expr=True) + + def cookie(self, cookie): + """设置单个cookie + :param cookie: cookie信息 + :return: None + """ + if isinstance(cookie, str): + self.cookies(cookie) + else: + self.cookies([cookie]) + + def cookies(self, cookies): + """设置多个cookie,注意不要传入单个 + :param cookies: cookies信息 + :return: None + """ + set_browser_cookies(self._page, cookies) + + def upload_files(self, files): + """等待上传的文件路径 + :param files: 文件路径列表或字符串,字符串时多个文件用回车分隔 + :return: None + """ + if not self._page._upload_list: + self._page.driver.Page.fileChooserOpened = self._page._onFileChooserOpened + self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) + + if isinstance(files, str): + files = files.split('\n') + self._page._upload_list = [str(Path(i).absolute()) for i in files] + + def headers(self, headers: dict) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + self._page.run_cdp('Network.enable') + self._page.run_cdp('Network.setExtraHTTPHeaders', headers=headers) + + +class ChromiumPageSetter(ChromiumBaseSetter): + def main_tab(self, tab_id=None): + """设置主tab + :param tab_id: 标签页id,不传入则设置当前tab + :return: None + """ + self._page._main_tab = tab_id or self._page.tab_id + + @property + def window(self): + """返回用于设置浏览器窗口的对象""" + return WindowSetter(self._page) + + def tab_to_front(self, tab_or_id=None): + """激活标签页使其处于最前面 + :param tab_or_id: 标签页对象或id,为None表示当前标签页 + :return: None + """ + if not tab_or_id: + tab_or_id = self._page.tab_id + elif 'ChromiumTab' in str(type(tab_or_id)): + tab_or_id = tab_or_id.tab_id + self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') + + +class SessionPageSetter(object): + def __init__(self, page): + self._page = page + + def retry_times(self, times): + """设置连接失败时重连次数""" + self._page.retry_times = times + + def retry_interval(self, interval): + """设置连接失败时重连间隔""" + self._page.retry_interval = interval + + def timeout(self, second): + """设置连接超时时间 + :param second: 秒数 + :return: None + """ + self._page.timeout = second + + def cookie(self, cookie): + """为Session对象设置单个cookie + :param cookie: cookie信息 + :return: None + """ + if isinstance(cookie, str): + self.cookies(cookie) + else: + self.cookies([cookie]) + + def cookies(self, cookies): + """为Session对象设置多个cookie,注意不要传入单个 + :param cookies: cookies信息 + :return: None + """ + set_session_cookies(self._page.session, cookies) + + def headers(self, headers): + """设置通用的headers + :param headers: dict形式的headers + :return: None + """ + self._page.session.headers = CaseInsensitiveDict(headers) + + def header(self, attr, value): + """设置headers中一个项 + :param attr: 设置名称 + :param value: 设置值 + :return: None + """ + self._page.session.headers[attr.lower()] = value + + def user_agent(self, ua): + """设置user agent + :param ua: user agent + :return: None + """ + self._page.session.headers['user-agent'] = ua + + def proxies(self, http=None, https=None): + """设置proxies参数 + :param http: http代理地址 + :param https: https代理地址 + :return: None + """ + self._page.session.proxies = {'http': http, 'https': https} + + def auth(self, auth): + """设置认证元组或对象 + :param auth: 认证元组或对象 + :return: None + """ + self._page.session.auth = auth + + def hooks(self, hooks): + """设置回调方法 + :param hooks: 回调方法 + :return: None + """ + self._page.session.hooks = hooks + + def params(self, params): + """设置查询参数字典 + :param params: 查询参数字典 + :return: None + """ + self._page.session.params = params + + def verify(self, on_off): + """设置是否验证SSL证书 + :param on_off: 是否验证 SSL 证书 + :return: None + """ + self._page.session.verify = on_off + + def cert(self, cert): + """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 + :param cert: 证书路径或元组 + :return: None + """ + self._page.session.cert = cert + + def stream(self, on_off): + """设置是否使用流式响应内容 + :param on_off: 是否使用流式响应内容 + :return: None + """ + self._page.session.stream = on_off + + def trust_env(self, on_off): + """设置是否信任环境 + :param on_off: 是否信任环境 + :return: None + """ + self._page.session.trust_env = on_off + + def max_redirects(self, times): + """设置最大重定向次数 + :param times: 最大重定向次数 + :return: None + """ + self._page.session.max_redirects = times + + def add_adapter(self, url, adapter): + """添加适配器 + :param url: 适配器对应url + :param adapter: 适配器对象 + :return: None + """ + self._page.session.mount(url, adapter) + + +class WebPageSetter(ChromiumPageSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumPageSetter(self._page) + + def cookies(self, cookies): + """添加cookies信息到浏览器或session对象 + :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` + :return: None + """ + if self._page.mode == 'd' and self._page._has_driver: + self._chromium_setter.cookies(cookies) + elif self._page.mode == 's' and self._page._has_session: + self._session_setter.cookies(cookies) + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page.mode == 's': + self._session_setter.headers(headers) + else: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if self._page.mode == 's': + self._session_setter.user_agent(ua) + else: + self._chromium_setter.user_agent(ua, platform) + + +class WebPageTabSetter(ChromiumBaseSetter): + def __init__(self, page): + super().__init__(page) + self._session_setter = SessionPageSetter(self._page) + self._chromium_setter = ChromiumBaseSetter(self._page) + + def cookies(self, cookies): + """添加多个cookies信息到浏览器或session对象,注意不要传入单个 + :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` + :return: None + """ + if self._page.mode == 'd' and self._page._has_driver: + self._chromium_setter.cookies(cookies) + elif self._page.mode == 's' and self._page._has_session: + self._session_setter.cookies(cookies) + + def headers(self, headers) -> None: + """设置固定发送的headers + :param headers: dict格式的headers数据 + :return: None + """ + if self._page._has_session: + self._session_setter.headers(headers) + if self._page._has_driver: + self._chromium_setter.headers(headers) + + def user_agent(self, ua, platform=None): + """设置user agent,d模式下只有当前tab有效""" + if self._page._has_session: + self._session_setter.user_agent(ua) + if self._page._has_driver: + self._chromium_setter.user_agent(ua, platform) + + +class ChromiumElementSetter(object): + def __init__(self, ele): + """ + :param ele: ChromiumElement + """ + self._ele = ele + + def attr(self, attr, value): + """设置元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._ele.page.run_cdp('DOM.setAttributeValue', nodeId=self._ele.ids.node_id, name=attr, value=str(value)) + + def prop(self, prop, value): + """设置元素property属性 + :param prop: 属性名 + :param value: 属性值 + :return: None + """ + value = value.replace('"', r'\"') + self._ele.run_js(f'this.{prop}="{value}";') + + def innerHTML(self, html): + """设置元素innerHTML + :param html: html文本 + :return: None + """ + self.prop('innerHTML', html) + + +class ChromiumFrameSetter(ChromiumBaseSetter): + def attr(self, attr, value): + """设置frame元素attribute属性 + :param attr: 属性名 + :param value: 属性值 + :return: None + """ + self._page._check_ok() + self._page.frame_ele.set.attr(attr, value) + + +class PageLoadStrategy(object): + """用于设置页面加载策略的类""" + + def __init__(self, page): + """ + :param page: ChromiumBase对象 + """ + self._page = page + + def __call__(self, value): + """设置加载策略 + :param value: 可选 'normal', 'eager', 'none' + :return: None + """ + if value.lower() not in ('normal', 'eager', 'none'): + raise ValueError("只能选择 'normal', 'eager', 'none'。") + self._page._page_load_strategy = value + + def normal(self): + """设置页面加载策略为normal""" + self._page._page_load_strategy = 'normal' + + def eager(self): + """设置页面加载策略为eager""" + self._page._page_load_strategy = 'eager' + + def none(self): + """设置页面加载策略为none""" + self._page._page_load_strategy = 'none' + + +class PageScrollSetter(object): + def __init__(self, scroll): + self._scroll = scroll + + def wait_complete(self, on_off=True): + """设置滚动命令后是否等待完成 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + self._scroll._wait_complete = on_off + + def smooth(self, on_off=True): + """设置页面滚动是否平滑滚动 + :param on_off: 开或关 + :return: None + """ + if not isinstance(on_off, bool): + raise TypeError('on_off必须为bool。') + b = 'smooth' if on_off else 'auto' + self._scroll._driver.run_js(f'document.documentElement.style.setProperty("scroll-behavior","{b}");') + self._scroll._wait_complete = on_off + + +class WindowSetter(object): + """用于设置窗口大小的类""" + + def __init__(self, page): + """ + :param page: 页面对象 + """ + self._page = page + self._window_id = self._get_info()['windowId'] + + def maximized(self): + """窗口最大化""" + s = self._get_info()['bounds']['windowState'] + if s in ('fullscreen', 'minimized'): + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'maximized'}) + + def minimized(self): + """窗口最小化""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'minimized'}) + + def fullscreen(self): + """设置窗口为全屏""" + s = self._get_info()['bounds']['windowState'] + if s == 'minimized': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'fullscreen'}) + + def normal(self): + """设置窗口为常规模式""" + s = self._get_info()['bounds']['windowState'] + if s == 'fullscreen': + self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'normal'}) + + def size(self, width=None, height=None): + """设置窗口大小 + :param width: 窗口宽度 + :param height: 窗口高度 + :return: None + """ + if width or height: + s = self._get_info()['bounds']['windowState'] + if s != 'normal': + self._perform({'windowState': 'normal'}) + info = self._get_info()['bounds'] + width = width - 16 if width else info['width'] + height = height + 7 if height else info['height'] + self._perform({'width': width, 'height': height}) + + def location(self, x=None, y=None): + """设置窗口在屏幕中的位置,相对左上角坐标 + :param x: 距离顶部距离 + :param y: 距离左边距离 + :return: None + """ + if x is not None or y is not None: + self.normal() + info = self._get_info()['bounds'] + x = x if x is not None else info['left'] + y = y if y is not None else info['top'] + self._perform({'left': x - 8, 'top': y}) + + def hide(self): + """隐藏浏览器窗口,只在Windows系统可用""" + show_or_hide_browser(self._page, hide=True) + + def show(self): + """显示浏览器窗口,只在Windows系统可用""" + show_or_hide_browser(self._page, hide=False) + + def _get_info(self): + """获取窗口位置及大小信息""" + return self._page.run_cdp('Browser.getWindowForTarget') + + def _perform(self, bounds): + """执行改变窗口大小操作 + :param bounds: 控制数据 + :return: None + """ + self._page.run_cdp('Browser.setWindowBounds', windowId=self._window_id, bounds=bounds) diff --git a/DrissionPage/setter.pyi b/DrissionPage/setter.pyi new file mode 100644 index 0000000..e750130 --- /dev/null +++ b/DrissionPage/setter.pyi @@ -0,0 +1,192 @@ +# -*- coding:utf-8 -*- +""" +@Author : g1879 +@Contact : g1879@qq.com +""" +from http.cookiejar import Cookie +from typing import Union, Tuple + +from requests.adapters import HTTPAdapter +from requests.auth import HTTPBasicAuth +from requests.cookies import RequestsCookieJar + +from .chromium_base import ChromiumBase, ChromiumPageScroll +from .chromium_element import ChromiumElement +from .chromium_frame import ChromiumFrame +from .chromium_page import ChromiumPage +from .chromium_tab import ChromiumTab +from .session_page import SessionPage +from .web_page import WebPage + + +class ChromiumBaseSetter(object): + def __init__(self, page): + self._page: ChromiumBase = ... + + @property + def load_strategy(self) -> PageLoadStrategy: ... + + @property + def scroll(self) -> PageScrollSetter: ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def timeouts(self, implicit: float = None, page_load: float = None, script: float = None) -> None: ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def session_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def local_storage(self, item: str, value: Union[str, bool]) -> None: ... + + def cookie(self, cookies: Union[RequestsCookieJar, str, dict]) -> None: ... + + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def upload_files(self, files: Union[str, list, tuple]) -> None: ... + + +class ChromiumPageSetter(ChromiumBaseSetter): + _page: ChromiumPage = ... + + def main_tab(self, tab_id: str = None) -> None: ... + + @property + def window(self) -> WindowSetter: ... + + def tab_to_front(self, tab_or_id: Union[str, ChromiumTab] = None) -> None: ... + + +class SessionPageSetter(object): + def __init__(self, page: SessionPage): + self._page: SessionPage = ... + + def retry_times(self, times: int) -> None: ... + + def retry_interval(self, interval: float) -> None: ... + + def timeout(self, second: float) -> None: ... + + def cookie(self, cookie: Union[Cookie, str, dict]) -> None: ... + + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def header(self, attr: str, value: str) -> None: ... + + def user_agent(self, ua: str) -> None: ... + + def proxies(self, http: str = None, https: str = None) -> None: ... + + def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... + + def hooks(self, hooks: Union[dict, None]) -> None: ... + + def params(self, params: Union[dict, None]) -> None: ... + + def verify(self, on_off: Union[bool, None]) -> None: ... + + def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... + + def stream(self, on_off: Union[bool, None]) -> None: ... + + def trust_env(self, on_off: Union[bool, None]) -> None: ... + + def max_redirects(self, times: Union[int, None]) -> None: ... + + def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... + + +class WebPageSetter(ChromiumPageSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumPageSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def cookies(self, cookies) -> None: ... + + +class WebPageTabSetter(ChromiumBaseSetter): + _page: WebPage = ... + _session_setter: SessionPageSetter = ... + _chromium_setter: ChromiumBaseSetter = ... + + def user_agent(self, ua: str, platform: str = None) -> None: ... + + def headers(self, headers: dict) -> None: ... + + def cookies(self, cookies) -> None: ... + + +class ChromiumElementSetter(object): + def __init__(self, ele: ChromiumElement): + self._ele: ChromiumElement = ... + + def attr(self, attr: str, value: str) -> None: ... + + def prop(self, prop: str, value: str) -> None: ... + + def innerHTML(self, html: str) -> None: ... + + +class ChromiumFrameSetter(ChromiumBaseSetter): + _page: ChromiumFrame = ... + + def attr(self, attr: str, value: str) -> None: ... + + +class PageLoadStrategy(object): + def __init__(self, page: ChromiumBase): + self._page: ChromiumBase = ... + + def __call__(self, value: str) -> None: ... + + def normal(self) -> None: ... + + def eager(self) -> None: ... + + def none(self) -> None: ... + + +class PageScrollSetter(object): + def __init__(self, scroll: ChromiumPageScroll): + self._scroll: ChromiumPageScroll = ... + + def wait_complete(self, on_off: bool = True): ... + + def smooth(self, on_off: bool = True): ... + + +class WindowSetter(object): + + def __init__(self, page: ChromiumPage): + self._page: ChromiumPage = ... + self._window_id: str = ... + + def maximized(self) -> None: ... + + def minimized(self) -> None: ... + + def fullscreen(self) -> None: ... + + def normal(self) -> None: ... + + def size(self, width: int = None, height: int = None) -> None: ... + + def location(self, x: int = None, y: int = None) -> None: ... + + def hide(self) -> None: ... + + def show(self) -> None: ... + + def _get_info(self) -> dict: ... + + def _perform(self, bounds: dict) -> None: ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 059597b..e85bdca 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -8,12 +8,13 @@ from requests import Session from .base import BasePage from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver -from .chromium_page import ChromiumPage, ChromiumPageSetter +from .chromium_page import ChromiumPage from .chromium_tab import WebPageTab from .commons.web import set_session_cookies, set_browser_cookies from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): @@ -426,35 +427,3 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._has_driver = None -class WebPageSetter(ChromiumPageSetter): - def __init__(self, page): - super().__init__(page) - self._session_setter = SessionPageSetter(self._page) - self._chromium_setter = ChromiumPageSetter(self._page) - - def cookies(self, cookies): - """添加cookies信息到浏览器或session对象 - :param cookies: 可以接收`CookieJar`、`list`、`tuple`、`str`、`dict`格式的`cookies` - :return: None - """ - if self._page.mode == 'd' and self._page._has_driver: - self._chromium_setter.cookies(cookies) - elif self._page.mode == 's' and self._page._has_session: - self._session_setter.cookies(cookies) - - def headers(self, headers) -> None: - """设置固定发送的headers - :param headers: dict格式的headers数据 - :return: None - """ - if self._page.mode == 's': - self._session_setter.headers(headers) - else: - self._chromium_setter.headers(headers) - - def user_agent(self, ua, platform=None): - """设置user agent,d模式下只有当前tab有效""" - if self._page.mode == 's': - self._session_setter.user_agent(ua) - else: - self._chromium_setter.user_agent(ua, platform) diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index d153c30..1621fd9 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -12,12 +12,13 @@ from .base import BasePage from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement from .chromium_frame import ChromiumFrame -from .chromium_page import ChromiumPage, ChromiumPageSetter +from .chromium_page import ChromiumPage from .chromium_tab import WebPageTab from .configs.chromium_options import ChromiumOptions from .configs.session_options import SessionOptions from .session_element import SessionElement -from .session_page import SessionPage, SessionPageSetter +from .session_page import SessionPage +from .setter import WebPageSetter class WebPage(SessionPage, ChromiumPage, BasePage): @@ -166,13 +167,3 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def _on_download_begin(self, **kwargs): ... -class WebPageSetter(ChromiumPageSetter): - _page: WebPage = ... - _session_setter: SessionPageSetter = ... - _chromium_setter: ChromiumPageSetter = ... - - def user_agent(self, ua: str, platform: str = None) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def cookies(self, cookies) -> None: ... From ccaeda6a983801eba804d30b08835271fe926f76 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 30 Jun 2023 16:06:32 +0800 Subject: [PATCH 05/13] =?UTF-8?q?=E5=BE=AE=E8=B0=83=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/setter.py | 2 +- DrissionPage/waiter.py | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py index e14f23f..754bd32 100644 --- a/DrissionPage/setter.py +++ b/DrissionPage/setter.py @@ -137,7 +137,7 @@ class ChromiumPageSetter(ChromiumBaseSetter): """ if not tab_or_id: tab_or_id = self._page.tab_id - elif 'ChromiumTab' in str(type(tab_or_id)): + elif not isinstance(tab_or_id, str): # 传入Tab对象 tab_or_id = tab_or_id.tab_id self._page._control_session.get(f'http://{self._page.address}/json/activate/{tab_or_id}') diff --git a/DrissionPage/waiter.py b/DrissionPage/waiter.py index 7bfd238..3ea50d5 100644 --- a/DrissionPage/waiter.py +++ b/DrissionPage/waiter.py @@ -16,7 +16,7 @@ class ChromiumBaseWaiter(object): """等待元素从DOM中删除 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) @@ -26,7 +26,7 @@ class ChromiumBaseWaiter(object): """等待元素变成显示状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) @@ -36,7 +36,7 @@ class ChromiumBaseWaiter(object): """等待元素变成隐藏状态 :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ ele = self._driver._ele(loc_or_ele, raise_err=False, timeout=0) @@ -46,7 +46,7 @@ class ChromiumBaseWaiter(object): """等待元素加载到DOM :param loc: 要等待的元素,输入定位符 :param timeout: 超时时间,默认读取页面超时时间 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 成功返回元素对象,失败返回False """ ele = self._driver._ele(loc, raise_err=False, timeout=timeout) @@ -60,7 +60,7 @@ class ChromiumBaseWaiter(object): def load_start(self, timeout=None, raise_err=None): """等待页面开始加载 :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._loading(timeout=timeout, gap=.002, raise_err=raise_err) @@ -68,7 +68,7 @@ class ChromiumBaseWaiter(object): def load_complete(self, timeout=None, raise_err=None): """等待页面开始加载 :param timeout: 超时时间,为None时使用页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._loading(timeout=timeout, start=False, raise_err=raise_err) @@ -83,7 +83,7 @@ class ChromiumBaseWaiter(object): :param timeout: 超时时间,为None时使用页面timeout属性 :param start: 等待开始还是结束 :param gap: 间隔秒数 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout != 0: @@ -109,7 +109,7 @@ class ChromiumPageWaiter(ChromiumBaseWaiter): def new_tab(self, timeout=None, raise_err=None): """等待新标签页出现 :param timeout: 等待超时时间,为None则使用页面对象timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等到新标签页出现 """ timeout = timeout if timeout is not None else self._driver.timeout @@ -139,7 +139,7 @@ class ChromiumElementWaiter(object): def delete(self, timeout=None, raise_err=None): """等待元素从dom删除 :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_alive', False, timeout, raise_err) @@ -147,7 +147,7 @@ class ChromiumElementWaiter(object): def display(self, timeout=None, raise_err=None): """等待元素从dom显示 :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_displayed', True, timeout, raise_err) @@ -155,7 +155,7 @@ class ChromiumElementWaiter(object): def hidden(self, timeout=None, raise_err=None): """等待元素从dom隐藏 :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_displayed', False, timeout, raise_err) @@ -163,7 +163,7 @@ class ChromiumElementWaiter(object): def covered(self, timeout=None, raise_err=None): """等待当前元素被遮盖 :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_covered', True, timeout, raise_err) @@ -171,7 +171,7 @@ class ChromiumElementWaiter(object): def not_covered(self, timeout=None, raise_err=None): """等待当前元素被遮盖 :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_covered', False, timeout, raise_err) @@ -179,7 +179,7 @@ class ChromiumElementWaiter(object): def enabled(self, timeout=None, raise_err=None): """等待当前元素变成可用 :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_enabled', True, timeout, raise_err) @@ -187,7 +187,7 @@ class ChromiumElementWaiter(object): def disabled(self, timeout=None, raise_err=None): """等待当前元素变成可用 :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ return self._wait_state('is_enabled', False, timeout, raise_err) @@ -195,7 +195,7 @@ class ChromiumElementWaiter(object): def disabled_or_delete(self, timeout=None, raise_err=None): """等待当前元素变成不可用或从DOM移除 :param timeout:超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout is None: @@ -216,7 +216,7 @@ class ChromiumElementWaiter(object): :param attr: 状态名称 :param mode: True或False :param timeout: 超时时间,为None使用元素所在页面timeout属性 - :param raise_err: 等待识别时是否报错,为None时根据Settings设置 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 :return: 是否等待成功 """ if timeout is None: From b46b516b73d410129d9b56b7d691c5957ecb81a3 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 4 Jul 2023 17:34:25 +0800 Subject: [PATCH 06/13] =?UTF-8?q?=E5=A2=9E=E5=8A=A0wait.title=5Fchange()?= =?UTF-8?q?=E5=92=8Cwait.url=5Fchange()=EF=BC=9B=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=80=E4=B8=AA=E7=9B=91=E5=90=AC=E6=97=B6=E5=8F=AF=E8=83=BD?= =?UTF-8?q?=E5=87=BA=E7=8E=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/network_listener.py | 4 +-- DrissionPage/waiter.py | 49 ++++++++++++++++++++++++++++++++ DrissionPage/waiter.pyi | 7 +++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/DrissionPage/network_listener.py b/DrissionPage/network_listener.py index 92f473b..ee11c08 100644 --- a/DrissionPage/network_listener.py +++ b/DrissionPage/network_listener.py @@ -174,7 +174,8 @@ class NetworkListener(object): def _loading_finished(self, **kwargs): """请求完成时处理方法""" request_id = kwargs['requestId'] - if request_id in self._request_ids: + dp = self._request_ids.get(request_id) + if dp: try: r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) body = r['body'] @@ -183,7 +184,6 @@ class NetworkListener(object): body = '' is_base64 = False - dp = self._request_ids[request_id] dp._raw_body = body dp._base64_body = is_base64 diff --git a/DrissionPage/waiter.py b/DrissionPage/waiter.py index 3ea50d5..25b98a1 100644 --- a/DrissionPage/waiter.py +++ b/DrissionPage/waiter.py @@ -78,6 +78,55 @@ class ChromiumBaseWaiter(object): while self._driver._upload_list: sleep(.01) + def url_change(self, text, exclude=False, timeout=None, raise_err=None): + """等待url变成包含或不包含指定文本 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当url不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._change('url', text, exclude, timeout, raise_err) + + def title_change(self, text, exclude=False, timeout=None, raise_err=None): + """等待title变成包含或不包含指定文本 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当title不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + return self._change('title', text, exclude, timeout, raise_err) + + def _change(self, arg, text, exclude=False, timeout=None, raise_err=None): + """等待指定属性变成包含或不包含指定文本 + :param arg: 要被匹配的属性 + :param text: 用于识别的文本 + :param exclude: 是否排除,为True时当属性不包含text指定文本时返回True + :param timeout: 超时时间 + :param raise_err: 等待失败时是否报错,为None时根据Settings设置 + :return: 是否等待成功 + """ + if timeout is None: + timeout = self._driver.timeout + + end_time = perf_counter() + timeout + while perf_counter() < end_time: + if arg == 'url': + val = self._driver.url + elif arg == 'title': + val = self._driver.title + else: + raise ValueError + if (not exclude and text in val) or (exclude and text not in val): + return True + sleep(.05) + + if raise_err is True or Settings.raise_when_wait_failed is True: + raise WaitTimeoutError(f'等待{arg}改变失败。') + else: + return False + def _loading(self, timeout=None, start=True, gap=.01, raise_err=None): """等待页面开始加载或加载完成 :param timeout: 超时时间,为None时使用页面timeout属性 diff --git a/DrissionPage/waiter.pyi b/DrissionPage/waiter.pyi index 41ea5e9..548c167 100644 --- a/DrissionPage/waiter.pyi +++ b/DrissionPage/waiter.pyi @@ -35,6 +35,13 @@ class ChromiumBaseWaiter(object): def upload_paths_inputted(self) -> None: ... + def url_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... + + def title_change(self, text: str, exclude: bool = False, timeout: float = None, raise_err: bool = None) -> bool: ... + + def _change(self, arg: str, text: str, exclude: bool = False, timeout: float = None, + raise_err: bool = None) -> bool: ... + class ChromiumPageWaiter(ChromiumBaseWaiter): _driver: ChromiumPage = ... From e1daebd35059c41e32a115327e57fab1c031655b Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 17 Jul 2023 17:12:45 +0800 Subject: [PATCH 07/13] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E5=BD=93=E7=BD=91=E7=AB=99headers=E4=B8=8D=E8=A7=84=E8=8C=83?= =?UTF-8?q?=E6=97=B6=E8=8E=B7=E5=8F=96=E4=B8=8D=E5=88=B0=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index b6c1193..738d9a4 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -7,7 +7,7 @@ from re import search from time import sleep from urllib.parse import urlparse -from requests import Session, Response +from requests import Session from requests.structures import CaseInsensitiveDict from tldextract import extract @@ -305,15 +305,17 @@ class SessionPage(BasePage): return r, f'状态码:{r.status_code}' -def check_headers(kwargs, headers, arg) -> bool: +def check_headers(kwargs, headers, arg): """检查kwargs或headers中是否有arg所示属性""" return arg in kwargs['headers'] or arg in headers -def set_charset(response) -> Response: +def set_charset(response): """设置Response对象的编码""" # 在headers中获取编码 content_type = response.headers.get('content-type', '').lower() + if not content_type.endswith(';'): + content_type += ';' charset = search(r'charset[=: ]*(.*)?;', content_type) if charset: From 0845814dc5fea6bc4b4c301efe146acc7ddf3c30 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 18 Jul 2023 17:25:43 +0800 Subject: [PATCH 08/13] =?UTF-8?q?set=5Fargument('--headless')=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E6=94=B9=E4=B8=BA=E6=96=B0=E5=86=99=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/configs/chromium_options.py | 7 +++++-- DrissionPage/session_page.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DrissionPage/configs/chromium_options.py b/DrissionPage/configs/chromium_options.py index 7a6b0f5..6f32d90 100644 --- a/DrissionPage/configs/chromium_options.py +++ b/DrissionPage/configs/chromium_options.py @@ -146,8 +146,11 @@ class ChromiumOptions(object): """ self.remove_argument(arg) if value is not False: - arg_str = arg if value is None else f'{arg}={value}' - self._arguments.append(arg_str) + if arg == '--headless' and value is None: + self._arguments.append('--headless=new') + else: + arg_str = arg if value is None else f'{arg}={value}' + self._arguments.append(arg_str) return self def remove_argument(self, value): diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 738d9a4..7e07aa7 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -316,7 +316,7 @@ def set_charset(response): content_type = response.headers.get('content-type', '').lower() if not content_type.endswith(';'): content_type += ';' - charset = search(r'charset[=: ]*(.*)?;', content_type) + charset = search(r'charset[=: ]*(.*)?;?', content_type) if charset: response.encoding = charset.group(1) From cbc93671272eec4e917fe2242ccb04bfafc04130 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 24 Jul 2023 15:42:43 +0800 Subject: [PATCH 09/13] =?UTF-8?q?ChromiumDirver=E5=88=A0=E9=99=A4GenericAt?= =?UTF-8?q?tr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 20 ++++++++++---------- DrissionPage/chromium_driver.py | 30 +----------------------------- DrissionPage/chromium_driver.pyi | 2 +- DrissionPage/chromium_page.py | 14 +++++++------- DrissionPage/setter.py | 2 +- DrissionPage/web_page.py | 4 ++-- README.md | 2 +- 7 files changed, 23 insertions(+), 51 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index a77e9f1..839ddf9 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -103,14 +103,14 @@ class ChromiumBase(BasePage): self._tab_obj = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) self._tab_obj.start() - self._tab_obj.DOM.enable() - self._tab_obj.Page.enable() + self._tab_obj.call_method('DOM.enable') + self._tab_obj.call_method('Page.enable') - self._tab_obj.Page.frameStoppedLoading = self._onFrameStoppedLoading - self._tab_obj.Page.frameStartedLoading = self._onFrameStartedLoading - self._tab_obj.DOM.documentUpdated = self._onDocumentUpdated - self._tab_obj.Page.loadEventFired = self._onLoadEventFired - self._tab_obj.Page.frameNavigated = self._onFrameNavigated + self._tab_obj.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading) + self._tab_obj.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading) + self._tab_obj.set_listener('DOM.documentUpdated', self._onDocumentUpdated) + self._tab_obj.set_listener('Page.loadEventFired', self._onLoadEventFired) + self._tab_obj.set_listener('Page.frameNavigated', self._onFrameNavigated) def _get_document(self): """刷新cdp使用的document数据""" @@ -238,7 +238,7 @@ class ChromiumBase(BasePage): files = self._upload_list if kwargs['mode'] == 'selectMultiple' else self._upload_list[:1] self.run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=kwargs['backendNodeId']) - self.driver.Page.fileChooserOpened = None + self.driver.set_listener('Page.fileChooserOpened', None) self.run_cdp('Page.setInterceptFileChooserDialog', enabled=False) self._upload_list = None @@ -972,7 +972,7 @@ class Screencast(object): raise ValueError('save_path必须设置。') clean_folder(self._path) if self._mode.startswith('frugal'): - self._page.driver.Page.screencastFrame = self._onScreencastFrame + self._page.driver.set_listener('Page.screencastFrame', self._onScreencastFrame) self._page.run_cdp('Page.startScreencast', everyNthFrame=1, quality=100) elif not self._mode.startswith('js'): @@ -1029,7 +1029,7 @@ class Screencast(object): return path if self._mode.startswith('frugal'): - self._page.driver.Page.screencastFrame = None + self._page.driver.set_listener('Page.screencastFrame', None) self._page.run_cdp('Page.stopScreencast') else: self._enable = False diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index 9d053f3..09e9cbf 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -3,7 +3,6 @@ @Author : g1879 @Contact : g1879@qq.com """ -from functools import partial from json import dumps, loads from queue import Queue, Empty from threading import Thread, Event @@ -11,26 +10,6 @@ from threading import Thread, Event from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ create_connection -from .errors import CDPError - - -class GenericAttr(object): - def __init__(self, name, tab): - self.__dict__['name'] = name - self.__dict__['tab'] = tab - - def __getattr__(self, item): - method_name = f"{self.name}.{item}" - event_listener = self.tab.get_listener(method_name) - - if event_listener: - return event_listener - - return partial(self.tab.call_method, method_name) - - def __setattr__(self, key, value): - self.tab.set_listener(f"{self.name}.{key}", value) - class ChromiumDriver(object): _INITIAL_ = 'initial' @@ -167,12 +146,7 @@ class ChromiumDriver(object): self.event_queue.task_done() - def __getattr__(self, item): - attr = GenericAttr(item, self) - setattr(self, item, attr) - return attr - - def call_method(self, _method, *args, **kwargs): + def call_method(self, _method, **kwargs): """执行cdp方法 :param _method: cdp方法名 :param args: cdp参数 @@ -182,8 +156,6 @@ class ChromiumDriver(object): if not self._started: self.start() # raise RuntimeError("不能在启动前调用方法。") - if args: - raise CDPError("参数必须是key=value形式。") if self._stopped.is_set(): return {'error': 'tab closed', 'type': 'tab_closed'} diff --git a/DrissionPage/chromium_driver.pyi b/DrissionPage/chromium_driver.pyi index df1bf9e..0c63041 100644 --- a/DrissionPage/chromium_driver.pyi +++ b/DrissionPage/chromium_driver.pyi @@ -47,7 +47,7 @@ class ChromiumDriver(object): def __getattr__(self, item: str) -> Callable: ... - def call_method(self, _method: str, *args, **kwargs) -> dict: ... + def call_method(self, _method: str, **kwargs) -> dict: ... def start(self) -> bool: ... diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 4903560..936243e 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -95,8 +95,8 @@ class ChromiumPage(ChromiumBase): self._browser_driver.start() self._alert = Alert() - self._tab_obj.Page.javascriptDialogOpening = self._on_alert_open - self._tab_obj.Page.javascriptDialogClosed = self._on_alert_close + self._tab_obj.set_listener('Page.javascriptDialogOpening', self._on_alert_open) + self._tab_obj.set_listener('Page.javascriptDialogClosed', self._on_alert_close) self._rect = None self._main_tab = self.tab_id @@ -106,7 +106,7 @@ class ChromiumPage(ChromiumBase): # pass self._process_id = None - r = self.browser_driver.SystemInfo.getProcessInfo() + r = self.browser_driver.call_method('SystemInfo.getProcessInfo') if 'processInfo' not in r: return None for i in r['processInfo']: @@ -329,14 +329,14 @@ class ChromiumPage(ChromiumBase): res_text = self._alert.text if self._alert.type == 'prompt': - self.driver.Page.handleJavaScriptDialog(accept=accept, promptText=send) + self.driver.call_method('Page.handleJavaScriptDialog', accept=accept, promptText=send) else: - self.driver.Page.handleJavaScriptDialog(accept=accept) + self.driver.call_method('Page.handleJavaScriptDialog', accept=accept) return res_text def quit(self): """关闭浏览器""" - self._tab_obj.Browser.close() + self._tab_obj.call_method('Browser.close') self._tab_obj.stop() ip, port = self.address.split(':') while port_is_using(ip, port): @@ -431,7 +431,7 @@ class ChromiumTabRect(object): def _get_browser_rect(self): """获取浏览器范围信息""" - return self._page.browser_driver.Browser.getWindowForTarget(targetId=self._page.tab_id)['bounds'] + return self._page.browser_driver.call_method('Browser.getWindowForTarget', targetId=self._page.tab_id)['bounds'] # class BaseDownloadSetter(DownloadSetter): diff --git a/DrissionPage/setter.py b/DrissionPage/setter.py index 754bd32..1faeaa2 100644 --- a/DrissionPage/setter.py +++ b/DrissionPage/setter.py @@ -101,7 +101,7 @@ class ChromiumBaseSetter(object): :return: None """ if not self._page._upload_list: - self._page.driver.Page.fileChooserOpened = self._page._onFileChooserOpened + self._page.driver.set_listener('Page.fileChooserOpened', self._page._onFileChooserOpened) self._page.run_cdp('Page.setInterceptFileChooserDialog', enabled=True) if isinstance(files, str): diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index e85bdca..b36b09c 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -382,7 +382,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): if self._has_driver: self.change_mode('s') try: - self.driver.Browser.close() + self.driver.call_method('Browser.close') except Exception: pass self._tab_obj.stop() @@ -421,7 +421,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): self._response = None self._has_session = None if self._has_driver: - self._tab_obj.Browser.close() + self._tab_obj.call_method('Browser.close') self._tab_obj.stop() self._tab_obj = None self._has_driver = None diff --git a/README.md b/README.md index b94e27e..0c1b297 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ python 版本:3.6 及以上 用 requests 做数据采集面对要登录的网站时,要分析数据包、JS 源码,构造复杂的请求,往往还要应付验证码、JS 混淆、签名参数等反爬手段,门槛较高,开发效率不高。 使用浏览器,可以很大程度上绕过这些坑,但浏览器运行效率不高。 -因此,这个库设计初衷,是将它们合而为一,同时实现“写得快”和“跑得快”。能够在不同须要时切换相应模式,并提供一种人性化的使用方法,提高开发和运行效率。 +因此,这个库设计初衷,是将它们合而为一,同时实现“写得快”和“跑得快”。能够在不同需要时切换相应模式,并提供一种人性化的使用方法,提高开发和运行效率。 除了合并两者,本库还以网页为单位封装了常用功能,提供非常简便的操作和语句,使用户可减少考虑细节,专注功能实现。 以简单的方式实现强大的功能,使代码更优雅。 以前的版本是对 selenium 进行重新封装实现的。从 3.0 开始,作者另起炉灶,对底层进行了重新开发,摆脱对 selenium 的依赖,增强了功能,提升了运行效率。 From 08831e7ce849ff54fa349099c5f28ca0c2401efe Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 27 Jul 2023 15:03:20 +0800 Subject: [PATCH 10/13] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dback()=E5=90=8E?= =?UTF-8?q?=E9=80=80=E4=B8=8D=E5=87=86=E7=A1=AE=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 839ddf9..93b8a77 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -624,14 +624,14 @@ class ChromiumBase(BasePage): index = history['currentIndex'] history = history['entries'] direction = 1 if steps > 0 else -1 - curr_url = history[index]['userTypedURL'] + curr_url = history[index]['url'] nid = None for num in range(abs(steps)): for i in history[index::direction]: index += direction - if i['userTypedURL'] != curr_url: + if i['url'] != curr_url: nid = i['id'] - curr_url = i['userTypedURL'] + curr_url = i['url'] break if nid: From dcfa1ff2a1230ae9a61c56248137d3e93371c43a Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 5 Aug 2023 15:36:24 +0800 Subject: [PATCH 11/13] =?UTF-8?q?=E5=85=83=E7=B4=A0=E5=B1=8F=E5=B9=95?= =?UTF-8?q?=E5=9D=90=E6=A0=87=E4=B9=98=E4=BB=A5=E5=83=8F=E7=B4=A0=E6=AF=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_element.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index ad1930f..9e56a8e 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1516,21 +1516,24 @@ class Locations(object): """返回元素左上角在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_location - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * pr) @property def screen_midpoint(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_midpoint - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * pr) @property def screen_click_point(self): """返回元素中点在屏幕上坐标,左上角为(0, 0)""" vx, vy = self._ele.page.rect.viewport_location ex, ey = self.viewport_click_point - return vx + ex, ey + vy + pr = self._ele.page.run_js('return window.devicePixelRatio;') + return int((vx + ex) * pr), int((ey + vy) * pr) def _get_viewport_rect(self, quad): """按照类型返回在可视窗口中的范围 From dea41ab0dc67bf373a68bca019d0582efb9a19aa Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 5 Aug 2023 15:47:10 +0800 Subject: [PATCH 12/13] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=BD=95=E5=B1=8F?= =?UTF-8?q?=E8=A7=86=E9=A2=91=E7=BC=96=E7=A0=81=E4=B8=80=E4=BA=9B=E7=94=B5?= =?UTF-8?q?=E8=84=91=E4=B8=8D=E6=94=AF=E6=8C=81=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 93b8a77..283c199 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -1043,7 +1043,7 @@ class Screencast(object): raise TypeError('转换成视频仅支持英文路径和文件名。') try: - from cv2 import VideoWriter, imread + from cv2 import VideoWriter, imread, VideoWriter_fourcc from numpy import fromfile, uint8 except ModuleNotFoundError: raise ModuleNotFoundError('请先安装cv2,pip install opencv-python') @@ -1053,10 +1053,7 @@ class Screencast(object): imgInfo = img.shape size = (imgInfo[1], imgInfo[0]) - # if video_name and not video_name.endswith('mp4'): - # video_name = f'{video_name}.mp4' - # name = f'{time()}.mp4' if not video_name else video_name - videoWrite = VideoWriter(path, 14, 5, size) + videoWrite = VideoWriter(path, VideoWriter_fourcc(*"mp4v"), 5, size) for i in pic_list: img = imread(str(i)) From 01a930b7403106a936f8ce51559909299d298b65 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 9 Aug 2023 11:50:49 +0800 Subject: [PATCH 13/13] =?UTF-8?q?get=5Fsrc()=E6=96=B9=E6=B3=95=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0base64=5Fto=5Fbytes=E5=8F=82=E6=95=B0=EF=BC=9B?= =?UTF-8?q?=E4=BD=BF=E7=94=A81.0.0=E7=89=88DownloadKit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_element.py | 12 ++++++++---- DrissionPage/chromium_element.pyi | 2 +- requirements.txt | 2 +- setup.py | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 9e56a8e..3750000 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -439,9 +439,10 @@ class ChromiumElement(DrissionElement): js = f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");' return self.run_js(js) - def get_src(self, timeout=None): - """返回元素src资源,base64的会转为bytes返回,其它返回str + def get_src(self, timeout=None, base64_to_bytes=True): + """返回元素src资源,base64的可转为bytes返回,其它返回str :param timeout: 等待资源加载的超时时间 + :param base64_to_bytes: 为True时,如果是base64数据,转换为bytes格式 :return: 资源内容 """ timeout = self.page.timeout if timeout is None else timeout @@ -474,8 +475,11 @@ class ChromiumElement(DrissionElement): return None if result['base64Encoded']: - from base64 import b64decode - data = b64decode(result['content']) + if base64_to_bytes: + from base64 import b64decode + data = b64decode(result['content']) + else: + data = result['content'] else: data = result['content'] return data diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index 3b5dfe1..da4654f 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -178,7 +178,7 @@ class ChromiumElement(DrissionElement): def style(self, style: str, pseudo_ele: str = '') -> str: ... - def get_src(self, timeout: float = None) -> Union[bytes, str, None]: ... + def get_src(self, timeout: float = None, base64_to_bytes: bool = True) -> Union[bytes, str, None]: ... def save(self, path: [str, bool] = None, rename: str = None, timeout: float = None) -> None: ... diff --git a/requirements.txt b/requirements.txt index d04ff8e..4e712ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ requests lxml cssselect -DownloadKit>=0.5.3 +DownloadKit>=1.0.0 FlowViewer>=0.3.0 websocket-client click diff --git a/setup.py b/setup.py index d621cb6..ab09e66 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.30", + version="3.3.0", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", @@ -22,7 +22,7 @@ setup( 'lxml', 'requests', 'cssselect', - 'DownloadKit>=0.5.3', + 'DownloadKit>=1.0.0', 'FlowViewer>=0.3.0', 'websocket-client', 'click',