From 4f12493edb882559f21e53f1d8e587bc9f6b9e7f Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 18 Jan 2024 01:06:50 +0800 Subject: [PATCH] =?UTF-8?q?4.0.3=E5=A2=9E=E5=8A=A0DrissionPage.items?= =?UTF-8?q?=EF=BC=9B=E4=BF=AE=E5=A4=8D=E5=90=88=E5=B9=B6=E4=BA=86=E6=97=A7?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=9B=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0get=5Fblob()=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/__init__.py | 2 +- DrissionPage/_configs/options_manage.py | 6 ++-- DrissionPage/_configs/options_manage.pyi | 5 --- DrissionPage/_elements/chromium_element.py | 38 +++++----------------- DrissionPage/_elements/session_element.py | 22 ++++++------- DrissionPage/_functions/tools.py | 2 ++ DrissionPage/_functions/web.py | 27 +++++++++++++++ DrissionPage/_functions/web.pyi | 3 ++ DrissionPage/_units/setter.py | 7 +++- DrissionPage/common.py | 3 +- DrissionPage/items.py | 9 +++++ setup.py | 2 +- 12 files changed, 74 insertions(+), 52 deletions(-) create mode 100644 DrissionPage/items.py diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index 06964d5..91fc392 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -14,4 +14,4 @@ from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions __all__ = ['ChromiumPage', 'ChromiumOptions', 'SessionOptions', 'SessionPage', 'WebPage', '__version__'] -__version__ = '4.0.2' +__version__ = '4.0.3' diff --git a/DrissionPage/_configs/options_manage.py b/DrissionPage/_configs/options_manage.py index 703adbd..bb5d964 100644 --- a/DrissionPage/_configs/options_manage.py +++ b/DrissionPage/_configs/options_manage.py @@ -8,6 +8,7 @@ from configparser import RawConfigParser, NoSectionError, NoOptionError from pathlib import Path from pprint import pprint +from time import sleep class OptionsManager(object): @@ -28,8 +29,9 @@ class OptionsManager(object): self.ini_path = str(path) if not Path(self.ini_path).exists(): - input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission' - 'pagedocs/advance/packaging/') + print('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\n' + 'https://g1879.gitee.io/drissionpagedocs/advance/packaging/') + sleep(10) self._conf = RawConfigParser() self._conf.read(self.ini_path, encoding='utf-8') diff --git a/DrissionPage/_configs/options_manage.pyi b/DrissionPage/_configs/options_manage.pyi index 805ae92..6bb3e4f 100644 --- a/DrissionPage/_configs/options_manage.pyi +++ b/DrissionPage/_configs/options_manage.pyi @@ -12,11 +12,6 @@ from typing import Any class OptionsManager(object): ini_path: str = ... _conf: RawConfigParser = ... - paths: dict = ... - chrome_options: dict = ... - session_options: dict = ... - timeouts: dict = ... - proxies: dict = ... def __init__(self, path: str = None): ... diff --git a/DrissionPage/_elements/chromium_element.py b/DrissionPage/_elements/chromium_element.py index a7e49fe..90fa300 100644 --- a/DrissionPage/_elements/chromium_element.py +++ b/DrissionPage/_elements/chromium_element.py @@ -19,7 +19,7 @@ from .._base.base import DrissionElement, BaseElement from .._functions.keys import input_text_or_keys from .._functions.locator import get_loc from .._functions.settings import Settings -from .._functions.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll +from .._functions.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll, get_blob from .._units.clicker import Clicker from .._units.rect import ElementRect from .._units.scroller import ElementScroller @@ -495,26 +495,9 @@ class ChromiumElement(DrissionElement): end_time = perf_counter() + timeout while perf_counter() < end_time: if is_blob: - js = """ - function fetchData(url) { - return new Promise((resolve, reject) => { - var xhr = new XMLHttpRequest(); - xhr.responseType = 'blob'; - xhr.onload = function() { - var reader = new FileReader(); - reader.onloadend = function() {resolve(reader.result);} - reader.readAsDataURL(xhr.response); - }; - xhr.open('GET', url, true); - xhr.send(); - }); - } - """ - try: - result = self.page.run_js(js, src) + result = get_blob(self.page, src, base64_to_bytes) + if result: break - except: - continue else: src = self.prop('currentSrc') @@ -534,18 +517,13 @@ class ChromiumElement(DrissionElement): return None if is_blob: - if base64_to_bytes: - from base64 import b64decode - return b64decode(result.split(',', 1)[-1]) - else: - return result + return result + if result['base64Encoded'] and base64_to_bytes: + from base64 import b64decode + return b64decode(result['content']) else: - if result['base64Encoded'] and base64_to_bytes: - from base64 import b64decode - return b64decode(result['content']) - else: - return result['content'] + return result['content'] def save(self, path=None, name=None, timeout=None): """保存图片或其它有src属性的元素的资源 diff --git a/DrissionPage/_elements/session_element.py b/DrissionPage/_elements/session_element.py index 5ab59f3..b8b9f9d 100644 --- a/DrissionPage/_elements/session_element.py +++ b/DrissionPage/_elements/session_element.py @@ -40,7 +40,7 @@ class SessionElement(DrissionElement): """在内部查找元素 例:ele2 = ele1('@id=ele_id') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本 """ return self.ele(loc_or_str) @@ -80,12 +80,13 @@ class SessionElement(DrissionElement): """返回未格式化处理的元素内文本""" return str(self._inner_ele.text_content()) - def parent(self, level_or_loc=1): + def parent(self, level_or_loc=1, index=1): """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,或定位符 + :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果 :return: 上级元素对象 """ - return super().parent(level_or_loc) + return super().parent(level_or_loc, index) def child(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素的一个符合条件的直接子元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -95,7 +96,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本 """ - return super().child(index, filter_loc, timeout, ele_only=ele_only) + return super().child(filter_loc, index, timeout, ele_only=ele_only) def prev(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素前面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -105,7 +106,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().prev(index, filter_loc, timeout, ele_only=ele_only) + return super().prev(filter_loc, index, timeout, ele_only=ele_only) def next(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回当前元素后面一个符合条件的同级元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -115,7 +116,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 同级元素 """ - return super().next(index, filter_loc, timeout, ele_only=ele_only) + return super().next(filter_loc, index, timeout, ele_only=ele_only) def before(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中当前元素前面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -126,7 +127,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ - return super().before(index, filter_loc, timeout, ele_only=ele_only) + return super().before(filter_loc, index, timeout, ele_only=ele_only) def after(self, filter_loc='', index=1, timeout=None, ele_only=True): """返回文档中此当前元素后面符合条件的一个元素,可用查询语法筛选,可指定返回筛选结果的第几个 @@ -137,7 +138,7 @@ class SessionElement(DrissionElement): :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ - return super().after(index, filter_loc, timeout, ele_only=ele_only) + return super().after(filter_loc, index, timeout, ele_only=ele_only) def children(self, filter_loc='', timeout=0, ele_only=True): """返回当前元素符合条件的直接子元素或节点组成的列表,可用查询语法筛选 @@ -231,7 +232,7 @@ class SessionElement(DrissionElement): def eles(self, loc_or_str, timeout=None): """返回当前元素下级所有符合条件的子元素、属性或节点文本 :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 + :param timeout: 不起实际作用 :return: SessionElement对象或属性、文本组成的列表 """ return self._ele(loc_or_str, index=None) @@ -327,8 +328,7 @@ def make_session_ele(html_or_ele, loc=None, index=1): loc = loc[0], loc_str - # ChromiumElement, DriverElement - elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): + elif the_type.endswith(".ChromiumElement'>"): loc_str = loc[1] if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): loc_str = f'.{loc[1]}' diff --git a/DrissionPage/_functions/tools.py b/DrissionPage/_functions/tools.py index b785ed1..3e7f8ff 100644 --- a/DrissionPage/_functions/tools.py +++ b/DrissionPage/_functions/tools.py @@ -215,6 +215,8 @@ def raise_error(result, ignore=None): r = CookieFormatError(f'cookie格式不正确:{result["args"]}') elif error == 'Given expression does not evaluate to a function': r = JavaScriptError(f'传入的js无法解析成函数:\n{result["args"]["functionDeclaration"]}') + elif error.endswith("' wasn't found"): + r = RuntimeError(f'你的浏览器可能太旧。\nmethod:{result["method"]}\nargs:{result["args"]}') elif result['type'] in ('call_method_error', 'timeout'): from DrissionPage import __version__ from time import process_time diff --git a/DrissionPage/_functions/web.py b/DrissionPage/_functions/web.py index 9479c05..f3a458e 100644 --- a/DrissionPage/_functions/web.py +++ b/DrissionPage/_functions/web.py @@ -328,3 +328,30 @@ def is_cookie_in_driver(page, cookie): if cookie['name'] == c['name'] and cookie['value'] == c['value']: return True return False + + +def get_blob(page, url, base64_to_bytes=True): + if not url.startswith('blob'): + return None + js = """ + function fetchData(url) { + return new Promise((resolve, reject) => { + var xhr = new XMLHttpRequest(); + xhr.responseType = 'blob'; + xhr.onload = function() { + var reader = new FileReader(); + reader.onloadend = function(){resolve(reader.result);} + reader.readAsDataURL(xhr.response); + }; + xhr.open('GET', url, true); + xhr.send(); + }); + } +""" + try: + result = page.run_js(js, url) + except: + return None + if base64_to_bytes: + from base64 import b64decode + return b64decode(result.split(',', 1)[-1]) diff --git a/DrissionPage/_functions/web.pyi b/DrissionPage/_functions/web.pyi index 240bdab..e86b124 100644 --- a/DrissionPage/_functions/web.pyi +++ b/DrissionPage/_functions/web.pyi @@ -47,3 +47,6 @@ def set_browser_cookies(page: ChromiumBase, cookies: Union[RequestsCookieJar, li def is_cookie_in_driver(page: ChromiumBase, cookie: dict) -> bool: ... + + +def get_blob(page: ChromiumBase, url: str, base64_to_bytes: bool = True) -> bytes: ... diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py index 17e4b44..2e10d4f 100644 --- a/DrissionPage/_units/setter.py +++ b/DrissionPage/_units/setter.py @@ -6,6 +6,7 @@ @License : BSD 3-Clause. """ from pathlib import Path +from time import sleep from requests.structures import CaseInsensitiveDict @@ -608,7 +609,11 @@ class WindowSetter(object): def _get_info(self): """获取窗口位置及大小信息""" - return self._page.run_cdp('Browser.getWindowForTarget') + for _ in range(50): + try: + return self._page.run_cdp('Browser.getWindowForTarget') + except: + sleep(.1) def _perform(self, bounds): """执行改变窗口大小操作 diff --git a/DrissionPage/common.py b/DrissionPage/common.py index df2b6aa..5c1e2b8 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -10,6 +10,7 @@ from ._functions.by import By from ._functions.keys import Keys from ._functions.settings import Settings from ._functions.tools import wait_until, configs_to_here +from ._functions.web import get_blob from ._units.actions import Actions -__all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here'] +__all__ = ['make_session_ele', 'Actions', 'Keys', 'By', 'Settings', 'wait_until', 'configs_to_here', 'get_blob'] diff --git a/DrissionPage/items.py b/DrissionPage/items.py new file mode 100644 index 0000000..fea5bb6 --- /dev/null +++ b/DrissionPage/items.py @@ -0,0 +1,9 @@ +# -*- coding:utf-8 -*- +from ._elements.chromium_element import ChromiumElement, ShadowRoot +from ._elements.none_element import NoneElement +from ._elements.session_element import SessionElement +from ._pages.chromium_frame import ChromiumFrame +from ._pages.chromium_tab import ChromiumTab, WebPageTab + +__all__ = ['ChromiumElement', 'ShadowRoot', 'NoneElement', 'SessionElement', 'ChromiumFrame', 'ChromiumTab', + 'WebPageTab'] diff --git a/setup.py b/setup.py index f228274..2a450f9 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="4.0.2", + version="4.0.3", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.",