diff --git a/DrissionPage/action_chains.pyi b/DrissionPage/action_chains.pyi index f44fd01..b9558d5 100644 --- a/DrissionPage/action_chains.pyi +++ b/DrissionPage/action_chains.pyi @@ -6,17 +6,16 @@ from typing import Union, Tuple from .chromium_base import ChromiumBase -from .tab import Tab +from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement from .chromium_page import ChromiumPage class ActionChains: - """用于实现动作链的类""" def __init__(self, page:ChromiumBase): self.page: ChromiumPage = ... - self._dr: Tab = ... + self._dr: ChromiumDriver = ... self.curr_x: int = ... self.curr_y: int = ... self.modifier: int = ... diff --git a/DrissionPage/base.py b/DrissionPage/base.py index 07ddbfc..cd4fc99 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -49,10 +49,6 @@ class BaseElement(BaseParser): def tag(self): return - @property - def is_valid(self): - return True - @abstractmethod def _ele(self, loc_or_str, timeout=None, single=True, relative=False): pass diff --git a/DrissionPage/base.pyi b/DrissionPage/base.pyi index 0175360..76832f3 100644 --- a/DrissionPage/base.pyi +++ b/DrissionPage/base.pyi @@ -8,7 +8,6 @@ from typing import Union, Tuple, List class BaseParser(object): - """所有页面、元素类的基类""" def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... @@ -29,7 +28,6 @@ class BaseParser(object): class BaseElement(BaseParser): - """各元素类的基类""" def __init__(self, page: BasePage): self.page: BasePage = ... @@ -38,9 +36,6 @@ class BaseElement(BaseParser): @property def tag(self)->str: ... - @property - def is_valid(self)->bool: ... - @abstractmethod def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout:float=..., single:bool=..., relative:bool=...): ... 
@@ -56,7 +51,6 @@ class BaseElement(BaseParser): class DrissionElement(BaseElement): - """DriverElement 和 SessionElement的基类,但不是ShadowRootElement的基类""" def __init__(self, page: BasePage = ...): @@ -138,7 +132,6 @@ class DrissionElement(BaseElement): class BasePage(BaseParser): - """页面类的基类""" def __init__(self, timeout: float = ...): self._url_available: bool = ... diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 78ac759..51cb540 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -9,11 +9,11 @@ from time import perf_counter, sleep from requests import Session from .base import BasePage -from .chromium_element import ChromiumElementWaiter, ChromeScroll, ChromiumElement, run_script, make_chromium_ele +from .chromium_element import ChromiumElementWaiter, ChromiumScroll, ChromiumElement, run_script, make_chromium_ele from .common import get_loc from .config import cookies_to_tuple from .session_element import make_session_ele -from .tab import Tab +from .chromium_driver import ChromiumDriver class ChromiumBase(BasePage): @@ -33,9 +33,9 @@ class ChromiumBase(BasePage): self.timeouts = Timeout(self) self._connect_browser(address, tab_id) - def _connect_browser(self, addr_tab_opts=None, tab_id=None): + def _connect_browser(self, addr_driver_opts=None, tab_id=None): """连接浏览器,在第一次时运行 \n - :param addr_tab_opts: 浏览器地址、Tab对象或DriverOptions对象 + :param addr_driver_opts: 浏览器地址、ChromiumDriver对象或DriverOptions对象 :param tab_id: 要控制的标签页id,不指定默认为激活的 :return: None """ @@ -45,7 +45,7 @@ class ChromiumBase(BasePage): self._first_run = True self._is_reading = False # 用于避免不同线程重复读取document - self.address = addr_tab_opts + self.address = addr_driver_opts if not tab_id: json = self._control_session.get(f'http://{self.address}/json').json() tab_id = [i['id'] for i in json if i['type'] == 'page'][0] @@ -61,8 +61,8 @@ class ChromiumBase(BasePage): """ self._is_loading = True if tab_id: - self._tab_obj = Tab(id=tab_id, type='page', - 
webSocketDebuggerUrl=f'ws://{self.address}/devtools/page/{tab_id}') + self._tab_obj = ChromiumDriver(id=tab_id, type='page', + webSocketDebuggerUrl=f'ws://{self.address}/devtools/page/{tab_id}') self._tab_obj.start() self._tab_obj.DOM.enable() @@ -195,16 +195,17 @@ class ChromiumBase(BasePage): @property def driver(self): - """返回用于控制浏览器的Tab对象""" + """返回用于控制浏览器的ChromiumDriver对象""" return self._tab_obj @property def _driver(self): + """返回用于控制浏览器的ChromiumDriver对象""" return self._tab_obj @property def _wait_driver(self): - """返回用于控制浏览器的Tab对象,会先等待页面加载完毕""" + """返回用于控制浏览器的ChromiumDriver对象,会先等待页面加载完毕""" while self._is_loading: sleep(.1) return self._tab_obj @@ -226,8 +227,11 @@ class ChromiumBase(BasePage): @property def json(self): - """当返回内容是json格式时,返回对应的字典""" - return loads(self('t:pre').text) + """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" + try: + return loads(self('t:pre', timeout=.5).text) + except Exception: + return None @property def tab_id(self): @@ -260,7 +264,7 @@ class ChromiumBase(BasePage): def scroll(self): """返回用于滚动滚动条的对象""" if not hasattr(self, '_scroll'): - self._scroll = ChromeScroll(self) + self._scroll = ChromiumScroll(self) return self._scroll @property diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index b9e4e1e..b4717ef 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -10,15 +10,14 @@ from requests import Session from requests.cookies import RequestsCookieJar from .base import BasePage -from .chromium_element import ChromiumElement, ChromiumElementWaiter, ChromeScroll +from .chromium_element import ChromiumElement, ChromiumElementWaiter, ChromiumScroll from .chromium_frame import ChromiumFrame from .config import DriverOptions from .session_element import SessionElement -from .tab import Tab +from .chromium_driver import ChromiumDriver class ChromiumBase(BasePage): - """标签页、frame、页面基类""" def __init__(self, address: str, @@ -26,20 +25,20 @@ class ChromiumBase(BasePage): timeout: float = 
...): self._control_session: Session = ... self.address: str = ... - self._tab_obj: Tab = ... + self._tab_obj: ChromiumDriver = ... self._is_reading: bool = ... self.timeouts: Timeout = ... self._first_run: bool = ... self._is_loading: bool = ... self._page_load_strategy: str = ... - self._scroll: ChromeScroll = ... + self._scroll: ChromiumScroll = ... self._url: str = ... self._root_id: str = ... self._debug: bool = ... self._debug_recorder: Recorder = ... def _connect_browser(self, - addr_tab_opts: Union[str, Tab, DriverOptions] = ..., + addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ..., tab_id: str = ...) -> None: ... def _init_page(self, tab_id: str = ...) -> None: ... @@ -67,13 +66,13 @@ class ChromiumBase(BasePage): def title(self) -> str: ... @property - def driver(self) -> Tab: ... + def driver(self) -> ChromiumDriver: ... @property - def _driver(self) -> Tab: ... + def _driver(self) -> ChromiumDriver: ... @property - def _wait_driver(self) -> Tab: ... + def _wait_driver(self) -> ChromiumDriver: ... @property def is_loading(self) -> bool: ... @@ -85,7 +84,7 @@ class ChromiumBase(BasePage): def html(self) -> str: ... @property - def json(self) -> dict: ... + def json(self) -> Union[dict, None]: ... @property def tab_id(self) -> str: ... @@ -103,7 +102,7 @@ class ChromiumBase(BasePage): def page_load_strategy(self) -> str: ... @property - def scroll(self) -> ChromeScroll: ... + def scroll(self) -> ChromiumScroll: ... @property def set_page_load_strategy(self) -> PageLoadStrategy: ... @@ -188,7 +187,6 @@ class ChromiumBase(BasePage): class Timeout(object): - """用于保存d模式timeout信息的类""" def __init__(self, page: ChromiumBase): self.page: ChromiumBase = ... 
diff --git a/DrissionPage/tab.py b/DrissionPage/chromium_driver.py similarity index 82% rename from DrissionPage/tab.py rename to DrissionPage/chromium_driver.py index dc2fcb2..e325f4a 100644 --- a/DrissionPage/tab.py +++ b/DrissionPage/chromium_driver.py @@ -1,14 +1,9 @@ # -*- coding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" from functools import partial from json import dumps, loads from logging import getLogger from os import getenv from threading import Thread, Event -from warnings import warn from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ create_connection @@ -39,7 +34,7 @@ class GenericAttr(object): self.tab.set_listener("%s.%s" % (self.name, key), value) -class Tab(object): +class ChromiumDriver(object): status_initial = 'initial' status_started = 'started' status_stopped = 'stopped' @@ -101,7 +96,7 @@ class Tab(object): return self.method_results[message['id']].get(timeout=q_timeout) except queue.Empty: if isinstance(timeout, (int, float)) and timeout <= 0: - raise TimeoutException("Calling %s timeout" % message['method']) + raise TimeoutError(f"调用{message['method']}超时。") continue @@ -132,8 +127,8 @@ class Tab(object): elif "id" in message: if message["id"] in self.method_results: self.method_results[message['id']].put(message) - else: # pragma: no cover - warn("unknown message: %s" % message) + # else: # pragma: no cover + # warn("unknown message: %s" % message) def _handle_event_loop(self): while not self._stopped.is_set(): @@ -157,45 +152,27 @@ class Tab(object): def call_method(self, _method, *args, **kwargs): if not self._started: - raise RuntimeException("Cannot call method before it is started") + raise RuntimeError("不能在启动前调用方法。") if args: - raise CallMethodException("the params should be key=value format") + raise CallMethodException("参数必须是key=value形式。") if self._stopped.is_set(): - raise RuntimeException("Tab has been stopped") + raise RuntimeError("Driver已经停止。") 
timeout = kwargs.pop("_timeout", None) result = self._send({"method": _method, "params": kwargs}, timeout=timeout) if 'result' not in result and 'error' in result: - warn("%s error: %s" % (_method, result['error']['message'])) - raise CallMethodException("calling method: %s error: %s" % (_method, result['error']['message'])) + raise CallMethodException(f"调用方法:{_method} 错误:{result['error']['message']}") return result['result'] - def set_listener(self, event, callback): - if not callback: - return self.event_handlers.pop(event, None) - - if not callable(callback): - raise RuntimeException("callback should be callable") - - self.event_handlers[event] = callback - return True - - def get_listener(self, event): - return self.event_handlers.get(event, None) - - def del_all_listeners(self): - self.event_handlers = {} - return True - def start(self): if self._started: return False if not self._websocket_url: - raise RuntimeException("Already has another client connect to this tab") + raise RuntimeError("已存在另一个连接。") self._started = True self.status = self.status_started @@ -210,7 +187,7 @@ class Tab(object): return False if not self._started: - raise RuntimeException("Tab is not running") + raise RuntimeError("Driver正在运行。") self.status = self.status_stopped self._stopped.set() @@ -218,9 +195,26 @@ class Tab(object): self._ws.close() return True + def set_listener(self, event, callback): + if not callback: + return self.event_handlers.pop(event, None) + + if not callable(callback): + raise RuntimeError("方法不能调用。") + + self.event_handlers[event] = callback + return True + + def get_listener(self, event): + return self.event_handlers.get(event, None) + + def del_all_listeners(self): + self.event_handlers = {} + return True + def wait(self, timeout=None): if not self._started: - raise RuntimeException("Tab is not running") + raise RuntimeError("Driver仍未运行。") if timeout: return self._stopped.wait(timeout) @@ -230,30 +224,10 @@ class Tab(object): return True def __str__(self): - 
return "" % self.id + return f"" __repr__ = __str__ -class PyChromeException(Exception): - pass - - -class UserAbortException(PyChromeException): - pass - - -class TabConnectionException(PyChromeException): - pass - - -class CallMethodException(PyChromeException): - pass - - -class TimeoutException(PyChromeException): - pass - - -class RuntimeException(PyChromeException): +class CallMethodException(Exception): pass diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 83cbcb9..3106d02 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -116,7 +116,7 @@ class ChromiumElement(DrissionElement): @property def doc_id(self): - """返回document的object id""" + """返回所在document的object id""" return self._doc_id @property @@ -192,7 +192,7 @@ class ChromiumElement(DrissionElement): def scroll(self): """用于滚动滚动条的对象""" if self._scroll is None: - self._scroll = ChromeScroll(self) + self._scroll = ChromiumScroll(self) return self._scroll def parent(self, level_or_loc=1): @@ -277,7 +277,7 @@ class ChromiumElement(DrissionElement): if self.tag != 'select': self._select = False else: - self._select = ChromeSelect(self) + self._select = ChromiumSelect(self) return self._select @@ -392,7 +392,7 @@ class ChromiumElement(DrissionElement): """运行javascript代码 \n :param script: js文本 :param as_expr: 是否作为表达式运行,为True时args无效 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[2]... + :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... 
:return: 运行的结果 """ return run_script(self, script, as_expr, self.page.timeouts.script, args, True) @@ -492,7 +492,7 @@ class ChromiumElement(DrissionElement): def save(self, path=None, rename=None): """保存图片或其它有src属性的元素的资源 \n - :param path: 文件保存路径,为None时保存到当前文件夹,为False时不保存 + :param path: 文件保存路径,为None时保存到当前文件夹 :param rename: 文件名称,为None时从资源url获取 :return: None """ @@ -1399,29 +1399,26 @@ def _offset_scroll(ele, offset_x, offset_y): return cx, cy -class ChromeScroll(object): +class ChromiumScroll(object): """用于滚动的对象""" def __init__(self, page_or_ele): """ :param page_or_ele: ChromePage或ChromiumElement """ + self.page_or_ele = page_or_ele if isinstance(page_or_ele, ChromiumElement): self.t1 = self.t2 = 'this' - self.obj_id = page_or_ele.obj_id - self.page = page_or_ele.page else: self.t1 = 'window' self.t2 = 'document.documentElement' - self.obj_id = None - self.page = page_or_ele def _run_script(self, js): js = js.format(self.t1, self.t2, self.t2) - if self.obj_id: - self.page.run_script(js) + if self.t1 == 'this': # 在元素上滚动 + self.page_or_ele.run_script(js) else: - self.page.driver.Runtime.evaluate(expression=js) + self.page_or_ele.run_script(js, as_expr=True) def to_top(self): """滚动到顶端,水平位置不变""" @@ -1482,8 +1479,8 @@ class ChromeScroll(object): self._run_script(f'{{}}.scrollBy({pixel},0);') -class ChromeSelect(object): - """ChromeSelect 类专门用于处理 d 模式下 select 标签""" +class ChromiumSelect(object): + """ChromiumSelect 类专门用于处理 d 模式下 select 标签""" def __init__(self, ele): """初始化 \n @@ -1542,7 +1539,7 @@ class ChromeSelect(object): """此方法用于根据text值选择项。当元素是多选列表时,可以接收list或tuple \n :param text: text属性值,传入list或tuple可选择多项 :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None + :return: 是否选择成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(text, 'text', False, timeout) @@ -1551,25 +1548,25 @@ class ChromeSelect(object): """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple \n :param value: value属性值,传入list或tuple可选择多项 :param timeout: 超时时间,不输入默认实用页面超时时间 
- :return: None + :return: 是否选择成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(value, 'value', False, timeout) def by_index(self, index, timeout=None): """此方法用于根据index值选择项。当元素是多选列表时,可以接收list或tuple \n - :param index: index属性值,传入list或tuple可选择多项 + :param index: 序号,0开始,传入list或tuple可选择多项 :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None + :return: 是否选择成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(index, 'index', False, timeout) def cancel_by_text(self, text, timeout=None): """此方法用于根据text值取消选择项。当元素是多选列表时,可以接收list或tuple \n - :param text: text属性值,传入list或tuple可取消多项 + :param text: 文本,传入list或tuple可取消多项 :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None + :return: 是否取消成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(text, 'text', True, timeout) @@ -1578,16 +1575,16 @@ class ChromeSelect(object): """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple \n :param value: value属性值,传入list或tuple可取消多项 :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None + :return: 是否取消成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(value, 'value', True, timeout) def cancel_by_index(self, index, timeout=None): """此方法用于根据index值取消选择项。当元素是多选列表时,可以接收list或tuple \n - :param index: value属性值,传入list或tuple可取消多项 + :param index: 序号,0开始,传入list或tuple可取消多项 :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None + :return: 是否取消成功 """ timeout = timeout if timeout is not None else self._ele.page.timeout return self._select(index, 'index', True, timeout) diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index a432893..a0dee99 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -15,7 +15,6 @@ from .web_page import WebPage class ChromiumElement(DrissionElement): - """ChromePage页面对象中的元素对象""" def __init__(self, page: ChromiumBase, @@ -26,8 +25,8 @@ class 
ChromiumElement(DrissionElement): self._obj_id: str = ... self._backend_id: str = ... self._doc_id: str = ... - self._scroll: ChromeScroll = ... - self._select: ChromeSelect = ... + self._scroll: ChromiumScroll = ... + self._select: ChromiumSelect = ... def __repr__(self) -> str: ... @@ -100,7 +99,7 @@ class ChromiumElement(DrissionElement): def pseudo_after(self) -> str: ... @property - def scroll(self) -> ChromeScroll: ... + def scroll(self) -> ChromiumScroll: ... def parent(self, level_or_loc: Union[tuple, str, int] = ...) -> Union[ChromiumElement, None]: ... @@ -138,10 +137,10 @@ class ChromiumElement(DrissionElement): def wait_ele(self, loc_or_ele: Union[str, tuple, ChromiumElement], - timeout: float = ...) -> 'ChromiumElementWaiter': ... + timeout: float = ...) -> ChromiumElementWaiter: ... @property - def select(self) -> 'ChromeSelect': ... + def select(self) -> ChromiumSelect: ... @property def is_selected(self) -> bool: ... @@ -199,7 +198,7 @@ class ChromiumElement(DrissionElement): def get_screenshot(self, path: [str, Path] = ..., as_bytes: [bool, str] = ...) -> Union[str, bytes]: ... - def input(self, vals: Union[str, tuple, list], clear: bool = ...) -> None: ... + def input(self, vals: Any, clear: bool = ...) -> None: ... def _set_file_input(self, files: Union[str, list, tuple]) -> None: ... @@ -238,7 +237,6 @@ class ChromiumElement(DrissionElement): class ChromiumShadowRootElement(BaseElement): - """ChromiumShadowRootElement是用于处理ShadowRoot的类,使用方法和ChromiumElement基本一致""" def __init__(self, parent_ele: ChromiumElement, @@ -376,14 +374,12 @@ def _send_key(ele: ChromiumElement, modifier: int, key: str) -> None: ... def _offset_scroll(ele: ChromiumElement, offset_x: int, offset_y: int) -> tuple: ... -class ChromeScroll(object): - """用于滚动的对象""" +class ChromiumScroll(object): def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement]): self.t1: str = ... self.t2: str = ... - self.obj_id: str = ... - self.page: ChromiumPage = ... 
+ self.page_or_ele: Union[ChromiumPage, ChromiumElement] = ... def _run_script(self, js: str): ... @@ -408,8 +404,7 @@ class ChromeScroll(object): def right(self, pixel: int = ...) -> None: ... -class ChromeSelect(object): - """ChromeSelect 类专门用于处理 d 模式下 select 标签""" +class ChromiumSelect(object): def __init__(self, ele: ChromiumElement): self._ele: ChromiumElement = ... @@ -457,7 +452,6 @@ class ChromeSelect(object): class ChromiumElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement], diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index e35da16..2c82a67 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -10,9 +10,6 @@ from .chromium_base import ChromiumBase class ChromiumFrame(ChromiumBase): - """frame元素的类。 - frame既是元素,也是页面,可以获取元素属性和定位周边元素,也能跳转到网址。 - 同域和异域的frame处理方式不一样,同域的当作元素看待,异域的当作页面看待。""" def __init__(self, page: ChromiumBase, ele: ChromiumElement): self.page: ChromiumBase = ... 
diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 9d4a8ff..a7965b3 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -14,23 +14,23 @@ from .chromium_base import Timeout, ChromiumBase from .chromium_tab import ChromiumTab from .common import connect_chrome from .config import DriverOptions -from .tab import Tab +from .chromium_driver import ChromiumDriver class ChromiumPage(ChromiumBase): """用于管理浏览器的类""" - def __init__(self, addr_tab_opts=None, tab_id=None, timeout=None): - """初始化 \n - :param addr_tab_opts: 浏览器地址:端口、Tab对象或DriverOptions对象 + def __init__(self, addr_driver_opts=None, tab_id=None, timeout=None): + """初始化 \n + :param addr_driver_opts: 浏览器地址:端口、ChromiumDriver对象或DriverOptions对象 :param tab_id: 要控制的标签页id,不指定默认为激活的 :param timeout: 超时时间 """ - super().__init__(addr_tab_opts, tab_id, timeout) + super().__init__(addr_driver_opts, tab_id, timeout) - def _connect_browser(self, addr_tab_opts=None, tab_id=None): + def _connect_browser(self, addr_driver_opts=None, tab_id=None): """连接浏览器,在第一次时运行 \n - :param addr_tab_opts: 浏览器地址、Tab对象或DriverOptions对象 + :param addr_driver_opts: 浏览器地址、ChromiumDriver对象或DriverOptions对象 :param tab_id: 要控制的标签页id,不指定默认为激活的 :return: None """ @@ -44,32 +44,32 @@ class ChromiumPage(ChromiumBase): self._first_run = True # 接管或启动浏览器 - if addr_tab_opts is None or isinstance(addr_tab_opts, DriverOptions): - self.options = addr_tab_opts or DriverOptions() # 从ini文件读取 + if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions): + self.options = addr_driver_opts or DriverOptions() # 从ini文件读取 self.address = self.options.debugger_address self.process = connect_chrome(self.options)[1] json = self._control_session.get(f'http://{self.address}/json').json() tab_id = [i['id'] for i in json if i['type'] == 'page'][0] # 接收浏览器地址和端口 - elif isinstance(addr_tab_opts, str): - self.address = addr_tab_opts + elif isinstance(addr_driver_opts, str): + self.address = addr_driver_opts 
self.options = DriverOptions(read_file=False) - self.options.debugger_address = addr_tab_opts + self.options.debugger_address = addr_driver_opts self.process = connect_chrome(self.options)[1] if not tab_id: json = self._control_session.get(f'http://{self.address}/json').json() tab_id = [i['id'] for i in json if i['type'] == 'page'][0] - # 接收传递过来的Tab,浏览器 - elif isinstance(addr_tab_opts, Tab): - self._tab_obj = addr_tab_opts - self.address = search(r'ws://(.*?)/dev', addr_tab_opts._websocket_url).group(1) + # 接收传递过来的ChromiumDriver,浏览器 + elif isinstance(addr_driver_opts, ChromiumDriver): + self._tab_obj = addr_driver_opts + self.address = search(r'ws://(.*?)/dev', addr_driver_opts._websocket_url).group(1) self.process = None self.options = DriverOptions(read_file=False) else: - raise TypeError('只能接收Tab或DriverOptions类型参数。') + raise TypeError('只能接收ChromiumDriver或DriverOptions类型参数。') self._set_options() self._init_page(tab_id) @@ -129,8 +129,8 @@ class ChromiumPage(ChromiumBase): def get_screenshot(self, path=None, as_bytes=None, full_page=False, left_top=None, right_bottom=None): """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 \n - :param path: 完整路径,后缀可选'jpg','jpeg','png','webp' - :param as_bytes: 是否已字节形式返回图片,可选'jpg','jpeg','png','webp',生效时path参数无效 + :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否已字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 :param left_top: 截取范围左上角坐标 :param right_bottom: 截取范围右下角角坐标 diff --git a/DrissionPage/chromium_page.pyi b/DrissionPage/chromium_page.pyi index 7a782c0..57e3008 100644 --- a/DrissionPage/chromium_page.pyi +++ b/DrissionPage/chromium_page.pyi @@ -10,14 +10,13 @@ from typing import Union, Tuple, List from .chromium_base import ChromiumBase from .chromium_tab import ChromiumTab from .config import DriverOptions -from .tab import Tab +from .chromium_driver import ChromiumDriver class ChromiumPage(ChromiumBase): - """用于管理浏览器的类""" def __init__(self, - 
addr_tab_opts: Union[str, Tab, DriverOptions] = ..., + addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ..., tab_id: str = ..., timeout: float = ...): self.options: DriverOptions = ... @@ -27,7 +26,7 @@ class ChromiumPage(ChromiumBase): self._alert: Alert = ... def _connect_browser(self, - addr_tab_opts: Union[str, Tab, DriverOptions] = ..., + addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ..., tab_id: str = ...) -> None: ... def _init_page(self, tab_id: str = ...) -> None: ... @@ -82,7 +81,6 @@ class ChromiumPage(ChromiumBase): class Alert(object): - """用于保存alert信息的类""" def __init__(self): self.activated: bool = ... @@ -94,10 +92,9 @@ class Alert(object): class WindowSizeSetter(object): - """用于设置窗口大小的类""" def __init__(self, page: ChromiumPage): - self.driver: Tab = ... + self.driver: ChromiumDriver = ... self.window_id: str = ... def maximized(self) -> None: ... diff --git a/DrissionPage/chromium_tab.pyi b/DrissionPage/chromium_tab.pyi index 23da931..7bab21a 100644 --- a/DrissionPage/chromium_tab.pyi +++ b/DrissionPage/chromium_tab.pyi @@ -8,7 +8,6 @@ from .chromium_page import ChromiumPage class ChromiumTab(ChromiumBase): - """实现浏览器标签页的类""" def __init__(self, page:ChromiumPage, tab_id: str = ...): self.page: ChromiumPage = ... 
diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 96db2bd..8b50562 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -131,8 +131,10 @@ class OptionsManager(object): class SessionOptions(object): + """requests的Session对象配置类""" + def __init__(self, read_file=True, ini_path=None): - """requests的Session对象配置类 \n + """ :param read_file: 是否从文件读取配置 :param ini_path: ini文件路径 """ diff --git a/DrissionPage/config.pyi b/DrissionPage/config.pyi index 5e117fd..4dded5d 100644 --- a/DrissionPage/config.pyi +++ b/DrissionPage/config.pyi @@ -12,7 +12,6 @@ from selenium.webdriver.chrome.options import Options class OptionsManager(object): - """管理配置文件内容的类""" def __init__(self, path: str = ...): self.ini_path: str = ... @@ -147,9 +146,6 @@ class SessionOptions(object): class DriverOptions(Options): - """chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类, - 增加了删除配置和保存到文件方法。 - """ def __init__(self, read_file: bool = ..., ini_path: str = ...): self.ini_path: str = ... diff --git a/DrissionPage/drission.pyi b/DrissionPage/drission.pyi index b85ee4e..d328b2b 100644 --- a/DrissionPage/drission.pyi +++ b/DrissionPage/drission.pyi @@ -16,7 +16,6 @@ from .config import SessionOptions, DriverOptions class Drission(object): - """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" def __init__(self, driver_or_options: Union[RemoteWebDriver, Options, DriverOptions, bool] = ..., diff --git a/DrissionPage/driver_element.pyi b/DrissionPage/driver_element.pyi index a4fc77d..1a07688 100644 --- a/DrissionPage/driver_element.pyi +++ b/DrissionPage/driver_element.pyi @@ -17,9 +17,8 @@ from .session_element import SessionElement class DriverElement(DrissionElement): - """driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能""" - def __init__(self, ele: WebElement, page:Union[DriverPage, MixPage]=...): + def __init__(self, ele: WebElement, page: Union[DriverPage, MixPage] = ...): self._inner_ele: WebElement = ... self._select: Select = ... self._scroll: Scroll = ... 
@@ -219,14 +218,13 @@ class DriverElement(DrissionElement): loc: Union[tuple, str] = ...) -> Union[List['DriverElement'], 'DriverElement']: ... -def make_driver_ele(page_or_ele: Union[DriverPage, MixPage,DriverElement, ShadowRootElement], +def make_driver_ele(page_or_ele: Union[DriverPage, MixPage, DriverElement, ShadowRootElement], loc: Union[str, Tuple[str, str]], single: bool = ..., timeout: float = ...) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... class ElementsByXpath(object): - """用js通过xpath获取元素、节点或属性,与WebDriverWait配合使用""" def __init__(self, page, xpath: str = ..., single: bool = ..., timeout: float = ...): self.single: bool = ... @@ -238,7 +236,6 @@ class ElementsByXpath(object): class Select(object): - """Select 类专门用于处理 d 模式下 select 标签""" def __init__(self, ele: DriverElement): self.select_ele: SeleniumSelect = ... @@ -283,7 +280,6 @@ class Select(object): class ElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" def __init__(self, page_or_ele, @@ -303,7 +299,6 @@ class ElementWaiter(object): class Scroll(object): - """用于滚动的对象""" def __init__(self, page_or_ele): self.driver: Union[DriverElement, DriverPage] = ... diff --git a/DrissionPage/driver_page.pyi b/DrissionPage/driver_page.pyi index f33acaf..1e91dc8 100644 --- a/DrissionPage/driver_page.pyi +++ b/DrissionPage/driver_page.pyi @@ -17,7 +17,6 @@ from .session_element import SessionElement class DriverPage(BasePage): - """DriverPage封装了页面操作的常用功能,使用selenium来获取、解析、操作网页""" def __init__(self, driver: RemoteWebDriver, timeout: float = ...) -> None: self._driver: RemoteWebDriver = ... @@ -165,7 +164,6 @@ class DriverPage(BasePage): class ToFrame(object): - """用于处理焦点跳转到页面框架的类""" def __init__(self, page: DriverPage): self.page: DriverPage = ... 
diff --git a/DrissionPage/keys.py b/DrissionPage/keys.py index 9d94072..de53373 100644 --- a/DrissionPage/keys.py +++ b/DrissionPage/keys.py @@ -21,6 +21,7 @@ class Keys: SHIFT = '\ue008' LEFT_SHIFT = SHIFT CONTROL = '\ue009' + CTRL = '\ue009' LEFT_CONTROL = CONTROL ALT = '\ue00a' LEFT_ALT = ALT @@ -41,6 +42,7 @@ class Keys: ARROW_DOWN = DOWN INSERT = '\ue016' DELETE = '\ue017' + DEL = '\ue017' SEMICOLON = '\ue018' EQUALS = '\ue019' diff --git a/DrissionPage/mix_page.pyi b/DrissionPage/mix_page.pyi index 7ef00e1..3b4af17 100644 --- a/DrissionPage/mix_page.pyi +++ b/DrissionPage/mix_page.pyi @@ -22,12 +22,6 @@ from .session_page import SessionPage class MixPage(SessionPage, DriverPage, BasePage): - """MixPage整合了DriverPage和SessionPage,封装了对页面的操作, - 可在selenium(d模式)和requests(s模式)间无缝切换。 - 切换的时候会自动同步cookies。 - 获取信息功能为两种模式共有,操作页面元素功能只有d模式有。 - 调用某种模式独有的功能,会自动切换到该模式。 - """ def __init__(self, mode: str = ..., @@ -56,24 +50,24 @@ class MixPage(SessionPage, DriverPage, BasePage): def json(self) -> dict: ... def get(self, - url: str, - show_errmsg: bool | None = ..., - retry: int | None = ..., - interval: float | None = ..., - timeout: float | None = ..., - params: dict | None = ..., - data: Union[dict, str, None] = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> Union[bool, None]: ... 
+ url: str, + show_errmsg: bool | None = ..., + retry: int | None = ..., + interval: float | None = ..., + timeout: float | None = ..., + params: dict | None = ..., + data: Union[dict, str, None] = ..., + json: Union[dict, str, None] = ..., + headers: dict | None = ..., + cookies: Any | None = ..., + files: Any | None = ..., + auth: Any | None = ..., + allow_redirects: bool = ..., + proxies: dict | None = ..., + hooks: Any | None = ..., + stream: Any | None = ..., + verify: Any | None = ..., + cert: Any | None = ...) -> Union[bool, None]: ... def ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], diff --git a/DrissionPage/session_element.pyi b/DrissionPage/session_element.pyi index 61bd7e5..6bfcbad 100644 --- a/DrissionPage/session_element.pyi +++ b/DrissionPage/session_element.pyi @@ -16,7 +16,6 @@ from .session_page import SessionPage class SessionElement(DrissionElement): - """session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能""" def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = ...): self._inner_ele: HtmlElement = ... diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 5e42514..900967f 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -95,8 +95,11 @@ class SessionPage(BasePage): @property def json(self): - """当返回内容是json格式时,返回对应的字典""" - return self.response.json() + """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" + try: + return self.response.json() + except Exception: + return None def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): """用get方式跳转到url \n diff --git a/DrissionPage/session_page.pyi b/DrissionPage/session_page.pyi index 9424c85..b72c2fc 100644 --- a/DrissionPage/session_page.pyi +++ b/DrissionPage/session_page.pyi @@ -47,7 +47,7 @@ class SessionPage(BasePage): def html(self) -> str: ... @property - def json(self) -> dict: ... + def json(self) -> Union[dict, None]: ... 
def get(self, url: str, diff --git a/DrissionPage/shadow_root_element.pyi b/DrissionPage/shadow_root_element.pyi index 82eb002..cd0cf53 100644 --- a/DrissionPage/shadow_root_element.pyi +++ b/DrissionPage/shadow_root_element.pyi @@ -15,7 +15,6 @@ from .session_element import SessionElement class ShadowRootElement(BaseElement): - """ShadowRootElement是用于处理ShadowRoot的类,使用方法和DriverElement基本一致""" def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): self._inner_ele: WebElement = ... diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 5845ab7..0ee85e8 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -13,17 +13,17 @@ from .chromium_base import ChromiumBase, Timeout from .chromium_page import ChromiumPage from .config import DriverOptions, SessionOptions, cookies_to_tuple from .session_page import SessionPage -from .tab import Tab +from .chromium_driver import ChromiumDriver class WebPage(SessionPage, ChromiumPage, BasePage): """整合浏览器和request的页面类""" def __init__(self, mode='d', timeout=10, tab_id=None, driver_or_options=None, session_or_options=None): - """初始化函数 \n + """初始化函数 \n :param mode: 'd' 或 's',即driver模式和session模式 :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_or_options: Tab对象或DriverOptions对象,只使用s模式时应传入False + :param driver_or_options: ChromiumDriver对象或DriverOptions对象,只使用s模式时应传入False :param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False """ self._mode = mode.lower() @@ -111,19 +111,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def driver(self): - """返回纯粹的Tab对象""" + """返回纯粹的ChromiumDriver对象""" return self._tab_obj @property def _wait_driver(self): - """返回用于控制浏览器的Tab对象,会先等待页面加载完毕""" + """返回用于控制浏览器的ChromiumDriver对象,会先等待页面加载完毕""" while self._is_loading: sleep(.1) return self._driver @property def _driver(self): - """返回纯粹的Tab对象,调用时切换到d模式,并连接浏览器""" + """返回纯粹的ChromiumDriver对象,调用时切换到d模式,并连接浏览器""" self.change_mode('d') if self._tab_obj is None: 
self._connect_browser(self._driver_options, self._setting_tab_id) @@ -377,20 +377,20 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 'd': return super(SessionPage, self)._ele(loc_or_ele, timeout=timeout, single=single, relative=relative) - def _set_driver_options(self, Tab_or_Options): + def _set_driver_options(self, driver_or_Options): """处理driver设置""" - if Tab_or_Options is None: + if driver_or_Options is None: self._driver_options = DriverOptions() - elif Tab_or_Options is False: + elif driver_or_Options is False: self._driver_options = DriverOptions(read_file=False) - elif isinstance(Tab_or_Options, Tab): - self._connect_browser(Tab_or_Options) + elif isinstance(driver_or_Options, ChromiumDriver): + self._connect_browser(driver_or_Options) self._has_driver = True - elif isinstance(Tab_or_Options, DriverOptions): - self._driver_options = Tab_or_Options + elif isinstance(driver_or_Options, DriverOptions): + self._driver_options = driver_or_Options else: raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。') diff --git a/DrissionPage/web_page.pyi b/DrissionPage/web_page.pyi index b2df91c..cdd72f8 100644 --- a/DrissionPage/web_page.pyi +++ b/DrissionPage/web_page.pyi @@ -15,17 +15,16 @@ from .chromium_page import ChromiumPage from .config import DriverOptions, SessionOptions from .session_element import SessionElement from .session_page import SessionPage -from .tab import Tab +from .chromium_driver import ChromiumDriver class WebPage(SessionPage, ChromiumPage, BasePage): - """整合浏览器和request的页面类""" def __init__(self, mode: str = ..., timeout: float = ..., tab_id: str = ..., - driver_or_options: Union[Tab, DriverOptions, bool] = ..., + driver_or_options: Union[ChromiumDriver, DriverOptions, bool] = ..., session_or_options: Union[Session, SessionOptions, bool] = ...) -> None: self._mode: str = ... self._has_driver: bool = ... 
@@ -61,16 +60,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def session(self) -> Session: ... @property - def driver(self) -> Tab: ... + def driver(self) -> ChromiumDriver: ... @property - def _wait_driver(self) -> Tab: ... + def _wait_driver(self) -> ChromiumDriver: ... @property - def _driver(self) -> Tab: ... + def _driver(self) -> ChromiumDriver: ... @_driver.setter - def _driver(self, tab:Tab): ... + def _driver(self, tab: ChromiumDriver): ... @property def _session_url(self) -> str: ... @@ -156,7 +155,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): -> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[ Union[ChromiumElement, str, ChromiumFrame]]]: ... - def _set_driver_options(self, Tab_or_Options:Union[Tab, DriverOptions]) -> None: ... + def _set_driver_options(self, driver_or_Options: Union[ChromiumDriver, DriverOptions]) -> None: ... def _set_session_options(self, Session_or_Options:Union[Session, SessionOptions]) -> None: ... 
diff --git a/docs/WebPage使用方法/3.2访问网页.md b/docs/WebPage使用方法/3.2访问网页.md index 93bf508..04bad1e 100644 --- a/docs/WebPage使用方法/3.2访问网页.md +++ b/docs/WebPage使用方法/3.2访问网页.md @@ -99,6 +99,8 @@ proxies = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'} page.get(url, headers=headers, cookies=cookies, proxies=proxies) ``` +?> s 模式访问时默认设置`redirect`参数为`False`,即访问重定向链接时须手动处理。 + ## 📍 `post()`方法 此方法是用 post 方式请求页面。用法与`get()`一致。调用时,`WebPage`对象会自动切换到 s 模式。 diff --git a/docs/WebPage使用方法/3.3查找元素.md b/docs/WebPage使用方法/3.3查找元素.md index e5028eb..962ed61 100644 --- a/docs/WebPage使用方法/3.3查找元素.md +++ b/docs/WebPage使用方法/3.3查找元素.md @@ -457,13 +457,17 @@ loc1 = (By.ID, 'ele_id') ele = page.ele(loc1) # 按 xpath 查找 -loc2 = (By.XPATH, '//div[@class="ele_class"]' +loc2 = (By.XPATH, '//div[@class="ele_class"]') ele = page.ele(loc2) ``` # ✔️ 等待 -d 模式下所有查找元素操作都自带等待,默认为跟随元素所在页面`timeout`属性(默认 10 秒),也可以在每次查找时单独设置,单独设置的等待时间不会改变页面原来设置。 +## 📍 等待元素加载 + +由于网络的不稳定性、js 运行时间的不确定性等因素,经常须要等待元素加载到 DOM 中才能使用。 + +d 模式下所有查找元素操作都自带等待,时间默认跟随元素所在页面`timeout`属性(默认 10 秒),也可以在每次查找时单独设置,单独设置的等待时间不会改变页面原来设置。 ```python # 页面初始化时设置查找元素超时时间为 15 秒 @@ -481,6 +485,41 @@ ele2 = ele1.ele('search text') ele2 = ele1.ele('some text', timeout=1) ``` +## 📍 等待元素状态改变 + +有时候我们须要等待元素到达某种状态,如显示、隐藏、删除。页面对象和元素对象都内置了`wait_ele()`方法,用于等待元素状态变化。 + +该方法可接收现成的`ChromiumElement`对象,或定位符,默认等待时间为页面对象的`timeout`值,也可以单独设定。 + +`wait_ele()`方法 + +**参数:** + +- `loc_or_ele`:要等待的元素,可以是元素或定位符 +- `timeout`:等待超时时间,默认使用页面超时时间 + +方法: + +| 方法 | 参数 | 功能 | +| --------- | --- | ------------ | +| display() | 无 | 等待元素从 DOM 显示 | +| hidden() | 无 | 等待元素从 DOM 隐藏 | +| delete() | 无 | 等待元素从 DOM 删除 | + +**返回:** 这些方法返回布尔值,代表是否等待成功。 + +```python +# 等待 id 为 div1 的元素显示,超时使用页面设置 +page.wait_ele('#div1').display() + +# 等待 id 为 div1 的元素被删除(使用 loc 元组),设置超时3秒 +ele.wait_ele('#div1', timeout=3).delete() + +# 等待已获取到的元素被隐藏 +ele2 = ele1.ele('#div1') +ele1.wait_ele(ele2).hidden() +``` + # ✔️ 相对定位 以下方法可以以某元素为基准,在 DOM 中按照条件获取其兄弟元素、祖先元素、文档前后元素。 @@ -683,16 +722,54 @@ divs = ele1.befores('tag:div') # 
✔️ 查找 frame 里的元素 -与 selenium 不同,本库可以直接查找 frame 里面的元素,而无需切入切出,大大简化了程序逻辑,使用更便捷。 +## 📍 在页面下跨级查找 -未完待续。。。 +与 selenium 不同,本库可以直接查找 frame 里面的元素,而且无视层级,可以直接获取到多层 iframe 里的元素。无需切入切出,大大简化了程序逻辑,使用更便捷。 + +假设在页面中有个两级 iframe,其中有个元素`
`,可以这样获取: + +```python +page = WebPage() +ele = page('#abc') +``` + +获取前后无须切入切出,也不影响获取页面上其它元素。如果用 selenium,要这样: + +```python +driver = webdriver.Chrome() +driver.switch_to.frame(0) +driver.switch_to.frame(0) +ele = driver.find_element(By.ID, 'abc') +driver.switch_to.default_content() +``` + +显然比较繁琐,而且切入到 iframe 后无法对 iframe 外的元素进行操作。 + +!>**注意:**
只有页面对象支持跨级查找,元素对象不能直接查找内部 iframe 里的元素。 + +## 📍 在 iframe 元素下查找 + +本库把 iframe 看作一个特殊的元素/页面对象,逻辑更清晰,还可以实现同时操作多个 iframe,而无须来回切换。除查找元素外的更多功能详见介绍 iframe 的章节。 + +对于跨域名的 iframe,我们无法通过页面直接查找里面的元素,可以先获取到 iframe 元素,再在其下查找。当然,非跨域 iframe 也可以这样操作。 + +假设一个 iframe 的 id 为 `'iframe1'`,要在其中查找一个 id 为`'abc'`的元素: + +```python +page = WebPage() +iframe = page('#iframe1') +ele = iframe('#abc') +``` + +这个 iframe 元素是一个页面对象,因此可以继续在其下进行跨 iframe 查找(相对这个 iframe 不跨域的)。 # ✔️ `ShadowRootElement`相关查找 本库把 shadow-root 也作为元素对象看待,是为`ChromiumShadowRootElement`对象。该对象可与普通元素一样查找下级元素和 DOM 内相对定位。 对`ChromiumShadowRootElement`对象进行相对定位时,把它看作其父对象内部的第一个对象,其余定位逻辑与普通对象一致。 -!> **注意:**
如果`ChromiumShadowRootElement`元素的下级元素中有其它`ChromiumShadowRootElement`元素,那这些下级`ChromiumShadowRootElement`元素内部是无法直接通过定位语句查找到的,只能先定位到其父元素,再用`shadow-root`属性获取。 +!> **注意:**
如果`ChromiumShadowRootElement`元素的下级元素中有其它`ChromiumShadowRootElement`元素,那这些下级`ChromiumShadowRootElement` +元素内部是无法直接通过定位语句查找到的,只能先定位到其父元素,再用`shadow-root`属性获取。 ```python # 获取一个 shadow-root 元素 diff --git a/docs/WebPage使用方法/3.4获取元素信息.md b/docs/WebPage使用方法/3.4获取元素信息.md new file mode 100644 index 0000000..31e0ee7 --- /dev/null +++ b/docs/WebPage使用方法/3.4获取元素信息.md @@ -0,0 +1,415 @@ +获取到须要的页面元素后,可以使用元素对象获取元素的信息。 + +`WebPage`生成的元素对象有四种: + +- `ChromiumElement`:浏览器一般元素 + +- `ChromiumShadowRootElement`:shadow-root 元素 + +- `ChromiumFrame`:浏览器中的 frame/iframe 元素 + +- `SessionElement`:s 模式的元素,或前 3 者转换而成的静态元素 + +前三者是 d 模式下通过浏览器页面元素生成,后者是 s 模式由静态文本生成。 + +`ChromiumElement`对象拥有`SessionElement`对象所有属性。 + +`ChromiumFrame`作为既是元素又是页面的存在,后面章节单独介绍。 + +# ✔️ 简单示例 + +以下示例可直接运行查看结果: + +```python +from DrissionPage import WebPage + +page = WebPage('s') +page.get('https://gitee.com/explore') + +# 获取推荐目录下所有 a 元素 +li_eles = page('tag:ul@@text():全部推荐项目').eles('t:a') + +for i in li_eles: # 遍历列表 + print(i.tag, i.text, i.attr('href')) # 获取并打印标签名、文本、href 属性 + +"""输出: +a 全部推荐项目 https://gitee.com/explore/all +a 前沿技术 https://gitee.com/explore/new-tech +a 智能硬件 https://gitee.com/explore/hardware +a IOT/物联网/边缘计算 https://gitee.com/explore/iot +a 车载应用 https://gitee.com/explore/vehicle +以下省略…… +""" +``` + +# ✔️ `SessionElement`属性 + +假设`ele`为以下`div`元素的对象: + +```html +
Hello World! +

行元素

+ +
+``` + +## 📍 `html` + +此属性返回元素的`outerHTML`文本。 + +```python +html = ele.html +"""返回: +
Hello World! +

行元素

+ +
+""" +``` + +## 📍 `inner_html` + +此属性返回元素的`innerHTML`文本。 + +```python +inner_html = ele.inner_html +"""返回: +Hello World! +

行元素

+ +""" +``` + +## 📍 `tag` + +此属性返回元素的标签名。 + +```python +tag = ele.tag +# 返回:div +``` + +## 📍 `text` + +此属性返回元素内所有文本组合成的字符串。 +该字符串已格式化,即已转码,已去除多余换行符,符合人读取习惯,便于直接使用。无须重复写处理代码。 + +```python +text = ele.text +"""返回: +Hello World! +行元素 +""" +``` + +## 📍 `raw_text` + +此属性返回元素内原始文本。 + +```python +text = ele.raw_text +"""返回(注意保留了元素间的空格和换行): +Hello World! + 行元素 + + +""" +``` + +## 📍 `texts()` + +此方法返回元素内所有**直接**子节点的文本,包括元素和文本节点。 它有一个参数`text_node_only`,为`True`时只返回不被元素包裹的文本节点。这个方法适用于获取文本节点和元素节点混排的情况。 + +参数: + +- `text_node_only`:是否只返回文本节点 + +返回:文本列表 + +```python +texts = ele.texts() +print(ele.texts()) +# 输出:['Hello World!', '行元素'] + +print(ele.texts(text_node_only=True)) +# 输出:['Hello World!'] +``` + +## 📍 `comments` + +此属性以列表形式返回元素内的注释。 + +```python +comments = ele.comments +# 返回:[] +``` + +## 📍 `attrs` + +此属性以字典形式返回元素所有属性及值。 + +```python +attrs = ele.attrs +# 返回:{'id': 'div1', 'class': 'divs'} +``` + +## 📍 `attr()` + +此方法返回元素某个`attribute`属性值。它接收一个字符串参数`attr`,返回该属性值文本,无该属性时返回`None`。 +此方法返回的`src`、`href`属性为已补充完整的路径。`text`属性为已格式化文本。 + +参数: + +- `attr`:属性名称 + +返回:属性值文本 + +```python +ele_id = ele.attr('id') +# 返回:div1 +``` + +## 📍 `link` + +此属性返回元素的 href 属性或 src 属性,没有这两个属性则返回`None`。 + +```html +<a href='http://www.baidu.com'>百度</a> +``` + +假设`a_ele`为以上元素的对象: + +```python +link = a_ele.link +# 返回:http://www.baidu.com +``` + +## 📍 `page` + +此属性返回元素所在的页面对象。由 html 文本直接生成的`SessionElement`的`page`属性为`None`。 + +```python +page = ele.page +``` + +## 📍 `xpath` + +此属性返回当前元素在页面中 xpath 的绝对路径。 + +```python +xpath = ele.xpath +# 返回:/html/body/div +``` + +## 📍 `css_path` + +此属性返回当前元素在页面中 css selector 的绝对路径。 + +```python +css = ele.css_path +# 返回::nth-child(1)>:nth-child(1)>:nth-child(1) +``` + +# ✔️ `ChromiumElement`属性 + +`ChromiumElement`对象拥有`SessionElement`对象上述所有属性,并因运行在浏览器中拥有更丰富的属性。 + +## 📍 `size` + +此属性以元组形式返回元素的大小。 + +```python +size = ele.size +# 返回:(50, 50) +``` + +## 📍 `location` + +此属性以元组形式返回元素**左上角**在**整个页面**中的坐标。 + +```python +loc = ele.location +# 返回:(50, 50) +``` + +## 📍 `client_location` + 
+此属性以元组形式返回元素**左上角**在**当前视口**中的坐标。 + +```python +loc = ele.client_location +# 返回:(50, 50) +``` + +## 📍 `midpoint` + +此属性以元组形式返回元素**中点**在**整个页面**中的坐标。 + +```python +loc = ele.midpoint +# 返回:(55, 55) +``` + +## 📍 `client_midpoint` + +此属性以元组形式返回元素**中点**在**视口**中的坐标。 + +```python +loc = ele.client_midpoint +# 返回:(55, 55) +``` + +## 📍 `pseudo_before` + +此属性以文本形式返回当前元素的`::before`伪元素内容。 + +```python +before_txt = ele.pseudo_before +``` + +## 📍 `pseudo_after` + +此属性以文本形式返回当前元素的`::after`伪元素内容。 + +```python +after_txt = ele.pseudo_after +``` + +## 📍 `style()` + +该方法返回元素 css 样式属性值,可获取伪元素的属性。它有两个参数,`style`参数输入样式属性名称,`pseudo_ele`参数输入伪元素名称,省略则获取普通元素的 css 样式属性。 + +参数: + +- `style`:样式名称 +- `pseudo_ele`:伪元素名称(如有) + +返回:样式属性值 + +```python +# 获取 css 属性的 color 值 +prop = ele.style('color') + +# 获取 after 伪元素的内容 +prop = ele.style('content', 'after') +``` + +## 📍 `prop()` + +此方法返回`property`属性值。它接收一个字符串参数,返回该参数的属性值。 + +参数: + +- `prop`:属性名称 + +返回:属性值 + +## 📍`is_in_viewport` + +此属性以布尔值方式返回元素是否在视口中,以元素可以接受点击的点为判断。 + +## 📍`is_alive` + +此属性以布尔值形式返回当前元素是否仍可用。用于判断 d 模式下是否因页面刷新而导致元素失效。 + +## 📍 `is_selected` + +此属性以布尔值返回元素是否选中。 + +## 📍 `is_enabled` + +此属性以布尔值返回元素是否可用。 + +## 📍 `is_displayed` + +此属性以布尔值返回元素是否可见。 + +# ✔️ 保存和截图 + +保存功能是本库一个特色功能,可以直接读取浏览器缓存,无须依赖 gui 或重新下载就可以保存页面资源。 + +作为对比,selenium 无法自身实现图片另存,往往须要通过使用 gui 进行辅助,不仅效率和可靠性低,还占用键鼠资源。 + +## 📍 `get_src()` + +此方法用于返回元素`src`属性所使用的资源。base64 的会转为`bytes`返回,其它的以`str`返回。无资源的返回`None`。 + +例如,可获取页面上图片字节数据,用于识别内容,或保存到文件。`