mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
SessionElement删除is_valid属性;页面对象返回非json时调用json属性返回None;修复元素滚动问题;调整一些类名;修改文档
This commit is contained in:
parent
bd4b91e915
commit
b00ebcb881
@ -6,17 +6,16 @@
|
||||
from typing import Union, Tuple
|
||||
|
||||
from .chromium_base import ChromiumBase
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_page import ChromiumPage
|
||||
|
||||
|
||||
class ActionChains:
|
||||
"""用于实现动作链的类"""
|
||||
|
||||
def __init__(self, page:ChromiumBase):
|
||||
self.page: ChromiumPage = ...
|
||||
self._dr: Tab = ...
|
||||
self._dr: ChromiumDriver = ...
|
||||
self.curr_x: int = ...
|
||||
self.curr_y: int = ...
|
||||
self.modifier: int = ...
|
||||
|
@ -49,10 +49,6 @@ class BaseElement(BaseParser):
|
||||
def tag(self):
|
||||
return
|
||||
|
||||
@property
|
||||
def is_valid(self):
|
||||
return True
|
||||
|
||||
@abstractmethod
|
||||
def _ele(self, loc_or_str, timeout=None, single=True, relative=False):
|
||||
pass
|
||||
|
@ -8,7 +8,6 @@ from typing import Union, Tuple, List
|
||||
|
||||
|
||||
class BaseParser(object):
|
||||
"""所有页面、元素类的基类"""
|
||||
|
||||
def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ...
|
||||
|
||||
@ -29,7 +28,6 @@ class BaseParser(object):
|
||||
|
||||
|
||||
class BaseElement(BaseParser):
|
||||
"""各元素类的基类"""
|
||||
|
||||
def __init__(self, page: BasePage):
|
||||
self.page: BasePage = ...
|
||||
@ -38,9 +36,6 @@ class BaseElement(BaseParser):
|
||||
@property
|
||||
def tag(self)->str: ...
|
||||
|
||||
@property
|
||||
def is_valid(self)->bool: ...
|
||||
|
||||
@abstractmethod
|
||||
def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout:float=..., single:bool=..., relative:bool=...): ...
|
||||
|
||||
@ -56,7 +51,6 @@ class BaseElement(BaseParser):
|
||||
|
||||
|
||||
class DrissionElement(BaseElement):
|
||||
"""DriverElement 和 SessionElement的基类,但不是ShadowRootElement的基类"""
|
||||
|
||||
def __init__(self,
|
||||
page: BasePage = ...):
|
||||
@ -138,7 +132,6 @@ class DrissionElement(BaseElement):
|
||||
|
||||
|
||||
class BasePage(BaseParser):
|
||||
"""页面类的基类"""
|
||||
|
||||
def __init__(self, timeout: float = ...):
|
||||
self._url_available: bool = ...
|
||||
|
@ -9,11 +9,11 @@ from time import perf_counter, sleep
|
||||
from requests import Session
|
||||
|
||||
from .base import BasePage
|
||||
from .chromium_element import ChromiumElementWaiter, ChromeScroll, ChromiumElement, run_script, make_chromium_ele
|
||||
from .chromium_element import ChromiumElementWaiter, ChromiumScroll, ChromiumElement, run_script, make_chromium_ele
|
||||
from .common import get_loc
|
||||
from .config import cookies_to_tuple
|
||||
from .session_element import make_session_ele
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class ChromiumBase(BasePage):
|
||||
@ -33,9 +33,9 @@ class ChromiumBase(BasePage):
|
||||
self.timeouts = Timeout(self)
|
||||
self._connect_browser(address, tab_id)
|
||||
|
||||
def _connect_browser(self, addr_tab_opts=None, tab_id=None):
|
||||
def _connect_browser(self, addr_driver_opts=None, tab_id=None):
|
||||
"""连接浏览器,在第一次时运行 \n
|
||||
:param addr_tab_opts: 浏览器地址、Tab对象或DriverOptions对象
|
||||
:param addr_driver_opts: 浏览器地址、ChromiumDriver对象或DriverOptions对象
|
||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||
:return: None
|
||||
"""
|
||||
@ -45,7 +45,7 @@ class ChromiumBase(BasePage):
|
||||
self._first_run = True
|
||||
self._is_reading = False # 用于避免不同线程重复读取document
|
||||
|
||||
self.address = addr_tab_opts
|
||||
self.address = addr_driver_opts
|
||||
if not tab_id:
|
||||
json = self._control_session.get(f'http://{self.address}/json').json()
|
||||
tab_id = [i['id'] for i in json if i['type'] == 'page'][0]
|
||||
@ -61,8 +61,8 @@ class ChromiumBase(BasePage):
|
||||
"""
|
||||
self._is_loading = True
|
||||
if tab_id:
|
||||
self._tab_obj = Tab(id=tab_id, type='page',
|
||||
webSocketDebuggerUrl=f'ws://{self.address}/devtools/page/{tab_id}')
|
||||
self._tab_obj = ChromiumDriver(id=tab_id, type='page',
|
||||
webSocketDebuggerUrl=f'ws://{self.address}/devtools/page/{tab_id}')
|
||||
|
||||
self._tab_obj.start()
|
||||
self._tab_obj.DOM.enable()
|
||||
@ -195,16 +195,17 @@ class ChromiumBase(BasePage):
|
||||
|
||||
@property
|
||||
def driver(self):
|
||||
"""返回用于控制浏览器的Tab对象"""
|
||||
"""返回用于控制浏览器的ChromiumDriver对象"""
|
||||
return self._tab_obj
|
||||
|
||||
@property
|
||||
def _driver(self):
|
||||
"""返回用于控制浏览器的ChromiumDriver对象"""
|
||||
return self._tab_obj
|
||||
|
||||
@property
|
||||
def _wait_driver(self):
|
||||
"""返回用于控制浏览器的Tab对象,会先等待页面加载完毕"""
|
||||
"""返回用于控制浏览器的ChromiumDriver对象,会先等待页面加载完毕"""
|
||||
while self._is_loading:
|
||||
sleep(.1)
|
||||
return self._tab_obj
|
||||
@ -226,8 +227,11 @@ class ChromiumBase(BasePage):
|
||||
|
||||
@property
|
||||
def json(self):
|
||||
"""当返回内容是json格式时,返回对应的字典"""
|
||||
return loads(self('t:pre').text)
|
||||
"""当返回内容是json格式时,返回对应的字典,非json格式时返回None"""
|
||||
try:
|
||||
return loads(self('t:pre', timeout=.5).text)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@property
|
||||
def tab_id(self):
|
||||
@ -260,7 +264,7 @@ class ChromiumBase(BasePage):
|
||||
def scroll(self):
|
||||
"""返回用于滚动滚动条的对象"""
|
||||
if not hasattr(self, '_scroll'):
|
||||
self._scroll = ChromeScroll(self)
|
||||
self._scroll = ChromiumScroll(self)
|
||||
return self._scroll
|
||||
|
||||
@property
|
||||
|
@ -10,15 +10,14 @@ from requests import Session
|
||||
from requests.cookies import RequestsCookieJar
|
||||
|
||||
from .base import BasePage
|
||||
from .chromium_element import ChromiumElement, ChromiumElementWaiter, ChromeScroll
|
||||
from .chromium_element import ChromiumElement, ChromiumElementWaiter, ChromiumScroll
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .config import DriverOptions
|
||||
from .session_element import SessionElement
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class ChromiumBase(BasePage):
|
||||
"""标签页、frame、页面基类"""
|
||||
|
||||
def __init__(self,
|
||||
address: str,
|
||||
@ -26,20 +25,20 @@ class ChromiumBase(BasePage):
|
||||
timeout: float = ...):
|
||||
self._control_session: Session = ...
|
||||
self.address: str = ...
|
||||
self._tab_obj: Tab = ...
|
||||
self._tab_obj: ChromiumDriver = ...
|
||||
self._is_reading: bool = ...
|
||||
self.timeouts: Timeout = ...
|
||||
self._first_run: bool = ...
|
||||
self._is_loading: bool = ...
|
||||
self._page_load_strategy: str = ...
|
||||
self._scroll: ChromeScroll = ...
|
||||
self._scroll: ChromiumScroll = ...
|
||||
self._url: str = ...
|
||||
self._root_id: str = ...
|
||||
self._debug: bool = ...
|
||||
self._debug_recorder: Recorder = ...
|
||||
|
||||
def _connect_browser(self,
|
||||
addr_tab_opts: Union[str, Tab, DriverOptions] = ...,
|
||||
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ...,
|
||||
tab_id: str = ...) -> None: ...
|
||||
|
||||
def _init_page(self, tab_id: str = ...) -> None: ...
|
||||
@ -67,13 +66,13 @@ class ChromiumBase(BasePage):
|
||||
def title(self) -> str: ...
|
||||
|
||||
@property
|
||||
def driver(self) -> Tab: ...
|
||||
def driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@property
|
||||
def _driver(self) -> Tab: ...
|
||||
def _driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@property
|
||||
def _wait_driver(self) -> Tab: ...
|
||||
def _wait_driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@property
|
||||
def is_loading(self) -> bool: ...
|
||||
@ -85,7 +84,7 @@ class ChromiumBase(BasePage):
|
||||
def html(self) -> str: ...
|
||||
|
||||
@property
|
||||
def json(self) -> dict: ...
|
||||
def json(self) -> Union[dict, None]: ...
|
||||
|
||||
@property
|
||||
def tab_id(self) -> str: ...
|
||||
@ -103,7 +102,7 @@ class ChromiumBase(BasePage):
|
||||
def page_load_strategy(self) -> str: ...
|
||||
|
||||
@property
|
||||
def scroll(self) -> ChromeScroll: ...
|
||||
def scroll(self) -> ChromiumScroll: ...
|
||||
|
||||
@property
|
||||
def set_page_load_strategy(self) -> PageLoadStrategy: ...
|
||||
@ -188,7 +187,6 @@ class ChromiumBase(BasePage):
|
||||
|
||||
|
||||
class Timeout(object):
|
||||
"""用于保存d模式timeout信息的类"""
|
||||
|
||||
def __init__(self, page: ChromiumBase):
|
||||
self.page: ChromiumBase = ...
|
||||
|
@ -1,14 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from functools import partial
|
||||
from json import dumps, loads
|
||||
from logging import getLogger
|
||||
from os import getenv
|
||||
from threading import Thread, Event
|
||||
from warnings import warn
|
||||
|
||||
from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \
|
||||
create_connection
|
||||
@ -39,7 +34,7 @@ class GenericAttr(object):
|
||||
self.tab.set_listener("%s.%s" % (self.name, key), value)
|
||||
|
||||
|
||||
class Tab(object):
|
||||
class ChromiumDriver(object):
|
||||
status_initial = 'initial'
|
||||
status_started = 'started'
|
||||
status_stopped = 'stopped'
|
||||
@ -101,7 +96,7 @@ class Tab(object):
|
||||
return self.method_results[message['id']].get(timeout=q_timeout)
|
||||
except queue.Empty:
|
||||
if isinstance(timeout, (int, float)) and timeout <= 0:
|
||||
raise TimeoutException("Calling %s timeout" % message['method'])
|
||||
raise TimeoutError(f"调用{message['method']}超时。")
|
||||
|
||||
continue
|
||||
|
||||
@ -132,8 +127,8 @@ class Tab(object):
|
||||
elif "id" in message:
|
||||
if message["id"] in self.method_results:
|
||||
self.method_results[message['id']].put(message)
|
||||
else: # pragma: no cover
|
||||
warn("unknown message: %s" % message)
|
||||
# else: # pragma: no cover
|
||||
# warn("unknown message: %s" % message)
|
||||
|
||||
def _handle_event_loop(self):
|
||||
while not self._stopped.is_set():
|
||||
@ -157,45 +152,27 @@ class Tab(object):
|
||||
|
||||
def call_method(self, _method, *args, **kwargs):
|
||||
if not self._started:
|
||||
raise RuntimeException("Cannot call method before it is started")
|
||||
raise RuntimeError("不能在启动前调用方法。")
|
||||
|
||||
if args:
|
||||
raise CallMethodException("the params should be key=value format")
|
||||
raise CallMethodException("参数必须是key=value形式。")
|
||||
|
||||
if self._stopped.is_set():
|
||||
raise RuntimeException("Tab has been stopped")
|
||||
raise RuntimeError("Driver已经停止。")
|
||||
|
||||
timeout = kwargs.pop("_timeout", None)
|
||||
result = self._send({"method": _method, "params": kwargs}, timeout=timeout)
|
||||
if 'result' not in result and 'error' in result:
|
||||
warn("%s error: %s" % (_method, result['error']['message']))
|
||||
raise CallMethodException("calling method: %s error: %s" % (_method, result['error']['message']))
|
||||
raise CallMethodException(f"调用方法:{_method} 错误:{result['error']['message']}")
|
||||
|
||||
return result['result']
|
||||
|
||||
def set_listener(self, event, callback):
|
||||
if not callback:
|
||||
return self.event_handlers.pop(event, None)
|
||||
|
||||
if not callable(callback):
|
||||
raise RuntimeException("callback should be callable")
|
||||
|
||||
self.event_handlers[event] = callback
|
||||
return True
|
||||
|
||||
def get_listener(self, event):
|
||||
return self.event_handlers.get(event, None)
|
||||
|
||||
def del_all_listeners(self):
|
||||
self.event_handlers = {}
|
||||
return True
|
||||
|
||||
def start(self):
|
||||
if self._started:
|
||||
return False
|
||||
|
||||
if not self._websocket_url:
|
||||
raise RuntimeException("Already has another client connect to this tab")
|
||||
raise RuntimeError("已存在另一个连接。")
|
||||
|
||||
self._started = True
|
||||
self.status = self.status_started
|
||||
@ -210,7 +187,7 @@ class Tab(object):
|
||||
return False
|
||||
|
||||
if not self._started:
|
||||
raise RuntimeException("Tab is not running")
|
||||
raise RuntimeError("Driver正在运行。")
|
||||
|
||||
self.status = self.status_stopped
|
||||
self._stopped.set()
|
||||
@ -218,9 +195,26 @@ class Tab(object):
|
||||
self._ws.close()
|
||||
return True
|
||||
|
||||
def set_listener(self, event, callback):
|
||||
if not callback:
|
||||
return self.event_handlers.pop(event, None)
|
||||
|
||||
if not callable(callback):
|
||||
raise RuntimeError("方法不能调用。")
|
||||
|
||||
self.event_handlers[event] = callback
|
||||
return True
|
||||
|
||||
def get_listener(self, event):
|
||||
return self.event_handlers.get(event, None)
|
||||
|
||||
def del_all_listeners(self):
|
||||
self.event_handlers = {}
|
||||
return True
|
||||
|
||||
def wait(self, timeout=None):
|
||||
if not self._started:
|
||||
raise RuntimeException("Tab is not running")
|
||||
raise RuntimeError("Driver仍未运行。")
|
||||
|
||||
if timeout:
|
||||
return self._stopped.wait(timeout)
|
||||
@ -230,30 +224,10 @@ class Tab(object):
|
||||
return True
|
||||
|
||||
def __str__(self):
|
||||
return "<Tab [%s]>" % self.id
|
||||
return f"<ChromiumDriver {self.id}>"
|
||||
|
||||
__repr__ = __str__
|
||||
|
||||
|
||||
class PyChromeException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class UserAbortException(PyChromeException):
|
||||
pass
|
||||
|
||||
|
||||
class TabConnectionException(PyChromeException):
|
||||
pass
|
||||
|
||||
|
||||
class CallMethodException(PyChromeException):
|
||||
pass
|
||||
|
||||
|
||||
class TimeoutException(PyChromeException):
|
||||
pass
|
||||
|
||||
|
||||
class RuntimeException(PyChromeException):
|
||||
class CallMethodException(Exception):
|
||||
pass
|
@ -116,7 +116,7 @@ class ChromiumElement(DrissionElement):
|
||||
|
||||
@property
|
||||
def doc_id(self):
|
||||
"""返回document的object id"""
|
||||
"""返回所在document的object id"""
|
||||
return self._doc_id
|
||||
|
||||
@property
|
||||
@ -192,7 +192,7 @@ class ChromiumElement(DrissionElement):
|
||||
def scroll(self):
|
||||
"""用于滚动滚动条的对象"""
|
||||
if self._scroll is None:
|
||||
self._scroll = ChromeScroll(self)
|
||||
self._scroll = ChromiumScroll(self)
|
||||
return self._scroll
|
||||
|
||||
def parent(self, level_or_loc=1):
|
||||
@ -277,7 +277,7 @@ class ChromiumElement(DrissionElement):
|
||||
if self.tag != 'select':
|
||||
self._select = False
|
||||
else:
|
||||
self._select = ChromeSelect(self)
|
||||
self._select = ChromiumSelect(self)
|
||||
|
||||
return self._select
|
||||
|
||||
@ -392,7 +392,7 @@ class ChromiumElement(DrissionElement):
|
||||
"""运行javascript代码 \n
|
||||
:param script: js文本
|
||||
:param as_expr: 是否作为表达式运行,为True时args无效
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[2]...
|
||||
:param args: 参数,按顺序在js文本中对应argument[0]、argument[1]...
|
||||
:return: 运行的结果
|
||||
"""
|
||||
return run_script(self, script, as_expr, self.page.timeouts.script, args, True)
|
||||
@ -492,7 +492,7 @@ class ChromiumElement(DrissionElement):
|
||||
|
||||
def save(self, path=None, rename=None):
|
||||
"""保存图片或其它有src属性的元素的资源 \n
|
||||
:param path: 文件保存路径,为None时保存到当前文件夹,为False时不保存
|
||||
:param path: 文件保存路径,为None时保存到当前文件夹
|
||||
:param rename: 文件名称,为None时从资源url获取
|
||||
:return: None
|
||||
"""
|
||||
@ -1399,29 +1399,26 @@ def _offset_scroll(ele, offset_x, offset_y):
|
||||
return cx, cy
|
||||
|
||||
|
||||
class ChromeScroll(object):
|
||||
class ChromiumScroll(object):
|
||||
"""用于滚动的对象"""
|
||||
|
||||
def __init__(self, page_or_ele):
|
||||
"""
|
||||
:param page_or_ele: ChromePage或ChromiumElement
|
||||
"""
|
||||
self.page_or_ele = page_or_ele
|
||||
if isinstance(page_or_ele, ChromiumElement):
|
||||
self.t1 = self.t2 = 'this'
|
||||
self.obj_id = page_or_ele.obj_id
|
||||
self.page = page_or_ele.page
|
||||
else:
|
||||
self.t1 = 'window'
|
||||
self.t2 = 'document.documentElement'
|
||||
self.obj_id = None
|
||||
self.page = page_or_ele
|
||||
|
||||
def _run_script(self, js):
|
||||
js = js.format(self.t1, self.t2, self.t2)
|
||||
if self.obj_id:
|
||||
self.page.run_script(js)
|
||||
if self.t1 == 'this': # 在元素上滚动
|
||||
self.page_or_ele.run_script(js)
|
||||
else:
|
||||
self.page.driver.Runtime.evaluate(expression=js)
|
||||
self.page_or_ele.run_script(js, as_expr=True)
|
||||
|
||||
def to_top(self):
|
||||
"""滚动到顶端,水平位置不变"""
|
||||
@ -1482,8 +1479,8 @@ class ChromeScroll(object):
|
||||
self._run_script(f'{{}}.scrollBy({pixel},0);')
|
||||
|
||||
|
||||
class ChromeSelect(object):
|
||||
"""ChromeSelect 类专门用于处理 d 模式下 select 标签"""
|
||||
class ChromiumSelect(object):
|
||||
"""ChromiumSelect 类专门用于处理 d 模式下 select 标签"""
|
||||
|
||||
def __init__(self, ele):
|
||||
"""初始化 \n
|
||||
@ -1542,7 +1539,7 @@ class ChromeSelect(object):
|
||||
"""此方法用于根据text值选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param text: text属性值,传入list或tuple可选择多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否选择成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(text, 'text', False, timeout)
|
||||
@ -1551,25 +1548,25 @@ class ChromeSelect(object):
|
||||
"""此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param value: value属性值,传入list或tuple可选择多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否选择成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(value, 'value', False, timeout)
|
||||
|
||||
def by_index(self, index, timeout=None):
|
||||
"""此方法用于根据index值选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param index: index属性值,传入list或tuple可选择多项
|
||||
:param index: 序号,0开始,传入list或tuple可选择多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否选择成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(index, 'index', False, timeout)
|
||||
|
||||
def cancel_by_text(self, text, timeout=None):
|
||||
"""此方法用于根据text值取消选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param text: text属性值,传入list或tuple可取消多项
|
||||
:param text: 文本,传入list或tuple可取消多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否取消成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(text, 'text', True, timeout)
|
||||
@ -1578,16 +1575,16 @@ class ChromeSelect(object):
|
||||
"""此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param value: value属性值,传入list或tuple可取消多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否取消成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(value, 'value', True, timeout)
|
||||
|
||||
def cancel_by_index(self, index, timeout=None):
|
||||
"""此方法用于根据index值取消选择项。当元素是多选列表时,可以接收list或tuple \n
|
||||
:param index: value属性值,传入list或tuple可取消多项
|
||||
:param index: 序号,0开始,传入list或tuple可取消多项
|
||||
:param timeout: 超时时间,不输入默认实用页面超时时间
|
||||
:return: None
|
||||
:return: 是否取消成功
|
||||
"""
|
||||
timeout = timeout if timeout is not None else self._ele.page.timeout
|
||||
return self._select(index, 'index', True, timeout)
|
||||
|
@ -15,7 +15,6 @@ from .web_page import WebPage
|
||||
|
||||
|
||||
class ChromiumElement(DrissionElement):
|
||||
"""ChromePage页面对象中的元素对象"""
|
||||
|
||||
def __init__(self,
|
||||
page: ChromiumBase,
|
||||
@ -26,8 +25,8 @@ class ChromiumElement(DrissionElement):
|
||||
self._obj_id: str = ...
|
||||
self._backend_id: str = ...
|
||||
self._doc_id: str = ...
|
||||
self._scroll: ChromeScroll = ...
|
||||
self._select: ChromeSelect = ...
|
||||
self._scroll: ChromiumScroll = ...
|
||||
self._select: ChromiumSelect = ...
|
||||
|
||||
def __repr__(self) -> str: ...
|
||||
|
||||
@ -100,7 +99,7 @@ class ChromiumElement(DrissionElement):
|
||||
def pseudo_after(self) -> str: ...
|
||||
|
||||
@property
|
||||
def scroll(self) -> ChromeScroll: ...
|
||||
def scroll(self) -> ChromiumScroll: ...
|
||||
|
||||
def parent(self, level_or_loc: Union[tuple, str, int] = ...) -> Union[ChromiumElement, None]: ...
|
||||
|
||||
@ -138,10 +137,10 @@ class ChromiumElement(DrissionElement):
|
||||
|
||||
def wait_ele(self,
|
||||
loc_or_ele: Union[str, tuple, ChromiumElement],
|
||||
timeout: float = ...) -> 'ChromiumElementWaiter': ...
|
||||
timeout: float = ...) -> ChromiumElementWaiter: ...
|
||||
|
||||
@property
|
||||
def select(self) -> 'ChromeSelect': ...
|
||||
def select(self) -> ChromiumSelect: ...
|
||||
|
||||
@property
|
||||
def is_selected(self) -> bool: ...
|
||||
@ -199,7 +198,7 @@ class ChromiumElement(DrissionElement):
|
||||
|
||||
def get_screenshot(self, path: [str, Path] = ..., as_bytes: [bool, str] = ...) -> Union[str, bytes]: ...
|
||||
|
||||
def input(self, vals: Union[str, tuple, list], clear: bool = ...) -> None: ...
|
||||
def input(self, vals: Any, clear: bool = ...) -> None: ...
|
||||
|
||||
def _set_file_input(self, files: Union[str, list, tuple]) -> None: ...
|
||||
|
||||
@ -238,7 +237,6 @@ class ChromiumElement(DrissionElement):
|
||||
|
||||
|
||||
class ChromiumShadowRootElement(BaseElement):
|
||||
"""ChromiumShadowRootElement是用于处理ShadowRoot的类,使用方法和ChromiumElement基本一致"""
|
||||
|
||||
def __init__(self,
|
||||
parent_ele: ChromiumElement,
|
||||
@ -376,14 +374,12 @@ def _send_key(ele: ChromiumElement, modifier: int, key: str) -> None: ...
|
||||
def _offset_scroll(ele: ChromiumElement, offset_x: int, offset_y: int) -> tuple: ...
|
||||
|
||||
|
||||
class ChromeScroll(object):
|
||||
"""用于滚动的对象"""
|
||||
class ChromiumScroll(object):
|
||||
|
||||
def __init__(self, page_or_ele: Union[ChromiumBase, ChromiumElement]):
|
||||
self.t1: str = ...
|
||||
self.t2: str = ...
|
||||
self.obj_id: str = ...
|
||||
self.page: ChromiumPage = ...
|
||||
self.page_or_ele: Union[ChromiumPage, ChromiumElement] = ...
|
||||
|
||||
def _run_script(self, js: str): ...
|
||||
|
||||
@ -408,8 +404,7 @@ class ChromeScroll(object):
|
||||
def right(self, pixel: int = ...) -> None: ...
|
||||
|
||||
|
||||
class ChromeSelect(object):
|
||||
"""ChromeSelect 类专门用于处理 d 模式下 select 标签"""
|
||||
class ChromiumSelect(object):
|
||||
|
||||
def __init__(self, ele: ChromiumElement):
|
||||
self._ele: ChromiumElement = ...
|
||||
@ -457,7 +452,6 @@ class ChromeSelect(object):
|
||||
|
||||
|
||||
class ChromiumElementWaiter(object):
|
||||
"""等待元素在dom中某种状态,如删除、显示、隐藏"""
|
||||
|
||||
def __init__(self,
|
||||
page_or_ele: Union[ChromiumBase, ChromiumElement],
|
||||
|
@ -10,9 +10,6 @@ from .chromium_base import ChromiumBase
|
||||
|
||||
|
||||
class ChromiumFrame(ChromiumBase):
|
||||
"""frame元素的类。
|
||||
frame既是元素,也是页面,可以获取元素属性和定位周边元素,也能跳转到网址。
|
||||
同域和异域的frame处理方式不一样,同域的当作元素看待,异域的当作页面看待。"""
|
||||
|
||||
def __init__(self, page: ChromiumBase, ele: ChromiumElement):
|
||||
self.page: ChromiumBase = ...
|
||||
|
@ -14,23 +14,23 @@ from .chromium_base import Timeout, ChromiumBase
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .common import connect_chrome
|
||||
from .config import DriverOptions
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class ChromiumPage(ChromiumBase):
|
||||
"""用于管理浏览器的类"""
|
||||
|
||||
def __init__(self, addr_tab_opts=None, tab_id=None, timeout=None):
|
||||
"""初始化 \n
|
||||
:param addr_tab_opts: 浏览器地址:端口、Tab对象或DriverOptions对象
|
||||
def __init__(self, addr_driver_opts=None, tab_id=None, timeout=None):
|
||||
"""初始化 \n
|
||||
:param addr_driver_opts: 浏览器地址:端口、ChromiumDriver对象或DriverOptions对象
|
||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||
:param timeout: 超时时间
|
||||
"""
|
||||
super().__init__(addr_tab_opts, tab_id, timeout)
|
||||
super().__init__(addr_driver_opts, tab_id, timeout)
|
||||
|
||||
def _connect_browser(self, addr_tab_opts=None, tab_id=None):
|
||||
def _connect_browser(self, addr_driver_opts=None, tab_id=None):
|
||||
"""连接浏览器,在第一次时运行 \n
|
||||
:param addr_tab_opts: 浏览器地址、Tab对象或DriverOptions对象
|
||||
:param addr_driver_opts: 浏览器地址、ChromiumDriver对象或DriverOptions对象
|
||||
:param tab_id: 要控制的标签页id,不指定默认为激活的
|
||||
:return: None
|
||||
"""
|
||||
@ -44,32 +44,32 @@ class ChromiumPage(ChromiumBase):
|
||||
self._first_run = True
|
||||
|
||||
# 接管或启动浏览器
|
||||
if addr_tab_opts is None or isinstance(addr_tab_opts, DriverOptions):
|
||||
self.options = addr_tab_opts or DriverOptions() # 从ini文件读取
|
||||
if addr_driver_opts is None or isinstance(addr_driver_opts, DriverOptions):
|
||||
self.options = addr_driver_opts or DriverOptions() # 从ini文件读取
|
||||
self.address = self.options.debugger_address
|
||||
self.process = connect_chrome(self.options)[1]
|
||||
json = self._control_session.get(f'http://{self.address}/json').json()
|
||||
tab_id = [i['id'] for i in json if i['type'] == 'page'][0]
|
||||
|
||||
# 接收浏览器地址和端口
|
||||
elif isinstance(addr_tab_opts, str):
|
||||
self.address = addr_tab_opts
|
||||
elif isinstance(addr_driver_opts, str):
|
||||
self.address = addr_driver_opts
|
||||
self.options = DriverOptions(read_file=False)
|
||||
self.options.debugger_address = addr_tab_opts
|
||||
self.options.debugger_address = addr_driver_opts
|
||||
self.process = connect_chrome(self.options)[1]
|
||||
if not tab_id:
|
||||
json = self._control_session.get(f'http://{self.address}/json').json()
|
||||
tab_id = [i['id'] for i in json if i['type'] == 'page'][0]
|
||||
|
||||
# 接收传递过来的Tab,浏览器
|
||||
elif isinstance(addr_tab_opts, Tab):
|
||||
self._tab_obj = addr_tab_opts
|
||||
self.address = search(r'ws://(.*?)/dev', addr_tab_opts._websocket_url).group(1)
|
||||
# 接收传递过来的ChromiumDriver,浏览器
|
||||
elif isinstance(addr_driver_opts, ChromiumDriver):
|
||||
self._tab_obj = addr_driver_opts
|
||||
self.address = search(r'ws://(.*?)/dev', addr_driver_opts._websocket_url).group(1)
|
||||
self.process = None
|
||||
self.options = DriverOptions(read_file=False)
|
||||
|
||||
else:
|
||||
raise TypeError('只能接收Tab或DriverOptions类型参数。')
|
||||
raise TypeError('只能接收ChromiumDriver或DriverOptions类型参数。')
|
||||
|
||||
self._set_options()
|
||||
self._init_page(tab_id)
|
||||
@ -129,8 +129,8 @@ class ChromiumPage(ChromiumBase):
|
||||
|
||||
def get_screenshot(self, path=None, as_bytes=None, full_page=False, left_top=None, right_bottom=None):
|
||||
"""对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 \n
|
||||
:param path: 完整路径,后缀可选'jpg','jpeg','png','webp'
|
||||
:param as_bytes: 是否已字节形式返回图片,可选'jpg','jpeg','png','webp',生效时path参数无效
|
||||
:param path: 完整路径,后缀可选 'jpg','jpeg','png','webp'
|
||||
:param as_bytes: 是否已字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效
|
||||
:param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口
|
||||
:param left_top: 截取范围左上角坐标
|
||||
:param right_bottom: 截取范围右下角角坐标
|
||||
|
@ -10,14 +10,13 @@ from typing import Union, Tuple, List
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .config import DriverOptions
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class ChromiumPage(ChromiumBase):
|
||||
"""用于管理浏览器的类"""
|
||||
|
||||
def __init__(self,
|
||||
addr_tab_opts: Union[str, Tab, DriverOptions] = ...,
|
||||
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ...,
|
||||
tab_id: str = ...,
|
||||
timeout: float = ...):
|
||||
self.options: DriverOptions = ...
|
||||
@ -27,7 +26,7 @@ class ChromiumPage(ChromiumBase):
|
||||
self._alert: Alert = ...
|
||||
|
||||
def _connect_browser(self,
|
||||
addr_tab_opts: Union[str, Tab, DriverOptions] = ...,
|
||||
addr_driver_opts: Union[str, ChromiumDriver, DriverOptions] = ...,
|
||||
tab_id: str = ...) -> None: ...
|
||||
|
||||
def _init_page(self, tab_id: str = ...) -> None: ...
|
||||
@ -82,7 +81,6 @@ class ChromiumPage(ChromiumBase):
|
||||
|
||||
|
||||
class Alert(object):
|
||||
"""用于保存alert信息的类"""
|
||||
|
||||
def __init__(self):
|
||||
self.activated: bool = ...
|
||||
@ -94,10 +92,9 @@ class Alert(object):
|
||||
|
||||
|
||||
class WindowSizeSetter(object):
|
||||
"""用于设置窗口大小的类"""
|
||||
|
||||
def __init__(self, page: ChromiumPage):
|
||||
self.driver: Tab = ...
|
||||
self.driver: ChromiumDriver = ...
|
||||
self.window_id: str = ...
|
||||
|
||||
def maximized(self) -> None: ...
|
||||
|
@ -8,7 +8,6 @@ from .chromium_page import ChromiumPage
|
||||
|
||||
|
||||
class ChromiumTab(ChromiumBase):
|
||||
"""实现浏览器标签页的类"""
|
||||
|
||||
def __init__(self, page:ChromiumPage, tab_id: str = ...):
|
||||
self.page: ChromiumPage = ...
|
||||
|
@ -131,8 +131,10 @@ class OptionsManager(object):
|
||||
|
||||
|
||||
class SessionOptions(object):
|
||||
"""requests的Session对象配置类"""
|
||||
|
||||
def __init__(self, read_file=True, ini_path=None):
|
||||
"""requests的Session对象配置类 \n
|
||||
"""
|
||||
:param read_file: 是否从文件读取配置
|
||||
:param ini_path: ini文件路径
|
||||
"""
|
||||
|
@ -12,7 +12,6 @@ from selenium.webdriver.chrome.options import Options
|
||||
|
||||
|
||||
class OptionsManager(object):
|
||||
"""管理配置文件内容的类"""
|
||||
|
||||
def __init__(self, path: str = ...):
|
||||
self.ini_path: str = ...
|
||||
@ -147,9 +146,6 @@ class SessionOptions(object):
|
||||
|
||||
|
||||
class DriverOptions(Options):
|
||||
"""chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类,
|
||||
增加了删除配置和保存到文件方法。
|
||||
"""
|
||||
|
||||
def __init__(self, read_file: bool = ..., ini_path: str = ...):
|
||||
self.ini_path: str = ...
|
||||
|
@ -16,7 +16,6 @@ from .config import SessionOptions, DriverOptions
|
||||
|
||||
|
||||
class Drission(object):
|
||||
"""Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
|
||||
|
||||
def __init__(self,
|
||||
driver_or_options: Union[RemoteWebDriver, Options, DriverOptions, bool] = ...,
|
||||
|
@ -17,9 +17,8 @@ from .session_element import SessionElement
|
||||
|
||||
|
||||
class DriverElement(DrissionElement):
|
||||
"""driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能"""
|
||||
|
||||
def __init__(self, ele: WebElement, page:Union[DriverPage, MixPage]=...):
|
||||
def __init__(self, ele: WebElement, page: Union[DriverPage, MixPage] = ...):
|
||||
self._inner_ele: WebElement = ...
|
||||
self._select: Select = ...
|
||||
self._scroll: Scroll = ...
|
||||
@ -219,14 +218,13 @@ class DriverElement(DrissionElement):
|
||||
loc: Union[tuple, str] = ...) -> Union[List['DriverElement'], 'DriverElement']: ...
|
||||
|
||||
|
||||
def make_driver_ele(page_or_ele: Union[DriverPage, MixPage,DriverElement, ShadowRootElement],
|
||||
def make_driver_ele(page_or_ele: Union[DriverPage, MixPage, DriverElement, ShadowRootElement],
|
||||
loc: Union[str, Tuple[str, str]],
|
||||
single: bool = ...,
|
||||
timeout: float = ...) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ...
|
||||
|
||||
|
||||
class ElementsByXpath(object):
|
||||
"""用js通过xpath获取元素、节点或属性,与WebDriverWait配合使用"""
|
||||
|
||||
def __init__(self, page, xpath: str = ..., single: bool = ..., timeout: float = ...):
|
||||
self.single: bool = ...
|
||||
@ -238,7 +236,6 @@ class ElementsByXpath(object):
|
||||
|
||||
|
||||
class Select(object):
|
||||
"""Select 类专门用于处理 d 模式下 select 标签"""
|
||||
|
||||
def __init__(self, ele: DriverElement):
|
||||
self.select_ele: SeleniumSelect = ...
|
||||
@ -283,7 +280,6 @@ class Select(object):
|
||||
|
||||
|
||||
class ElementWaiter(object):
|
||||
"""等待元素在dom中某种状态,如删除、显示、隐藏"""
|
||||
|
||||
def __init__(self,
|
||||
page_or_ele,
|
||||
@ -303,7 +299,6 @@ class ElementWaiter(object):
|
||||
|
||||
|
||||
class Scroll(object):
|
||||
"""用于滚动的对象"""
|
||||
|
||||
def __init__(self, page_or_ele):
|
||||
self.driver: Union[DriverElement, DriverPage] = ...
|
||||
|
@ -17,7 +17,6 @@ from .session_element import SessionElement
|
||||
|
||||
|
||||
class DriverPage(BasePage):
|
||||
"""DriverPage封装了页面操作的常用功能,使用selenium来获取、解析、操作网页"""
|
||||
|
||||
def __init__(self, driver: RemoteWebDriver, timeout: float = ...) -> None:
|
||||
self._driver: RemoteWebDriver = ...
|
||||
@ -165,7 +164,6 @@ class DriverPage(BasePage):
|
||||
|
||||
|
||||
class ToFrame(object):
|
||||
"""用于处理焦点跳转到页面框架的类"""
|
||||
|
||||
def __init__(self, page: DriverPage):
|
||||
self.page: DriverPage = ...
|
||||
|
@ -21,6 +21,7 @@ class Keys:
|
||||
SHIFT = '\ue008'
|
||||
LEFT_SHIFT = SHIFT
|
||||
CONTROL = '\ue009'
|
||||
CTRL = '\ue009'
|
||||
LEFT_CONTROL = CONTROL
|
||||
ALT = '\ue00a'
|
||||
LEFT_ALT = ALT
|
||||
@ -41,6 +42,7 @@ class Keys:
|
||||
ARROW_DOWN = DOWN
|
||||
INSERT = '\ue016'
|
||||
DELETE = '\ue017'
|
||||
DEL = '\ue017'
|
||||
SEMICOLON = '\ue018'
|
||||
EQUALS = '\ue019'
|
||||
|
||||
|
@ -22,12 +22,6 @@ from .session_page import SessionPage
|
||||
|
||||
|
||||
class MixPage(SessionPage, DriverPage, BasePage):
|
||||
"""MixPage整合了DriverPage和SessionPage,封装了对页面的操作,
|
||||
可在selenium(d模式)和requests(s模式)间无缝切换。
|
||||
切换的时候会自动同步cookies。
|
||||
获取信息功能为两种模式共有,操作页面元素功能只有d模式有。
|
||||
调用某种模式独有的功能,会自动切换到该模式。
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
mode: str = ...,
|
||||
@ -56,24 +50,24 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
def json(self) -> dict: ...
|
||||
|
||||
def get(self,
|
||||
url: str,
|
||||
show_errmsg: bool | None = ...,
|
||||
retry: int | None = ...,
|
||||
interval: float | None = ...,
|
||||
timeout: float | None = ...,
|
||||
params: dict | None = ...,
|
||||
data: Union[dict, str, None] = ...,
|
||||
json: Union[dict, str, None] = ...,
|
||||
headers: dict | None = ...,
|
||||
cookies: Any | None = ...,
|
||||
files: Any | None = ...,
|
||||
auth: Any | None = ...,
|
||||
allow_redirects: bool = ...,
|
||||
proxies: dict | None = ...,
|
||||
hooks: Any | None = ...,
|
||||
stream: Any | None = ...,
|
||||
verify: Any | None = ...,
|
||||
cert: Any | None = ...) -> Union[bool, None]: ...
|
||||
url: str,
|
||||
show_errmsg: bool | None = ...,
|
||||
retry: int | None = ...,
|
||||
interval: float | None = ...,
|
||||
timeout: float | None = ...,
|
||||
params: dict | None = ...,
|
||||
data: Union[dict, str, None] = ...,
|
||||
json: Union[dict, str, None] = ...,
|
||||
headers: dict | None = ...,
|
||||
cookies: Any | None = ...,
|
||||
files: Any | None = ...,
|
||||
auth: Any | None = ...,
|
||||
allow_redirects: bool = ...,
|
||||
proxies: dict | None = ...,
|
||||
hooks: Any | None = ...,
|
||||
stream: Any | None = ...,
|
||||
verify: Any | None = ...,
|
||||
cert: Any | None = ...) -> Union[bool, None]: ...
|
||||
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
||||
|
@ -16,7 +16,6 @@ from .session_page import SessionPage
|
||||
|
||||
|
||||
class SessionElement(DrissionElement):
|
||||
"""session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能"""
|
||||
|
||||
def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = ...):
|
||||
self._inner_ele: HtmlElement = ...
|
||||
|
@ -95,8 +95,11 @@ class SessionPage(BasePage):
|
||||
|
||||
@property
|
||||
def json(self):
|
||||
"""当返回内容是json格式时,返回对应的字典"""
|
||||
return self.response.json()
|
||||
"""当返回内容是json格式时,返回对应的字典,非json格式时返回None"""
|
||||
try:
|
||||
return self.response.json()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
|
||||
"""用get方式跳转到url \n
|
||||
|
@ -47,7 +47,7 @@ class SessionPage(BasePage):
|
||||
def html(self) -> str: ...
|
||||
|
||||
@property
|
||||
def json(self) -> dict: ...
|
||||
def json(self) -> Union[dict, None]: ...
|
||||
|
||||
def get(self,
|
||||
url: str,
|
||||
|
@ -15,7 +15,6 @@ from .session_element import SessionElement
|
||||
|
||||
|
||||
class ShadowRootElement(BaseElement):
|
||||
"""ShadowRootElement是用于处理ShadowRoot的类,使用方法和DriverElement基本一致"""
|
||||
|
||||
def __init__(self, inner_ele: WebElement, parent_ele: DriverElement):
|
||||
self._inner_ele: WebElement = ...
|
||||
|
@ -13,17 +13,17 @@ from .chromium_base import ChromiumBase, Timeout
|
||||
from .chromium_page import ChromiumPage
|
||||
from .config import DriverOptions, SessionOptions, cookies_to_tuple
|
||||
from .session_page import SessionPage
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
"""整合浏览器和request的页面类"""
|
||||
|
||||
def __init__(self, mode='d', timeout=10, tab_id=None, driver_or_options=None, session_or_options=None):
|
||||
"""初始化函数 \n
|
||||
"""初始化函数 \n
|
||||
:param mode: 'd' 或 's',即driver模式和session模式
|
||||
:param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒
|
||||
:param driver_or_options: Tab对象或DriverOptions对象,只使用s模式时应传入False
|
||||
:param driver_or_options: ChromiumDriver对象或DriverOptions对象,只使用s模式时应传入False
|
||||
:param session_or_options: Session对象或SessionOptions对象,只使用d模式时应传入False
|
||||
"""
|
||||
self._mode = mode.lower()
|
||||
@ -111,19 +111,19 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
|
||||
@property
|
||||
def driver(self):
|
||||
"""返回纯粹的Tab对象"""
|
||||
"""返回纯粹的ChromiumDriver对象"""
|
||||
return self._tab_obj
|
||||
|
||||
@property
|
||||
def _wait_driver(self):
|
||||
"""返回用于控制浏览器的Tab对象,会先等待页面加载完毕"""
|
||||
"""返回用于控制浏览器的ChromiumDriver对象,会先等待页面加载完毕"""
|
||||
while self._is_loading:
|
||||
sleep(.1)
|
||||
return self._driver
|
||||
|
||||
@property
|
||||
def _driver(self):
|
||||
"""返回纯粹的Tab对象,调用时切换到d模式,并连接浏览器"""
|
||||
"""返回纯粹的ChromiumDriver对象,调用时切换到d模式,并连接浏览器"""
|
||||
self.change_mode('d')
|
||||
if self._tab_obj is None:
|
||||
self._connect_browser(self._driver_options, self._setting_tab_id)
|
||||
@ -377,20 +377,20 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self)._ele(loc_or_ele, timeout=timeout, single=single, relative=relative)
|
||||
|
||||
def _set_driver_options(self, Tab_or_Options):
|
||||
def _set_driver_options(self, driver_or_Options):
|
||||
"""处理driver设置"""
|
||||
if Tab_or_Options is None:
|
||||
if driver_or_Options is None:
|
||||
self._driver_options = DriverOptions()
|
||||
|
||||
elif Tab_or_Options is False:
|
||||
elif driver_or_Options is False:
|
||||
self._driver_options = DriverOptions(read_file=False)
|
||||
|
||||
elif isinstance(Tab_or_Options, Tab):
|
||||
self._connect_browser(Tab_or_Options)
|
||||
elif isinstance(driver_or_Options, ChromiumDriver):
|
||||
self._connect_browser(driver_or_Options)
|
||||
self._has_driver = True
|
||||
|
||||
elif isinstance(Tab_or_Options, DriverOptions):
|
||||
self._driver_options = Tab_or_Options
|
||||
elif isinstance(driver_or_Options, DriverOptions):
|
||||
self._driver_options = driver_or_Options
|
||||
|
||||
else:
|
||||
raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。')
|
||||
|
@ -15,17 +15,16 @@ from .chromium_page import ChromiumPage
|
||||
from .config import DriverOptions, SessionOptions
|
||||
from .session_element import SessionElement
|
||||
from .session_page import SessionPage
|
||||
from .tab import Tab
|
||||
from .chromium_driver import ChromiumDriver
|
||||
|
||||
|
||||
class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
"""整合浏览器和request的页面类"""
|
||||
|
||||
def __init__(self,
|
||||
mode: str = ...,
|
||||
timeout: float = ...,
|
||||
tab_id: str = ...,
|
||||
driver_or_options: Union[Tab, DriverOptions, bool] = ...,
|
||||
driver_or_options: Union[ChromiumDriver, DriverOptions, bool] = ...,
|
||||
session_or_options: Union[Session, SessionOptions, bool] = ...) -> None:
|
||||
self._mode: str = ...
|
||||
self._has_driver: bool = ...
|
||||
@ -61,16 +60,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
def session(self) -> Session: ...
|
||||
|
||||
@property
|
||||
def driver(self) -> Tab: ...
|
||||
def driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@property
|
||||
def _wait_driver(self) -> Tab: ...
|
||||
def _wait_driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@property
|
||||
def _driver(self) -> Tab: ...
|
||||
def _driver(self) -> ChromiumDriver: ...
|
||||
|
||||
@_driver.setter
|
||||
def _driver(self, tab:Tab): ...
|
||||
def _driver(self, tab: ChromiumDriver): ...
|
||||
|
||||
@property
|
||||
def _session_url(self) -> str: ...
|
||||
@ -156,7 +155,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
|
||||
-> Union[ChromiumElement, SessionElement, ChromiumFrame, str, None, List[Union[SessionElement, str]], List[
|
||||
Union[ChromiumElement, str, ChromiumFrame]]]: ...
|
||||
|
||||
def _set_driver_options(self, Tab_or_Options:Union[Tab, DriverOptions]) -> None: ...
|
||||
def _set_driver_options(self, driver_or_Options: Union[ChromiumDriver, DriverOptions]) -> None: ...
|
||||
|
||||
def _set_session_options(self, Session_or_Options:Union[Session, SessionOptions]) -> None: ...
|
||||
|
||||
|
@ -99,6 +99,8 @@ proxies = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'}
|
||||
page.get(url, headers=headers, cookies=cookies, proxies=proxies)
|
||||
```
|
||||
|
||||
?> s 模式访问时默认设置`allow_redirects`参数为`False`,即访问重定向链接时须手动处理。
|
||||
|
||||
## 📍 `post()`方法
|
||||
|
||||
此方法是用 post 方式请求页面。用法与`get()`一致。调用时,`WebPage`对象会自动切换到 s 模式。
|
||||
|
@ -457,13 +457,17 @@ loc1 = (By.ID, 'ele_id')
|
||||
ele = page.ele(loc1)
|
||||
|
||||
# 按 xpath 查找
|
||||
loc2 = (By.XPATH, '//div[@class="ele_class"]'
|
||||
loc2 = (By.XPATH, '//div[@class="ele_class"]')
|
||||
ele = page.ele(loc2)
|
||||
```
|
||||
|
||||
# ✔️ 等待
|
||||
|
||||
d 模式下所有查找元素操作都自带等待,默认为跟随元素所在页面`timeout`属性(默认 10 秒),也可以在每次查找时单独设置,单独设置的等待时间不会改变页面原来设置。
|
||||
## 📍 等待元素加载
|
||||
|
||||
由于网络的不稳定性、js 运行时间的不确定性等因素,经常须要等待元素加载到 DOM 中才能使用。
|
||||
|
||||
d 模式下所有查找元素操作都自带等待,时间默认跟随元素所在页面`timeout`属性(默认 10 秒),也可以在每次查找时单独设置,单独设置的等待时间不会改变页面原来设置。
|
||||
|
||||
```python
|
||||
# 页面初始化时设置查找元素超时时间为 15 秒
|
||||
@ -481,6 +485,41 @@ ele2 = ele1.ele('search text')
|
||||
ele2 = ele1.ele('some text', timeout=1)
|
||||
```
|
||||
|
||||
## 📍 等待元素状态改变
|
||||
|
||||
有时候我们须要等待元素到达某种状态,如显示、隐藏、删除。页面对象和元素对象都内置了`wait_ele()`方法,用于等待元素状态变化。
|
||||
|
||||
该方法可接收现成的`ChromiumElement`对象,或定位符,默认等待时间为页面对象的`timeout`值,也可以单独设定。
|
||||
|
||||
`wait_ele()`方法
|
||||
|
||||
**参数:**
|
||||
|
||||
- `loc_or_ele`:要等待的元素,可以是元素或定位符
|
||||
- `timeout`:等待超时时间,默认使用页面超时时间
|
||||
|
||||
方法:
|
||||
|
||||
| 方法 | 参数 | 功能 |
|
||||
| --------- | --- | ------------ |
|
||||
| display() | 无 | 等待元素在 DOM 中显示 |
|
||||
| hidden() | 无 | 等待元素在 DOM 中隐藏 |
|
||||
| delete() | 无 | 等待元素从 DOM 删除 |
|
||||
|
||||
**返回:** 这些方法返回布尔值,代表是否等待成功。
|
||||
|
||||
```python
|
||||
# 等待 id 为 div1 的元素显示,超时使用页面设置
|
||||
page.wait_ele('#div1').display()
|
||||
|
||||
# 等待 id 为 div1 的元素被删除,设置超时 3 秒
|
||||
ele.wait_ele('#div1', timeout=3).delete()
|
||||
|
||||
# 等待已获取到的元素被隐藏
|
||||
ele2 = ele1.ele('#div1')
|
||||
ele1.wait_ele(ele2).hidden()
|
||||
```
|
||||
|
||||
# ✔️ 相对定位
|
||||
|
||||
以下方法可以以某元素为基准,在 DOM 中按照条件获取其兄弟元素、祖先元素、文档前后元素。
|
||||
@ -683,16 +722,54 @@ divs = ele1.befores('tag:div')
|
||||
|
||||
# ✔️ 查找 frame 里的元素
|
||||
|
||||
与 selenium 不同,本库可以直接查找 frame 里面的元素,而无需切入切出,大大简化了程序逻辑,使用更便捷。
|
||||
## 📍 在页面下跨级查找
|
||||
|
||||
未完待续。。。
|
||||
与 selenium 不同,本库可以直接查找 frame 里面的元素,而且无视层级,可以直接获取到多层 iframe 里的元素。无需切入切出,大大简化了程序逻辑,使用更便捷。
|
||||
|
||||
假设在页面中有个两级 iframe,其中有个元素`<div id='abc'></div>`,可以这样获取:
|
||||
|
||||
```python
|
||||
page = WebPage()
|
||||
ele = page('#abc')
|
||||
```
|
||||
|
||||
获取前后无须切入切出,也不影响获取页面上其它元素。如果用 selenium,要这样:
|
||||
|
||||
```python
|
||||
driver = webdriver.Chrome()
|
||||
driver.switch_to.frame(0)
|
||||
driver.switch_to.frame(0)
|
||||
ele = driver.find_element(By.ID, 'abc')
|
||||
driver.switch_to.default_content()
|
||||
```
|
||||
|
||||
显然比较繁琐,而且切入到 iframe 后无法对 iframe 外的元素进行操作。
|
||||
|
||||
!>**注意:**<br>跨级查找只是页面对象支持,元素对象不能直接查找内部 iframe 里的元素。
|
||||
|
||||
## 📍 在 iframe 元素下查找
|
||||
|
||||
本库把 iframe 看作一个特殊的元素/页面对象,逻辑更清晰,还可以实现同时操作多个 iframe,而无须来回切换。查找元素外的更多功能详见介绍 iframe 的章节。
|
||||
|
||||
对于跨域名的 iframe,我们无法通过页面直接查找里面的元素,可以先获取到 iframe 元素,再在其下查找。当然,非跨域 iframe 也可以这样操作。
|
||||
|
||||
假设一个 iframe 的 id 为 `'iframe1'`,要在其中查找一个 id 为`'abc'`的元素:
|
||||
|
||||
```python
|
||||
page = WebPage()
|
||||
iframe = page('#iframe1')
|
||||
ele = iframe('#abc')
|
||||
```
|
||||
|
||||
这个 iframe 元素是一个页面对象,因此可以继续在其下进行跨 iframe 查找(相对这个 iframe 不跨域的)。
|
||||
|
||||
# ✔️ `ShadowRootElement`相关查找
|
||||
|
||||
本库把 shadow-root 也作为元素对象看待,是为`ChromiumShadowRootElement`对象。该对象可与普通元素一样查找下级元素和 DOM 内相对定位。
|
||||
对`ChromiumShadowRootElement`对象进行相对定位时,把它看作其父对象内部的第一个对象,其余定位逻辑与普通对象一致。
|
||||
|
||||
!> **注意:** <br>如果`ChromiumShadowRootElement`元素的下级元素中有其它`ChromiumShadowRootElement`元素,那这些下级`ChromiumShadowRootElement`元素内部是无法直接通过定位语句查找到的,只能先定位到其父元素,再用`shadow-root`属性获取。
|
||||
!> **注意:** <br>如果`ChromiumShadowRootElement`元素的下级元素中有其它`ChromiumShadowRootElement`元素,那这些下级`ChromiumShadowRootElement`
|
||||
元素内部是无法直接通过定位语句查找到的,只能先定位到其父元素,再用`shadow-root`属性获取。
|
||||
|
||||
```python
|
||||
# 获取一个 shadow-root 元素
|
||||
|
415
docs/WebPage使用方法/3.4获取元素信息.md
Normal file
415
docs/WebPage使用方法/3.4获取元素信息.md
Normal file
@ -0,0 +1,415 @@
|
||||
获取到须要的页面元素后,可以使用元素对象获取元素的信息。
|
||||
|
||||
`WebPage`生成的元素对象有四种:
|
||||
|
||||
- `ChromiumElement`:浏览器一般元素
|
||||
|
||||
- `ChromiumShadowRootElement`:shadow-root 元素
|
||||
|
||||
- `ChromiumFrame`:浏览器中的 iframe 元素
|
||||
|
||||
- `SessionElement`:s 模式的元素,或前 3 者转换而成的静态元素
|
||||
|
||||
前三者是 d 模式下通过浏览器页面元素生成,后者是 s 模式由静态文本生成。
|
||||
|
||||
`ChromiumElement`对象拥有`SessionElement`对象所有属性。
|
||||
|
||||
`ChromiumFrame`作为既是元素又是页面的存在,后面章节单独介绍。
|
||||
|
||||
# ✔️ 简单示例
|
||||
|
||||
以下示例可直接运行查看结果:
|
||||
|
||||
```python
|
||||
from DrissionPage import WebPage
|
||||
|
||||
page = WebPage('s')
|
||||
page.get('https://gitee.com/explore')
|
||||
|
||||
# 获取推荐目录下所有 a 元素
|
||||
li_eles = page('tag:ul@@text():全部推荐项目').eles('t:a')
|
||||
|
||||
for i in li_eles: # 遍历列表
|
||||
print(i.tag, i.text, i.attr('href')) # 获取并打印标签名、文本、href 属性
|
||||
|
||||
"""输出:
|
||||
a 全部推荐项目 https://gitee.com/explore/all
|
||||
a 前沿技术 https://gitee.com/explore/new-tech
|
||||
a 智能硬件 https://gitee.com/explore/hardware
|
||||
a IOT/物联网/边缘计算 https://gitee.com/explore/iot
|
||||
a 车载应用 https://gitee.com/explore/vehicle
|
||||
以下省略……
|
||||
"""
|
||||
```
|
||||
|
||||
# ✔️ `SessionElement`属性
|
||||
|
||||
假设`ele`为以下`div`元素的对象:
|
||||
|
||||
```html
|
||||
<div id="div1" class="divs">Hello World!
|
||||
<p>行元素</p>
|
||||
<!--这是注释-->
|
||||
</div>
|
||||
```
|
||||
|
||||
## 📍 `html`
|
||||
|
||||
此属性返回元素的`outerHTML`文本。
|
||||
|
||||
```python
|
||||
html = ele.html
|
||||
"""返回:
|
||||
<div id="div1" class="divs">Hello World!
|
||||
<p>行元素</p>
|
||||
<!--这是注释-->
|
||||
</div>
|
||||
"""
|
||||
```
|
||||
|
||||
## 📍 `inner_html`
|
||||
|
||||
此属性返回元素的`innerHTML`文本。
|
||||
|
||||
```python
|
||||
inner_html = ele.inner_html
|
||||
"""返回:
|
||||
Hello World!
|
||||
<p>行元素</p>
|
||||
<!--这是注释-->
|
||||
"""
|
||||
```
|
||||
|
||||
## 📍 `tag`
|
||||
|
||||
此属性返回元素的标签名。
|
||||
|
||||
```python
|
||||
tag = ele.tag
|
||||
# 返回:div
|
||||
```
|
||||
|
||||
## 📍 `text`
|
||||
|
||||
此属性返回元素内所有文本组合成的字符串。
|
||||
该字符串已格式化,即已转码,已去除多余换行符,符合人读取习惯,便于直接使用。无须重复写处理代码。
|
||||
|
||||
```python
|
||||
text = ele.text
|
||||
"""返回:
|
||||
Hello World!
|
||||
行元素
|
||||
"""
|
||||
```
|
||||
|
||||
## 📍 `raw_text`
|
||||
|
||||
此属性返回元素内原始文本。
|
||||
|
||||
```python
|
||||
text = ele.raw_text
|
||||
"""返回(注意保留了元素间的空格和换行):
|
||||
Hello World!
|
||||
行元素
|
||||
|
||||
|
||||
"""
|
||||
```
|
||||
|
||||
## 📍 `texts()`
|
||||
|
||||
此方法返回元素内所有**直接**子节点的文本,包括元素和文本节点。它有一个参数`text_node_only`,为`True`时只返回不被包裹的文本节点。这个方法适用于获取文本节点和元素节点混排的情况。
|
||||
|
||||
参数:
|
||||
|
||||
- `text_node_only`:是否只返回文本节点
|
||||
|
||||
返回:文本列表
|
||||
|
||||
```python
|
||||
texts = ele.texts()
|
||||
print(ele.texts())
|
||||
# 输出:['Hello World!', '行元素']
|
||||
|
||||
print(ele.texts(text_node_only=True))
|
||||
# 输出:['Hello World!']
|
||||
```
|
||||
|
||||
## 📍 `comments`
|
||||
|
||||
此属性以列表形式返回元素内的注释。
|
||||
|
||||
```python
|
||||
comments = ele.comments
|
||||
# 返回:[<!--这是注释-->]
|
||||
```
|
||||
|
||||
## 📍 `attrs`
|
||||
|
||||
此属性以字典形式返回元素所有属性及值。
|
||||
|
||||
```python
|
||||
attrs = ele.attrs
|
||||
# 返回:{'id': 'div1', 'class': 'divs'}
|
||||
```
|
||||
|
||||
## 📍 `attr()`
|
||||
|
||||
此方法返回元素某个`attribute`属性值。它接收一个字符串参数`attr`,返回该属性值文本,无该属性时返回`None`。
|
||||
此方法返回的`src`、`href`属性为已补充完整的路径。`text`属性为已格式化文本。
|
||||
|
||||
参数:
|
||||
|
||||
- `attr`:属性名称
|
||||
|
||||
返回:属性值文本
|
||||
|
||||
```python
|
||||
ele_id = ele.attr('id')
|
||||
# 返回:div1
|
||||
```
|
||||
|
||||
## 📍 `link`
|
||||
|
||||
此属性返回元素的 href 属性或 src 属性,没有这两个属性则返回`None`。
|
||||
|
||||
```html
|
||||
<a href='http://www.baidu.com'>百度</a>
|
||||
```
|
||||
|
||||
假设`a_ele`为以上元素的对象:
|
||||
|
||||
```python
|
||||
link = a_ele.link
|
||||
# 返回:http://www.baidu.com
|
||||
```
|
||||
|
||||
## 📍 `page`
|
||||
|
||||
此属性返回元素所在的页面对象。由 html 文本直接生成的`SessionElement`的`page`属性为`None`。
|
||||
|
||||
```python
|
||||
page = ele.page
|
||||
```
|
||||
|
||||
## 📍 `xpath`
|
||||
|
||||
此属性返回当前元素在页面中 xpath 的绝对路径。
|
||||
|
||||
```python
|
||||
xpath = ele.xpath
|
||||
# 返回:/html/body/div
|
||||
```
|
||||
|
||||
## 📍 `css_path`
|
||||
|
||||
此属性返回当前元素在页面中 css selector 的绝对路径。
|
||||
|
||||
```python
|
||||
css = ele.css_path
|
||||
# 返回::nth-child(1)>:nth-child(1)>:nth-child(1)
|
||||
```
|
||||
|
||||
# ✔️ `ChromiumElement`属性
|
||||
|
||||
`ChromiumElement`对象拥有`SessionElement`对象上述所有属性,并因运行在浏览器中拥有更丰富的属性。
|
||||
|
||||
## 📍 `size`
|
||||
|
||||
此属性以元组形式返回元素的大小。
|
||||
|
||||
```python
|
||||
size = ele.size
|
||||
# 返回:(50, 50)
|
||||
```
|
||||
|
||||
## 📍 `location`
|
||||
|
||||
此属性以元组形式返回元素**左上角**在**整个页面**中的坐标。
|
||||
|
||||
```python
|
||||
loc = ele.location
|
||||
# 返回:(50, 50)
|
||||
```
|
||||
|
||||
## 📍 `client_location`
|
||||
|
||||
此属性以元组形式返回元素**左上角**在**当前视口**中的坐标。
|
||||
|
||||
```python
|
||||
loc = ele.client_location
|
||||
# 返回:(50, 50)
|
||||
```
|
||||
|
||||
## 📍 `midpoint`
|
||||
|
||||
此属性以元组形式返回元素**中点**在**整个页面**中的坐标。
|
||||
|
||||
```python
|
||||
loc = ele.midpoint
|
||||
# 返回:(55, 55)
|
||||
```
|
||||
|
||||
## 📍 `client_midpoint`
|
||||
|
||||
此属性以元组形式返回元素**中点**在**视口**中的坐标。
|
||||
|
||||
```python
|
||||
loc = ele.client_midpoint
|
||||
# 返回:(55, 55)
|
||||
```
|
||||
|
||||
## 📍 `pseudo_before`
|
||||
|
||||
此属性以文本形式返回当前元素的`::before`伪元素内容。
|
||||
|
||||
```python
|
||||
before_txt = ele.pseudo_before
|
||||
```
|
||||
|
||||
## 📍 `pseudo_after`
|
||||
|
||||
此属性以文本形式返回当前元素的`::after`伪元素内容。
|
||||
|
||||
```python
|
||||
after_txt = ele.pseudo_after
|
||||
```
|
||||
|
||||
## 📍 `style()`
|
||||
|
||||
该方法返回元素 css 样式属性值,可获取伪元素的属性。它有两个参数,`style`参数输入样式属性名称,`pseudo_ele`参数输入伪元素名称,省略则获取普通元素的 css 样式属性。
|
||||
|
||||
参数:
|
||||
|
||||
- `style`:样式名称
|
||||
- `pseudo_ele`:伪元素名称(如有)
|
||||
|
||||
返回:样式属性值
|
||||
|
||||
```python
|
||||
# 获取 css 属性的 color 值
|
||||
prop = ele.style('color')
|
||||
|
||||
# 获取 after 伪元素的内容
|
||||
prop = ele.style('content', 'after')
|
||||
```
|
||||
|
||||
## 📍 `prop()`
|
||||
|
||||
此方法返回`property`属性值。它接收一个字符串参数,返回该参数的属性值。
|
||||
|
||||
参数:
|
||||
|
||||
- `prop`:属性名称
|
||||
|
||||
返回:属性值
|
||||
|
||||
## 📍`is_in_viewport`
|
||||
|
||||
此属性以布尔值方式返回元素是否在视口中,以元素可以接受点击的点为判断。
|
||||
|
||||
## 📍`is_alive`
|
||||
|
||||
此属性以布尔值形式返回当前元素是否仍可用。用于判断 d 模式下是否因页面刷新而导致元素失效。
|
||||
|
||||
## 📍 `is_selected`
|
||||
|
||||
此属性以布尔值返回元素是否选中。
|
||||
|
||||
## 📍 `is_enabled`
|
||||
|
||||
此属性以布尔值返回元素是否可用。
|
||||
|
||||
## 📍 `is_displayed`
|
||||
|
||||
此属性以布尔值返回元素是否可见。
|
||||
|
||||
# ✔️ 保存和截图
|
||||
|
||||
保存功能是本库一个特色功能,可以直接读取浏览器缓存,无须依赖 gui 或重新下载就可以保存页面资源。
|
||||
|
||||
作为对比,selenium 无法自身实现图片另存,往往须要通过使用 gui 进行辅助,不仅效率和可靠性低,还占用键鼠资源。
|
||||
|
||||
## 📍 `get_src()`
|
||||
|
||||
此方法用于返回元素`src`属性所使用的资源。base64 的会转为`bytes`返回,其它的以`str`返回。无资源的返回`None`。
|
||||
|
||||
例如,可获取页面上图片字节数据,用于识别内容,或保存到文件。`<script>`标签也可获取 js 文本。
|
||||
|
||||
!> 无法获取 Blob 内容。
|
||||
|
||||
**参数:** 无
|
||||
|
||||
**返回:** 资源内容字符串
|
||||
|
||||
```python
|
||||
img = page('tag:img')
|
||||
src = img.get_src()
|
||||
```
|
||||
|
||||
## 📍 `save()`
|
||||
|
||||
此方法用于保存`get_src()`方法获取到的资源到文件。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `path`:文件保存路径,为`None`时保存到当前文件夹
|
||||
|
||||
- `rename`:文件名称,须包含后缀,为`None`时从资源url获取
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
img = page('tag:img')
|
||||
img.save('D:\\img.png')
|
||||
```
|
||||
|
||||
## 📍 `get_screenshot()`
|
||||
|
||||
此方法用于对元素进行截图。若截图时元素在视口外,须 90 以上版本内核支持。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `path`:图片完整路径,后缀可选`'jpg'`、`'jpeg'`、`'png'`、`'webp'`
|
||||
|
||||
- `as_bytes`:是否以字节形式返回图片,可选`'jpg'`、`'jpeg'`、`'png'`、`'webp'`。为`True`时以`'png'`输出。生效时`path`参数无效。
|
||||
|
||||
**返回:** 图片完整路径或字节文本
|
||||
|
||||
```python
|
||||
img = page('tag:img')
|
||||
img.get_screenshot()
|
||||
bytes_str = img.get_screenshot(as_bytes='png') # 返回截图二进制文本
|
||||
```
|
||||
|
||||
# ✔️ `ChromiumShadowRootElement`属性
|
||||
|
||||
本库把 shadow dom 的`root`看作一个元素处理,可以获取属性,也可以执行其下级的查找,使用逻辑与`ChromiumElement`一致,但属性较之少,有如下这些:
|
||||
|
||||
## 📍 `tag`
|
||||
|
||||
此属性返回元素标签名,即`'shadow-root'`。
|
||||
|
||||
## 📍 `html`
|
||||
|
||||
此属性返回`shadow_root`的 html 文本,由`<shadow_root></shadow_root>` 标签包裹。
|
||||
|
||||
## 📍 `inner_html`
|
||||
|
||||
此属性返回`shadow_root`内部的 html 文本。
|
||||
|
||||
## 📍 `page`
|
||||
|
||||
此属性返回元素所在页面对象。
|
||||
|
||||
## 📍 `parent_ele`
|
||||
|
||||
此属性返回所依附的普通元素对象。
|
||||
|
||||
## 📍 `is_enabled`
|
||||
|
||||
与`ChromiumElement`一致。
|
||||
|
||||
## 📍 `is_alive`
|
||||
|
||||
与`ChromiumElement`一致。
|
454
docs/WebPage使用方法/3.5元素操作.md
Normal file
454
docs/WebPage使用方法/3.5元素操作.md
Normal file
@ -0,0 +1,454 @@
|
||||
只有 d 模式下才能对元素进行操作,本节介绍普通元素`ChromiumElement`内置方法。
|
||||
|
||||
iframe 元素`ChromiumFrame`使用方法后面章节单独介绍。
|
||||
|
||||
# ✔️ 点击元素
|
||||
|
||||
## 📍 `click()`
|
||||
|
||||
此方法用于点击元素。可选择模拟点击或 js 点击。
|
||||
|
||||
还可以在点击被遮挡时执行重试。`retry`参数为`True`、`by_js`参数为`None`时,会先用模拟方式点击,若遇到其它元素遮挡,会重试点击直到超时。若都失败,自动改用 js 方式进行点击。
|
||||
|
||||
`by_js`参数为`True`时,只会通过 js 点击一次,不会重试。
|
||||
|
||||
此设计除了可保证点击成功,还可以用于检测页面上的遮罩层是否消失。遮罩层经常出现在 js 方式翻页的时候,它的覆盖会阻碍模拟点击,所以可以通过对其下面的元素不断重试点击,来判断遮罩层是否存在。当然,这个方法是否可行要看具体网页设计。
|
||||
而如果直接使用 js 进行点击,则可无视任何遮挡,只要元素在 DOM 内,就能点击得到,这样可以根据须要灵活地对元素进行操作。
|
||||
|
||||
通常,点击链接后立刻获取新页面的元素,程序可自动等待元素加载,但若跳转前的页面拥有和跳转后页面相同定位符的元素,会导致过早获取元素,跳转后失效的问题。可以把`wait_loading`参数设为`True`,点击后程序会等待页面进入
|
||||
loading 状态,才会继续往下执行,从而避免上述问题。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `by_js`:是否用 js 方式点击,为`None`时如`retry`为`True`,先用模拟方法点击,重试失败超时后改为用 js 点击;为`True`时直接用 js 点击;为`False`时即使重试超时也不会改用 js
|
||||
- `retry`:遇到其它元素遮挡时,是否重试
|
||||
- `timeout`:点击失败重试超时时间,为`None`时使用父页面`timeout`设置
|
||||
- `wait_loading`:是否等待页面进入加载状态
|
||||
|
||||
**返回:**`bool`,表示是否点击成功。
|
||||
|
||||
```python
|
||||
# 对 ele 元素进行点击
|
||||
ele.click()
|
||||
|
||||
# 用 js 方式点击 ele 元素
|
||||
ele.click(by_js=True)
|
||||
|
||||
# 假设遮罩层出现,ele 是遮罩层下方的元素
|
||||
ele.click(by_js=False, retry=True, timeout = 10) # 不断重试点击,直到遮罩层消失,或到达 10 秒
|
||||
ele.click(by_js=True) # 无视遮罩层,直接用 js 点击下方元素
|
||||
```
|
||||
|
||||
## 📍 `click_at()`
|
||||
|
||||
此方法用于带偏移量点击元素,偏移量相对于元素左上角坐标。不传入`offset_x`或`offset_y`值时点击元素左上角可接受点击的点。
|
||||
可用于点击一些奇怪的东西,比如用伪元素表示的控件。
|
||||
点击的目标不一定在元素上,可以传入负值,或大于元素大小的值,点击元素附近的区域。向右和向下为正值,向左和向上为负值。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `offset_x`:相对元素左上角坐标的 x 轴偏移量
|
||||
- `offset_y`:相对元素左上角坐标的 y 轴偏移量
|
||||
- `button`:点击哪个键,传入`'left'`或`'right'`
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 点击元素右上方 50*50 的位置
|
||||
ele.click_at(50, -50)
|
||||
|
||||
# 点击元素上中部,x 相对左上角向右偏移50,y 保持在元素中点
|
||||
ele.click_at(offset_x=50)
|
||||
|
||||
# 和 click() 相似,但没有重试功能
|
||||
ele.click_at()
|
||||
```
|
||||
|
||||
## 📍 `r_click()`
|
||||
|
||||
此方法实现右键单击元素。
|
||||
|
||||
**参数:** 无
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
ele.r_click()
|
||||
```
|
||||
|
||||
## 📍 `r_click_at()`
|
||||
|
||||
此方法用于带偏移量右键点击元素,用法和`click_at()`相似。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `offset_x`:相对元素左上角坐标的 x 轴偏移量
|
||||
- `offset_y`:相对元素左上角坐标的 y 轴偏移量
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 点击距离元素左上角 50*50 的位置(位于元素内部)
|
||||
ele.r_click_at(50, 50)
|
||||
```
|
||||
|
||||
# ✔️ 输入内容
|
||||
|
||||
## 📍 `clear()`
|
||||
|
||||
此方法用于清空元素文本,可选择模拟按键或 js 方式。
|
||||
|
||||
模拟按键方式会自动输入`ctrl-a-del`组合键来清除文本框,js 方式则直接把元素`value`属性设置为`''`。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `by_js`:是否用 js 方式清空
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
ele.clear()
|
||||
```
|
||||
|
||||
## 📍 `input()`
|
||||
|
||||
此方法用于向元素输入文本或组合键,也可用于输入文件路径到`input`元素。可选择输入前是否清空元素。
|
||||
|
||||
组合键以`tuple`方式传入。
|
||||
|
||||
多文件上传控件,多个路径以`list`、`tuple`或以`\n`分隔的字符串传入。
|
||||
?> **Tips:** <br>- 有些文本框可以接收回车代替点击按钮,可以直接在文本末尾加上`'\n'`。<br>- 会自动把非`str`数据转换为`str`。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `vals`:文本值或按键组合
|
||||
- `clear`:输入前是否清空文本框
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 输入文本
|
||||
ele.input('Hello world!')
|
||||
|
||||
# 输入文本并回车
|
||||
ele.input('Hello world!\n')
|
||||
```
|
||||
|
||||
## 📍 输入组合键
|
||||
|
||||
使用组合键或要传入特殊按键前,先要导入按键类`Keys`。
|
||||
|
||||
```python
|
||||
from DrissionPage.keys import Keys
|
||||
```
|
||||
|
||||
然后将组合键放在一个`tuple`中传入`input()`即可。
|
||||
|
||||
```python
|
||||
from DrissionPage.keys import Keys
|
||||
|
||||
ele.input((Keys.CTRL, 'a', Keys.DEL)) # ctrl+a+del
|
||||
```
|
||||
|
||||
## 📍 上传文件控件
|
||||
|
||||
上传文件也是用`input()`输入,用法与输入文本一致,稍有不同的是无论`clear`是什么,都会清空原控件内容。
|
||||
|
||||
多文件上传控件,多个路径以`list`、`tuple`或以`\n`分隔的字符串传入。
|
||||
|
||||
```python
|
||||
# 传入一个路径
|
||||
ele.input('D:\\test1.txt')
|
||||
|
||||
# 传入多个路径,方式 1
|
||||
paths = 'D:\\test1.txt\nD:\\test2.txt'
|
||||
ele.input(paths)
|
||||
|
||||
# 传入多个路径,方式 2
|
||||
paths = ['D:\\test1.txt', 'D:\\test2.txt']
|
||||
ele.input(paths)
|
||||
```
|
||||
|
||||
# ✔️ 拖拽和悬停
|
||||
|
||||
?> 除了以下方法,本库还提供更灵活的动作链`ActionChains`功能,详见后面章节。
|
||||
|
||||
## 📍 `drag()`
|
||||
|
||||
此方法用于拖拽元素到相对于当前的一个新位置,可以设置速度,可以选择是否随机抖动。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `offset_x`:x 变化值
|
||||
- `offset_y`:y 变化值
|
||||
- `speed`:拖动的速度,传入 0 即瞬间到达
|
||||
- `shake`:是否随机抖动
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 拖动当前元素到距离 50*50 的位置,速度为 100,不随机抖动
|
||||
ele.drag(50, 50, 100, False)
|
||||
```
|
||||
|
||||
## 📍 `drag_to()`
|
||||
|
||||
此方法用于拖拽元素到另一个元素上或一个坐标上。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `ele_or_loc`: 另一个元素对象或坐标元组
|
||||
- `speed`: 拖动的速度,传入 0 即瞬间到达
|
||||
- `shake`: 是否随机抖动
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 把 ele1 拖拽到 ele2 上
|
||||
ele1 = page.ele('#div1')
|
||||
ele2 = page.ele('#div2')
|
||||
ele1.drag_to(ele2)
|
||||
|
||||
# 把 ele1 拖拽到网页 50, 50 的位置
|
||||
ele1.drag_to((50, 50))
|
||||
```
|
||||
|
||||
## 📍 `hover()`
|
||||
|
||||
此方法用于模拟鼠标悬停在元素上,可接受偏移量,偏移量相对于元素左上角坐标。不传入`offset_x`或`offset_y`值时悬停在元素中点。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `offset_x`:相对元素左上角坐标的 x 轴偏移量
|
||||
- `offset_y`:相对元素左上角坐标的 y 轴偏移量
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
# 悬停在元素右上方 50*50 的位置
|
||||
ele.hover(50, -50)
|
||||
|
||||
# 悬停在元素上中部,x 相对左上角向右偏移50,y 保持在元素中点
|
||||
ele.hover(offset_x=50)
|
||||
|
||||
# 悬停在元素中点
|
||||
ele.hover()
|
||||
```
|
||||
|
||||
# ✔️ 修改元素
|
||||
|
||||
## 📍 `set_innerHTML()`
|
||||
|
||||
此方法用于设置元素的 innerHTML内容。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `html`:html文本
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
## 📍 `set_prop()`
|
||||
|
||||
此方法用于设置元素`property`属性。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `prop`: 属性名
|
||||
- `value`: 属性值
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
ele.set_prop('value', 'Hello world!')
|
||||
```
|
||||
|
||||
## 📍 `set_attr()`
|
||||
|
||||
此方法用于设置元素`attribute`属性。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `attr`:属性名
|
||||
- `value`:属性值
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
ele.set_attr('href', 'http://www.gitee.com')
|
||||
```
|
||||
|
||||
## 📍 `remove_attr()`
|
||||
|
||||
此方法用于删除元素`attribute`属性。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `attr`:属性名
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
```python
|
||||
ele.remove_attr('href')
|
||||
```
|
||||
|
||||
# ✔️ 执行 js 脚本
|
||||
|
||||
## 📍 `run_script()`
|
||||
|
||||
此方法用于对元素执行 js 代码,代码中用`this`表示元素自己。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `script`:js 文本
|
||||
- `as_expr`:是否作为表达式运行,为`True`时`args`参数无效
|
||||
- `*args`:传入 js 的参数,按顺序在 js 文本中对应`arguments[0]`、`arguments[1]`...
|
||||
|
||||
**返回:** js 执行的结果
|
||||
|
||||
!>**注意:**<br>要获取 js 结果记得写上`return`。
|
||||
|
||||
```python
|
||||
# 用执行 js 的方式点击元素
|
||||
ele.run_script('this.click();')
|
||||
|
||||
# 用 js 获取元素高度
|
||||
height = ele.run_script('return this.offsetHeight;')
|
||||
```
|
||||
|
||||
## 📍 `run_async_script()`
|
||||
|
||||
此方法用于以异步方式执行js代码,代码中用`this`表示元素自己。
|
||||
|
||||
**参数:**
|
||||
|
||||
- `script`:js 文本
|
||||
- `as_expr`:是否作为表达式运行,为`True`时`args`参数无效
|
||||
- `*args`:传入 js 的参数,按顺序在 js 文本中对应`arguments[0]`、`arguments[1]`...
|
||||
|
||||
**返回:**`None`
|
||||
|
||||
# ✔️ 元素滚动
|
||||
|
||||
## 📍 `scroll`
|
||||
|
||||
此属性用于以某种方式滚动元素中的滚动条。
|
||||
调用此属性返回一个`ChromiumScroll`对象,调用该对象的方法实现各种方式的滚动。
|
||||
|
||||
| 方法 | 参数说明 | 功能 |
|
||||
|:-----------------:|:------:|:----------------:|
|
||||
| to_top() | 无 | 滚动到顶端,水平位置不变 |
|
||||
| to_bottom() | 无 | 滚动到底端,水平位置不变 |
|
||||
| to_half() | 无 | 滚动到垂直中间位置,水平位置不变 |
|
||||
| to_rightmost() | 无 | 滚动到最右边,垂直位置不变 |
|
||||
| to_leftmost() | 无 | 滚动到最左边,垂直位置不变 |
|
||||
| to_location(x, y) | 滚动条坐标值 | 滚动到指定位置 |
|
||||
| up(pixel) | 滚动的像素 | 向上滚动若干像素,水平位置不变 |
|
||||
| down(pixel) | 滚动的像素 | 向下滚动若干像素,水平位置不变 |
|
||||
| right(pixel) | 滚动的像素 | 向右滚动若干像素,垂直位置不变 |
|
||||
| left(pixel) | 滚动的像素 | 向左滚动若干像素,垂直位置不变 |
|
||||
|
||||
```python
|
||||
# 滚动到底部
|
||||
ele.scroll.to_bottom()
|
||||
|
||||
# 滚动到最右边
|
||||
ele.scroll.to_rightmost()
|
||||
|
||||
# 向下滚动 200 像素
|
||||
ele.scroll.down(200)
|
||||
|
||||
# 滚动到指定位置
|
||||
ele.scroll.to_location(100, 300)
|
||||
```
|
||||
|
||||
# ✔️ 列表选择
|
||||
|
||||
## 📍 `select`
|
||||
|
||||
此属性用于对`<select>`元素的操作。非`<select>`元素此属性为`None`。
|
||||
|
||||
调用此属性时返回一个`ChromiumSelect`对象,调用该对象的方法实现列表项的选中与取消。
|
||||
|
||||
假设有以下`<select>`元素,下面示例以此为基础:
|
||||
|
||||
```html
|
||||
<select id='s' multiple>
|
||||
<option value='value1'>text1</option>
|
||||
<option value='value2'>text2</option>
|
||||
<option value='value3'>text3</option>
|
||||
</select>
|
||||
```
|
||||
|
||||
该对象实现了`__call__()`方法,可直接调用进行按文本选择项目。
|
||||
|
||||
```python
|
||||
ele = page.ele('#s')
|
||||
|
||||
ele.select('text1') # 选中文本为 'text1' 的项目
|
||||
```
|
||||
|
||||
## 📍 方法
|
||||
|
||||
| 方法 | 参数说明 | 功能 |
|
||||
|:-------------------------------:|:-------:|:--------------:|
|
||||
| by_text(text, timeout) | 文本,超时时间 | 根据文本选择项 |
|
||||
| by_value(value, timeout) | 项值,超时时间 | 根据值选择项 |
|
||||
| by_index(index, timeout) | 序号,超时时间 | 根据序号选择项(0开始) |
|
||||
| cancel_by_text(text, timeout) | 文本,超时时间 | 根据文本取消选择(多选列表) |
|
||||
| cancel_by_value(value, timeout) | 项值,超时时间 | 根据项值取消选择(多选列表) |
|
||||
| cancel_by_index(index, timeout) | 序号,超时时间 | 根据序号取消选择(多选列表) |
|
||||
| invert() | 无 | 反选(多选列表) |
|
||||
| clear() | 无 | 清空列表(多选列表) |
|
||||
|
||||
```python
|
||||
ele.select.by_text('text1') # 和 ele.select('text1') 一样
|
||||
ele.select.by_value('value2') # 选中 value 属性为 'value2' 的项
|
||||
ele.select.by_index(2) # 选中第 3 项
|
||||
|
||||
ele.select.cancel_by_text('text1') # 取消选中文本为 'text1' 的项
|
||||
ele.select.cancel_by_value('value2') # 取消选中 value 属性为 'value2' 的项
|
||||
ele.select.cancel_by_index(2) # 取消选中第 3 项
|
||||
|
||||
ele.select.invert() # 反选
|
||||
ele.select.clear() # 清空
|
||||
```
|
||||
|
||||
## 📍 属性
|
||||
|
||||
| 属性 | 说明 |
|
||||
| ---------------- | --------------------- |
|
||||
| is_multi | 返回是否多选表单 |
|
||||
| options | 返回所有选项元素组成的列表 |
|
||||
| selected_option | 返回第一个被选中的option元素 |
|
||||
| selected_options | 返回所有被选中的option元素组成的列表 |
|
||||
|
||||
## 📍 多选
|
||||
|
||||
上述各种选择/取消选择的方法均支持多选下拉列表。
|
||||
|
||||
要选择/取消选择多项,只要传入相应内容组成的`tuple`或`list`即可。
|
||||
|
||||
```python
|
||||
# 选择多个文本项
|
||||
ele.select(('text1', 'text2'))
|
||||
|
||||
# 选择多个值
|
||||
ele.select.by_value(('value1', 'value2'))
|
||||
|
||||
# 取消选择多个序号
|
||||
ele.select.cancel_by_index((0, 2))
|
||||
```
|
||||
|
||||
## 📍 等待
|
||||
|
||||
很多网站下拉列表采用 js 加载,如果加载不及时会导致异常。
|
||||
|
||||
因此本库在此集成了一个贴心小功能,上面各种方法均设置了`timeout`参数,如果选择目标未找到,会在限时内等待该项出现,超时就返回`False`。
|
||||
|
||||
```python
|
||||
# 目标选择 'abc',设置超时时间为 3 秒
|
||||
result = ele.select('abc', 3)
|
||||
|
||||
# 输出:
|
||||
False
|
||||
```
|
@ -16,6 +16,8 @@
|
||||
* [🔨 3.1 创建页面对象](WebPage使用方法\3.1创建页面对象.md)
|
||||
* [🔨 3.2 访问网页](WebPage使用方法\3.2访问网页.md)
|
||||
* [🔨 3.3 查找元素](WebPage使用方法\3.3查找元素.md)
|
||||
* [🔨 3.4 获取元素信息](WebPage使用方法\3.4获取元素信息.md)
|
||||
* [🔨 3.5 元素操作](WebPage使用方法\3.5元素操作.md)
|
||||
|
||||
* [🛠 4 MixPage 使用方法](#)
|
||||
|
||||
|
@ -1,12 +1,11 @@
|
||||
DrissionPage 带一个简便易用的下载器,一行即可实现下载功能。
|
||||
|
||||
```python
|
||||
from DrissionPage import MixPage
|
||||
from DrissionPage import WebPage
|
||||
|
||||
url = 'https://www.baidu.com/img/flexible/logo/pc/result.png'
|
||||
save_path = r'C:\download'
|
||||
|
||||
page = MixPage('s')
|
||||
page = WebPage('s')
|
||||
page.download(url, save_path)
|
||||
```
|
||||
|
||||
|
@ -14,7 +14,7 @@ element = html.xpath('//h1')[0]
|
||||
title = element.text
|
||||
|
||||
# 使用 DrissionPage:
|
||||
page = MixPage('s')
|
||||
page = WebPage('s')
|
||||
page.get(url)
|
||||
title = page('tag:h1').text
|
||||
```
|
||||
@ -32,8 +32,7 @@ r = requests.get(url)
|
||||
with open(f'{save_path}\\img.png', 'wb') as fd:
|
||||
for chunk in r.iter_content():
|
||||
fd.write(chunk)
|
||||
|
||||
|
||||
# 使用 DrissionPage:
|
||||
page.download(url, save_path, 'img') # 支持重命名,处理文件名冲突
|
||||
```
|
||||
|
||||
|
@ -49,7 +49,7 @@ ele1.drag_to(ele2)
|
||||
driver.execute_script("window.scrollTo(document.documentElement.scrollLeft, document.body.scrollHeight);")
|
||||
|
||||
# 使用 DrissionPage:
|
||||
page.scroll_to('bottom')
|
||||
page.scroll.to_bottom()
|
||||
```
|
||||
|
||||
## 设置 headless 模式
|
||||
@ -95,4 +95,3 @@ shadow_element = element.shadow_root
|
||||
class_name = element('xpath://div[@id="div_id"]/@class')
|
||||
text = element('xpath://div[@id="div_id"]/text()[2]')
|
||||
```
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
用浏览器登录网站,然后切换到 requests 读取网页。两者会共享登录信息。
|
||||
|
||||
```python
|
||||
from DrissionPage import MixPage
|
||||
from DrissionPage import WebPage
|
||||
from time import sleep
|
||||
|
||||
# 创建页面对象,默认 d 模式
|
||||
page = MixPage()
|
||||
page = WebPage()
|
||||
# 访问个人中心页面(未登录,重定向到登录页面)
|
||||
page.get('https://gitee.com/profile')
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# v3.0.25
|
||||
# v3.0.26
|
||||
|
||||
- 各种大小、位置信息从`dict`改为用`tuple`返回
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
|
||||
- 增加`wait_loading`方法和参数
|
||||
|
||||
- 其它优化
|
||||
- 其它优化和问题修复
|
||||
|
||||
# v3.0.22
|
||||
|
||||
|
2
setup.py
2
setup.py
@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
|
||||
|
||||
setup(
|
||||
name="DrissionPage",
|
||||
version="3.0.25",
|
||||
version="3.0.26",
|
||||
author="g1879",
|
||||
author_email="g1879@qq.com",
|
||||
description="A module that integrates selenium and requests session, encapsulates common page operations.",
|
||||
|
Loading…
x
Reference in New Issue
Block a user