From cf4ba9cda97fb05f288c4b5a5cdc1c322184dd5a Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 8 Jan 2024 20:24:45 +0800 Subject: [PATCH] 4.0.1 --- DrissionPage/mixpage/base.py | 324 ----- DrissionPage/mixpage/base.pyi | 175 --- DrissionPage/mixpage/drission.py | 458 ------- DrissionPage/mixpage/drission.pyi | 96 -- DrissionPage/mixpage/driver_element.py | 1264 ------------------ DrissionPage/mixpage/driver_element.pyi | 326 ----- DrissionPage/mixpage/driver_page.py | 611 --------- DrissionPage/mixpage/driver_page.pyi | 189 --- DrissionPage/mixpage/mix_page.py | 344 ----- DrissionPage/mixpage/mix_page.pyi | 156 --- DrissionPage/mixpage/session_element.py | 357 ----- DrissionPage/mixpage/session_element.pyi | 114 -- DrissionPage/mixpage/session_page.py | 533 -------- DrissionPage/mixpage/session_page.pyi | 237 ---- DrissionPage/mixpage/shadow_root_element.py | 219 --- DrissionPage/mixpage/shadow_root_element.pyi | 84 -- 16 files changed, 5487 deletions(-) delete mode 100644 DrissionPage/mixpage/base.py delete mode 100644 DrissionPage/mixpage/base.pyi delete mode 100644 DrissionPage/mixpage/drission.py delete mode 100644 DrissionPage/mixpage/drission.pyi delete mode 100644 DrissionPage/mixpage/driver_element.py delete mode 100644 DrissionPage/mixpage/driver_element.pyi delete mode 100644 DrissionPage/mixpage/driver_page.py delete mode 100644 DrissionPage/mixpage/driver_page.pyi delete mode 100644 DrissionPage/mixpage/mix_page.py delete mode 100644 DrissionPage/mixpage/mix_page.pyi delete mode 100644 DrissionPage/mixpage/session_element.py delete mode 100644 DrissionPage/mixpage/session_element.pyi delete mode 100644 DrissionPage/mixpage/session_page.py delete mode 100644 DrissionPage/mixpage/session_page.pyi delete mode 100644 DrissionPage/mixpage/shadow_root_element.py delete mode 100644 DrissionPage/mixpage/shadow_root_element.pyi diff --git a/DrissionPage/mixpage/base.py b/DrissionPage/mixpage/base.py deleted file mode 100644 index d38527f..0000000 --- a/DrissionPage/mixpage/base.py +++ /dev/null @@ -1,324 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from abc import abstractmethod -from re import sub -from urllib.parse import quote - -from ..commons.locator import get_loc -from ..commons.web import format_html - - -class BaseParser(object): - """所有页面、元素类的基类""" - - def __call__(self, loc_or_str): - return self.ele(loc_or_str) - - def ele(self, loc_or_ele, timeout=None): - return self._ele(loc_or_ele, timeout, True) - - def eles(self, loc_or_str, timeout=None): - return self._ele(loc_or_str, timeout, False) - - # ----------------以下属性或方法待后代实现---------------- - @property - def html(self): - return '' - - def s_ele(self, loc_or_ele): - pass - - def s_eles(self, loc_or_str): - pass - - @abstractmethod - def _ele(self, loc_or_ele, timeout=None, single=True): - pass - - -class BaseElement(BaseParser): - """各元素类的基类""" - - def __init__(self, page=None): - self.page = page - - # ----------------以下属性或方法由后代实现---------------- - @property - def tag(self): - return - - @abstractmethod - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - pass - - def parent(self, level_or_loc=1): - pass - - def prev(self, index=1): - return None # ShadowRootElement直接继承 - - def prevs(self) -> None: - return None # ShadowRootElement直接继承 - - def next(self, index=1): - pass - - def nexts(self): - pass - - -class DrissionElement(BaseElement): - """DriverElement、ChromiumElement 和 SessionElement的基类 - 但不是ShadowRootElement的基类""" - - @property - def link(self): - """返回href或src绝对url""" - return self.attr('href') or self.attr('src') - - @property - def css_path(self): - """返回css path路径""" - return self._get_ele_path('css') - - @property - def xpath(self): - """返回xpath路径""" - return self._get_ele_path('xpath') - - @property - def comments(self): - """返回元素注释文本组成的列表""" - return self.eles('xpath:.//comment()') - - def texts(self, text_node_only=False): - """返回元素内所有直接子节点的文本,包括元素和文本节点 - :param text_node_only: 是否只返回文本节点 - :return: 文本列表 - """ - if text_node_only: - texts = self.eles('xpath:/text()') - else: - texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] - - return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - if isinstance(level_or_loc, int): - loc = f'xpath:./ancestor::*[{level_or_loc}]' - - elif isinstance(level_or_loc, (tuple, str)): - loc = get_loc(level_or_loc, True) - - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}' - - else: - raise TypeError('level_or_loc参数只能是tuple、int或str。') - - return self._ele(loc, timeout=0, relative=True) - - def prev(self, index=1, filter_loc='', timeout=0): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - nodes = self._get_brothers(index, filter_loc, 'preceding', timeout=timeout) - return nodes[-1] if nodes else None - - def next(self, index=1, filter_loc='', timeout=0): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - nodes = self._get_brothers(index, filter_loc, 'following', timeout=timeout) - return nodes[0] if nodes else None - - def before(self, index=1, filter_loc='', timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - nodes = self._get_brothers(index, filter_loc, 'preceding', False, timeout=timeout) - return nodes[-1] if nodes else None - - def after(self, index=1, filter_loc='', timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - nodes = self._get_brothers(index, filter_loc, 'following', False, timeout) - return nodes[0] if nodes else None - - def prevs(self, filter_loc='', timeout=0): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', timeout=timeout) - - def nexts(self, filter_loc='', timeout=0): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='following', timeout=timeout) - - def befores(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='preceding', brother=False, timeout=timeout) - - def afters(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return self._get_brothers(filter_loc=filter_loc, direction='following', brother=False, timeout=timeout) - - def _get_brothers(self, index=None, filter_loc='', direction='following', brother=True, timeout=.5): - """按要求返回兄弟元素或节点组成的列表 - :param index: 获取第几个,该参数不为None时只获取该编号的元素 - :param filter_loc: 用于筛选的查询语法 - :param direction: 'following' 或 'preceding',查找的方向 - :param brother: 查找范围,在同级查找还是整个dom前后查找 - :param timeout: 查找等待时间 - :return: DriverElement对象或字符串 - """ - if index is not None and index < 1: - raise ValueError('index必须大于等于1。') - - brother = '-sibling' if brother else '' - - if not filter_loc: - loc = '*' - - else: - loc = get_loc(filter_loc, True) # 把定位符转换为xpath - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - loc = loc[1].lstrip('./') - - loc = f'xpath:./{direction}{brother}::{loc}' - - nodes = self._ele(loc, timeout=timeout, single=False, relative=True) - nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] - - if nodes and index is not None: - index = index - 1 if direction == 'following' else -index - try: - return [nodes[index]] - except IndexError: - return [] - else: - return nodes - - # ----------------以下属性或方法由后代实现---------------- - @property - def attrs(self): - return - - @property - def text(self): - return - - @property - def raw_text(self): - return - - @abstractmethod - def attr(self, attr: str): - return '' - - def _get_ele_path(self, mode): - return '' - - -class BasePage(BaseParser): - """页面类的基类""" - - def __init__(self, timeout=None): - """初始化函数""" - self._url = None - self.timeout = timeout if timeout is not None else 10 - self.retry_times = 3 - self.retry_interval = 2 - self._url_available = None - - @property - def title(self): - """返回网页title""" - ele = self.ele('xpath://title') - return ele.text if ele else None - - @property - def timeout(self): - """返回查找元素时等待的秒数""" - return self._timeout - - @timeout.setter - def timeout(self, second): - """设置查找元素时等待的秒数""" - self._timeout = second - - @property - def cookies(self): - """返回cookies""" - return self.get_cookies(True) - - @property - def url_available(self): - """返回当前访问的url有效性""" - return self._url_available - - def _before_connect(self, url, retry, interval): - """连接前的准备 - :param url: 要访问的url - :param retry: 重试次数 - :param interval: 重试间隔 - :return: 重试次数和间隔组成的tuple - """ - self._url = quote(url, safe='/:&?=%;#@+!') - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - return retry, interval - - # ----------------以下属性或方法由后代实现---------------- - @property - def url(self): - return - - @property - def json(self): - return - - @abstractmethod - def get_cookies(self, as_dict=False): - return {} - - @abstractmethod - def get(self, url, show_errmsg=False, retry=None, interval=None): - pass diff --git a/DrissionPage/mixpage/base.pyi b/DrissionPage/mixpage/base.pyi deleted file mode 100644 index 1f12e80..0000000 --- a/DrissionPage/mixpage/base.pyi +++ /dev/null @@ -1,175 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from abc import abstractmethod -from typing import Union, Tuple, List - - -class BaseParser(object): - - def __call__(self, loc_or_str: Union[Tuple[str, str], str]): ... - - def ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement], timeout: float = None): ... - - def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): ... - - # ----------------以下属性或方法待后代实现---------------- - @property - def html(self) -> str: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, BaseElement]): ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): ... - - @abstractmethod - def _ele(self, loc_or_ele, timeout: float = None, single: bool = True): ... - - -class BaseElement(BaseParser): - - def __init__(self, page: BasePage = None): - self.page: BasePage = ... - - # ----------------以下属性或方法由后代实现---------------- - @property - def tag(self) -> str: ... - - @abstractmethod - def _ele(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None, single: bool = True, - relative: bool = False): ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... - - def prev(self, index: int = 1) -> None: ... - - def prevs(self) -> None: ... - - def next(self, index: int = 1): ... - - def nexts(self): ... - - -class DrissionElement(BaseElement): - - def __init__(self, - page: BasePage = ...): - self.page: BasePage = ... - - @property - def link(self) -> str: ... - - @property - def css_path(self) -> str: ... - - @property - def xpath(self) -> str: ... - - @property - def comments(self) -> list: ... - - def texts(self, text_node_only: bool = False) -> list: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union[DrissionElement, None]: ... - - def prev(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union[DrissionElement, str, None]: ... - - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union[DrissionElement, str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union[DrissionElement, str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union[DrissionElement, str, None]: ... - - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union[DrissionElement, str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union[DrissionElement, str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union[DrissionElement, str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union[DrissionElement, str]]: ... - - def _get_brothers(self, - index: int = None, - filter_loc: Union[tuple, str] = '', - direction: str = 'following', - brother: bool = True, - timeout: float = 0.5) -> List[Union[DrissionElement, str]]: ... - - # ----------------以下属性或方法由后代实现---------------- - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - @abstractmethod - def attr(self, attr: str) -> str: ... - - def _get_ele_path(self, mode) -> str: ... - - -class BasePage(BaseParser): - - def __init__(self, timeout: float = None): - self._url_available: bool = ... - self.retry_times: int = ... - self.retry_interval: float = ... - self._timeout = float = ... - - @property - def title(self) -> Union[str, None]: ... - - @property - def timeout(self) -> float: ... - - @timeout.setter - def timeout(self, second: float) -> None: ... - - @property - def cookies(self) -> dict: ... - - @property - def url_available(self) -> bool: ... - - def _before_connect(self, url: str, retry: int, interval: float) -> tuple: ... - - # ----------------以下属性或方法由后代实现---------------- - @property - def url(self) -> str: ... - - @property - def json(self) -> dict: ... - - @abstractmethod - def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... - - @abstractmethod - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None): ... diff --git a/DrissionPage/mixpage/drission.py b/DrissionPage/mixpage/drission.py deleted file mode 100644 index 906fdda..0000000 --- a/DrissionPage/mixpage/drission.py +++ /dev/null @@ -1,458 +0,0 @@ -# -*- encoding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from platform import system -from sys import exit - -from requests import Session -from requests.structures import CaseInsensitiveDict -from selenium import webdriver -from selenium.common.exceptions import SessionNotCreatedException, WebDriverException -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from tldextract import extract - -from ..commons.browser import connect_browser -from ..commons.tools import get_pid_from_port, get_exe_from_port -from ..commons.web import cookies_to_tuple -from ..configs.driver_options import DriverOptions -from ..configs.session_options import SessionOptions, session_options_to_dict - - -class Drission(object): - """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" - - def __init__(self, driver_or_options=None, session_or_options=None, ini_path=None, proxy=None): - """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象 - :param driver_or_options: driver对象或DriverOptions、Options类,传入False则创建空配置对象 - :param session_or_options: Session对象或设置字典,传入False则创建空配置对象 - :param ini_path: ini文件路径 - :param proxy: 代理设置 - """ - self._session = None - self._driver = None - self._session_options = None - self._driver_options = None - self._debugger = None - self._proxy = proxy - - # ------------------处理session options---------------------- - if session_or_options is None: - self._session_options = SessionOptions(ini_path=ini_path).as_dict() - - elif session_or_options is False: - self._session_options = SessionOptions(read_file=False).as_dict() - - elif isinstance(session_or_options, Session): - self._session = session_or_options - - elif isinstance(session_or_options, SessionOptions): - self._session_options = session_or_options.as_dict() - - elif isinstance(session_or_options, dict): - self._session_options = session_or_options - - else: - raise TypeError('session_or_options参数只能接收Session, dict, SessionOptions或False。') - - # ------------------处理driver options---------------------- - if driver_or_options is None: - self._driver_options = DriverOptions(ini_path=ini_path) - - elif driver_or_options is False: - self._driver_options = DriverOptions(read_file=False) - - elif isinstance(driver_or_options, RemoteWebDriver): - self._driver = driver_or_options - - elif isinstance(driver_or_options, (Options, DriverOptions)): - self._driver_options = driver_or_options - - else: - raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。') - - def __del__(self): - """关闭对象时关闭浏览器和Session""" - try: - self.close() - except ImportError: - pass - - @property - def session(self): - """返回Session对象,如未初始化则按配置信息创建""" - if self._session is None: - self._set_session(self._session_options) - - if self._proxy: - self._session.proxies = self._proxy - - return self._session - - @property - def driver(self): - """返回WebDriver对象,如未初始化则按配置信息创建。 - 如设置了本地调试浏览器,可自动接入或打开浏览器进程。 - """ - if self._driver is None: - if not self.driver_options.debugger_address and self._proxy: - self.driver_options.add_argument(f'--proxy-server={self._proxy["http"]}') - - driver_path = self.driver_options.driver_path or 'chromedriver' - chrome_path = self.driver_options.chrome_path - - # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程----------- - if self.driver_options.debugger_address: - # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径 - cp, self._debugger = connect_browser(self.driver_options) - - if cp in (None, 'chrome'): - system_type = system().lower() - ip, port = self.driver_options.debugger_address.split(':') - if ip not in ('127.0.0.1', 'localhost'): - chrome_path = None - elif chrome_path == 'chrome' and system_type == 'windows': - chrome_path = get_exe_from_port(port) - - # -----------创建WebDriver对象----------- - self._driver = create_driver(chrome_path, driver_path, self.driver_options) - - # -----------解决接管新版浏览器不能定位到正确的标签页的问题----------- - active_tab = self._driver.window_handles[0] - if active_tab != self._driver.current_window_handle: - self._driver.switch_to.window(active_tab) - - return self._driver - - @property - def driver_options(self): - """返回driver配置信息""" - return self._driver_options - - @property - def session_options(self): - """返回session配置信息""" - return self._session_options - - @session_options.setter - def session_options(self, options): - """设置session配置 - :param options: session配置字典 - :return: None - """ - self._session_options = session_options_to_dict(options) - self._set_session(self._session_options) - - @property - def proxy(self): - """返回代理信息""" - return self._proxy - - @proxy.setter - def proxy(self, proxies=None): - """设置代理信息 - :param proxies: 代理信息字典 - :return: None - """ - self._proxy = proxies - - if self._session: - self._session.proxies = proxies - - if self._driver: - cookies = self._driver.get_cookies() - url = self._driver.current_url - self._driver.quit() - self._driver = None - self._driver = self.driver - self._driver.get(url) - - for cookie in cookies: - self.set_cookies(cookie, set_driver=True) - - @property - def debugger_progress(self): - """调试浏览器进程""" - return self._debugger - - def kill_browser(self): - """关闭浏览器进程(如果可以)""" - pid = self.get_browser_progress_id() - if not kill_progress(pid): - self._driver.quit() - - def get_browser_progress_id(self): - """获取浏览器进程id""" - if self.debugger_progress: - return self.debugger_progress.pid - - address = str(self.driver_options.debugger_address).split(':') - if len(address) == 2: - ip, port = address - if ip not in ('127.0.0.1', 'localhost') or not port.isdigit(): - return None - - from os import popen - txt = '' - progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') - for progress in progresses: - if 'LISTENING' in progress: - txt = progress - break - if not txt: - return None - - return txt.split(' ')[-1] - - def hide_browser(self): - """隐藏浏览器界面""" - self._show_or_hide_browser() - - def show_browser(self): - """显示浏览器界面""" - self._show_or_hide_browser(False) - - def _show_or_hide_browser(self, hide=True): - if system().lower() != 'windows': - raise OSError('该方法只能在Windows系统使用。') - - try: - from win32gui import ShowWindow - from win32con import SW_HIDE, SW_SHOW - except ImportError: - raise ImportError('请先安装:pip install pypiwin32') - - pid = self.get_browser_progress_id() - if not pid: - print('只有设置了debugger_address参数才能使用 show_browser() 和 hide_browser()') - return - hds = get_chrome_hwnds_from_pid(pid) - sw = SW_HIDE if hide else SW_SHOW - for hd in hds: - ShowWindow(hd, sw) - - def set_cookies(self, cookies, set_session=False, set_driver=False): - """设置cookies - :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict - :param set_session: 是否设置session的cookies - :param set_driver: 是否设置driver的cookies - :return: None - """ - cookies = cookies_to_tuple(cookies) - - for cookie in cookies: - if cookie['value'] is None: - cookie['value'] = '' - - # 添加cookie到session - if set_session: - kwargs = {x: cookie[x] for x in cookie - if x.lower() not in ('name', 'value', 'httponly', 'expiry', 'samesite')} - - if 'expiry' in cookie: - kwargs['expires'] = cookie['expiry'] - - self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) - - # 添加cookie到driver - if set_driver: - if 'expiry' in cookie: - cookie['expiry'] = int(cookie['expiry']) - - try: - browser_domain = extract(self.driver.current_url).fqdn - except AttributeError: - browser_domain = '' - - if not cookie.get('domain', None): - if browser_domain: - url = extract(browser_domain) - cookie_domain = f'{url.domain}.{url.suffix}' - else: - raise ValueError('cookie中没有域名或浏览器未访问过URL。') - - cookie['domain'] = cookie_domain - - else: - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] - - if cookie_domain not in browser_domain: - self.driver.get(cookie_domain if cookie_domain.startswith('http://') - else f'http://{cookie_domain}') - - # 避免selenium自动添加.后无法正确覆盖已有cookie - if cookie['domain'][0] != '.': - c = self.driver.get_cookie(cookie['name']) - if c and c['domain'] == cookie['domain']: - self.driver.delete_cookie(cookie['name']) - - self.driver.add_cookie(cookie) - - def _set_session(self, data): - """根据传入字典对session进行设置 - :param data: session配置字典 - :return: None - """ - if self._session is None: - self._session = Session() - - if 'headers' in data: - self._session.headers = CaseInsensitiveDict(data['headers']) - if 'cookies' in data: - self.set_cookies(data['cookies'], set_session=True) - - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' - for i in attrs: - if i in data: - self._session.__setattr__(i, data[i]) - - def cookies_to_session(self, copy_user_agent=False): - """把driver对象的cookies复制到session对象 - :param copy_user_agent: 是否复制ua信息 - :return: None - """ - if copy_user_agent: - user_agent_to_session(self.driver, self.session) - - self.set_cookies(self.driver.get_cookies(), set_session=True) - - def cookies_to_driver(self, url): - """把session对象的cookies复制到driver对象 - :param url: 作用域 - :return: None - """ - browser_domain = extract(self.driver.current_url).fqdn - ex_url = extract(url) - - if ex_url.fqdn not in browser_domain: - self.driver.get(url) - - domain = f'{ex_url.domain}.{ex_url.suffix}' - - cookies = [] - for cookie in self.session.cookies: - if cookie.domain == '': - cookie.domain = domain - - if domain in cookie.domain: - cookies.append(cookie) - - self.set_cookies(cookies, set_driver=True) - - def close_driver(self, kill=False): - """关闭driver和浏览器""" - if self._driver: - kill_progress(port=self._driver.service.port) # 关闭chromedriver.exe进程 - - if kill: - self.kill_browser() - else: - self._driver.quit() - - self._driver = None - - def close_session(self): - """关闭session""" - if self._session: - self._session.close() - self._session = None - - def close(self): - """关闭session、driver和浏览器""" - if self._driver: - self.close_driver() - - if self._session: - self.close_session() - - -def user_agent_to_session(driver, session): - """把driver的user-agent复制到session - :param driver: 来源driver对象 - :param session: 目标session对象 - :return: None - """ - driver = driver - session = session - selenium_user_agent = driver.execute_script("return navigator.userAgent;") - session.headers.update({"User-Agent": selenium_user_agent}) - - -def create_driver(chrome_path, driver_path, options): - """创建 WebDriver 对象 - :param chrome_path: chrome.exe 路径 - :param driver_path: chromedriver.exe 路径 - :param options: Options 对象 - :return: WebDriver 对象 - """ - try: - debugger_address = options.debugger_address - if options.debugger_address: - options = Options() - options.debugger_address = debugger_address - - return webdriver.Chrome(driver_path, options=options) - - # 若版本不对,获取对应 chromedriver 再试 - except (WebDriverException, SessionNotCreatedException): - print('打开失败,尝试获取driver。\n') - from ..easy_set import get_match_driver, get_chrome_path - - if chrome_path == 'chrome': - chrome_path = get_chrome_path(show_msg=False, from_ini=False) - - if chrome_path: - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=True) - if driver_path: - try: - options.binary_location = chrome_path - return webdriver.Chrome(driver_path, options=options) - except Exception: - pass - - print('无法启动,请检查浏览器路径,或手动设置chromedriver。\n下载地址:http://npm.taobao.org/mirrors/chromedriver/') - exit(0) - - -def get_chrome_hwnds_from_pid(pid): - """通过PID查询句柄ID""" - try: - from win32gui import IsWindow, GetWindowText, EnumWindows - from win32process import GetWindowThreadProcessId - except ImportError: - raise ImportError('请先安装win32gui,pip install pypiwin32') - - def callback(hwnd, hds): - if IsWindow(hwnd) and '- Google Chrome' in GetWindowText(hwnd): - _, found_pid = GetWindowThreadProcessId(hwnd) - if str(found_pid) == str(pid): - hds.append(hwnd) - return True - - hwnds = [] - EnumWindows(callback, hwnds) - return hwnds - - -def kill_progress(pid=None, port=None): - """关闭浏览器进程 - :param pid: 进程id - :param port: 端口号,如没有进程id,从端口号获取 - :return: 是否成功 - """ - from os import popen - if system().lower() != 'windows': - return False - - pid = pid or get_pid_from_port(port) - if not pid: - return False - - if popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'): - popen(f'taskkill /pid {pid} /F') - return True - else: - return False \ No newline at end of file diff --git a/DrissionPage/mixpage/drission.pyi b/DrissionPage/mixpage/drission.pyi deleted file mode 100644 index 3079bca..0000000 --- a/DrissionPage/mixpage/drission.pyi +++ /dev/null @@ -1,96 +0,0 @@ -# -*- encoding: utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from subprocess import Popen -from typing import Union - -from requests import Session -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver - -from DrissionPage.configs.driver_options import DriverOptions -from DrissionPage.configs.session_options import SessionOptions - - -class Drission(object): - - def __init__(self, - driver_or_options: Union[RemoteWebDriver, Options, DriverOptions, bool] = None, - session_or_options: Union[Session, dict, SessionOptions, bool] = None, - ini_path: str = None, - proxy: dict = None): - self._session: Session = ... - self._session_options: dict = ... - self._proxy: dict = ... - self._driver: WebDriver = ... - self._debugger: Popen = ... - self._driver_options: DriverOptions = ... - - def __del__(self): ... - - @property - def session(self) -> Session: ... - - @property - def driver(self) -> WebDriver: ... - - @property - def driver_options(self) -> Union[DriverOptions, Options]: ... - - @property - def session_options(self) -> dict: ... - - @session_options.setter - def session_options(self, options: Union[dict, SessionOptions]) -> None: ... - - @property - def proxy(self) -> Union[None, dict]: ... - - @proxy.setter - def proxy(self, proxies: dict = None) -> None: ... - - @property - def debugger_progress(self): ... - - def kill_browser(self) -> None: ... - - def get_browser_progress_id(self) -> Union[str, None]: ... - - def hide_browser(self) -> None: ... - - def show_browser(self) -> None: ... - - def _show_or_hide_browser(self, hide: bool = True) -> None: ... - - def set_cookies(self, - cookies: Union[RequestsCookieJar, list, tuple, str, dict], - set_session: bool = False, - set_driver: bool = False) -> None: ... - - def _set_session(self, data: dict) -> None: ... - - def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... - - def cookies_to_driver(self, url: str) -> None: ... - - def close_driver(self, kill: bool = False) -> None: ... - - def close_session(self) -> None: ... - - def close(self) -> None: ... - - -def user_agent_to_session(driver: RemoteWebDriver, session: Session) -> None: ... - - -def create_driver(chrome_path: str, driver_path: str, options: Options) -> WebDriver: ... - - -def get_chrome_hwnds_from_pid(pid: str) -> list: ... - - -def kill_progress(pid: str = None, port: int = None) -> bool: ... diff --git a/DrissionPage/mixpage/driver_element.py b/DrissionPage/mixpage/driver_element.py deleted file mode 100644 index e7c730c..0000000 --- a/DrissionPage/mixpage/driver_element.py +++ /dev/null @@ -1,1264 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from os import sep -from pathlib import Path -from time import time, perf_counter, sleep - -from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException, \ - NoSuchElementException -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support import expected_conditions as ec -from selenium.webdriver.support.wait import WebDriverWait - -from .base import DrissionElement, BaseElement -from .session_element import make_session_ele -from ..commons.locator import str_to_loc, get_loc -from ..commons.tools import get_usable_path -from ..commons.web import format_html, get_ele_txt - - -class DriverElement(DrissionElement): - """driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能""" - - def __init__(self, ele, page=None): - """初始化对象 - :param ele: 被包装的WebElement元素 - :param page: 元素所在页面 - """ - super().__init__(page) - self._select = None - self._scroll = None - self._inner_ele = ele - - def __repr__(self): - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def inner_ele(self): - return self._inner_ele - - @property - def tag(self): - """返回元素类型""" - return self._inner_ele.tag_name.lower() - - @property - def html(self): - """返回元素outerHTML文本""" - return self.inner_ele.get_attribute('outerHTML') - - @property - def inner_html(self): - """返回元素innerHTML文本""" - return self.inner_ele.get_attribute('innerHTML') - - @property - def attrs(self): - """返回元素所有属性及值""" - js = ''' - var dom=arguments[0]; - var names="("; - var len = dom.attributes.length; - for(var i=0;i1){path = '/' + tag + '[' + nth + ']' + path;} - else{path = '/' + tag + path;}''' - txt5 = '''return path;''' - - elif mode == 'css': - txt1 = '' - # txt2 = '''return '#' + el.id + path;''' - txt3 = '' - txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' - txt5 = '''return path.substr(1);''' - - else: - raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") - - js = ''' - function e(el) { - if (!(el instanceof Element)) return; - var path = ''; - while (el.nodeType === Node.ELEMENT_NODE) { - ''' + txt1 + ''' - var sib = el, nth = 0; - while (sib) { - if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} - sib = sib.previousSibling; - } - ''' + txt4 + ''' - el = el.parentNode; - } - ''' + txt5 + ''' - } - return e(arguments[0]); - ''' - res_txt = self.run_script(js) - return f':root{res_txt}' if mode == 'css' else res_txt - - # -----------------driver独有属性和方法------------------- - @property - def size(self): - """返回元素宽和高""" - return self.inner_ele.size - - @property - def location(self): - """返回元素左上角坐标""" - return self.inner_ele.location - - @property - def shadow_root(self): - """返回当前元素的shadow_root元素对象""" - shadow = self.run_script('return arguments[0].shadowRoot') - if shadow: - from .shadow_root_element import ShadowRootElement - return ShadowRootElement(shadow, self) - - @property - def sr(self): - """返回当前元素的shadow_root元素对象""" - return self.shadow_root - - @property - def pseudo_before(self): - """返回当前元素的::before伪元素内容""" - return self.style('content', 'before') - - @property - def pseudo_after(self): - """返回当前元素的::after伪元素内容""" - return self.style('content', 'after') - - @property - def select(self): - """返回专门处理下拉列表的Select类,非下拉列表元素返回False""" - if self._select is None: - if self.tag != 'select': - self._select = False - else: - self._select = Select(self) - - return self._select - - @property - def scroll(self): - """用于滚动滚动条的对象""" - if self._scroll is None: - self._scroll = Scroll(self) - return self._scroll - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - return super().parent(level_or_loc) - - def prev(self, index=1, filter_loc='', timeout=0): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().prev(index, filter_loc, timeout) - - def next(self, index=1, filter_loc='', timeout=0): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().next(index, filter_loc, timeout) - - def before(self, index=1, filter_loc='', timeout=None): - """返回当前元素前面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().before(index, filter_loc, timeout) - - def after(self, index=1, filter_loc='', timeout=None): - """返回当前元素后面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - index, filter_loc = _exchange_arguments(index, filter_loc) - return super().after(index, filter_loc, timeout) - - def prevs(self, filter_loc='', timeout=0): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().prevs(filter_loc, timeout) - - def nexts(self, filter_loc='', timeout=0): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().nexts(filter_loc, timeout) - - def befores(self, filter_loc='', timeout=None): - """返回当前元素后面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return super().befores(filter_loc, timeout) - - def afters(self, filter_loc='', timeout=None): - """返回当前元素前面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元,而是整个DOM文档 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return super().afters(filter_loc, timeout) - - def left(self, index=1, filter_loc=''): - """获取网页上显示在当前元素左边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def right(self, index=1, filter_loc=''): - """获取网页上显示在当前元素右边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('right', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def above(self, index=1, filter_loc=''): - """获取网页上显示在当前元素上边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def below(self, index=1, filter_loc=''): - """获取网页上显示在当前元素下边的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('left', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def near(self, index=1, filter_loc=''): - """获取网页上显示在当前元素最近的某个元素,可设置选取条件,可指定结果中第几个 - :param index: 获取第几个 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - eles = self._get_relative_eles('near', filter_loc) - return eles[index - 1] if index <= len(eles) else None - - def lefts(self, filter_loc=''): - """获取网页上显示在当前元素左边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def rights(self, filter_loc=''): - """获取网页上显示在当前元素右边的所有元,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('right', filter_loc) - - def aboves(self, filter_loc=''): - """获取网页上显示在当前元素上边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def belows(self, filter_loc=''): - """获取网页上显示在当前元素下边的所有元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('left', filter_loc) - - def nears(self, filter_loc=''): - """获取网页上显示在当前元素附近元素,可设置选取条件,从近到远排列 - :param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象组成的列表 - """ - return self._get_relative_eles('near', filter_loc) - - def wait_ele(self, loc_or_ele, timeout=None): - """等待子元素从dom删除、显示、隐藏 - :param loc_or_ele: 可以是元素、查询字符串、loc元组 - :param timeout: 等待超时时间 - :return: 等待是否成功 - """ - return ElementWaiter(self, loc_or_ele, timeout) - - def style(self, style, pseudo_ele=''): - """返回元素样式属性值,可获取伪元素属性值 - :param style: 样式属性名称 - :param pseudo_ele: 伪元素名称(如有) - :return: 样式属性的值 - """ - if pseudo_ele: - pseudo_ele = f', "{pseudo_ele}"' if pseudo_ele.startswith(':') else f', "::{pseudo_ele}"' - r = self.run_script(f'return window.getComputedStyle(arguments[0]{pseudo_ele}).getPropertyValue("{style}");') - - return None if r == 'none' else r - - def click(self, by_js=None, timeout=None): - """点击元素 - 尝试点击直到超时,若都失败就改用js点击 - :param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js - :param timeout: 尝试点击的超时时间,不指定则使用父页面的超时时间 - :return: 是否点击成功 - """ - - def do_it() -> bool: - try: - self.inner_ele.click() - return True - except Exception: - return False - - if not by_js: - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - click = do_it() - while not click and perf_counter() - t1 <= timeout: - click = do_it() - - if click: - return True - - # 若点击失败,用js方式点击 - if by_js is not False: - self.run_script('arguments[0].click()') - return True - - return False - - def click_at(self, x=None, y=None, by_js=False): - """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - :param by_js: 是否用js点击 - :return: None - """ - if by_js: - x = self.location['x'] + int(x) if x is not None else self.location['x'] + self.size['width'] // 2 - y = self.location['y'] + int(y) if y is not None else self.location['y'] + self.size['height'] // 2 - js = f""" - var ev = document.createEvent('HTMLEvents'); - ev.clientX = {x}; - ev.clientY = {y}; - ev.initEvent('click', false, true); - arguments[0].dispatchEvent(ev); - """ - self.run_script(js) - - else: - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - - from selenium.webdriver import ActionChains - ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform() - - def r_click(self): - """右键单击""" - from selenium.webdriver import ActionChains - ActionChains(self.page.driver).context_click(self.inner_ele).perform() - - def r_click_at(self, x=None, y=None): - """带偏移量右键单击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - :return: None - """ - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - from selenium.webdriver import ActionChains - ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).context_click().perform() - - def input(self, vals, clear=True, insure=True, timeout=None): - """输入文本或组合键,也可用于输入文件路径到input元素(文件间用\n间隔) - :param vals: 文本值或按键组合 - :param clear: 输入前是否清空文本框 - :param insure: 确保输入正确,解决文本框有时输入失效的问题,不能用于输入组合键 - :param timeout: 尝试输入的超时时间,不指定则使用父页面的超时时间,只在insure为True时生效 - :return: bool - """ - if not insure or self.tag != 'input' or self.prop('type') != 'text': # 普通输入 - if not isinstance(vals, (str, tuple)): - vals = str(vals) - if clear: - self.inner_ele.clear() - - self.inner_ele.send_keys(*vals) - return True - - else: # 确保输入正确 - if not isinstance(vals, str): - vals = str(vals) - enter = '\n' if vals.endswith('\n') else None - full_txt = vals if clear else f'{self.attr("value")}{vals}' - full_txt = full_txt.rstrip('\n') - - self.click(by_js=True) - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - while self.is_valid() and self.attr('value') != full_txt and perf_counter() - t1 <= timeout: - try: - if clear: - self.inner_ele.send_keys(u'\ue009', 'a', u'\ue017') # 有些ui下clear()不生效,用CTRL+a代替 - self.inner_ele.send_keys(vals) - - except Exception: - pass - - if not self.is_valid(): - return False - else: - if self.attr('value') != full_txt: - return False - else: - if enter: - self.inner_ele.send_keys(enter) - return True - - def run_script(self, script, *args): - """执行js代码,代码中用arguments[0]表示自己 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.inner_ele.parent.execute_script(script, self.inner_ele, *args) - - def submit(self): - """提交表单""" - try: - self.inner_ele.submit() - return True - except Exception: - pass - - def clear(self, insure=True): - """清空元素文本 - :param insure: 是否确保清空 - :return: 是否清空成功,不能清空的元素返回None - """ - if insure: - return self.input('') - - else: - try: - self.inner_ele.clear() - return True - except InvalidElementStateException: - return None - - def is_selected(self): - """是否选中""" - return self.inner_ele.is_selected() - - def is_enabled(self): - """是否可用""" - return self.inner_ele.is_enabled() - - def is_displayed(self): - """是否可见""" - return self.inner_ele.is_displayed() - - def is_valid(self): - """用于判断元素是否还在DOM内,应对页面跳转元素不能用的情况""" - try: - self.is_enabled() - return True - except Exception: - return False - - def screenshot(self, path=None, filename=None, as_bytes=False): - """对元素进行截图 - :param path: 保存路径 - :param filename: 图片文件名,不传入时以元素tag name命名 - :param as_bytes: 是否已字节形式返回图片,为True时上面两个参数失效 - :return: 图片完整路径或字节文本 - """ - # 等待元素加载完成 - if self.tag == 'img': - js = ('return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" ' - '&& arguments[0].naturalWidth > 0 && typeof arguments[0].naturalHeight != "undefined" ' - '&& arguments[0].naturalHeight > 0') - t1 = perf_counter() - while not self.run_script(js) and perf_counter() - t1 < self.page.timeout: - sleep(.1) - - if as_bytes: - return self.inner_ele.screenshot_as_png - - name = filename or self.tag - path = Path(path or '.').absolute() - path.mkdir(parents=True, exist_ok=True) - if not name.lower().endswith('.png'): - name = f'{name}.png' - - img_path = str(get_usable_path(f'{path}{sep}{name}')) - self.inner_ele.screenshot(img_path) - - return img_path - - def prop(self, prop): - """获取property属性值 - :param prop: 属性名 - :return: 属性值文本 - """ - return format_html(self.inner_ele.get_property(prop)) - - def set_prop(self, prop, value): - """设置元素property属性 - :param prop: 属性名 - :param value: 属性值 - :return: 是否设置成功 - """ - try: - value = value.replace("'", "\\'") - self.run_script(f"arguments[0].{prop}='{value}';") - return True - except Exception: - return False - - def set_attr(self, attr, value): - """设置元素attribute属性 - :param attr: 属性名 - :param value: 属性值 - :return: 是否设置成功 - """ - try: - self.run_script(f"arguments[0].setAttribute(arguments[1], arguments[2]);", attr, value) - return True - except Exception: - return False - - def remove_attr(self, attr): - """删除元素attribute属性 - :param attr: 属性名 - :return: 是否删除成功 - """ - try: - self.run_script(f'arguments[0].removeAttribute("{attr}");') - return True - except Exception: - return False - - def drag(self, x, y, speed=40, shake=True): - """拖拽当前元素到相对位置 - :param x: x变化值 - :param y: y变化值 - :param speed: 拖动的速度,传入0即瞬间到达 - :param shake: 是否随机抖动 - :return: None - """ - x += self.location['x'] + self.size['width'] // 2 - y += self.location['y'] + self.size['height'] // 2 - self.drag_to((x, y), speed, shake) - - def drag_to(self, ele_or_loc, speed=40, shake=True): - """拖拽当前元素,目标为另一个元素或坐标元组 - :param ele_or_loc: 另一个元素或坐标元组,坐标为元素中点的坐标 - :param speed: 拖动的速度,传入0即瞬间到达 - :param shake: 是否随机抖动 - :return: None - """ - # x, y:目标点坐标 - if isinstance(ele_or_loc, (DriverElement, WebElement)): - target_x = ele_or_loc.location['x'] + ele_or_loc.size['width'] // 2 - target_y = ele_or_loc.location['y'] + ele_or_loc.size['height'] // 2 - elif isinstance(ele_or_loc, tuple): - target_x, target_y = ele_or_loc - else: - raise TypeError('需要DriverElement、WebElement对象或坐标。') - - current_x = self.location['x'] + self.size['width'] // 2 - current_y = self.location['y'] + self.size['height'] // 2 - width = target_x - current_x - height = target_y - current_y - num = 0 if not speed else int(((abs(width) ** 2 + abs(height) ** 2) ** .5) // speed) - - # 将要经过的点存入列表 - points = [(int(current_x + i * (width / num)), int(current_y + i * (height / num))) for i in range(1, num)] - points.append((target_x, target_y)) - - from selenium.webdriver import ActionChains - from random import randint - actions = ActionChains(self.page.driver) - actions.click_and_hold(self.inner_ele) - - # 逐个访问要经过的点 - for x, y in points: - if shake: - x += randint(-3, 4) - y += randint(-3, 4) - actions.move_by_offset(x - current_x, y - current_y) - current_x, current_y = x, y - actions.release().perform() - - def hover(self, x=None, y=None): - """鼠标悬停,可接受偏移量,偏移量相对于元素左上角坐标。不传入x或y值时悬停在元素中点 - :param x: 相对元素左上角坐标的x轴偏移量 - :param y: 相对元素左上角坐标的y轴偏移量 - :return: None - """ - from selenium.webdriver import ActionChains - x = int(x) if x is not None else self.size['width'] // 2 - y = int(y) if y is not None else self.size['height'] // 2 - ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).perform() - - def _get_relative_eles(self, mode, loc=''): - """获取网页上相对于当前元素周围的某个元素,可设置选取条件 - :param mode: 可选:'left', 'right', 'above', 'below', 'near' - :param loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法 - :return: DriverElement对象 - """ - from selenium.webdriver.support.relative_locator import RelativeBy - - if isinstance(loc, str): - loc = str_to_loc(loc) - - try: - if mode == 'left': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_left_of(self.inner_ele)) - elif mode == 'right': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).to_right_of(self.inner_ele)) - elif mode == 'above': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).above(self.inner_ele)) - elif mode == 'below': - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).below(self.inner_ele)) - else: # 'near' - eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).near(self.inner_ele)) - - return [self.page.ele(e) for e in eles] - - except IndexError: - raise ValueError('未找到元素,请检查浏览器版本,低版本的浏览器无法使用此方法。') - - -def make_driver_ele(page_or_ele, loc, single=True, timeout=None): - """执行driver模式元素的查找 - 页面查找元素及元素查找下级元素皆使用此方法 - :param page_or_ele: DriverPage对象或DriverElement对象 - :param loc: 元素定位元组 - :param single: True则返回第一个,False则返回全部 - :param timeout: 查找元素超时时间 - :return: 返回DriverElement元素或它们组成的列表 - """ - # ---------------处理定位符--------------- - if isinstance(loc, (str, tuple)): - loc = get_loc(loc) - - elif str(type(loc)).endswith('RelativeBy'): - page = page_or_ele.page if isinstance(page_or_ele, BaseElement) else page_or_ele - driver = page.driver - eles = driver.find_elements(loc) - return DriverElement(eles[0], page) if single else [DriverElement(ele, page) for ele in eles] - - else: - raise ValueError("定位符必须为str、长度为2的tuple、或RelativeBy对象。") - - # ---------------设置 page 和 driver--------------- - if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc_str}' - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>') and isinstance(page_or_ele, DriverElement): - loc_str = f'{page_or_ele.css_path}{loc[1]}' - loc = loc[0], loc_str - - page = page_or_ele.page - driver = page_or_ele.inner_ele - - else: # 传入的是DriverPage对象 - page = page_or_ele - driver = page_or_ele.driver - - # -----------------设置等待对象----------------- - if timeout is not None and timeout != page.timeout: - wait = WebDriverWait(driver, timeout=timeout) - else: - page.wait_object._driver = driver - wait = page.wait_object - - # ---------------执行查找----------------- - try: - # 使用xpath查找 - if loc[0] == 'xpath': - return wait.until(ElementsByXpath(page, loc[1], single, timeout)) - - # 使用css selector查找 - else: - if single: - return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) - else: - eles = wait.until(ec.presence_of_all_elements_located(loc)) - return [DriverElement(ele, page) for ele in eles] - - except TimeoutException: - return [] if not single else None - - except InvalidElementStateException: - raise ValueError(f'无效的查找语句:{loc}') - - -class ElementsByXpath(object): - """用js通过xpath获取元素、节点或属性,与WebDriverWait配合使用""" - - def __init__(self, page, xpath=None, single=False, timeout=10): - """ - :param page: DrissionPage对象 - :param xpath: xpath文本 - :param single: True则返回第一个,False则返回全部 - :param timeout: 超时时间 - """ - self.page = page - self.xpath = xpath - self.single = single - self.timeout = timeout - - def __call__(self, ele_or_driver): - - def get_nodes(node=None, xpath_txt=None, type_txt='7'): - """用js通过xpath获取元素、节点或属性 - :param node: 'document' 或 元素对象 - :param xpath_txt: xpath语句 - :param type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate - :return: 元素对象或属性、文本字符串 - """ - node_txt = 'document' if not node or node == 'document' else 'arguments[0]' - for_txt = '' - - # 获取第一个元素、节点或属性 - if type_txt == '9': - return_txt = ''' - if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} - else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} - else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} - else{return e.singleNodeValue;} - ''' - - # 按顺序获取所有元素、节点或属性 - elif type_txt == '7': - for_txt = """ - var a=new Array(); - for(var i = 0; i 元素使用,现在是:{ele.tag}。") - - from selenium.webdriver.support.select import Select as SeleniumSelect - self.inner_ele = ele - self.select_ele = SeleniumSelect(ele.inner_ele) - - def __call__(self, text_or_index, timeout=None): - """选定下拉列表中子元素 - :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self.select(text_or_index, timeout=timeout) - - @property - def is_multi(self): - """返回是否多选表单""" - return self.select_ele.is_multiple - - @property - def options(self): - """返回所有选项元素组成的列表""" - return self.inner_ele.eles('tag:option') - - @property - def selected_option(self): - """返回第一个被选中的option元素 - :return: DriverElement对象或None - """ - ele = self.inner_ele.run_script('return arguments[0].options[arguments[0].selectedIndex];') - return None if ele is None else DriverElement(ele, self.inner_ele.page) - - @property - def selected_options(self): - """返回所有被选中的option元素列表 - :return: DriverElement对象组成的列表 - """ - return [x for x in self.options if x.is_selected()] - - def clear(self): - """清除所有已选项""" - self.select_ele.deselect_all() - - def select(self, text_or_index, timeout=None): - """选定下拉列表中子元素 - :param text_or_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: 是否选择成功 - """ - i = 'index' if isinstance(text_or_index, int) else 'text' - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(text_or_index, i, False, timeout) - - def select_by_value(self, value, timeout=None): - """此方法用于根据value值选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可选择多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(value, 'value', False, timeout) - - def deselect(self, text_or_index, timeout=None): - """取消选定下拉列表中子元素 - :param text_or_index: 根据文本或序号取消择选项,若允许多选,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - i = 'index' if isinstance(text_or_index, int) else 'text' - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(text_or_index, i, True, timeout) - - def deselect_by_value(self, value, timeout=None): - """此方法用于根据value值取消选择项。当元素是多选列表时,可以接收list或tuple - :param value: value属性值,传入list或tuple可取消多项 - :param timeout: 超时时间,不输入默认实用页面超时时间 - :return: None - """ - timeout = timeout if timeout is not None else self.inner_ele.page.timeout - return self._select(value, 'value', True, timeout) - - def invert(self): - """反选""" - if not self.is_multi: - raise NotImplementedError("只能对多项选框执行反选。") - - for i in self.options: - i.click(by_js=True) - - def _select(self, text_value_index, para_type='text', deselect=False, timeout=None): - """选定或取消选定下拉列表中子元素 - :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选 'text'、'value'、'index' - :param deselect: 是否取消选择 - :return: 是否选择成功 - """ - if not self.is_multi and isinstance(text_value_index, (list, tuple)): - raise TypeError('单选下拉列表不能传入list和tuple') - - def do_select(): - try: - if para_type == 'text': - if deselect: - self.select_ele.deselect_by_visible_text(text_value_index) - else: - self.select_ele.select_by_visible_text(text_value_index) - elif para_type == 'value': - if deselect: - self.select_ele.deselect_by_value(text_value_index) - else: - self.select_ele.select_by_value(text_value_index) - elif para_type == 'index': - if deselect: - self.select_ele.deselect_by_index(int(text_value_index)) - else: - self.select_ele.select_by_index(int(text_value_index)) - else: - raise ValueError('para_type参数只能传入"text"、"value"或"index"。') - - return True - - except NoSuchElementException: - return False - - if isinstance(text_value_index, (str, int)): - t1 = perf_counter() - ok = do_select() - while not ok and perf_counter() - t1 < timeout: - sleep(.2) - ok = do_select() - return ok - - elif isinstance(text_value_index, (list, tuple)): - return self._select_multi(text_value_index, para_type, deselect) - - else: - raise TypeError('只能传入str、int、list和tuple类型。') - - def _select_multi(self, text_value_index=None, para_type='text', deselect=False) -> bool: - """选定或取消选定下拉列表中多个子元素 - :param text_value_index: 根据文本、值选或序号择选多项 - :param para_type: 参数类型,可选 'text'、'value'、'index' - :param deselect: 是否取消选择 - :return: 是否选择成功 - """ - if para_type not in ('text', 'value', 'index'): - raise ValueError('para_type参数只能传入“text”、“value”或“index”') - - if not isinstance(text_value_index, (list, tuple)): - raise TypeError('只能传入list或tuple类型。') - - success = True - for i in text_value_index: - if not isinstance(i, (int, str)): - raise TypeError('列表只能由str或int组成') - - p = 'index' if isinstance(i, int) else para_type - if not self._select(i, p, deselect): - success = False - - return success - - -class ElementWaiter(object): - """等待元素在dom中某种状态,如删除、显示、隐藏""" - - def __init__(self, page_or_ele, loc_or_ele, timeout=None): - """等待元素在dom中某种状态,如删除、显示、隐藏 - :param page_or_ele: 页面或父元素 - :param loc_or_ele: 要等待的元素,可以是已有元素、定位符 - :param timeout: 超时时间,默认读取页面超时时间 - """ - if isinstance(page_or_ele, DriverElement): - page = page_or_ele.page - self.driver = page_or_ele.inner_ele - else: - page = page_or_ele - self.driver = page_or_ele.driver - - if isinstance(loc_or_ele, DriverElement): - self.target = loc_or_ele.inner_ele - - elif isinstance(loc_or_ele, WebElement): - self.target = loc_or_ele - - elif isinstance(loc_or_ele, str): - self.target = str_to_loc(loc_or_ele) - - elif isinstance(loc_or_ele, tuple): - self.target = loc_or_ele - - else: - raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型。') - - self.timeout = timeout if timeout is not None else page.timeout - - def delete(self): - """等待元素从dom删除""" - return self._wait_ele('del') - - def display(self): - """等待元素从dom显示""" - return self._wait_ele('display') - - def hidden(self): - """等待元素从dom隐藏""" - return self._wait_ele('hidden') - - def _wait_ele(self, mode): - """执行等待 - :param mode: 等待模式 - :return: 是否等待成功 - """ - if isinstance(self.target, WebElement): - end_time = time() + self.timeout - while time() < end_time: - if mode == 'del': - try: - self.target.is_enabled() - except Exception: - return True - - elif mode == 'display' and self.target.is_displayed(): - return True - - elif mode == 'hidden' and not self.target.is_displayed(): - return True - - return False - - else: - try: - if mode == 'del': - WebDriverWait(self.driver, self.timeout).until_not(ec.presence_of_element_located(self.target)) - - elif mode == 'display': - WebDriverWait(self.driver, self.timeout).until(ec.visibility_of_element_located(self.target)) - - elif mode == 'hidden': - WebDriverWait(self.driver, self.timeout).until_not(ec.visibility_of_element_located(self.target)) - - return True - - except Exception: - return False - - -class Scroll(object): - """用于滚动的对象""" - - def __init__(self, page_or_ele): - """ - :param page_or_ele: DriverPage或DriverElement - """ - self.driver = page_or_ele - if isinstance(page_or_ele, DriverElement): - self.t1 = self.t2 = 'arguments[0]' - else: - self.t1 = 'window' - self.t2 = 'document.documentElement' - - def to_top(self): - """滚动到顶端,水平位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,0);') - - def to_bottom(self): - """滚动到底端,水平位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,{self.t2}.scrollHeight);') - - def to_half(self): - """滚动到垂直中间位置,水平位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollLeft,{self.t2}.scrollHeight/2);') - - def to_rightmost(self): - """滚动到最右边,垂直位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo({self.t2}.scrollWidth,{self.t2}.scrollTop);') - - def to_leftmost(self): - """滚动到最左边,垂直位置不变""" - self.driver.run_script(f'{self.t1}.scrollTo(0,{self.t2}.scrollTop);') - - def to_location(self, x, y): - """滚动到指定位置 - :param x: 水平距离 - :param y: 垂直距离 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollTo({x},{y});') - - def up(self, pixel=300): - """向上滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self.driver.run_script(f'{self.t1}.scrollBy(0,{pixel});') - - def down(self, pixel=300): - """向下滚动若干像素,水平位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollBy(0,{pixel});') - - def left(self, pixel=300): - """向左滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - pixel = -pixel - self.driver.run_script(f'{self.t1}.scrollBy({pixel},0);') - - def right(self, pixel=300): - """向右滚动若干像素,垂直位置不变 - :param pixel: 滚动的像素 - :return: None - """ - self.driver.run_script(f'{self.t1}.scrollBy({pixel},0);') - - -def _exchange_arguments(index, filter_loc): - # 此方法用于兼容MixPage参数顺序相反的情况 - if isinstance(index, str) and isinstance(filter_loc, int): - index, filter_loc = filter_loc, index - elif isinstance(index, int) and filter_loc == 1: - filter_loc = '' - elif isinstance(filter_loc, str) and index == '': - index = 1 - return index, filter_loc diff --git a/DrissionPage/mixpage/driver_element.pyi b/DrissionPage/mixpage/driver_element.pyi deleted file mode 100644 index 98fa815..0000000 --- a/DrissionPage/mixpage/driver_element.pyi +++ /dev/null @@ -1,326 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Any, Tuple - -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.select import Select as SeleniumSelect - -from .driver_page import DriverPage -from .mix_page import MixPage -from .shadow_root_element import ShadowRootElement -from .base import DrissionElement -from .session_element import SessionElement - - -class DriverElement(DrissionElement): - - def __init__(self, ele: WebElement, page: Union[DriverPage, MixPage] = None): - self._inner_ele: WebElement = ... - self._select: Select = ... - self._scroll: Scroll = ... - self.page: Union[DriverPage, MixPage] = ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['DriverElement', str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def inner_ele(self) -> WebElement: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - def attr(self, attr: str) -> str: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True, - relative: bool = False) -> Union['DriverElement', str, None, List[Union['DriverElement', str]]]: ... - - def _get_ele_path(self, mode) -> str: ... - - # -----------------driver独有属性和方法------------------- - @property - def size(self) -> dict: ... - - @property - def location(self) -> dict: ... - - @property - def shadow_root(self) -> ShadowRootElement: ... - - @property - def sr(self) -> ShadowRootElement: ... - - @property - def pseudo_before(self) -> str: ... - - @property - def pseudo_after(self) -> str: ... - - @property - def select(self) -> Select: ... - - @property - def scroll(self) -> Scroll: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['DriverElement', None]: ... - - def prev(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union['DriverElement', str, None]: ... - - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> Union['DriverElement', str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> Union['DriverElement', str, None]: ... - - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union['DriverElement', str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = 0) -> List[Union['DriverElement', str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['DriverElement', str]]: ... - - def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> DriverElement: ... - - def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement': ... - - def lefts(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def rights(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def aboves(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def belows(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def nears(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']: ... - - def wait_ele(self, - loc_or_ele: Union[str, tuple, DrissionElement, WebElement], - timeout: float = None) -> 'ElementWaiter': ... - - def style(self, style: str, pseudo_ele: str = '') -> str: ... - - def click(self, by_js: bool = None, timeout: float = None) -> bool: ... - - def click_at(self, - x: Union[int, str] = None, - y: Union[int, str] = None, - by_js: bool = False) -> None: ... - - def r_click(self) -> None: ... - - def r_click_at(self, x: Union[int, str] = None, y: Union[int, str] = None) -> None: ... - - def input(self, - vals: Union[str, tuple], - clear: bool = True, - insure: bool = True, - timeout: float = None) -> bool: ... - - def run_script(self, script: str, *args) -> Any: ... - - def submit(self) -> Union[bool, None]: ... - - def clear(self, insure: bool = True) -> Union[None, bool]: ... - - def is_selected(self) -> bool: ... - - def is_enabled(self) -> bool: ... - - def is_displayed(self) -> bool: ... - - def is_valid(self) -> bool: ... - - def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... - - def prop(self, prop: str) -> str: ... - - def set_prop(self, prop: str, value: str) -> bool: ... - - def set_attr(self, attr: str, value: str) -> bool: ... - - def remove_attr(self, attr: str) -> bool: ... - - def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> None: ... - - def drag_to(self, - ele_or_loc: Union[tuple, WebElement, DrissionElement], - speed: int = 40, - shake: bool = True) -> None: ... - - def hover(self, x: int = None, y: int = None) -> None: ... - - def _get_relative_eles(self, - mode: str, - loc: Union[tuple, str] = '') -> Union[List['DriverElement'], 'DriverElement']: ... - - -def make_driver_ele(page_or_ele: Union[DriverPage, MixPage, DriverElement, ShadowRootElement], - loc: Union[str, Tuple[str, str]], - single: bool = True, - timeout: float = None) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - -class ElementsByXpath(object): - - def __init__(self, page, xpath: str = None, single: bool = False, timeout: float = 10): - self.single: bool = ... - self.xpath: str = ... - self.page: Union[MixPage, DriverPage] = ... - - def __call__(self, ele_or_driver: Union[RemoteWebDriver, WebElement]) \ - -> Union[str, DriverElement, None, List[str or DriverElement]]: ... - - -class Select(object): - - def __init__(self, ele: DriverElement): - self.select_ele: SeleniumSelect = ... - self.inner_ele: DriverElement = ... - - def __call__(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - @property - def is_multi(self) -> bool: ... - - @property - def options(self) -> List[DriverElement]: ... - - @property - def selected_option(self) -> Union[DriverElement, None]: ... - - @property - def selected_options(self) -> List[DriverElement]: ... - - def clear(self) -> None: ... - - def select(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - def select_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def deselect(self, text_or_index: Union[str, int, list, tuple], timeout: float = None) -> bool: ... - - def deselect_by_value(self, value: Union[str, list, tuple], timeout: float = None) -> bool: ... - - def invert(self) -> None: ... - - def _select(self, - text_value_index: Union[str, int, list, tuple] = ..., - para_type: str = 'text', - deselect: bool = False, - timeout: float = None) -> bool: ... - - def _select_multi(self, - text_value_index: Union[list, tuple] = None, - para_type: str = 'text', - deselect: bool = False) -> bool: ... - - -class ElementWaiter(object): - - def __init__(self, - page_or_ele, - loc_or_ele: Union[str, tuple, DriverElement, WebElement], - timeout: float = None): - self.target: Union[DriverElement, WebElement, tuple] = ... - self.timeout: float = ... - self.driver: Union[WebElement, RemoteWebDriver] = ... - - def delete(self) -> bool: ... - - def display(self) -> bool: ... - - def hidden(self) -> bool: ... - - def _wait_ele(self, mode: str) -> bool: ... - - -class Scroll(object): - - def __init__(self, page_or_ele): - self.driver: Union[DriverElement, DriverPage] = ... - self.t1: str = ... - self.t2: str = ... - - def to_top(self) -> None: ... - - def to_bottom(self) -> None: ... - - def to_half(self) -> None: ... - - def to_rightmost(self) -> None: ... - - def to_leftmost(self) -> None: ... - - def to_location(self, x: int, y: int) -> None: ... - - def up(self, pixel: int = 300) -> None: ... - - def down(self, pixel: int = 300) -> None: ... - - def left(self, pixel: int = 300) -> None: ... - - def right(self, pixel: int = 300) -> None: ... diff --git a/DrissionPage/mixpage/driver_page.py b/DrissionPage/mixpage/driver_page.py deleted file mode 100644 index 0779b4b..0000000 --- a/DrissionPage/mixpage/driver_page.py +++ /dev/null @@ -1,611 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from glob import glob -from os import sep -from pathlib import Path -from time import sleep, perf_counter - -from selenium.common.exceptions import NoAlertPresentException -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.wait import WebDriverWait - -from .base import BasePage -from .driver_element import DriverElement, make_driver_ele, Scroll, ElementWaiter -from .session_element import make_session_ele -from ..commons.tools import get_usable_path - - -class DriverPage(BasePage): - """DriverPage封装了页面操作的常用功能,使用selenium来获取、解析、操作网页""" - - def __init__(self, driver, timeout=10): - """初始化函数,接收一个WebDriver对象,用来操作网页""" - super().__init__(timeout) - self._driver = driver - self._wait_object = None - self._scroll = None - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele = page('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def url(self): - """返回当前网页url""" - if not self._driver or not self.driver.current_url.startswith('http'): - return None - else: - return self.driver.current_url - - @property - def html(self): - """返回页面的html文本""" - return self.driver.find_element('xpath', "//*").get_attribute("outerHTML") - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典""" - from json import loads - return loads(self('t:pre').text) - - def get(self, url, show_errmsg=False, retry=None, interval=None): - """访问url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :return: 目标url是否可用,返回None表示不确定 - """ - retry, interval = self._before_connect(url, retry, interval) - self._url_available = self._d_connect(self._url, times=retry, interval=interval, show_errmsg=show_errmsg) - return self._url_available - - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: DriverElement对象或属性、文本 - """ - return self._ele(loc_or_ele, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: DriverElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout, single=False) - - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - if isinstance(loc_or_ele, DriverElement): - return make_session_ele(loc_or_ele) - else: - return make_session_ele(self, loc_or_ele) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement列表形式返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象组成的列表 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :return: DriverElement对象 - """ - # 接收到字符串或元组,获取定位loc元组 - if isinstance(loc_or_ele, (str, tuple)): - return make_driver_ele(self, loc_or_ele, single, timeout) - - # 接收到DriverElement对象直接返回 - elif isinstance(loc_or_ele, DriverElement): - return loc_or_ele - - # 接收到WebElement对象打包成DriverElement对象返回 - elif isinstance(loc_or_ele, WebElement): - return DriverElement(loc_or_ele, self) - - # 接收到的类型不正确,抛出异常 - else: - raise ValueError('loc_or_str参数只能是tuple、str、DriverElement 或 WebElement类型。') - - def get_cookies(self, as_dict=False): - """返回当前网站cookies""" - if as_dict: - return {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()} - else: - return self.driver.get_cookies() - - @property - def timeout(self): - """返回查找元素时等待的秒数""" - return self._timeout - - @timeout.setter - def timeout(self, second): - """设置查找元素时等待的秒数""" - self._timeout = second - self._wait_object = None - - def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False): - """尝试连接,重试若干次 - :param to_url: 要访问的url - :param times: 重试次数 - :param interval: 重试间隔(秒) - :param show_errmsg: 是否抛出异常 - :return: 是否成功,返回None表示不确定 - """ - err = None - is_ok = False - - for _ in range(times + 1): - try: - self.driver.get(to_url) - go_ok = True - except Exception as e: - err = e - go_ok = False - - is_ok = self.check_page() if go_ok else False - - if is_ok is not False: - break - - if _ < times: - sleep(interval) - if show_errmsg: - print(f'重试 {to_url}') - - if is_ok is False and show_errmsg: - raise err if err is not None else ConnectionError('连接异常。') - - return is_ok - - # ----------------driver独有属性和方法----------------------- - @property - def driver(self): - return self._driver - - @property - def wait_object(self): - """返回WebDriverWait对象,重用避免每次新建对象""" - if self._wait_object is None: - self._wait_object = WebDriverWait(self.driver, timeout=self.timeout) - - return self._wait_object - - @property - def timeouts(self): - """返回三种超时时间,selenium4以上版本可用""" - return {'implicit': self.timeout, - 'pageLoad': self.driver.timeouts.page_load, - 'script': self.driver.timeouts.script} - - @property - def tabs_count(self): - """返回标签页数量""" - try: - return len(self.driver.window_handles) - except Exception: - return 0 - - @property - def tab_handles(self): - """返回所有标签页handle列表""" - return self.driver.window_handles - - @property - def current_tab_index(self): - """返回当前标签页序号""" - return self.driver.window_handles.index(self.driver.current_window_handle) - - @property - def current_tab_handle(self): - """返回当前标签页handle""" - return self.driver.current_window_handle - - @property - def active_ele(self): - """返回当前焦点所在元素""" - return DriverElement(self.driver.switch_to.active_element, self) - - @property - def scroll(self): - """用于滚动滚动条的对象""" - if self._scroll is None: - self._scroll = Scroll(self) - return self._scroll - - @property - def to_frame(self): - """用于跳转到frame的对象,调用其方法实现跳转 - 示例: - page.to_frame.by_loc('tag:iframe') - 通过传入frame的查询字符串定位 - page.to_frame.by_loc((By.TAG_NAME, 'iframe')) - 通过传入定位符定位 - page.to_frame.by_id('iframe_id') - 通过frame的id属性定位 - page.to_frame('iframe_name') - 通过frame的name属性定位 - page.to_frame(iframe_element) - 通过传入元素对象定位 - page.to_frame(0) - 通过frame的序号定位 - page.to_frame.main() - 跳到最顶层 - page.to_frame.parent() - 跳到上一层 - """ - return ToFrame(self) - - def set_timeouts(self, implicit=None, pageLoad=None, script=None): - """设置超时时间,单位为秒,selenium4以上版本有效 - :param implicit: 查找元素超时时间 - :param pageLoad: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: None - """ - if implicit is not None: - self.timeout = implicit - - if pageLoad is not None: - self.driver.set_page_load_timeout(pageLoad) - - if script is not None: - self.driver.set_script_timeout(script) - - def wait_ele(self, loc_or_ele, timeout=None): - """等待元素从dom删除、显示、隐藏 - :param loc_or_ele: 可以是元素、查询字符串、loc元组 - :param timeout: 等待超时时间 - :return: 用于等待的ElementWaiter对象 - """ - return ElementWaiter(self, loc_or_ele, timeout) - - def check_page(self): - """检查页面是否符合预期 - 由子类自行实现各页面的判定规则 - """ - return None - - def run_script(self, script, *args): - """执行js代码 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.driver.execute_script(script, *args) - - def run_async_script(self, script, *args): - """以异步方式执行js代码 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - return self.driver.execute_async_script(script, *args) - - def run_cdp(self, cmd, **cmd_args): - """执行Chrome DevTools Protocol语句 - :param cmd: 协议项目 - :param cmd_args: 参数 - :return: 执行的结果 - """ - return self.driver.execute_cdp_cmd(cmd, cmd_args) - - def create_tab(self, url=''): - """新建并定位到一个标签页,该标签页在最后面 - :param url: 新标签页跳转到的网址 - :return: None - """ - self.driver.switch_to.new_window('tab') - if url: - self.get(url) - - def close_tabs(self, num_or_handles=None): - """关闭传入的标签页,默认关闭当前页。可传入多个 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致,不能按序号关闭。 - :param num_or_handles:要关闭的标签页序号或handle,可传入handle和序号组成的列表或元组,为None时关闭当前页 - :return: None - """ - tabs = (self.current_tab_handle,) if num_or_handles is None else get_handles(self.tab_handles, num_or_handles) - for i in tabs: - self.driver.switch_to.window(i) - self.driver.close() - - self.to_tab(0) - - def close_other_tabs(self, num_or_handles=None): - """关闭传入的标签页以外标签页,默认保留当前页。可传入多个 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致,不能按序号关闭。 - :param num_or_handles: 要保留的标签页序号或handle,可传入handle和序号组成的列表或元组,为None时保存当前页 - :return: None - """ - all_tabs = self.driver.window_handles - reserve_tabs = {self.current_tab_handle} if num_or_handles is None else get_handles(all_tabs, num_or_handles) - - for i in set(all_tabs) - reserve_tabs: - self.driver.switch_to.window(i) - self.driver.close() - - self.to_tab(0) - - def to_tab(self, num_or_handle=0): - """跳转到标签页 - 注意:当程序使用的是接管的浏览器,获取到的 handle 顺序和视觉效果不一致 - :param num_or_handle: 标签页序号或handle字符串,序号第一个为0,最后为-1 - :return: None - """ - try: - tab = int(num_or_handle) - except (ValueError, TypeError): - tab = num_or_handle - - tab = self.driver.window_handles[tab] if isinstance(tab, int) else tab - self.driver.switch_to.window(tab) - - def set_ua_to_tab(self, ua): - """为当前tab设置user agent,只在当前tab有效 - :param ua: user agent字符串 - :return: None - """ - self.driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": ua}) - - def get_session_storage(self, item=None): - """获取sessionStorage信息,不设置item则获取全部 - :param item: 要获取的项,不设置则返回全部 - :return: sessionStorage一个或所有项内容 - """ - js = f'return sessionStorage.getItem("{item}");' if item else 'return sessionStorage;' - return self.run_script(js) - - def get_local_storage(self, item=None): - """获取localStorage信息,不设置item则获取全部 - :param item: 要获取的项目,不设置则返回全部 - :return: localStorage一个或所有项内容 - """ - js = f'return localStorage.getItem("{item}");' if item else 'return localStorage;' - return self.run_script(js) - - def set_session_storage(self, item, value): - """设置或删除某项sessionStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - s = f'sessionStorage.removeItem("{item}");' if item is False else f'sessionStorage.setItem("{item}","{value}");' - self.run_script(s) - - def set_local_storage(self, item, value): - """设置或删除某项localStorage信息 - :param item: 要设置的项 - :param value: 项的值,设置为False时,删除该项 - :return: None - """ - s = f'localStorage.removeItem("{item}");' if item is False else f'localStorage.setItem("{item}","{value}");' - self.run_script(s) - - def clean_cache(self, session_storage=True, local_storage=True, cache=True, cookies=True): - """清除缓存,可选要清除的项 - :param session_storage: 是否清除sessionStorage - :param local_storage: 是否清除localStorage - :param cache: 是否清除cache - :param cookies: 是否清除cookies - :return: None - """ - if session_storage: - self.run_script('sessionStorage.clear();') - if local_storage: - self.run_script('localStorage.clear();') - if cache: - self.run_cdp('Network.clearBrowserCache') - if cookies: - self.run_cdp('Network.clearBrowserCookies') - - def screenshot(self, path=None, filename=None, as_bytes=False): - """截取页面可见范围截图 - :param path: 保存路径 - :param filename: 图片文件名,不传入时以页面title命名 - :param as_bytes: 是否已字节形式返回图片,为True时上面两个参数失效 - :return: 图片完整路径或字节文本 - """ - if as_bytes: - return self.driver.get_screenshot_as_png() - - name = filename or self.title - if not name.lower().endswith('.png'): - name = f'{name}.png' - path = Path(path or '.').absolute() - path.mkdir(parents=True, exist_ok=True) - img_path = str(get_usable_path(f'{path}{sep}{name}')) - self.driver.save_screenshot(img_path) - return img_path - - def scroll_to_see(self, loc_or_ele): - """滚动页面直到元素可见 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串(详见ele函数注释) - :return: None - """ - ele = self.ele(loc_or_ele) - ele.run_script("arguments[0].scrollIntoView();") - - def refresh(self): - """刷新当前页面""" - self.driver.refresh() - - def stop_loading(self): - """强制停止页面加载""" - self.run_cdp('Page.stopLoading') - - def back(self): - """在浏览历史中后退一步""" - self.driver.back() - - def forward(self): - """在浏览历史中前进一步""" - self.driver.forward() - - def set_window_size(self, width=None, height=None): - """设置浏览器窗口大小,默认最大化,任一参数为0最小化 - :param width: 浏览器窗口高 - :param height: 浏览器窗口宽 - :return: None - """ - if width is None and height is None: - self.driver.maximize_window() - - elif width == 0 or height == 0: - self.driver.minimize_window() - - else: - if width < 0 or height < 0: - raise ValueError('x 和 y参数必须大于0。') - - new_x = width or self.driver.get_window_size()['width'] - new_y = height or self.driver.get_window_size()['height'] - self.driver.set_window_size(new_x, new_y) - - def chrome_downloading(self, download_path): - """返回浏览器下载中的文件列表 - :param download_path: 下载文件夹路径 - :return: 文件列表 - """ - return glob(f'{download_path}{sep}*.crdownload') - - def process_alert(self, ok=True, send=None, timeout=None): - """处理提示框 - :param ok: True表示确认,False表示取消,其它值不会按按钮但依然返回文本值 - :param send: 处理prompt提示框时可输入文本 - :param timeout: 等待提示框出现的超时时间 - :return: 提示框内容文本,未等到提示框则返回None - """ - - def do_it(): - try: - return self.driver.switch_to.alert - except NoAlertPresentException: - return False - - timeout = timeout if timeout is not None else self.timeout - t1 = perf_counter() - alert = do_it() - while alert is False and perf_counter() - t1 <= timeout: - alert = do_it() - - if alert is False: - return None - - res_text = alert.text - - if send is not None: - alert.send_keys(send) - - if ok is True: - alert.accept() - elif ok is False: - alert.dismiss() - - return res_text - - -class ToFrame(object): - """用于处理焦点跳转到页面框架的类""" - - def __init__(self, page): - self.page = page - - def __call__(self, condition='main'): - """跳转到(i)frame,可传入id、name、序号、元素对象、定位符 - :param condition: (i)frame,可传入id、name、序号、元素对象、定位符 - :return: 当前页面对象 - """ - if isinstance(condition, (DriverElement, WebElement)): - self.by_ele(condition) - elif isinstance(condition, int): - self.by_index(condition) - elif ':' not in condition and '=' not in condition and not condition.startswith(('#', '.', '@')): - self.by_id(condition) - else: - self.by_loc(condition) - - return self.page - - def main(self): - """焦点跳转到最高层级框架""" - self.page.driver.switch_to.default_content() - return self.page - - def parent(self, level=1): - """焦点跳转到上级框架,可指定上级层数 - :param level: 上面第几层框架 - :return: 框架所在页面对象 - """ - if level < 1: - raise ValueError('level参数须是大于0的整数。') - for _ in range(level): - self.page.driver.switch_to.parent_frame() - return self.page - - def by_id(self, id_): - """焦点跳转到id为该值的(i)frame - :param id_: (i)frame的id属性值 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(id_) - return self.page - - def by_name(self, name): - """焦点跳转到name为该值的(i)frame - :param name: (i)frame的name属性值 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(name) - return self.page - - def by_index(self, index): - """焦点跳转到页面中第几个(i)frame - :param index: 页面中第几个(i)frame - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(index) - return self.page - - def by_loc(self, loc): - """焦点跳转到根据定位符获取到的(i)frame - :param loc: 定位符,支持selenium原生和DriverPage定位符 - :return: 框架所在页面对象 - """ - self.page.driver.switch_to.frame(self.page(loc).inner_ele) - return self.page - - def by_ele(self, ele): - """焦点跳转到传入的(i)frame元素对象 - :param ele: (i)frame元素对象 - :return: 框架所在页面对象 - """ - if isinstance(ele, DriverElement): - ele = ele.inner_ele - self.page.driver.switch_to.frame(ele) - return self.page - - -def get_handles(handles, num_or_handles): - """返回指定标签页组成的set - :param handles: handles列表 - :param num_or_handles: 指定的标签页,可以是多个 - :return: 指定标签页组成的set - """ - if isinstance(num_or_handles, (int, str)): - num_or_handles = (num_or_handles,) - elif not isinstance(num_or_handles, (list, tuple)): - raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。') - - return set(i if isinstance(i, str) else handles[i] for i in num_or_handles) diff --git a/DrissionPage/mixpage/driver_page.pyi b/DrissionPage/mixpage/driver_page.pyi deleted file mode 100644 index 542a402..0000000 --- a/DrissionPage/mixpage/driver_page.pyi +++ /dev/null @@ -1,189 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Any, Tuple - -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.wait import WebDriverWait - -from .base import BasePage -from .driver_element import DriverElement, Scroll, ElementWaiter -from .mix_page import MixPage -from .session_element import SessionElement - - -class DriverPage(BasePage): - - def __init__(self, driver: RemoteWebDriver, timeout: float = 10) -> None: - self._driver: RemoteWebDriver = ... - self._url: str = ... - self._wait_object: WebDriverWait = ... - self._scroll: Scroll = ... - - def __call__(self, loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def url(self) -> Union[str, None]: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> dict: ... - - def get(self, - url: str, - show_errmsg: bool = False, - retry: int = None, - interval: float = None) -> Union[None, bool]: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, str]]: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement] = None) \ - -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], - timeout: float = None, - single: bool = True) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ... - - @property - def timeout(self) -> float: ... - - @timeout.setter - def timeout(self, second: float) -> None: ... - - def _d_connect(self, - to_url: str, - times: int = 0, - interval: float = 1, - show_errmsg: bool = False) -> Union[bool, None]: ... - - # ----------------driver独有属性和方法----------------------- - @property - def driver(self) -> WebDriver: ... - - @property - def wait_object(self) -> WebDriverWait: ... - - @property - def timeouts(self) -> dict: ... - - @property - def tabs_count(self) -> int: ... - - @property - def tab_handles(self) -> list: ... - - @property - def current_tab_index(self) -> int: ... - - @property - def current_tab_handle(self) -> str: ... - - @property - def active_ele(self) -> DriverElement: ... - - @property - def scroll(self) -> Scroll: ... - - @property - def to_frame(self) -> ToFrame: ... - - def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> None: ... - - def wait_ele(self, - loc_or_ele: Union[str, tuple, DriverElement, WebElement], - timeout: float = None) -> ElementWaiter: ... - - def check_page(self) -> Union[bool, None]: ... - - def run_script(self, script: str, *args) -> Any: ... - - def run_async_script(self, script: str, *args) -> Any: ... - - def run_cdp(self, cmd: str, **cmd_args) -> Any: ... - - def create_tab(self, url: str = '') -> None: ... - - def close_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... - - def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: ... - - def to_tab(self, num_or_handle: Union[int, str] = 0) -> None: ... - - def set_ua_to_tab(self, ua: str) -> None: ... - - def get_session_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def get_local_storage(self, item: str = None) -> Union[str, dict, None]: ... - - def set_session_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def set_local_storage(self, item: str, value: Union[str, bool]) -> None: ... - - def clean_cache(self, - session_storage: bool = True, - local_storage: bool = True, - cache: bool = True, - cookies: bool = True) -> None: ... - - def screenshot(self, path: str = None, filename: str = None, as_bytes: bool = False) -> Union[str, bytes]: ... - - def scroll_to_see(self, loc_or_ele: Union[str, tuple, WebElement, DriverElement]) -> None: ... - - def refresh(self) -> None: ... - - def stop_loading(self) -> None: ... - - def back(self) -> None: ... - - def forward(self) -> None: ... - - def set_window_size(self, width: int = None, height: int = None) -> None: ... - - def chrome_downloading(self, download_path: str) -> list: ... - - def process_alert(self, ok: bool = True, send: str = None, timeout: float = None) -> Union[str, None]: ... - - -class ToFrame(object): - - def __init__(self, page: DriverPage): - self.page: DriverPage = ... - - def __call__(self, condition: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> Union[ - DriverPage, MixPage]: ... - - def main(self) -> DriverPage: ... - - def parent(self, level: int = 1) -> DriverPage: ... - - def by_id(self, id_: str) -> DriverPage: ... - - def by_name(self, name: str) -> DriverPage: ... - - def by_index(self, index: int) -> DriverPage: ... - - def by_loc(self, loc: Union[str, tuple]) -> DriverPage: ... - - def by_ele(self, ele: Union[DriverElement, WebElement]) -> DriverPage: ... - - -def get_handles(handles: list, num_or_handles: Union[int, str, list, tuple]) -> set: ... diff --git a/DrissionPage/mixpage/mix_page.py b/DrissionPage/mixpage/mix_page.py deleted file mode 100644 index 64312d7..0000000 --- a/DrissionPage/mixpage/mix_page.py +++ /dev/null @@ -1,344 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from .base import BasePage -from .drission import Drission -from .driver_page import DriverPage -from .session_page import SessionPage - - -class MixPage(SessionPage, DriverPage, BasePage): - """MixPage整合了DriverPage和SessionPage,封装了对页面的操作, - 可在selenium(d模式)和requests(s模式)间无缝切换。 - 切换的时候会自动同步cookies。 - 获取信息功能为两种模式共有,操作页面元素功能只有d模式有。 - 调用某种模式独有的功能,会自动切换到该模式。 - """ - - def __init__(self, mode='d', drission=None, timeout=None, driver_options=None, session_options=None): - """初始化函数 - :param mode: 'd' 或 's',即driver模式和session模式 - :param drission: Drission对象,不传入时会自动创建,有传入时driver_options和session_options参数无效 - :param timeout: 超时时间,d模式时为寻找元素时间,s模式时为连接时间,默认10秒 - :param driver_options: 浏览器设置,没传入drission参数时会用这个设置新建Drission对象中的WebDriver对象,传入False则不创建 - :param session_options: requests设置,没传入drission参数时会用这个设置新建Drission对象中的Session对象,传入False则不创建 - """ - self._mode = mode.lower() - if self._mode not in ('s', 'd'): - raise ValueError('mode参数只能是s或d。') - - super(DriverPage, self).__init__(timeout) - self._driver, self._session = (None, True) if self._mode == 's' else (True, None) - self._drission = drission or Drission(driver_options, session_options) - self._wait_object = None - self._response = None - self._scroll = None - self._download_set = None - self._download_path = None - - if self._mode == 'd': - try: - timeouts = self.drission.driver_options.timeouts - t = timeout if isinstance(timeout, (int, float)) else timeouts['implicit'] - self.set_timeouts(t, timeouts['pageLoad'], timeouts['script']) - - except Exception: - self.timeout = timeout if timeout is not None else 10 - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele = page('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: 子元素对象或属性文本 - """ - if self._mode == 's': - return super().__call__(loc_or_str) - elif self._mode == 'd': - return super(SessionPage, self).__call__(loc_or_str, timeout) - - # -----------------共有属性和方法------------------- - @property - def url(self): - """返回当前url""" - if self._mode == 'd': - return self._drission.driver.current_url if self._driver else None - elif self._mode == 's': - return self._session_url - - @property - def title(self): - """返回网页title""" - if self._mode == 's': - return super().title - elif self._mode == 'd': - return super(SessionPage, self).title - - @property - def html(self): - """返回页面html文本""" - if self._mode == 's': - return super().html - elif self._mode == 'd': - return super(SessionPage, self).html - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典""" - if self._mode == 's': - return super().json - elif self._mode == 'd': - return super(SessionPage, self).json - - def get(self, url, show_errmsg=False, retry=None, interval=None, **kwargs): - """跳转到一个url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数,s模式专用 - :return: url是否可用,d模式返回None时表示不确定 - """ - if self._mode == 'd': - return super(SessionPage, self).get(url, show_errmsg, retry, interval) - elif self._mode == 's': - return super().get(url, show_errmsg, retry, interval, **kwargs) - - def ele(self, loc_or_ele, timeout=None): - """返回第一个符合条件的元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: 元素对象或属性、文本节点文本 - """ - if self._mode == 's': - return super().ele(loc_or_ele) - elif self._mode == 'd': - return super(SessionPage, self).ele(loc_or_ele, timeout=timeout) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与页面等待时间一致 - :return: 元素对象或属性、文本组成的列表 - """ - if self._mode == 's': - return super().eles(loc_or_str) - elif self._mode == 'd': - return super(SessionPage, self).eles(loc_or_str, timeout=timeout) - - def s_ele(self, loc_or_ele=None): - """查找第一个符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 - :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - if self._mode == 's': - return super().s_ele(loc_or_ele) - elif self._mode == 'd': - return super(SessionPage, self).s_ele(loc_or_ele) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement形式返回,d模式处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本组成的列表 - """ - if self._mode == 's': - return super().s_eles(loc_or_str) - elif self._mode == 'd': - return super(SessionPage, self).s_eles(loc_or_str) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 查找元素超时时间,d模式专用 - :param single: True则返回第一个,False则返回全部 - :return: 元素对象或属性、文本节点文本 - """ - if self._mode == 's': - return super()._ele(loc_or_ele, single=single) - elif self._mode == 'd': - return super(SessionPage, self)._ele(loc_or_ele, timeout=timeout, single=single) - - def get_cookies(self, as_dict=False, all_domains=False): - """返回cookies - :param as_dict: 是否以字典方式返回 - :param all_domains: 是否返回所有域的cookies - :return: cookies信息 - """ - if self._mode == 's': - return super().get_cookies(as_dict, all_domains) - elif self._mode == 'd': - return super(SessionPage, self).get_cookies(as_dict) - - # ----------------MixPage独有属性和方法----------------------- - @property - def drission(self): - """返回当前使用的 Dirssion 对象""" - return self._drission - - @property - def driver(self): - """返回 driver 对象,如没有则创建 - 每次访问时切换到 d 模式,用于独有函数及外部调用 - :return: WebDriver对象 - """ - self.change_mode('d') - return self._drission.driver - - @property - def session(self): - """返回 Session 对象,如没有则创建""" - return self._drission.session - - @property - def response(self): - """返回 s 模式获取到的 Response 对象,切换到 s 模式""" - self.change_mode('s') - return self._response - - @property - def mode(self): - """返回当前模式,'s'或'd' """ - return self._mode - - @property - def _session_url(self): - """返回 session 保存的url""" - return self._response.url if self._response else None - - def change_mode(self, mode=None, go=True, copy_cookies=True): - """切换模式,接收's'或'd',除此以外的字符串会切换为 d 模式 - 切换时会把当前模式的cookies复制到目标模式 - 切换后,如果go是True,调用相应的get函数使访问的页面同步 - 注意:s转d时,若浏览器当前网址域名和s模式不一样,必须会跳转 - :param mode: 模式字符串 - :param go: 是否跳转到原模式的url - :param copy_cookies: 是否复制cookies到目标模式 - """ - if mode is not None and mode.lower() == self._mode: - return - - self._mode = 's' if self._mode == 'd' else 'd' - - # s模式转d模式 - if self._mode == 'd': - self._driver = True - self._url = None if not self._driver else self._drission.driver.current_url - - if self._session_url: - if copy_cookies: - self.cookies_to_driver(self._session_url) - - if go: - self.get(self._session_url) - - # d模式转s模式 - elif self._mode == 's': - self._session = True - self._url = self._session_url - - if self._driver: - if copy_cookies: - self.cookies_to_session() - - if go and self._drission.driver.current_url.startswith('http'): - self.get(self._drission.driver.current_url) - - def set_cookies(self, cookies, refresh=True): - """设置cookies - :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict - :param refresh: 设置cookies后是否刷新页面 - :return: None - """ - if self._mode == 's': - self.drission.set_cookies(cookies, set_session=True) - elif self._mode == 'd': - self.drission.set_cookies(cookies, set_driver=True) - if refresh: - self.refresh() - - def cookies_to_session(self, copy_user_agent=False): - """从driver复制cookies到session - :param copy_user_agent : 是否复制user agent信息 - """ - self._drission.cookies_to_session(copy_user_agent) - - def cookies_to_driver(self, url=None): - """从session复制cookies到driver - chrome需要指定域才能接收cookies - :param url: 目标域 - :return: None - """ - url = url or self._session_url - self._drission.cookies_to_driver(url) - - def check_page(self, by_requests=False): - """d模式时检查网页是否符合预期 - 默认由response状态检查,可重载实现针对性检查 - :param by_requests: 是否用内置response检查 - :return: bool或None,None代表不知道结果 - """ - if self._session_url and self._session_url == self.url: - return self._response.ok - - # 使用requests访问url并判断可用性 - if by_requests: - self.cookies_to_session() - r = self._make_response(self.url, retry=0)[0] - return r.ok if r else False - - def close_driver(self): - """关闭driver及浏览器""" - self._driver = None - self.drission.close_driver(True) - - def close_session(self): - """关闭session""" - self._session = None - self._response = None - self.drission.close_session() - - # ----------------重写SessionPage的函数----------------------- - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url,会切换到s模式 - :param url: 目标url - :param data: post方式时提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - self.change_mode('s', go=False) - return super().post(url, data, show_errmsg, retry, interval, **kwargs) - - @property - def download(self): - """返回下载器对象""" - if self.mode == 'd': - self.cookies_to_session() - return super().download - - def chrome_downloading(self, path=None): - """返回浏览器下载中的文件列表 - :param path: 下载文件夹路径,默认读取配置信息 - :return: 正在下载的文件列表 - """ - try: - path = path or self._drission.driver_options.experimental_options['prefs']['download.default_directory'] - if not path: - raise ValueError('未指定下载路径。') - except Exception: - raise IOError('无法找到下载路径。') - - return super().chrome_downloading(path) - - # ----------------MixPage独有函数----------------------- - def hide_browser(self): - """隐藏浏览器窗口""" - self.drission.hide_browser() - - def show_browser(self): - """显示浏览器窗口""" - self.drission.show_browser() diff --git a/DrissionPage/mixpage/mix_page.pyi b/DrissionPage/mixpage/mix_page.pyi deleted file mode 100644 index beea82e..0000000 --- a/DrissionPage/mixpage/mix_page.pyi +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Tuple, Any - -from DownloadKit import DownloadKit -from requests import Response, Session -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options -from selenium.webdriver.chrome.webdriver import WebDriver -from selenium.webdriver.remote.webelement import WebElement - -from .base import BasePage -from DrissionPage.configs.session_options import SessionOptions -from DrissionPage.configs.driver_options import DriverOptions -from .drission import Drission -from .driver_element import DriverElement -from .driver_page import DriverPage -from .session_element import SessionElement -from .session_page import SessionPage - - -class MixPage(SessionPage, DriverPage, BasePage): - - def __init__(self, - mode: str = 'd', - drission: Union[Drission, str] = None, - timeout: float = None, - driver_options: Union[Options, DriverOptions, bool] = None, - session_options: Union[dict, SessionOptions, bool] = None) -> None: - self._mode: str = ... - self._drission: Drission = ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def url(self) -> Union[str, None]: ... - - @property - def title(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> dict: ... - - def get(self, - url: str, - show_errmsg: bool | None = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - data: Union[dict, str, None] = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> Union[bool, None]: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None) -> Union[DriverElement, SessionElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, SessionElement, str]]: ... - - def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement] = None) \ - -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], - timeout: float = None, single: bool = False) \ - -> Union[DriverElement, SessionElement, str, None, List[Union[SessionElement, str]], List[ - Union[DriverElement, str]]]: ... - - def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: ... - - # ----------------MixPage独有属性和方法----------------------- - @property - def drission(self) -> Drission: ... - - @property - def driver(self) -> WebDriver: ... - - @property - def session(self) -> Session: ... - - @property - def response(self) -> Response: ... - - @property - def mode(self) -> str: ... - - @property - def _session_url(self) -> str: ... - - def change_mode(self, mode: str = None, go: bool = True, copy_cookies: bool = True) -> None: ... - - def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict], refresh: bool = True) -> None: ... - - def cookies_to_session(self, copy_user_agent: bool = False) -> None: ... - - def cookies_to_driver(self, url: str = None) -> None: ... - - def check_page(self, by_requests: bool = False) -> Union[bool, None]: ... - - def close_driver(self) -> None: ... - - def close_session(self) -> None: ... - - # ----------------重写SessionPage的函数----------------------- - def post(self, - url: str, - data: Union[dict, str, None] = None, - show_errmsg: bool = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - @property - def download(self) -> DownloadKit: ... - - def chrome_downloading(self, path: str = None) -> list: ... - - # ----------------MixPage独有函数----------------------- - def hide_browser(self) -> None: ... - - def show_browser(self) -> None: ... diff --git a/DrissionPage/mixpage/session_element.py b/DrissionPage/mixpage/session_element.py deleted file mode 100644 index a2eb6d0..0000000 --- a/DrissionPage/mixpage/session_element.py +++ /dev/null @@ -1,357 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from html import unescape -from re import match, DOTALL - -from lxml.etree import tostring -from lxml.html import HtmlElement, fromstring - -from .base import DrissionElement, BasePage, BaseElement -from ..commons.locator import get_loc -from ..commons.web import get_ele_txt, make_absolute_link - - -class SessionElement(DrissionElement): - """session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能""" - - def __init__(self, ele, page=None): - """初始化对象 - :param ele: 被包装的HtmlElement元素 - :param page: 元素所在页面对象,如果是从 html 文本生成的元素,则为 None - """ - super().__init__(page) - self._inner_ele = ele - - @property - def inner_ele(self): - return self._inner_ele - - def __repr__(self): - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self.ele(loc_or_str) - - @property - def tag(self): - """返回元素类型""" - return self._inner_ele.tag - - @property - def html(self): - """返回outerHTML文本""" - html = tostring(self._inner_ele, method="html").decode() - return unescape(html[:html.rfind('>') + 1]) # tostring()会把跟紧元素的文本节点也带上,因此要去掉 - - @property - def inner_html(self): - """返回元素innerHTML文本""" - r = match(r'<.*?>(.*)', self.html, flags=DOTALL) - return '' if not r else r.group(1) - - @property - def attrs(self): - """返回元素所有属性及值""" - return {attr: self.attr(attr) for attr, val in self.inner_ele.items()} - - @property - def text(self): - """返回元素内所有文本""" - return get_ele_txt(self) - - @property - def raw_text(self): - """返回未格式化处理的元素内文本""" - return str(self._inner_ele.text_content()) - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: 上级元素对象 - """ - return super().parent(level_or_loc) - - def prev(self, filter_loc='', index=1, timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - return super().prev(index, filter_loc, timeout) - - def next(self, filter_loc='', index=1, timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 后面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素 - """ - return super().next(index, filter_loc, timeout) - - def before(self, filter_loc='', index=1, timeout=None): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 前面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的某个元素或节点 - """ - return super().before(index, filter_loc, timeout) - - def after(self, filter_loc='', index=1, timeout=None): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param filter_loc: 用于筛选的查询语法 - :param index: 后面第几个查询结果 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的某个元素或节点 - """ - return super().after(index, filter_loc, timeout) - - def prevs(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().prevs(filter_loc, timeout) - - def nexts(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 兄弟元素或节点文本组成的列表 - """ - return super().nexts(filter_loc, timeout) - - def befores(self, filter_loc='', timeout=None): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素前面的元素或节点组成的列表 - """ - return super().befores(filter_loc, timeout) - - def afters(self, filter_loc='', timeout=None): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :param timeout: 查找节点的超时时间 - :return: 本元素后面的元素或节点组成的列表 - """ - return super().afters(filter_loc, timeout) - - def attr(self, attr): - """返回attribute属性值 - :param attr: 属性名 - :return: 属性值文本,没有该属性返回None - """ - # 获取href属性时返回绝对url - if attr == 'href': - link = self.inner_ele.get('href') - # 若为链接为None、js或邮件,直接返回 - if not link or link.lower().startswith(('javascript:', 'mailto:')): - return link - - else: # 其它情况直接返回绝对url - return make_absolute_link(link, self.page) - - elif attr == 'src': - return make_absolute_link(self.inner_ele.get('src'), self.page) - - elif attr == 'text': - return self.text - - elif attr == 'innerText': - return self.raw_text - - elif attr in ('html', 'outerHTML'): - return self.html - - elif attr == 'innerHTML': - return self.inner_html - - else: - return self.inner_ele.get(attr) - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和DriverElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def s_ele(self, loc_or_str=None): - """返回当前元素下级符合条件的第一个元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str) - - def s_eles(self, loc_or_str): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 - :return: SessionElement对象 - """ - return make_session_ele(self, loc_or_str, single) - - def _get_ele_path(self, mode): - """获取css路径或xpath路径 - :param mode: 'css' 或 'xpath' - :return: css路径或xpath路径 - """ - path_str = '' - ele = self - - while ele: - if mode == 'css': - brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) - path_str = f'>:nth-child({brothers + 1}){path_str}' - else: - brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) - path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' - - ele = ele.parent() - - return f':root{path_str[1:]}' if mode == 'css' else path_str - - -def make_session_ele(html_or_ele, loc=None, single=True): - """从接收到的对象或html文本中查找元素,返回SessionElement对象 - 如要直接从html生成SessionElement而不在下级查找,loc输入None即可 - :param html_or_ele: html文本、BaseParser对象 - :param loc: 定位元组或字符串,为None时不在下级查找,返回根元素 - :param single: True则返回第一个,False则返回全部 - :return: 返回SessionElement元素或列表,或属性文本 - """ - # ---------------处理定位符--------------- - if not loc: - if isinstance(html_or_ele, SessionElement): - return html_or_ele if single else [html_or_ele] - - loc = ('xpath', '.') - - elif isinstance(loc, (str, tuple)): - loc = get_loc(loc) - - else: - raise ValueError("定位符必须为str或长度为2的tuple。") - - # ---------------根据传入对象类型获取页面对象和lxml元素对象--------------- - the_type = str(type(html_or_ele)) - # SessionElement - if the_type.endswith(".SessionElement'>"): - page = html_or_ele.page - - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc[1]}' - html_or_ele = html_or_ele.inner_ele - - # 若css以>开头,表示找元素的直接子元素,要用page以绝对路径才能找到 - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): - loc_str = f'{html_or_ele.css_path}{loc[1]}' - if html_or_ele.page: - html_or_ele = fromstring(html_or_ele.page.html) - else: # 接收html文本,无page的情况 - html_or_ele = fromstring(html_or_ele('xpath:/ancestor::*').html) - - else: - html_or_ele = html_or_ele.inner_ele - - loc = loc[0], loc_str - - # ChromiumElement, DriverElement - elif the_type.endswith((".ChromiumElement'>", ".DriverElement'>")): - loc_str = loc[1] - if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): - loc_str = f'.{loc[1]}' - elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): - loc_str = f'{html_or_ele.css_path}{loc[1]}' - loc = loc[0], loc_str - - # 获取整个页面html再定位到当前元素,以实现查找上级元素 - page = html_or_ele.page - xpath = html_or_ele.xpath - if hasattr(html_or_ele, 'doc_id'): # ChromiumElement,兼容传入的元素在iframe内的情况 - html = html_or_ele.page.run_cdp('DOM.getOuterHTML', objectId=html_or_ele.doc_id)['outerHTML'] - else: - html = html_or_ele.page.html - html_or_ele = fromstring(html) - html_or_ele = html_or_ele.xpath(xpath)[0] - - # 各种页面对象 - elif isinstance(html_or_ele, BasePage): - page = html_or_ele - html_or_ele = fromstring(html_or_ele.html) - - # 直接传入html文本 - elif isinstance(html_or_ele, str): - page = None - html_or_ele = fromstring(html_or_ele) - - # ShadowRootElement, ChromiumShadowRoot, ChromiumFrame - elif isinstance(html_or_ele, BaseElement) or the_type.endswith(".ChromiumFrame'>"): - page = html_or_ele.page - html_or_ele = fromstring(html_or_ele.html) - - else: - raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') - - # ---------------执行查找----------------- - try: - if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 - ele = html_or_ele.xpath(loc[1]) - else: # 用css selector获取元素对象列表 - ele = html_or_ele.cssselect(loc[1]) - - if not isinstance(ele, list): # 结果不是列表,如数字 - return ele - - # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 - if single: - ele = ele[0] if ele else None - if isinstance(ele, HtmlElement): - return SessionElement(ele, page) - elif isinstance(ele, str): - return ele - else: - return None - - else: # 返回全部 - return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] - - except Exception as e: - if 'Invalid expression' in str(e): - raise SyntaxError(f'无效的xpath语句:{loc}') - elif 'Expected selector' in str(e): - raise SyntaxError(f'无效的css select语句:{loc}') - - raise e diff --git a/DrissionPage/mixpage/session_element.pyi b/DrissionPage/mixpage/session_element.pyi deleted file mode 100644 index 69dcb35..0000000 --- a/DrissionPage/mixpage/session_element.pyi +++ /dev/null @@ -1,114 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, List, Tuple - -from lxml.html import HtmlElement - -from .base import DrissionElement, BaseElement -from .driver_element import DriverElement -from .driver_page import DriverPage -from .session_page import SessionPage - - -class SessionElement(DrissionElement): - - def __init__(self, ele: HtmlElement, page: Union[SessionPage, None] = None): - self._inner_ele: HtmlElement = ... - self.page: SessionPage = ... - - @property - def inner_ele(self) -> HtmlElement: ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - @property - def attrs(self) -> dict: ... - - @property - def text(self) -> str: ... - - @property - def raw_text(self) -> str: ... - - def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['SessionElement', None]: ... - - def prev(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def next(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def before(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def after(self, - filter_loc: Union[tuple, str] = '', - index: int = 1, - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def prevs(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def nexts(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def befores(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def afters(self, - filter_loc: Union[tuple, str] = '', - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def attr(self, attr: str) -> Union[str, None]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['SessionElement', str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['SessionElement', str]]: ... - - def s_ele(self, - loc_or_str: Union[Tuple[str, str], str] = None) -> Union['SessionElement', str, None]: ... - - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[Union['SessionElement', str]]: ... - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True, - relative: bool = False) -> Union['SessionElement', str, None, List[Union['SessionElement', str]]]: ... - - def _get_ele_path(self, mode: str) -> str: ... - - -def make_session_ele(html_or_ele: Union[str, SessionElement, SessionPage, DriverElement, BaseElement, DriverPage], - loc: Union[str, Tuple[str, str]] = None, - single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... diff --git a/DrissionPage/mixpage/session_page.py b/DrissionPage/mixpage/session_page.py deleted file mode 100644 index b866346..0000000 --- a/DrissionPage/mixpage/session_page.py +++ /dev/null @@ -1,533 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from re import search -from time import sleep -from urllib.parse import urlparse - -from DownloadKit import DownloadKit -from requests import Session, Response -from requests.structures import CaseInsensitiveDict -from tldextract import extract - -from .base import BasePage -from .session_element import SessionElement, make_session_ele -from ..commons.web import cookie_to_dict, set_session_cookies -from ..configs.session_options import SessionOptions - - -class SessionPage(BasePage): - """SessionPage封装了页面操作的常用功能,使用requests来获取、解析网页""" - - def __init__(self, session_or_options=None, timeout=None): - """ - :param session_or_options: Session对象或SessionOptions对象 - :param timeout: 连接超时时间,为None时从ini文件读取 - """ - self._response = None - self._download_set = None - self._session = None - self._set = None - self._set_start_options(session_or_options, None) - self._set_runtime_settings() - self._create_session() - timeout = timeout if timeout is not None else self.timeout - super().__init__(timeout) - - def _set_start_options(self, session_or_options, none): - """启动配置 - :param session_or_options: Session、SessionOptions - :param none: 用于后代继承 - :return: None - """ - if not session_or_options or isinstance(session_or_options, SessionOptions): - self._session_options = session_or_options or SessionOptions(session_or_options) - - elif isinstance(session_or_options, Session): - self._session_options = SessionOptions() - self._session = session_or_options - - def _set_runtime_settings(self): - """设置运行时用到的属性""" - self._timeout = self._session_options.timeout - self._download_path = self._session_options.download_path - - def _create_session(self): - """创建内建Session对象""" - if not self._session: - self._set_session(self._session_options) - - def _set_session(self, opt): - """根据传入字典对session进行设置 - :param opt: session配置字典 - :return: None - """ - self._session = Session() - - if opt.headers: - self._session.headers = CaseInsensitiveDict(opt.headers) - if opt.cookies: - self.set.cookies(opt.cookies) - if opt.adapters: - for url, adapter in opt.adapters: - self._session.mount(url, adapter) - - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] - for i in attrs: - attr = opt.__getattribute__(i) - if attr: - self._session.__setattr__(i, attr) - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性文本 - """ - return self.ele(loc_or_str) - - # -----------------共有属性和方法------------------- - @property - def title(self): - """返回网页title""" - ele = self.ele('xpath://title') - return ele.text if ele else None - - @property - def url(self): - """返回当前访问url""" - return self._url - - @property - def html(self): - """返回页面的html文本""" - return self.response.text if self.response else '' - - @property - def json(self): - """当返回内容是json格式时,返回对应的字典,非json格式时返回None""" - try: - return self.response.json() - except Exception: - return None - - @property - def download_path(self): - """返回下载路径""" - return self._download_path - - @property - def download_set(self): - """返回用于设置下载参数的对象""" - if self._download_set is None: - self._download_set = DownloadSetter(self) - return self._download_set - - @property - def download(self): - """返回下载器对象""" - return self.download_set.DownloadKit - - @property - def session(self): - """返回session对象""" - return self._session - - @property - def response(self): - """返回访问url得到的response对象""" - return self._response - - @property - def set(self): - """返回用于等待的对象""" - if self._set is None: - self._set = SessionPageSetter(self) - return self._set - - def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs): - """用get方式跳转到url - :param url: 目标url - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param timeout: 连接超时时间(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs) - - def ele(self, loc_or_ele, timeout=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_ele) - - def eles(self, loc_or_str, timeout=None): - """返回页面中所有符合条件的元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和ChromiumElement对应,便于无差别调用 - :return: SessionElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, single=False) - - def s_ele(self, loc_or_ele=None): - """返回页面中符合条件的第一个元素、属性或节点文本 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele) - - def s_eles(self, loc_or_str): - """返回页面中符合条件的所有元素、属性或节点文本 - :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return self._ele(loc_or_str, single=False) - - def _ele(self, loc_or_ele, timeout=None, single=True): - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 - :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 - :param timeout: 不起实际作用,用于和父类对应 - :param single: True则返回第一个,False则返回全部 - :return: SessionElement对象 - """ - return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single) - - def get_cookies(self, as_dict=False, all_domains=False): - """返回cookies - :param as_dict: 是否以字典方式返回 - :param all_domains: 是否返回所有域的cookies - :return: cookies信息 - """ - if all_domains: - cookies = self.session.cookies - else: - if self.url: - url = extract(self.url) - domain = f'{url.domain}.{url.suffix}' - cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') - else: - cookies = tuple(x for x in self.session.cookies) - - if as_dict: - return {x.name: x.value for x in cookies} - else: - return [cookie_to_dict(cookie) for cookie in cookies] - - def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """用post方式跳转到url - :param url: 目标url - :param data: 提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs) - - def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs): - """执行get或post连接 - :param url: 目标url - :param mode: 'get' 或 'post' - :param data: 提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param retry: 重试次数 - :param interval: 重试间隔(秒) - :param kwargs: 连接参数 - :return: url是否可用 - """ - retry, interval = self._before_connect(url, retry, interval) - self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs) - - if self._response is None: - self._url_available = False - - else: - if self._response.ok: - self._url_available = True - - else: - if show_errmsg: - raise ConnectionError(f'状态码:{self._response.status_code}.') - self._url_available = False - - return self._url_available - - def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs): - """生成Response对象 - :param url: 目标url - :param mode: 'get' 或 'post' - :param data: post方式要提交的数据 - :param show_errmsg: 是否显示和抛出异常 - :param kwargs: 其它参数 - :return: tuple,第一位为Response或None,第二位为出错信息或'Success' - """ - kwargs = CaseInsensitiveDict(kwargs) - if 'headers' not in kwargs: - kwargs['headers'] = {} - else: - kwargs['headers'] = CaseInsensitiveDict(kwargs['headers']) - - # 设置referer和host值 - parsed_url = urlparse(url) - hostname = parsed_url.hostname - scheme = parsed_url.scheme - if not check_headers(kwargs, self.session.headers, 'Referer'): - kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}' - if 'Host' not in kwargs['headers']: - kwargs['headers']['Host'] = hostname - - if not check_headers(kwargs, self.session.headers, 'timeout'): - kwargs['timeout'] = self.timeout - - if 'allow_redirects' not in kwargs: - kwargs['allow_redirects'] = False - - r = err = None - retry = retry if retry is not None else self.retry_times - interval = interval if interval is not None else self.retry_interval - for i in range(retry + 1): - try: - if mode == 'get': - r = self.session.get(url, **kwargs) - elif mode == 'post': - r = self.session.post(url, data=data, **kwargs) - - if r: - return set_charset(r), 'Success' - - except Exception as e: - err = e - - # if r and r.status_code in (403, 404): - # break - - if i < retry: - sleep(interval) - if show_errmsg: - print(f'重试 {url}') - - if r is None: - if show_errmsg: - if err: - raise err - else: - raise ConnectionError('连接失败') - return None, '连接失败' if err is None else err - - if not r.ok: - if show_errmsg: - raise ConnectionError(f'状态码:{r.status_code}') - return r, f'状态码:{r.status_code}' - - -class SessionPageSetter(object): - def __init__(self, page): - self._page = page - - def timeout(self, second): - """设置连接超时时间 - :param second: 秒数 - :return: None - """ - self._page.timeout = second - - def cookies(self, cookies): - """为Session对象设置cookies - :param cookies: cookies信息 - :return: None - """ - set_session_cookies(self._page.session, cookies) - - def headers(self, headers): - """设置通用的headers - :param headers: dict形式的headers - :return: None - """ - self._page.session.headers = CaseInsensitiveDict(headers) - - def header(self, attr, value): - """设置headers中一个项 - :param attr: 设置名称 - :param value: 设置值 - :return: None - """ - self._page.session.headers[attr.lower()] = value - - def user_agent(self, ua): - """设置user agent - :param ua: user agent - :return: None - """ - self._page.session.headers['user-agent'] = ua - - def proxies(self, http, https=None): - """设置proxies参数 - :param http: http代理地址 - :param https: https代理地址 - :return: None - """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._page.session.proxies = proxies - - def auth(self, auth): - """设置认证元组或对象 - :param auth: 认证元组或对象 - :return: None - """ - self._page.session.auth = auth - - def hooks(self, hooks): - """设置回调方法 - :param hooks: 回调方法 - :return: None - """ - self._page.session.hooks = hooks - - def params(self, params): - """设置查询参数字典 - :param params: 查询参数字典 - :return: None - """ - self._page.session.params = params - - def verify(self, on_off): - """设置是否验证SSL证书 - :param on_off: 是否验证 SSL 证书 - :return: None - """ - self._page.session.verify = on_off - - def cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 - :param cert: 证书路径或元组 - :return: None - """ - self._page.session.cert = cert - - def stream(self, on_off): - """设置是否使用流式响应内容 - :param on_off: 是否使用流式响应内容 - :return: None - """ - self._page.session.stream = on_off - - def trust_env(self, on_off): - """设置是否信任环境 - :param on_off: 是否信任环境 - :return: None - """ - self._page.session.trust_env = on_off - - def max_redirects(self, times): - """设置最大重定向次数 - :param times: 最大重定向次数 - :return: None - """ - self._page.session.max_redirects = times - - def add_adapter(self, url, adapter): - """添加适配器 - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: None - """ - self._page.session.mount(url, adapter) - - -class DownloadSetter(object): - """用于设置下载参数的类""" - - def __init__(self, page): - self._page = page - self._DownloadKit = None - - @property - def DownloadKit(self): - if self._DownloadKit is None: - self._DownloadKit = DownloadKit(session=self._page.session, goal_path=self._page.download_path) - return self._DownloadKit - - @property - def if_file_exists(self): - """返回用于设置存在同名文件时处理方法的对象""" - return FileExists(self) - - def split(self, on_off): - """设置是否允许拆分大文件用多线程下载 - :param on_off: 是否启用多线程下载大文件 - :return: None - """ - self.DownloadKit.split = on_off - - def save_path(self, path): - """设置下载保存路径 - :param path: 下载保存路径 - :return: None - """ - path = path if path is None else str(path) - self._page._download_path = path - self.DownloadKit.goal_path = path - - -class FileExists(object): - """用于设置存在同名文件时处理方法""" - - def __init__(self, setter): - """ - :param setter: DownloadSetter对象 - """ - self._setter = setter - - def __call__(self, mode): - if mode not in ('skip', 'rename', 'overwrite'): - raise ValueError("mode参数只能是'skip', 'rename', 'overwrite'") - self._setter.DownloadKit.file_exists = mode - - def skip(self): - """设为跳过""" - self._setter.DownloadKit.file_exists = 'skip' - - def rename(self): - """设为重命名,文件名后加序号""" - self._setter.DownloadKit._file_exists = 'rename' - - def overwrite(self): - """设为覆盖""" - self._setter.DownloadKit._file_exists = 'overwrite' - - -def check_headers(kwargs, headers, arg) -> bool: - """检查kwargs或headers中是否有arg所示属性""" - return arg in kwargs['headers'] or arg in headers - - -def set_charset(response) -> Response: - """设置Response对象的编码""" - # 在headers中获取编码 - content_type = response.headers.get('content-type', '').lower() - charset = search(r'charset[=: ]*(.*)?;', content_type) - - if charset: - response.encoding = charset.group(1) - - # 在headers中获取不到编码,且如果是网页 - elif content_type.replace(' ', '').startswith('text/html'): - re_result = search(b']+).*?>', response.content) - - if re_result: - charset = re_result.group(1).decode() - else: - charset = response.apparent_encoding - - response.encoding = charset - - return response diff --git a/DrissionPage/mixpage/session_page.pyi b/DrissionPage/mixpage/session_page.pyi deleted file mode 100644 index 95011aa..0000000 --- a/DrissionPage/mixpage/session_page.pyi +++ /dev/null @@ -1,237 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from pathlib import Path -from typing import Any, Union, Tuple, List - -from DownloadKit import DownloadKit -from requests import Session, Response -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar -from requests.structures import CaseInsensitiveDict - -from .base import BasePage -from DrissionPage.configs.session_options import SessionOptions -from .session_element import SessionElement - - -class SessionPage(BasePage): - def __init__(self, - session_or_options: Union[Session, SessionOptions] = None, - timeout: float = None): - self._session: Session = ... - self._session_options: SessionOptions = ... - self._url: str = ... - self._response: Response = ... - self._download_path: str = ... - self._download_set: DownloadSetter = ... - self._url_available: bool = ... - self.timeout: float = ... - self.retry_times: int = ... - self.retry_interval: float = ... - self._set: SessionPageSetter = ... - - def _set_start_options(self, session_or_options, none) -> None: ... - - def _create_session(self) -> None: ... - - def _set_session(self, opt: SessionOptions) -> None: ... - - def _set_runtime_settings(self) -> None: ... - - def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def set_headers(self, headers: dict) -> None: ... - - def set_user_agent(self, ua: str) -> None: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, None]: ... - - # -----------------共有属性和方法------------------- - @property - def title(self) -> str: ... - - @property - def url(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def json(self) -> Union[dict, None]: ... - - @property - def download_path(self) -> str: ... - - @property - def download_set(self) -> DownloadSetter: ... - - def get(self, - url: str, - show_errmsg: bool | None = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = None, - params: dict | None = ..., - data: Union[dict, str, None] = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - def ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None) -> Union[SessionElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[SessionElement, str]]: ... - - def s_ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement] = None) \ - -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_ele: Union[Tuple[str, str], str, SessionElement], - timeout: float = None, - single: bool = True) -> Union[SessionElement, str, None, List[Union[SessionElement, str]]]: ... - - def get_cookies(self, - as_dict: bool = False, - all_domains: bool = False) -> Union[dict, list]: ... - - # ----------------session独有属性和方法----------------------- - @property - def session(self) -> Session: ... - - @property - def response(self) -> Response: ... - - @property - def set(self) -> SessionPageSetter: ... - - @property - def download(self) -> DownloadKit: ... - - def post(self, - url: str, - data: Union[dict, str, None] = ..., - show_errmsg: bool = False, - retry: int | None = None, - interval: float | None = None, - timeout: float | None = ..., - params: dict | None = ..., - json: Union[dict, str, None] = ..., - headers: dict | None = ..., - cookies: Any | None = ..., - files: Any | None = ..., - auth: Any | None = ..., - allow_redirects: bool = ..., - proxies: dict | None = ..., - hooks: Any | None = ..., - stream: Any | None = ..., - verify: Any | None = ..., - cert: Any | None = ...) -> bool: ... - - def _s_connect(self, - url: str, - mode: str, - data: Union[dict, str, None] = None, - show_errmsg: bool = False, - retry: int = None, - interval: float = None, - **kwargs) -> bool: ... - - def _make_response(self, - url: str, - mode: str = 'get', - data: Union[dict, str] = None, - retry: int = None, - interval: float = None, - show_errmsg: bool = False, - **kwargs) -> tuple: ... - - -class SessionPageSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - - def timeout(self, second: float) -> None: ... - - def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... - - def headers(self, headers: dict) -> None: ... - - def header(self, attr: str, value: str) -> None: ... - - def user_agent(self, ua: str) -> None: ... - - def proxies(self, http, https=None) -> None: ... - - def auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> None: ... - - def hooks(self, hooks: Union[dict, None]) -> None: ... - - def params(self, params: Union[dict, None]) -> None: ... - - def verify(self, on_off: Union[bool, None]) -> None: ... - - def cert(self, cert: Union[str, Tuple[str, str], None]) -> None: ... - - def stream(self, on_off: Union[bool, None]) -> None: ... - - def trust_env(self, on_off: Union[bool, None]) -> None: ... - - def max_redirects(self, times: Union[int, None]) -> None: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> None: ... - - -class DownloadSetter(object): - def __init__(self, page: SessionPage): - self._page: SessionPage = ... - self._DownloadKit: DownloadKit = ... - - @property - def DownloadKit(self) -> DownloadKit: ... - - @property - def if_file_exists(self) -> FileExists: ... - - def split(self, on_off: bool) -> None: ... - - def save_path(self, path: Union[str, Path]): ... - - -class FileExists(object): - def __init__(self, setter: DownloadSetter): - self._setter: DownloadSetter = ... - - def __call__(self, mode: str) -> None: ... - - def skip(self) -> None: ... - - def rename(self) -> None: ... - - def overwrite(self) -> None: ... - - -def check_headers(kwargs: Union[dict, CaseInsensitiveDict], headers: Union[dict, CaseInsensitiveDict], - arg: str) -> bool: ... - - -def set_charset(response: Response) -> Response: ... diff --git a/DrissionPage/mixpage/shadow_root_element.py b/DrissionPage/mixpage/shadow_root_element.py deleted file mode 100644 index 227d39e..0000000 --- a/DrissionPage/mixpage/shadow_root_element.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from time import perf_counter -from typing import Union - -from selenium.webdriver.remote.webelement import WebElement - -from .base import BaseElement -from .driver_element import make_driver_ele -from .session_element import make_session_ele, SessionElement -from ..commons.locator import get_loc - - -class ShadowRootElement(BaseElement): - """ShadowRootElement是用于处理ShadowRoot的类,使用方法和DriverElement基本一致""" - - def __init__(self, inner_ele, parent_ele): - super().__init__(parent_ele.page) - self.parent_ele = parent_ele - self._inner_ele = inner_ele - - @property - def inner_ele(self): - return self._inner_ele - - def __repr__(self): - return f'' - - def __call__(self, loc_or_str, timeout=None): - """在内部查找元素 - 例:ele2 = ele1('@id=ele_id') - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 超时时间 - :return: DriverElement对象或属性、文本 - """ - return self.ele(loc_or_str, timeout) - - @property - def tag(self): - """元素标签名""" - return 'shadow-root' - - @property - def html(self): - return f'{self.inner_html}' - - @property - def inner_html(self): - """返回内部的html文本""" - shadow_root = WebElement(self.page.driver, self.inner_ele._id) - return shadow_root.get_attribute('innerHTML') - - def parent(self, level_or_loc=1): - """返回上面某一级父元素,可指定层数或用查询语法定位 - :param level_or_loc: 第几级父元素,或定位符 - :return: DriverElement对象 - """ - if isinstance(level_or_loc, int): - loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' - - elif isinstance(level_or_loc, (tuple, str)): - loc = get_loc(level_or_loc, True) - - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}' - - else: - raise TypeError('level_or_loc参数只能是tuple、int或str。') - - return self.parent_ele.ele(loc, timeout=0) - - def next(self, index=1, filter_loc=''): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: DriverElement对象 - """ - nodes = self.nexts(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def before(self, index=1, filter_loc=''): - """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 前面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素前面的某个元素或节点 - """ - nodes = self.befores(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def after(self, index=1, filter_loc=''): - """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 - :param index: 后面第几个查询结果 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素后面的某个元素或节点 - """ - nodes = self.afters(filter_loc=filter_loc) - return nodes[index - 1] if nodes else None - - def nexts(self, filter_loc=''): - """返回后面所有兄弟元素或节点组成的列表 - :param filter_loc: 用于筛选的查询语法 - :return: DriverElement对象组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./{loc}' - return self.parent_ele.eles(xpath, timeout=0.1) - - def befores(self, filter_loc=''): - """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素前面的元素或节点组成的列表 - """ - loc = get_loc(filter_loc, True) - if loc[0] == 'css selector': - raise ValueError('此css selector语法不受支持,请换成xpath。') - - loc = loc[1].lstrip('./') - xpath = f'xpath:./preceding::{loc}' - return self.parent_ele.eles(xpath, timeout=0.1) - - def afters(self, filter_loc=''): - """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 - :param filter_loc: 用于筛选的查询语法 - :return: 本元素后面的元素或节点组成的列表 - """ - eles1 = self.nexts(filter_loc) - loc = get_loc(filter_loc, True)[1].lstrip('./') - xpath = f'xpath:./following::{loc}' - return eles1 + self.parent_ele.eles(xpath, timeout=0.1) - - def ele(self, loc_or_str, timeout=None): - """返回当前元素下级符合条件的第一个元素,默认返回 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本 - """ - return self._ele(loc_or_str, timeout) - - def eles(self, loc_or_str, timeout=None): - """返回当前元素下级所有符合条件的子元素 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def s_ele(self, loc_or_str=None) -> Union[SessionElement, str, None]: - """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_str) - - def s_eles(self, loc_or_str): - """查找所有符合条件的元素以SessionElement列表形式返回,处理复杂页面时效率很高 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :return: SessionElement对象或属性、文本 - """ - return make_session_ele(self, loc_or_str, single=False) - - def _ele(self, loc_or_str, timeout=None, single=True, relative=False): - """返回当前元素下级符合条件的子元素,默认返回第一个 - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :param relative: WebPage用的表示是否相对定位的参数 - :return: DriverElement对象 - """ - # 先转换为sessionElement,再获取所有元素,获取它们的css selector路径,再用路径在页面上执行查找 - loc = get_loc(loc_or_str) - if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): - loc = loc[0], loc[1][5:] - - timeout = timeout if timeout is not None else self.page.timeout - t1 = perf_counter() - eles = make_session_ele(self.html).eles(loc) - while not eles and perf_counter() - t1 <= timeout: - eles = make_session_ele(self.html).eles(loc) - - if not eles: - return None if single else eles - - css_paths = [i.css_path[47:] for i in eles] - - if single: - return make_driver_ele(self, f'css:{css_paths[0]}', single, timeout) - else: - return [make_driver_ele(self, f'css:{css}', True, timeout) for css in css_paths] - - def run_script(self, script, *args): - """执行js代码,传入自己为第一个参数 - :param script: js文本 - :param args: 传入的参数 - :return: js执行结果 - """ - shadow_root = WebElement(self.page.driver, self.inner_ele._id) - return shadow_root.parent.execute_script(script, shadow_root, *args) - - def is_enabled(self): - """是否可用""" - return self.inner_ele.is_enabled() - - def is_valid(self): - """用于判断元素是否还能用,应对页面跳转元素不能用的情况""" - try: - self.is_enabled() - return True - - except Exception: - return False diff --git a/DrissionPage/mixpage/shadow_root_element.pyi b/DrissionPage/mixpage/shadow_root_element.pyi deleted file mode 100644 index 8647391..0000000 --- a/DrissionPage/mixpage/shadow_root_element.pyi +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from typing import Union, Any, Tuple, List - -from selenium.webdriver.remote.webelement import WebElement - -from .driver_page import DriverPage -from .mix_page import MixPage -from .base import BaseElement -from .driver_element import DriverElement -from .session_element import SessionElement - - -class ShadowRootElement(BaseElement): - - def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): - self._inner_ele: WebElement = ... - self.parent_ele: DriverElement = ... - self.page: Union[MixPage, DriverPage] = ... - - @property - def inner_ele(self) -> WebElement: ... - - def __repr__(self) -> str: ... - - def __call__(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - @property - def tag(self) -> str: ... - - @property - def html(self) -> str: ... - - @property - def inner_html(self) -> str: ... - - def parent(self, level_or_loc: Union[str, int] = 1) -> DriverElement: ... - - def next(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def before(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def after(self, - index: int = 1, - filter_loc: Union[tuple, str] = '') -> Union[DriverElement, str, None]: ... - - def nexts(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def befores(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def afters(self, filter_loc: Union[tuple, str] = '') -> List[Union[DriverElement, str]]: ... - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[DriverElement, str, None]: ... - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union[DriverElement, str]]: ... - - def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: ... - - def s_eles(self, loc_or_str: Union[Tuple[str, str], str]) -> List[Union[SessionElement, str]]: ... - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = ..., - single: bool = ..., - relative: bool = ...) -> Union[DriverElement, str, None, List[Union[DriverElement, str]]]: ... - - def run_script(self, script: str, *args) -> Any: ... - - def is_enabled(self) -> bool: ... - - def is_valid(self) -> bool: ...