每个页面对象都有session属性;修复元素s_ele()问题;修复attr()大写字母问题

This commit is contained in:
g1879 2024-09-04 17:45:23 +08:00
parent bec1e1473c
commit ad78edabec
23 changed files with 134 additions and 109 deletions

View File

@ -12,4 +12,4 @@ from ._pages.chromium_page import ChromiumPage
from ._pages.session_page import SessionPage
from ._pages.web_page import WebPage
__version__ = '4.1.0.0b21'
__version__ = '4.1.0.0b22'

View File

@ -6,12 +6,15 @@
@License : BSD 3-Clause.
"""
from abc import abstractmethod
from copy import copy
from pathlib import Path
from re import sub
from urllib.parse import quote
from DownloadKit import DownloadKit
from requests import Session
from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement
from .._functions.elements import get_frame
from .._functions.locator import get_loc
@ -247,6 +250,9 @@ class BasePage(BaseParser):
self._download_path = None
self._none_ele_return_value = False
self._none_ele_value = None
self._session = None
self._headers = None
self._session_options = None
self._type = 'BasePage'
@property
@ -265,6 +271,8 @@ class BasePage(BaseParser):
@property
def download(self):
    """Return this page's DownloadKit helper, building it on first access.

    The helper is cached in ``self._DownloadKit``. Before the first build,
    a session is created via ``self._create_session()`` if ``self._session``
    is falsy.
    """
    # Fast path: the helper was already built on an earlier access.
    if self._DownloadKit is not None:
        return self._DownloadKit

    # Make sure a session exists before the helper is constructed.
    if not self._session:
        self._create_session()

    self._DownloadKit = DownloadKit(driver=self, goal_path=self.download_path)
    return self._DownloadKit
@ -281,6 +289,24 @@ class BasePage(BaseParser):
interval = interval if interval is not None else self.retry_interval
return retry, interval, is_file
def _set_session_options(self, session_or_options=None):
    """Record the session configuration for this page.

    :param session_or_options: a falsy value, a SessionOptions object, or a
        requests Session object
    :return: None

    Any other truthy value matches no branch and leaves the page unchanged.
    """
    if not session_or_options:
        # The falsy value itself is forwarded as SessionOptions' first
        # positional argument -- presumably so that False and None can be
        # told apart there; verify against SessionOptions.__init__.
        self._session_options = SessionOptions(session_or_options)
        return

    if isinstance(session_or_options, SessionOptions):
        self._session_options = session_or_options
        return

    if isinstance(session_or_options, Session):
        self._session_options = SessionOptions()
        # Work on a shallow copy so the caller's Session object keeps its
        # own ``headers`` attribute.
        own_session = copy(session_or_options)
        self._session = own_session
        # Headers are kept on the page; the copied session's are cleared.
        self._headers = own_session.headers
        own_session.headers = None
def _create_session(self):
    """Build the page's Session object and headers from its options.

    If no options are set yet, ``self._set_session_options()`` is called
    first. The pair returned by ``make_session()`` is stored in
    ``self._session`` and ``self._headers``, replacing any previous values.
    """
    options_missing = not self._session_options
    if options_missing:
        self._set_session_options()

    built = self._session_options.make_session()
    self._session, self._headers = built
# ----------------以下属性或方法由后代实现----------------
@property
def url(self):

View File

@ -9,7 +9,10 @@ from abc import abstractmethod
from typing import Union, Tuple, List, Any, Optional
from DownloadKit import DownloadKit
from requests import Session
from requests.structures import CaseInsensitiveDict
from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement
from .._elements.session_element import SessionElement
from .._functions.elements import SessionElementsList
@ -339,6 +342,9 @@ class BasePage(BaseParser):
_none_ele_return_value: bool = ...
_none_ele_value: Any = ...
_page: Union[ChromiumPage, SessionPage, WebPage] = ...
_session: Optional[Session] = ...
_headers: Optional[CaseInsensitiveDict] = ...
_session_options: Optional[SessionOptions] = ...
def __init__(self): ...
@ -371,6 +377,17 @@ class BasePage(BaseParser):
"""
...
def _set_session_options(self, session_or_options: Union[Session, SessionOptions] = None) -> None:
    """Set the start-up configuration.
    :param session_or_options: a Session or SessionOptions object
    :return: None
    """
    ...
def _create_session(self) -> None:
    """Create the built-in Session object."""
    ...
# ----------------以下属性或方法由后代实现----------------
@property
def url(self) -> str: ...

View File

@ -76,6 +76,9 @@ class ChromiumOptions(object):
return
def __repr__(self):
return f'<ChromiumOptions at {id(self)}>'
@property
def download_path(self):
return self._download_path
@ -422,6 +425,3 @@ class ChromiumOptions(object):
def save_to_default(self):
return self.save('default')
def __repr__(self):
return f'<ChromiumOptions at {id(self)}>'

View File

@ -85,6 +85,9 @@ class SessionOptions(object):
self._retry_times = others.get('retry_times', 3)
self._retry_interval = others.get('retry_interval', 2)
def __repr__(self):
return f'<SessionOptions at {id(self)}>'
# ===========须独立处理的项开始============
@property
def download_path(self):
@ -347,9 +350,6 @@ class SessionOptions(object):
self._adapters = [(k, i) for k, i in session.adapters.items()]
return self
def __repr__(self):
return f'<SessionOptions at {id(self)}>'
def session_options_to_dict(options):
if options in (False, None):

View File

@ -69,13 +69,13 @@ class ChromiumElement(DrissionElement):
else:
raise ElementLostError
def __call__(self, locator, index=1, timeout=None):
return self.ele(locator, index=index, timeout=timeout)
def __repr__(self):
attrs = [f"{k}='{v}'" for k, v in self.attrs.items()]
return f'<ChromiumElement {self.tag} {" ".join(attrs)}>'
def __call__(self, locator, index=1, timeout=None):
return self.ele(locator, index=index, timeout=timeout)
def __eq__(self, other):
return self._backend_id == getattr(other, '_backend_id', None)
@ -421,7 +421,7 @@ class ChromiumElement(DrissionElement):
def s_ele(self, locator=None, index=1, timeout=None):
return (make_session_ele(self, locator, index=index, method='s_ele()')
if self.ele(locator, index=index, timeout=timeout)
if locator is None or self.ele(locator, index=index, timeout=timeout)
else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index}))
def s_eles(self, locator=None, timeout=None):
@ -701,12 +701,12 @@ class ShadowRoot(BaseElement):
self._states = None
self._type = 'ShadowRoot'
def __repr__(self):
return f'<ShadowRoot in {self.parent_ele}>'
def __call__(self, locator, index=1, timeout=None):
return self.ele(locator, index=index, timeout=timeout)
def __repr__(self):
return f'<ShadowRoot in {self.parent_ele}>'
def __eq__(self, other):
return self._backend_id == getattr(other, '_backend_id', None)
@ -844,7 +844,7 @@ class ShadowRoot(BaseElement):
def s_ele(self, locator=None, index=1, timeout=None):
return (make_session_ele(self, locator, index=index, method='s_ele()')
if self.ele(locator, index=index, timeout=timeout)
if locator is None or self.ele(locator, index=index, timeout=timeout)
else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index}))
def s_eles(self, locator, timeout=None):

View File

@ -58,8 +58,6 @@ class ChromiumElement(DrissionElement):
"""
...
def __repr__(self) -> str: ...
def __call__(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
@ -71,6 +69,8 @@ class ChromiumElement(DrissionElement):
"""
...
def __repr__(self) -> str: ...
def __eq__(self, other: ChromiumElement) -> bool: ...
@property
@ -616,8 +616,6 @@ class ShadowRoot(BaseElement):
"""
...
def __repr__(self) -> str: ...
def __call__(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
@ -631,6 +629,8 @@ class ShadowRoot(BaseElement):
"""
...
def __repr__(self) -> str: ...
def __eq__(self, other: ShadowRoot) -> bool: ...
@property

View File

@ -30,6 +30,9 @@ class NoneElement(object):
else:
return self
def __repr__(self):
    """Return a debug string: the method name followed by ``key=value`` pairs from ``self.args``."""
    method = self.method
    arg_text = ", ".join(f"{key}={value}" for key, value in self.args.items())
    return f'<NoneElement method={method}, {arg_text}>'
def __getattr__(self, item):
if not self._none_ele_return_value:
raise ElementNotFoundError(None, self.method, self.args)
@ -48,6 +51,3 @@ class NoneElement(object):
def __bool__(self):
return False
def __repr__(self):
return f'<NoneElement method={self.method}, {", ".join([f"{k}={v}" for k, v in self.args.items()])}>'

View File

@ -24,10 +24,10 @@ class NoneElement(object):
def __call__(self, *args, **kwargs) -> NoneElement: ...
def __repr__(self) -> str: ...
def __getattr__(self, item: str) -> str: ...
def __eq__(self, other: Any) -> bool: ...
def __bool__(self) -> bool: ...
def __repr__(self) -> str: ...

View File

@ -29,10 +29,6 @@ class SessionElement(DrissionElement):
self._inner_ele = ele
self._type = 'SessionElement'
@property
def inner_ele(self):
return self._inner_ele
def __repr__(self):
attrs = [f"{k}='{v}'" for k, v in self.attrs.items()]
return f'<SessionElement {self.tag} {" ".join(attrs)}>'
@ -43,6 +39,10 @@ class SessionElement(DrissionElement):
def __eq__(self, other):
return self.xpath == getattr(other, 'xpath', None)
@property
def inner_ele(self):
return self._inner_ele
@property
def tag(self):
return self._inner_ele.tag
@ -128,7 +128,7 @@ class SessionElement(DrissionElement):
return self.inner_html
else:
return self.inner_ele.get(name)
return self.inner_ele.get(name.lower())
def ele(self, locator, index=1, timeout=None):
return self._ele(locator, index=index, method='ele()')

View File

@ -25,11 +25,6 @@ class SessionElement(DrissionElement):
self.owner: SessionPage = ...
self.page: SessionPage = ...
@property
def inner_ele(self) -> HtmlElement: ...
def __repr__(self) -> str: ...
def __call__(self,
locator: Union[Tuple[str, str], str],
index: int = 1,
@ -43,8 +38,13 @@ class SessionElement(DrissionElement):
"""
...
def __repr__(self) -> str: ...
def __eq__(self, other: SessionElement) -> bool: ...
@property
def inner_ele(self) -> HtmlElement: ...
@property
def tag(self) -> str:
"""返回元素类型"""

View File

@ -39,8 +39,6 @@ __ERROR__ = 'error'
class ChromiumBase(BasePage):
"""标签页、Frame、Page基类"""
def __init__(self, browser, target_id=None):
super().__init__()
self._browser = browser
@ -353,6 +351,12 @@ class ChromiumBase(BasePage):
def upload_list(self):
return self._upload_list
@property
def session(self):
    """Return the page's Session object, creating it on first access.

    Creation is triggered only when ``self._session`` is exactly ``None``.
    """
    if self._session is not None:
        return self._session

    self._create_session()
    return self._session
@property
def _js_ready_state(self):
try:
@ -868,8 +872,6 @@ class ChromiumBase(BasePage):
class Timeout(object):
"""用于保存d模式timeout信息的类"""
def __init__(self, base=None, page_load=None, script=None):
self.base = 10 if base is None else base
self.page_load = 30 if page_load is None else page_load
@ -884,8 +886,6 @@ class Timeout(object):
class Alert(object):
"""用于保存alert信息的类"""
def __init__(self, auto=None):
self.activated = False
self.text = None

View File

@ -8,6 +8,8 @@
from pathlib import Path
from typing import Union, Tuple, Any, Optional, Literal
from requests import Session
from .chromium_page import ChromiumPage
from .chromium_tab import ChromiumTab
from .mix_tab import MixTab
@ -34,6 +36,7 @@ PIC_TYPE = Literal['jpg', 'jpeg', 'png', 'webp', True]
class ChromiumBase(BasePage):
"""标签页、Frame、Page基类"""
_tab: Union[ChromiumTab, MixTab, ChromiumFrame, ChromiumPage, WebPage] = ...
_browser: Chromium = ...
_driver: Optional[Driver] = ...
@ -263,6 +266,11 @@ class ChromiumBase(BasePage):
"""返回等待上传文件列表"""
...
@property
def session(self)->Session:
    """Return the Session object used for mode switching or downloads."""
    ...
@property
def _js_ready_state(self) -> str:
"""返回js获取的ready state信息"""
@ -632,6 +640,7 @@ class ChromiumBase(BasePage):
class Timeout(object):
"""用于保存d模式timeout信息的类"""
base: float = ...
page_load: float = ...
script: float = ...
@ -651,6 +660,7 @@ class Timeout(object):
class Alert(object):
"""用于保存alert信息的类"""
activated: Optional[bool] = ...
text: Optional[str] = ...
type: Optional[str] = ...

View File

@ -70,13 +70,13 @@ class ChromiumFrame(ChromiumBase):
def __call__(self, locator, index=1, timeout=None):
return self.ele(locator, index=index, timeout=timeout)
def __eq__(self, other):
return self._frame_id == getattr(other, '_frame_id', None)
def __repr__(self):
attrs = [f"{k}='{v}'" for k, v in self._frame_ele.attrs.items()]
return f'<ChromiumFrame {self.frame_ele.tag} {" ".join(attrs)}>'
def __eq__(self, other):
return self._frame_id == getattr(other, '_frame_id', None)
def _d_set_runtime_settings(self):
if not hasattr(self, '_timeouts'):
self._timeouts = copy(self._target_page.timeouts)

View File

@ -59,10 +59,10 @@ class ChromiumFrame(ChromiumBase):
"""
...
def __eq__(self, other: ChromiumFrame) -> bool: ...
def __repr__(self) -> str: ...
def __eq__(self, other: ChromiumFrame) -> bool: ...
def _d_set_runtime_settings(self) -> None:
"""重写设置浏览器运行参数方法"""
...

View File

@ -16,7 +16,6 @@ from .._units.waiter import ChromiumPageWaiter
class ChromiumPage(ChromiumBase):
"""用于管理浏览器的类"""
_PAGES = {}
def __new__(cls, addr_or_opts=None, tab_id=None, timeout=None):
@ -43,6 +42,9 @@ class ChromiumPage(ChromiumBase):
self.set.timeouts(base=timeout)
self._tab = self
def __repr__(self):
return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def _d_set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeouts = self.browser.timeouts
@ -120,6 +122,3 @@ class ChromiumPage(ChromiumBase):
def _on_disconnect(self):
ChromiumPage._PAGES.pop(self._browser.id, None)
def __repr__(self):
return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'

View File

@ -18,6 +18,7 @@ from .._units.waiter import ChromiumPageWaiter
class ChromiumPage(ChromiumBase):
"""用于管理浏览器和一个标签页的类"""
_PAGES: dict = ...
tab: ChromiumPage = ...
_browser: Chromium = ...

View File

@ -37,6 +37,9 @@ class ChromiumTab(ChromiumBase):
self._tab = self
self._type = 'ChromiumTab'
def __repr__(self):
return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>'
def _d_set_runtime_settings(self):
self._timeouts = copy(self.browser.timeouts)
self.retry_times = self.browser.retry_times
@ -63,9 +66,6 @@ class ChromiumTab(ChromiumBase):
def save(self, path=None, name=None, as_pdf=False, **kwargs):
return save_page(self, path, name, as_pdf, kwargs)
def __repr__(self):
return f'<ChromiumTab browser_id={self.browser.id} tab_id={self.tab_id}>'
def _on_disconnect(self):
if not self._disconnect_flag:
ChromiumTab._TABS.pop(self.tab_id, None)

View File

@ -5,7 +5,6 @@
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from .chromium_tab import ChromiumTab
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
@ -33,6 +32,9 @@ class MixTab(SessionPage, ChromiumTab, BasePage):
return super(SessionPage, self).__call__(locator, index=index, timeout=timeout) if self._d_mode \
else super().__call__(locator, index=index)
def __repr__(self):
return f'<MixTab browser_id={self.browser.id} tab_id={self.tab_id}>'
@property
def set(self):
if self._set is None:
@ -75,12 +77,6 @@ class MixTab(SessionPage, ChromiumTab, BasePage):
def user_agent(self):
return super(SessionPage, self).user_agent if self._d_mode else super().user_agent
@property
def session(self):
if self._session is None:
self._create_session()
return self._session
@property
def _session_url(self):
    """Return the URL of the stored response, or None when there is none.

    NOTE(review): the check is a truthiness test. If ``self._response`` is a
    ``requests.Response``, its truth value follows ``.ok``, so an error
    response would also yield None -- confirm this is intended.
    """
    response = self._response
    if response:
        return response.url
    return None
@ -146,7 +142,7 @@ class MixTab(SessionPage, ChromiumTab, BasePage):
# d模式转s模式
if self._session is None:
self._s_set_start_options(
self._set_session_options(
self.browser._session_options or SessionOptions(read_file=self.browser._session_options is None))
self._create_session()
@ -190,5 +186,8 @@ class MixTab(SessionPage, ChromiumTab, BasePage):
return super(SessionPage, self)._find_elements(locator, timeout=timeout, index=index, relative=relative) \
if self._d_mode else super()._find_elements(locator, index=index)
def __repr__(self):
return f'<MixTab browser_id={self.browser.id} tab_id={self.tab_id}>'
def _set_session_options(self, session_or_options=None):
    """Set session options, defaulting to the owning browser's options.

    :param session_or_options: forwarded unchanged to the parent
        implementation unless it is None
    :return: None

    When the argument is None, the browser's ``_session_options`` is used if
    truthy; otherwise a new SessionOptions is built, with ``read_file`` True
    only when the browser's value is exactly None.
    """
    if session_or_options is None:
        browser_opts = self.browser._session_options
        session_or_options = browser_opts or SessionOptions(read_file=browser_opts is None)
    super()._set_session_options(session_or_options)

View File

@ -5,18 +5,16 @@
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from copy import copy
from pathlib import Path
from re import search, DOTALL
from time import sleep
from urllib.parse import urlparse
from requests import Session, Response
from requests import Response
from requests.structures import CaseInsensitiveDict
from tldextract import extract
from .._base.base import BasePage
from .._configs.session_options import SessionOptions
from .._elements.session_element import SessionElement, make_session_ele
from .._functions.cookies import cookie_to_dict, CookiesList
from .._functions.web import format_headers
@ -24,34 +22,21 @@ from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
"""SessionPage封装了页面操作的常用功能使用requests来获取、解析网页"""
def __init__(self, session_or_options=None):
super(SessionPage, SessionPage).__init__(self)
self._headers = None
super().__init__()
self._response = None
self._session = None
self._set = None
self._encoding = None
self._type = 'SessionPage'
self._page = self
self._timeout = 10
self._s_set_start_options(session_or_options)
self._set_session_options(session_or_options)
self._s_set_runtime_settings()
self._create_session()
if not self._session:
self._create_session()
def _s_set_start_options(self, session_or_options):
if not session_or_options:
self._session_options = SessionOptions(session_or_options)
elif isinstance(session_or_options, SessionOptions):
self._session_options = session_or_options
elif isinstance(session_or_options, Session):
self._session_options = SessionOptions()
self._session = copy(session_or_options)
self._headers = self._session.headers
self._session.headers = None
def __repr__(self):
return f'<SessionPage url={self.url}>'
def _s_set_runtime_settings(self):
self._timeout = self._session_options.timeout
@ -59,10 +44,6 @@ class SessionPage(BasePage):
self.retry_times = self._session_options.retry_times
self.retry_interval = self._session_options.retry_interval
def _create_session(self):
if not self._session:
self._session, self._headers = self._session_options.make_session()
def __call__(self, locator, index=1, timeout=None):
return self.ele(locator, index=index)
@ -277,9 +258,6 @@ class SessionPage(BasePage):
else:
return None, '连接失败' if err is None else err
def __repr__(self):
return f'<SessionPage url={self.url}>'
def check_headers(kwargs, headers, arg):
    """Report whether ``arg`` is contained in ``kwargs`` or in ``headers``.

    ``kwargs`` is checked first; ``headers`` is consulted only when the
    first check fails.
    """
    if arg in kwargs:
        return True
    return arg in headers

View File

@ -20,8 +20,7 @@ from .._units.setter import SessionPageSetter
class SessionPage(BasePage):
_headers: Optional[CaseInsensitiveDict] = ...
_session: Optional[Session] = ...
"""SessionPage封装了页面操作的常用功能使用requests来获取、解析网页"""
_session_options: Optional[SessionOptions] = ...
_url: str = ...
_response: Optional[Response] = ...
@ -40,21 +39,10 @@ class SessionPage(BasePage):
"""
...
def _s_set_start_options(self, session_or_options: Union[Session, SessionOptions]) -> None:
"""启动配置
:param session_or_options: SessionSessionOptions对象
:return: None
"""
...
def _s_set_runtime_settings(self) -> None:
"""设置运行时用到的属性"""
...
def _create_session(self) -> None:
"""创建内建Session对象"""
...
def __call__(self,
locator: Union[Tuple[str, str], str, SessionElement],
index: int = 1,

View File

@ -9,14 +9,13 @@ from .chromium_page import ChromiumPage
from .session_page import SessionPage
from .._base.base import BasePage
from .._configs.chromium_options import ChromiumOptions
from .._configs.session_options import SessionOptions
from .._functions.cookies import set_session_cookies, set_tab_cookies
from .._functions.settings import Settings
from .._units.setter import WebPageSetter
class WebPage(SessionPage, ChromiumPage, BasePage):
"""整合浏览器和request的页面类"""
def __new__(cls, mode='d', timeout=None, chromium_options=None, session_or_options=None):
"""初始化函数
:param mode: 'd' 's'即driver模式和session模式
@ -50,6 +49,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
return super(SessionPage, self).__call__(locator, index=index, timeout=timeout)
return super().__call__(locator, index=index)
def __repr__(self):
return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>'
@property
def latest_tab(self):
return self.browser._get_tab(id_or_num=self.tab_ids[0], mix=True, as_id=not Settings.singleton_tab_obj)
@ -251,5 +253,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
self._driver = None
self._has_driver = None
def __repr__(self):
return f'<WebPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def _set_session_options(self, session_or_options=None):
    """Set session options, defaulting to the owning browser's options.

    :param session_or_options: forwarded unchanged to the parent
        implementation unless it is None
    :return: None

    When the argument is None, the browser's ``_session_options`` is used if
    truthy; otherwise a new SessionOptions is built, with ``read_file`` True
    only when the browser's value is exactly None.
    """
    if session_or_options is None:
        browser_opts = self.browser._session_options
        session_or_options = browser_opts or SessionOptions(read_file=browser_opts is None)
    super()._set_session_options(session_or_options)

View File

@ -25,6 +25,7 @@ from .._units.waiter import WebPageWaiter
class WebPage(SessionPage, ChromiumPage, BasePage):
"""整合浏览器和request的页面类"""
_d_mode: bool = ...
_set: WebPageSetter = ...
_has_driver: Optional[bool] = ...
@ -352,7 +353,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
timeout: float = None,
index: Optional[int] = 1,
relative: bool = False,
raise_err: bool = None) -> Union[ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]:
raise_err: bool = None) -> Union[
ChromiumElement, SessionElement, ChromiumFrame, SessionElementsList, ChromiumElementsList]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
:param locator: 元素的定位信息可以是元素对象loc元组或查询字符串
:param timeout: 查找元素超时时间d模式专用