重构ChromiumPage和WebPage

This commit is contained in:
g1879 2024-07-02 13:59:50 +08:00
parent 94a4e6871f
commit 503b3f1d70
10 changed files with 61 additions and 223 deletions

View File

@ -10,8 +10,8 @@ from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
from ._pages.session_page import SessionPage
# from ._pages.chromium_page import ChromiumPage
# from ._pages.web_page import WebPage
# 即将废弃
from ._pages.chromium_page import ChromiumPage
from ._pages.web_page import WebPage
__all__ = ['Browser', 'ChromiumOptions', 'SessionOptions', 'SessionPage', '__version__']
__version__ = '4.0.5.3'
__version__ = '4.1.0.0b0'

17
DrissionPage/__init__.pyi Normal file
View File

@ -0,0 +1,17 @@
# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from ._base.browser import Browser
from ._configs.chromium_options import ChromiumOptions
from ._configs.session_options import SessionOptions
from ._pages.session_page import SessionPage
from ._pages.chromium_page import ChromiumPage
from ._pages.web_page import WebPage
__all__ = ['WebPage', 'ChromiumPage', 'Browser', 'ChromiumOptions', 'SessionOptions', 'SessionPage', '__version__']
__version__: str = ...

View File

@ -126,6 +126,11 @@ class Browser(object):
"""返回timeouts设置"""
return self._timeouts
@property
def load_mode(self):
    """Return the load mode held in ``self._load_mode``."""
    return self._load_mode
@property
def download_path(self):
"""返回默认下载路径"""

View File

@ -65,6 +65,9 @@ class Browser(object):
@property
def timeouts(self) -> Timeout: ...
@property
def load_mode(self) -> str: ...
@property
def download_path(self) -> str: ...

View File

@ -1158,6 +1158,10 @@ class Timeout(object):
def __repr__(self):
    """Return the string form of a dict holding the base, page_load and script values."""
    return str({'base': self.base, 'page_load': self.page_load, 'script': self.script})
@property
def as_dict(self):
    """Return the base, page_load and script values as a dict keyed by those names."""
    return {'base': self.base, 'page_load': self.page_load, 'script': self.script}
class Alert(object):
"""用于保存alert信息的类"""

View File

@ -34,7 +34,7 @@ class ChromiumBase(BasePage):
def __init__(self,
browser: Browser,
tab_id: str = None):
self._tab:Union[ChromiumTab, MixTab, ChromiumFrame] = ...
self._tab: Union[ChromiumTab, MixTab, ChromiumFrame] = ...
self._browser: Browser = ...
self._driver: Driver = ...
self._frame_id: str = ...
@ -277,6 +277,9 @@ class Timeout(object):
self.page_load: float = ...
self.script: float = ...
@property
def as_dict(self) -> dict: ...
class Alert(object):

View File

@ -5,22 +5,13 @@
@Copyright: (c) 2024 by g1879, Inc. All Rights Reserved.
@License : BSD 3-Clause.
"""
from pathlib import Path
from time import sleep, perf_counter
from requests import Session
from time import sleep
from .._base.browser import Browser
from .._configs.chromium_options import ChromiumOptions
from .._functions.browser import connect_browser
from .._functions.settings import Settings
from .._functions.tools import PortFinder
from .._functions.web import save_page
from .._pages.chromium_base import ChromiumBase, Timeout
from .._pages.chromium_tab import ChromiumTab
from .._pages.chromium_base import ChromiumBase
from .._units.setter import ChromiumPageSetter
from .._units.waiter import PageWaiter
from ..errors import BrowserConnectError
class ChromiumPage(ChromiumBase):
@ -56,45 +47,18 @@ class ChromiumPage(ChromiumBase):
self._created = True
self.tab = self
super().__init__(self.browser.address, tab_id)
super().__init__(self.browser, tab_id)
self._type = 'ChromiumPage'
self.set.timeouts(base=timeout)
self._page_init()
def _run_browser(self):
"""连接浏览器"""
self._browser = Browser(self._chromium_options.address, self._browser_id, self)
r = self._browser._run_cdp('Browser.getVersion')
self._browser_version = r['product']
if self._is_exist and self._chromium_options._headless is False and 'headless' in r['userAgent'].lower():
self._browser.quit(3)
connect_browser(self._chromium_options)
s = Session()
s.trust_env = False
ws = s.get(f'http://{self._chromium_options.address}/json/version', headers={'Connection': 'close'})
bid = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
self._browser = Browser(self._chromium_options.address, bid, self)
ws.close()
s.close()
self._tab = self
def _d_set_runtime_settings(self):
"""设置运行时用到的属性"""
self._timeouts = Timeout(page_load=self._chromium_options.timeouts['page_load'],
script=self._chromium_options.timeouts['script'],
base=self._chromium_options.timeouts['base'])
if self._chromium_options.timeouts['base'] is not None:
self._timeout = self._chromium_options.timeouts['base']
self._load_mode = self._chromium_options.load_mode
self._download_path = None if self._chromium_options.download_path is None \
else str(Path(self._chromium_options.download_path).absolute())
self.retry_times = self._chromium_options.retry_times
self.retry_interval = self._chromium_options.retry_interval
def _page_init(self):
"""浏览器相关设置"""
self._browser.connect_to_page()
# ----------挂件----------
self._timeouts = self.browser.timeouts
self._load_mode = self.browser._load_mode
self._download_path = self.browser.download_path
self.retry_times = self.browser.retry_times
self.retry_interval = self.browser.retry_interval
@property
def set(self):
@ -131,7 +95,7 @@ class ChromiumPage(ChromiumBase):
def latest_tab(self):
"""返回最新的标签页,最新标签页指最后创建或最后被激活的
当Settings.singleton_tab_obj==True时返回Tab对象否则返回tab id"""
return self.get_tab(self.tab_ids[0], as_id=not Settings.singleton_tab_obj)
return self.browser.latest_tab
@property
def process_id(self):
@ -141,7 +105,7 @@ class ChromiumPage(ChromiumBase):
@property
def browser_version(self):
"""返回所控制的浏览器版本号"""
return self._browser_version
return self._browser.version
def save(self, path=None, name=None, as_pdf=False, **kwargs):
"""把当前页面保存为文件如果path和name参数都为None只返回文本
@ -162,34 +126,7 @@ class ChromiumPage(ChromiumBase):
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象
"""
if id_or_num is not None:
if isinstance(id_or_num, str):
id_or_num = id_or_num
elif isinstance(id_or_num, int):
id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num]
elif isinstance(id_or_num, ChromiumTab):
if as_id:
return id_or_num.tab_id
elif Settings.singleton_tab_obj:
return id_or_num
else:
return self.get_tab(id_or_num.tab_id)
elif title == url == tab_type is None:
id_or_num = self.tab_id
else:
id_or_num = self._browser.find_tabs(title, url, tab_type)
if id_or_num:
id_or_num = id_or_num[0]['id']
else:
return None
if as_id:
return id_or_num
with self._lock:
return ChromiumTab(self.browser, id_or_num)
return self.browser.get_tab(id_or_num=id_or_num, title=title, url=url, tab_type=tab_type, as_id=as_id)
def get_tabs(self, title=None, url=None, tab_type='page', as_id=False):
"""查找符合条件的tab返回它们组成的列表
@ -199,10 +136,7 @@ class ChromiumPage(ChromiumBase):
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象组成的列表
"""
if as_id:
return [tab['id'] for tab in self._browser.find_tabs(title, url, tab_type)]
with self._lock:
return [ChromiumTab(self.browser, tab['id']) for tab in self._browser.find_tabs(title, url, tab_type)]
return self.browser.get_tabs(title=title, url=url, tab_type=tab_type, as_id=as_id)
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
"""新建一个标签页
@ -212,10 +146,7 @@ class ChromiumPage(ChromiumBase):
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
tab = ChromiumTab(self.browser, tab_id=self.browser.new_tab(new_window, background, new_context))
if url:
tab.get(url)
return tab
return self.browser.new_tab(url=url, new_window=new_window, background=background, new_context=new_context)
def close(self):
"""关闭Page管理的标签页"""
@ -227,32 +158,7 @@ class ChromiumPage(ChromiumBase):
:param others: 是否关闭指定标签页之外的
:return: None
"""
all_tabs = set(self.tab_ids)
if isinstance(tabs_or_ids, str):
tabs = {tabs_or_ids}
elif isinstance(tabs_or_ids, ChromiumTab):
tabs = {tabs_or_ids.tab_id}
elif tabs_or_ids is None:
tabs = {self.tab_id}
elif isinstance(tabs_or_ids, (list, tuple)):
tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids)
else:
raise TypeError('tabs_or_ids参数只能传入标签页对象或id。')
if others:
tabs = all_tabs - tabs
end_len = len(set(all_tabs) - set(tabs))
if end_len <= 0:
self.quit()
return
for tab in tabs:
self.browser.close_tab(tab)
sleep(.2)
end_time = perf_counter() + 3
while self.tabs_count != end_len and perf_counter() < end_time:
sleep(.1)
self.browser.close_tabs(tabs_or_ids=tabs_or_ids, others=others)
def quit(self, timeout=5, force=True):
"""关闭浏览器
@ -264,69 +170,8 @@ class ChromiumPage(ChromiumBase):
def _on_disconnect(self):
"""浏览器退出时执行"""
ChromiumPage._PAGES.pop(self._browser_id, None)
print('kkk')
ChromiumPage._PAGES.pop(self._browser.id, None)
def __repr__(self):
return f'<ChromiumPage browser_id={self.browser.id} tab_id={self.tab_id}>'
def handle_options(addr_or_opts):
    """Turn the browser start-up argument into a ChromiumOptions object.

    :param addr_or_opts: a falsy value, a ChromiumOptions object, an 'ip:port' string or an int port
    :return: ChromiumOptions object
    :raises TypeError: when the argument is truthy but is none of the accepted types
    """
    if not addr_or_opts:
        # Falsy input: build the options from it, exactly as passed.
        options = ChromiumOptions(addr_or_opts)

    elif isinstance(addr_or_opts, ChromiumOptions):
        options = addr_or_opts

    else:
        # Address string or port number: no auto-port handling on these paths.
        if isinstance(addr_or_opts, str):
            options = ChromiumOptions()
            options.set_address(addr_or_opts)
        elif isinstance(addr_or_opts, int):
            options = ChromiumOptions()
            options.set_local_port(addr_or_opts)
        else:
            raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。')
        return options

    # Shared by the two branches above. ``is_auto_port`` is deliberately re-read
    # each time instead of cached, because the setters may alter it.
    if options.is_auto_port:
        finder = PortFinder(options.tmp_path)
        port, path = finder.get_port(options.is_auto_port)
        options.set_address(f'127.0.0.1:{port}')
        options.set_user_data_path(path)
        options.auto_port(scope=options.is_auto_port)

    return options
def run_browser(chromium_options):
    """Connect to the browser and read its id from the debugging endpoint.

    :param chromium_options: options object; its ``address`` attribute is used to build the request URL
    :return: tuple of (value returned by connect_browser, browser id string)
    :raises BrowserConnectError: when the endpoint cannot be queried or the reply lacks
        the 'webSocketDebuggerUrl' key
    """
    is_exist = connect_browser(chromium_options)
    connect_failed = '\n浏览器连接失败如使用全局代理须设置不代理127.0.0.1地址。'

    try:
        # The context manager closes the session even when a step below raises.
        with Session() as s:
            s.trust_env = False
            ws = s.get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'})
            try:
                if not ws:
                    raise BrowserConnectError(connect_failed)
                # The browser id is the last path segment of the websocket URL.
                browser_id = ws.json()['webSocketDebuggerUrl'].split('/')[-1]
            finally:
                ws.close()

    except BrowserConnectError:
        raise
    except KeyError as e:
        raise BrowserConnectError('浏览器版本太旧或此浏览器不支持接管。') from e
    except Exception as e:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must propagate
        # instead of being reported as a connection failure.
        raise BrowserConnectError(connect_failed) from e

    return is_exist, browser_id
def get_rename(original, rename):
    """Return the new file name, reusing the suffix of the original name when needed.

    :param original: original file name
    :param rename: new file name; returned unchanged if it contains a dot
    :return: ``rename`` itself, or ``rename`` followed by the text of ``original`` from its last dot onward
    """
    if '.' in rename:
        return rename

    # ``dot`` is empty when ``original`` has no dot, so no suffix is appended.
    _, dot, ext = original.rpartition('.')
    return rename + dot + ext if dot else f'{rename}'

View File

@ -110,12 +110,3 @@ class ChromiumPage(ChromiumBase):
def quit(self, timeout: float = 5, force: bool = True) -> None: ...
def _on_disconnect(self) -> None: ...
def handle_options(addr_or_opts): ...
def run_browser(chromium_options): ...
def get_rename(original: str, rename: str) -> str: ...

View File

@ -6,7 +6,6 @@
@License : BSD 3-Clause.
"""
from .chromium_page import ChromiumPage
from .chromium_tab import MixTab
from .session_page import SessionPage
from .._base.base import BasePage
from .._configs.chromium_options import ChromiumOptions
@ -287,17 +286,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
return
set_tab_cookies(self, super().cookies())
def cookies(self, as_dict=False, all_domains=False, all_info=False):
def cookies(self, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 为True时以dict格式返回为False时返回list且all_info无效
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息False则只返回namevaluedomain
:return: cookies信息
"""
if self._mode == 's':
return super().cookies(as_dict, all_domains, all_info)
return super().cookies(all_domains, all_info)
elif self._mode == 'd':
return super(SessionPage, self).cookies(as_dict, all_domains, all_info)
return super(SessionPage, self).cookies(all_domains, all_info)
def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', as_id=False):
"""获取一个标签页对象id_or_num不为None时后面几个参数无效
@ -308,29 +306,8 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
:param as_id: 是否返回标签页id而不是标签页对象
:return: WebPageTab对象
"""
if id_or_num is not None:
if isinstance(id_or_num, str):
id_or_num = id_or_num
elif isinstance(id_or_num, int):
id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num]
elif isinstance(id_or_num, MixTab):
return id_or_num.tab_id if as_id else id_or_num
elif title == url == tab_type is None:
id_or_num = self.tab_id
else:
id_or_num = self._browser.find_tabs(title, url, tab_type)
if id_or_num:
id_or_num = id_or_num[0]['id']
else:
return None
if as_id:
return id_or_num
with self._lock:
return MixTab(self, id_or_num)
return self.browser._get_tab(id_or_num=id_or_num, title=title, url=url,
tab_type=tab_type, mix=True, as_id=as_id)
def get_tabs(self, title=None, url=None, tab_type='page', as_id=False):
"""查找符合条件的tab返回它们组成的列表
@ -340,10 +317,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
:param as_id: 是否返回标签页id而不是标签页对象
:return: ChromiumTab对象组成的列表
"""
if as_id:
return [tab['id'] for tab in self._browser.find_tabs(title, url, tab_type)]
with self._lock:
return [MixTab(self, tab['id']) for tab in self._browser.find_tabs(title, url, tab_type)]
return self.browser._get_tabs(title=title, url=url, tab_type=tab_type, mix=True, as_id=as_id)
def new_tab(self, url=None, new_window=False, background=False, new_context=False):
"""新建一个标签页
@ -353,10 +327,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
:param new_context: 是否创建新的上下文
:return: 新标签页对象
"""
tab = MixTab(self, tab_id=self.browser.new_tab(new_window, background, new_context))
if url:
tab.get(url)
return tab
return self.browser.new_mix_tab(url=url, new_window=new_window, background=background, new_context=new_context)
def close_driver(self):
"""关闭driver及浏览器"""

View File

@ -121,7 +121,6 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def cookies_to_browser(self) -> None: ...
def cookies(self,
as_dict: bool = False,
all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...