适配selenium4的配置,待测试

This commit is contained in:
g1879 2022-01-11 18:29:20 +08:00
parent 854e389a03
commit c68fd34c6b
4 changed files with 103 additions and 97 deletions

View File

@ -11,7 +11,6 @@ from pathlib import Path
from typing import Any, Union
from requests.cookies import RequestsCookieJar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
@ -469,6 +468,9 @@ class DriverOptions(Options):
self._experimental_options = options_dict.get('experimental_options', {})
self._debugger_address = options_dict.get('debugger_address', None)
self._driver_path = om.paths.get('chromedriver_path', None)
self.set_window_rect = options_dict.get('set_window_rect', None)
self.page_load_strategy = om.paths.get('page_load_strategy', 'normal')
self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 10, 'script': 10})
@property
def driver_path(self) -> str:
@ -558,6 +560,24 @@ class DriverOptions(Options):
return self
def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> 'DriverOptions':
    """Update the timeout settings stored on this options object.

    Only the arguments that are not None are applied; the remaining entries
    keep their current value.  Per the original author's note, the setting
    takes effect with selenium 4 and above.

    :param implicit: timeout for locating elements
    :param pageLoad: timeout for page loading
    :param script: timeout for script execution
    :return: this object, so calls can be chained
    """
    current = self.timeouts
    requested = (('implicit', implicit), ('pageLoad', pageLoad), ('script', script))
    for key, value in requested:
        if value is not None:
            current[key] = value

    # Assign back so that a setter on ``timeouts`` (if there is one) sees the update.
    self.timeouts = current
    return self
def set_headless(self, on_off: bool = True) -> 'DriverOptions':
"""设置是否隐藏浏览器界面 \n
:param on_off: 开或关
@ -661,57 +681,6 @@ class DriverOptions(Options):
return _chrome_options_to_dict(self)
def _dict_to_chrome_options(options: dict) -> Options:
    """Build a ChromeOptions object from a plain settings dictionary.

    When ``debugger_address`` is set, only that address is applied and every
    other key is ignored.  Otherwise the binary location, start-up arguments,
    extension files, encoded extensions and experimental options are copied
    over, in that order.  Missing or empty entries are skipped.

    :param options: dictionary holding the browser settings
    :return: ChromeOptions object carrying those settings
    :raises Exception: if a non-empty list/dict setting has the wrong type
    """
    chrome_options = webdriver.ChromeOptions()

    # Attaching to an already opened browser: no other setting applies.
    if options.get('debugger_address', None):
        chrome_options.debugger_address = options['debugger_address']
        return chrome_options

    # From here on a new browser is being configured.
    # Path of the browser executable.
    binary_location = options.get('binary_location', None)
    if binary_location:
        chrome_options.binary_location = binary_location

    # Start-up arguments.
    arguments = options.get('arguments', None)
    if arguments:
        if not isinstance(arguments, list):
            raise Exception(f"参数必须为list现在是{type(arguments)}")
        for argument in arguments:
            chrome_options.add_argument(argument)

    # Extension files to load.
    extension_files = options.get('extension_files', None)
    if extension_files:
        if not isinstance(extension_files, list):
            raise Exception(f'extension_files必须是list现在是{type(extension_files)}')
        for extension_file in extension_files:
            chrome_options.add_extension(extension_file)

    # Encoded extensions.
    extensions = options.get('extensions', None)
    if extensions:
        if not isinstance(extensions, list):
            raise Exception(f'extensions必须是list现在是{type(extensions)}')
        for extension in extensions:
            chrome_options.add_encoded_extension(extension)

    # Experimental options.
    experimental_options = options.get('experimental_options', None)
    if experimental_options:
        if not isinstance(experimental_options, dict):
            raise Exception(f'experimental_options必须是dict现在是{type(experimental_options)}')
        for name, value in experimental_options.items():
            chrome_options.add_experimental_option(name, value)

    return chrome_options
def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]:
"""把chrome配置对象转换为字典 \n
:param options: chrome配置对象字典或DriverOptions对象
@ -724,11 +693,15 @@ def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, b
return options
re_dict = dict()
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path']
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path',
'timeouts', 'set_window_rect', 'page_load_strategy']
options_dir = options.__dir__()
for attr in attrs:
re_dict[attr] = options.__getattribute__(f'_{attr}') if attr in options_dir else None
try:
re_dict[attr] = options.__getattribute__(f'{attr}') if attr in options_dir else None
except Exception:
pass
return re_dict

View File

@ -8,6 +8,9 @@ binary_location =
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
timeouts = {'implicit': 10, 'pageLoad': 50, 'script': 50}
set_window_rect = None
page_load_strategy = normal
[session_options]
headers = {

View File

@ -15,15 +15,14 @@ from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver
from tldextract import extract
from .config import (_dict_to_chrome_options, _session_options_to_dict,
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
from .config import _session_options_to_dict, SessionOptions, DriverOptions, _cookies_to_tuple
class Drission(object):
"""Drission类用于管理WebDriver对象和Session对象是驱动器的角色"""
def __init__(self,
driver_or_options: Union[WebDriver, dict, Options, DriverOptions, bool] = None,
driver_or_options: Union[WebDriver, Options, DriverOptions, bool] = None,
session_or_options: Union[Session, dict, SessionOptions, bool] = None,
ini_path: str = None,
proxy: dict = None):
@ -35,37 +34,45 @@ class Drission(object):
"""
self._session = None
self._driver = None
self._session_options = None
self._driver_options = None
self._debugger = None
self._proxy = proxy
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
# ------------------处理session options----------------------
if session_or_options is None:
self._session_options = om.session_options
self._session_options = SessionOptions(ini_path=ini_path).as_dict()
elif session_or_options is False:
self._driver_options = SessionOptions(read_file=False).as_dict()
elif isinstance(session_or_options, Session):
self._session = session_or_options
elif isinstance(session_or_options, SessionOptions):
self._session_options = session_or_options.as_dict()
elif isinstance(session_or_options, dict):
self._session_options = session_or_options
else:
# 若接收到Session对象直接记录
if isinstance(session_or_options, Session):
self._session = session_or_options
# 否则记录其配置信息
else:
self._session_options = _session_options_to_dict(session_or_options)
raise TypeError('session_or_options参数只能接收Session, dict, SessionOptions或False。')
# ------------------处理driver options----------------------
if driver_or_options is None:
self._driver_options = om.chrome_options
self._driver_options['driver_path'] = om.get_value('paths', 'chromedriver_path')
self._driver_options = DriverOptions(ini_path=ini_path)
elif driver_or_options is False:
self._driver_options = DriverOptions(read_file=False)
elif isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
elif isinstance(driver_or_options, (Options, DriverOptions)):
self._driver_options = driver_or_options
else:
# 若接收到WebDriver对象直接记录
if isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
# 否则记录其配置信息
else:
self._driver_options = _chrome_options_to_dict(driver_or_options)
raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。')
def __del__(self):
"""关闭对象时关闭浏览器和Session"""
@ -91,28 +98,25 @@ class Drission(object):
如设置了本地调试浏览器可自动接入或打开浏览器进程
"""
if self._driver is None:
if not isinstance(self._driver_options, dict):
raise TypeError('无效的Driver配置。')
# options = _dict_to_chrome_options(self._driver_options)
options = _dict_to_chrome_options(self._driver_options)
if not self.driver_options.debugger_address and self._proxy:
self.driver_options.add_argument(f'--proxy-server={self._proxy["http"]}')
if not self._driver_options.get('debugger_address', None) and self._proxy:
options.add_argument(f'--proxy-server={self._proxy["http"]}')
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe'
driver_path = self.driver_options.driver_path or 'chromedriver'
chrome_path = self.driver_options.binary_location or 'chrome.exe'
# -----------若指定debug端口且该端口未在使用中则先启动浏览器进程-----------
if options.debugger_address and _check_port(options.debugger_address) is False:
if self.driver_options.debugger_address and _check_port(self.driver_options.debugger_address) is False:
from subprocess import Popen
port = options.debugger_address.split(':')[-1]
port = self.driver_options.debugger_address.split(':')[-1]
# 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径
chrome_path, self._debugger = _create_chrome(chrome_path, port,
self._driver_options['arguments'], self._proxy)
self.driver_options.arguments, self._proxy)
# -----------创建WebDriver对象-----------
self._driver = _create_driver(chrome_path, driver_path, options)
self._driver = _create_driver(chrome_path, driver_path, self.driver_options)
# 反反爬设置
try:
@ -127,7 +131,7 @@ class Drission(object):
return self._driver
@property
def driver_options(self) -> dict:
def driver_options(self) -> Union[DriverOptions, Options]:
"""返回driver配置信息"""
return self._driver_options
@ -199,7 +203,7 @@ class Drission(object):
if self.debugger_progress:
return self.debugger_progress.pid
address = self.driver_options.get('debugger_address', '').split(':')
address = str(self.driver_options.debugger_address).split(':')
if len(address) == 2:
ip, port = address
if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
@ -469,6 +473,11 @@ def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebD
:return: WebDriver 对象
"""
try:
debugger_address = options.debugger_address
if options.debugger_address:
options = Options()
options.debugger_address = debugger_address
return webdriver.Chrome(driver_path, options=options)
# 若版本不对,获取对应 chromedriver 再试

View File

@ -8,6 +8,7 @@ from typing import Union, List, Tuple
from requests import Response, Session
from requests.cookies import RequestsCookieJar
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
@ -31,13 +32,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
def __init__(self,
mode: str = 'd',
drission: Union[Drission, str] = None,
timeout: float = 10,
driver_options: Union[dict, DriverOptions, bool] = None,
timeout: float = None,
driver_options: Union[Options, DriverOptions, bool] = None,
session_options: Union[dict, SessionOptions, bool] = None) -> None:
"""初始化函数 \n
:param mode: 'd' 's'即driver模式和session模式
:param drission: Drission对象不传入时会自动创建
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param driver_options: 浏览器设置没传入drission参数时会用这个设置新建Drission对象中的WebDriver对象传入False则不创建
:param session_options: requests设置没传入drission参数时会用这个设置新建Drission对象中的Session对象传入False则不创建
"""
@ -45,6 +46,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
if self._mode not in ('s', 'd'):
raise ValueError('mode参数只能是s或d。')
if driver_options:
try:
timeout = driver_options.timeouts.get('implicit', None)
except Exception:
timeout = None
timeout = timeout if timeout is not None else 10
super(DriverPage, self).__init__(timeout) # BasePage的__init__()
self._driver, self._session = (None, True) if self._mode == 's' else (True, None)
self._drission = drission or Drission(driver_options, session_options)
@ -344,6 +352,20 @@ class MixPage(SessionPage, DriverPage, BasePage):
self._response = None
self.drission.close_session()
def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> None:
    """Set the timeouts used by this page object.

    Only the arguments that are not None are applied.

    :param implicit: timeout for locating elements; stored as ``self.timeout``
    :param pageLoad: timeout for page loading, applied to the WebDriver
    :param script: timeout for script execution, applied to the WebDriver
    :return: None
    """
    if implicit is not None:
        self.timeout = implicit

    # In selenium 4 ``driver.timeouts`` returns a freshly built Timeouts object,
    # so assigning to its attributes never reaches the browser session.  The
    # dedicated setter methods send the value to the driver.
    if pageLoad is not None:
        self.driver.set_page_load_timeout(pageLoad)

    if script is not None:
        self.driver.set_script_timeout(script)
# ----------------重写SessionPage的函数-----------------------
def post(self,
url: str,
@ -398,14 +420,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
interval, **kwargs)
# ----------------重写DriverPage的函数-----------------------
def chrome_downloading(self, download_path: str = None) -> list:
def chrome_downloading(self, path: str = None) -> list:
"""返回浏览器下载中的文件列表 \n
:param download_path: 下载文件夹路径默认读取配置信息
:param path: 下载文件夹路径默认读取配置信息
:return: 正在下载的文件列表
"""
try:
path = download_path or self._drission.driver_options['experimental_options']['prefs'][
'download.default_directory']
path = path or self._drission.driver_options.experimental_options['prefs']['download.default_directory']
if not path:
raise ValueError('未指定下载路径。')
except Exception: