适配selenium4的配置,待测试

This commit is contained in:
g1879 2022-01-11 18:29:20 +08:00
parent 854e389a03
commit c68fd34c6b
4 changed files with 103 additions and 97 deletions

View File

@ -11,7 +11,6 @@ from pathlib import Path
from typing import Any, Union from typing import Any, Union
from requests.cookies import RequestsCookieJar from requests.cookies import RequestsCookieJar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
@ -469,6 +468,9 @@ class DriverOptions(Options):
self._experimental_options = options_dict.get('experimental_options', {}) self._experimental_options = options_dict.get('experimental_options', {})
self._debugger_address = options_dict.get('debugger_address', None) self._debugger_address = options_dict.get('debugger_address', None)
self._driver_path = om.paths.get('chromedriver_path', None) self._driver_path = om.paths.get('chromedriver_path', None)
self.set_window_rect = options_dict.get('set_window_rect', None)
self.page_load_strategy = om.paths.get('page_load_strategy', 'normal')
self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 10, 'script': 10})
@property @property
def driver_path(self) -> str: def driver_path(self) -> str:
@ -558,6 +560,24 @@ class DriverOptions(Options):
return self return self
def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> 'DriverOptions':
    """Update the timeout values stored on this options object.

    According to the original note, these values only take effect with
    selenium 4 or later. Arguments left as None keep the value that is
    currently stored.

    :param implicit: timeout for locating elements
    :param pageLoad: timeout for page loading
    :param script: timeout for script execution
    :return: this object, so calls can be chained
    """
    # Work on a copy: the stored dict may be the same object that the caller's
    # configuration handed over, and editing it in place would silently change
    # that source as well. ``or {}`` also covers a stored value of None.
    timeouts = dict(self.timeouts or {})

    requested = {'implicit': implicit, 'pageLoad': pageLoad, 'script': script}
    timeouts.update({key: value for key, value in requested.items() if value is not None})

    # Assign back as a whole so a property setter on ``timeouts`` (if the base
    # class defines one) sees the complete, updated mapping.
    self.timeouts = timeouts
    return self
def set_headless(self, on_off: bool = True) -> 'DriverOptions': def set_headless(self, on_off: bool = True) -> 'DriverOptions':
"""设置是否隐藏浏览器界面 \n """设置是否隐藏浏览器界面 \n
:param on_off: 开或关 :param on_off: 开或关
@ -661,57 +681,6 @@ class DriverOptions(Options):
return _chrome_options_to_dict(self) return _chrome_options_to_dict(self)
def _dict_to_chrome_options(options: dict) -> Options:
    """Build a ChromeOptions object from a dictionary of browser settings.

    When ``debugger_address`` is set, the options only carry that address and
    every other key is ignored, because the browser is already running.
    Otherwise ``binary_location``, ``arguments``, ``extension_files``,
    ``extensions`` and ``experimental_options`` are applied when present and
    non-empty.

    :param options: dictionary holding the browser settings
    :return: ChromeOptions object carrying those settings
    :raises TypeError: if a list-valued setting is not a list, or
        ``experimental_options`` is not a dict
    """
    chrome_options = webdriver.ChromeOptions()

    # Attaching to an already opened browser: nothing else applies.
    if options.get('debugger_address', None):
        chrome_options.debugger_address = options['debugger_address']
        return chrome_options

    # Path of the browser executable.
    if options.get('binary_location', None):
        chrome_options.binary_location = options['binary_location']

    # List-valued settings share the same validate-then-add handling:
    # (key, error message prefix, method that adds a single item).
    list_settings = (
        ('arguments', '参数必须为list现在是', chrome_options.add_argument),
        ('extension_files', 'extension_files必须是list现在是', chrome_options.add_extension),
        ('extensions', 'extensions必须是list现在是', chrome_options.add_encoded_extension),
    )
    for key, message, add_item in list_settings:
        values = options.get(key, None)
        if not values:
            continue
        if not isinstance(values, list):
            # TypeError is still caught by callers that catch Exception.
            raise TypeError(f'{message}{type(values)}')
        for value in values:
            add_item(value)

    # Experimental options are a mapping of option name to value.
    experimental = options.get('experimental_options', None)
    if experimental:
        if not isinstance(experimental, dict):
            raise TypeError(f'experimental_options必须是dict现在是{type(experimental)}')
        for name, value in experimental.items():
            chrome_options.add_experimental_option(name, value)

    return chrome_options
def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]: def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]:
"""把chrome配置对象转换为字典 \n """把chrome配置对象转换为字典 \n
:param options: chrome配置对象字典或DriverOptions对象 :param options: chrome配置对象字典或DriverOptions对象
@ -724,11 +693,15 @@ def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, b
return options return options
re_dict = dict() re_dict = dict()
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path'] attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path',
'timeouts', 'set_window_rect', 'page_load_strategy']
options_dir = options.__dir__() options_dir = options.__dir__()
for attr in attrs: for attr in attrs:
re_dict[attr] = options.__getattribute__(f'_{attr}') if attr in options_dir else None try:
re_dict[attr] = options.__getattribute__(f'{attr}') if attr in options_dir else None
except Exception:
pass
return re_dict return re_dict

View File

@ -8,6 +8,9 @@ binary_location =
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars'] arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
extensions = [] extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
timeouts = {'implicit': 10, 'pageLoad': 50, 'script': 50}
set_window_rect = None
page_load_strategy = normal
[session_options] [session_options]
headers = { headers = {

View File

@ -15,15 +15,14 @@ from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from tldextract import extract from tldextract import extract
from .config import (_dict_to_chrome_options, _session_options_to_dict, from .config import _session_options_to_dict, SessionOptions, DriverOptions, _cookies_to_tuple
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
class Drission(object): class Drission(object):
"""Drission类用于管理WebDriver对象和Session对象是驱动器的角色""" """Drission类用于管理WebDriver对象和Session对象是驱动器的角色"""
def __init__(self, def __init__(self,
driver_or_options: Union[WebDriver, dict, Options, DriverOptions, bool] = None, driver_or_options: Union[WebDriver, Options, DriverOptions, bool] = None,
session_or_options: Union[Session, dict, SessionOptions, bool] = None, session_or_options: Union[Session, dict, SessionOptions, bool] = None,
ini_path: str = None, ini_path: str = None,
proxy: dict = None): proxy: dict = None):
@ -35,37 +34,45 @@ class Drission(object):
""" """
self._session = None self._session = None
self._driver = None self._driver = None
self._session_options = None
self._driver_options = None
self._debugger = None self._debugger = None
self._proxy = proxy self._proxy = proxy
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
# ------------------处理session options---------------------- # ------------------处理session options----------------------
if session_or_options is None: if session_or_options is None:
self._session_options = om.session_options self._session_options = SessionOptions(ini_path=ini_path).as_dict()
elif session_or_options is False:
self._driver_options = SessionOptions(read_file=False).as_dict()
elif isinstance(session_or_options, Session):
self._session = session_or_options
elif isinstance(session_or_options, SessionOptions):
self._session_options = session_or_options.as_dict()
elif isinstance(session_or_options, dict):
self._session_options = session_or_options
else: else:
# 若接收到Session对象直接记录 raise TypeError('session_or_options参数只能接收Session, dict, SessionOptions或False。')
if isinstance(session_or_options, Session):
self._session = session_or_options
# 否则记录其配置信息
else:
self._session_options = _session_options_to_dict(session_or_options)
# ------------------处理driver options---------------------- # ------------------处理driver options----------------------
if driver_or_options is None: if driver_or_options is None:
self._driver_options = om.chrome_options self._driver_options = DriverOptions(ini_path=ini_path)
self._driver_options['driver_path'] = om.get_value('paths', 'chromedriver_path')
elif driver_or_options is False:
self._driver_options = DriverOptions(read_file=False)
elif isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
elif isinstance(driver_or_options, (Options, DriverOptions)):
self._driver_options = driver_or_options
else: else:
# 若接收到WebDriver对象直接记录 raise TypeError('driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。')
if isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
# 否则记录其配置信息
else:
self._driver_options = _chrome_options_to_dict(driver_or_options)
def __del__(self): def __del__(self):
"""关闭对象时关闭浏览器和Session""" """关闭对象时关闭浏览器和Session"""
@ -91,28 +98,25 @@ class Drission(object):
如设置了本地调试浏览器可自动接入或打开浏览器进程 如设置了本地调试浏览器可自动接入或打开浏览器进程
""" """
if self._driver is None: if self._driver is None:
if not isinstance(self._driver_options, dict): # options = _dict_to_chrome_options(self._driver_options)
raise TypeError('无效的Driver配置。')
options = _dict_to_chrome_options(self._driver_options) if not self.driver_options.debugger_address and self._proxy:
self.driver_options.add_argument(f'--proxy-server={self._proxy["http"]}')
if not self._driver_options.get('debugger_address', None) and self._proxy: driver_path = self.driver_options.driver_path or 'chromedriver'
options.add_argument(f'--proxy-server={self._proxy["http"]}') chrome_path = self.driver_options.binary_location or 'chrome.exe'
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe'
# -----------若指定debug端口且该端口未在使用中则先启动浏览器进程----------- # -----------若指定debug端口且该端口未在使用中则先启动浏览器进程-----------
if options.debugger_address and _check_port(options.debugger_address) is False: if self.driver_options.debugger_address and _check_port(self.driver_options.debugger_address) is False:
from subprocess import Popen from subprocess import Popen
port = options.debugger_address.split(':')[-1] port = self.driver_options.debugger_address.split(':')[-1]
# 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径 # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径
chrome_path, self._debugger = _create_chrome(chrome_path, port, chrome_path, self._debugger = _create_chrome(chrome_path, port,
self._driver_options['arguments'], self._proxy) self.driver_options.arguments, self._proxy)
# -----------创建WebDriver对象----------- # -----------创建WebDriver对象-----------
self._driver = _create_driver(chrome_path, driver_path, options) self._driver = _create_driver(chrome_path, driver_path, self.driver_options)
# 反反爬设置 # 反反爬设置
try: try:
@ -127,7 +131,7 @@ class Drission(object):
return self._driver return self._driver
@property @property
def driver_options(self) -> dict: def driver_options(self) -> Union[DriverOptions, Options]:
"""返回driver配置信息""" """返回driver配置信息"""
return self._driver_options return self._driver_options
@ -199,7 +203,7 @@ class Drission(object):
if self.debugger_progress: if self.debugger_progress:
return self.debugger_progress.pid return self.debugger_progress.pid
address = self.driver_options.get('debugger_address', '').split(':') address = str(self.driver_options.debugger_address).split(':')
if len(address) == 2: if len(address) == 2:
ip, port = address ip, port = address
if ip not in ('127.0.0.1', 'localhost') or not port.isdigit(): if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
@ -469,6 +473,11 @@ def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebD
:return: WebDriver 对象 :return: WebDriver 对象
""" """
try: try:
debugger_address = options.debugger_address
if options.debugger_address:
options = Options()
options.debugger_address = debugger_address
return webdriver.Chrome(driver_path, options=options) return webdriver.Chrome(driver_path, options=options)
# 若版本不对,获取对应 chromedriver 再试 # 若版本不对,获取对应 chromedriver 再试

View File

@ -8,6 +8,7 @@ from typing import Union, List, Tuple
from requests import Response, Session from requests import Response, Session
from requests.cookies import RequestsCookieJar from requests.cookies import RequestsCookieJar
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
@ -31,13 +32,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
def __init__(self, def __init__(self,
mode: str = 'd', mode: str = 'd',
drission: Union[Drission, str] = None, drission: Union[Drission, str] = None,
timeout: float = 10, timeout: float = None,
driver_options: Union[dict, DriverOptions, bool] = None, driver_options: Union[Options, DriverOptions, bool] = None,
session_options: Union[dict, SessionOptions, bool] = None) -> None: session_options: Union[dict, SessionOptions, bool] = None) -> None:
"""初始化函数 \n """初始化函数 \n
:param mode: 'd' 's'即driver模式和session模式 :param mode: 'd' 's'即driver模式和session模式
:param drission: Drission对象不传入时会自动创建 :param drission: Drission对象不传入时会自动创建
:param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间 :param timeout: 超时时间d模式时为寻找元素时间s模式时为连接时间默认10秒
:param driver_options: 浏览器设置没传入drission参数时会用这个设置新建Drission对象中的WebDriver对象传入False则不创建 :param driver_options: 浏览器设置没传入drission参数时会用这个设置新建Drission对象中的WebDriver对象传入False则不创建
:param session_options: requests设置没传入drission参数时会用这个设置新建Drission对象中的Session对象传入False则不创建 :param session_options: requests设置没传入drission参数时会用这个设置新建Drission对象中的Session对象传入False则不创建
""" """
@ -45,6 +46,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
if self._mode not in ('s', 'd'): if self._mode not in ('s', 'd'):
raise ValueError('mode参数只能是s或d。') raise ValueError('mode参数只能是s或d。')
if driver_options:
try:
timeout = driver_options.timeouts.get('implicit', None)
except Exception:
timeout = None
timeout = timeout if timeout is not None else 10
super(DriverPage, self).__init__(timeout) # BasePage的__init__() super(DriverPage, self).__init__(timeout) # BasePage的__init__()
self._driver, self._session = (None, True) if self._mode == 's' else (True, None) self._driver, self._session = (None, True) if self._mode == 's' else (True, None)
self._drission = drission or Drission(driver_options, session_options) self._drission = drission or Drission(driver_options, session_options)
@ -344,6 +352,20 @@ class MixPage(SessionPage, DriverPage, BasePage):
self._response = None self._response = None
self.drission.close_session() self.drission.close_session()
def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> None:
    """Set the timeouts used by this page and its WebDriver.

    Arguments left as None are not changed.

    :param implicit: timeout for locating elements; stored as this page's ``timeout``
    :param pageLoad: page-load timeout, forwarded to the WebDriver
    :param script: script-execution timeout, forwarded to the WebDriver
    :return: None
    """
    if implicit is not None:
        self.timeout = implicit

    # Call the WebDriver setter methods instead of assigning to attributes of
    # ``driver.timeouts``: that property returns a freshly built object, so
    # changing its fields is never sent to the browser, and the property does
    # not exist at all before selenium 4.
    if pageLoad is not None:
        self.driver.set_page_load_timeout(pageLoad)
    if script is not None:
        self.driver.set_script_timeout(script)
# ----------------重写SessionPage的函数----------------------- # ----------------重写SessionPage的函数-----------------------
def post(self, def post(self,
url: str, url: str,
@ -398,14 +420,13 @@ class MixPage(SessionPage, DriverPage, BasePage):
interval, **kwargs) interval, **kwargs)
# ----------------重写DriverPage的函数----------------------- # ----------------重写DriverPage的函数-----------------------
def chrome_downloading(self, download_path: str = None) -> list: def chrome_downloading(self, path: str = None) -> list:
"""返回浏览器下载中的文件列表 \n """返回浏览器下载中的文件列表 \n
:param download_path: 下载文件夹路径默认读取配置信息 :param path: 下载文件夹路径默认读取配置信息
:return: 正在下载的文件列表 :return: 正在下载的文件列表
""" """
try: try:
path = download_path or self._drission.driver_options['experimental_options']['prefs'][ path = path or self._drission.driver_options.experimental_options['prefs']['download.default_directory']
'download.default_directory']
if not path: if not path:
raise ValueError('未指定下载路径。') raise ValueError('未指定下载路径。')
except Exception: except Exception: