Merge pull request !4 from g1879/develop
This commit is contained in:
g1879 2020-12-04 22:52:02 +08:00 committed by Gitee
commit e682427273
12 changed files with 1338 additions and 283 deletions

View File

@ -256,7 +256,7 @@ def get_available_file_name(folder_path: str, file_name: str) -> str:
base_name = file_Path.stem
num = base_name.split(' ')[-1]
if num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit():
if num and num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit():
num = int(num[1:-1])
file_name = f'{base_name.replace(f"({num})", "", -1)}({num + 1}){ext_name}'
else:

View File

@ -1,14 +1,16 @@
# -*- coding:utf-8 -*-
"""
配置文件
管理配置的类
@Author : g1879
@Contact : g1879@qq.com
@File : config.py
"""
from configparser import ConfigParser, NoSectionError, NoOptionError
from configparser import RawConfigParser, NoSectionError, NoOptionError
from http.cookiejar import Cookie
from pathlib import Path
from typing import Any, Union
from requests.cookies import RequestsCookieJar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
@ -21,14 +23,18 @@ class OptionsManager(object):
:param path: ini文件的路径默认读取模块文件夹下的
"""
self.ini_path = path or str(Path(__file__).parent / 'configs.ini')
self._conf = ConfigParser()
self._conf = RawConfigParser()
self._conf.read(self.ini_path, encoding='utf-8')
if 'global_tmp_path' not in self.paths or not self.get_value('paths', 'global_tmp_path'):
global_tmp_path = str((Path(__file__).parent / 'tmp').absolute())
Path(global_tmp_path).mkdir(parents=True, exist_ok=True)
self.set_item('paths', 'global_tmp_path', global_tmp_path)
self.save()
self._paths = None
self._chrome_options = None
self._session_options = None
if 'tmp_path' not in self.paths or not self.get_value('paths', 'tmp_path'):
tmp_path = str((Path(__file__).parent / 'tmp').absolute())
Path(tmp_path).mkdir(parents=True, exist_ok=True)
self.set_item('paths', 'tmp_path', tmp_path)
self.save(self.ini_path)
def __text__(self) -> str:
"""打印ini文件内容"""
@ -42,17 +48,26 @@ class OptionsManager(object):
@property
def paths(self) -> dict:
"""返回paths设置"""
return self.get_option('paths')
if self._paths is None:
self._paths = self.get_option('paths')
return self._paths
@property
def chrome_options(self) -> dict:
"""返回chrome设置"""
return self.get_option('chrome_options')
if self._chrome_options is None:
self._chrome_options = self.get_option('chrome_options')
return self._chrome_options
@property
def session_options(self) -> dict:
"""返回session设置"""
return self.get_option('session_options')
if self._session_options is None:
self._session_options = self.get_option('session_options')
return self._session_options
def get_value(self, section: str, item: str) -> Any:
"""获取配置的值 \n
@ -62,7 +77,7 @@ class OptionsManager(object):
"""
try:
return eval(self._conf.get(section, item))
except SyntaxError:
except (SyntaxError, NameError):
return self._conf.get(section, item)
except NoSectionError and NoOptionError:
return None
@ -91,6 +106,7 @@ class OptionsManager(object):
:return: 当前对象
"""
self._conf.set(section, item, str(value))
self.__setattr__(f'_{section}', None)
return self
def save(self, path: str = None):
@ -98,15 +114,309 @@ class OptionsManager(object):
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 当前对象
"""
path = Path(__file__).parent / 'configs.ini' if path == 'default' else path
path = Path(path or self.ini_path)
if path == 'default':
path = (Path(__file__).parent / 'configs.ini').absolute()
elif path is None:
path = Path(self.ini_path).absolute()
else:
path = Path(path).absolute()
path = path / 'config.ini' if path.is_dir() else path
path = path.absolute()
self._conf.write(open(path, 'w', encoding='utf-8'))
self._conf.write(open(str(path), 'w', encoding='utf-8'))
return self
class SessionOptions(object):
    """Configuration holder for a requests ``Session`` object.

    Every option is kept in a private ``_<name>`` attribute where ``None``
    means "not set". Options can be preloaded from the ``session_options``
    section of an ini file, changed through the properties below, and written
    back to an ini file with :meth:`save`.
    """

    def __init__(self, read_file: bool = True, ini_path: str = None):
        """Create the option set, optionally preloading it from an ini file.

        :param read_file: whether to read the options from an ini file
        :param ini_path: path of the ini file; when empty, ``configs.ini`` in this module's folder is used
        """
        self.ini_path = None
        self._headers = None
        self._cookies = None
        self._auth = None
        self._proxies = None
        self._hooks = None
        self._params = None
        self._verify = None
        self._cert = None
        self._adapters = None
        self._stream = None
        self._trust_env = None
        self._max_redirects = None

        if read_file:
            self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini')
            om = OptionsManager(self.ini_path)
            options_dict = om.session_options

            # Header names are normalised to lower case, like the headers setter does.
            if options_dict.get('headers', None) is not None:
                self._headers = {key.lower(): options_dict['headers'][key] for key in options_dict['headers']}

            # The remaining options are copied as they are; 'adapters' is not loaded from the file.
            for name in ('cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
                         'stream', 'trust_env', 'max_redirects'):
                value = options_dict.get(name, None)
                if value is not None:
                    setattr(self, f'_{name}', value)

    @property
    def headers(self) -> dict:
        """Return the headers setting; an empty dict is created and stored when unset."""
        if self._headers is None:
            self._headers = {}
        return self._headers

    @headers.setter
    def headers(self, headers: dict) -> None:
        """Set the headers option; keys are stored in lower case.

        :param headers: new value
        :return: None
        """
        self._headers = {key.lower(): headers[key] for key in headers}

    @property
    def cookies(self) -> list:
        """Return the cookies setting; an empty list is created and stored when unset."""
        if self._cookies is None:
            self._cookies = []
        return self._cookies

    @cookies.setter
    def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
        """Set the cookies option.

        :param cookies: new value
        :return: None
        """
        self._cookies = cookies

    @property
    def auth(self) -> tuple:
        """Return the auth setting (``None`` when unset)."""
        return self._auth

    @auth.setter
    def auth(self, auth: tuple) -> None:
        """Set the auth option.

        :param auth: new value
        :return: None
        """
        self._auth = auth

    @property
    def proxies(self) -> dict:
        """Return the proxies setting; an empty dict is created and stored when unset."""
        if self._proxies is None:
            self._proxies = {}
        return self._proxies

    @proxies.setter
    def proxies(self, proxies: dict) -> None:
        """Set the proxies option.

        :param proxies: new value
        :return: None
        """
        self._proxies = proxies

    @property
    def hooks(self) -> dict:
        """Return the hooks setting; an empty dict is created and stored when unset."""
        if self._hooks is None:
            self._hooks = {}
        return self._hooks

    @hooks.setter
    def hooks(self, hooks: dict) -> None:
        """Set the hooks option.

        :param hooks: new value
        :return: None
        """
        self._hooks = hooks

    @property
    def params(self) -> dict:
        """Return the params setting; an empty dict is created and stored when unset."""
        if self._params is None:
            self._params = {}
        return self._params

    @params.setter
    def params(self, params: dict) -> None:
        """Set the params option.

        :param params: new value
        :return: None
        """
        self._params = params

    @property
    def verify(self) -> bool:
        """Return the verify setting (``None`` when unset)."""
        return self._verify

    @verify.setter
    def verify(self, verify: bool) -> None:
        """Set the verify option.

        :param verify: new value
        :return: None
        """
        self._verify = verify

    @property
    def cert(self) -> Union[str, tuple]:
        """Return the cert setting (``None`` when unset)."""
        return self._cert

    @cert.setter
    def cert(self, cert: Union[str, tuple]) -> None:
        """Set the cert option.

        :param cert: new value
        :return: None
        """
        self._cert = cert

    @property
    def adapters(self):
        """Return the adapters setting (``None`` when unset)."""
        return self._adapters

    @adapters.setter
    def adapters(self, adapters) -> None:
        """Set the adapters option.

        :param adapters: new value
        :return: None
        """
        self._adapters = adapters

    @property
    def stream(self) -> bool:
        """Return the stream setting (``None`` when unset)."""
        return self._stream

    @stream.setter
    def stream(self, stream: bool) -> None:
        """Set the stream option.

        :param stream: new value
        :return: None
        """
        self._stream = stream

    @property
    def trust_env(self) -> bool:
        """Return the trust_env setting (``None`` when unset)."""
        return self._trust_env

    @trust_env.setter
    def trust_env(self, trust_env: bool) -> None:
        """Set the trust_env option.

        :param trust_env: new value
        :return: None
        """
        self._trust_env = trust_env

    @property
    def max_redirects(self) -> int:
        """Return the max_redirects setting (``None`` when unset)."""
        return self._max_redirects

    @max_redirects.setter
    def max_redirects(self, max_redirects: int) -> None:
        """Set the max_redirects option.

        :param max_redirects: new value
        :return: None
        """
        self._max_redirects = max_redirects

    def set_a_header(self, attr: str, value: str):
        """Set a single item of the headers.

        :param attr: header name (stored in lower case)
        :param value: header value
        :return: the current object
        """
        if self._headers is None:
            self._headers = {}

        self._headers[attr.lower()] = value
        return self

    def remove_a_header(self, attr: str):
        """Remove a single item from the headers; does nothing when it is absent.

        :param attr: name of the header to remove (matched in lower case)
        :return: the current object
        """
        if self._headers is None:
            return self

        attr = attr.lower()
        if attr in self._headers:
            self._headers.pop(attr)

        return self

    def save(self, path: str = None):
        """Save the settings to an ini file.

        :param path: path of the ini file; pass 'default' to save to the default ini file.
                     When omitted, the file this object was read from is used, falling back
                     to the default ini file if the object was not read from a file.
        :return: the current object
        """
        default_ini = Path(__file__).parent / 'configs.ini'

        if path == 'default':
            path = default_ini.absolute()
        elif path is None:
            # self.ini_path is None when the object was created with read_file=False;
            # Path(None) would raise TypeError, so fall back to the default file.
            path = Path(self.ini_path or default_ini).absolute()
        else:
            path = Path(path).absolute()

        # NOTE(review): a directory gets 'config.ini' while the default file is 'configs.ini' - confirm intended.
        path = path / 'config.ini' if path.is_dir() else path

        # Start from the target file's current content when it exists, so its other
        # sections are kept; otherwise start from this object's source (or the default) file.
        if path.exists():
            om = OptionsManager(path)
        else:
            om = OptionsManager(self.ini_path or str(default_ini))

        options = _session_options_to_dict(self)

        for i in options:
            om.set_item('session_options', i, options[i])

        om.save(str(path))

        return self

    def as_dict(self) -> dict:
        """Return this object as a dict."""
        return _session_options_to_dict(self)
class DriverOptions(Options):
"""chrome浏览器配置类继承自selenium.webdriver.chrome.options的Options类
增加了删除配置和保存到文件方法
@ -125,6 +435,7 @@ class DriverOptions(Options):
self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini')
om = OptionsManager(self.ini_path)
options_dict = om.chrome_options
self._binary_location = options_dict.get('binary_location', '')
self._arguments = options_dict.get('arguments', [])
self._extensions = options_dict.get('extensions', [])
@ -145,12 +456,21 @@ class DriverOptions(Options):
:param path: ini文件的路径传入 'default' 保存到默认ini文件
:return: 当前对象
"""
om = OptionsManager()
options = _chrome_options_to_dict(self)
path = Path(__file__).parent / 'configs.ini' if path == 'default' else path
path = Path(path or self.ini_path)
if path == 'default':
path = (Path(__file__).parent / 'configs.ini').absolute()
elif path is None:
path = Path(self.ini_path).absolute()
else:
path = Path(path).absolute()
path = path / 'config.ini' if path.is_dir() else path
path = path.absolute()
if path.exists():
om = OptionsManager(path)
else:
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
options = self.as_dict()
for i in options:
if i == 'driver_path':
@ -158,7 +478,7 @@ class DriverOptions(Options):
else:
om.set_item('chrome_options', i, options[i])
om.save(path)
om.save(str(path))
return self
@ -296,6 +616,9 @@ class DriverOptions(Options):
return self
def as_dict(self) -> dict:
return _chrome_options_to_dict(self)
def _dict_to_chrome_options(options: dict) -> Options:
"""从传入的字典获取浏览器设置返回ChromeOptions对象 \n
@ -349,25 +672,101 @@ def _dict_to_chrome_options(options: dict) -> Options:
return chrome_options
def _chrome_options_to_dict(options: Union[dict, DriverOptions, None]) -> Union[dict, None]:
def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None]) -> Union[dict, None]:
"""把chrome配置对象转换为字典 \n
:param options: chrome配置对象字典或DriverOptions对象
:return: 配置字典
"""
if options is None or isinstance(options, dict):
if isinstance(options, (dict, type(None))):
return options
re_dict = dict()
re_dict['debugger_address'] = options.debugger_address
re_dict['binary_location'] = options.binary_location
re_dict['debugger_address'] = options.debugger_address
re_dict['arguments'] = options.arguments
re_dict['extensions'] = options.extensions
re_dict['experimental_options'] = options.experimental_options
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path']
for attr in attrs:
re_dict[attr] = options.__getattribute__(f'_{attr}')
try:
re_dict['driver_path'] = options.driver_path
except:
re_dict['driver_path'] = None
# re_dict['capabilities'] = options.capabilities
return re_dict
def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]:
    """Convert a session options object to a dict.

    :param options: session options object or dict; a dict or None is returned unchanged
    :return: settings dict
    """
    if options is None or isinstance(options, dict):
        return options

    result = {}

    # Cookies are normalised to a tuple of cookie dicts before being stored.
    stored_cookies = getattr(options, '_cookies')
    if stored_cookies is not None:
        result['cookies'] = _cookies_to_tuple(stored_cookies)

    # Options that are only exported when they have been set ('adapters' is not exported).
    optional_names = ('headers', 'proxies', 'hooks', 'params', 'verify', 'stream', 'trust_env', 'max_redirects')
    for name in optional_names:
        current = getattr(options, f'_{name}')
        if current is not None:
            result[name] = current

    # cert (and auth) default to None, so "unset" cannot be told apart from "set";
    # they are therefore always written explicitly.
    for name in ('cert', 'auth'):
        result[name] = getattr(options, f'_{name}')

    return result
def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
    """Convert a single cookie to dict format.

    - ``Cookie`` object: a copy of its attributes without 'rfc2109' and '_rest'.
    - dict: returned as is (the same object).
    - str such as ``'name=value; path=/; Secure'``: the first segment gives 'name' and
      'value', every later segment becomes its own key. Only the first '=' of a segment
      separates key and value, so values may contain '='. Segments without '=' get an
      empty-string value, and empty segments (e.g. after a trailing ';') are ignored.

    :param cookie: Cookie object, cookie string or cookie dict
    :return: cookie dict
    :raises TypeError: when cookie is none of the supported types
    :raises ValueError: when a cookie string contains no name at all
    """
    if isinstance(cookie, Cookie):
        cookie_dict = cookie.__dict__.copy()
        cookie_dict.pop('rfc2109')
        cookie_dict.pop('_rest')
        return cookie_dict

    if isinstance(cookie, dict):
        return cookie

    if isinstance(cookie, str):
        # Leading whitespace of each segment is dropped; blank segments are skipped.
        segments = [part for part in (raw.lstrip() for raw in cookie.split(';')) if part]
        if not segments:
            raise ValueError('Cookie string is empty.')

        cookie_dict = {}
        for index, segment in enumerate(segments):
            # partition never fails: value is '' when the segment has no '='.
            key, _, value = segment.partition('=')
            if index == 0:
                cookie_dict['name'] = key
                cookie_dict['value'] = value
            else:
                cookie_dict[key] = value

        return cookie_dict

    raise TypeError
def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple:
    """Convert cookies to a tuple of cookie dicts.

    - list, tuple or RequestsCookieJar: every item is converted with ``_cookie_to_dict``.
    - str such as ``'a=1; b=2'``: every ';'-separated segment is one cookie and becomes
      ``{'name': ..., 'value': ...}``. Only the first '=' separates name and value;
      segments without a name (e.g. after a trailing ';') are skipped.
    - dict of ``{name: value}``: every item becomes ``{'name': ..., 'value': ...}``.

    :param cookies: cookies information, may be CookieJar, list, tuple, str or dict
    :return: the cookies as a tuple of dicts
    :raises TypeError: when cookies is none of the supported types
    """
    if isinstance(cookies, (list, tuple, RequestsCookieJar)):
        return tuple(_cookie_to_dict(cookie) for cookie in cookies)

    if isinstance(cookies, str):
        # Use the same {'name', 'value'} shape as the dict branch, which is what
        # consumers index by; a bare {key: value} dict has neither key.
        pairs = (segment.lstrip().partition('=') for segment in cookies.split(';'))
        return tuple({'name': name, 'value': value} for name, _, value in pairs if name)

    if isinstance(cookies, dict):
        return tuple({'name': name, 'value': cookies[name]} for name in cookies)

    raise TypeError

View File

@ -1,11 +1,11 @@
[paths]
chromedriver_path =
global_tmp_path =
chromedriver_path =
tmp_path =
[chrome_options]
debugger_address =
binary_location =
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--headless', '--disable-infobars']
binary_location =
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}

View File

@ -6,24 +6,25 @@
"""
from sys import exit
from typing import Union
from urllib.parse import urlparse
from requests import Session
from requests.cookies import RequestsCookieJar
from selenium import webdriver
from selenium.common.exceptions import WebDriverException, SessionNotCreatedException
from selenium.common.exceptions import SessionNotCreatedException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver
from tldextract import extract
from .config import OptionsManager, _dict_to_chrome_options, _chrome_options_to_dict
from .config import (_dict_to_chrome_options, _session_options_to_dict,
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
class Drission(object):
"""Drission类用于管理WebDriver对象和Session对象是驱动器的角色"""
def __init__(self,
driver_or_options: Union[WebDriver, dict, Options] = None,
session_or_options: Union[Session, dict] = None,
driver_or_options: Union[WebDriver, dict, Options, DriverOptions] = None,
session_or_options: Union[Session, dict, SessionOptions] = None,
ini_path: str = None,
proxy: dict = None):
"""初始化可接收现成的WebDriver和Session对象或接收它们的配置信息生成对象 \n
@ -34,53 +35,42 @@ class Drission(object):
"""
self._session = None
self._driver = None
self._driver_path = 'chromedriver'
self._proxy = proxy
# 若接收到Session对象直接记录
if isinstance(session_or_options, Session):
self._session = session_or_options
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
# ------------------处理session options----------------------
if session_or_options is None:
self._session_options = om.session_options
# 否则记录其配置信息
else:
# 若接收到Session对象直接记录
if isinstance(session_or_options, Session):
self._session = session_or_options
# 若接收到配置信息则记录否则从ini文件读取
if session_or_options is None:
self._session_options = OptionsManager(ini_path).session_options
# 否则记录其配置信息
else:
self._session_options = session_or_options
self._session_options = _session_options_to_dict(session_or_options)
# 若接收到WebDriver对象直接记录
if isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
# ------------------处理driver options----------------------
if driver_or_options is None:
self._driver_options = om.chrome_options
self._driver_options['driver_path'] = om.get_value('paths', 'chromedriver_path')
# 否则记录其配置信息
else:
# 若接收到WebDriver对象直接记录
if isinstance(driver_or_options, WebDriver):
self._driver = driver_or_options
# 若接收到配置信息则记录否则从ini文件读取
if driver_or_options is None:
om = OptionsManager(ini_path)
self._driver_options = om.chrome_options
if om.paths.get('chromedriver_path', None):
self._driver_path = om.paths['chromedriver_path']
# 否则记录其配置信息
else:
self._driver_options = _chrome_options_to_dict(driver_or_options)
if self._driver_options.get('driver_path', None):
self._driver_path = self._driver_options['driver_path']
@property
def session(self) -> Session:
"""返回Session对象如未初始化则按配置信息创建"""
if self._session is None:
self._session = Session()
attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
'cert', 'adapters', 'stream', 'trust_env', 'max_redirects']
for i in attrs:
if i in self._session_options:
exec(f'self._session.{i} = self._session_options["{i}"]')
self._set_session(self._session_options)
if self._proxy:
self._session.proxies = self._proxy
@ -99,8 +89,10 @@ class Drission(object):
if self._proxy:
options.add_argument(f'--proxy-server={self._proxy["http"]}')
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
try:
self._driver = webdriver.Chrome(self._driver_path, options=options)
self._driver = webdriver.Chrome(driver_path, options=options)
except SessionNotCreatedException:
print('Chrome版本与chromedriver版本不匹配可执行easy_set.get_match_driver()自动下载匹配的版本。')
exit(0)
@ -127,12 +119,13 @@ class Drission(object):
return self._session_options
@session_options.setter
def session_options(self, value: dict) -> None:
"""设置session配置
:param value: session配置字典
def session_options(self, options: Union[dict, SessionOptions]) -> None:
"""设置session配置 \n
:param options: session配置字典
:return: None
"""
self._session_options = value
self._session_options = _session_options_to_dict(options)
self._set_session(self._session_options)
@property
def proxy(self) -> Union[None, dict]:
@ -159,103 +152,95 @@ class Drission(object):
self._driver.get(url)
for cookie in cookies:
self._ensure_add_cookie(cookie)
self.set_cookies(cookie, set_driver=True)
def cookies_to_session(self, copy_user_agent: bool = False,
driver: WebDriver = None,
session: Session = None) -> None:
def set_cookies(self,
                cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                set_session: bool = False,
                set_driver: bool = False) -> None:
    """Set cookies on the session and/or the driver.

    The cookie dicts are updated in place while they are processed: a ``None`` value
    becomes ``''`` and, when setting the driver, 'expiry' is cast to int and a missing
    'domain' is filled in.

    :param cookies: cookies information, may be CookieJar, list, tuple, str or dict
    :param set_session: whether to set the cookies of the session
    :param set_driver: whether to set the cookies of the driver
    :return: None
    :raises ValueError: when setting the driver, the cookie has no domain and none can be
                        taken from the browser's current url
    """
    cookies = _cookies_to_tuple(cookies)

    for cookie in cookies:
        if cookie['value'] is None:
            cookie['value'] = ''

        # Add the cookie to the session
        if set_session:
            # Every other field is passed through as a keyword argument;
            # 'httpOnly' is dropped and 'expiry' is forwarded as 'expires'.
            kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value', 'httpOnly', 'expiry')}

            if 'expiry' in cookie:
                kwargs['expires'] = cookie['expiry']

            self.session.cookies.set(cookie['name'], cookie['value'], **kwargs)

        # Add the cookie to the driver
        if set_driver:
            if 'expiry' in cookie:
                cookie['expiry'] = int(cookie['expiry'])

            try:
                browser_domain = extract(self.driver.current_url).fqdn
            except AttributeError:
                browser_domain = ''

            if not cookie.get('domain', None):
                # No domain in the cookie: use "<domain>.<suffix>" of the page the browser is on.
                if browser_domain:
                    url = extract(browser_domain)
                    cookie_domain = f'{url.domain}.{url.suffix}'
                else:
                    raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.')

                cookie['domain'] = cookie_domain

            else:
                # Compare without the leading dot of the cookie's domain.
                cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]

            # The browser is not on the cookie's domain: load that domain before adding
            # (presumably because the driver only accepts cookies for the current page's domain).
            if cookie_domain not in browser_domain:
                self.driver.get(cookie_domain if cookie_domain.startswith('http://')
                                else f'http://{cookie_domain}')

            self.driver.add_cookie(cookie)
def _set_session(self, data: dict) -> None:
    """Apply a settings dict to the Session object, creating the Session first when needed.

    :param data: session settings keyed by attribute name
    :return: None
    """
    if self._session is None:
        self._session = Session()

    # Cookies go through set_cookies instead of being assigned directly.
    if 'cookies' in data:
        self.set_cookies(data['cookies'], set_session=True)

    # Attributes copied straight onto the Session; 'adapters' is not applied.
    plain_attrs = ('headers', 'auth', 'proxies', 'hooks', 'params', 'verify',
                   'cert', 'stream', 'trust_env', 'max_redirects')
    for name in plain_attrs:
        if name not in data:
            continue
        setattr(self._session, name, data[name])
def cookies_to_session(self, copy_user_agent: bool = False) -> None:
"""把driver对象的cookies复制到session对象 \n
:param copy_user_agent: 是否复制ua信息
:param driver: 来源driver对象
:param session: 目标session对象
:return: None
"""
driver = driver or self.driver
session = session or self.session
if copy_user_agent:
self.user_agent_to_session(driver, session)
self.user_agent_to_session(self.driver, self.session)
for cookie in driver.get_cookies():
session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])
self.set_cookies(self.driver.get_cookies(), set_session=True)
def cookies_to_driver(self, url: str,
driver: WebDriver = None,
session: Session = None) -> None:
def cookies_to_driver(self, url: str) -> None:
"""把session对象的cookies复制到driver对象 \n
:param url: 作用域
:param driver: 目标driver对象
:param session: 来源session对象
:return: None
"""
driver = driver or self.driver
session = session or self.session
domain = urlparse(url).netloc
url = extract(url)
domain = f'{url.domain}.{url.suffix}'
cookies = tuple(x for x in self.session.cookies if domain in x.domain)
if not domain:
raise Exception('Without specifying a domain')
# 翻译cookies
for i in [x for x in session.cookies if domain in x.domain]:
cookie_data = {'name': i.name, 'value': str(i.value), 'path': i.path, 'domain': i.domain}
if i.expires:
cookie_data['expiry'] = i.expires
self._ensure_add_cookie(cookie_data, driver=driver)
def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None:
"""添加cookie到driver \n
:param cookie: 要添加的cookie
:param override_domain: 覆盖作用域
:param driver: 操作的driver对象
:return: None
"""
driver = driver or self.driver
if override_domain:
cookie['domain'] = override_domain
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
try:
browser_domain = extract(driver.current_url).fqdn
except AttributeError:
browser_domain = ''
if cookie_domain not in browser_domain:
driver.get(f'http://{cookie_domain.lstrip("http://")}')
if 'expiry' in cookie:
cookie['expiry'] = int(cookie['expiry'])
driver.add_cookie(cookie)
# 如果添加失败,尝试更宽的域名
if not self._is_cookie_in_driver(cookie, driver):
cookie['domain'] = extract(cookie['domain']).registered_domain
driver.add_cookie(cookie)
if not self._is_cookie_in_driver(cookie):
raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n")
def _is_cookie_in_driver(self, cookie, driver=None) -> bool:
"""检查cookie是否已经在driver里 \n
只检查namevaluedomain检查domain时比较宽 \n
:param cookie: 要检查的cookie
:param driver: 被检查的driver
:return: 返回布尔值
"""
driver = driver or self.driver
for driver_cookie in driver.get_cookies():
if (cookie['name'] == driver_cookie['name'] and
cookie['value'] == driver_cookie['value'] and
(cookie['domain'] == driver_cookie['domain'] or
f'.{cookie["domain"]}' == driver_cookie['domain'])):
return True
return False
self.set_cookies(cookies, set_driver=True)
def user_agent_to_session(self, driver: WebDriver = None, session: Session = None) -> None:
"""把driver的user-agent复制到session \n

View File

@ -140,7 +140,7 @@ class DriverElement(DrissionElement):
return self.get_style_property('content', 'after')
# -----------------共有函数-------------------
def texts(self, text_node_only: bool = False) -> List[str]:
def texts(self, text_node_only: bool = False) -> list:
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
:param text_node_only: 是否只返回文本节点
:return: 文本列表
@ -567,7 +567,7 @@ class DriverElement(DrissionElement):
def execute_driver_find(page_or_ele,
loc: Tuple[str, str],
mode: str = 'single',
timeout: float = 10) -> Union[DriverElement, List[DriverElement or str], str, None]:
timeout: float = 10) -> Union[DriverElement, List[DriverElement], str, None]:
"""执行driver模式元素的查找 \n
页面查找元素及元素查找下级元素皆使用此方法 \n
:param page_or_ele: DriverPage对象或DriverElement对象

View File

@ -53,13 +53,20 @@ class DriverPage(object):
@property
def cookies(self) -> list:
"""返回当前网站cookies"""
return self.driver.get_cookies()
return self.get_cookies(True)
@property
def title(self) -> str:
"""返回网页title"""
return self.driver.title
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
    """Return the cookies reported by the driver.

    :param as_dict: when True return a ``{name: value}`` dict, otherwise the driver's own cookie list
    :return: cookies as a dict or a list
    """
    driver_cookies = self.driver.get_cookies()
    if not as_dict:
        return driver_cookies

    return {item['name']: item['value'] for item in driver_cookies}
def _try_to_connect(self,
to_url: str,
times: int = 0,
@ -108,7 +115,7 @@ class DriverPage(object):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement],
mode: str = None,
timeout: float = None) -> Union[DriverElement, List[DriverElement or str], str, None]:
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
"""返回页面中符合条件的元素,默认返回第一个 \n
示例 \n
- 接收到元素对象时 \n
@ -149,8 +156,8 @@ class DriverPage(object):
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
loc_or_ele = translate_loc(loc_or_ele)
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
# 接收到DriverElement对象直接返回
elif isinstance(loc_or_ele, DriverElement):
@ -169,7 +176,7 @@ class DriverPage(object):
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[DriverElement or str]:
timeout: float = None) -> List[DriverElement]:
"""返回页面中所有符合条件的元素 \n
示例 \n
- 用loc元组查找 \n
@ -328,33 +335,35 @@ class DriverPage(object):
if self.tabs_count:
self.to_tab(0)
def close_other_tabs(self, num_or_handle: Union[int, str] = None) -> None:
"""关闭传入的标签页以外标签页,默认保留当前页 \n
:param num_or_handle: 要保留的标签页序号或handle序号第一个为0最后为-1
def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None:
"""关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组 \n
:param num_or_handles: 要保留的标签页序号或handle可传入handle组成的列表或元组
:return: None
"""
try:
tab = int(num_or_handle)
tab = int(num_or_handles)
except (ValueError, TypeError):
tab = num_or_handle
tab = num_or_handles
tabs = self.driver.window_handles
if tab is None:
page_handle = self.current_tab_handle
page_handle = (self.current_tab_handle,)
elif isinstance(tab, int):
page_handle = tabs[tab]
page_handle = (tabs[tab],)
elif isinstance(tab, str):
page_handle = (tab,)
elif isinstance(tab, (list, tuple)):
page_handle = tab
else:
raise TypeError('Argument num_or_handle can only be int or str.')
raise TypeError('Argument num_or_handle can only be int, str, list or tuple.')
for i in tabs: # 遍历所有标签页,关闭非保留的
if i != page_handle:
if i not in page_handle:
self.driver.switch_to.window(i)
self.driver.close()
self.driver.switch_to.window(page_handle) # 把权柄定位回保留的页面
self.driver.switch_to.window(page_handle[0]) # 把权柄定位回保留的页面
def to_tab(self, num_or_handle: Union[int, str] = 0) -> None:
"""跳转到标签页 \n

View File

@ -65,7 +65,7 @@ def set_paths(driver_path: str = None,
om.set_item('chrome_options', 'debugger_address', format_path(debugger_address))
if tmp_path is not None:
om.set_item('paths', 'global_tmp_path', format_path(tmp_path))
om.set_item('paths', 'tmp_path', format_path(tmp_path))
if download_path is not None:
experimental_options = om.get_value('chrome_options', 'experimental_options')

View File

@ -7,10 +7,11 @@
from typing import Union, List, Tuple
from requests import Response, Session
from requests.cookies import RequestsCookieJar
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from .config import DriverOptions
from .config import DriverOptions, SessionOptions
from .drission import Drission
from .driver_element import DriverElement
from .driver_page import DriverPage
@ -38,7 +39,7 @@ class MixPage(Null, SessionPage, DriverPage):
mode: str = 'd',
timeout: float = 10,
driver_options: Union[dict, DriverOptions] = None,
session_options: dict = None):
session_options: Union[dict, SessionOptions] = None):
"""初始化函数 \n
:param drission: Drission对象传入's''d'可自动创建Drission对象
:param mode: 'd' 's'即driver模式和session模式
@ -139,6 +140,27 @@ class MixPage(Null, SessionPage, DriverPage):
elif self._mode == 'd':
return super(SessionPage, self).title
def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
    """Set cookies on the object of the current mode.

    In 's' mode the cookies are set on the session, in 'd' mode on the driver.

    :param cookies: cookies information, may be CookieJar, list, tuple, str or dict
    :return: None
    """
    if self._mode == 's':
        self.drission.set_cookies(cookies, set_session=True)
        return

    if self._mode == 'd':
        self.drission.set_cookies(cookies, set_driver=True)
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
    """Return the cookies of the current mode.

    :param as_dict: whether to return the cookies as a dict
    :param all_domains: whether to return the cookies of all domains (only passed on in 's' mode)
    :return: cookies information
    """
    # 's' mode: the next get_cookies in the MRO, called with both arguments.
    if self._mode == 's':
        return super().get_cookies(as_dict, all_domains)

    # 'd' mode: the get_cookies that follows SessionPage in the MRO, called with as_dict only.
    if self._mode == 'd':
        return super(SessionPage, self).get_cookies(as_dict)
def change_mode(self, mode: str = None, go: bool = True) -> None:
"""切换模式,接收's''d'除此以外的字符串会切换为d模式 \n
切换时会把当前模式的cookies复制到目标模式 \n
@ -155,8 +177,10 @@ class MixPage(Null, SessionPage, DriverPage):
if self._mode == 'd':
self._driver = True
self._url = None if not self._driver else self._drission.driver.current_url
if self._session_url:
self.cookies_to_driver(self._session_url)
if go:
self.get(self._session_url)
@ -164,8 +188,10 @@ class MixPage(Null, SessionPage, DriverPage):
elif self._mode == 's':
self._session = True
self._url = self._session_url
if self._driver:
self.cookies_to_session()
if go and self._drission.driver.current_url.startswith('http'):
self.get(self._drission.driver.current_url)
@ -310,7 +336,8 @@ class MixPage(Null, SessionPage, DriverPage):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
mode: str = None,
timeout: float = None) -> Union[DriverElement, SessionElement, str]:
timeout: float = None) -> Union[
DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
示例 \n
- 接收到元素对象时 \n
@ -350,7 +377,7 @@ class MixPage(Null, SessionPage, DriverPage):
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[List[DriverElement or str], List[SessionElement or str]]:
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
"""返回页面中所有符合条件的元素、属性或节点文本 \n
示例 \n
- 用loc元组查找 \n

View File

@ -92,7 +92,7 @@ class SessionElement(DrissionElement):
"""返回前一个兄弟元素"""
return self._get_brother(1, 'ele', 'prev')
def texts(self, text_node_only: bool = False) -> List[str]:
def texts(self, text_node_only: bool = False) -> list:
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
:param text_node_only: 是否只返回文本节点
:return: 文本列表
@ -340,7 +340,7 @@ class SessionElement(DrissionElement):
def execute_session_find(page_or_ele,
loc: Tuple[str, str],
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement or str], str, None]:
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
"""执行session模式元素的查找 \n
页面查找元素及元素查找下级元素皆使用此方法 \n
:param page_or_ele: SessionPage对象或SessionElement对象

View File

@ -15,8 +15,10 @@ from typing import Union, List, Tuple
from urllib.parse import urlparse, quote, unquote
from requests import Session, Response
from tldextract import extract
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
from .config import _cookie_to_dict
from .session_element import SessionElement, execute_session_find
@ -54,21 +56,39 @@ class SessionPage(object):
@property
def cookies(self) -> dict:
"""返回session的cookies"""
return self.session.cookies.get_dict()
return self.get_cookies(True)
@property
def title(self) -> str:
"""返回网页title"""
return self.ele(('css selector', 'title')).text
return self.ele('tag:title').text
@property
def html(self) -> str:
"""返回页面html文本"""
return format_html(self.response.text)
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
    """Return the cookies stored in the session.

    :param as_dict: if True, return a ``{name: value}`` dict; otherwise return
                    a list with one ``_cookie_to_dict`` result per cookie
    :param all_domains: if True, return every cookie in the session; otherwise
                        only cookies whose domain contains the domain (plus
                        suffix, when present) extracted from ``self.url``
    :return: cookies as a dict or a list
    """
    if all_domains:
        cookies = self.session.cookies
    else:
        url = extract(self.url)
        # Hosts without a public suffix (e.g. IP addresses) are reported by
        # tldextract with an empty suffix; joining unconditionally would give
        # '127.0.0.1.' (trailing dot), which never matches the cookie domain.
        domain = f'{url.domain}.{url.suffix}' if url.suffix else url.domain
        cookies = tuple(x for x in self.session.cookies if domain in x.domain)

    if as_dict:
        return {x.name: x.value for x in cookies}
    return [_cookie_to_dict(cookie) for cookie in cookies]
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
mode: str = None) -> Union[SessionElement, List[SessionElement or str], str, None]:
mode: str = None) -> Union[SessionElement, List[SessionElement], str, None]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
示例 \n
- 接收到元素对象时 \n
@ -108,8 +128,8 @@ class SessionPage(object):
loc_or_ele = translate_loc(loc_or_ele)
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
elif isinstance(loc_or_ele, SessionElement):
return loc_or_ele
@ -120,7 +140,7 @@ class SessionPage(object):
return execute_session_find(self, loc_or_ele, mode)
def eles(self,
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement or str]:
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]:
"""返回页面中所有符合条件的元素、属性或节点文本 \n
示例 \n
- 用loc元组查找 \n
@ -374,11 +394,12 @@ class SessionPage(object):
# -------------------打印要下载的文件-------------------
if show_msg:
print(file_url)
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
print(f'Downloading to: {goal_path}')
if skip:
print('Skipped.')
print('Skipped.\n')
# -------------------开始下载-------------------
if skip:
@ -428,7 +449,7 @@ class SessionPage(object):
# -------------------显示并返回值-------------------
if show_msg:
print(info)
print(info, '\n')
info = f'{goal_path}\\{full_name}' if download_status else info
return download_status, info
@ -447,6 +468,11 @@ class SessionPage(object):
:param kwargs: 其它参数
:return: tuple第一位为Response或None第二位为出错信息或'Sussess'
"""
if not url:
if show_errmsg:
raise ValueError('url is empty.')
return None, 'url is empty.'
if mode not in ['get', 'post']:
raise ValueError("Argument mode can only be 'get' or 'post'.")

View File

@ -405,7 +405,7 @@ In addition to the above two paths, this method can also set the following paths
```python
debugger_address # Debug browser address, such as: 127.0.0.1:9222
download_path # Download file path
global_tmp_path # Temporary folder path
tmp_path # Temporary folder path
user_data_path # User data path
cache_path # cache path
```
@ -418,6 +418,12 @@ Tips:
### Other methods
If you don't want to use the ini file (for example, when you want to package the project), you can write the above two paths in the system path, or fill in the program. See the next section for the use of the latter.
## Create drive object Drission
The creation step is not necessary. If you want to get started quickly, you can skip this section. The MixPage object will automatically create the object.
@ -451,16 +457,21 @@ do.set_paths(chrome_path ='D:\\chrome\\chrome.exe',
# Settings for s mode
session_options = {'headers': {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
# Proxy settings, optional
proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
# Incoming configuration, driver_options and session_options are optional, you need to use the corresponding mode to pass in
drission = Drission(driver_options, session_options)
drission = Drission(driver_options, session_options, proxy=proxy)
```
The usage of DriverOptions and SessionOptions is detailed below.
## Use page object MixPage
The MixPage page object encapsulates common web page operations and realizes the switch between driver and session modes.
MixPage must receive a Drission object and use the driver or session in it. If it is not passed in, MixPage will create a Drission by itself (using the configuration of the default ini file).
MixPage must control a Drission object and use its driver or session. If it is not passed in, MixPage will create one by itself (using the incoming configuration information or reading from the default ini file).
Tips: When multiple objects work together, you can pass the Drission object in one MixPage to another, so that multiple objects can share login information or operate the same page.
@ -485,8 +496,6 @@ page = MixPage(driver_options=DriverOption, session_options=SessionOption) # de
### visit website
If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified.
```python
# Default mode
page.get(url)
@ -496,6 +505,8 @@ page.post(url, data, **kwargs) # Only session mode has post method
page.get(url, retry=5, interval=0.5)
```
Tips: If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified.
### Switch mode
@ -506,6 +517,8 @@ Switch between s and d modes, the cookies and the URL you are visiting will be a
page.change_mode(go=False) # If go is False, it means that the url is not redirected
```
Tips: When using a method unique to a certain mode, it will automatically jump to that mode.
### Page properties
@ -534,7 +547,9 @@ page.current_tab_handle # Return to the current tab page handle
When calling a method that only belongs to d mode, it will automatically switch to d mode. See APIs for detailed usage.
```python
page.change_mode() # switch mode
page.set_cookies() # set cookies
page.get_cookies() # Get cookies, which can be returned by list or dict
page.change_mode() # Switch mode, it will automatically copy cookies
page.cookies_to_session() # Copy cookies from WebDriver object to Session object
page.cookies_to_driver() # Copy cookies from Session object to WebDriver object
page.get(url, retry, interval, **kwargs) # Use get to access the web page, you can specify the number of retries and the interval
@ -553,7 +568,7 @@ page.run_script(js, *args) # Run js statement
page.create_tab(url) # Create and locate a tab page, which is at the end
page.to_tab(num_or_handle) # Jump to tab page
page.close_current_tab() # Close the current tab page
page.close_other_tabs(num) # Close other tabs
page.close_other_tabs(num_or_handles) # Close other tabs
page.to_iframe(iframe) # cut into iframe
page.screenshot(path) # Page screenshot
page.scroll_to_see(element) # Scroll until an element is visible
@ -577,11 +592,9 @@ page.eles() and element.eles() search and return a list of all elements that mee
Description:
- The element search timeout is 10 seconds by default, you can also set it as needed.
- In the following search statement, the colon: indicates a fuzzy match, and the equal sign = indicates an exact match
- There are five types of query strings: @attribute name, tag, text, xpath, and css
- The element search timeout is 10 seconds by default, and it stops waiting when it times out or finds an element. You can also set it as needed.
- You can find elements with a query string or a selenium native loc tuple (loc tuples also work in s mode)
- The query string supports 7 forms: @attribute name, tag, text, xpath, css, ., and #
```python
# Find by attribute
@ -590,6 +603,12 @@ page.eles('@class') # Find all elements with class attribute
page.eles('@class:class_name') # Find all elements that have class_name in class
page.eles('@class=class_name') # Find all elements whose class is equal to class_name
# Find by class or id
page.ele('#ele_id') # equivalent to page.ele('@id=ele_id')
page.ele('#:ele_id') # equivalent to page.ele('@id:ele_id')
page.ele('.ele_class') # equivalent to page.ele('@class=ele_class')
page.ele('.:ele_class') # equivalent to page.ele('@class:ele_class')
# Find by tag name
page.ele('tag:li') # Find the first li element
page.eles('tag:li') # Find all li elements
@ -603,7 +622,7 @@ page.ele('tag:div@text()=search_text') # Find the div element whose text is equ
# Find according to text content
page.ele('search text') # find the element containing the incoming text
page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, add text: in front to avoid conflicts
page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, you should add text: in front to avoid conflicts
page.eles('text=search text') # The text is equal to the element of search_text
# Find according to xpath or css selector
@ -626,7 +645,7 @@ element.parent # parent element
element.next # next sibling element
element.prev # previous sibling element
# Get shadow- dom, only support open shadow- root
# Get the shadow-root and treat it as an element. Only support open shadow-root
ele1 = element.shadow_root.ele('tag:div')
# Chain search
@ -734,9 +753,9 @@ shadow_root_element.is_valid() # Returns whether the element is still in dom
## Docking with selenium code
## Splicing with selenium or requests code
The DrissionPage code can be seamlessly spliced with the selenium code, either directly using the selenium WebDriver object, or using its own WebDriver everywhere for the selenium code. Make the migration of existing projects very convenient.
DrissionPage code can be seamlessly spliced with selenium and requests code. You can use Selenium's WebDriver object directly, or you can export your own WebDriver to selenium code. The Session object of requests can also be passed directly. Make the migration of existing projects very convenient.
### selenium to DrissionPage
@ -745,11 +764,10 @@ driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
page = MixPage(Drission(driver)) # Pass the driver to Drission, create a MixPage object
print(page.title) # Print result: You will know by clicking on Baidu
print(page.title) # Print result: 百度一下,你就知道
element = driver.find_element_by_xpath('//div') # Use selenium native functions
```
### DrissionPage to selenium
```python
@ -757,7 +775,57 @@ page = MixPage()
page.get('https://www.baidu.com')
driver = page.driver # Get the WebDriver object from the MixPage object
print(driver.title) # Print results: You will know by clicking on Baidu
print(driver.title) # Print results: 百度一下,你就知道
```
### requests to DrissionPage
``` python
session = requests.Session()
drission = Drission(session_or_options=session)
page = MixPage(drission, mode='s')
page.get('https://www.baidu.com')
```
### DrissionPage to requests
```python
page = MixPage('s')
session = page.session
response = session.get('https://www.baidu.com')
```
## requests function usage
### Connection parameters
In addition to passing in configuration information and connection parameters when creating, if necessary, you can also set connection parameters every time you visit the URL in the s mode.
```python
headers = {'User-Agent':'...',}
cookies = {'name':'value',}
proxies = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
page.get(url, headers=headers, cookies=cookies, proxies=proxies)
```
Tips:
- If the connection parameters are not specified, the s mode will automatically fill in the Host and Referer attributes according to the current domain name
- The Session configuration passed in when creating MixPage is globally effective
### Response object
The Response object obtained by requests is stored in page.response and can be used directly. Such as:
```python
print(page.response.status_code)
print(page.response.headers)
```
@ -791,7 +859,7 @@ page.download(url, save_path,'img','rename', show_msg=True)
## Chrome Quick Settings
## Chrome Settings
The configuration of chrome is very cumbersome. In order to simplify the use, this library provides setting methods for common configurations.
@ -821,21 +889,60 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use
### Instructions
```python
do = DriverOptions(read_file=False) # Create chrome configuration object, do not read from ini file
do = DriverOptions() # read the default ini file to create a DriverOptions object
do = DriverOptions('D:\\settings.ini') # read the specified ini file to create a DriverOptions object
do = DriverOptions(read_file=False) # Do not read the ini file, create an empty DriverOptions object
do.set_headless(False) # show the browser interface
do.set_no_imgs(True) # Do not load pictures
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path
do.set_headless(False).set_no_imgs(True) # Support chain operation
drission = Drission(driver_options=do) # Create Drission object with configuration object
page = MixPage(drission) # Create MixPage object with Drission object
page = MixPage(driver_options=do) # Create MixPage object with configuration object
do.save() # Save the currently opened ini file
do.save() # save the currently opened ini file
do.save('D:\\settings.ini') # save to the specified ini file
do.save('default') # Save the current settings to the default ini file
```
## Session Settings
### SessionOptions Object
The SessionOptions object is used to manage the configuration information of the Session. It reads the default ini file configuration information by default when it is created, or you can manually set the required information.
Configurable properties:
headers, cookies, auth, proxies, hooks, params, verify, cert, adapters, stream, trust_env, max_redirects.
**Tips:** cookies can receive information in dict, list, tuple, str, RequestsCookieJar and other formats.
### Instructions
```python
so = SessionOptions() # read the default ini file to create a SessionOptions object
so = SessionOptions('D:\\settings.ini') # read the specified ini file to create a SessionOptions object
so = SessionOptions(read_file=False) # Do not read the ini file, create an empty SessionOptions object
so.cookies = ['key1=val1; domain=xxxx','key2=val2; domain=xxxx'] # set cookies
so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'}
so.set_a_header('Connection','keep-alive')
drission = Drission(session_options=so) # Create Drission object with configuration object
page = MixPage(session_options=so) # Create MixPage object with configuration object
so.save() # Save the currently opened ini file
so.save('D:\\settings.ini') # save to the specified ini file
so.save('default') # Save the current settings to the default ini file
```
## Save configuration
Because there are many configurations of chrome and headers, an ini file is set up specifically to save common configurations. You can use the OptionsManager object to get and save the configuration, and use the DriverOptions object to modify the chrome configuration. You can also save multiple ini files and call them according to different projects.
@ -851,7 +958,7 @@ The ini file has three parts by default: paths, chrome_options, and session_opti
; chromedriver.exe path
chromedriver_path =
; Temporary folder path, used to save screenshots, file downloads, etc.
global_tmp_path =
tmp_path =
[chrome_options]
; The address and port of the opened browser, such as 127.0.0.1:9222
@ -937,17 +1044,20 @@ drission = Drission(ini_path='D:\\settings.ini') # Use the specified ini file to
## easy_set method
The methods of frequently used settings can be quickly modified. Calling the easy_set method will modify the content of the default ini file.
Methods to quickly modify common settings. All for driver mode settings. Calling the easy_set method will modify the content of the default ini file.
```python
set_headless(True) # Turn on headless mode
set_no_imgs(True) # Turn on no image mode
set_no_js(True) # Disable JS
set_mute(True) # Turn on mute mode
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent
set_proxy('127.0.0.1:8888') # set proxy
set_paths(paths) # See [Initialization] section
set_argument(arg, value) # Set the attribute. If the attribute has no value (such as'zh_CN.UTF- 8'), the value is bool, which means switch; otherwise, the value is str. When the value is'' or False, delete the attribute item
get_match_driver() # Identify the chrome version and automatically download the matching chromedriver.exe
show_settings() # Print all settings
set_headless(True) # Turn on headless mode
set_no_imgs(True) # Turn on no image mode
set_no_js(True) # Disable JS
set_mute(True) # Turn on mute mode
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent
set_proxy('127.0.0.1:8888') # set proxy
set_paths(paths) # See [Initialization] section
set_argument(arg, value) # Set the attribute. If the attribute has no value (such as'zh_CN.UTF-8'), the value is bool to indicate the switch; otherwise, the value is str. When the value is'' or False, delete the attribute item
check_driver_version() # Check if chrome and chromedriver versions match
```
# POM mode
@ -1056,7 +1166,7 @@ The Drission class is used to manage WebDriver objects and Session objects, and
Parameter Description:
- driver_or_options: [WebDriver, dict, Options] - WebDriver object or chrome configuration parameters.
- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver object or chrome configuration parameters.
- session_or_options: [Session, dict] - Session object or its configuration parameters
- ini_path: str - ini file path, the default is the ini file under the DrissionPage folder
- proxy: dict - proxy settings
@ -1118,8 +1228,22 @@ Copy the cookies of the driver object to the session object.
Parameter Description:
- copy_user_agent: bool - whether to copy user_agent to session
- driver: WebDriver- Copy the WebDriver object of cookies
- session: Session- Session object that receives cookies
Returns: None
### set_cookies()
Set cookies.
Parameter Description:
- cookies: Union[RequestsCookieJar, list, tuple, str, dict]-cookies information, can be CookieJar, list, tuple, str, dict
- set_session: bool-whether to set session cookies
- set_driver: bool-whether to set driver cookies
Returns: None
@ -1132,8 +1256,6 @@ Copy cookies from session to driver.
Parameter Description:
- url: str - the domain of cookies
- driver: WebDriver- WebDriver object that receives cookies
- session: Session- Copy the Session object of cookies
Returns: None
@ -1272,6 +1394,32 @@ Returns: bool
### set_cookies()
Set cookies.
Parameter Description:
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies information, can be CookieJar, list, tuple, str, dict
Returns: None
### get_cookies()
Return cookies.
Parameter Description:
- as_dict: bool - Whether to return as dict, the default is to return complete cookies as list
- all_domains: bool - whether to return cookies of all domains, only valid in s mode
Returns: a dictionary or list of cookies
### change_mode()
Switch mode,'d' or's'. When switching, the cookies of the current mode will be copied to the target mode.
@ -1521,11 +1669,11 @@ Returns: None
### close_other_tabs()
Close tab pages other than the incoming tab page, and keep the current page by default.
Close tab pages other than the incoming tab page, and keep the current page by default. You can pass in a list or tuple.
Parameter Description:
- num_or_handle:[int, str] - The serial number or handle of the tab to keep, the first serial number is 0, and the last is - 1
- num_or_handles:[int, str]-The serial number or handle of the tab to keep, you can pass in a list or tuple of handles
Returns: None
@ -2596,6 +2744,161 @@ Return: OptionsManager - return to yourself
## SessionOptions class
### class SessionOptions()
Session object configuration class.
Parameter Description:
- read_file: bool - whether to read configuration information from the ini file when creating
- ini_path: str - the path of the ini file; if it is None, the default ini file will be read
### headers
headers configuration information.
Returns: dict
### cookies
Cookies configuration information.
Returns: list
### auth
auth configuration information.
Returns: tuple
### proxies
proxies configuration information.
Returns: dict
### hooks
hooks configuration information.
Returns: dict
### params
params configuration information.
Returns: dict
### verify
Verify configuration information.
Returns: bool
### cert
cert configuration information.
Returns: [str, tuple]
### adapters
Adapters configuration information.
Returns: adapters
### stream
stream configuration information.
Returns: bool
### trust_env
trust_env configuration information.
Returns: bool
### max_redirects
max_redirects configuration information.
Returns: int
### set_a_header()
Set an item in headers.
Parameter Description:
- attr: str-configuration item name
- value: str-configured value
Returns: the current object
### remove_a_header()
Remove a setting from headers.
Parameter Description:
- attr: str-the name of the configuration to be deleted
Returns: current object
### save()
Save the settings to a file.
Parameter Description:
- path: str-the path of the ini file, pass in'default' and save to the default ini file
Returns: current object
### as_dict()
Return the current object as a dictionary.
Returns: dict
## DriverOptions class
### class DriverOptions()
@ -2815,7 +3118,7 @@ Parameter Description:
- download_path: str-download file path
- global_tmp_path: str-Temporary folder path
- tmp_path: str-Temporary folder path
- user_data_path: str-user data path

View File

@ -6,7 +6,7 @@ DrissionPage即 driver 和 session 的合体。
是个基于 python 的 Web 自动化操作集成工具。
它实现了 selenium 和 requests 之间的无缝切换。
因此可以兼顾 selenium 的便利性和 requests 的高效率。
它集成了页面常用功能,两种模式系统一致的 API使用便捷。
它集成了页面常用功能,两种模式系统一致的 API使用便捷。
它用 POM 模式封装了页面元素常用的方法,很适合自动化操作功能扩展。
更棒的是,它的使用方式非常简洁和人性化,代码量少,对新手友好。
@ -17,7 +17,9 @@ DrissionPage即 driver 和 session 的合体。
**示例地址:** [使用DrissionPage的网页自动化及爬虫示例](https://gitee.com/g1879/DrissionPage-demos)
**联系邮箱:** g1879@qq.com
**联系邮箱:** g1879@qq.com
**交流QQ群** 897838127
# 理念及背景
@ -330,9 +332,9 @@ from DrissionPage import MixPage
配置路径有四种方法:
- 使用 easy_set 工具的 get_match_driver() 方法(推荐)
- 将路径写入本库的ini文件
- 将路径写入本库的 ini 文件
- 将两个路径写入系统变量
- 使用时手动传入路径
- 在代码中填写路径
### 使用 get_match_driver() 方法
@ -407,7 +409,7 @@ Message: session not created: Chrome version must be between 70 and 73
```python
debugger_address # 调试浏览器地址127.0.0.1:9222
download_path # 下载文件路径
global_tmp_path # 临时文件夹路径
tmp_path # 临时文件夹路径
user_data_path # 用户数据路径
cache_path # 缓存路径
```
@ -420,6 +422,12 @@ Tips
### 其它方法
若你不想使用 ini 文件(如要打包项目时),可在系统路径写入以上两个路径,或在程序中填写。后者的使用方法见下一节。
## 创建驱动器对象 Drission
创建的步骤不是必须若想快速上手可跳过本节。MixPage 会自动创建该对象。
@ -453,16 +461,21 @@ do.set_paths(chrome_path='D:\\chrome\\chrome.exe',
# 用于 s 模式的设置
session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
# 代理设置,可选
proxy = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'}
# 传入配置driver_options 和 session_options 都是可选的,须要使用对应模式才须要传入
drission = Drission(driver_options, session_options)
drission = Drission(driver_options, session_options, proxy=proxy)
```
DriverOptions 和 SessionOptions 用法详见下文。
## 使用页面对象 MixPage
MixPage 页面对象封装了常用的网页操作,并实现 driver 和 session 模式之间的切换。
MixPage 须接收一个 Drission 对象并使用其中的 driver 或 session如没有传入MixPage 会自己创建一个(使用默认 ini 文件的配置)。
MixPage 须控制一个 Drission 对象并使用其中的 driver 或 session如没有传入MixPage 会自己创建一个(使用传入的配置信息或从默认 ini 文件读取)。
Tips: 多对象协同工作时,可将一个 MixPage 中的 Drission 对象传递给另一个,使多个对象共享登录信息或操作同一个页面。
@ -487,8 +500,6 @@ page = MixPage(driver_options=do, session_options=so) # 默认 d 模式
### 访问网页
若连接出错程序会自动重试2次可指定重试次数和等待间隔。
```python
# 默认方式
page.get(url)
@ -498,6 +509,8 @@ page.post(url, data, **kwargs) # 只有 session 模式才有 post 方法
page.get(url, retry=5, interval=0.5)
```
Tips若连接出错程序会自动重试2次可指定重试次数和等待间隔。
### 切换模式
@ -508,6 +521,8 @@ page.get(url, retry=5, interval=0.5)
page.change_mode(go=False) # go 为 False 表示不跳转 url
```
Tips使用某种模式独有的方法时会自动跳转到该模式。
### 页面属性
@ -536,7 +551,9 @@ page.current_tab_handle # 返回当前标签页 handle
调用只属于 d 模式的方法,会自动切换到 d 模式。详细用法见 APIs。
```python
page.change_mode() # 切换模式
page.set_cookies() # 设置cookies
page.get_cookies() # 获取 cookies可以 list 或 dict 方式返回
page.change_mode() # 切换模式,会自动复制 cookies
page.cookies_to_session() # 从 WebDriver 对象复制 cookies 到 Session 对象
page.cookies_to_driver() # 从 Session 对象复制 cookies 到 WebDriver 对象
page.get(url, retry, interval, **kwargs) # 用 get 方式访问网页,可指定重试次数及间隔时间
@ -555,10 +572,10 @@ page.run_script(js, *args) # 运行 js 语句
page.create_tab(url) # 新建并定位到一个标签页,该标签页在最后面
page.to_tab(num_or_handle) # 跳转到标签页
page.close_current_tab() # 关闭当前标签页
page.close_other_tabs(num) # 关闭其它标签页
page.close_other_tabs(num_or_handles) # 关闭其它标签页
page.to_iframe(iframe) # 切入 iframe
page.screenshot(path) # 页面截图
page.scrool_to_see(element) # 滚动直到某元素可见
page.scroll_to_see(element) # 滚动直到某元素可见
page.scroll_to(mode, pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'
page.refresh() # 刷新当前页面
page.back() # 浏览器后退
@ -579,9 +596,10 @@ page.eles() 和 element.eles() 查找返回符合条件的所有元素列表。
说明:
- 元素查找超时默认为10秒你也可以按需要设置。
- 元素查找超时默认为10秒超时或找到元素时停止等待,你也可以按需要设置。
- 下面的查找语句中,冒号 : 表示模糊匹配,等号 = 表示精确匹配
- 查询字符串有 @属性名、tag、text、xpath、css 五种
- 可用查询字符串或 selenium 原生的 loc 元组（s 模式也能用）查找元素
- 查询字符串有 @属性名、tag、text、xpath、css、.、# 等7种方法
```python
# 根据属性查找,@ 后面可跟任意属性
@ -590,6 +608,12 @@ page.eles('@class') # 查找所有拥有 class 属性的元素
page.eles('@class:class_name') # 查找所有 class 含有 class_name 的元素
page.eles('@class=class_name') # 查找所有 class 等于 class_name 的元素
# 根据 class 或 id 查找
page.ele('#ele_id') # 等价于 page.ele('@id=ele_id')
page.ele('#:ele_id') # 等价于 page.ele('@id:ele_id')
page.ele('.ele_class') # 等价于 page.ele('@class=ele_class')
page.ele('.:ele_class') # 等价于 page.ele('@class:ele_class')
# 根据 tag name 查找
page.ele('tag:li') # 查找第一个 li 元素
page.eles('tag:li') # 查找所有 li 元素
@ -603,7 +627,7 @@ page.ele('tag:div@text()=search_text') # 查找文本等于 search_text 的 div
# 根据文本内容查找
page.ele('search text') # 查找包含传入文本的元素
page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 开头,则在前加上 text: 避免冲突
page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 开头,则在前加上 text: 避免冲突
page.eles('text=search text') # 文本等于 search_text 的元素
# 根据 xpath 或 css selector 查找
@ -626,7 +650,7 @@ element.parent # 父元素
element.next # 下一个兄弟元素
element.prev # 上一个兄弟元素
# 获取 shadow-dom只支持 open 的 shadow-root
# 获取 shadow-root把它作为元素对待。只支持 open 的 shadow-root
ele1 = element.shadow_root.ele('tag:div')
# 串连查找
@ -732,11 +756,11 @@ shadow_root_element.is_valid() # 返回元素是否还在 dom 内
## 与 selenium 代码对接
## 与 selenium 及 requests 代码对接
DrissionPage 代码可与 selenium 代码无缝拼接,既可直接使用 selenium 的 WebDriver 对象,也可到处自身的 WebDriver 给 selenium 代码使用。使已有项目的迁移非常方便。
DrissionPage 代码可与 selenium 及 requests 代码无缝拼接。既可直接使用 selenium 的 WebDriver 对象,也可导出自身的 WebDriver 给 selenium 代码使用。requests 的 Session 对象也可直接传递。使已有项目的迁移非常方便。
### selenium 转 DrissionPage
### selenium DrissionPage
```python
driver = webdriver.Chrome()
@ -746,9 +770,7 @@ page = MixPage(Drission(driver)) # 把 driver 传递给 Drission创建 MixPa
print(page.title) # 打印结果:百度一下,你就知道
```
### DrissionPage 转 selenium
### DrissionPage 转 selenium
```python
page = MixPage()
@ -756,6 +778,57 @@ page.get('https://www.baidu.com')
driver = page.driver # 从 MixPage 对象中获取 WebDriver 对象
print(driver.title) # 打印结果:百度一下,你就知道
element = driver.find_element_by_xpath('//div') # 使用 selenium 原生功能
```
### requests 转 DrissionPage
``` python
session = requests.Session()
drission = Drission(session_or_options=session)
page = MixPage(drission, mode='s')
page.get('https://www.baidu.com')
```
### DrissionPage 转 requests
```python
page = MixPage('s')
session = page.session
response = session.get('https://www.baidu.com')
```
## requests 功能使用
### 连接参数
除了在创建时传入配置信息及连接参数如有必要s 模式下也可在每次访问网址时设置连接参数。
```python
headers = {'User-Agent': '......', }
cookies = {'name': 'value', }
proxies = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'}
page.get(url, headers=headers, cookies=cookies, proxies=proxies)
```
Tips
- 如果连接参数内没有指定s 模式会根据当前域名自动填写 Host 和 Referer 属性
- 在创建 MixPage 时传入的 Session 配置是全局有效的
### Response 对象
requests 获取到的 Response 对象存放在 page.response可直接使用。如
```python
print(page.response.status_code)
print(page.response.headers)
```
@ -789,7 +862,7 @@ page.download(url, save_path, 'img', 'rename', show_msg=True)
## Chrome 快捷设置
## Chrome 设置
chrome 的配置很繁琐,为简化使用,本库提供了常用配置的设置方法。
@ -819,21 +892,62 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use
### 使用方法
```python
do = DriverOptions(read_file=False) # 创建chrome配置对象不从 ini 文件读取
do = DriverOptions() # 读取默认 ini 文件创建 DriverOptions 对象
do = DriverOptions('D:\\settings.ini') # 读取指定 ini 文件创建 DriverOptions 对象
do = DriverOptions(read_file=False) # 不读取 ini 文件,创建空的 DriverOptions 对象
do.set_headless(False) # 显示浏览器界面
do.set_no_imgs(True) # 不加载图片
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # 设置路径
do.set_headless(False).set_no_imgs(True) # 支持链式操作
drission = Drission(driver_options=do) # 用配置对象创建 Drission 对象
page = MixPage(drission) # 用Drission对象创建 MixPage 对象
page = MixPage(driver_options=do) # 用配置对象创建 MixPage 对象
do.save() # 保存当前打开的 ini 文件
do.save('D:\\settings.ini') # 保存到指定的 ini 文件
do.save('default') # 保存当前设置到默认 ini 文件
```
## Session 设置
### SessionOptions 对象
SessionOptions 对象用于管理 Session 的配置信息。它创建时默认读取默认 ini 文件配置信息,也可手动设置所需信息。
可配置的属性:
headers、cookies、auth、proxies、hooks、params、verify、cert、adapters、stream、trust_env、max_redirects。
**Tips:** cookies 可接收 dict、list、tuple、str、RequestsCookieJar 等格式的信息。
### 使用方法
```python
so = SessionOptions() # 读取默认 ini 文件创建 SessionOptions 对象
so = SessionOptions('D:\\settings.ini') # 读取指定 ini 文件创建 SessionOptions 对象
so = SessionOptions(read_file=False) # 不读取 ini 文件,创建空的 SessionOptions 对象
so.cookies = ['key1=val1; domain=xxxx', 'key2=val2; domain=xxxx'] # 设置 cookies
so.headers = {'User-Agent': 'xxxx', 'Accept-Charset': 'xxxx'}
so.set_a_header('Connection', 'keep-alive')
drission = Drission(session_options=so) # 用配置对象创建 Drission 对象
page = MixPage(session_options=so) # 用配置对象创建 MixPage 对象
so.save() # 保存当前打开的 ini 文件
so.save('D:\\settings.ini') # 保存到指定的 ini 文件
so.save('default') # 保存当前设置到默认 ini 文件
```
## 保存配置
因 chrome 和 headers 配置繁多,故设置一个 ini 文件专门用于保存常用配置,你可使用 OptionsManager 对象获取和保存配置,用 DriverOptions 对象修改 chrome 配置。你也可以保存多个 ini 文件,按不同项目须要调用。
@ -849,7 +963,7 @@ ini 文件默认拥有三部分配置paths、chrome_options、session_options
; chromedriver.exe路径
chromedriver_path =
; 临时文件夹路径,用于保存截图、文件下载等
global_tmp_path =
tmp_path =
[chrome_options]
; 已打开的浏览器地址和端口如127.0.0.1:9222
@ -933,9 +1047,11 @@ drission = Drission(ini_path='D:\\settings.ini') # 使用指定 ini 文件创
## easy_set 方法
可快速地修改常用设置的方法调用 easy_set 方法会修改默认 ini 文件相关内容。
可快速地修改常用设置的方法。全部用于 driver 模式的设置。调用 easy_set 方法会修改默认 ini 文件相关内容。
```python
get_match_driver() # 识别chrome版本并自动下载匹配的chromedriver.exe
show_settings() # 打印所有设置
set_headless(True) # 开启 headless 模式
set_no_imgs(True) # 开启无图模式
set_no_js(True) # 禁用 JS
@ -944,6 +1060,7 @@ set_user_agent('Mozilla/5.0 (Macintosh; Int......') # 设置 user agent
set_proxy('127.0.0.1:8888') # 设置代理
set_paths(paths) # 见 [初始化] 一节
set_argument(arg, value) # 设置属性,若属性无值(如'zh_CN.UTF-8'value 为 bool 表示开关否则value为str当 value为''或 False删除该属性项
check_driver_version() # 检查chrome和chromedriver版本是否匹配
```
# POM 模式
@ -1052,10 +1169,10 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
参数说明:
- driver_or_options: [WebDriver, dict, Options] - WebDriver 对象或 chrome 配置参数。
- session_or_options: [Session, dict] - Session 对象配置参数
- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件
- proxy: dict - 代理设置
- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver 对象或 chrome 配置参数。
- session_or_options: [Session, dict] - Session 对象配置参数
- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件
- proxy: dict - 代理设置
@ -1107,6 +1224,20 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
### set_cookies()
设置 cookies。
参数说明:
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为 CookieJar, list, tuple, str, dict
- set_session: bool - 是否设置 session 的 cookies
- set_driver: bool - 是否设置 driver 的 cookies
返回: None
### cookies_to_session()
把 driver 对象的 cookies 复制到 session 对象。
@ -1114,8 +1245,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
参数说明:
- copy_user_agent: bool - 是否复制 user_agent 到 session
- driver: WebDriver - 复制 cookies 的 WebDriver 对象
- session: Session - 接收 cookies 的 Session 对象
返回: None
@ -1128,8 +1257,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
参数说明:
- url: str - cookies 的域
- driver: WebDriver - 接收 cookies 的 WebDriver 对象
- session: Session - 复制 cookies 的 Session 对象
返回: None
@ -1268,6 +1395,31 @@ MixPage 封装了页面操作的常用功能,可在 driver 和 session 模式
### set_cookies()
设置 cookies。
参数说明:
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为 CookieJar, list, tuple, str, dict
返回: None
### get_cookies()
返回 cookies。
参数说明:
- as_dict: bool - 是否以 dict 方式返回,默认以 list 返回完整的 cookies
- all_domains: bool - 是否返回所有域名的 cookies,只有 s 模式下生效
返回: cookies 字典或列表
### change_mode()
切换模式,'d' 或 's'。切换时会把当前模式的 cookies 复制到目标模式。
@ -1517,11 +1669,11 @@ d 模式时检查网页是否符合预期。默认由 response 状态检查,
### close_other_tabs()
关闭传入的标签页以外标签页,默认保留当前页。
关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组。
参数说明:
- num_or_handle:[int, str] - 要保留的标签页序号或 handle序号第一个为0最后为-1
- num_or_handles:[int, str] - 要保留的标签页序号或 handle,可传入 handle 组成的列表或元组
返回: None
@ -2554,6 +2706,160 @@ shadow-root 所依赖的父元素。
## SessionOptions 类
### class SessionOptions()
Session 对象配置类。
参数说明:
- read_file: bool - 创建时是否从 ini 文件读取配置信息
- ini_path: str - ini 文件路径,为 None 则读取默认 ini 文件
### headers
headers 配置信息。
返回: dict
### cookies
cookies 配置信息。
返回: list
### auth
auth 配置信息。
返回: tuple
### proxies
proxies 配置信息。
返回: dict
### hooks
hooks 配置信息。
返回: dict
### params
params 配置信息。
返回: dict
### verify
verify 配置信息。
返回: bool
### cert
cert 配置信息。
返回: [str, tuple]
### adapters
adapters 配置信息。
返回: adapters
### stream
stream 配置信息。
返回: bool
### trust_env
trust_env 配置信息。
返回: bool
### max_redirects
max_redirects 配置信息。
返回: int
### set_a_header()
设置 headers 中一个项。
参数说明:
- attr: str - 配置项名称
- value: str - 配置的值
返回: 当前对象
### remove_a_header()
从 headers 中删除一个设置。
参数说明:
- attr: str - 要删除的配置名称
返回:当前对象
### save()
保存设置到文件。
参数说明:
- path: str - ini文件的路径,传入 'default' 保存到默认ini文件
返回:当前对象
### as_dict()
以字典形式返回当前对象。
返回: dict
## DriverOptions 类
### class DriverOptions()
@ -2764,15 +3070,15 @@ chrome 配置太复杂,所以把常用的配置写成简单的方法,调用
参数说明:
- driver_path: str - chromedriver.exe 路径
- chrome_path: str - chrome.exe 路径
- driver_path: str - chromedriver.exe 路径
- chrome_path: str - chrome.exe 路径
- debugger_address: str - 调试浏览器地址,例:127.0.0.1:9222
- download_path: str - 下载文件路径
- global_tmp_path: str - 临时文件夹路径
- user_data_path: str - 用户数据路径
- cache_path: str - 缓存路径
- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件
- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配
- download_path: str - 下载文件路径
- tmp_path: str - 临时文件夹路径
- user_data_path: str - 用户数据路径
- cache_path: str - 缓存路径
- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件
- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配
返回: None