mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
commit
e682427273
@ -256,7 +256,7 @@ def get_available_file_name(folder_path: str, file_name: str) -> str:
|
||||
base_name = file_Path.stem
|
||||
num = base_name.split(' ')[-1]
|
||||
|
||||
if num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit():
|
||||
if num and num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit():
|
||||
num = int(num[1:-1])
|
||||
file_name = f'{base_name.replace(f"({num})", "", -1)}({num + 1}){ext_name}'
|
||||
else:
|
||||
|
@ -1,14 +1,16 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
配置文件
|
||||
管理配置的类
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
@File : config.py
|
||||
"""
|
||||
from configparser import ConfigParser, NoSectionError, NoOptionError
|
||||
from configparser import RawConfigParser, NoSectionError, NoOptionError
|
||||
from http.cookiejar import Cookie
|
||||
from pathlib import Path
|
||||
from typing import Any, Union
|
||||
|
||||
from requests.cookies import RequestsCookieJar
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
|
||||
@ -21,14 +23,18 @@ class OptionsManager(object):
|
||||
:param path: ini文件的路径,默认读取模块文件夹下的
|
||||
"""
|
||||
self.ini_path = path or str(Path(__file__).parent / 'configs.ini')
|
||||
self._conf = ConfigParser()
|
||||
self._conf = RawConfigParser()
|
||||
self._conf.read(self.ini_path, encoding='utf-8')
|
||||
|
||||
if 'global_tmp_path' not in self.paths or not self.get_value('paths', 'global_tmp_path'):
|
||||
global_tmp_path = str((Path(__file__).parent / 'tmp').absolute())
|
||||
Path(global_tmp_path).mkdir(parents=True, exist_ok=True)
|
||||
self.set_item('paths', 'global_tmp_path', global_tmp_path)
|
||||
self.save()
|
||||
self._paths = None
|
||||
self._chrome_options = None
|
||||
self._session_options = None
|
||||
|
||||
if 'tmp_path' not in self.paths or not self.get_value('paths', 'tmp_path'):
|
||||
tmp_path = str((Path(__file__).parent / 'tmp').absolute())
|
||||
Path(tmp_path).mkdir(parents=True, exist_ok=True)
|
||||
self.set_item('paths', 'tmp_path', tmp_path)
|
||||
self.save(self.ini_path)
|
||||
|
||||
def __text__(self) -> str:
|
||||
"""打印ini文件内容"""
|
||||
@ -42,17 +48,26 @@ class OptionsManager(object):
|
||||
@property
|
||||
def paths(self) -> dict:
|
||||
"""返回paths设置"""
|
||||
return self.get_option('paths')
|
||||
if self._paths is None:
|
||||
self._paths = self.get_option('paths')
|
||||
|
||||
return self._paths
|
||||
|
||||
@property
|
||||
def chrome_options(self) -> dict:
|
||||
"""返回chrome设置"""
|
||||
return self.get_option('chrome_options')
|
||||
if self._chrome_options is None:
|
||||
self._chrome_options = self.get_option('chrome_options')
|
||||
|
||||
return self._chrome_options
|
||||
|
||||
@property
|
||||
def session_options(self) -> dict:
|
||||
"""返回session设置"""
|
||||
return self.get_option('session_options')
|
||||
if self._session_options is None:
|
||||
self._session_options = self.get_option('session_options')
|
||||
|
||||
return self._session_options
|
||||
|
||||
def get_value(self, section: str, item: str) -> Any:
|
||||
"""获取配置的值 \n
|
||||
@ -62,7 +77,7 @@ class OptionsManager(object):
|
||||
"""
|
||||
try:
|
||||
return eval(self._conf.get(section, item))
|
||||
except SyntaxError:
|
||||
except (SyntaxError, NameError):
|
||||
return self._conf.get(section, item)
|
||||
except NoSectionError and NoOptionError:
|
||||
return None
|
||||
@ -91,6 +106,7 @@ class OptionsManager(object):
|
||||
:return: 当前对象
|
||||
"""
|
||||
self._conf.set(section, item, str(value))
|
||||
self.__setattr__(f'_{section}', None)
|
||||
return self
|
||||
|
||||
def save(self, path: str = None):
|
||||
@ -98,15 +114,309 @@ class OptionsManager(object):
|
||||
:param path: ini文件的路径,传入 'default' 保存到默认ini文件
|
||||
:return: 当前对象
|
||||
"""
|
||||
path = Path(__file__).parent / 'configs.ini' if path == 'default' else path
|
||||
path = Path(path or self.ini_path)
|
||||
if path == 'default':
|
||||
path = (Path(__file__).parent / 'configs.ini').absolute()
|
||||
elif path is None:
|
||||
path = Path(self.ini_path).absolute()
|
||||
else:
|
||||
path = Path(path).absolute()
|
||||
|
||||
path = path / 'config.ini' if path.is_dir() else path
|
||||
path = path.absolute()
|
||||
self._conf.write(open(path, 'w', encoding='utf-8'))
|
||||
|
||||
self._conf.write(open(str(path), 'w', encoding='utf-8'))
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class SessionOptions(object):
    """Configuration holder for a requests ``Session``.

    Every setting is stored in a private ``_<name>`` attribute, where ``None`` means
    "not specified". Settings can be loaded from an ini file and written back with
    :meth:`save`.
    """

    def __init__(self, read_file: bool = True, ini_path: str = None):
        """Create the options object, optionally loading values from an ini file.

        :param read_file: whether to read the settings from an ini file
        :param ini_path: path of the ini file; defaults to ``configs.ini`` beside this module
        """
        self.ini_path = None
        self._headers = None
        self._cookies = None
        self._auth = None
        self._proxies = None
        self._hooks = None
        self._params = None
        self._verify = None
        self._cert = None
        self._adapters = None
        self._stream = None
        self._trust_env = None
        self._max_redirects = None

        if read_file:
            self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini')
            om = OptionsManager(self.ini_path)
            options_dict = om.session_options

            # Header names are normalised to lower case so lookups are case-insensitive.
            if options_dict.get('headers', None) is not None:
                self._headers = {key.lower(): options_dict['headers'][key] for key in options_dict['headers']}

            # 'adapters' is deliberately not loaded from the file.
            for name in ('cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
                         'cert', 'stream', 'trust_env', 'max_redirects'):
                value = options_dict.get(name, None)
                if value is not None:
                    setattr(self, f'_{name}', value)

    @property
    def headers(self) -> dict:
        """Return the headers setting (an empty dict is created if unset)."""
        if self._headers is None:
            self._headers = {}
        return self._headers

    @headers.setter
    def headers(self, headers: dict) -> None:
        """Set the headers; keys are stored in lower case.

        :param headers: headers dict
        :return: None
        """
        self._headers = {key.lower(): headers[key] for key in headers}

    @property
    def cookies(self) -> list:
        """Return the cookies setting (an empty list is created if unset)."""
        if self._cookies is None:
            self._cookies = []

        return self._cookies

    @cookies.setter
    def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
        """Set the cookies.

        :param cookies: cookies as a CookieJar, list, tuple, str or dict
        :return: None
        """
        self._cookies = cookies

    @property
    def auth(self) -> tuple:
        """Return the auth setting."""
        return self._auth

    @auth.setter
    def auth(self, auth: tuple) -> None:
        """Set the auth value.

        :param auth: new value
        :return: None
        """
        self._auth = auth

    @property
    def proxies(self) -> dict:
        """Return the proxies setting (an empty dict is created if unset)."""
        if self._proxies is None:
            self._proxies = {}

        return self._proxies

    @proxies.setter
    def proxies(self, proxies: dict) -> None:
        """Set the proxies.

        :param proxies: new value
        :return: None
        """
        self._proxies = proxies

    @property
    def hooks(self) -> dict:
        """Return the hooks setting (an empty dict is created if unset)."""
        if self._hooks is None:
            self._hooks = {}

        return self._hooks

    @hooks.setter
    def hooks(self, hooks: dict) -> None:
        """Set the hooks.

        :param hooks: new value
        :return: None
        """
        self._hooks = hooks

    @property
    def params(self) -> dict:
        """Return the params setting (an empty dict is created if unset)."""
        if self._params is None:
            self._params = {}
        return self._params

    @params.setter
    def params(self, params: dict) -> None:
        """Set the params.

        :param params: new value
        :return: None
        """
        self._params = params

    @property
    def verify(self) -> bool:
        """Return the verify setting."""
        return self._verify

    @verify.setter
    def verify(self, verify: bool) -> None:
        """Set the verify value.

        :param verify: new value
        :return: None
        """
        self._verify = verify

    @property
    def cert(self) -> Union[str, tuple]:
        """Return the cert setting."""
        return self._cert

    @cert.setter
    def cert(self, cert: Union[str, tuple]) -> None:
        """Set the cert value.

        :param cert: new value
        :return: None
        """
        self._cert = cert

    @property
    def adapters(self):
        """Return the adapters setting."""
        return self._adapters

    @adapters.setter
    def adapters(self, adapters) -> None:
        """Set the adapters value.

        :param adapters: new value
        :return: None
        """
        self._adapters = adapters

    @property
    def stream(self) -> bool:
        """Return the stream setting."""
        return self._stream

    @stream.setter
    def stream(self, stream: bool) -> None:
        """Set the stream value.

        :param stream: new value
        :return: None
        """
        self._stream = stream

    @property
    def trust_env(self) -> bool:
        """Return the trust_env setting."""
        return self._trust_env

    @trust_env.setter
    def trust_env(self, trust_env: bool) -> None:
        """Set the trust_env value.

        :param trust_env: new value
        :return: None
        """
        self._trust_env = trust_env

    @property
    def max_redirects(self) -> int:
        """Return the max_redirects setting."""
        return self._max_redirects

    @max_redirects.setter
    def max_redirects(self, max_redirects: int) -> None:
        """Set the max_redirects value.

        :param max_redirects: new value
        :return: None
        """
        self._max_redirects = max_redirects

    def set_a_header(self, attr: str, value: str):
        """Set a single header.

        :param attr: header name (stored in lower case)
        :param value: header value
        :return: this object
        """
        if self._headers is None:
            self._headers = {}

        self._headers[attr.lower()] = value
        return self

    def remove_a_header(self, attr: str):
        """Remove a single header if it is present.

        :param attr: header name (matched in lower case)
        :return: this object
        """
        if self._headers is None:
            return self

        self._headers.pop(attr.lower(), None)

        return self

    def save(self, path: str = None):
        """Save the settings into the ``session_options`` section of an ini file.

        :param path: ini file path; pass ``'default'`` to write the default ini file,
                     or ``None`` to write the file this object was loaded from
        :return: this object
        """
        default_ini = Path(__file__).parent / 'configs.ini'

        if path == 'default':
            path = default_ini.absolute()
        elif path is None:
            # ini_path is None when the object was built with read_file=False;
            # fall back to the default file instead of failing on Path(None).
            path = Path(self.ini_path or default_ini).absolute()
        else:
            path = Path(path).absolute()

        # NOTE(review): a directory target gets 'config.ini' while the default file is
        # named 'configs.ini' -- kept as found, confirm which name is intended.
        path = path / 'config.ini' if path.is_dir() else path

        # Start from the target file when it exists so its other sections are preserved.
        if path.exists():
            om = OptionsManager(str(path))
        else:
            om = OptionsManager(self.ini_path or str(default_ini))

        options = _session_options_to_dict(self)

        for i in options:
            om.set_item('session_options', i, options[i])

        om.save(str(path))

        return self

    def as_dict(self) -> dict:
        """Return this object as a plain settings dict."""
        return _session_options_to_dict(self)
|
||||
|
||||
|
||||
class DriverOptions(Options):
|
||||
"""chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类,
|
||||
增加了删除配置和保存到文件方法。
|
||||
@ -125,6 +435,7 @@ class DriverOptions(Options):
|
||||
self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini')
|
||||
om = OptionsManager(self.ini_path)
|
||||
options_dict = om.chrome_options
|
||||
|
||||
self._binary_location = options_dict.get('binary_location', '')
|
||||
self._arguments = options_dict.get('arguments', [])
|
||||
self._extensions = options_dict.get('extensions', [])
|
||||
@ -145,12 +456,21 @@ class DriverOptions(Options):
|
||||
:param path: ini文件的路径,传入 'default' 保存到默认ini文件
|
||||
:return: 当前对象
|
||||
"""
|
||||
om = OptionsManager()
|
||||
options = _chrome_options_to_dict(self)
|
||||
path = Path(__file__).parent / 'configs.ini' if path == 'default' else path
|
||||
path = Path(path or self.ini_path)
|
||||
if path == 'default':
|
||||
path = (Path(__file__).parent / 'configs.ini').absolute()
|
||||
elif path is None:
|
||||
path = Path(self.ini_path).absolute()
|
||||
else:
|
||||
path = Path(path).absolute()
|
||||
|
||||
path = path / 'config.ini' if path.is_dir() else path
|
||||
path = path.absolute()
|
||||
|
||||
if path.exists():
|
||||
om = OptionsManager(path)
|
||||
else:
|
||||
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
|
||||
|
||||
options = self.as_dict()
|
||||
|
||||
for i in options:
|
||||
if i == 'driver_path':
|
||||
@ -158,7 +478,7 @@ class DriverOptions(Options):
|
||||
else:
|
||||
om.set_item('chrome_options', i, options[i])
|
||||
|
||||
om.save(path)
|
||||
om.save(str(path))
|
||||
|
||||
return self
|
||||
|
||||
@ -296,6 +616,9 @@ class DriverOptions(Options):
|
||||
|
||||
return self
|
||||
|
||||
def as_dict(self) -> dict:
    """Return this options object converted to a plain settings dict."""
    options_dict = _chrome_options_to_dict(self)
    return options_dict
|
||||
|
||||
|
||||
def _dict_to_chrome_options(options: dict) -> Options:
|
||||
"""从传入的字典获取浏览器设置,返回ChromeOptions对象 \n
|
||||
@ -349,25 +672,101 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
||||
return chrome_options
|
||||
|
||||
|
||||
def _chrome_options_to_dict(options: Union[dict, DriverOptions, None]) -> Union[dict, None]:
|
||||
def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None]) -> Union[dict, None]:
|
||||
"""把chrome配置对象转换为字典 \n
|
||||
:param options: chrome配置对象,字典或DriverOptions对象
|
||||
:return: 配置字典
|
||||
"""
|
||||
if options is None or isinstance(options, dict):
|
||||
if isinstance(options, (dict, type(None))):
|
||||
return options
|
||||
|
||||
re_dict = dict()
|
||||
re_dict['debugger_address'] = options.debugger_address
|
||||
re_dict['binary_location'] = options.binary_location
|
||||
re_dict['debugger_address'] = options.debugger_address
|
||||
re_dict['arguments'] = options.arguments
|
||||
re_dict['extensions'] = options.extensions
|
||||
re_dict['experimental_options'] = options.experimental_options
|
||||
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path']
|
||||
|
||||
for attr in attrs:
|
||||
re_dict[attr] = options.__getattribute__(f'_{attr}')
|
||||
|
||||
try:
|
||||
re_dict['driver_path'] = options.driver_path
|
||||
except:
|
||||
re_dict['driver_path'] = None
|
||||
# re_dict['capabilities'] = options.capabilities
|
||||
return re_dict
|
||||
|
||||
|
||||
def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]:
    """Convert a session configuration object into a dict.

    :param options: a SessionOptions object, an existing dict, or None
    :return: the settings dict; a dict or None argument is returned unchanged
    """
    if options is None or isinstance(options, dict):
        return options

    result = {}

    raw_cookies = getattr(options, '_cookies')
    if raw_cookies is not None:
        result['cookies'] = _cookies_to_tuple(raw_cookies)

    # 'adapters' is intentionally left out of the exported settings.
    for name in ('headers', 'proxies', 'hooks', 'params', 'verify', 'stream', 'trust_env', 'max_redirects'):
        value = getattr(options, f'_{name}')
        if value is not None:
            result[name] = value

    # cert and auth default to None, so an unset value cannot be told apart from an
    # explicit one; both are therefore always written.
    result['cert'] = getattr(options, '_cert')
    result['auth'] = getattr(options, '_auth')

    return result
|
||||
|
||||
|
||||
def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
|
||||
"""把Cookie对象转为dict格式 \n
|
||||
:param cookie: Cookie对象
|
||||
:return: cookie字典
|
||||
"""
|
||||
if isinstance(cookie, Cookie):
|
||||
cookie_dict = cookie.__dict__.copy()
|
||||
cookie_dict.pop('rfc2109')
|
||||
cookie_dict.pop('_rest')
|
||||
return cookie_dict
|
||||
|
||||
elif isinstance(cookie, dict):
|
||||
cookie_dict = cookie
|
||||
|
||||
elif isinstance(cookie, str):
|
||||
cookie = cookie.split(';')
|
||||
cookie_dict = {}
|
||||
|
||||
for key, attr in enumerate(cookie):
|
||||
attr_val = attr.lstrip().split('=')
|
||||
|
||||
if key == 0:
|
||||
cookie_dict['name'] = attr_val[0]
|
||||
cookie_dict['value'] = attr_val[1]
|
||||
else:
|
||||
cookie_dict[attr_val[0]] = attr_val[1]
|
||||
|
||||
return cookie_dict
|
||||
|
||||
else:
|
||||
raise TypeError
|
||||
|
||||
return cookie_dict
|
||||
|
||||
|
||||
def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple:
    """Convert cookies given in any supported form into a tuple of cookie dicts.

    :param cookies: a CookieJar, a list/tuple of cookies, a ``'a=1; b=2'`` style string,
                    or a ``{name: value}`` dict
    :return: tuple of cookie dicts; string and dict input produce
             ``{'name': ..., 'value': ...}`` entries
    :raises TypeError: if ``cookies`` is none of the supported types
    """
    if isinstance(cookies, (list, tuple, RequestsCookieJar)):
        return tuple(_cookie_to_dict(cookie) for cookie in cookies)

    if isinstance(cookies, str):
        # Each 'name=value' pair becomes the same {'name', 'value'} shape the other
        # branches produce; splitting on the first '=' keeps '=' inside values, and
        # empty segments (e.g. after a trailing ';') are ignored.
        pairs = (segment.strip().partition('=') for segment in cookies.split(';'))
        return tuple({'name': name, 'value': value} for name, _, value in pairs if name)

    if isinstance(cookies, dict):
        return tuple({'name': name, 'value': value} for name, value in cookies.items())

    raise TypeError
|
||||
|
@ -1,11 +1,11 @@
|
||||
[paths]
|
||||
chromedriver_path =
|
||||
global_tmp_path =
|
||||
chromedriver_path =
|
||||
tmp_path =
|
||||
|
||||
[chrome_options]
|
||||
debugger_address =
|
||||
binary_location =
|
||||
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--headless', '--disable-infobars']
|
||||
binary_location =
|
||||
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
|
||||
extensions = []
|
||||
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
|
||||
|
||||
|
@ -6,24 +6,25 @@
|
||||
"""
|
||||
from sys import exit
|
||||
from typing import Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from requests import Session
|
||||
from requests.cookies import RequestsCookieJar
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import WebDriverException, SessionNotCreatedException
|
||||
from selenium.common.exceptions import SessionNotCreatedException
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from tldextract import extract
|
||||
|
||||
from .config import OptionsManager, _dict_to_chrome_options, _chrome_options_to_dict
|
||||
from .config import (_dict_to_chrome_options, _session_options_to_dict,
|
||||
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
|
||||
|
||||
|
||||
class Drission(object):
|
||||
"""Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
|
||||
|
||||
def __init__(self,
|
||||
driver_or_options: Union[WebDriver, dict, Options] = None,
|
||||
session_or_options: Union[Session, dict] = None,
|
||||
driver_or_options: Union[WebDriver, dict, Options, DriverOptions] = None,
|
||||
session_or_options: Union[Session, dict, SessionOptions] = None,
|
||||
ini_path: str = None,
|
||||
proxy: dict = None):
|
||||
"""初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象 \n
|
||||
@ -34,53 +35,42 @@ class Drission(object):
|
||||
"""
|
||||
self._session = None
|
||||
self._driver = None
|
||||
self._driver_path = 'chromedriver'
|
||||
self._proxy = proxy
|
||||
|
||||
# 若接收到Session对象,直接记录
|
||||
if isinstance(session_or_options, Session):
|
||||
self._session = session_or_options
|
||||
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
|
||||
|
||||
# ------------------处理session options----------------------
|
||||
if session_or_options is None:
|
||||
self._session_options = om.session_options
|
||||
|
||||
# 否则记录其配置信息
|
||||
else:
|
||||
# 若接收到Session对象,直接记录
|
||||
if isinstance(session_or_options, Session):
|
||||
self._session = session_or_options
|
||||
|
||||
# 若接收到配置信息则记录,否则从ini文件读取
|
||||
if session_or_options is None:
|
||||
self._session_options = OptionsManager(ini_path).session_options
|
||||
# 否则记录其配置信息
|
||||
else:
|
||||
self._session_options = session_or_options
|
||||
self._session_options = _session_options_to_dict(session_or_options)
|
||||
|
||||
# 若接收到WebDriver对象,直接记录
|
||||
if isinstance(driver_or_options, WebDriver):
|
||||
self._driver = driver_or_options
|
||||
# ------------------处理driver options----------------------
|
||||
if driver_or_options is None:
|
||||
self._driver_options = om.chrome_options
|
||||
self._driver_options['driver_path'] = om.get_value('paths', 'chromedriver_path')
|
||||
|
||||
# 否则记录其配置信息
|
||||
else:
|
||||
# 若接收到WebDriver对象,直接记录
|
||||
if isinstance(driver_or_options, WebDriver):
|
||||
self._driver = driver_or_options
|
||||
|
||||
# 若接收到配置信息则记录,否则从ini文件读取
|
||||
if driver_or_options is None:
|
||||
om = OptionsManager(ini_path)
|
||||
self._driver_options = om.chrome_options
|
||||
|
||||
if om.paths.get('chromedriver_path', None):
|
||||
self._driver_path = om.paths['chromedriver_path']
|
||||
# 否则记录其配置信息
|
||||
else:
|
||||
self._driver_options = _chrome_options_to_dict(driver_or_options)
|
||||
|
||||
if self._driver_options.get('driver_path', None):
|
||||
self._driver_path = self._driver_options['driver_path']
|
||||
|
||||
@property
|
||||
def session(self) -> Session:
|
||||
"""返回Session对象,如未初始化则按配置信息创建"""
|
||||
if self._session is None:
|
||||
self._session = Session()
|
||||
attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
|
||||
'cert', 'adapters', 'stream', 'trust_env', 'max_redirects']
|
||||
|
||||
for i in attrs:
|
||||
if i in self._session_options:
|
||||
exec(f'self._session.{i} = self._session_options["{i}"]')
|
||||
self._set_session(self._session_options)
|
||||
|
||||
if self._proxy:
|
||||
self._session.proxies = self._proxy
|
||||
@ -99,8 +89,10 @@ class Drission(object):
|
||||
if self._proxy:
|
||||
options.add_argument(f'--proxy-server={self._proxy["http"]}')
|
||||
|
||||
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
|
||||
|
||||
try:
|
||||
self._driver = webdriver.Chrome(self._driver_path, options=options)
|
||||
self._driver = webdriver.Chrome(driver_path, options=options)
|
||||
except SessionNotCreatedException:
|
||||
print('Chrome版本与chromedriver版本不匹配,可执行easy_set.get_match_driver()自动下载匹配的版本。')
|
||||
exit(0)
|
||||
@ -127,12 +119,13 @@ class Drission(object):
|
||||
return self._session_options
|
||||
|
||||
@session_options.setter
|
||||
def session_options(self, value: dict) -> None:
|
||||
"""设置session配置
|
||||
:param value: session配置字典
|
||||
def session_options(self, options: Union[dict, SessionOptions]) -> None:
|
||||
"""设置session配置 \n
|
||||
:param options: session配置字典
|
||||
:return: None
|
||||
"""
|
||||
self._session_options = value
|
||||
self._session_options = _session_options_to_dict(options)
|
||||
self._set_session(self._session_options)
|
||||
|
||||
@property
|
||||
def proxy(self) -> Union[None, dict]:
|
||||
@ -159,103 +152,95 @@ class Drission(object):
|
||||
self._driver.get(url)
|
||||
|
||||
for cookie in cookies:
|
||||
self._ensure_add_cookie(cookie)
|
||||
self.set_cookies(cookie, set_driver=True)
|
||||
|
||||
def cookies_to_session(self, copy_user_agent: bool = False,
|
||||
driver: WebDriver = None,
|
||||
session: Session = None) -> None:
|
||||
def set_cookies(self,
                cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                set_session: bool = False,
                set_driver: bool = False) -> None:
    """Add cookies to the session and/or the driver.

    :param cookies: cookies as a CookieJar, list, tuple, str or dict
    :param set_session: whether to add the cookies to the session
    :param set_driver: whether to add the cookies to the driver
    :return: None
    """
    for cookie in _cookies_to_tuple(cookies):
        if cookie['value'] is None:
            cookie['value'] = ''

        # ---- session ----
        if set_session:
            skipped = ('name', 'value', 'httpOnly', 'expiry')
            extra = {key: val for key, val in cookie.items() if key not in skipped}

            # The session side takes the expiry under the keyword 'expires'.
            if 'expiry' in cookie:
                extra['expires'] = cookie['expiry']

            self.session.cookies.set(cookie['name'], cookie['value'], **extra)

        # ---- driver ----
        if not set_driver:
            continue

        if 'expiry' in cookie:
            cookie['expiry'] = int(cookie['expiry'])

        try:
            browser_domain = extract(self.driver.current_url).fqdn
        except AttributeError:
            browser_domain = ''

        if cookie.get('domain', None):
            # Compare without the leading dot of a domain-wide cookie.
            raw_domain = cookie['domain']
            cookie_domain = raw_domain[1:] if raw_domain[0] == '.' else raw_domain
        elif browser_domain:
            # No domain on the cookie: derive it from the page currently open.
            parts = extract(browser_domain)
            cookie_domain = f'{parts.domain}.{parts.suffix}'
            cookie['domain'] = cookie_domain
        else:
            raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.')

        # Navigate to the cookie's domain first when the current page is on another one.
        if cookie_domain not in browser_domain:
            target = cookie_domain if cookie_domain.startswith('http://') else f'http://{cookie_domain}'
            self.driver.get(target)

        self.driver.add_cookie(cookie)
|
||||
|
||||
def _set_session(self, data: dict) -> None:
|
||||
if self._session is None:
|
||||
self._session = Session()
|
||||
|
||||
attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify',
|
||||
'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters'
|
||||
|
||||
if 'cookies' in data:
|
||||
self.set_cookies(data['cookies'], set_session=True)
|
||||
|
||||
for i in attrs:
|
||||
if i in data:
|
||||
self._session.__setattr__(i, data[i])
|
||||
|
||||
def cookies_to_session(self, copy_user_agent: bool = False) -> None:
|
||||
"""把driver对象的cookies复制到session对象 \n
|
||||
:param copy_user_agent: 是否复制ua信息
|
||||
:param driver: 来源driver对象
|
||||
:param session: 目标session对象
|
||||
:return: None
|
||||
"""
|
||||
driver = driver or self.driver
|
||||
session = session or self.session
|
||||
|
||||
if copy_user_agent:
|
||||
self.user_agent_to_session(driver, session)
|
||||
self.user_agent_to_session(self.driver, self.session)
|
||||
|
||||
for cookie in driver.get_cookies():
|
||||
session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])
|
||||
self.set_cookies(self.driver.get_cookies(), set_session=True)
|
||||
|
||||
def cookies_to_driver(self, url: str,
|
||||
driver: WebDriver = None,
|
||||
session: Session = None) -> None:
|
||||
def cookies_to_driver(self, url: str) -> None:
|
||||
"""把session对象的cookies复制到driver对象 \n
|
||||
:param url: 作用域
|
||||
:param driver: 目标driver对象
|
||||
:param session: 来源session对象
|
||||
:return: None
|
||||
"""
|
||||
driver = driver or self.driver
|
||||
session = session or self.session
|
||||
domain = urlparse(url).netloc
|
||||
url = extract(url)
|
||||
domain = f'{url.domain}.{url.suffix}'
|
||||
cookies = tuple(x for x in self.session.cookies if domain in x.domain)
|
||||
|
||||
if not domain:
|
||||
raise Exception('Without specifying a domain')
|
||||
|
||||
# 翻译cookies
|
||||
for i in [x for x in session.cookies if domain in x.domain]:
|
||||
cookie_data = {'name': i.name, 'value': str(i.value), 'path': i.path, 'domain': i.domain}
|
||||
|
||||
if i.expires:
|
||||
cookie_data['expiry'] = i.expires
|
||||
|
||||
self._ensure_add_cookie(cookie_data, driver=driver)
|
||||
|
||||
def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None:
|
||||
"""添加cookie到driver \n
|
||||
:param cookie: 要添加的cookie
|
||||
:param override_domain: 覆盖作用域
|
||||
:param driver: 操作的driver对象
|
||||
:return: None
|
||||
"""
|
||||
driver = driver or self.driver
|
||||
|
||||
if override_domain:
|
||||
cookie['domain'] = override_domain
|
||||
|
||||
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
|
||||
|
||||
try:
|
||||
browser_domain = extract(driver.current_url).fqdn
|
||||
except AttributeError:
|
||||
browser_domain = ''
|
||||
|
||||
if cookie_domain not in browser_domain:
|
||||
driver.get(f'http://{cookie_domain.lstrip("http://")}')
|
||||
|
||||
if 'expiry' in cookie:
|
||||
cookie['expiry'] = int(cookie['expiry'])
|
||||
|
||||
driver.add_cookie(cookie)
|
||||
|
||||
# 如果添加失败,尝试更宽的域名
|
||||
if not self._is_cookie_in_driver(cookie, driver):
|
||||
cookie['domain'] = extract(cookie['domain']).registered_domain
|
||||
driver.add_cookie(cookie)
|
||||
|
||||
if not self._is_cookie_in_driver(cookie):
|
||||
raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n")
|
||||
|
||||
def _is_cookie_in_driver(self, cookie, driver=None) -> bool:
|
||||
"""检查cookie是否已经在driver里 \n
|
||||
只检查name、value、domain,检查domain时比较宽 \n
|
||||
:param cookie: 要检查的cookie
|
||||
:param driver: 被检查的driver
|
||||
:return: 返回布尔值
|
||||
"""
|
||||
driver = driver or self.driver
|
||||
for driver_cookie in driver.get_cookies():
|
||||
|
||||
if (cookie['name'] == driver_cookie['name'] and
|
||||
cookie['value'] == driver_cookie['value'] and
|
||||
(cookie['domain'] == driver_cookie['domain'] or
|
||||
f'.{cookie["domain"]}' == driver_cookie['domain'])):
|
||||
return True
|
||||
|
||||
return False
|
||||
self.set_cookies(cookies, set_driver=True)
|
||||
|
||||
def user_agent_to_session(self, driver: WebDriver = None, session: Session = None) -> None:
|
||||
"""把driver的user-agent复制到session \n
|
||||
|
@ -140,7 +140,7 @@ class DriverElement(DrissionElement):
|
||||
return self.get_style_property('content', 'after')
|
||||
|
||||
# -----------------共有函数-------------------
|
||||
def texts(self, text_node_only: bool = False) -> List[str]:
|
||||
def texts(self, text_node_only: bool = False) -> list:
|
||||
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
||||
:param text_node_only: 是否只返回文本节点
|
||||
:return: 文本列表
|
||||
@ -567,7 +567,7 @@ class DriverElement(DrissionElement):
|
||||
def execute_driver_find(page_or_ele,
|
||||
loc: Tuple[str, str],
|
||||
mode: str = 'single',
|
||||
timeout: float = 10) -> Union[DriverElement, List[DriverElement or str], str, None]:
|
||||
timeout: float = 10) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
"""执行driver模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: DriverPage对象或DriverElement对象
|
||||
|
@ -53,13 +53,20 @@ class DriverPage(object):
|
||||
@property
|
||||
def cookies(self) -> list:
|
||||
"""返回当前网站cookies"""
|
||||
return self.driver.get_cookies()
|
||||
return self.get_cookies(True)
|
||||
|
||||
@property
def title(self) -> str:
    """Return the title of the current page as reported by the driver."""
    page_title = self.driver.title
    return page_title
|
||||
|
||||
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
    """Return the cookies of the current site.

    :param as_dict: when True return a ``{name: value}`` dict, otherwise the driver's
                    own list of cookie dicts
    :return: cookies as a dict or a list
    """
    driver_cookies = self.driver.get_cookies()

    if not as_dict:
        return driver_cookies

    return {item['name']: item['value'] for item in driver_cookies}
|
||||
|
||||
def _try_to_connect(self,
|
||||
to_url: str,
|
||||
times: int = 0,
|
||||
@ -108,7 +115,7 @@ class DriverPage(object):
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement],
|
||||
mode: str = None,
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement or str], str, None]:
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
"""返回页面中符合条件的元素,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
@ -149,8 +156,8 @@ class DriverPage(object):
|
||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
|
||||
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
|
||||
# 接收到DriverElement对象直接返回
|
||||
elif isinstance(loc_or_ele, DriverElement):
|
||||
@ -169,7 +176,7 @@ class DriverPage(object):
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> List[DriverElement or str]:
|
||||
timeout: float = None) -> List[DriverElement]:
|
||||
"""返回页面中所有符合条件的元素 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
@ -328,33 +335,35 @@ class DriverPage(object):
|
||||
if self.tabs_count:
|
||||
self.to_tab(0)
|
||||
|
||||
def close_other_tabs(self, num_or_handle: Union[int, str] = None) -> None:
|
||||
"""关闭传入的标签页以外标签页,默认保留当前页 \n
|
||||
:param num_or_handle: 要保留的标签页序号或handle,序号第一个为0,最后为-1
|
||||
def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None:
|
||||
"""关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组 \n
|
||||
:param num_or_handles: 要保留的标签页序号或handle,可传入handle组成的列表或元组
|
||||
:return: None
|
||||
"""
|
||||
try:
|
||||
tab = int(num_or_handle)
|
||||
tab = int(num_or_handles)
|
||||
except (ValueError, TypeError):
|
||||
tab = num_or_handle
|
||||
tab = num_or_handles
|
||||
|
||||
tabs = self.driver.window_handles
|
||||
|
||||
if tab is None:
|
||||
page_handle = self.current_tab_handle
|
||||
page_handle = (self.current_tab_handle,)
|
||||
elif isinstance(tab, int):
|
||||
page_handle = tabs[tab]
|
||||
page_handle = (tabs[tab],)
|
||||
elif isinstance(tab, str):
|
||||
page_handle = (tab,)
|
||||
elif isinstance(tab, (list, tuple)):
|
||||
page_handle = tab
|
||||
else:
|
||||
raise TypeError('Argument num_or_handle can only be int or str.')
|
||||
raise TypeError('Argument num_or_handle can only be int, str, list or tuple.')
|
||||
|
||||
for i in tabs: # 遍历所有标签页,关闭非保留的
|
||||
if i != page_handle:
|
||||
if i not in page_handle:
|
||||
self.driver.switch_to.window(i)
|
||||
self.driver.close()
|
||||
|
||||
self.driver.switch_to.window(page_handle) # 把权柄定位回保留的页面
|
||||
self.driver.switch_to.window(page_handle[0]) # 把权柄定位回保留的页面
|
||||
|
||||
def to_tab(self, num_or_handle: Union[int, str] = 0) -> None:
|
||||
"""跳转到标签页 \n
|
||||
|
@ -65,7 +65,7 @@ def set_paths(driver_path: str = None,
|
||||
om.set_item('chrome_options', 'debugger_address', format_path(debugger_address))
|
||||
|
||||
if tmp_path is not None:
|
||||
om.set_item('paths', 'global_tmp_path', format_path(tmp_path))
|
||||
om.set_item('paths', 'tmp_path', format_path(tmp_path))
|
||||
|
||||
if download_path is not None:
|
||||
experimental_options = om.get_value('chrome_options', 'experimental_options')
|
||||
|
@ -7,10 +7,11 @@
|
||||
from typing import Union, List, Tuple
|
||||
|
||||
from requests import Response, Session
|
||||
from requests.cookies import RequestsCookieJar
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .config import DriverOptions
|
||||
from .config import DriverOptions, SessionOptions
|
||||
from .drission import Drission
|
||||
from .driver_element import DriverElement
|
||||
from .driver_page import DriverPage
|
||||
@ -38,7 +39,7 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
mode: str = 'd',
|
||||
timeout: float = 10,
|
||||
driver_options: Union[dict, DriverOptions] = None,
|
||||
session_options: dict = None):
|
||||
session_options: Union[dict, SessionOptions] = None):
|
||||
"""初始化函数 \n
|
||||
:param drission: Drission对象,传入's'或'd'可自动创建Drission对象
|
||||
:param mode: 'd' 或 's',即driver模式和session模式
|
||||
@ -139,6 +140,27 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).title
|
||||
|
||||
def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None:
|
||||
"""设置cookies \n
|
||||
:param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
|
||||
:return: None
|
||||
"""
|
||||
if self._mode == 's':
|
||||
self.drission.set_cookies(cookies, set_session=True)
|
||||
elif self._mode == 'd':
|
||||
self.drission.set_cookies(cookies, set_driver=True)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
"""返回cookies \n
|
||||
:param as_dict: 是否以字典方式返回
|
||||
:param all_domains: 是否返回所有域的cookies
|
||||
:return: cookies信息
|
||||
"""
|
||||
if self._mode == 's':
|
||||
return super().get_cookies(as_dict, all_domains)
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).get_cookies(as_dict)
|
||||
|
||||
def change_mode(self, mode: str = None, go: bool = True) -> None:
|
||||
"""切换模式,接收's'或'd',除此以外的字符串会切换为d模式 \n
|
||||
切换时会把当前模式的cookies复制到目标模式 \n
|
||||
@ -155,8 +177,10 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
if self._mode == 'd':
|
||||
self._driver = True
|
||||
self._url = None if not self._driver else self._drission.driver.current_url
|
||||
|
||||
if self._session_url:
|
||||
self.cookies_to_driver(self._session_url)
|
||||
|
||||
if go:
|
||||
self.get(self._session_url)
|
||||
|
||||
@ -164,8 +188,10 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
elif self._mode == 's':
|
||||
self._session = True
|
||||
self._url = self._session_url
|
||||
|
||||
if self._driver:
|
||||
self.cookies_to_session()
|
||||
|
||||
if go and self._drission.driver.current_url.startswith('http'):
|
||||
self.get(self._drission.driver.current_url)
|
||||
|
||||
@ -310,7 +336,8 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
||||
mode: str = None,
|
||||
timeout: float = None) -> Union[DriverElement, SessionElement, str]:
|
||||
timeout: float = None) -> Union[
|
||||
DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
@ -350,7 +377,7 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> Union[List[DriverElement or str], List[SessionElement or str]]:
|
||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
|
@ -92,7 +92,7 @@ class SessionElement(DrissionElement):
|
||||
"""返回前一个兄弟元素"""
|
||||
return self._get_brother(1, 'ele', 'prev')
|
||||
|
||||
def texts(self, text_node_only: bool = False) -> List[str]:
|
||||
def texts(self, text_node_only: bool = False) -> list:
|
||||
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
||||
:param text_node_only: 是否只返回文本节点
|
||||
:return: 文本列表
|
||||
@ -340,7 +340,7 @@ class SessionElement(DrissionElement):
|
||||
|
||||
def execute_session_find(page_or_ele,
|
||||
loc: Tuple[str, str],
|
||||
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement or str], str, None]:
|
||||
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""执行session模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: SessionPage对象或SessionElement对象
|
||||
|
@ -15,8 +15,10 @@ from typing import Union, List, Tuple
|
||||
from urllib.parse import urlparse, quote, unquote
|
||||
|
||||
from requests import Session, Response
|
||||
from tldextract import extract
|
||||
|
||||
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
||||
from .config import _cookie_to_dict
|
||||
from .session_element import SessionElement, execute_session_find
|
||||
|
||||
|
||||
@ -54,21 +56,39 @@ class SessionPage(object):
|
||||
@property
|
||||
def cookies(self) -> dict:
|
||||
"""返回session的cookies"""
|
||||
return self.session.cookies.get_dict()
|
||||
return self.get_cookies(True)
|
||||
|
||||
@property
|
||||
def title(self) -> str:
|
||||
"""返回网页title"""
|
||||
return self.ele(('css selector', 'title')).text
|
||||
return self.ele('tag:title').text
|
||||
|
||||
@property
|
||||
def html(self) -> str:
|
||||
"""返回页面html文本"""
|
||||
return format_html(self.response.text)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
"""返回cookies \n
|
||||
:param as_dict: 是否以字典方式返回
|
||||
:param all_domains: 是否返回所有域的cookies
|
||||
:return: cookies信息
|
||||
"""
|
||||
if all_domains:
|
||||
cookies = self.session.cookies
|
||||
else:
|
||||
url = extract(self.url)
|
||||
domain = f'{url.domain}.{url.suffix}'
|
||||
cookies = tuple(x for x in self.session.cookies if domain in x.domain)
|
||||
|
||||
if as_dict:
|
||||
return {x.name: x.value for x in cookies}
|
||||
else:
|
||||
return [_cookie_to_dict(cookie) for cookie in cookies]
|
||||
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
|
||||
mode: str = None) -> Union[SessionElement, List[SessionElement or str], str, None]:
|
||||
mode: str = None) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
@ -108,8 +128,8 @@ class SessionPage(object):
|
||||
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
|
||||
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
# if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||
# loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||
|
||||
elif isinstance(loc_or_ele, SessionElement):
|
||||
return loc_or_ele
|
||||
@ -120,7 +140,7 @@ class SessionPage(object):
|
||||
return execute_session_find(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement or str]:
|
||||
loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
示例: \n
|
||||
- 用loc元组查找: \n
|
||||
@ -374,11 +394,12 @@ class SessionPage(object):
|
||||
|
||||
# -------------------打印要下载的文件-------------------
|
||||
if show_msg:
|
||||
print(file_url)
|
||||
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
|
||||
print(f'Downloading to: {goal_path}')
|
||||
|
||||
if skip:
|
||||
print('Skipped.')
|
||||
print('Skipped.\n')
|
||||
|
||||
# -------------------开始下载-------------------
|
||||
if skip:
|
||||
@ -428,7 +449,7 @@ class SessionPage(object):
|
||||
|
||||
# -------------------显示并返回值-------------------
|
||||
if show_msg:
|
||||
print(info)
|
||||
print(info, '\n')
|
||||
|
||||
info = f'{goal_path}\\{full_name}' if download_status else info
|
||||
return download_status, info
|
||||
@ -447,6 +468,11 @@ class SessionPage(object):
|
||||
:param kwargs: 其它参数
|
||||
:return: tuple,第一位为Response或None,第二位为出错信息或'Sussess'
|
||||
"""
|
||||
if not url:
|
||||
if show_errmsg:
|
||||
raise ValueError('url is empty.')
|
||||
return None, 'url is empty.'
|
||||
|
||||
if mode not in ['get', 'post']:
|
||||
raise ValueError("Argument mode can only be 'get' or 'post'.")
|
||||
|
||||
|
387
README.en.md
387
README.en.md
@ -405,7 +405,7 @@ In addition to the above two paths, this method can also set the following paths
|
||||
```python
|
||||
debugger_address # Debug browser address, such as: 127.0.0.1:9222
|
||||
download_path # Download file path
|
||||
global_tmp_path # Temporary folder path
|
||||
tmp_path # Temporary folder path
|
||||
user_data_path # User data path
|
||||
cache_path # cache path
|
||||
```
|
||||
@ -418,6 +418,12 @@ Tips:
|
||||
|
||||
|
||||
|
||||
### Other methods
|
||||
|
||||
If you don't want to use the ini file (for example, when packaging the project), you can add the above two paths to the system PATH, or specify them in the program. See the next section for how to do the latter.
|
||||
|
||||
|
||||
|
||||
## Create drive object Drission
|
||||
|
||||
The creation step is not necessary. If you want to get started quickly, you can skip this section. The MixPage object will automatically create the object.
|
||||
@ -451,16 +457,21 @@ do.set_paths(chrome_path ='D:\\chrome\\chrome.exe',
|
||||
# Settings for s mode
|
||||
session_options = {'headers': {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
|
||||
|
||||
# Proxy settings, optional
|
||||
proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
|
||||
|
||||
# Incoming configuration, driver_options and session_options are optional, you need to use the corresponding mode to pass in
|
||||
drission = Drission(driver_options, session_options)
|
||||
drission = Drission(driver_options, session_options, proxy=proxy)
|
||||
```
|
||||
|
||||
The usage of DriverOptions and SessionOptions is detailed below.
|
||||
|
||||
|
||||
|
||||
## Use page object MixPage
|
||||
|
||||
The MixPage page object encapsulates common web page operations and realizes the switch between driver and session modes.
|
||||
MixPage must receive a Drission object and use the driver or session in it. If it is not passed in, MixPage will create a Drission by itself (using the configuration of the default ini file).
|
||||
MixPage must control a Drission object and use its driver or session. If it is not passed in, MixPage will create one by itself (using the incoming configuration information or reading from the default ini file).
|
||||
|
||||
Tips: When multiple objects work together, you can pass the Drission object in one MixPage to another, so that multiple objects can share login information or operate the same page.
|
||||
|
||||
@ -485,8 +496,6 @@ page = MixPage(driver_options=DriverOption, session_options=SessionOption) # de
|
||||
|
||||
### visit website
|
||||
|
||||
If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified.
|
||||
|
||||
```python
|
||||
# Default mode
|
||||
page.get(url)
|
||||
@ -496,6 +505,8 @@ page.post(url, data, **kwargs) # Only session mode has post method
|
||||
page.get(url, retry=5, interval=0.5)
|
||||
```
|
||||
|
||||
Tips: If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified.
|
||||
|
||||
|
||||
|
||||
### Switch mode
|
||||
@ -506,6 +517,8 @@ Switch between s and d modes, the cookies and the URL you are visiting will be a
|
||||
page.change_mode(go=False) # If go is False, it means that the url is not redirected
|
||||
```
|
||||
|
||||
Tips: When using a method unique to a certain mode, it will automatically jump to that mode.
|
||||
|
||||
|
||||
|
||||
### Page properties
|
||||
@ -534,7 +547,9 @@ page.current_tab_handle # Return to the current tab page handle
|
||||
When calling a method that only belongs to d mode, it will automatically switch to d mode. See APIs for detailed usage.
|
||||
|
||||
```python
|
||||
page.change_mode() # switch mode
|
||||
page.set_cookies() # set cookies
|
||||
page.get_cookies() # Get cookies, which can be returned by list or dict
|
||||
page.change_mode() # Switch mode, it will automatically copy cookies
|
||||
page.cookies_to_session() # Copy cookies from WebDriver object to Session object
|
||||
page.cookies_to_driver() # Copy cookies from Session object to WebDriver object
|
||||
page.get(url, retry, interval, **kwargs) # Use get to access the web page, you can specify the number of retries and the interval
|
||||
@ -553,7 +568,7 @@ page.run_script(js, *args) # Run js statement
|
||||
page.create_tab(url) # Create and locate a tab page, which is at the end
|
||||
page.to_tab(num_or_handle) # Jump to tab page
|
||||
page.close_current_tab() # Close the current tab page
|
||||
page.close_other_tabs(num) # Close other tabs
|
||||
page.close_other_tabs(num_or_handles) # Close other tabs
|
||||
page.to_iframe(iframe) # cut into iframe
|
||||
page.screenshot(path) # Page screenshot
|
||||
page.scroll_to_see(element) # Scroll until an element is visible
|
||||
@ -577,11 +592,9 @@ page.eles() and element.eles() search and return a list of all elements that mee
|
||||
|
||||
Description:
|
||||
|
||||
- The element search timeout is 10 seconds by default, you can also set it as needed.
|
||||
|
||||
- In the following search statement, the colon: indicates a fuzzy match, and the equal sign = indicates an exact match
|
||||
|
||||
- There are five types of query strings: @attribute name, tag, text, xpath, and css
|
||||
- The element search timeout is 10 seconds by default, and it stops waiting when it times out or finds an element. You can also set it as needed.
|
||||
- You can find elements with a query string or a selenium native loc tuple (both can also be used in s mode)
|
||||
- The query string has 7 forms: @attribute name, tag, text, xpath, css, . and #
|
||||
|
||||
```python
|
||||
# Find by attribute
|
||||
@ -590,6 +603,12 @@ page.eles('@class') # Find all elements with class attribute
|
||||
page.eles('@class:class_name') # Find all elements whose class contains class_name
|
||||
page.eles('@class=class_name') # Find all elements whose class is equal to class_name
|
||||
|
||||
# Find by class or id
|
||||
page.ele('#ele_id') # equivalent to page.ele('@id=ele_id')
|
||||
page.ele('#:ele_id') # equivalent to page.ele('@id:ele_id')
|
||||
page.ele('.ele_class') # equivalent to page.ele('@class=ele_class')
|
||||
page.ele('.:ele_class') # equivalent to page.ele('@class:ele_class')
|
||||
|
||||
# Find by tag name
|
||||
page.ele('tag:li') # Find the first li element
|
||||
page.eles('tag:li') # Find all li elements
|
||||
@ -603,7 +622,7 @@ page.ele('tag:div@text()=search_text') # Find the div element whose text is equ
|
||||
|
||||
# Find according to text content
|
||||
page.ele('search text') # find the element containing the incoming text
|
||||
page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, add text: in front to avoid conflicts
|
||||
page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, you should add text: in front to avoid conflicts
|
||||
page.eles('text=search text') # The text is equal to the element of search_text
|
||||
|
||||
# Find according to xpath or css selector
|
||||
@ -626,7 +645,7 @@ element.parent # parent element
|
||||
element.next # next sibling element
|
||||
element.prev # previous sibling element
|
||||
|
||||
# Get shadow- dom, only support open shadow- root
|
||||
# Get the shadow-root and treat it as an element. Only support open shadow-root
|
||||
ele1 = element.shadow_root.ele('tag:div')
|
||||
|
||||
# Chain search
|
||||
@ -734,9 +753,9 @@ shadow_root_element.is_valid() # Returns whether the element is still in dom
|
||||
|
||||
|
||||
|
||||
## Docking with selenium code
|
||||
## Splicing with selenium or requests code
|
||||
|
||||
The DrissionPage code can be seamlessly spliced with the selenium code, either directly using the selenium WebDriver object, or using its own WebDriver everywhere for the selenium code. Make the migration of existing projects very convenient.
|
||||
DrissionPage code can be seamlessly spliced with selenium and requests code. You can use Selenium's WebDriver object directly, or you can export your own WebDriver to selenium code. The Session object of requests can also be passed directly. Make the migration of existing projects very convenient.
|
||||
|
||||
### selenium to DrissionPage
|
||||
|
||||
@ -745,11 +764,10 @@ driver = webdriver.Chrome()
|
||||
driver.get('https://www.baidu.com')
|
||||
|
||||
page = MixPage(Drission(driver)) # Pass the driver to Drission, create a MixPage object
|
||||
print(page.title) # Print result: You will know by clicking on Baidu
|
||||
print(page.title) # Print result: 百度一下,你就知道
|
||||
element = driver.find_element_by_xpath('//div') # Use selenium native functions
|
||||
```
|
||||
|
||||
|
||||
|
||||
### DrissionPage to selenium
|
||||
|
||||
```python
|
||||
@ -757,7 +775,57 @@ page = MixPage()
|
||||
page.get('https://www.baidu.com')
|
||||
|
||||
driver = page.driver # Get the WebDriver object from the MixPage object
|
||||
print(driver.title) # Print results: You will know by clicking on Baidu
|
||||
print(driver.title) # Print results: 百度一下,你就知道
|
||||
```
|
||||
|
||||
### requests to DrissionPage
|
||||
|
||||
``` python
|
||||
session = requests.Session()
|
||||
drission = Drission(session_or_options=session)
|
||||
page = MixPage(drission, mode='s')
|
||||
|
||||
page.get('https://www.baidu.com')
|
||||
```
|
||||
|
||||
### DrissionPage to requests
|
||||
|
||||
```python
|
||||
page = MixPage('s')
|
||||
session = page.session
|
||||
|
||||
response = session.get('https://www.baidu.com')
|
||||
```
|
||||
|
||||
|
||||
|
||||
## requests function usage
|
||||
|
||||
### Connection parameters
|
||||
|
||||
In addition to passing in configuration information and connection parameters when creating, if necessary, you can also set connection parameters every time you visit the URL in the s mode.
|
||||
|
||||
```python
|
||||
headers = {'User-Agent':'...',}
|
||||
cookies = {'name':'value',}
|
||||
proxies = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
|
||||
page.get(url, headers=headers, cookies=cookies, proxies=proxies)
|
||||
```
|
||||
|
||||
Tips:
|
||||
|
||||
- If the connection parameters are not specified, the s mode will automatically fill in the Host and Referer attributes according to the current domain name
|
||||
- The Session configuration passed in when creating MixPage is globally effective
|
||||
|
||||
|
||||
|
||||
### Response object
|
||||
|
||||
The Response object obtained by requests is stored in page.response and can be used directly. Such as:
|
||||
|
||||
```python
|
||||
print(page.response.status_code)
|
||||
print(page.response.headers)
|
||||
```
|
||||
|
||||
|
||||
@ -791,7 +859,7 @@ page.download(url, save_path,'img','rename', show_msg=True)
|
||||
|
||||
|
||||
|
||||
## Chrome Quick Settings
|
||||
## Chrome Settings
|
||||
|
||||
The configuration of chrome is very cumbersome. In order to simplify the use, this library provides setting methods for common configurations.
|
||||
|
||||
@ -821,21 +889,60 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use
|
||||
### Instructions
|
||||
|
||||
```python
|
||||
do = DriverOptions(read_file=False) # Create chrome configuration object, do not read from ini file
|
||||
do = DriverOptions() # read the default ini file to create a DriverOptions object
|
||||
do = DriverOptions('D:\\settings.ini') # read the specified ini file to create a DriverOptions object
|
||||
do = DriverOptions(read_file=False) # Do not read the ini file, create an empty DriverOptions object
|
||||
|
||||
do.set_headless(False) # show the browser interface
|
||||
do.set_no_imgs(True) # Do not load pictures
|
||||
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path
|
||||
do.set_headless(False).set_no_imgs(True) # Support chain operation
|
||||
|
||||
drission = Drission(driver_options=do) # Create Drission object with configuration object
|
||||
page = MixPage(drission) # Create MixPage object with Drission object
|
||||
page = MixPage(driver_options=do) # Create MixPage object with configuration object
|
||||
|
||||
do.save() # Save the currently opened ini file
|
||||
do.save() # save the currently opened ini file
|
||||
do.save('D:\\settings.ini') # save to the specified ini file
|
||||
do.save('default') # Save the current settings to the default ini file
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Session Settings
|
||||
|
||||
### SessionOptions Object
|
||||
|
||||
The SessionOptions object is used to manage the configuration information of the Session. It reads the default ini file configuration information by default when it is created, or you can manually set the required information.
|
||||
|
||||
Configurable properties:
|
||||
|
||||
headers, cookies, auth, proxies, hooks, params, verify, cert, adapters, stream, trust_env, max_redirects.
|
||||
|
||||
**Tips:** cookies can receive information in dict, list, tuple, str, RequestsCookieJar and other formats.
|
||||
|
||||
|
||||
|
||||
### Instructions
|
||||
|
||||
```python
|
||||
so = SessionOptions() # read the default ini file to create a SessionOptions object
|
||||
so = SessionOptions('D:\\settings.ini') # read the specified ini file to create a SessionOptions object
|
||||
so = SessionOptions(read_file=False) # Do not read the ini file, create an empty SessionOptions object
|
||||
|
||||
so.cookies = ['key1=val1; domain=xxxx','key2=val2; domain=xxxx'] # set cookies
|
||||
so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'}
|
||||
so.set_a_header('Connection','keep-alive')
|
||||
|
||||
drission = Drission(session_options=so) # Create Drission object with configuration object
|
||||
page = MixPage(session_options=so) # Create MixPage object with configuration object
|
||||
|
||||
so.save() # Save the currently opened ini file
|
||||
so.save('D:\\settings.ini') # save to the specified ini file
|
||||
so.save('default') # Save the current settings to the default ini file
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Save configuration
|
||||
|
||||
Because there are many configurations of chrome and headers, an ini file is set up specifically to save common configurations. You can use the OptionsManager object to get and save the configuration, and use the DriverOptions object to modify the chrome configuration. You can also save multiple ini files and call them according to different projects.
|
||||
@ -851,7 +958,7 @@ The ini file has three parts by default: paths, chrome_options, and session_opti
|
||||
; chromedriver.exe path
|
||||
chromedriver_path =
|
||||
; Temporary folder path, used to save screenshots, file downloads, etc.
|
||||
global_tmp_path =
|
||||
tmp_path =
|
||||
|
||||
[chrome_options]
|
||||
; The address and port of the opened browser, such as 127.0.0.1:9222
|
||||
@ -937,17 +1044,20 @@ drission = Drission(ini_path='D:\\settings.ini') # Use the specified ini file to
|
||||
|
||||
## easy_set method
|
||||
|
||||
The methods of frequently used settings can be quickly modified. Calling the easy_set method will modify the content of the default ini file.
|
||||
Methods to quickly modify common settings. All for driver mode settings. Calling the easy_set method will modify the content of the default ini file.
|
||||
|
||||
```python
|
||||
set_headless(True) # Turn on headless mode
|
||||
set_no_imgs(True) # Turn on no image mode
|
||||
set_no_js(True) # Disable JS
|
||||
set_mute(True) # Turn on mute mode
|
||||
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent
|
||||
set_proxy('127.0.0.1:8888') # set proxy
|
||||
set_paths(paths) # See [Initialization] section
|
||||
set_argument(arg, value) # Set the attribute. If the attribute has no value (such as'zh_CN.UTF- 8'), the value is bool, which means switch; otherwise, the value is str. When the value is'' or False, delete the attribute item
|
||||
get_match_driver() # Identify the chrome version and automatically download the matching chromedriver.exe
|
||||
show_settings() # Print all settings
|
||||
set_headless(True) # Turn on headless mode
|
||||
set_no_imgs(True) # Turn on no image mode
|
||||
set_no_js(True) # Disable JS
|
||||
set_mute(True) # Turn on mute mode
|
||||
set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent
|
||||
set_proxy('127.0.0.1:8888') # set proxy
|
||||
set_paths(paths) # See [Initialization] section
|
||||
set_argument(arg, value) # Set the attribute. If the attribute has no value (such as 'zh_CN.UTF-8'), the value is a bool that switches it on or off; otherwise, the value is a str. When the value is '' or False, the attribute item is deleted
|
||||
check_driver_version() # Check if chrome and chromedriver versions match
|
||||
```
|
||||
|
||||
# POM mode
|
||||
@ -1056,7 +1166,7 @@ The Drission class is used to manage WebDriver objects and Session objects, and
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- driver_or_options: [WebDriver, dict, Options] - WebDriver object or chrome configuration parameters.
|
||||
- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver object or chrome configuration parameters.
|
||||
- session_or_options: [Session, dict] - Session object configuration parameters
|
||||
- ini_path: str - ini file path, the default is the ini file under the DrissionPage folder
|
||||
- proxy: dict - proxy settings
|
||||
@ -1118,8 +1228,22 @@ Copy the cookies of the driver object to the session object.
|
||||
Parameter Description:
|
||||
|
||||
- copy_user_agent: bool - whether to copy user_agent to session
|
||||
- driver: WebDriver - the WebDriver object to copy cookies from
|
||||
- session: Session - the Session object that receives the cookies
|
||||
|
||||
Returns: None
|
||||
|
||||
|
||||
|
||||
### set_cookies()
|
||||
|
||||
Set cookies.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies information, can be CookieJar, list, tuple, str, dict
|
||||
|
||||
- set_session: bool - whether to set session cookies
|
||||
|
||||
- set_driver: bool - whether to set driver cookies
|
||||
|
||||
Returns: None
|
||||
|
||||
@ -1132,8 +1256,6 @@ Copy cookies from session to driver.
|
||||
Parameter Description:
|
||||
|
||||
- url: str - the domain of cookies
|
||||
- driver: WebDriver- WebDriver object that receives cookies
|
||||
- session: Session- Copy the Session object of cookies
|
||||
|
||||
Returns: None
|
||||
|
||||
@ -1272,6 +1394,32 @@ Returns: bool
|
||||
|
||||
|
||||
|
||||
### set_cookies()
|
||||
|
||||
Set cookies.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies information, can be CookieJar, list, tuple, str, dict
|
||||
|
||||
Returns: None
|
||||
|
||||
|
||||
|
||||
### get_cookies()
|
||||
|
||||
Return cookies.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- as_dict: bool - Whether to return as dict, the default is to return complete cookies as list
|
||||
|
||||
- all_domains: bool - whether to return cookies of all domains, only valid in s mode
|
||||
|
||||
Returns: a dictionary or list of cookies
|
||||
|
||||
|
||||
|
||||
### change_mode()
|
||||
|
||||
Switch mode, 'd' or 's'. When switching, the cookies of the current mode will be copied to the target mode.
|
||||
@ -1521,11 +1669,11 @@ Returns: None
|
||||
|
||||
### close_other_tabs()
|
||||
|
||||
Close tab pages other than the incoming tab page, and keep the current page by default.
|
||||
Close tab pages other than the incoming tab page, and keep the current page by default. You can pass in a list or tuple.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- num_or_handle:[int, str] - The serial number or handle of the tab to keep, the first serial number is 0, and the last is - 1
|
||||
- num_or_handles: [int, str, list, tuple] - The serial number or handle of the tab to keep; a list or tuple of handles can also be passed in
|
||||
|
||||
Returns: None
|
||||
|
||||
@ -2596,6 +2744,161 @@ Return: OptionsManager - return to yourself
|
||||
|
||||
|
||||
|
||||
## SessionOptions class
|
||||
|
||||
### class SessionOptions()
|
||||
|
||||
Session object configuration class.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- read_file: bool - whether to read configuration information from the ini file when creating
|
||||
- ini_path: str - the path of the ini file; if it is None, the default ini file will be read
|
||||
|
||||
|
||||
|
||||
### headers
|
||||
|
||||
headers configuration information.
|
||||
|
||||
Returns: dict
|
||||
|
||||
|
||||
|
||||
### cookies
|
||||
|
||||
Cookies configuration information.
|
||||
|
||||
Returns: list
|
||||
|
||||
|
||||
|
||||
### auth
|
||||
|
||||
auth configuration information.
|
||||
|
||||
Returns: tuple
|
||||
|
||||
|
||||
|
||||
### proxies
|
||||
|
||||
proxies configuration information.
|
||||
|
||||
Returns: dict
|
||||
|
||||
|
||||
|
||||
### hooks
|
||||
|
||||
hooks configuration information.
|
||||
|
||||
Returns: dict
|
||||
|
||||
|
||||
|
||||
### params
|
||||
|
||||
params configuration information.
|
||||
|
||||
Returns: dict
|
||||
|
||||
|
||||
|
||||
### verify
|
||||
|
||||
Verify configuration information.
|
||||
|
||||
Returns: bool
|
||||
|
||||
|
||||
|
||||
### cert
|
||||
|
||||
cert configuration information.
|
||||
|
||||
Returns: [str, tuple]
|
||||
|
||||
|
||||
|
||||
### adapters
|
||||
|
||||
Adapters configuration information.
|
||||
|
||||
Returns: adapters
|
||||
|
||||
|
||||
|
||||
### stream
|
||||
|
||||
stream configuration information.
|
||||
|
||||
Returns: bool
|
||||
|
||||
|
||||
|
||||
### trust_env
|
||||
|
||||
trust_env configuration information.
|
||||
|
||||
Returns: bool
|
||||
|
||||
|
||||
|
||||
### max_redirects
|
||||
|
||||
max_redirects configuration information.
|
||||
|
||||
Returns: int
|
||||
|
||||
|
||||
|
||||
### set_a_header()
|
||||
|
||||
Set an item in headers.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- attr: str-configuration item name
|
||||
|
||||
- value: str-configured value
|
||||
|
||||
Returns: the current object
|
||||
|
||||
|
||||
|
||||
### remove_a_header()
|
||||
|
||||
Remove a setting from headers.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- attr: str-the name of the configuration to be deleted
|
||||
|
||||
Returns: current object
|
||||
|
||||
|
||||
|
||||
### save()
|
||||
|
||||
Save the settings to a file.
|
||||
|
||||
Parameter Description:
|
||||
|
||||
- path: str - the path of the ini file; pass in 'default' to save to the default ini file
|
||||
|
||||
Returns: current object
|
||||
|
||||
|
||||
|
||||
### as_dict()
|
||||
|
||||
Return the current object as a dictionary.
|
||||
|
||||
Returns: dict
|
||||
|
||||
|
||||
|
||||
## DriverOptions class
|
||||
|
||||
### class DriverOptions()
|
||||
@ -2815,7 +3118,7 @@ Parameter Description:
|
||||
|
||||
- download_path: str-download file path
|
||||
|
||||
- global_tmp_path: str-Temporary folder path
|
||||
- tmp_path: str-Temporary folder path
|
||||
|
||||
- user_data_path: str-user data path
|
||||
|
||||
|
396
README.zh-cn.md
396
README.zh-cn.md
@ -6,7 +6,7 @@ DrissionPage,即 driver 和 session 的合体。
|
||||
是个基于 python 的 Web 自动化操作集成工具。
|
||||
它实现了 selenium 和 requests 之间的无缝切换。
|
||||
因此可以兼顾 selenium 的便利性和 requests 的高效率。
|
||||
它集成了页面常用功能,两种模式系统一致的 API,使用便捷。
|
||||
它集成了页面常用功能,两种模式系统一致的 API,使用便捷。
|
||||
它用 POM 模式封装了页面元素常用的方法,很适合自动化操作功能扩展。
|
||||
更棒的是,它的使用方式非常简洁和人性化,代码量少,对新手友好。
|
||||
|
||||
@ -17,7 +17,9 @@ DrissionPage,即 driver 和 session 的合体。
|
||||
|
||||
**示例地址:** [使用DrissionPage的网页自动化及爬虫示例](https://gitee.com/g1879/DrissionPage-demos)
|
||||
|
||||
**联系邮箱:** g1879@qq.com
|
||||
**联系邮箱:** g1879@qq.com
|
||||
|
||||
**交流QQ群:** 897838127
|
||||
|
||||
# 理念及背景
|
||||
|
||||
@ -330,9 +332,9 @@ from DrissionPage import MixPage
|
||||
|
||||
配置路径有四种方法:
|
||||
- 使用 easy_set 工具的 get_match_driver() 方法(推荐)
|
||||
- 将路径写入本库的ini文件
|
||||
- 将路径写入本库的 ini 文件
|
||||
- 将两个路径写入系统变量
|
||||
- 使用时手动传入路径
|
||||
- 在代码中填写路径
|
||||
|
||||
### 使用 get_match_driver() 方法
|
||||
|
||||
@ -407,7 +409,7 @@ Message: session not created: Chrome version must be between 70 and 73
|
||||
```python
|
||||
debugger_address # 调试浏览器地址,如:127.0.0.1:9222
|
||||
download_path # 下载文件路径
|
||||
global_tmp_path # 临时文件夹路径
|
||||
tmp_path # 临时文件夹路径
|
||||
user_data_path # 用户数据路径
|
||||
cache_path # 缓存路径
|
||||
```
|
||||
@ -420,6 +422,12 @@ Tips:
|
||||
|
||||
|
||||
|
||||
### 其它方法
|
||||
|
||||
若你不想使用 ini 文件(如要打包项目时),可在系统路径写入以上两个路径,或在程序中填写。后者的使用方法见下一节。
|
||||
|
||||
|
||||
|
||||
## 创建驱动器对象 Drission
|
||||
|
||||
创建的步骤不是必须,若想快速上手,可跳过本节。MixPage 会自动创建该对象。
|
||||
@ -453,16 +461,21 @@ do.set_paths(chrome_path='D:\\chrome\\chrome.exe',
|
||||
# 用于 s 模式的设置
|
||||
session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}}
|
||||
|
||||
# 代理设置,可选
|
||||
proxy = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'}
|
||||
|
||||
# 传入配置,driver_options 和 session_options 都是可选的,须要使用对应模式才须要传入
|
||||
drission = Drission(driver_options, session_options)
|
||||
drission = Drission(driver_options, session_options, proxy=proxy)
|
||||
```
|
||||
|
||||
DriverOptions 和 SessionOptions 用法详见下文。
|
||||
|
||||
|
||||
|
||||
## 使用页面对象 MixPage
|
||||
|
||||
MixPage 页面对象封装了常用的网页操作,并实现 driver 和 session 模式之间的切换。
|
||||
MixPage 须接收一个 Drission 对象并使用其中的 driver 或 session,如没有传入,MixPage 会自己创建一个(使用默认 ini 文件的配置)。
|
||||
MixPage 须控制一个 Drission 对象并使用其中的 driver 或 session,如没有传入,MixPage 会自己创建一个(使用传入的配置信息或从默认 ini 文件读取)。
|
||||
|
||||
Tips: 多对象协同工作时,可将一个 MixPage 中的 Drission 对象传递给另一个,使多个对象共享登录信息或操作同一个页面。
|
||||
|
||||
@ -487,8 +500,6 @@ page = MixPage(driver_options=do, session_options=so) # 默认 d 模式
|
||||
|
||||
### 访问网页
|
||||
|
||||
若连接出错,程序会自动重试2次,可指定重试次数和等待间隔。
|
||||
|
||||
```python
|
||||
# 默认方式
|
||||
page.get(url)
|
||||
@ -498,6 +509,8 @@ page.post(url, data, **kwargs) # 只有 session 模式才有 post 方法
|
||||
page.get(url, retry=5, interval=0.5)
|
||||
```
|
||||
|
||||
Tips:若连接出错,程序会自动重试2次,可指定重试次数和等待间隔。
|
||||
|
||||
|
||||
|
||||
### 切换模式
|
||||
@ -508,6 +521,8 @@ page.get(url, retry=5, interval=0.5)
|
||||
page.change_mode(go=False) # go 为 False 表示不跳转 url
|
||||
```
|
||||
|
||||
Tips:使用某种模式独有的方法时会自动跳转到该模式。
|
||||
|
||||
|
||||
|
||||
### 页面属性
|
||||
@ -536,7 +551,9 @@ page.current_tab_handle # 返回当前标签页 handle
|
||||
调用只属于 d 模式的方法,会自动切换到 d 模式。详细用法见 APIs。
|
||||
|
||||
```python
|
||||
page.change_mode() # 切换模式
|
||||
page.set_cookies() # 设置cookies
|
||||
page.get_cookies() # 获取 cookies,可以 list 或 dict 方式返回
|
||||
page.change_mode() # 切换模式,会自动复制 cookies
|
||||
page.cookies_to_session() # 从 WebDriver 对象复制 cookies 到 Session 对象
|
||||
page.cookies_to_driver() # 从 Session 对象复制 cookies 到 WebDriver 对象
|
||||
page.get(url, retry, interval, **kwargs) # 用 get 方式访问网页,可指定重试次数及间隔时间
|
||||
@ -555,10 +572,10 @@ page.run_script(js, *args) # 运行 js 语句
|
||||
page.create_tab(url) # 新建并定位到一个标签页,该标签页在最后面
|
||||
page.to_tab(num_or_handle) # 跳转到标签页
|
||||
page.close_current_tab() # 关闭当前标签页
|
||||
page.close_other_tabs(num) # 关闭其它标签页
|
||||
page.close_other_tabs(num_or_handles) # 关闭其它标签页
|
||||
page.to_iframe(iframe) # 切入 iframe
|
||||
page.screenshot(path) # 页面截图
|
||||
page.scrool_to_see(element) # 滚动直到某元素可见
|
||||
page.scroll_to_see(element) # 滚动直到某元素可见
|
||||
page.scroll_to(mode, pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'
|
||||
page.refresh() # 刷新当前页面
|
||||
page.back() # 浏览器后退
|
||||
@ -579,9 +596,10 @@ page.eles() 和 element.eles() 查找返回符合条件的所有元素列表。
|
||||
|
||||
说明:
|
||||
|
||||
- 元素查找超时默认为10秒,你也可以按需要设置。
|
||||
- 元素查找超时默认为10秒,超时或找到元素时停止等待,你也可以按需要设置。
|
||||
- 下面的查找语句中,冒号 : 表示模糊匹配,等号 = 表示精确匹配
|
||||
- 查询字符串有 @属性名、tag、text、xpath、css 五种
|
||||
- 可用查询字符串或 selenium 原生的 loc 元组(s 模式也能用)查找元素
|
||||
- 查询字符串有 @属性名、tag、text、xpath、css、.、# 等7种方法
|
||||
|
||||
```python
|
||||
# 根据属性查找,@ 后面可跟任意属性
|
||||
@ -590,6 +608,12 @@ page.eles('@class') # 查找所有拥有 class 属性的元素
|
||||
page.eles('@class:class_name') # 查找所有 class 含有 class_name 的元素
|
||||
page.eles('@class=class_name') # 查找所有 class 等于 class_name 的元素
|
||||
|
||||
# 根据 class 或 id 查找
|
||||
page.ele('#ele_id') # 等价于 page.ele('@id=ele_id')
|
||||
page.ele('#:ele_id') # 等价于 page.ele('@id:ele_id')
|
||||
page.ele('.ele_class') # 等价于 page.ele('@class=ele_class')
|
||||
page.ele('.:ele_class') # 等价于 page.ele('@class:ele_class')
|
||||
|
||||
# 根据 tag name 查找
|
||||
page.ele('tag:li') # 查找第一个 li 元素
|
||||
page.eles('tag:li') # 查找所有 li 元素
|
||||
@ -603,7 +627,7 @@ page.ele('tag:div@text()=search_text') # 查找文本等于 search_text 的 div
|
||||
|
||||
# 根据文本内容查找
|
||||
page.ele('search text') # 查找包含传入文本的元素
|
||||
page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 开头,则在前面加上 text: 避免冲突
|
||||
page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 开头,则应在前加上 text: 避免冲突
|
||||
page.eles('text=search text') # 文本等于 search_text 的元素
|
||||
|
||||
# 根据 xpath 或 css selector 查找
|
||||
@ -626,7 +650,7 @@ element.parent # 父元素
|
||||
element.next # 下一个兄弟元素
|
||||
element.prev # 上一个兄弟元素
|
||||
|
||||
# 获取 shadow-dom,只支持 open 的 shadow-root
|
||||
# 获取 shadow-root,把它作为元素对待。只支持 open 的 shadow-root
|
||||
ele1 = element.shadow_root.ele('tag:div')
|
||||
|
||||
# 串连查找
|
||||
@ -732,11 +756,11 @@ shadow_root_element.is_valid() # 返回元素是否还在 dom 内
|
||||
|
||||
|
||||
|
||||
## 与 selenium 代码对接
|
||||
## 与 selenium 及 requests 代码对接
|
||||
|
||||
DrissionPage 代码可与 selenium 代码无缝拼接,既可直接使用 selenium 的 WebDriver 对象,也可到处自身的 WebDriver 给 selenium 代码使用。使已有项目的迁移非常方便。
|
||||
DrissionPage 代码可与 selenium 及 requests 代码无缝拼接。既可直接使用 selenium 的 WebDriver 对象,也可导出自身的 WebDriver 给 selenium 代码使用。requests 的 Session 对象也可直接传递。使已有项目的迁移非常方便。
|
||||
|
||||
### selenium 转 DrissionPage
|
||||
### selenium 转 DrissionPage
|
||||
|
||||
```python
|
||||
driver = webdriver.Chrome()
|
||||
@ -746,9 +770,7 @@ page = MixPage(Drission(driver)) # 把 driver 传递给 Drission,创建 MixPa
|
||||
print(page.title) # 打印结果:百度一下,你就知道
|
||||
```
|
||||
|
||||
|
||||
|
||||
### DrissionPage 转 selenium
|
||||
### DrissionPage 转 selenium
|
||||
|
||||
```python
|
||||
page = MixPage()
|
||||
@ -756,6 +778,57 @@ page.get('https://www.baidu.com')
|
||||
|
||||
driver = page.driver # 从 MixPage 对象中获取 WebDriver 对象
|
||||
print(driver.title) # 打印结果:百度一下,你就知道
|
||||
element = driver.find_element_by_xpath('//div') # 使用 selenium 原生功能
|
||||
```
|
||||
|
||||
### requests 转 DrissionPage
|
||||
|
||||
``` python
|
||||
session = requests.Session()
|
||||
drission = Drission(session_or_options=session)
|
||||
page = MixPage(drission, mode='s')
|
||||
|
||||
page.get('https://www.baidu.com')
|
||||
```
|
||||
|
||||
### DrissionPage 转 requests
|
||||
|
||||
```python
|
||||
page = MixPage('s')
|
||||
session = page.session
|
||||
|
||||
response = session.get('https://www.baidu.com')
|
||||
```
|
||||
|
||||
|
||||
|
||||
## requests 功能使用
|
||||
|
||||
### 连接参数
|
||||
|
||||
除了在创建时传入配置信息及连接参数,如有必要,s 模式下也可在每次访问网址时设置连接参数。
|
||||
|
||||
```python
|
||||
headers = {'User-Agent': '......', }
|
||||
cookies = {'name': 'value', }
|
||||
proxies = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'}
|
||||
page.get(url, headers=headers, cookies=cookies, proxies=proxies)
|
||||
```
|
||||
|
||||
Tips:
|
||||
|
||||
- 如果连接参数内没有指定,s 模式会根据当前域名自动填写 Host 和 Referer 属性
|
||||
- 在创建 MixPage 时传入的 Session 配置是全局有效的
|
||||
|
||||
|
||||
|
||||
### Response 对象
|
||||
|
||||
requests 获取到的 Response 对象存放在 page.response,可直接使用。如:
|
||||
|
||||
```python
|
||||
print(page.response.status_code)
|
||||
print(page.response.headers)
|
||||
```
|
||||
|
||||
|
||||
@ -789,7 +862,7 @@ page.download(url, save_path, 'img', 'rename', show_msg=True)
|
||||
|
||||
|
||||
|
||||
## Chrome 快捷设置
|
||||
## Chrome 设置
|
||||
|
||||
chrome 的配置很繁琐,为简化使用,本库提供了常用配置的设置方法。
|
||||
|
||||
@ -819,21 +892,62 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use
|
||||
### 使用方法
|
||||
|
||||
```python
|
||||
do = DriverOptions(read_file=False) # 创建chrome配置对象,不从 ini 文件读取
|
||||
do = DriverOptions() # 读取默认 ini 文件创建 DriverOptions 对象
|
||||
do = DriverOptions('D:\\settings.ini') # 读取指定 ini 文件创建 DriverOptions 对象
|
||||
do = DriverOptions(read_file=False) # 不读取 ini 文件,创建空的 DriverOptions 对象
|
||||
|
||||
do.set_headless(False) # 显示浏览器界面
|
||||
do.set_no_imgs(True) # 不加载图片
|
||||
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # 设置路径
|
||||
do.set_headless(False).set_no_imgs(True) # 支持链式操作
|
||||
|
||||
drission = Drission(driver_options=do) # 用配置对象创建 Drission 对象
|
||||
page = MixPage(drission) # 用Drission对象创建 MixPage 对象
|
||||
page = MixPage(driver_options=do) # 用配置对象创建 MixPage 对象
|
||||
|
||||
do.save() # 保存当前打开的 ini 文件
|
||||
do.save('D:\\settings.ini') # 保存到指定的 ini 文件
|
||||
do.save('default') # 保存当前设置到默认 ini 文件
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Session 设置
|
||||
|
||||
### SessionOptions 对象
|
||||
|
||||
SessionOptions 对象用于管理 Session 的配置信息。它创建时默认读取默认 ini 文件配置信息,也可手动设置所需信息。
|
||||
|
||||
可配置的属性:
|
||||
|
||||
headers、cookies、auth、proxies、hooks、params、verify、cert、adapters、stream、trust_env、max_redirects。
|
||||
|
||||
**Tips:** cookies 可接收 dict、list、tuple、str、RequestsCookieJar 等格式的信息。
|
||||
|
||||
|
||||
|
||||
### 使用方法
|
||||
|
||||
```python
|
||||
so = SessionOptions() # 读取默认 ini 文件创建 SessionOptions 对象
|
||||
so = SessionOptions('D:\\settings.ini') # 读取指定 ini 文件创建 SessionOptions 对象
|
||||
so = SessionOptions(read_file=False) # 不读取 ini 文件,创建空的 SessionOptions 对象
|
||||
|
||||
so.cookies = ['key1=val1; domain=xxxx', 'key2=val2; domain=xxxx'] # 设置 cookies
|
||||
so.headers = {'User-Agent': 'xxxx', 'Accept-Charset': 'xxxx'}
|
||||
so.set_a_header('Connection', 'keep-alive')
|
||||
|
||||
drission = Drission(session_options=so) # 用配置对象创建 Drission 对象
|
||||
page = MixPage(session_options=so) # 用配置对象创建 MixPage 对象
|
||||
|
||||
so.save() # 保存当前打开的 ini 文件
|
||||
so.save('D:\\settings.ini') # 保存到指定的 ini 文件
|
||||
so.save('default') # 保存当前设置到默认 ini 文件
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## 保存配置
|
||||
|
||||
因 chrome 和 headers 配置繁多,故设置一个 ini 文件专门用于保存常用配置,你可使用 OptionsManager 对象获取和保存配置,用 DriverOptions 对象修改 chrome 配置。你也可以保存多个 ini 文件,按不同项目须要调用。
|
||||
@ -849,7 +963,7 @@ ini 文件默认拥有三部分配置:paths、chrome_options、session_options
|
||||
; chromedriver.exe路径
|
||||
chromedriver_path =
|
||||
; 临时文件夹路径,用于保存截图、文件下载等
|
||||
global_tmp_path =
|
||||
tmp_path =
|
||||
|
||||
[chrome_options]
|
||||
; 已打开的浏览器地址和端口,如127.0.0.1:9222
|
||||
@ -933,9 +1047,11 @@ drission = Drission(ini_path='D:\\settings.ini') # 使用指定 ini 文件创
|
||||
|
||||
## easy_set 方法
|
||||
|
||||
可快速地修改常用设置的方法,调用 easy_set 方法会修改默认 ini 文件相关内容。
|
||||
可快速地修改常用设置的方法。全部用于 driver 模式的设置。调用 easy_set 方法会修改默认 ini 文件相关内容。
|
||||
|
||||
```python
|
||||
get_match_driver() # 识别chrome版本并自动下载匹配的chromedriver.exe
|
||||
show_settings() # 打印所有设置
|
||||
set_headless(True) # 开启 headless 模式
|
||||
set_no_imgs(True) # 开启无图模式
|
||||
set_no_js(True) # 禁用 JS
|
||||
@ -944,6 +1060,7 @@ set_user_agent('Mozilla/5.0 (Macintosh; Int......') # 设置 user agent
|
||||
set_proxy('127.0.0.1:8888') # 设置代理
|
||||
set_paths(paths) # 见 [初始化] 一节
|
||||
set_argument(arg, value) # 设置属性,若属性无值(如'zh_CN.UTF-8'),value 为 bool 表示开关;否则value为str,当 value为''或 False,删除该属性项
|
||||
check_driver_version() # 检查chrome和chromedriver版本是否匹配
|
||||
```
|
||||
|
||||
# POM 模式
|
||||
@ -1052,10 +1169,10 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
|
||||
|
||||
参数说明:
|
||||
|
||||
- driver_or_options: [WebDriver, dict, Options] - WebDriver 对象或 chrome 配置参数。
|
||||
- session_or_options: [Session, dict] - Session 对象配置参数
|
||||
- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件
|
||||
- proxy: dict - 代理设置
|
||||
- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver 对象或 chrome 配置参数。
|
||||
- session_or_options: [Session, dict] - Session 对象配置参数
|
||||
- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件
|
||||
- proxy: dict - 代理设置
|
||||
|
||||
|
||||
|
||||
@ -1107,6 +1224,20 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
|
||||
|
||||
|
||||
|
||||
### set_cookies()
|
||||
|
||||
设置 cookies。
|
||||
|
||||
参数说明:
|
||||
|
||||
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为CookieJar, list, tuple, str, dict
|
||||
- set_session: bool - 是否设置 session 的 cookies
|
||||
- set_driver: bool - 是否设置 driver 的 cookies
|
||||
|
||||
返回: None
|
||||
|
||||
|
||||
|
||||
### cookies_to_session()
|
||||
|
||||
把 driver 对象的 cookies 复制到 session 对象。
|
||||
@ -1114,8 +1245,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
|
||||
参数说明:
|
||||
|
||||
- copy_user_agent: bool - 是否复制 user_agent 到 session
|
||||
- driver: WebDriver - 复制 cookies 的 WebDriver 对象
|
||||
- session: Session - 接收 cookies 的 Session 对象
|
||||
|
||||
返回: None
|
||||
|
||||
@ -1128,8 +1257,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角
|
||||
参数说明:
|
||||
|
||||
- url: str - cookies 的域
|
||||
- driver: WebDriver - 接收 cookies 的 WebDriver 对象
|
||||
- session: Session - 复制 cookies 的 Session 对象
|
||||
|
||||
返回: None
|
||||
|
||||
@ -1268,6 +1395,31 @@ MixPage 封装了页面操作的常用功能,可在 driver 和 session 模式
|
||||
|
||||
|
||||
|
||||
### set_cookies()
|
||||
|
||||
设置 cookies。
|
||||
|
||||
参数说明:
|
||||
|
||||
- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为CookieJar, list, tuple, str, dict
|
||||
|
||||
返回: None
|
||||
|
||||
|
||||
|
||||
### get_cookies()
|
||||
|
||||
返回 cookies。
|
||||
|
||||
参数说明:
|
||||
|
||||
- as_dict: bool - 是否以 dict 方式返回,默认以 list 返回完整的 cookies
|
||||
- all_domains: bool - 是否返回所有域名的 cookies,只有 s 模式下生效
|
||||
|
||||
返回:cookies 字典或列表
|
||||
|
||||
|
||||
|
||||
### change_mode()
|
||||
|
||||
切换模式,'d' 或 's'。切换时会把当前模式的 cookies 复制到目标模式。
|
||||
@ -1517,11 +1669,11 @@ d 模式时检查网页是否符合预期。默认由 response 状态检查,
|
||||
|
||||
### close_other_tabs()
|
||||
|
||||
关闭传入的标签页以外标签页,默认保留当前页。
|
||||
关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组。
|
||||
|
||||
参数说明:
|
||||
|
||||
- num_or_handle:[int, str] - 要保留的标签页序号或 handle,序号第一个为0,最后为-1
|
||||
- num_or_handles:[int, str] - 要保留的标签页序号或 handle,可传入 handle 组成的列表或元组
|
||||
|
||||
返回: None
|
||||
|
||||
@ -2554,6 +2706,160 @@ shadow-root 所依赖的父元素。
|
||||
|
||||
|
||||
|
||||
## SessionOptions 类
|
||||
|
||||
### class SessionOptions()
|
||||
|
||||
Session 对象配置类。
|
||||
|
||||
参数说明:
|
||||
|
||||
- read_file: bool - 创建时是否从 ini 文件读取配置信息
|
||||
- ini_path: str - ini 文件路径,为None则读取默认 ini 文件
|
||||
|
||||
|
||||
|
||||
### headers
|
||||
|
||||
headers 配置信息。
|
||||
|
||||
返回: dict
|
||||
|
||||
|
||||
|
||||
### cookies
|
||||
|
||||
cookies 配置信息。
|
||||
|
||||
返回: list
|
||||
|
||||
|
||||
|
||||
### auth
|
||||
|
||||
auth 配置信息。
|
||||
|
||||
返回: tuple
|
||||
|
||||
|
||||
|
||||
### proxies
|
||||
|
||||
proxies 配置信息。
|
||||
|
||||
返回: dict
|
||||
|
||||
|
||||
|
||||
### hooks
|
||||
|
||||
hooks 配置信息。
|
||||
|
||||
返回: dict
|
||||
|
||||
|
||||
|
||||
### params
|
||||
|
||||
params 配置信息。
|
||||
|
||||
返回: dict
|
||||
|
||||
|
||||
|
||||
### verify
|
||||
|
||||
verify 配置信息。
|
||||
|
||||
返回: bool
|
||||
|
||||
|
||||
|
||||
### cert
|
||||
|
||||
cert 配置信息。
|
||||
|
||||
返回: [str, tuple]
|
||||
|
||||
|
||||
|
||||
### adapters
|
||||
|
||||
adapters 配置信息。
|
||||
|
||||
返回: adapters
|
||||
|
||||
|
||||
|
||||
### stream
|
||||
|
||||
stream 配置信息。
|
||||
|
||||
返回: bool
|
||||
|
||||
|
||||
|
||||
### trust_env
|
||||
|
||||
trust_env 配置信息。
|
||||
|
||||
返回: bool
|
||||
|
||||
|
||||
|
||||
### max_redirects
|
||||
|
||||
max_redirects 配置信息。
|
||||
|
||||
返回: int
|
||||
|
||||
|
||||
|
||||
### set_a_header()
|
||||
|
||||
设置 headers 中一个项。
|
||||
|
||||
参数说明:
|
||||
|
||||
- attr: str - 配置项名称
|
||||
- value: str - 配置的值
|
||||
|
||||
返回: 当前对象
|
||||
|
||||
|
||||
|
||||
### remove_a_header()
|
||||
|
||||
从 headers 中删除一个设置。
|
||||
|
||||
参数说明:
|
||||
|
||||
- attr: str - 要删除的配置名称
|
||||
|
||||
返回:当前对象
|
||||
|
||||
|
||||
|
||||
### save()
|
||||
|
||||
保存设置到文件。
|
||||
|
||||
参数说明:
|
||||
|
||||
- path: str - ini文件的路径,传入 'default' 保存到默认ini文件
|
||||
|
||||
返回:当前对象
|
||||
|
||||
|
||||
|
||||
### as_dict()
|
||||
|
||||
以字典形式返回当前对象。
|
||||
|
||||
返回: dict
|
||||
|
||||
|
||||
|
||||
## DriverOptions 类
|
||||
|
||||
### class DriverOptions()
|
||||
@ -2764,15 +3070,15 @@ chrome 配置太复杂,所以把常用的配置写成简单的方法,调用
|
||||
|
||||
参数说明:
|
||||
|
||||
- driver_path: str - chromedriver.exe 路径
|
||||
- chrome_path: str - chrome.exe 路径
|
||||
- driver_path: str - chromedriver.exe 路径
|
||||
- chrome_path: str - chrome.exe 路径
|
||||
- debugger_address: str - 调试浏览器地址,例:127.0.0.1:9222
|
||||
- download_path: str - 下载文件路径
|
||||
- global_tmp_path: str - 临时文件夹路径
|
||||
- user_data_path: str - 用户数据路径
|
||||
- cache_path: str - 缓存路径
|
||||
- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件
|
||||
- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配
|
||||
- download_path: str - 下载文件路径
|
||||
- tmp_path: str - 临时文件夹路径
|
||||
- user_data_path: str - 用户数据路径
|
||||
- cache_path: str - 缓存路径
|
||||
- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件
|
||||
- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配
|
||||
|
||||
返回: None
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user