diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index f76ac4a..d5f2850 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -12,5 +12,7 @@ from .web_page import WebPage from .chromium_page import ChromiumPage from .session_page import SessionPage from .drission import Drission -from .config import DriverOptions, SessionOptions +from .configs.driver_options import DriverOptions +from .configs.chromium_options import ChromiumOptions +from .configs.session_options import SessionOptions from .action_chains import ActionChains diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 4f6bd60..2f3856d 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -10,8 +10,7 @@ from requests import Session from .base import BasePage from .chromium_element import ChromiumElementWaiter, ChromiumScroll, ChromiumElement, run_js, make_chromium_ele -from .common import get_loc, offset_scroll -from .config import cookies_to_tuple +from .common import get_loc, offset_scroll, cookies_to_tuple from .session_element import make_session_ele from .chromium_driver import ChromiumDriver diff --git a/DrissionPage/chromium_driver.py b/DrissionPage/chromium_driver.py index 6c02251..2c97fe5 100644 --- a/DrissionPage/chromium_driver.py +++ b/DrissionPage/chromium_driver.py @@ -174,7 +174,7 @@ class ChromiumDriver(object): timeout = kwargs.pop("_timeout", None) result = self._send({"method": _method, "params": kwargs}, timeout=timeout) if 'result' not in result and 'error' in result: - raise CallMethodException(f"调用方法:{_method} 错误:{result['error']['message']}") + raise CallMethodException(f"\n调用方法:{_method}\n参数:{kwargs}\n错误:{result['error']['message']}") return result['result'] diff --git a/DrissionPage/chromium_page.py b/DrissionPage/chromium_page.py index 501fe93..8e3184c 100644 --- a/DrissionPage/chromium_page.py +++ b/DrissionPage/chromium_page.py @@ -14,7 +14,7 @@ from .chromium_base import Timeout, 
ChromiumBase from .chromium_driver import ChromiumDriver from .chromium_tab import ChromiumTab from .common import connect_browser -from .config import DriverOptions +from .configs.driver_options import DriverOptions from .session_page import DownloadSetter @@ -418,6 +418,7 @@ class WindowSetter(object): def normal(self): """设置窗口为常规模式""" self._perform({'windowState': 'normal'}) + self._perform({'windowState': 'normal'}) def size(self, width=None, height=None): """设置窗口大小 \n @@ -446,7 +447,7 @@ class WindowSetter(object): def _get_info(self): """获取窗口位置及大小信息""" - return self.driver.Browser.getWindowBounds() + return self.driver.Browser.getWindowForTarget() def _perform(self, bounds): """执行改变窗口大小操作 diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 76c1d0c..acd76ac 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -4,17 +4,21 @@ @Contact : g1879@qq.com """ from html import unescape +from http.cookiejar import Cookie from pathlib import Path from platform import system from re import split, search, sub from shutil import rmtree from subprocess import Popen from time import perf_counter, sleep -from zipfile import ZipFile from urllib.parse import urlparse, urljoin, urlunparse -from requests import get as requests_get +from zipfile import ZipFile -from .config import DriverOptions +from requests import get as requests_get +from requests.cookies import RequestsCookieJar + +# from .configs.chromium_options import ChromiumOptions +from .configs.driver_options import DriverOptions def get_ele_txt(e): @@ -329,6 +333,61 @@ def format_html(text): return unescape(text).replace('\xa0', ' ') if text else text +def cookie_to_dict(cookie): + """把Cookie对象转为dict格式 \n + :param cookie: Cookie对象 + :return: cookie字典 + """ + if isinstance(cookie, Cookie): + cookie_dict = cookie.__dict__.copy() + cookie_dict.pop('rfc2109') + cookie_dict.pop('_rest') + return cookie_dict + + elif isinstance(cookie, dict): + cookie_dict = cookie + + elif isinstance(cookie, 
str): + cookie = cookie.split(',' if ',' in cookie else ';') + cookie_dict = {} + + for key, attr in enumerate(cookie): + attr_val = attr.lstrip().split('=') + + if key == 0: + cookie_dict['name'] = attr_val[0] + cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else '' + else: + cookie_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else '' + + return cookie_dict + + else: + raise TypeError('cookie参数必须为Cookie、str或dict类型。') + + return cookie_dict + + +def cookies_to_tuple(cookies): + """把cookies转为tuple格式 \n + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :return: 返回tuple形式的cookies + """ + if isinstance(cookies, (list, tuple, RequestsCookieJar)): + cookies = tuple(cookie_to_dict(cookie) for cookie in cookies) + + elif isinstance(cookies, str): + cookies = tuple(cookie_to_dict(cookie.lstrip()) for cookie in cookies.split(";")) + + elif isinstance(cookies, dict): + cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) + + else: + raise TypeError('cookies参数必须为RequestsCookieJar、list、tuple、str或dict类型。') + + return cookies + + def clean_folder(folder_path, ignore=None): """清空一个文件夹,除了ignore里的文件和文件夹 \n :param folder_path: 要清空的文件夹路径 @@ -524,8 +583,8 @@ def connect_browser(option): chrome_path = get_exe_from_port(port) if chrome_path == 'chrome' and system_type == 'windows' else chrome_path return chrome_path, None - args = _get_launch_args(option) - _set_prefs(option) + args = get_launch_args(option) + set_prefs(option) # ----------创建浏览器进程---------- try: @@ -546,6 +605,71 @@ def connect_browser(option): return chrome_path, debugger +def get_launch_args(opt): + """从DriverOptions获取命令行启动参数""" + sys = system().lower() + result = [] + + # ----------处理arguments----------- + args = opt.arguments + for arg in args: + index = arg.find('=') + 1 + if index == 0: + result.append(arg) + else: + a = arg[index:].strip() + if a.startswith('"') and a.endswith('"'): + result.append(arg) + else: + result.append(f'{arg[:index]}"{a}"') + 
+ # ----------处理插件extensions------------- + ext = opt._extension_files if isinstance(opt, DriverOptions) else opt.extensions + if ext: + ext = set(ext) + if sys == 'windows': + ext = '","'.join(ext) + ext = f'"{ext}"' + else: + ext = ','.join(ext) + ext = f'--load-extension={ext}' + result.append(ext) + + return result + + +def set_prefs(opt): + """处理启动配置中的prefs项,目前只能对已存在文件夹配置""" + # todo: 支持删除pref项 + prefs = opt.experimental_options.get('prefs', None) if isinstance(opt, DriverOptions) else opt.preferences + if prefs and opt.user_data_path: + args = opt.arguments + user = 'Default' + for arg in args: + if arg.startswith('--profile-directory'): + user = arg.split('=')[-1].strip() + break + + prefs_file = Path(opt.user_data_path) / user / 'Preferences' + if not prefs_file.exists(): + prefs_file.parent.mkdir(parents=True, exist_ok=True) + with open(prefs_file, 'w') as f: + f.write('{}') + + from json import load, dump + with open(prefs_file, "r", encoding='utf-8') as f: + j = load(f) + + for pref in prefs: + value = prefs[pref] + pref = pref.split('.') + _make_leave_in_dict(j, pref, 0, len(pref)) + _set_value_to_dict(j, pref, value) + + with open(prefs_file, 'w', encoding='utf-8') as f: + dump(j, f) + + def _run_browser(port, path: str, args) -> Popen: """创建chrome进程 \n :param port: 端口号 @@ -574,66 +698,6 @@ def _run_browser(port, path: str, args) -> Popen: raise ConnectionError('无法连接浏览器。') -def _get_launch_args(opt: DriverOptions) -> list: - """从DriverOptions获取命令行启动参数""" - sys = system().lower() - result = [] - - # ----------处理arguments----------- - args = opt.arguments - for arg in args: - if arg.startswith(('--user-data-dir', '--disk-cache-dir', '--user-agent')) and sys == 'windows': - index = arg.find('=') + 1 - result.append(f'{arg[:index]}"{arg[index:].strip()}"') - else: - result.append(arg) - - # ----------处理插件extensions------------- - ext = opt._extension_files - if ext: - ext = set(ext) - if sys == 'windows': - ext = '","'.join(ext) - ext = f'"{ext}"' - else: 
- ext = ','.join(ext) - ext = f'--load-extension={ext}' - result.append(ext) - - return result - - -def _set_prefs(opt: DriverOptions) -> None: - """处理启动配置中的prefs项,目前只能对已存在文件夹配置""" - prefs = opt.experimental_options.get('prefs', None) - if prefs and opt.user_data_path: - args = opt.arguments - profile = 'Default' - for arg in args: - if arg.startswith('--profile-directory'): - profile = arg.split('=')[-1].strip() - break - - prefs_file = Path(opt.user_data_path) / profile / 'Preferences' - if not prefs_file.exists(): - prefs_file.parent.mkdir(parents=True, exist_ok=True) - with open(prefs_file, 'w') as f: - f.write('{}') - - from json import load, dump - with open(prefs_file, "r", encoding='utf-8') as f: - j = load(f) - - for pref in prefs: - value = prefs[pref] - pref = pref.split('.') - _make_leave_in_dict(j, pref, 0, len(pref)) - _set_value_to_dict(j, pref, value) - - with open(prefs_file, 'w', encoding='utf-8') as f: - dump(j, f) - - def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None: """把prefs中a.b.c形式的属性转为a['b']['c']形式 :param target_dict: 要处理的dict diff --git a/DrissionPage/common.pyi b/DrissionPage/common.pyi index 4d9551e..09236f0 100644 --- a/DrissionPage/common.pyi +++ b/DrissionPage/common.pyi @@ -3,13 +3,16 @@ @Author : g1879 @Contact : g1879@qq.com """ +from http.cookiejar import Cookie from pathlib import Path from typing import Union -from requests import get as requests_get + +from requests.cookies import RequestsCookieJar from .base import BasePage, DrissionElement from .chromium_element import ChromiumElement from .config import DriverOptions +from .configs.chromium_options import ChromiumOptions def get_ele_txt(e: DrissionElement) -> str: ... @@ -27,6 +30,12 @@ def translate_loc(loc: tuple) -> tuple: ... def format_html(text: str) -> str: ... +def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ... + + +def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ... 
+ + def clean_folder(folder_path: str, ignore: list = None) -> None: ... @@ -54,10 +63,16 @@ def make_absolute_link(link, page: BasePage = None) -> str: ... def is_js_func(func: str) -> bool: ... -def port_is_using(ip: str, port: str) -> bool: ... +def port_is_using(ip: str, port: Union[str, int]) -> bool: ... -def connect_browser(option: DriverOptions) -> tuple: ... +def connect_browser(option: Union[ChromiumOptions, DriverOptions]) -> tuple: ... + + +def get_launch_args(opt: Union[ChromiumOptions, DriverOptions]) -> list: ... + + +def set_prefs(opt: Union[ChromiumOptions, DriverOptions]) -> None: ... def location_in_viewport(page, loc_x: int, loc_y: int) -> bool: ... diff --git a/DrissionPage/config.py b/DrissionPage/config.py deleted file mode 100644 index 5efdd6f..0000000 --- a/DrissionPage/config.py +++ /dev/null @@ -1,905 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from configparser import RawConfigParser, NoSectionError, NoOptionError -from http.cookiejar import Cookie -from pathlib import Path - -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options - - -class OptionsManager(object): - """管理配置文件内容的类""" - - def __init__(self, path=None): - """初始化,读取配置文件,如没有设置临时文件夹,则设置并新建 \n - :param path: ini文件的路径,默认读取模块文件夹下的 - """ - self.ini_path = str(Path(__file__).parent / 'configs.ini') if path == 'default' or path is None else path - if not Path(self.ini_path).exists(): - raise FileNotFoundError('ini文件不存在。') - self._conf = RawConfigParser() - self._conf.read(self.ini_path, encoding='utf-8') - - def __getattr__(self, item): - """以dict形似返回获取大项信息 - :param item: 项名 - :return: None - """ - return self.get_option(item) - - def get_value(self, section, item): - """获取配置的值 \n - :param section: 段名 - :param item: 项名 - :return: 项值 - """ - try: - return eval(self._conf.get(section, item)) - except (SyntaxError, NameError): - return self._conf.get(section, item) - except NoSectionError and 
NoOptionError: - return None - - def get_option(self, section): - """把section内容以字典方式返回 \n - :param section: 段名 - :return: 段内容生成的字典 - """ - items = self._conf.items(section) - option = dict() - - for j in items: - try: - option[j[0]] = eval(self._conf.get(section, j[0])) - except Exception: - option[j[0]] = self._conf.get(section, j[0]) - - return option - - def set_item(self, section, item, value): - """设置配置值 \n - :param section: 段名 - :param item: 项名 - :param value: 项值 - :return: None - """ - self._conf.set(section, item, str(value)) - self.__setattr__(f'_{section}', None) - return self - - def remove_item(self, section, item): - """删除配置值 \n - :param section: 段名 - :param item: 项名 - :return: None - """ - self._conf.remove_option(section, item) - return self - - def save(self, path=None): - """保存配置文件 \n - :param path: ini文件的路径,传入 'default' 保存到默认ini文件 - :return: 保存路径 - """ - default_path = (Path(__file__).parent / 'configs.ini').absolute() - if path == 'default': - path = default_path - elif path is None: - path = Path(self.ini_path).absolute() - else: - path = Path(path).absolute() - - path = path / 'config.ini' if path.is_dir() else path - - path = str(path) - self._conf.write(open(path, 'w', encoding='utf-8')) - - print(f'配置已保存到文件:{path}') - if path == str(default_path): - print('以后程序可自动从文件加载配置。') - - return path - - def save_to_default(self): - """保存当前配置到默认ini文件""" - return self.save('default') - - -class SessionOptions(object): - """requests的Session对象配置类""" - - def __init__(self, read_file=True, ini_path=None): - """ - :param read_file: 是否从文件读取配置 - :param ini_path: ini文件路径 - """ - self.ini_path = None - self._download_path = None - self._headers = None - self._cookies = None - self._auth = None - self._proxies = None - self._hooks = None - self._params = None - self._verify = None - self._cert = None - self._adapters = None - self._stream = None - self._trust_env = None - self._max_redirects = None - self._timeout = 10 - - self._del_set = set() # 记录要从ini文件删除的参数 - 
- if read_file: - self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini') - om = OptionsManager(self.ini_path) - options_dict = om.session_options - - if options_dict.get('headers', None) is not None: - self.set_headers(options_dict['headers']) - - if options_dict.get('cookies', None) is not None: - self.set_cookies(options_dict['cookies']) - - if options_dict.get('auth', None) is not None: - self._auth = options_dict['auth'] - - if options_dict.get('params', None) is not None: - self._params = options_dict['params'] - - if options_dict.get('verify', None) is not None: - self._verify = options_dict['verify'] - - if options_dict.get('cert', None) is not None: - self._cert = options_dict['cert'] - - if options_dict.get('stream', None) is not None: - self._stream = options_dict['stream'] - - if options_dict.get('trust_env', None) is not None: - self._trust_env = options_dict['trust_env'] - - if options_dict.get('max_redirects', None) is not None: - self._max_redirects = options_dict['max_redirects'] - - self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) - self._timeout = om.timeouts.get('implicit', 10) - self._download_path = om.paths.get('download_path', None) - - # ===========须独立处理的项开始============ - @property - def download_path(self): - """返回默认下载路径属性信息""" - return self._download_path - - def set_paths(self, download_path=None): - """设置默认下载路径 \n - :param download_path: 下载路径 - :return: 返回当前对象 - """ - if download_path is not None: - self._download_path = str(download_path) - return self - - @property - def timeout(self): - """返回timeout属性信息""" - return self._timeout - - def set_timeout(self, second): - """设置超时信息 - :param second: 秒数 - :return: 返回当前对象 - """ - self._timeout = second - return self - - @property - def proxies(self): - """返回proxies设置信息""" - if self._proxies is None: - self._proxies = {} - return self._proxies - - def set_proxies(self, http, https=None): - """设置proxies参数 \n - :param http: http代理地址 - :param https: 
https代理地址 - :return: 返回当前对象 - """ - proxies = None if http == https is None else {'http': http, 'https': https or http} - self._sets('proxies', proxies) - return self - - # ===========须独立处理的项结束============ - - @property - def headers(self): - """返回headers设置信息""" - if self._headers is None: - self._headers = {} - return self._headers - - def set_headers(self, headers): - """设置headers参数 \n - :param headers: 参数值,传入None可在ini文件标记删除 - :return: 返回当前对象 - """ - if headers is None: - self._headers = None - self._del_set.add('headers') - else: - self._headers = {key.lower(): headers[key] for key in headers} - return self - - def set_a_header(self, attr, value): - """设置headers中一个项 \n - :param attr: 设置名称 - :param value: 设置值 - :return: 返回当前对象 - """ - if self._headers is None: - self._headers = {} - - self._headers[attr.lower()] = value - return self - - def remove_a_header(self, attr): - """从headers中删除一个设置 \n - :param attr: 要删除的设置 - :return: 返回当前对象 - """ - if self._headers is None: - return self - - attr = attr.lower() - if attr in self._headers: - self._headers.pop(attr) - - return self - - @property - def cookies(self): - """以list形式返回cookies""" - if self._cookies is None: - self._cookies = [] - return self._cookies - - def set_cookies(self, cookies): - """设置cookies信息 \n - :param cookies: cookies,可为CookieJar, list, tuple, str, dict,传入None可在ini文件标记删除 - :return: 返回当前对象 - """ - cookies = cookies if cookies is None else list(cookies_to_tuple(cookies)) - self._sets('cookies', cookies) - return self - - @property - def auth(self): - """返回auth设置信息""" - return self._auth - - def set_auth(self, auth): - """设置认证元组或对象 \n - :param auth: 认证元组或对象 - :return: 返回当前对象 - """ - self._sets('auth', auth) - return self - - @property - def hooks(self): - """返回回调方法""" - if self._hooks is None: - self._hooks = {} - return self._hooks - - def set_hooks(self, hooks): - """设置回调方法 \n - :param hooks: - :return: 返回当前对象 - """ - self._hooks = hooks - return self - - @property - def params(self): - 
"""返回params设置信息""" - if self._params is None: - self._params = {} - return self._params - - def set_params(self, params): - """设置查询参数字典 \n - :param params: 查询参数字典 - :return: 返回当前对象 - """ - self._sets('params', params) - return self - - @property - def verify(self): - """返回是否验证SSL证书设置""" - return self._verify - - def set_verify(self, on_off): - """设置是否验证SSL证书 \n - :param on_off: 是否验证 SSL 证书 - :return: 返回当前对象 - """ - self._sets('verify', on_off) - return self - - @property - def cert(self): - """返回cert设置信息""" - return self._cert - - def set_cert(self, cert): - """SSL客户端证书文件的路径(.pem格式),或(‘cert’, ‘key’)元组 \n - :param cert: 证书路径或元组 - :return: 返回当前对象 - """ - self._sets('cert', cert) - return self - - @property - def adapters(self): - """返回适配器设置信息""" - if self._adapters is None: - self._adapters = [] - return self._adapters - - def add_adapter(self, url, adapter): - """添加适配器 \n - :param url: 适配器对应url - :param adapter: 适配器对象 - :return: 返回当前对象 - """ - self._adapters.append((url, adapter)) - return self - - @property - def stream(self): - """返回stream设置信息""" - return self._stream - - def set_stream(self, on_off): - """设置是否使用流式响应内容 \n - :param on_off: 是否使用流式响应内容 - :return: 返回当前对象 - """ - self._sets('stream', on_off) - return self - - @property - def trust_env(self): - """返回trust_env设置信息""" - return self._trust_env - - def set_trust_env(self, on_off): - """设置是否信任环境 \n - :param on_off: 是否信任环境 - :return: 返回当前对象 - """ - self._sets('trust_env', on_off) - return self - - @property - def max_redirects(self): - """返回最大重定向次数""" - return self._max_redirects - - def set_max_redirects(self, times): - """设置最大重定向次数 \n - :param times: 最大重定向次数 - :return: 返回当前对象 - """ - self._sets('max_redirects', times) - return self - - def _sets(self, arg, val): - """给属性赋值或标记删除 - :param arg: 属性名称 - :param val: 参数值 - :return: None - """ - if val is None: - self.__setattr__(f'_{arg}', None) - self._del_set.add(arg) - else: - self.__setattr__(f'_{arg}', val) - if arg in self._del_set: - 
self._del_set.remove(arg) - - def save(self, path=None): - """保存设置到文件 \n - :param path: ini文件的路径,传入 'default' 保存到默认ini文件 - :return: 保存文件的绝对路径 - """ - if path == 'default': - path = (Path(__file__).parent / 'configs.ini').absolute() - - elif path is None: - if self.ini_path: - path = Path(self.ini_path).absolute() - else: - path = (Path(__file__).parent / 'configs.ini').absolute() - - else: - path = Path(path).absolute() - - path = path / 'config.ini' if path.is_dir() else path - - if path.exists(): - om = OptionsManager(str(path)) - else: - om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) - - options = session_options_to_dict(self) - - for i in options: - if i not in ('download_path', 'timeout', 'proxies'): - om.set_item('session_options', i, options[i]) - - om.set_item('paths', 'download_path', self.download_path) - om.set_item('timeouts', 'implicit', self.timeout) - om.set_item('proxies', 'http', self.proxies.get('http', None)) - om.set_item('proxies', 'https', self.proxies.get('https', None)) - - for i in self._del_set: - if i == 'download_path': - om.set_item('paths', 'download_path', '') - elif i == 'proxies': - om.set_item('proxies', 'http', '') - om.set_item('proxies', 'https', '') - else: - om.remove_item('session_options', i) - - path = str(path) - om.save(path) - - return path - - def save_to_default(self): - """保存当前配置到默认ini文件""" - return self.save('default') - - def as_dict(self): - """以字典形式返回本对象""" - return session_options_to_dict(self) - - -class DriverOptions(Options): - """chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类, - 增加了删除配置和保存到文件方法。 - """ - - def __init__(self, read_file=True, ini_path=None): - """初始化,默认从文件读取设置 \n - :param read_file: 是否从默认ini文件中读取配置信息 - :param ini_path: ini文件路径,为None则读取默认ini文件 - """ - super().__init__() - self._user_data_path = None - - if read_file: - self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini') - om = OptionsManager(self.ini_path) - options_dict = 
om.chrome_options - - self._driver_path = om.paths.get('chromedriver_path', None) - self._download_path = om.paths.get('download_path', None) - self._binary_location = options_dict.get('binary_location', '') - self._arguments = options_dict.get('arguments', []) - self._extensions = options_dict.get('extensions', []) - self._experimental_options = options_dict.get('experimental_options', {}) - self._debugger_address = options_dict.get('debugger_address', None) - self.page_load_strategy = options_dict.get('page_load_strategy', 'normal') - - for arg in self._arguments: - if arg.startswith('--user-data-dir='): - self.set_paths(user_data_path=arg[16:]) - break - - self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30}) - return - - self._driver_path = None - self._download_path = None - self.ini_path = None - self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} - self._debugger_address = '127.0.0.1:9222' - - @property - def driver_path(self): - """chromedriver文件路径""" - return self._driver_path - - @property - def download_path(self): - """默认下载路径文件路径""" - return self._download_path - - @property - def chrome_path(self): - """浏览器启动文件路径""" - return self.browser_path - - @property - def browser_path(self): - """浏览器启动文件路径""" - return self.binary_location or 'chrome' - - @property - def user_data_path(self): - """返回用户文件夹路径""" - return self._user_data_path - - # -------------重写父类方法,实现链式操作------------- - def add_argument(self, argument): - """添加一个配置项 \n - :param argument: 配置项内容 - :return: 当前对象 - """ - super().add_argument(argument) - return self - - def set_capability(self, name, value): - """设置一个capability \n - :param name: capability名称 - :param value: capability值 - :return: 当前对象 - """ - super().set_capability(name, value) - return self - - def add_extension(self, extension): - """添加插件 \n - :param extension: crx文件路径 - :return: 当前对象 - """ - super().add_extension(extension) - return self - - def add_encoded_extension(self, 
extension): - """将带有扩展数据的 Base64 编码字符串添加到将用于将其提取到 ChromeDriver 的列表中 \n - :param extension: 带有扩展数据的 Base64 编码字符串 - :return: 当前对象 - """ - super().add_encoded_extension(extension) - return self - - def add_experimental_option(self, name, value): - """添加一个实验选项到浏览器 \n - :param name: 选项名称 - :param value: 选项值 - :return: 当前对象 - """ - super().add_experimental_option(name, value) - return self - - # -------------重写父类方法结束------------- - - def save(self, path=None): - """保存设置到文件 \n - :param path: ini文件的路径, None 保存到当前读取的配置文件,传入 'default' 保存到默认ini文件 - :return: 保存文件的绝对路径 - """ - if path == 'default': - path = (Path(__file__).parent / 'configs.ini').absolute() - - elif path is None: - if self.ini_path: - path = Path(self.ini_path).absolute() - else: - path = (Path(__file__).parent / 'configs.ini').absolute() - - else: - path = Path(path).absolute() - - path = path / 'config.ini' if path.is_dir() else path - - if path.exists(): - om = OptionsManager(str(path)) - else: - om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) - - options = self.as_dict() - - for i in options: - if i == 'driver_path': - om.set_item('paths', 'chromedriver_path', options[i]) - elif i == 'download_path': - om.set_item('paths', 'download_path', options[i]) - else: - om.set_item('chrome_options', i, options[i]) - - path = str(path) - om.save(path) - - return path - - def save_to_default(self): - """保存当前配置到默认ini文件""" - return self.save('default') - - def remove_argument(self, value): - """移除一个argument项 \n - :param value: 设置项名,有值的设置项传入设置名称即可 - :return: 当前对象 - """ - del_list = [] - - for argument in self._arguments: - if argument.startswith(value): - del_list.append(argument) - - for del_arg in del_list: - self._arguments.remove(del_arg) - - return self - - def remove_experimental_option(self, key): - """移除一个实验设置,传入key值删除 \n - :param key: 实验设置的名称 - :return: 当前对象 - """ - if key in self._experimental_options: - self._experimental_options.pop(key) - - return self - - def 
remove_all_extensions(self): - """移除所有插件 \n - :return: 当前对象 - """ - # 因插件是以整个文件储存,难以移除其中一个,故如须设置则全部移除再重设 - self._extensions = [] - return self - - def set_argument(self, arg, value): - """设置浏览器配置的argument属性 \n - :param arg: 属性名 - :param value: 属性值,有值的属性传入值,没有的传入bool - :return: 当前对象 - """ - self.remove_argument(arg) - - if value: - arg_str = arg if isinstance(value, bool) else f'{arg}={value}' - self.add_argument(arg_str) - - return self - - def set_timeouts(self, implicit=None, pageLoad=None, script=None): - """设置超时时间,设置单位为秒,selenium4以上版本有效 \n - :param implicit: 查找元素超时时间 - :param pageLoad: 页面加载超时时间 - :param script: 脚本运行超时时间 - :return: 当前对象 - """ - if implicit is not None: - self.timeouts['implicit'] = implicit - if pageLoad is not None: - self.timeouts['pageLoad'] = pageLoad - if script is not None: - self.timeouts['script'] = script - - return self - - def set_headless(self, on_off=True): - """设置是否隐藏浏览器界面 \n - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--headless', on_off) - - def set_no_imgs(self, on_off=True): - """设置是否加载图片 \n - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--blink-settings=imagesEnabled=false', on_off) - - def set_no_js(self, on_off=True): - """设置是否禁用js \n - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--disable-javascript', on_off) - - def set_mute(self, on_off=True): - """设置是否静音 \n - :param on_off: 开或关 - :return: 当前对象 - """ - on_off = True if on_off else False - return self.set_argument('--mute-audio', on_off) - - def set_user_agent(self, user_agent): - """设置user agent \n - :param user_agent: user agent文本 - :return: 当前对象 - """ - return self.set_argument('--user-agent', user_agent) - - def set_proxy(self, proxy): - """设置代理 \n - :param proxy: 代理url和端口 - :return: 当前对象 - """ - return self.set_argument('--proxy-server', proxy) - - def set_page_load_strategy(self, 
value): - """设置page_load_strategy,可接收 'normal', 'eager', 'none' \n - selenium4以上版本才支持此功能 - normal:默认情况下使用, 等待所有资源下载完成 - eager:DOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中 - none:完全不阻塞WebDriver - :param value: 可接收 'normal', 'eager', 'none' - :return: 当前对象 - """ - if value not in ('normal', 'eager', 'none'): - raise ValueError("只能选择'normal', 'eager', 'none'。") - self.page_load_strategy = value.lower() - return self - - def set_paths(self, driver_path=None, chrome_path=None, browser_path=None, local_port=None, - debugger_address=None, download_path=None, user_data_path=None, cache_path=None): - """快捷的路径设置函数 \n - :param driver_path: chromedriver.exe路径 - :param chrome_path: chrome.exe路径 - :param browser_path: 浏览器可执行文件路径 - :param local_port: 本地端口号 - :param debugger_address: 调试浏览器地址,例:127.0.0.1:9222 - :param download_path: 下载文件路径 - :param user_data_path: 用户数据路径 - :param cache_path: 缓存路径 - :return: 当前对象 - """ - if driver_path is not None: - self._driver_path = str(driver_path) - - if chrome_path is not None: - self.binary_location = str(chrome_path) - - if browser_path is not None: - self.binary_location = str(browser_path) - - if local_port is not None: - self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}' - - if debugger_address is not None: - self.debugger_address = debugger_address - - if download_path is not None: - self._download_path = str(download_path) - - if user_data_path is not None: - self.set_argument('--user-data-dir', str(user_data_path)) - self._user_data_path = user_data_path - - if cache_path is not None: - self.set_argument('--disk-cache-dir', str(cache_path)) - - return self - - def as_dict(self): - """已dict方式返回所有配置信息""" - return chrome_options_to_dict(self) - - -def chrome_options_to_dict(options): - """把chrome配置对象转换为字典 \n - :param options: chrome配置对象,字典或DriverOptions对象 - :return: 配置字典 - """ - if options in (False, None): - return DriverOptions(read_file=False).as_dict() - - if isinstance(options, dict): - return options - - re_dict = 
dict() - attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path', - 'page_load_strategy', 'download_path'] - - options_dir = options.__dir__() - for attr in attrs: - try: - re_dict[attr] = options.__getattribute__(attr) if attr in options_dir else None - except Exception: - pass - - if 'timeouts' in options_dir and 'timeouts' in options._caps: - timeouts = options.__getattribute__('timeouts') - re_dict['timeouts'] = timeouts - - return re_dict - - -def session_options_to_dict(options): - """把session配置对象转换为字典 \n - :param options: session配置对象或字典 - :return: 配置字典 - """ - if options in (False, None): - return SessionOptions(read_file=False).as_dict() - - if isinstance(options, dict): - return options - - re_dict = dict() - attrs = ['headers', 'cookies', 'proxies', 'params', 'verify', 'stream', 'trust_env', - 'max_redirects', 'timeout', 'download_path'] - - for attr in attrs: - val = options.__getattribute__(f'_{attr}') - if val is not None: - re_dict[attr] = val - - return re_dict - - -def cookie_to_dict(cookie): - """把Cookie对象转为dict格式 \n - :param cookie: Cookie对象 - :return: cookie字典 - """ - if isinstance(cookie, Cookie): - cookie_dict = cookie.__dict__.copy() - cookie_dict.pop('rfc2109') - cookie_dict.pop('_rest') - return cookie_dict - - elif isinstance(cookie, dict): - cookie_dict = cookie - - elif isinstance(cookie, str): - cookie = cookie.split(',' if ',' in cookie else ';') - cookie_dict = {} - - for key, attr in enumerate(cookie): - attr_val = attr.lstrip().split('=') - - if key == 0: - cookie_dict['name'] = attr_val[0] - cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else '' - else: - cookie_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else '' - - return cookie_dict - - else: - raise TypeError('cookie参数必须为Cookie、str或dict类型。') - - return cookie_dict - - -def cookies_to_tuple(cookies): - """把cookies转为tuple格式 \n - :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict - :return: 
返回tuple形式的cookies - """ - if isinstance(cookies, (list, tuple, RequestsCookieJar)): - cookies = tuple(cookie_to_dict(cookie) for cookie in cookies) - - elif isinstance(cookies, str): - cookies = tuple(cookie_to_dict(cookie.lstrip()) for cookie in cookies.split(";")) - - elif isinstance(cookies, dict): - cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) - - else: - raise TypeError('cookies参数必须为RequestsCookieJar、list、tuple、str或dict类型。') - - return cookies diff --git a/DrissionPage/config.pyi b/DrissionPage/config.pyi deleted file mode 100644 index 341681c..0000000 --- a/DrissionPage/config.pyi +++ /dev/null @@ -1,229 +0,0 @@ -# -*- coding:utf-8 -*- -""" -@Author : g1879 -@Contact : g1879@qq.com -""" -from configparser import RawConfigParser -from http.cookiejar import Cookie -from pathlib import Path -from typing import Any, Union, List, Tuple - -from requests.adapters import HTTPAdapter -from requests.auth import HTTPBasicAuth -from requests.cookies import RequestsCookieJar -from selenium.webdriver.chrome.options import Options - - -class OptionsManager(object): - ini_path: str = ... - _conf: RawConfigParser = ... - paths: dict = ... - chrome_options: dict = ... - session_options: dict = ... - - def __init__(self, path: str = None): ... - - def __getattr__(self, item) -> dict: ... - - def get_value(self, section: str, item: str) -> Any: ... - - def get_option(self, section: str) -> dict: ... - - def set_item(self, section: str, item: str, value: Any) -> None: ... - - def remove_item(self, section: str, item: str) -> None: ... - - def save(self, path: str = None) -> str: ... - - def save_to_default(self) -> str: ... - - -class SessionOptions(object): - def __init__(self, read_file: bool = True, ini_path: str = None): - self.ini_path: str = ... - self._download_path: str = ... - self._headers: dict = ... - self._cookies: list = ... - self._auth: tuple = ... - self._proxies: dict = ... - self._hooks: dict = ... - self._params: dict = ... 
- self._verify: bool = ... - self._cert: Union[str, tuple] = ... - self._adapters: list = ... - self._stream: bool = ... - self._trust_env: bool = ... - self._max_redirects: int = ... - self._timeout: float = ... - self._del_set: set = ... - - @property - def download_path(self) -> str: ... - - def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ... - - @property - def timeout(self) -> Union[int, float]: ... - - def set_timeout(self, second: Union[int, float]) -> SessionOptions: ... - - @property - def headers(self) -> dict: ... - - def set_headers(self, headers: Union[dict, None]) -> SessionOptions: ... - - def set_a_header(self, attr: str, value: str) -> SessionOptions: ... - - def remove_a_header(self, attr: str) -> SessionOptions: ... - - @property - def cookies(self) -> list: ... - - def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict, None]) -> SessionOptions: ... - - @property - def auth(self) -> Union[Tuple[str, str], HTTPBasicAuth]: ... - - def set_auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> SessionOptions: ... - - @property - def proxies(self) -> dict: ... - - def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions: ... - - @property - def hooks(self) -> dict: ... - - def set_hooks(self, hooks: Union[dict, None]) -> SessionOptions: ... - - @property - def params(self) -> dict: ... - - def set_params(self, params: Union[dict, None]) -> SessionOptions: ... - - @property - def verify(self) -> bool: ... - - def set_verify(self, on_off: Union[bool, None]) -> SessionOptions: ... - - @property - def cert(self) -> Union[str, tuple]: ... - - def set_cert(self, cert: Union[str, Tuple[str, str], None]) -> SessionOptions: ... - - @property - def adapters(self): list: ... - - def add_adapter(self, url: str, adapter: HTTPAdapter) -> SessionOptions: ... - - @property - def stream(self) -> bool: ... 
- - def set_stream(self, on_off: Union[bool, None]) -> SessionOptions: ... - - @property - def trust_env(self) -> bool: ... - - def set_trust_env(self, on_off: Union[bool, None]) -> SessionOptions: ... - - @property - def max_redirects(self) -> int: ... - - def _sets(self, arg: str, val: Any) -> None: ... - - def set_max_redirects(self, times: Union[int, None]) -> SessionOptions: ... - - def save(self, path: str = None) -> str: ... - - def save_to_default(self) -> str: ... - - def as_dict(self) -> dict: ... - - -class DriverOptions(Options): - - def __init__(self, read_file: bool = True, ini_path: str = None): - self.ini_path: str = ... - self._driver_path: str = ... - self._user_data_path: str = ... - self._download_path: str = ... - - @property - def driver_path(self) -> str: ... - - @property - def download_path(self) -> str: ... - - @property - def chrome_path(self) -> str: ... - - @property - def browser_path(self) -> str: ... - - @property - def user_data_path(self) -> str: ... - - # -------------重写父类方法,实现链式操作------------- - def add_argument(self, argument: str) -> DriverOptions: ... - - def set_capability(self, name: str, value: str) -> DriverOptions: ... - - def add_extension(self, extension: str) -> DriverOptions: ... - - def add_encoded_extension(self, extension: str) -> DriverOptions: ... - - def add_experimental_option(self, name: str, value: Union[str, int, dict, List[str]]) -> DriverOptions: ... - - # -------------重写父类方法结束------------- - - def save(self, path: str = None) -> str: ... - - def save_to_default(self) -> str: ... - - def remove_argument(self, value: str) -> DriverOptions: ... - - def remove_experimental_option(self, key: str) -> DriverOptions: ... - - def remove_all_extensions(self) -> DriverOptions: ... - - def set_argument(self, arg: str, value: Union[bool, str]) -> DriverOptions: ... - - def set_timeouts(self, implicit: float = None, pageLoad: float = None, script: float = None) -> DriverOptions: ... 
- - def set_headless(self, on_off: bool = True) -> DriverOptions: ... - - def set_no_imgs(self, on_off: bool = True) -> DriverOptions: ... - - def set_no_js(self, on_off: bool = True) -> DriverOptions: ... - - def set_mute(self, on_off: bool = True) -> DriverOptions: ... - - def set_user_agent(self, user_agent: str) -> DriverOptions: ... - - def set_proxy(self, proxy: str) -> DriverOptions: ... - - def set_page_load_strategy(self, value: str) -> DriverOptions: ... - - def set_paths(self, - driver_path: Union[str, Path] = None, - chrome_path: Union[str, Path] = None, - browser_path: Union[str, Path] = None, - local_port: Union[int, str] = None, - debugger_address: str = None, - download_path: str = None, - user_data_path: str = None, - cache_path: str = None) -> DriverOptions: ... - - def as_dict(self) -> dict: ... - - -def chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]: ... - - -def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ... - - -def cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: ... - - -def cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: ... 
# -*- coding:utf-8 -*-
# ===========================================================================
# Contents of:
#   DrissionPage/configs/chromium_options.py  (ChromiumOptions, PortFinder)
#   DrissionPage/configs/driver_options.py    (DriverOptions, chrome_options_to_dict)
# The type information of the accompanying .pyi stubs is folded into the
# annotations below.
# ===========================================================================
from pathlib import Path
from shutil import rmtree
from typing import Any, List, Optional, Tuple, Union

from selenium.webdriver.chrome.options import Options

from DrissionPage.common import port_is_using
from .options_manage import OptionsManager


def _default_ini() -> Path:
    """Return the path of the ``configs.ini`` located next to this module."""
    return Path(__file__).parent / 'configs.ini'


def _resolve_ini_target(path, current_ini) -> Path:
    """Work out which ini file a ``save()`` call should write to.

    :param path: 'default' for the bundled ini, None for the ini currently in use
                 (bundled ini when there is none), otherwise a file or directory path
    :param current_ini: ini path the options object was loaded from, may be None
    :return: absolute path of the target file; a directory gets 'config.ini' appended
    """
    if path == 'default':
        target = _default_ini().absolute()
    elif path is None:
        target = Path(current_ini).absolute() if current_ini else _default_ini().absolute()
    else:
        target = Path(path).absolute()

    return target / 'config.ini' if target.is_dir() else target


def _manager_for(target: Path, current_ini) -> OptionsManager:
    """Open the target ini if it exists, else fall back to the current/bundled ini as a template."""
    if target.exists():
        return OptionsManager(str(target))
    return OptionsManager(current_ini or str(_default_ini()))


# ---------------------------------------------------------------------------
# chromium_options.py
# ---------------------------------------------------------------------------
class ChromiumOptions(object):
    """Settings used to start, or attach to, a Chromium browser.

    Values are read from an ini file by default, can be changed through the chainable
    ``set_*`` / ``remove_*`` methods, and are written back with ``save()``.
    """

    def __init__(self, read_file: bool = True, ini_path: Optional[str] = None):
        """Create the options object.

        :param read_file: whether to load the settings from an ini file
        :param ini_path: ini file to read; None means the ``configs.ini`` next to this module
        """
        self._user_data_path = None
        self._user = 'Default'
        # Set on both construction paths so remove_pref_from_file() never hits a missing attribute.
        self._prefs_to_del = []

        if not read_file:
            self.ini_path = None
            self._binary_location = ""
            self._arguments = []
            self._download_path = None
            self._extensions = []
            self._prefs = {}
            self._timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
            self._debugger_address = '127.0.0.1:9222'
            self._page_load_strategy = 'normal'
            self._proxy = None
            return

        self.ini_path = ini_path or str(_default_ini())
        om = OptionsManager(self.ini_path)
        options = om.chrome_options

        self._download_path = om.paths.get('download_path', None)
        self._arguments = options.get('arguments', [])
        self._binary_location = options.get('binary_location', '')
        self._extensions = options.get('extensions', [])
        self._prefs = options.get('experimental_options', {}).get('prefs', {})
        self._debugger_address = options.get('debugger_address', None)
        self._page_load_strategy = options.get('page_load_strategy', 'normal')
        self._proxy = om.proxies.get('http', None)

        # Walk a snapshot: set_paths()/set_user() remove and re-append entries of
        # self._arguments, and mutating the list while iterating it skips the next entry.
        found_data_dir = found_profile = False
        for arg in tuple(self._arguments):
            if arg.startswith('--user-data-dir='):
                self.set_paths(user_data_path=arg[16:])
                found_data_dir = True
            if arg.startswith('--profile-directory='):
                self.set_user(arg[20:])
                found_profile = True
            if found_data_dir and found_profile:
                break

        timeouts = om.timeouts
        self._timeouts = {'implicit': timeouts['implicit'],
                          'pageLoad': timeouts['page_load'],
                          'script': timeouts['script']}

    @property
    def download_path(self) -> str:
        """Default download directory."""
        return self._download_path

    @property
    def browser_path(self) -> str:
        """Path of the browser executable."""
        return self._binary_location

    @property
    def user_data_path(self) -> str:
        """Path of the user data folder."""
        return self._user_data_path

    @property
    def user(self) -> str:
        """Name of the profile folder in use."""
        return self._user

    @property
    def page_load_strategy(self) -> str:
        """Configured page load strategy."""
        return self._page_load_strategy

    @property
    def timeouts(self) -> dict:
        """Timeout settings, keyed 'implicit', 'pageLoad' and 'script'."""
        return self._timeouts

    @property
    def proxy(self) -> str:
        """Configured proxy."""
        return self._proxy

    @property
    def debugger_address(self) -> str:
        """Browser address in ``ip:port`` form."""
        return self._debugger_address

    @property
    def extensions(self) -> list:
        """Paths of the extensions to load."""
        return self._extensions

    @property
    def preferences(self) -> dict:
        """User preference settings."""
        return self._prefs

    def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> 'ChromiumOptions':
        """Set a command line argument, replacing any existing entry with the same name.

        :param arg: argument name
        :param value: value for ``name=value`` arguments, None for a bare switch,
                      False to only remove the argument
        :return: this object
        """
        self.remove_argument(arg)

        if value is not False:
            self._arguments.append(arg if value is None else f'{arg}={value}')

        return self

    def remove_argument(self, value: str) -> 'ChromiumOptions':
        """Remove an argument by name (the ``=value`` part does not need to be given).

        :param value: argument name
        :return: this object
        """
        prefix = f'{value}='
        self._arguments[:] = [a for a in self._arguments if a != value and not a.startswith(prefix)]
        return self

    def add_extension(self, path: Union[str, Path]) -> 'ChromiumOptions':
        """Add an extension.

        :param path: extension path, may point to a folder
        :return: this object
        :raises OSError: when the path does not exist
        """
        path = Path(path)
        if not path.exists():
            raise OSError('插件路径不存在。')
        self._extensions.append(str(path))
        return self

    def remove_extensions(self) -> 'ChromiumOptions':
        """Drop every configured extension.

        :return: this object
        """
        self._extensions = []
        return self

    def set_pref(self, arg: str, value: Any) -> 'ChromiumOptions':
        """Set an entry of the user Preferences.

        :param arg: preference name
        :param value: preference value
        :return: this object
        """
        self._prefs[arg] = value
        return self

    def remove_pref(self, arg: str) -> 'ChromiumOptions':
        """Remove a preference set on this object; removing an unset name is a no-op.

        :param arg: preference name
        :return: this object
        """
        self._prefs.pop(arg, None)
        return self

    def remove_pref_from_file(self, arg: str) -> 'ChromiumOptions':
        """Record a preference name in the list of entries to delete from the profile file.

        :param arg: preference name
        :return: this object
        """
        self._prefs_to_del.append(arg)
        return self

    def set_timeouts(self, implicit: Union[int, float] = None, pageLoad: Union[int, float] = None,
                     script: Union[int, float] = None) -> 'ChromiumOptions':
        """Set timeouts in seconds; arguments left as None keep their current value.

        :param implicit: default timeout
        :param pageLoad: page load timeout
        :param script: script execution timeout
        :return: this object
        """
        for key, seconds in (('implicit', implicit), ('pageLoad', pageLoad), ('script', script)):
            if seconds is not None:
                self._timeouts[key] = seconds

        return self

    def set_user(self, user: str = 'Default') -> 'ChromiumOptions':
        """Choose which profile folder to use.

        :param user: profile folder name
        :return: this object
        """
        self.set_argument('--profile-directory', user)
        self._user = user
        return self

    def _toggle(self, arg: str, on_off) -> 'ChromiumOptions':
        """Add ``arg`` as a bare switch when on_off is truthy, remove it otherwise."""
        # set_argument() renders any value other than None/False as 'arg=value';
        # passing True here would yield e.g. '--headless=True'.
        return self.set_argument(arg, None if on_off else False)

    def set_headless(self, on_off: bool = True) -> 'ChromiumOptions':
        """Turn headless mode on or off.

        :param on_off: on or off
        :return: this object
        """
        return self._toggle('--headless', on_off)

    def set_no_imgs(self, on_off: bool = True) -> 'ChromiumOptions':
        """Turn image blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self._toggle('--blink-settings=imagesEnabled=false', on_off)

    def set_no_js(self, on_off: bool = True) -> 'ChromiumOptions':
        """Turn JavaScript blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self._toggle('--disable-javascript', on_off)

    def set_mute(self, on_off: bool = True) -> 'ChromiumOptions':
        """Turn audio muting on or off.

        :param on_off: on or off
        :return: this object
        """
        return self._toggle('--mute-audio', on_off)

    def set_user_agent(self, user_agent: str) -> 'ChromiumOptions':
        """Set the user agent.

        :param user_agent: user agent text
        :return: this object
        """
        return self.set_argument('--user-agent', user_agent)

    def set_proxy(self, proxy: str) -> 'ChromiumOptions':
        """Set the proxy.

        :param proxy: proxy url and port
        :return: this object
        """
        self._proxy = proxy
        return self.set_argument('--proxy-server', proxy)

    def set_page_load_strategy(self, value: str) -> 'ChromiumOptions':
        """Set the page load strategy (matched case-insensitively).

        normal: wait until all resources are downloaded (default)
        eager: the DOM is ready, other resources such as images may still be loading
        none: do not block at all

        :param value: 'normal', 'eager' or 'none'
        :return: this object
        :raises ValueError: for any other value
        """
        # Normalise before validating, so that e.g. 'Eager' is accepted.
        strategy = value.lower() if isinstance(value, str) else value
        if strategy not in ('normal', 'eager', 'none'):
            raise ValueError("只能选择'normal', 'eager', 'none'。")
        self._page_load_strategy = strategy
        return self

    def set_paths(self, browser_path: Union[str, Path] = None, local_port: Union[int, str] = None,
                  debugger_address: str = None, download_path: Union[str, Path] = None,
                  user_data_path: Union[str, Path] = None,
                  cache_path: Union[str, Path] = None) -> 'ChromiumOptions':
        """Set several path-like settings at once; arguments left as None are not touched.

        :param browser_path: browser executable path
        :param local_port: local port, stored as ``127.0.0.1:<port>``
        :param debugger_address: browser address, e.g. 127.0.0.1:9222 (applied after local_port)
        :param download_path: download directory
        :param user_data_path: user data directory
        :param cache_path: cache directory
        :return: this object
        """
        if browser_path is not None:
            self._binary_location = str(browser_path)

        if local_port is not None:
            self._debugger_address = f'127.0.0.1:{local_port}'

        if debugger_address is not None:
            self._debugger_address = debugger_address

        if download_path is not None:
            self._download_path = str(download_path)

        if user_data_path is not None:
            data_dir = str(user_data_path)
            self.set_argument('--user-data-dir', data_dir)
            self._user_data_path = data_dir

        if cache_path is not None:
            self.set_argument('--disk-cache-dir', str(cache_path))

        return self

    def auto_port(self, data_path: Union[str, Path]) -> 'ChromiumOptions':
        """Pick a free port and a matching user data folder under data_path.

        :param data_path: directory in which the user data folder is placed
        :return: this object
        """
        port, path = PortFinder().get_port(data_path)
        self.set_paths(local_port=port, user_data_path=path)
        return self

    def save(self, path: Union[str, Path] = None) -> str:
        """Write the settings to an ini file.

        :param path: ini path; None saves to the ini currently in use,
                     'default' saves to the bundled ini
        :return: absolute path of the written file
        """
        target = _resolve_ini_target(path, self.ini_path)
        om = _manager_for(target, self.ini_path)

        # chrome_options section
        for name in ('debugger_address', 'binary_location', 'arguments', 'extensions', 'user',
                     'page_load_strategy'):
            om.set_item('chrome_options', name, getattr(self, f'_{name}'))
        # proxies
        om.set_item('proxies', 'http', self._proxy)
        om.set_item('proxies', 'https', self._proxy)
        # paths
        om.set_item('paths', 'download_path', self._download_path)
        # timeouts
        om.set_item('timeouts', 'implicit', self._timeouts['implicit'])
        om.set_item('timeouts', 'page_load', self._timeouts['pageLoad'])
        om.set_item('timeouts', 'script', self._timeouts['script'])
        # prefs live inside experimental_options; the other experimental options are kept
        experimental = om.chrome_options.get('experimental_options', {})
        experimental['prefs'] = self._prefs
        om.set_item('chrome_options', 'experimental_options', experimental)

        target = str(target)
        om.save(target)

        return target

    def save_to_default(self) -> str:
        """Write the settings to the bundled ini file."""
        return self.save('default')


class PortFinder(object):
    """Hands out local ports (9600-9799), each paired with its own user data folder."""

    used_port = []  # ports already handed out by this process

    @staticmethod
    def get_port(path: Union[str, Path]) -> Tuple[int, str]:
        """Find a usable port.

        :param path: directory in which the user data folder is placed
        :return: tuple of (port, user data folder path)
        :raises OSError: when no port in the range is usable
        """
        base = Path(path)
        for port in range(9600, 9800):
            if port in PortFinder.used_port or port_is_using('127.0.0.1', port):
                continue

            # Always derive the folder from the unchanged base directory.
            data_dir = base / f'userData{port}'
            if data_dir.exists():
                try:
                    rmtree(data_dir)
                except PermissionError:
                    # The stale folder could not be removed; try the next port.
                    continue

            PortFinder.used_port.append(port)
            return port, str(data_dir)

        raise OSError('未找到可用端口。')


# ---------------------------------------------------------------------------
# driver_options.py
# ---------------------------------------------------------------------------
class DriverOptions(Options):
    """Chrome options based on selenium's ``Options``.

    Adds chainable setters, removal of settings and saving to an ini file.
    """

    def __init__(self, read_file: bool = True, ini_path: Optional[str] = None):
        """Create the options object.

        :param read_file: whether to load the settings from an ini file
        :param ini_path: ini file to read; None means the ``configs.ini`` next to this module
        """
        super().__init__()
        self._user_data_path = None

        if not read_file:
            self._driver_path = None
            self._download_path = None
            self.ini_path = None
            self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30}
            self._debugger_address = '127.0.0.1:9222'
            return

        self.ini_path = ini_path or str(_default_ini())
        om = OptionsManager(self.ini_path)
        options_dict = om.chrome_options

        self._driver_path = om.paths.get('chromedriver_path', None)
        self._download_path = om.paths.get('download_path', None)
        self._binary_location = options_dict.get('binary_location', '')
        self._arguments = options_dict.get('arguments', [])
        self._extensions = options_dict.get('extensions', [])
        self._experimental_options = options_dict.get('experimental_options', {})
        self._debugger_address = options_dict.get('debugger_address', None)
        self.page_load_strategy = options_dict.get('page_load_strategy', 'normal')

        for arg in self._arguments:
            if arg.startswith('--user-data-dir='):
                # Safe although set_paths() rewrites the list: the loop stops right away.
                self.set_paths(user_data_path=arg[16:])
                break

        self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30})

    @property
    def driver_path(self) -> str:
        """Path of the chromedriver executable."""
        return self._driver_path

    @property
    def download_path(self) -> str:
        """Default download directory."""
        return self._download_path

    @property
    def chrome_path(self) -> str:
        """Path of the browser executable (alias of browser_path)."""
        return self.browser_path

    @property
    def browser_path(self) -> str:
        """Path of the browser executable, 'chrome' when none is configured."""
        return self.binary_location or 'chrome'

    @property
    def user_data_path(self) -> str:
        """Path of the user data folder."""
        return self._user_data_path

    # ------------- parent methods overridden to allow chaining -------------
    def add_argument(self, argument: str) -> 'DriverOptions':
        """Add an argument.

        :param argument: argument text
        :return: this object
        """
        super().add_argument(argument)
        return self

    def set_capability(self, name: str, value: str) -> 'DriverOptions':
        """Set a capability.

        :param name: capability name
        :param value: capability value
        :return: this object
        """
        super().set_capability(name, value)
        return self

    def add_extension(self, extension: str) -> 'DriverOptions':
        """Add an extension.

        :param extension: path of the crx file
        :return: this object
        """
        super().add_extension(extension)
        return self

    def add_encoded_extension(self, extension: str) -> 'DriverOptions':
        """Add an extension given as a Base64 encoded string.

        :param extension: Base64 encoded extension data
        :return: this object
        """
        super().add_encoded_extension(extension)
        return self

    def add_experimental_option(self, name: str,
                                value: Union[str, int, dict, List[str]]) -> 'DriverOptions':
        """Add an experimental option.

        :param name: option name
        :param value: option value
        :return: this object
        """
        super().add_experimental_option(name, value)
        return self

    # ------------- end of overridden parent methods -------------

    def save(self, path: str = None) -> str:
        """Write the settings to an ini file.

        :param path: ini path; None saves to the ini currently in use,
                     'default' saves to the bundled ini
        :return: absolute path of the written file
        """
        target = _resolve_ini_target(path, self.ini_path)
        om = _manager_for(target, self.ini_path)

        # The two path entries belong to the 'paths' section, everything else to 'chrome_options'.
        for key, value in self.as_dict().items():
            if key == 'driver_path':
                om.set_item('paths', 'chromedriver_path', value)
            elif key == 'download_path':
                om.set_item('paths', 'download_path', value)
            else:
                om.set_item('chrome_options', key, value)

        target = str(target)
        om.save(target)

        return target

    def save_to_default(self) -> str:
        """Write the settings to the bundled ini file."""
        return self.save('default')

    def remove_argument(self, value: str) -> 'DriverOptions':
        """Remove an argument by name (the ``=value`` part does not need to be given).

        Only the argument itself or its ``name=value`` form is removed; other arguments
        that merely share the prefix are kept.

        :param value: argument name
        :return: this object
        """
        prefix = f'{value}='
        self._arguments[:] = [a for a in self._arguments if a != value and not a.startswith(prefix)]
        return self

    def remove_experimental_option(self, key: str) -> 'DriverOptions':
        """Remove an experimental option; an unknown key is ignored.

        :param key: option name
        :return: this object
        """
        self._experimental_options.pop(key, None)
        return self

    def remove_all_extensions(self) -> 'DriverOptions':
        """Drop every configured extension.

        :return: this object
        """
        # Extensions are stored as whole files, so a single one is hard to remove:
        # clear them all and add the wanted ones again.
        self._extensions = []
        return self

    def set_argument(self, arg: str, value: Union[bool, str]) -> 'DriverOptions':
        """Set a command line argument, replacing any existing entry with the same name.

        :param arg: argument name
        :param value: value for ``name=value`` arguments, True for a bare switch;
                      any falsy value only removes the argument
        :return: this object
        """
        self.remove_argument(arg)

        if value:
            self.add_argument(arg if isinstance(value, bool) else f'{arg}={value}')

        return self

    def set_timeouts(self, implicit: float = None, pageLoad: float = None,
                     script: float = None) -> 'DriverOptions':
        """Set timeouts in seconds; arguments left as None keep their current value.

        :param implicit: element lookup timeout
        :param pageLoad: page load timeout
        :param script: script execution timeout
        :return: this object
        """
        for key, seconds in (('implicit', implicit), ('pageLoad', pageLoad), ('script', script)):
            if seconds is not None:
                self.timeouts[key] = seconds

        return self

    def set_headless(self, on_off: bool = True) -> 'DriverOptions':
        """Turn headless mode on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--headless', bool(on_off))

    def set_no_imgs(self, on_off: bool = True) -> 'DriverOptions':
        """Turn image blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--blink-settings=imagesEnabled=false', bool(on_off))

    def set_no_js(self, on_off: bool = True) -> 'DriverOptions':
        """Turn JavaScript blocking on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--disable-javascript', bool(on_off))

    def set_mute(self, on_off: bool = True) -> 'DriverOptions':
        """Turn audio muting on or off.

        :param on_off: on or off
        :return: this object
        """
        return self.set_argument('--mute-audio', bool(on_off))

    def set_user_agent(self, user_agent: str) -> 'DriverOptions':
        """Set the user agent.

        :param user_agent: user agent text
        :return: this object
        """
        return self.set_argument('--user-agent', user_agent)

    def set_proxy(self, proxy: str) -> 'DriverOptions':
        """Set the proxy.

        :param proxy: proxy url and port
        :return: this object
        """
        return self.set_argument('--proxy-server', proxy)

    def set_page_load_strategy(self, value: str) -> 'DriverOptions':
        """Set the page load strategy (matched case-insensitively).

        normal: wait until all resources are downloaded (default)
        eager: the DOM is ready, other resources such as images may still be loading
        none: do not block WebDriver at all

        :param value: 'normal', 'eager' or 'none'
        :return: this object
        :raises ValueError: for any other value
        """
        # Normalise before validating, so that e.g. 'Eager' is accepted.
        strategy = value.lower() if isinstance(value, str) else value
        if strategy not in ('normal', 'eager', 'none'):
            raise ValueError("只能选择'normal', 'eager', 'none'。")
        self.page_load_strategy = strategy
        return self

    def set_paths(self,
                  driver_path: Union[str, Path] = None,
                  chrome_path: Union[str, Path] = None,
                  browser_path: Union[str, Path] = None,
                  local_port: Union[int, str] = None,
                  debugger_address: str = None,
                  download_path: str = None,
                  user_data_path: str = None,
                  cache_path: str = None) -> 'DriverOptions':
        """Set several path-like settings at once; arguments left as None are not touched.

        :param driver_path: chromedriver executable path
        :param chrome_path: browser executable path
        :param browser_path: browser executable path (applied after chrome_path)
        :param local_port: local port, stored as ``127.0.0.1:<port>``; '' clears the address
        :param debugger_address: browser address, e.g. 127.0.0.1:9222 (applied after local_port)
        :param download_path: download directory
        :param user_data_path: user data directory
        :param cache_path: cache directory
        :return: this object
        """
        if driver_path is not None:
            self._driver_path = str(driver_path)

        if chrome_path is not None:
            self.binary_location = str(chrome_path)

        if browser_path is not None:
            self.binary_location = str(browser_path)

        if local_port is not None:
            self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}'

        if debugger_address is not None:
            self.debugger_address = debugger_address

        if download_path is not None:
            self._download_path = str(download_path)

        if user_data_path is not None:
            data_dir = str(user_data_path)
            self.set_argument('--user-data-dir', data_dir)
            # Stored as str so the user_data_path property always returns text.
            self._user_data_path = data_dir

        if cache_path is not None:
            self.set_argument('--disk-cache-dir', str(cache_path))

        return self

    def as_dict(self) -> dict:
        """Return all settings as a dict."""
        return chrome_options_to_dict(self)


def chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None, bool]) -> Union[dict, None]:
    """Convert a chrome options object to a dict.

    :param options: options object or dict; False/None yields the settings of a
                    ``DriverOptions(read_file=False)``
    :return: settings dict (a dict argument is returned unchanged)
    """
    if options in (False, None):
        return DriverOptions(read_file=False).as_dict()

    if isinstance(options, dict):
        return options

    attrs = ('debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options',
             'driver_path', 'page_load_strategy', 'download_path')
    available = options.__dir__()
    result = {}

    for attr in attrs:
        try:
            result[attr] = getattr(options, attr) if attr in available else None
        except Exception:
            # Deliberately best effort: an attribute whose getter fails is left out of the result.
            pass

    # timeouts is only reported when it has actually been stored in the capabilities
    if 'timeouts' in available and 'timeouts' in options._caps:
        result['timeouts'] = getattr(options, 'timeouts')

    return result
# -*- coding:utf-8 -*-
# Contents of DrissionPage/configs/options_manage.py; the type information of the
# accompanying options_manage.pyi stub is folded into the annotations below.
from configparser import NoOptionError, NoSectionError, RawConfigParser
from pathlib import Path
from typing import Any, Optional


class OptionsManager(object):
    """Reads, edits and saves the contents of an ini configuration file.

    Every section can be read as an attribute holding a dict, e.g. ``om.paths`` or
    ``om.chrome_options``.
    """

    def __init__(self, path: Optional[str] = None):
        """Load an ini file.

        :param path: ini file path; None or 'default' loads the ``configs.ini`` next to this module
        :raises FileNotFoundError: when the ini file does not exist
        """
        self.ini_path = str(Path(__file__).parent / 'configs.ini') if path == 'default' or path is None else path
        if not Path(self.ini_path).exists():
            raise FileNotFoundError('ini文件不存在。')
        self._conf = RawConfigParser()
        self._conf.read(self.ini_path, encoding='utf-8')

    def __getattr__(self, item: str) -> dict:
        """Return a whole section as a dict; called only for names not found normally.

        :param item: section name
        :return: dict built from the section
        """
        # Private/dunder names are never sections. Answering them with AttributeError keeps
        # hasattr() and copy/pickle working and avoids endless recursion when ``_conf``
        # itself has not been set yet.
        if item.startswith('_'):
            raise AttributeError(item)
        return self.get_option(item)

    @staticmethod
    def _parse(raw: str) -> Any:
        """Turn the text of an ini value into a Python object, falling back to the text itself."""
        # NOTE(security): eval() executes whatever expression the ini file contains, so an
        # ini file must be treated as trusted input. Kept because existing files may rely
        # on it; confirm that before switching to ast.literal_eval.
        try:
            return eval(raw)
        except Exception:
            return raw

    def get_value(self, section: str, item: str) -> Any:
        """Return a single value.

        :param section: section name
        :param item: item name
        :return: the evaluated value, the raw text when it cannot be evaluated,
                 or None when the section or the item does not exist
        """
        try:
            raw = self._conf.get(section, item)
        except (NoSectionError, NoOptionError):
            return None
        return self._parse(raw)

    def get_option(self, section: str) -> dict:
        """Return the contents of a section as a dict.

        :param section: section name
        :return: dict of item name to evaluated value (raw text when it cannot be evaluated)
        """
        return {name: self._parse(raw) for name, raw in self._conf.items(section)}

    def set_item(self, section: str, item: str, value: Any) -> 'OptionsManager':
        """Set a value; it is stored as ``str(value)``.

        :param section: section name
        :param item: item name
        :param value: item value
        :return: this object
        """
        self._conf.set(section, item, str(value))
        # Kept from the original: also sets the attribute ``_<section>`` on this object to None.
        setattr(self, f'_{section}', None)
        return self

    def remove_item(self, section: str, item: str) -> 'OptionsManager':
        """Remove a value.

        :param section: section name
        :param item: item name
        :return: this object
        """
        self._conf.remove_option(section, item)
        return self

    def save(self, path: Optional[str] = None) -> str:
        """Write the configuration to an ini file.

        :param path: ini path; None saves to the file that was loaded, 'default' saves to the
                     bundled ini; a directory gets 'config.ini' appended
        :return: path of the written file
        """
        default_path = (Path(__file__).parent / 'configs.ini').absolute()
        if path == 'default':
            target = default_path
        elif path is None:
            target = Path(self.ini_path).absolute()
        else:
            target = Path(path).absolute()

        if target.is_dir():
            target = target / 'config.ini'

        target = str(target)
        # The with-block closes (and thereby flushes) the file even when writing fails.
        with open(target, 'w', encoding='utf-8') as ini_file:
            self._conf.write(ini_file)

        print(f'配置已保存到文件:{target}')
        if target == str(default_path):
            print('以后程序可自动从文件加载配置。')

        return target

    def save_to_default(self) -> str:
        """Write the configuration to the bundled ini file."""
        return self.save('default')
# -*- coding:utf-8 -*-

from pathlib import Path

from DrissionPage.common import cookies_to_tuple
from .options_manage import OptionsManager


class SessionOptions(object):
    """Configuration holder for a requests ``Session`` object.

    Every ``set_*`` / ``add_*`` / ``remove_*`` method returns the instance so
    calls can be chained.  Passing ``None`` to a setter that goes through
    ``_sets`` (or to ``set_headers``) marks the option for removal from the
    ini file on the next ``save()``.
    """

    def __init__(self, read_file=True, ini_path=None):
        """
        :param read_file: whether to load the settings from an ini file
        :param ini_path: ini file path; when empty, the ``configs.ini`` next
                         to this module is used
        """
        self.ini_path = None        # str
        self._download_path = None  # str
        self._headers = None        # dict
        self._cookies = None        # list
        self._auth = None           # tuple or auth object
        self._proxies = None        # dict
        self._hooks = None          # dict
        self._params = None         # dict
        self._verify = None         # bool
        self._cert = None           # str or tuple
        self._adapters = None       # list of (url, adapter)
        self._stream = None         # bool
        self._trust_env = None      # bool
        self._max_redirects = None  # int
        self._timeout = 10          # seconds

        self._del_set = set()  # names of options to delete from the ini file on save

        if not read_file:
            return

        self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini')
        om = OptionsManager(self.ini_path)
        options_dict = om.session_options

        # headers and cookies are normalised by their setters
        if options_dict.get('headers', None) is not None:
            self.set_headers(options_dict['headers'])

        if options_dict.get('cookies', None) is not None:
            self.set_cookies(options_dict['cookies'])

        # the remaining options are stored as read
        for name in ('auth', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects'):
            value = options_dict.get(name, None)
            if value is not None:
                setattr(self, f'_{name}', value)

        proxies = om.proxies
        self.set_proxies(proxies.get('http', None), proxies.get('https', None))
        self._timeout = om.timeouts.get('implicit', 10)
        self._download_path = om.paths.get('download_path', None)

    # =========== options stored outside the session_options section: begin ============
    @property
    def download_path(self):
        """Return the default download path."""
        return self._download_path

    def set_paths(self, download_path=None):
        """Set the default download path.

        :param download_path: download path; ``None`` leaves it unchanged
        :return: this object
        """
        if download_path is not None:
            self._download_path = str(download_path)
        return self

    @property
    def timeout(self):
        """Return the timeout in seconds."""
        return self._timeout

    def set_timeout(self, second):
        """Set the timeout.

        :param second: number of seconds
        :return: this object
        """
        self._timeout = second
        return self

    @property
    def proxies(self):
        """Return the proxies dict (an empty dict is created when unset)."""
        if self._proxies is None:
            self._proxies = {}
        return self._proxies

    def set_proxies(self, http, https=None):
        """Set the proxies.

        :param http: http proxy address
        :param https: https proxy address; falls back to ``http`` when empty
        :return: this object
        """
        if http is None and https is None:
            proxies = None
        else:
            proxies = {'http': http, 'https': https or http}
        self._sets('proxies', proxies)
        return self

    # =========== options stored outside the session_options section: end ============

    @property
    def headers(self):
        """Return the headers dict (an empty dict is created when unset)."""
        if self._headers is None:
            self._headers = {}
        return self._headers

    def set_headers(self, headers):
        """Replace all headers; keys are lower-cased.

        :param headers: headers dict, or ``None`` to mark headers for removal
                        from the ini file
        :return: this object
        """
        if headers is None:
            self._headers = None
            self._del_set.add('headers')
        else:
            self._headers = {key.lower(): headers[key] for key in headers}
            # headers are set again, so a pending deletion must not erase them on save
            self._del_set.discard('headers')
        return self

    def set_a_header(self, attr, value):
        """Set a single header.

        :param attr: header name (stored lower-cased)
        :param value: header value
        :return: this object
        """
        if self._headers is None:
            self._headers = {}

        self._headers[attr.lower()] = value
        # same reason as in set_headers: cancel a pending deletion
        self._del_set.discard('headers')
        return self

    def remove_a_header(self, attr):
        """Remove a single header if present.

        :param attr: header name (case-insensitive)
        :return: this object
        """
        if self._headers is not None:
            self._headers.pop(attr.lower(), None)
        return self

    @property
    def cookies(self):
        """Return the cookies as a list (an empty list is created when unset)."""
        if self._cookies is None:
            self._cookies = []
        return self._cookies

    def set_cookies(self, cookies):
        """Set the cookies.

        :param cookies: CookieJar, list, tuple, str or dict; ``None`` marks
                        cookies for removal from the ini file
        :return: this object
        """
        cookies = cookies if cookies is None else list(cookies_to_tuple(cookies))
        self._sets('cookies', cookies)
        return self

    @property
    def auth(self):
        """Return the auth setting."""
        return self._auth

    def set_auth(self, auth):
        """Set the authentication tuple or object.

        :param auth: authentication tuple or object
        :return: this object
        """
        self._sets('auth', auth)
        return self

    @property
    def hooks(self):
        """Return the hooks dict (an empty dict is created when unset)."""
        if self._hooks is None:
            self._hooks = {}
        return self._hooks

    def set_hooks(self, hooks):
        """Set the callback hooks.

        :param hooks: hooks dict
        :return: this object
        """
        self._hooks = hooks
        return self

    @property
    def params(self):
        """Return the query parameters dict (an empty dict is created when unset)."""
        if self._params is None:
            self._params = {}
        return self._params

    def set_params(self, params):
        """Set the query parameters.

        :param params: query parameters dict
        :return: this object
        """
        self._sets('params', params)
        return self

    @property
    def verify(self):
        """Return whether SSL certificates are verified."""
        return self._verify

    def set_verify(self, on_off):
        """Set whether SSL certificates are verified.

        :param on_off: whether to verify SSL certificates
        :return: this object
        """
        self._sets('verify', on_off)
        return self

    @property
    def cert(self):
        """Return the cert setting."""
        return self._cert

    def set_cert(self, cert):
        """Set the SSL client certificate.

        :param cert: path of a .pem file, or a ('cert', 'key') tuple
        :return: this object
        """
        self._sets('cert', cert)
        return self

    @property
    def adapters(self):
        """Return the adapters list (an empty list is created when unset)."""
        if self._adapters is None:
            self._adapters = []
        return self._adapters

    def add_adapter(self, url, adapter):
        """Register an adapter.

        :param url: url the adapter applies to
        :param adapter: adapter object
        :return: this object
        """
        # _adapters starts as None; create the list on first use
        if self._adapters is None:
            self._adapters = []
        self._adapters.append((url, adapter))
        return self

    @property
    def stream(self):
        """Return the stream setting."""
        return self._stream

    def set_stream(self, on_off):
        """Set whether responses are streamed.

        :param on_off: whether to stream response content
        :return: this object
        """
        self._sets('stream', on_off)
        return self

    @property
    def trust_env(self):
        """Return the trust_env setting."""
        return self._trust_env

    def set_trust_env(self, on_off):
        """Set whether the environment is trusted.

        :param on_off: whether to trust the environment
        :return: this object
        """
        self._sets('trust_env', on_off)
        return self

    @property
    def max_redirects(self):
        """Return the maximum number of redirects."""
        return self._max_redirects

    def set_max_redirects(self, times):
        """Set the maximum number of redirects.

        :param times: maximum number of redirects
        :return: this object
        """
        self._sets('max_redirects', times)
        return self

    def _sets(self, arg, val):
        """Assign ``self._<arg>`` and keep the deletion marks in sync.

        :param arg: option name without the leading underscore
        :param val: new value; ``None`` marks the option for deletion on save
        :return: None
        """
        setattr(self, f'_{arg}', val)
        if val is None:
            self._del_set.add(arg)
        else:
            self._del_set.discard(arg)

    def save(self, path=None):
        """Save the settings to an ini file.

        :param path: ini file path; ``'default'`` saves to the ``configs.ini``
                     next to this module, ``None`` saves to the file this
                     object was loaded from (or the default one if none)
        :return: absolute path of the saved file
        """
        default_ini = (Path(__file__).parent / 'configs.ini').absolute()

        if path == 'default' or (path is None and not self.ini_path):
            path = default_ini
        elif path is None:
            path = Path(self.ini_path).absolute()
        else:
            path = Path(path).absolute()

        path = path / 'config.ini' if path.is_dir() else path

        # Start from the target file when it exists so its other sections are
        # kept; otherwise start from the file this object was loaded from.
        if path.exists():
            om = OptionsManager(str(path))
        else:
            om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))

        options = session_options_to_dict(self)

        for name, value in options.items():
            if name not in ('download_path', 'timeout', 'proxies'):
                om.set_item('session_options', name, value)

        # these three live in their own sections
        om.set_item('paths', 'download_path', self.download_path)
        om.set_item('timeouts', 'implicit', self.timeout)
        om.set_item('proxies', 'http', self.proxies.get('http', None))
        om.set_item('proxies', 'https', self.proxies.get('https', None))

        for name in self._del_set:
            if name == 'download_path':
                om.set_item('paths', 'download_path', '')
            elif name == 'proxies':
                om.set_item('proxies', 'http', '')
                om.set_item('proxies', 'https', '')
            else:
                om.remove_item('session_options', name)

        path = str(path)
        om.save(path)

        return path

    def save_to_default(self):
        """Save the current settings to the default ini file."""
        return self.save('default')

    def as_dict(self):
        """Return this object as a dict."""
        return session_options_to_dict(self)


def session_options_to_dict(options):
    """Convert a session options object to a dict.

    :param options: ``SessionOptions`` object, a dict (returned unchanged), or
                    ``False`` / ``None`` for the built-in defaults
    :return: dict of the options that are not ``None``
    """
    if options in (False, None):
        return SessionOptions(read_file=False).as_dict()

    if isinstance(options, dict):
        return options

    # auth, cert, hooks and adapters are not part of this list, so they are
    # never exported (NOTE(review): confirm that this is intentional).
    attrs = ['headers', 'cookies', 'proxies', 'params', 'verify', 'stream', 'trust_env',
             'max_redirects', 'timeout', 'download_path']

    re_dict = dict()
    for attr in attrs:
        val = getattr(options, f'_{attr}')
        if val is not None:
            re_dict[attr] = val

    return re_dict
+ + @property + def download_path(self) -> str: ... + + def set_paths(self, download_path: Union[str, Path]) -> SessionOptions: ... + + @property + def timeout(self) -> Union[int, float]: ... + + def set_timeout(self, second: Union[int, float]) -> SessionOptions: ... + + @property + def headers(self) -> dict: ... + + def set_headers(self, headers: Union[dict, None]) -> SessionOptions: ... + + def set_a_header(self, attr: str, value: str) -> SessionOptions: ... + + def remove_a_header(self, attr: str) -> SessionOptions: ... + + @property + def cookies(self) -> list: ... + + def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict, None]) -> SessionOptions: ... + + @property + def auth(self) -> Union[Tuple[str, str], HTTPBasicAuth]: ... + + def set_auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> SessionOptions: ... + + @property + def proxies(self) -> dict: ... + + def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions: ... + + @property + def hooks(self) -> dict: ... + + def set_hooks(self, hooks: Union[dict, None]) -> SessionOptions: ... + + @property + def params(self) -> dict: ... + + def set_params(self, params: Union[dict, None]) -> SessionOptions: ... + + @property + def verify(self) -> bool: ... + + def set_verify(self, on_off: Union[bool, None]) -> SessionOptions: ... + + @property + def cert(self) -> Union[str, tuple]: ... + + def set_cert(self, cert: Union[str, Tuple[str, str], None]) -> SessionOptions: ... + + @property + def adapters(self): list: ... + + def add_adapter(self, url: str, adapter: HTTPAdapter) -> SessionOptions: ... + + @property + def stream(self) -> bool: ... + + def set_stream(self, on_off: Union[bool, None]) -> SessionOptions: ... + + @property + def trust_env(self) -> bool: ... + + def set_trust_env(self, on_off: Union[bool, None]) -> SessionOptions: ... + + @property + def max_redirects(self) -> int: ... + + def _sets(self, arg: str, val: Any) -> None: ... 
+ + def set_max_redirects(self, times: Union[int, None]) -> SessionOptions: ... + + def save(self, path: str = None) -> str: ... + + def save_to_default(self) -> str: ... + + def as_dict(self) -> dict: ... + +def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ... diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index a306171..9230acb 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -14,8 +14,9 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from tldextract import extract -from .common import get_pid_from_port, connect_browser -from .config import SessionOptions, DriverOptions, cookies_to_tuple, session_options_to_dict +from .common import get_pid_from_port, connect_browser, cookies_to_tuple +from .configs.session_options import SessionOptions, session_options_to_dict +from .configs.driver_options import DriverOptions class Drission(object): diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 77583b8..b99ba13 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -12,7 +12,8 @@ from typing import Union from selenium import webdriver from .common import unzip -from .config import OptionsManager, DriverOptions +from .configs.options_manage import OptionsManager +from .configs.driver_options import DriverOptions from .drission import Drission from .session_page import SessionPage diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index a1b2a18..e0dd769 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -13,7 +13,8 @@ from requests.structures import CaseInsensitiveDict from tldextract import extract from .base import BasePage -from .config import SessionOptions, cookies_to_tuple, cookie_to_dict +from .configs.session_options import SessionOptions +from .common import cookies_to_tuple, cookie_to_dict from 
.session_element import SessionElement, make_session_ele diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index e6f2576..0906dd9 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -13,7 +13,9 @@ from .base import BasePage from .chromium_base import ChromiumBase, Timeout from .chromium_driver import ChromiumDriver from .chromium_page import ChromiumPage, ChromiumDownloadSetter -from .config import DriverOptions, SessionOptions, cookies_to_tuple +from .configs.session_options import SessionOptions +from .configs.driver_options import DriverOptions +from .common import cookies_to_tuple from .session_page import SessionPage